diff --git a/bia-converter-light/.env_template b/bia-converter-light/.env_template
deleted file mode 100644
index f74f43c8..00000000
--- a/bia-converter-light/.env_template
+++ /dev/null
@@ -1,9 +0,0 @@
-bia_data_dir=Root of directory to save/read models from disk if using persistence_mode=disk
-cache_root_dirpath=PATH_TO_CACHE_FILES_DOWNLOADED_AND_CONVERTED
-bia_api_basepath=CREDENTIALS_FOR_LOCALHOST
-bia_api_username=CREDENTIALS_FOR_LOCALHOST
-bia_api_password=CREDENTIALS_FOR_LOCALHOST
-endpoint_url=EMBASSY_S3_ENDPOINT_URL_LEAVE_THIS_DUMMY_FOR_NO_S3
-bucket_name=BUCKET_NAME_OF_S3_BUCKET
-bioformats2raw_java_home=DIDNT_NEED_TO_CONFIGURE_THIS
-bioformats2raw_bin=PATH_TO_BIOFORMATS_TO_RAW_BIN_DIR
diff --git a/bia-converter-light/README.md b/bia-converter-light/README.md
deleted file mode 100644
index a379ee1d..00000000
--- a/bia-converter-light/README.md
+++ /dev/null
@@ -1,82 +0,0 @@
-## Description
-This sub-package creates image representations *and* the actual images associated with those representations. It is named *bia-converter-light* because it only converts one file reference per image representation, whereas the upcoming *bia-converter* sub-package will handle more complex conversions, including the creation of multichannel and multi-slice images from multiple file references per image representation.
-
-## Setup
-
-1. Install the project using poetry.
-2. Configure your environment. Either create a `.env` file from `.env_template` in this directory, or set environment variables for the items in `.env_template` (an illustrative `.env` sketch is shown below this list):
- * For getting objects from the API set:
- - bia_api_basepath
- - bia_api_username
- - bia_api_password
- * For caching downloaded/converted images locally the default location is `~/.cache/bia-converter/` which can be changed by setting `cache_root_dirpath`
- * For conversion to zarr format [bioformats2raw](https://github.com/glencoesoftware/bioformats2raw) is used. Set:
- - bioformats2raw_java_home
- - bioformats2raw_bin
- * For upload to S3 set:
- - endpoint_url
- - bucket_name
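-
-An illustrative `.env` sketch (values are examples only; the API, endpoint and bucket values shown are the defaults from `bia_converter_light/config.py`, and the paths are placeholders):
-
-```sh
-# Illustrative values only - substitute your own configuration
-bia_api_basepath=http://localhost:8080
-bia_api_username=test@example.com
-bia_api_password=test
-cache_root_dirpath=/path/to/cache/bia-converter
-endpoint_url=https://uk1s3.embassy.ebi.ac.uk
-bucket_name=bia-integrator-data
-bioformats2raw_java_home=/path/to/java/home
-bioformats2raw_bin=/path/to/bioformats2raw/bin/bioformats2raw
-```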
-
-The AWS credentials for the endpoint also need to be set. This is done exclusively via environment variables. Either:
-* AWS_ACCESS_KEY_ID *and* AWS_SECRET_ACCESS_KEY
-
-OR
-* AWS_SHARED_CREDENTIALS_FILE with optional AWS_PROFILE and/or AWS_CONFIG_FILE
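-
-For example, a minimal setup using the first option (the key values below are placeholders):
-
-```sh
-# Placeholders - substitute the credentials for your S3 endpoint
-export AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY_ID
-export AWS_SECRET_ACCESS_KEY=YOUR_SECRET_ACCESS_KEY
-```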
-
-## Usage
-This package provides 3 CLI commands:
- * **propose**: used to create a tsv file with details of file references that can be converted to images.
- * **convert-image**: used to create actual images associated with the representations (the necessary BIA image object and associated UPLOADED_BY_SUBMITTER representation are created if they do not exist).
- * **update-example-image-uri-for-dataset**: used to update the example image uri for a dataset.
-
-Subsequent instructions assume the project is installed, the environment is configured, and that this directory is the working directory.
-
-## Creating details of file references to convert
-To create a tsv file with details of file references to convert for one or more studies, run:
-``` sh
-$ poetry run bia-converter-light propose --accession-ids-path <path-to-file-with-accession-ids>
-```
-or, to specify accession IDs on the command line:
-``` sh
-$ poetry run bia-converter-light propose -a <accession-id-1> -a <accession-id-2>
-```
-E.g.:
-```sh
-$ poetry run bia-converter-light propose -a S-BIAD1444 -a S-BIAD1266
-```
-By default this writes output to `./file_references_to_convert.tsv` which can be changed with the `--output-path` option.
-
-
-## Converting images associated with representations
-The input is a file containing details of file references for conversion. This is of the format produced by the `propose` command above. Additionally, if conversion is required for a subset of accession ids in the file, these can be specified on the command line. INTERACTIVE_DISPLAY and THUMBNAIL representations are created for all file references, and a STATIC_DISPLAY is created for the first file reference processed for each study.
-
-A STATIC_DISPLAY representation is not created for every file reference because the BIA website only needs one static display per experimental imaging dataset. All interactive images, however, need a thumbnail for the website, so THUMBNAIL representations are created alongside every INTERACTIVE_DISPLAY.
-
-The convention followed for conversion is:
-1. Ensure a BIA Image object and UPLOADED_BY_SUBMITTER representation exist - the `bia-assign-image` subpackage is called to create these if they do not exist
-2. Create an INTERACTIVE_DISPLAY representation
-3. Create a THUMBNAIL representation - an INTERACTIVE_DISPLAY representation is a prerequisite
-4. Create a STATIC_DISPLAY representation if necessary - an INTERACTIVE_DISPLAY representation is a prerequisite
-
-Example cli use:
-```sh
-$ poetry run bia-converter-light convert-image --conversion-details-path <path-to-conversion-details-tsv>
-```
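-
-As noted above, conversion can also be restricted to a subset of the accession IDs listed in the proposals file via the `--accession-ids`/`-a` option (option names taken from `cli.py`; the file name and accession IDs below are illustrative):
-```sh
-$ poetry run bia-converter-light convert-image -c file_references_to_convert.tsv -a S-BIAD1444 -a S-BIAD1266
-```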
-
-## Updating example image uri for dataset
-The command takes the UUID of a STATIC_DISPLAY representation of the dataset as its only argument:
-```sh
-$ poetry run bia-converter-light update-example-image-uri-for-dataset <static-display-representation-uuid>
-```
-
-
-
-## convert-image dependencies
-
-bioformats2raw: see [bioformats2raw](https://github.com/glencoesoftware/bioformats2raw)
-
-As a prerequisite to installing bioformats2raw (which is documented in the link above), you need to install blosc for image file compression:
-* On Ubuntu (at least): `sudo apt-get install libblosc-dev`
-* On macOS: `brew install c-blosc`
-
-AWS CLI: see [aws cli](https://aws.amazon.com/cli/)
-
-Note: an issue on macOS (encountered by fsherwood): bioformats2raw may not be able to find blosc and fails with an error along the lines of `Exception java.lang.UnsatisfiedLinkError: Unable to load library 'blosc'`, even after adding the library location to `-Djna.library.path`. To solve this, create a symlink to the library in a location that is being searched, e.g. in `/opt/homebrew/Cellar/openjdk/23/libexec/openjdk.jdk/Contents/Home/bin` run `ln -s /opt/homebrew/Cellar/c-blosc/1.21.6/lib/libblosc.dylib libblosc.dylib`.
diff --git a/bia-converter-light/bia_converter_light/__init__.py b/bia-converter-light/bia_converter_light/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/bia-converter-light/bia_converter_light/cli.py b/bia-converter-light/bia_converter_light/cli.py
deleted file mode 100644
index 5bbf4445..00000000
--- a/bia-converter-light/bia_converter_light/cli.py
+++ /dev/null
@@ -1,315 +0,0 @@
-from typing import List, Union
-import csv
-from pathlib import Path
-from uuid import UUID
-import typer
-from typing_extensions import Annotated
-
-from bia_shared_datamodels.semantic_models import ImageRepresentationUseType
-from bia_integrator_api.exceptions import NotFoundException
-from bia_integrator_api import PrivateApi
-from bia_assign_image.cli import assign as assign_image
-from bia_converter_light.config import api_client
-from bia_converter_light.utils import save_to_api
-from bia_converter_light.conversion import (
- convert_to_zarr,
- convert_to_png,
-)
-
-from bia_converter_light.propose_utils import (
- write_convertible_file_references_for_accession_id,
-)
-
-import logging
-from rich.logging import RichHandler
-
-from bia_shared_datamodels import uuid_creation
-
-app = typer.Typer()
-
-
-logging.basicConfig(
- level=logging.INFO, format="%(message)s", handlers=[RichHandler(show_time=False)]
-)
-
-# Set default page size for API queries
-DEFAULT_PAGE_SIZE = 10000
-
-logger = logging.getLogger()
-
-representations_app = typer.Typer()
-app.add_typer(
- representations_app,
- name="representations",
- help="Create specified representations",
-)
-
-
-def validate_propose_inputs(
- accession_ids: list[str] = None, accession_ids_path: Path = None
-) -> None:
- """Validate that only one of accession_ids or file_path is provided."""
- if accession_ids and accession_ids_path:
- typer.echo(
- "Error: Provide either a list of accession IDs or a file path, not both.",
- err=True,
- )
- raise typer.Exit(code=1)
- if not accession_ids and not accession_ids_path:
- typer.echo(
- "Error: You must provide either a list of accession IDs or a file path.",
- err=True,
- )
- raise typer.Exit(code=1)
-
-
-def ensure_assigned(
- accession_id: str, image_uuid: str, file_reference_uuid: str
-) -> None:
- """Ensure Image and corresponding UPLOADED_BY_USER representation exist"""
- try:
- api_client.get_image(image_uuid)
- except NotFoundException:
- logger.warning(
- f"Could not find Image with uuid {image_uuid}. Attempting creation"
- )
- assign_image(
- accession_id,
- [
- file_reference_uuid,
- ],
- "api",
- )
-
-
-def get_conversion_details(conversion_details_path: Path) -> List[dict]:
- with conversion_details_path.open("r") as fid:
- field_names = [
- "accession_id",
- "study_uuid",
- "file_path",
- "file_reference_uuid",
- "size_in_bytes",
- "size_human_readable",
- ]
- reader = csv.DictReader(fid, fieldnames=field_names, delimiter="\t")
- # Some files may not have header, so check first row
- first_row = next(reader)
- conversion_details = [row for row in reader]
- if first_row.get("accession_id") != "accession_id":
- conversion_details.insert(0, first_row)
-
- return conversion_details
-
-
-def convert_file_reference_to_image_representation(
- accession_id: str,
- file_reference_uuid: str,
- use_type: ImageRepresentationUseType,
- verbose: bool = False,
-) -> None:
- """Convert file ref to image rep of use type. Upload to s3
-
- Create the actual image for the image representation and stage to S3,
- and persist the image representation in the API.
-
-    This function is only temporary whilst image conversion via the API
-    is being developed.
-    """
-
- if verbose:
- logger.setLevel(logging.DEBUG)
-
- assert isinstance(
- api_client, PrivateApi
- ), f"Expected valid instance of . Got : {type(api_client)} - are your API credentials valid and/or is the API server online?"
-
- bia_images = api_client.get_image_linking_file_reference(
- file_reference_uuid, page_size=DEFAULT_PAGE_SIZE
- )
- n_bia_images = len(bia_images)
- assert (
- n_bia_images < 2
- ), f"Expected one image to be associated with file reference uuid {file_reference_uuid}. Got {n_bia_images}: {bia_images}. Not sure what to do!!!"
- if n_bia_images == 1:
- bia_image = bia_images[0]
- image_uuid = f"{bia_image.uuid}"
- ensure_assigned(accession_id, image_uuid, file_reference_uuid)
- else:
- image_uuid = uuid_creation.create_image_uuid(
- [
- file_reference_uuid,
- ]
- )
- image_uuid = str(image_uuid)
- ensure_assigned(accession_id, image_uuid, file_reference_uuid)
- bia_image = api_client.get_image(image_uuid)
- file_reference = api_client.get_file_reference(file_reference_uuid)
-
- if use_type == ImageRepresentationUseType.INTERACTIVE_DISPLAY:
- return convert_to_zarr(accession_id, file_reference, bia_image)
- elif use_type in (
- ImageRepresentationUseType.THUMBNAIL,
- ImageRepresentationUseType.STATIC_DISPLAY,
- ):
- return convert_to_png(accession_id, file_reference, bia_image, use_type)
- else:
- logger.warning(
- f"Cannot create/convert images for image representation of type: {use_type.value} - exiting"
- )
- return
-
-
-def update_example_image_uri(
- representation_uuid: Union[UUID, str],
- verbose: bool = False,
-) -> bool:
- # pdb.set_trace()
- try:
- representation = api_client.get_image_representation(representation_uuid)
- except Exception as e:
- # raise(e)
- logger.error(f"Could not retrieve image representation. Error was {e}.")
- return False
- if representation.use_type == ImageRepresentationUseType.STATIC_DISPLAY:
- image = api_client.get_image(representation.representation_of_uuid)
- dataset = api_client.get_dataset(image.submission_dataset_uuid)
- dataset.example_image_uri.append(representation.file_uri[0])
- save_to_api(
- [
- dataset,
- ]
- )
-
- logger.info(
- f"Updated example image uri of dataset {dataset.uuid} to {dataset.example_image_uri}"
- )
- return True
- else:
- logger.warning(
- f"Cannot update dataset example image uri when image representation use type is {representation.use_type.value}"
- )
- return False
-
-
-@app.command()
-def update_example_image_uri_for_dataset(
- representation_uuid: Annotated[
- str,
- typer.Argument(help="UUID for a STATIC_DISPLAY representation of the dataset"),
- ],
- # TODO: Have a 'mode' option to allow replace, prepend or append
- verbose: Annotated[bool, typer.Option("-v")] = False,
-):
- update_example_image_uri(representation_uuid, verbose)
-
-
-@app.command()
-def convert_image(
- accession_ids: Annotated[
- List[str], typer.Option("--accession-ids", "-a", help="Accession ID(s).")
- ] = ["all"],
- conversion_details_path: Annotated[
- Path,
- typer.Option(
- "--conversion-details-path",
- "-c",
- exists=True,
- help="Path to tsv file containing details needed for conversion (produced by 'propose' command).",
- ),
- ] = None,
-):
- """Convert file references to image representations"""
- # The convention is to create
- # i) INTERACTIVE_DISPLAY
- # ii) THUMBNAIL
- # iii) If first image for accession ID STATIC_DISPLAY
- conversion_details = get_conversion_details(conversion_details_path)
- if accession_ids == ["all"]:
- set_accession_ids = {cd["accession_id"] for cd in conversion_details}
- accession_ids = list(set_accession_ids)
- accession_ids.sort()
- else:
- # Filter conversion details for accession IDs to process
- conversion_details_temp = [
- cd for cd in conversion_details if cd["accession_id"] in accession_ids
- ]
- conversion_details = conversion_details_temp
-
- accession_ids_with_static_display = set()
- for conversion_detail in conversion_details:
- accession_id = conversion_detail["accession_id"]
- file_reference_uuid = conversion_detail["file_reference_uuid"]
- for use_type in (
- ImageRepresentationUseType.INTERACTIVE_DISPLAY,
- ImageRepresentationUseType.THUMBNAIL,
- ):
- convert_file_reference_to_image_representation(
- accession_id,
- file_reference_uuid,
- use_type,
- )
- if accession_id not in accession_ids_with_static_display:
- # Get STATIC_DISPLAY
- convert_file_reference_to_image_representation(
- accession_id,
- file_reference_uuid,
- ImageRepresentationUseType.STATIC_DISPLAY,
- )
- accession_ids_with_static_display.add(accession_id)
-
-
-@app.command()
-def propose(
- accession_ids: Annotated[
- List[str], typer.Option("--accession-ids", "-a", help="Accession ID(s).")
- ] = None,
- accession_ids_path: Annotated[
- Path,
- typer.Option(
- "--accession-ids-path",
- "-p",
- exists=True,
- help="Path to a file containing accession IDs one per line.",
- ),
- ] = None,
- max_items: Annotated[int, typer.Option()] = 5,
- output_path: Annotated[Path, typer.Option()] = None,
- append: Annotated[bool, typer.Option("--append/--no-append")] = True,
-):
- """Propose images to convert"""
-
- # TODO: Make this output yaml in form of bia-converter
- # TODO: Write test
-
- # Get accession IDs
- validate_propose_inputs(accession_ids, accession_ids_path)
- if accession_ids_path:
- accession_ids = [a for a in accession_ids_path.read_text().strip().split("\n")]
-
- if not output_path:
- output_path = Path(__file__).parent.parent / "file_references_to_convert.tsv"
- if output_path.exists():
- assert output_path.is_file()
- if not append:
- output_path.unlink()
-
- for accession_id in accession_ids:
- n_lines_written = write_convertible_file_references_for_accession_id(
- accession_id,
- output_path,
- max_items,
- append=True,
- )
- logger.info(
- f"Written {n_lines_written} proposals to {output_path} for {accession_id}"
- )
-
-
-@app.callback()
-def main() -> None:
- return
-
-
-if __name__ == "__main__":
- app()
diff --git a/bia-converter-light/bia_converter_light/config.py b/bia-converter-light/bia_converter_light/config.py
deleted file mode 100644
index caa3cc1a..00000000
--- a/bia-converter-light/bia_converter_light/config.py
+++ /dev/null
@@ -1,60 +0,0 @@
-from pathlib import Path
-import os
-import logging
-
-from pydantic import Field
-from pydantic_settings import BaseSettings, SettingsConfigDict
-
-from bia_integrator_api.util import get_client_private, get_client
-
-logger = logging.getLogger("__main__." + __name__)
-
-default_output_base = (
- f"{Path(os.environ.get('HOME', '')) / '.cache' / 'bia-integrator-data-sm'}"
-)
-
-
-class Settings(BaseSettings):
- model_config = SettingsConfigDict(
- env_file=f"{Path(__file__).parent.parent / '.env'}",
- env_file_encoding="utf-8",
- case_sensitive=False,
- # extra="forbid",
- )
-
- bia_data_dir: str = Field(default_output_base)
- endpoint_url: str = Field("https://uk1s3.embassy.ebi.ac.uk")
- bucket_name: str = Field("bia-integrator-data")
- cache_root_dirpath: Path = Field(Path.home() / ".cache" / "bia-converter")
- bioformats2raw_java_home: str = Field("")
- bioformats2raw_bin: str = Field("")
- bia_api_basepath: str = Field(
- "http://localhost:8080", json_schema_extra={"env": "BIA_API_BASEPATH"}
- )
- bia_api_username: str = Field(
- "test@example.com", json_schema_extra={"env": "BIA_API_USERNAME"}
- )
- bia_api_password: str = Field("test", json_schema_extra={"env": "BIA_API_PASSWORD"})
-
-
-settings = Settings()
-
-try:
- api_client = get_client_private(
- username=settings.bia_api_username,
- password=settings.bia_api_password,
- api_base_url=settings.bia_api_basepath,
- )
-except Exception as e:
- message = f"Could not initialise private api_client: {e}"
- logger.warning(message)
- api_client = None
-
-try:
- read_only_client = get_client(
- api_base_url=settings.bia_api_basepath,
- )
-except Exception as e:
- message = f"Could not initialise public api_client: {e}"
- logger.warning(message)
-    read_only_client = None
diff --git a/bia-converter-light/bia_converter_light/conversion.py b/bia-converter-light/bia_converter_light/conversion.py
deleted file mode 100644
index 91363810..00000000
--- a/bia-converter-light/bia_converter_light/conversion.py
+++ /dev/null
@@ -1,219 +0,0 @@
-# All code in this module originates from bia-converter/bia_converter/io.py
-import logging
-import subprocess
-from uuid import UUID
-from pathlib import Path
-
-
-from bia_converter_light.config import settings, api_client
-from bia_converter_light.io import stage_fileref_and_get_fpath, copy_local_to_s3
-from bia_converter_light import utils
-from bia_shared_datamodels import bia_data_model, semantic_models
-from bia_assign_image import image_representation
-from bia_converter_light.rendering import generate_padded_thumbnail_from_ngff_uri
-from bia_converter_light.utils import save_to_api
-
-logger = logging.getLogger(__name__)
-DEFAULT_PAGE_SIZE = 10000
-
-
-def run_zarr_conversion(input_fpath, output_dirpath):
- """Convert the local file at input_fpath to Zarr format, in a directory specified by
- output_dirpath"""
-
- zarr_cmd = f'export JAVA_HOME={settings.bioformats2raw_java_home} && {settings.bioformats2raw_bin} "{input_fpath}" "{output_dirpath}"'
-
- logger.info(f"Converting with {zarr_cmd}")
-
- retval = subprocess.run(
- zarr_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
- )
- assert (
- retval.returncode == 0
- ), f"Error converting to zarr: {retval.stderr.decode('utf-8')}"
-
-
-def cached_convert_to_zarr_and_get_fpath(representation, input_fpath):
- zarr_fpath = get_local_path_to_zarr(representation.uuid)
- dst_dir_basepath = zarr_fpath.parent
- dst_dir_basepath.mkdir(exist_ok=True, parents=True)
-
- if not zarr_fpath.exists():
- run_zarr_conversion(input_fpath, zarr_fpath)
-
- return zarr_fpath
-
-
-def get_local_path_to_zarr(image_representation_uuid: str | UUID) -> Path:
- return (
- settings.cache_root_dirpath / "zarr" / f"{image_representation_uuid}.ome.zarr"
- )
-
-
-def convert_to_zarr(
- accession_id: str,
- file_reference: bia_data_model.FileReference,
- image: bia_data_model.Image,
-) -> bia_data_model.ImageRepresentation:
- """Create zarr image of file reference"""
-
- local_path_to_uploaded_by_submitter_rep = stage_fileref_and_get_fpath(
- file_reference
- )
-
- # Check if representation already exists -> update. Otherwise, create.
- all_representations_for_image = api_client.get_image_representation_linking_image(
- str(image.uuid), page_size=DEFAULT_PAGE_SIZE
- )
- representations = [
- r
- for r in all_representations_for_image
- if r.use_type == semantic_models.ImageRepresentationUseType.INTERACTIVE_DISPLAY
- ]
- n_representations = len(representations)
- assert (
- n_representations < 2
- ), f"Expected one interactive display to be associated with image {image.uuid}. Got {n_representations}: {representations}. Not sure what to do!!!"
- if n_representations == 1:
- representation = representations[0]
- else:
- representation = image_representation.get_image_representation(
- accession_id,
- [
- file_reference,
- ],
- image,
- semantic_models.ImageRepresentationUseType.INTERACTIVE_DISPLAY,
- )
- local_path_to_zarr = cached_convert_to_zarr_and_get_fpath(
- representation,
- local_path_to_uploaded_by_submitter_rep,
- )
- pixel_metadata = utils.get_ome_zarr_pixel_metadata(str(local_path_to_zarr))
-
- # When converting for SAB in August 2024, some images returned tuples in metadata for XYZCT.
- def _format_pixel_metadata(key):
- value = pixel_metadata.pop(key, None)
- if isinstance(value, tuple):
- value = value[0]
- if isinstance(value, str):
- value = int(value)
- return value
-
- representation.size_x = _format_pixel_metadata("SizeX")
- representation.size_y = _format_pixel_metadata("SizeY")
- representation.size_z = _format_pixel_metadata("SizeZ")
- representation.size_c = _format_pixel_metadata("SizeC")
- representation.size_t = _format_pixel_metadata("SizeT")
-
- attributes_from_ome = {
- "name": "attributes_from_bioformat2raw_conversion",
- "provenance": semantic_models.AttributeProvenance.bia_conversion,
- "value": pixel_metadata,
- }
- representation.attribute.append(
- semantic_models.Attribute.model_validate(attributes_from_ome)
- )
-
- representation.image_format = ".ome.zarr"
- file_uri = copy_local_to_s3(
- local_path_to_zarr,
- utils.create_s3_uri_suffix_for_image_representation(
- accession_id, representation
- ),
- )
- representation.file_uri = [
- file_uri + "/0",
- ]
- save_to_api(
- [
- representation,
- ]
- )
- message = f"Converted uploaded by submitter to ome.zarr and uploaded to S3: {representation.file_uri}"
- logger.info(message)
-
- return representation
-
-
-def convert_to_png(
- accession_id: str,
- file_reference: bia_data_model.FileReference,
- image: bia_data_model.Image,
- use_type: semantic_models.ImageRepresentationUseType,
-) -> bia_data_model.ImageRepresentation:
- """Create png image of file reference"""
-
- # Check for interactive display representation (ome.zarr)
- # This has to exist before we can generate thumbnails/static display
- all_representations_for_image = api_client.get_image_representation_linking_image(
- str(image.uuid), page_size=DEFAULT_PAGE_SIZE
- )
- representations = [
- r
- for r in all_representations_for_image
- if r.use_type == semantic_models.ImageRepresentationUseType.INTERACTIVE_DISPLAY
- ]
- n_representations = len(representations)
- assert (
- n_representations == 1
- ), f"Need exactly one interactive display to be associated with image {image.uuid}. For generation of {use_type.value} representation. Got {n_representations}: {representations}."
- interactive_image_representation = representations[0]
-
- # Check for local path to zarr and use if it exists
- local_path_to_zarr = get_local_path_to_zarr(interactive_image_representation.uuid)
- if local_path_to_zarr.exists():
- source_uri = f"{local_path_to_zarr / '0'}"
- logger.info(
- f"Cached version of required ome.zarr exists locally at {source_uri}. Using this instead of S3 version"
- )
- else:
- source_uri = interactive_image_representation.file_uri[0]
- logger.info(
- f"No cached version of required ome.zarr exists locally. Using {source_uri}"
- )
-
- # create image
- if use_type == semantic_models.ImageRepresentationUseType.THUMBNAIL:
- dims = (256, 256)
- else:
- dims = (512, 512)
-
- representation = image_representation.get_image_representation(
- accession_id,
- [
- file_reference,
- ],
- image,
- use_type,
- )
- created_image = generate_padded_thumbnail_from_ngff_uri(source_uri, dims=dims)
- created_image_path = utils.get_local_path_for_representation(
- representation.uuid, ".png"
- )
- with created_image_path.open("wb") as fh:
- created_image.save(fh)
- logger.info(
- f"Saved {representation.use_type} representation to {created_image_path}"
- )
-
- # upload to s3
- representation.image_format = ".png"
- s3_uri = utils.create_s3_uri_suffix_for_image_representation(
- accession_id, representation
- )
- file_uri = copy_local_to_s3(created_image_path, s3_uri)
-
- # update representation
- representation.file_uri = [
- file_uri,
- ]
- save_to_api(
- [
- representation,
- ]
- )
- message = f"Created {representation.use_type} image and uploaded to S3: {representation.file_uri}"
- logger.info(message)
-
- return representation
diff --git a/bia-converter-light/bia_converter_light/io.py b/bia-converter-light/bia_converter_light/io.py
deleted file mode 100644
index a3a5ee44..00000000
--- a/bia-converter-light/bia_converter_light/io.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# All code in this module originates from bia-converter/bia_converter/io.py
-
-from pathlib import Path
-import urllib
-import logging
-import shutil
-import subprocess
-
-import requests
-
-from .config import settings
-from .utils import get_image_extension
-
-
-logger = logging.getLogger(__name__)
-
-
-def upload_dirpath_as_zarr_image_rep(src_dirpath, accession_id, image_id):
- dst_prefix = f"{settings.bucket_name}/{accession_id}/{image_id}/{image_id}.zarr"
- logger.info(f"Uploading with prefix {dst_prefix}")
- cmd = f'aws --region us-east-1 --endpoint-url {settings.endpoint_url} s3 sync "{src_dirpath}/" s3://{dst_prefix} --acl public-read'
- logger.info(f"Uploading using command {cmd}")
- subprocess.run(cmd, shell=True)
-
- uri = f"{settings.endpoint_url}/{settings.bucket_name}/{accession_id}/{image_id}/{image_id}.zarr"
-
- return uri
-
-
-def copy_uri_to_local(src_uri: str, dst_fpath: Path):
- """Copy the object at the given source URI to the local path specified by dst_fpath."""
-
- logger.info(f"Fetching {src_uri} to {dst_fpath}")
-
- with requests.get(src_uri, stream=True) as r:
- r.raise_for_status()
- with open(dst_fpath, "wb") as fh:
- shutil.copyfileobj(r.raw, fh)
-
-
-def copy_local_to_s3(src_fpath: Path, dst_key: str) -> str:
- """Copy the local file with the given path to the S3 location for which the endpoint
- and bucket are described in the global Config object, and the destination key is
- passed as an argument.
-
- Returns: URI of uploaded object."""
-
- endpoint_url = settings.endpoint_url
- bucket_name = settings.bucket_name
-
- recursive = "--recursive" if src_fpath.is_dir() else ""
- cmd = f"aws --region us-east-1 --endpoint-url {settings.endpoint_url} s3 cp {recursive} {src_fpath} s3://{bucket_name}/{dst_key} --acl public-read"
- logger.info(f"Uploading {src_fpath} to {dst_key}")
- subprocess.run(cmd, shell=True)
-
- return f"{endpoint_url}/{bucket_name}/{dst_key}"
-
-
-def fetch_fileref_to_local(fileref, dst_fpath, max_retries=3):
- # TODO: Clarify if 'format' represents old 'type' e.g. fire_object, file_in_zip etc.
- # if fileref.type == "file_in_zip":
- if fileref.format == "file_in_zip":
- raise NotImplementedError
- else:
- # Ensure uri is encoded
- fileref_uri = urllib.parse.quote(fileref.uri, safe="/:")
- # Check size after download and retry if necessary
-        # Fall back to a HEAD request for the expected size when the file
-        # reference has no size recorded
-        expected_size = (
-            int(requests.head(fileref_uri).headers["content-length"])
-            if fileref.size_in_bytes == 0
-            else fileref.size_in_bytes
-        )
- for attempt in range(1, max_retries + 1):
- try:
- copy_uri_to_local(fileref_uri, dst_fpath)
- download_size = dst_fpath.stat().st_size
- if download_size == expected_size:
- break
-
- logger.warning(
- f"Download attempt {attempt} did not give expected size. Got {download_size} expected {expected_size}"
- )
- if attempt >= max_retries:
- raise Exception(
- f"{attempt} download attempt(s) did not give expected size. Got {download_size} expected {expected_size}. Maximum retries reached"
- )
- except requests.exceptions.HTTPError as download_error:
- if attempt >= max_retries:
- logger.error(
- f"Download attempt {attempt} resulted in error: {download_error} - exiting"
- )
- raise download_error
-
-
-# ToDo add max_retries as parameter to function definition
-def stage_fileref_and_get_fpath(fileref) -> Path:
- cache_dirpath = settings.cache_root_dirpath / "files"
- cache_dirpath.mkdir(exist_ok=True, parents=True)
-
- # suffix = Path(urlparse(fileref.file_path).path).suffix
- suffix = get_image_extension(fileref.file_path)
- dst_fname = f"{fileref.uuid}{suffix}"
- dst_fpath = cache_dirpath / dst_fname
- logger.info(f"Checking cache for {fileref.file_path}")
-
- if not dst_fpath.exists():
- logger.info(f"File not in cache. Downloading file to {dst_fpath}")
- fetch_fileref_to_local(fileref, dst_fpath)
- elif dst_fpath.stat().st_size != fileref.size_in_bytes:
- # ToDo: As of 04/12/2023 filerefs for type file_in_zip have size_in_bytes=0
- # Need to modify index_from_zips to get filesize info
- logger.info(
- f"File in cache with size {dst_fpath.stat().st_size}. Expected size={fileref.size_in_bytes}. Downloading again to {dst_fpath}"
- )
- fetch_fileref_to_local(fileref, dst_fpath)
- else:
- logger.info(f"File exists at {dst_fpath}")
-
- return dst_fpath
diff --git a/bia-converter-light/bia_converter_light/omezarrmeta.py b/bia-converter-light/bia_converter_light/omezarrmeta.py
deleted file mode 100644
index 4a2aa109..00000000
--- a/bia-converter-light/bia_converter_light/omezarrmeta.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from typing import List, Optional
-
-from pydantic import BaseModel
-
-
-class RDefs(BaseModel):
- defaultT: int
- model: str
- defaultZ: int
-
-
-class Window(BaseModel):
- min: float
- max: float
- start: float
- end: float
-
-
-class Channel(BaseModel):
- color: str
- coefficient: int
- active: bool
- label: str
- window: Window
- family: Optional[str] = None
- inverted: Optional[bool] = None
-
-
-class Omero(BaseModel):
- rdefs: RDefs
- channels: List[Channel]
-
-
-class CoordinateTransformation(BaseModel):
- scale: List[float]
- type: str
-
-
-class DataSet(BaseModel):
- path: str
- coordinateTransformations: Optional[List[CoordinateTransformation]] = None
-
-
-class MSMetadata(BaseModel):
- method: str
- version: str
-
-
-class Axis(BaseModel):
- name: str
- type: str
- unit: Optional[str] = None
-
-
-class MultiScaleImage(BaseModel):
- datasets: List[DataSet]
- metadata: Optional[MSMetadata] = None
- axes: Optional[List[Axis]] = None
- version: str
-
-
-class Column(BaseModel):
- name: str
-
-
-class Row(BaseModel):
- name: str
-
-
-class Well(BaseModel):
- columnIndex: int
- path: str
- rowIndex: int
-
-
-class Plate(BaseModel):
- columns: List[Column]
- rows: List[Row]
- wells: List[Well]
- version: str
-
-
-class ZMeta(BaseModel):
- omero: Optional[Omero] = None
- multiscales: Optional[List[MultiScaleImage]] = []
- plates: Optional[Plate] = None
diff --git a/bia-converter-light/bia_converter_light/propose_utils.py b/bia-converter-light/bia_converter_light/propose_utils.py
deleted file mode 100644
index dae3fe42..00000000
--- a/bia-converter-light/bia_converter_light/propose_utils.py
+++ /dev/null
@@ -1,160 +0,0 @@
-"""Functions to allow proposing images to convert
-
-Propose file references to convert by sorting based on size,
-partitioning into n groups and randomly selecting one
-file reference from each group
-"""
-
-import math
-import random
-from typing import List, Dict
-from pathlib import Path
-from bia_converter_light.config import read_only_client
-from bia_converter_light.utils import in_bioformats_single_file_formats_list
-
-
-def select_indicies(n_indicies: int, n_to_select: int = 5) -> list[int]:
- """Select a number of indicies from input list
-
- Select a number of indicies from input list. Split list into
- n_to_select chunks and randomly select an index from each chunk
- """
-
- # Seed to allow reproducibility on repeated runs.
- # Note: Only applies to selections after 23/12/2024
- random.seed(42)
-
- if n_indicies <= n_to_select:
- return list(range(n_indicies))
-
- min_per_chunk = math.floor(n_indicies / n_to_select)
- remainder = n_indicies % n_to_select
- selected_indicies = []
- stop = -1
- for i in range(n_to_select):
- n_per_chunk = min_per_chunk
- if remainder > 0 and i < remainder:
- n_per_chunk += 1
- start = stop + 1
- stop = start + n_per_chunk - 1
- selected_index = random.randint(start, stop)
- selected_indicies.append(selected_index)
- return selected_indicies
-
-
-def count_lines(file_path):
- with open(file_path, "r") as file:
- return sum(1 for _ in file)
-
-
-def read_specific_line(file_path, line_number):
- with open(file_path, "r") as file:
- for current_line_number, line in enumerate(file, start=0):
- if current_line_number == line_number:
- return line # .strip()
- return None # If the line number is beyond the end of the file
-
-
-def sizeof_fmt(num, suffix="B"):
- for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
- if abs(num) < 1024.0:
- return f"{num:3.1f}{unit}{suffix}"
- num /= 1024.0
- return f"{num:.1f}Yi{suffix}"
-
-
-def get_convertible_file_references(accession_id: str) -> List[Dict]:
- """Get details of convertible images for given accession ID"""
-
-    # ToDo: Fix this to repeatedly call using pagination until all data is returned
- PAGE_SIZE_DEFAULT = 10000000
-
- study = read_only_client.search_study_by_accession(accession_id)
- if not study:
- return []
- datasets = read_only_client.get_dataset_linking_study(
- study.uuid, page_size=PAGE_SIZE_DEFAULT
- )
- file_references = []
- for dataset in datasets:
- file_references.extend(
- read_only_client.get_file_reference_linking_dataset(
- dataset.uuid, PAGE_SIZE_DEFAULT
- )
- )
-
- convertible_file_references = [
- {
- "accession_id": accession_id,
- "study_uuid": study.uuid,
- "name": fr.file_path,
- "uuid": fr.uuid,
- "size_in_bytes": fr.size_in_bytes,
- "size_human_readable": sizeof_fmt(fr.size_in_bytes),
- }
- for fr in file_references
- if in_bioformats_single_file_formats_list(fr.file_path)
- ]
-
- convertible_file_references = sorted(
- convertible_file_references,
- key=lambda fr: (fr["size_in_bytes"], fr["name"]),
- reverse=True,
- )
- return convertible_file_references
-
-
-def write_convertible_file_references_for_accession_id(
- accession_id: str,
- output_path: Path,
- max_items: int = 5,
- append: bool = True,
-) -> int:
- """
- Write details of file references proposed for conversion to file
- """
-
- convertible_file_references = get_convertible_file_references(accession_id)
-
- n_proposal_candidates = len(convertible_file_references)
- indicies_to_select = select_indicies(n_proposal_candidates, max_items)
-
- if append:
- open_text_mode = "a"
- else:
- open_text_mode = "w"
-
- lines = [
- "\t".join(
- [
- convertible_file_references[i]["accession_id"],
- f"{convertible_file_references[i]['study_uuid']}",
- convertible_file_references[i]["name"],
- f"{convertible_file_references[i]['uuid']}",
- f"{convertible_file_references[i]['size_in_bytes']}",
- convertible_file_references[i]["size_human_readable"],
- ]
- )
- for i in indicies_to_select
- ]
- with output_path.open(open_text_mode) as fid:
- # If we are at start of file write header.
- if fid.tell() == 0:
- fid.writelines(
- "\t".join(
- [
- "accession_id",
- "study_uuid",
- "name",
- "file_reference_uuid",
- "size_in_bytes",
- "size_human_readable",
- ]
- )
- )
- fid.writelines("\n")
- fid.writelines("\n".join(lines))
- # Write a new line so next append starts on next line
- fid.writelines("\n")
-
- return len(indicies_to_select)
diff --git a/bia-converter-light/bia_converter_light/rendering.py b/bia-converter-light/bia_converter_light/rendering.py
deleted file mode 100644
index a21224f6..00000000
--- a/bia-converter-light/bia_converter_light/rendering.py
+++ /dev/null
@@ -1,487 +0,0 @@
-import logging
-from typing import Dict, List, Optional
-
-import zarr
-import numpy as np
-import dask.array as da
-from PIL import Image, ImageOps
-from pydantic import BaseModel
-from microfilm.colorify import multichannel_to_rgb
-from matplotlib.colors import LinearSegmentedColormap
-
-from urllib.parse import urlparse
-import s3fs
-
-# from bia_integrator_tools.utils import get_ome_ngff_rep_by_accession_and_image
-from .omezarrmeta import ZMeta
-
-logger = logging.getLogger("__main__." + __name__)
-
-DEFAULT_COLORS = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 0]]
-
-
-class ChannelRenderingSettings(BaseModel):
- """Rendering settings for a specific channel."""
-
- label: Optional[str] = ""
- colormap_start: List[float] = [0.0, 0.0, 0.0]
- colormap_end: List[float] = None
- window_start: Optional[int] = None
- window_end: Optional[int] = None
-
-
-class RenderingInfo(BaseModel):
- """Rending settings for a whole image."""
-
- channel_renders: List[ChannelRenderingSettings]
- default_z: Optional[int] = None
- default_t: Optional[int] = None
-
-
-class NGFFProxyImage(object):
- """Helper class for working with remove NGFF images to allow us to access
- size properties of that image, and fetch multiscale data with specific
- resolutions."""
-
- def __init__(self, uri):
- self.uri = uri.rstrip("/0")
- self.zgroup = open_zarr_wrapper(self.uri)
- self.array_paths = []
- try:
- self.zgroup.visititems(self._get_array_paths)
- except Exception as e:
-            print(
-                f"Exception {e} when trying to get array_paths. Setting array_paths to ['0']"
-            )
-
- if len(self.array_paths) == 0:
- self.array_paths = [
- "0",
- ]
-
- self.ngff_metadata = ZMeta.parse_obj(self.zgroup.attrs.asdict())
- self._init_darray()
-
- def _get_array_paths(self, name, obj):
- """Get the paths of groups containing array data"""
- if not obj:
- return None
- if "Array" in obj.__str__():
- self.array_paths.append(name)
- elif len(self.array_paths) > 0:
- # We terminate once we have array paths for a subgroup to
- # prevent recursively traversing groups which may take a while
- # especially when a store has a large number of groups
- return obj
- return None
-
- # @classmethod
- # def from_bia_accession_and_image_ids(cls, accession_id, image_id):
- # ome_ngff_rep = get_ome_ngff_rep_by_accession_and_image(accession_id, image_id)
- # return cls(ome_ngff_rep.uri)
-
- def _init_darray(self):
- self.darray = dask_array_from_ome_ngff_uri(self.uri, self.array_paths[0])
-
- # Try to get axes info from image metadata - if this goes wrong
- # or if image is plate well, fallback to old method
- try:
- axes = self.ngff_metadata.multiscales[0].axes
- if axes is not None and len(axes) > 2:
- size_t, size_c, size_z, size_y, size_x = (1, 1, 1, 1, 1)
- for index, axis in enumerate(axes):
- # The conditional statements below could be avoided
- # using eval(f"size_{axis.name} = self.darray.shape[index]")
- if axis.name == "t":
- size_t = self.darray.shape[index]
- elif axis.name == "c":
- size_c = self.darray.shape[index]
- elif axis.name == "z":
- size_z = self.darray.shape[index]
- elif axis.name == "y":
- size_y = self.darray.shape[index]
- elif axis.name == "x":
- size_x = self.darray.shape[index]
-
- self.size_t = size_t
- self.size_c = size_c
- self.size_z = size_z
- self.size_y = size_y
- self.size_x = size_x
- return
- else:
- raise Exception("NGFF metadata ({axes}) less than 2 entries")
- except Exception as e:
- message = f"Could not get axes info from NGFF. Message was {e}. Falling back to old method"
- logger.warning(message)
-
- # FIXME - this is not a reliable way to determine which dimensions are present in which
- # order, we should be parsing the NGFF metadata to do this
-
- if len(self.darray.shape) == 5:
- size_t, size_c, size_z, size_y, size_x = self.darray.shape
- elif len(self.darray.shape) == 4:
- size_t = 1
- size_c, size_z, size_y, size_x = self.darray.shape
- elif len(self.darray.shape) == 3:
- size_z, size_y, size_x = self.darray.shape
- size_t = 1
- size_c = 1
- elif len(self.darray.shape) == 2:
- size_y, size_x = self.darray.shape
- size_z = 1
- size_t = 1
- size_c = 1
- else:
- raise Exception("Can't handle this array shape")
-
- self.size_t = size_t
- self.size_c = size_c
- self.size_z = size_z
- self.size_y = size_y
- self.size_x = size_x
-
- def get_dask_array_with_min_dimensions(self, dims):
- ydim, xdim = dims
- # path_keys = [dataset.path for dataset in self.ngff_metadata.multiscales[0].datasets]
- path_keys = self.array_paths
-
- for path_key in reversed(path_keys):
- zarr_array = self.zgroup[path_key]
- if len(zarr_array.shape) >= 2:
- size_y, size_x = zarr_array.shape[-2:]
- else:
- raise Exception("Can't handle this array shape")
-
- if (size_y >= ydim) and (size_x >= xdim):
- break
-
- return da.from_zarr(zarr_array)
-
- @property
- def all_sizes(self):
- path_keys = [
- dataset.path for dataset in self.ngff_metadata.multiscales[0].datasets
- ]
-
- for path_key in path_keys:
- zarr_array = self.zgroup[path_key]
- yield zarr_array.shape
-
-
-class BoundingBox2DRel(BaseModel):
- """Bounding box within a plane, described in relative coordniates such that
- 1.0 is the full width/height of the plane image."""
-
- x: float
- y: float
- xsize: float
- ysize: float
-
-
-class BoundingBox2DAbs(BaseModel):
- """Bounding box within a plane, described in absolute coordinates."""
-
- x: int
- y: int
- xsize: int
- ysize: int
-
-
-class PlaneRegionSelection(BaseModel):
- """A 2D rectangular region."""
-
- t: int
- z: int
- c: int
- bb: BoundingBox2DRel
-
-
-class RenderingView(BaseModel):
- """A view of a BIAImage that should provide settings to produce a 2D image.
-
- Used for, e.g., generating thumbnails or example images."""
-
- t: int = 0
- z: int = 0
- region: Optional[PlaneRegionSelection]
-
- channel_rendering: Dict[int, ChannelRenderingSettings]
-
-
-def open_zarr_wrapper(uri):
- """Wrapper using s3fs to open a S3 zarr or normal method for file zarr"""
-
- if uri.startswith("http"):
- uri_parts = urlparse(uri)
- endpoint_url = f"{uri_parts.scheme}://{uri_parts.netloc}"
- s3_bucket = f"s3:/{uri_parts.path}"
- fs = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint_url})
- return zarr.open(s3fs.S3Map(s3_bucket, s3=fs), mode="r", path=r"/")
- else:
- return zarr.open(uri)
-
-
-def scale_to_uint8(array):
- """Given an input array, convert to uint8, including scaling to fill the
- 0-255 range.
-
- Primarily used to convert general numpy arrays into an image rendering
- suitable dtype."""
-
- scaled = array.astype(np.float32)
-
- if scaled.max() - scaled.min() == 0:
- return np.zeros(array.shape, dtype=np.uint8)
-
- scaled = 255 * (scaled - scaled.min()) / (scaled.max() - scaled.min())
-
- return scaled.astype(np.uint8)
-
-
-def apply_window(array, window_start, window_end):
- """Apply a windowing function to the given array, values above or below
- the window are clipped to the edges, and the range is scaled to the
- window range."""
-
- scaled = (array - window_start) / (window_end - window_start)
- clipped = np.clip(scaled, 0, 1)
-
- return clipped
-
-
-def generate_channel_renderings(n_channels):
- """Generate a list channel renderings for a number of channels."""
-
- threemap_ends = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
-
- channel_renderings = {
- n: ChannelRenderingSettings(colormap_end=colormap_end)
- for n, colormap_end in enumerate(threemap_ends)
- }
-
- return channel_renderings
-
-
-def dask_array_from_ome_ngff_uri(uri, path_key="0"):
- """Get a dask array from a specific OME-NGFF uri"""
-
- # zgroup = zarr.open(uri)
- # fs = s3fs.S3FileSystem(
- # anon=True, client_kwargs={"endpoint_url": "https://uk1s3.embassy.ebi.ac.uk"}
- # )
- zgroup = open_zarr_wrapper(uri)
- darray = da.from_zarr(zgroup[path_key])
-
- return darray
-
-
-def dask_array_from_ome_ngff_rep(ome_ngff_rep, path_key="0"):
- """Get a dask array from an OME-NGFF image representation."""
-
- zgroup = open_zarr_wrapper(ome_ngff_rep.uri)
- darray = da.from_zarr(zgroup[path_key])
-
- return darray
-
-
-def pad_to_target_dims(im, target_dims, fill=(0, 0, 0)):
- """Given a PIL Image and a set of target dimensions, pad the image so that
- it fits those dimensions."""
-
- w, h = im.size
-
- delta_w = target_dims[0] - w
- delta_h = target_dims[1] - h
-
- padding = (
- delta_w // 2,
- delta_h // 2,
- delta_w - (delta_w // 2),
- delta_h - (delta_h // 2),
- )
- padded_im = ImageOps.expand(im, padding, fill=fill)
-
- return padded_im
-
-
-def select_region_from_dask_array(darray, region):
- """Select a single plane from a Dask array, and compute it."""
-
- if len(darray.shape) >= 2:
- ydim, xdim = darray.shape[-2:]
- else:
- raise Exception("Can't handle this array shape")
-
- # Select region - reduce size if number of elements greater than
- # N_ELEMENTS_MAX, to avoid OOM errors.
- # TODO: Discuss using different pyramid levels with MH
- N_ELEMENTS_MAX = 16384 * 16384
- ymin = int(region.bb.y * ydim)
- ymax = int((region.bb.y + region.bb.ysize) * ydim)
-
- xmin = int(region.bb.x * xdim)
- xmax = int((region.bb.x + region.bb.xsize) * xdim)
- n_elements = (ymax - ymin + 1) * (xmax - xmin + 1)
- to_truncate = 100
- while n_elements > N_ELEMENTS_MAX:
- ymin += to_truncate
- ymax -= to_truncate
-
- xmin += to_truncate
- xmax -= to_truncate
-
- n_elements = (ymax - ymin + 1) * (xmax - xmin + 1)
-
- if len(darray.shape) == 5:
- return darray[region.t, region.c, region.z, ymin:ymax, xmin:xmax].compute()
- elif len(darray.shape) == 4:
- return darray[region.c, region.z, ymin:ymax, xmin:xmax].compute()
- elif len(darray.shape) == 3:
- return darray[region.z, ymin:ymax, xmin:xmax].compute()
- elif len(darray.shape) == 2:
- return darray[ymin:ymax, xmin:xmax].compute()
- else:
- raise Exception("Can't handle this array shape")
-
-
-def render_multiple_2D_arrays(arrays, colormaps):
- """Given a list of 2D arrays and a list of colormaps, apply each colormap
- merge into a single 2D RGB image."""
-
- imarray, _, _, _ = multichannel_to_rgb(arrays, colormaps)
- im = Image.fromarray(scale_to_uint8(imarray))
-
- return im
-
-
-DEFAULT_BB = BoundingBox2DRel(x=0, y=0, xsize=1, ysize=1)
-
-
-def render_proxy_image(
- proxy_im,
- bbrel=DEFAULT_BB,
- dims=(512, 512),
- t=None,
- z=None,
- csettings=None,
- mode=None,
-):
- """In order to render a 2D plane we need to:
-
- 1. Lazy-load the image as a Dask array.
- 2. Select the plane (single t and z values) we'll use.
- 3. Separate channels.
- 4. Apply a color map to each channel array.
- 5. Merge the channel arrays."""
-
- ydim, xdim = dims
-
- min_ydim_needed = ydim / bbrel.ysize
- min_xdim_needed = xdim / bbrel.xsize
-
- darray = proxy_im.get_dask_array_with_min_dimensions(
- (min_xdim_needed, min_ydim_needed)
- )
-
- if not t:
- t = proxy_im.size_t // 2
- if not z:
- z = proxy_im.size_z // 2
-
- channels_to_render = min(proxy_im.size_c, len(DEFAULT_COLORS))
- if not mode:
- if channels_to_render == 1:
- mode = "grayscale"
- elif channels_to_render == 3:
- mode = "RGB"
- else:
- mode = "channels"
-
- if not csettings:
- if mode == "grayscale":
- csettings = {
- n: ChannelRenderingSettings(colormap_end=[1, 1, 1])
- for n in range(channels_to_render)
- }
- else:
- csettings = {
- n: ChannelRenderingSettings(colormap_end=DEFAULT_COLORS[n])
- for n in range(channels_to_render)
- }
-
- region_per_channel = {
- c: PlaneRegionSelection(t=t, z=z, c=c, bb=bbrel)
- for c in range(channels_to_render)
- }
-
- channel_arrays = {
- c: select_region_from_dask_array(darray, region)
- for c, region in region_per_channel.items()
- }
-
- for c, channel_array in channel_arrays.items():
- if csettings[c].window_end:
- windowed_array = apply_window(
- channel_array, csettings[c].window_start, csettings[c].window_end
- )
- channel_arrays[c] = windowed_array
-
- # ToDo: Discuss whether to create global DEFAULT_COLORMAPS constant
- # so we do not call create_linear_cmap_dict too many times...
- # OR use hex values: https://stackoverflow.com/questions/38147997/how-to-change-a-linearsegmentedcolormap-to-a-different-distribution-of-color
- colormaps = {
- c: LinearSegmentedColormap(
- f"n{n}", create_linear_cmap_dict([0, 0, 0], csetting.colormap_end)
- )
- for n, (c, csetting) in enumerate(csettings.items())
- }
-
- im = render_multiple_2D_arrays(channel_arrays.values(), list(colormaps.values()))
-
- return im
-
-
-def generate_padded_thumbnail_from_ngff_uri(
- ngff_uri, dims=(256, 256), autocontrast=True
-):
- """Given a NGFF URI, generate a 2D thumbnail of the given dimensions."""
-
- proxy_im = NGFFProxyImage(str(ngff_uri))
-
- im = render_proxy_image(proxy_im)
- im.thumbnail(dims)
- im_rgb = im.convert("RGB")
-
- if autocontrast:
- cim = ImageOps.autocontrast(im_rgb, (0, 1))
- else:
- cim = im_rgb
-
- padded = pad_to_target_dims(cim, dims)
-
- return padded
-
-
-def create_linear_cmap_dict(
- start_rgb=[
- 0.0,
- 0.0,
- 0.0,
- ],
- end_rgb=[
- 1.0,
- 1.0,
- 1.0,
- ],
-):
- """Return a colormap dict for 'segmentedData' of LinearSegmentedColormap"""
- cdict = {
- "red": None,
- "green": None,
- "blue": None,
- }
- for i, key in enumerate(cdict.keys()):
- cdict[key] = [(0.0, start_rgb[i], start_rgb[i]), (1.0, end_rgb[i], end_rgb[i])]
-
- return cdict
diff --git a/bia-converter-light/bia_converter_light/resources/bioformats_curated_file_formats_readme.txt b/bia-converter-light/bia_converter_light/resources/bioformats_curated_file_formats_readme.txt
deleted file mode 100644
index 2e7dac28..00000000
--- a/bia-converter-light/bia_converter_light/resources/bioformats_curated_file_formats_readme.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Curated files last updated 21/05/2024
-
-Notes on creating the curated files:
-
-1. Copy supported formats from https://bio-formats.readthedocs.io/en/stable/supported-formats.html
-2. Paste into spreadsheet or text editor and get all extensions
-3. Ensure extensions are unique and sorted
-4. Manually curate into 'bioformats_curated_single_file_formats.txt' (formats that have a one-to-one conversion with bioformats2raw) and 'bioformats_curated_other_file_formats.txt' (formats that require more input for conversion, e.g. pattern files)
-
-The above steps can be accomplished in a browser developer console using the following js snippet (thanks to LA):
-
-[...new Set(Array.from(document.getElementsByTagName("tbody")[0].querySelectorAll("td:nth-child(2)")).map(el => el.innerText.split(",")).flat().filter(extension => extension.length))].sort()
-
-TODO: write python version of js snippet e.g. using selenium
diff --git a/bia-converter-light/bia_converter_light/resources/bioformats_curated_other_file_formats.txt b/bia-converter-light/bia_converter_light/resources/bioformats_curated_other_file_formats.txt
deleted file mode 100644
index d311ed78..00000000
--- a/bia-converter-light/bia_converter_light/resources/bioformats_curated_other_file_formats.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-.afi
-.apl
-.avi
-.c01
-.cfg
-.csv
-.dat
-.db
-.dcm
-.dib
-.dicom
-.dv
-.exp
-.flex
-.h5
-.hdr
-.hed
-.htd
-.html
-.ics
-.ids
-.l2d
-.labels
-.lei
-.mdb
-.mea
-.mov
-.mtb
-.mvd2
-.nd
-.ndpis
-.nhdr
-.nii.gz
-.nrrd
-.obf
-.obsep
-.oib
-.oif
-.ome
-.ome.btf
-.ome.tf2
-.ome.tf8
-.ome.tif
-.ome.tiff
-.ome.xml
-.omp2info
-.par
-.pcoraw
-.pds
-.pic
-.pnl
-.r3d
-.rcpnl
-.res
-.spc
-.stk
-.tnb
-.txt
-.vff
-.vms
-.vsi
-.vws
-.wpi
-.xdce
-.xml
-.xys
diff --git a/bia-converter-light/bia_converter_light/resources/bioformats_curated_single_file_formats.txt b/bia-converter-light/bia_converter_light/resources/bioformats_curated_single_file_formats.txt
deleted file mode 100644
index 4c5918a8..00000000
--- a/bia-converter-light/bia_converter_light/resources/bioformats_curated_single_file_formats.txt
+++ /dev/null
@@ -1,120 +0,0 @@
-.1sc
-.2fl
-.acff
-.afm
-.aim
-.al3d
-.ali
-.am
-.amiramesh
-.arf
-.bif
-.bin
-.bip
-.bmp
-.btf
-.ch5
-.cif
-.cr2
-.crw
-.cxd
-.czi
-.dm2
-.dm3
-.dm4
-.dti
-.eps
-.epsi
-.fdf
-.fff
-.ffr
-.fits
-.fli
-.frm
-.gel
-.gif
-.grey
-.hdf
-.his
-.hx
-.i2i
-.im3
-.img
-.ims
-.inr
-.ipl
-.ipm
-.ipw
-.j2k
-.jp2
-.jpeg
-.jpf
-.jpg
-.jpk
-.jpx
-.klb
-.lif
-.liff
-.lim
-.lms
-.lof
-.lsm
-.map
-.mnc
-.mng
-.mod
-.mrc
-.mrcs
-.mrw
-.msr
-.naf
-.nd2
-.ndpi
-.nef
-.nii
-.oir
-.pbm
-.pcx
-.pgm
-.pict
-.png
-.ppm
-.pr3
-.ps
-.psd
-.qptiff
-.raw
-.rec
-.scn
-.sdt
-.seq
-.sif
-.sld
-.sldy
-.sm2
-.sm3
-.spe
-.spi
-.st
-.stp
-.svs
-.sxm
-.tf2
-.tf8
-.tfr
-.tga
-.tif
-.tiff
-.top
-.v
-.wat
-.wav
-.wlz
-.xlef
-.xqd
-.xqf
-.xv
-.zfp
-.zfr
-.zvi
diff --git a/bia-converter-light/bia_converter_light/utils.py b/bia-converter-light/bia_converter_light/utils.py
deleted file mode 100644
index 7e08acbf..00000000
--- a/bia-converter-light/bia_converter_light/utils.py
+++ /dev/null
@@ -1,231 +0,0 @@
-from pydantic import BaseModel
-from uuid import UUID
-from pathlib import Path
-from typing import Dict, List
-import logging
-import xml.etree.ElementTree as ET
-
-from pydantic.alias_generators import to_snake
-
-from bia_integrator_api.exceptions import NotFoundException
-import bia_integrator_api.models as api_models
-from bia_shared_datamodels import bia_data_model
-from bia_converter_light.config import settings, api_client
-
-logger = logging.getLogger("__main__." + __name__)
-
-
-def get_total_zarr_size(zarr_path: str) -> int:
- """Return size of zarr archive in bytes"""
-
- # Assume the zarr store is a local disk
- # TODO: Generalise for any uri (including file:// and s3://)
- # TODO: so the argument name for this func should be 'zarr_uri'
- zarr_path = Path(zarr_path)
- return (
- sum(f.stat().st_size for f in zarr_path.rglob("*")) + zarr_path.stat().st_size
- )
-
-
-single_file_formats_path = (
- Path(__file__).parent / "resources" / "bioformats_curated_single_file_formats.txt"
-)
-single_file_formats = [
- s for s in single_file_formats_path.read_text().split("\n") if len(s) > 0
-]
-
-
-def extension_in_bioformats_single_file_formats_list(ext: str) -> bool:
- if len(ext) > 1 and not ext[0] == ".":
- ext = "." + ext
- return ext in single_file_formats
-
-
-def in_bioformats_single_file_formats_list(file_location: Path | str) -> bool:
- """Check if ext of path/uri/name of file in bioformats single file formats list"""
- ext = get_image_extension(f"{file_location}")
- return extension_in_bioformats_single_file_formats_list(ext)
-
-
-def get_ome_zarr_pixel_metadata(zarr_location: str) -> dict:
- """Return pixel metadata entry of METADATA.ome.xml of bioformats2raw zarr"""
-
- # This function assumes the zarr has been created at specified location
- # on disk and was produced by bioformats2raw -> OME/METADATA.ome.xml
- # exists
- #
- # TODO: handle general uris e.g. https://, s3:// or file://
- metadata_path = Path(zarr_location) / "OME" / "METADATA.ome.xml"
- if metadata_path.is_file():
- metadata = parse_xml_string(metadata_path.read_text())
- try:
- image_metadata = metadata[
- "{http://www.openmicroscopy.org/Schemas/OME/2016-06}OME"
- ][r"{http://www.openmicroscopy.org/Schemas/OME/2016-06}Image"]
-
- # Multichannel images may have a list - use first element
- # TODO: Discuss with team what to do in this case
- if isinstance(image_metadata, list):
- image_metadata = image_metadata[0]
-
- pixel_metadata = image_metadata[
- "{http://www.openmicroscopy.org/Schemas/OME/2016-06}Pixels"
- ]
- except KeyError:
- pixel_metadata = {}
- else:
- pixel_metadata = {}
-
- return pixel_metadata
-
-
-# TODO: discuss replacing this function with something from either
-# ome_zarr or ome_zarr_metadata
-def parse_xml_string(xml_string: str) -> dict:
- """
- Parse an XML string and convert it into a dictionary.
-
- This is intended to be used for OME/METADATA.ome.xml created
- by bioformats2raw.
- """
-
-    def _xml_to_dict(element: ET.Element) -> dict:
- """
- Convert XML element and children to a dict, including attributes.
- """
- # Initialize the dictionary to store the element's data
- result = {}
-
- # Include attributes in the result if they exist
- if element.attrib:
- result.update({k: v for k, v in element.attrib.items()})
-
- # If the element has no children, return its text or result if there are attributes
- if len(element) == 0:
- return element.text if not result else result
-
- # Iterate over the children of the element
- for child in element:
- child_dict = _xml_to_dict(child)
-
- # Handle duplicate tags by storing them as a list
- if child.tag in result:
- if isinstance(result[child.tag], list):
- result[child.tag].append(child_dict)
- else:
- result[child.tag] = [result[child.tag], child_dict]
- else:
- result[child.tag] = child_dict
-
- return result
-
- # Parse the XML string into an ElementTree
- root = ET.fromstring(xml_string)
-
- # Convert the ElementTree into a dictionary
- return {root.tag: _xml_to_dict(root)}
-
-
-def create_s3_uri_suffix_for_image_representation(
- accession_id: str, representation: bia_data_model.ImageRepresentation
-) -> str:
- """Create the part of the s3 uri that goes after the bucket name for an image representation"""
-
- assert representation.image_format and len(representation.image_format) > 0
- assert isinstance(representation.representation_of_uuid, UUID) or isinstance(
- UUID(representation.representation_of_uuid), UUID
- )
- return f"{accession_id}/{representation.representation_of_uuid}/{representation.uuid}{representation.image_format}"
-
-
-def get_local_path_for_representation(uuid: str | UUID, image_format: str) -> Path:
- """Return path to local cache for this image representation"""
-
- if not image_format.startswith("."):
- image_format = f".{image_format}"
- cache_dirpath = settings.cache_root_dirpath / "other_converted_images"
- cache_dirpath.mkdir(exist_ok=True, parents=True)
- return cache_dirpath / f"{uuid}{image_format}"
-
-
-def get_image_extension(file_path: str) -> str:
- """Return standardized image extension for a given file path."""
-
- # Process files with multi suffix extensions
- multi_suffix_ext = {
- ".ome.zarr.zip": ".ome.zarr.zip",
- ".zarr.zip": ".zarr.zip",
- ".ome.zarr": ".ome.zarr",
- ".ome.tiff": ".ome.tiff",
- ".ome.tif": ".ome.tiff",
- ".tar.gz": ".tar.gz",
- }
-
- for ext, mapped_value in multi_suffix_ext.items():
- if file_path.lower().endswith(ext):
- return mapped_value
-
- # Standardise extensions expressed using different suffixes
- ext_map = {
- ".jpeg": ".jpg",
- ".tif": ".tiff",
- }
-
- ext = Path(file_path).suffix.lower()
- if ext in ext_map:
- return ext_map[ext]
- else:
- return ext
-
-
-def merge_dicts(dict_list: List[Dict[str, str]]) -> Dict:
- """Merge list of dicts to one dict. Values for repeated keys are put into lists
-
- Assumes all input dict values are strings as in function type hint
- """
-
- if not dict_list:
- return {}
-
- merged_dict = dict_list[0].copy()  # copy so the caller's first dict is not mutated in place
-
- for dictionary in dict_list[1:]:
- for key, value in dictionary.items():
- # If the key already exists in the merged dictionary
- if key in merged_dict:
- # If it's not already a list, convert the current value to a list
- if not isinstance(merged_dict[key], list):
- merged_dict[key] = [merged_dict[key]]
- # Append the new value to the list
- merged_dict[key].append(value)
- else:
- # If the key does not exist, add it to the merged dictionary
- merged_dict[key] = value
-
- return merged_dict
-
-
-def save_to_api(object_list: List[BaseModel]) -> None:
- """Convert bia_data_model to bia_integrator_api.model and persist to API"""
-
- for obj in object_list:
- api_obj = getattr(api_models, obj.model.type_name).model_validate_json(
- obj.model_dump_json()
- )
- # First try to retrieve object
- try:
- api_get_method = f"get_{to_snake(obj.model.type_name)}"
- api_copy_of_obj = getattr(api_client, api_get_method)(api_obj.uuid)
- except NotFoundException:
- api_copy_of_obj = None
-
- if api_obj == api_copy_of_obj:
- message = f"Not writing object with uuid: {obj.uuid} and type: {obj.model.type_name} to API because an identical copy of object exists in API"
- logger.warning(message)
- continue
- elif api_copy_of_obj:
- api_obj.version = api_copy_of_obj.version + 1
-
- api_creation_method = f"post_{to_snake(obj.model.type_name)}"
- getattr(api_client, api_creation_method)(api_obj)
- logger.debug(f"persisted {obj.uuid} of type {obj.model.type_name} to API")
diff --git a/bia-converter-light/file_references_to_convert.tsv b/bia-converter-light/file_references_to_convert.tsv
deleted file mode 100644
index 7184086d..00000000
--- a/bia-converter-light/file_references_to_convert.tsv
+++ /dev/null
@@ -1,16 +0,0 @@
-accession_id study_uuid name file_reference_uuid size_in_bytes size_human_readable
-S-BIAD1444 cfd7d1f1-d215-4311-9d92-ac77b18e73f6 WT/WT-028.tif b3a888d4-d20c-4ea8-bdee-423a6727c957 522428 510.2KiB
-S-BIAD1444 cfd7d1f1-d215-4311-9d92-ac77b18e73f6 c8-1_set1/c8-1_set1-018.tif 14c72b7e-e05c-4e68-9d30-605e6a62e02d 515312 503.2KiB
-S-BIAD1444 cfd7d1f1-d215-4311-9d92-ac77b18e73f6 c8-1_set1/c8-1_set1-010.tif c178922c-8525-4922-aa2e-90dfcf84e30a 473364 462.3KiB
-S-BIAD1444 cfd7d1f1-d215-4311-9d92-ac77b18e73f6 c8-1_set2/c8-1_set2-026.tif 01f912ae-2919-44c4-bec5-b9c2222a2feb 428224 418.2KiB
-S-BIAD1444 cfd7d1f1-d215-4311-9d92-ac77b18e73f6 WT/WT-008.tif b35cc0d0-6560-4cc8-bff2-c0c7a58693a0 406184 396.7KiB
-S-BIAD1266 a1d3488b-a325-4992-911a-2404fb7cd390 Fig.5A-C_N2_miRCon_Sham_n10_t30 min.Project Maximum Z_XYc561.tif 8b307eb1-5b61-4eb5-90c1-454c3bb122ec 524925 512.6KiB
-S-BIAD1266 a1d3488b-a325-4992-911a-2404fb7cd390 Fig.7D-F_N2_CsA_Sham_n9_t20 min.Project Maximum Z_XYc640.tif 26dcc756-1ad5-4b60-a6b5-cd289880ad65 524923 512.6KiB
-S-BIAD1266 a1d3488b-a325-4992-911a-2404fb7cd390 Fig.5A-C_N2_miRCon_Sham_n8_t10 min.Project Maximum Z_XYc488.tif 15ad9853-4813-430e-903b-8e187a75688e 524923 512.6KiB
-S-BIAD1266 a1d3488b-a325-4992-911a-2404fb7cd390 Fig.5A-C_N1_miR153_iLTP_n1_t30 min.Project Maximum Z_XYc488.tif 49fbfaf4-ed84-4d98-a876-799c7efb3cd3 524923 512.6KiB
-S-BIAD1266 a1d3488b-a325-4992-911a-2404fb7cd390 Fig.2A-C_N3_miRCon_n40.Project Maximum Z_XYc488.tif aa51e605-e4ac-43a7-9ace-25c2694e1d42 524917 512.6KiB
-S-BIAD1285 55ecc13b-5896-434f-893e-88d992f21648 SR1482_WSIs/SR1482_40X_HE_T333_02.czi f7d4f019-e738-42f9-8687-991b73e71218 6589973632 6.1GiB
-S-BIAD1285 55ecc13b-5896-434f-893e-88d992f21648 SR1482_WSIs/SR1482_40X_HE_T120_02.czi a57ec624-4dd7-4130-8911-d2777ae27852 5860090976 5.5GiB
-S-BIAD1285 55ecc13b-5896-434f-893e-88d992f21648 SR386_WSIs/SR386_40X_HE_T211_01.czi 8e436a59-d7a0-4dce-9319-8b96497854b2 4680591136 4.4GiB
-S-BIAD1285 55ecc13b-5896-434f-893e-88d992f21648 SR386_WSIs/SR386_40X_HE_T081_01.czi ff408246-ce37-4204-9418-7484c61dd534 2315012512 2.2GiB
-S-BIAD1285 55ecc13b-5896-434f-893e-88d992f21648 SR1482_WSIs/SR1482_40X_HE_T276_01.czi 9c1b3a3d-7f21-472c-96b8-93d231813564 961088224 916.6MiB
diff --git a/bia-converter-light/pyproject.toml b/bia-converter-light/pyproject.toml
deleted file mode 100644
index ce6bad62..00000000
--- a/bia-converter-light/pyproject.toml
+++ /dev/null
@@ -1,40 +0,0 @@
-[tool.poetry]
-name = "bia-converter-light"
-version = "0.1.0"
-description = "BIA simple approach to creation of images associated with image representations"
-authors = ["Kola Babalola "]
-license = "Apache Software License 2.0"
-readme = "README.md"
-packages = [{include = "bia_converter_light"}]
-
-[tool.poetry.dependencies]
-python = "^3.10,<3.12"
-requests = "^2.31.0"
-pytest = "^7.0"
-bia-shared-datamodels = { path = "../bia-shared-datamodels", develop = true }
-bia-integrator-api = { path = "../clients/python", develop = true }
-bia-ingest = { path = "../bia-ingest", develop = true }
-bia-test-data = { path = "../bia-test-data", develop = true }
-bia-assign-image = { path = "../bia-assign-image", develop = true }
-typer = "^0.12.3"
-typing-extensions = "^4.12.2"
-pydantic-settings = "^2.3.4"
-
-# Dependencies for image conversion and upload to Embassy S3
-zarr = "^2.18.3"
-dask = "^2024.8.2"
-microfilm = "^0.2.1"
-setuptools = "^74.1.2"
-fsspec = "^2024.9.0"
-s3fs = "^2024.9.0"
-
-[tool.poetry.scripts]
-bia-converter-light = "bia_converter_light.cli:app"
-
-
-[tool.poetry.group.dev.dependencies]
-ipython = "^8.22.1"
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
diff --git a/bia-converter-light/scripts/print_details_of_convertible_images.py b/bia-converter-light/scripts/print_details_of_convertible_images.py
deleted file mode 100644
index 0bc32550..00000000
--- a/bia-converter-light/scripts/print_details_of_convertible_images.py
+++ /dev/null
@@ -1,70 +0,0 @@
-"""Ad hoc script to select images to convert based on size"""
-
-import typer
-from bia_converter_light.config import api_client
-from bia_converter_light.utils import in_bioformats_single_file_formats_list
-
-PAGE_SIZE_DEFAULT = 10000000
-
-
-def sizeof_fmt(num, suffix="B"):
- for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
- if abs(num) < 1024.0:
- return f"{num:3.1f}{unit}{suffix}"
- num /= 1024.0
- return f"{num:.1f}Yi{suffix}"
-
-
-app = typer.Typer()
-
-
-def get_details_of_images_that_can_be_converted(accession_id: str):
- study = api_client.search_study_by_accession(accession_id)
- assert study
- datasets = api_client.get_dataset_linking_study(
- study.uuid, page_size=PAGE_SIZE_DEFAULT
- )
- file_references = []
- for dataset in datasets:
- file_references.extend(
- api_client.get_file_reference_linking_dataset(
- dataset.uuid, PAGE_SIZE_DEFAULT
- )
- )
-
- convertible_file_references = [
- {
- "accession_id": accession_id,
- "study_uuid": study.uuid,
- "name": fr.file_path,
- "uuid": fr.uuid,
- "size_in_bytes": fr.size_in_bytes,
- "size_human_readable": sizeof_fmt(fr.size_in_bytes),
- }
- for fr in file_references
- if in_bioformats_single_file_formats_list(fr.file_path)
- ]
-
- convertible_file_references = sorted(
- convertible_file_references, key=lambda fr: fr["size_in_bytes"], reverse=True
- )
- return convertible_file_references
-
-
-@app.command()
-def print_details_of_convertible_images(accession_id: str):
- """
- Print details of images that can be converted.
- """
-
- convertible_file_references = get_details_of_images_that_can_be_converted(
- accession_id
- )
- for cfr in convertible_file_references:
- print(
- f"{cfr['accession_id']}\t{cfr['study_uuid']}\t{cfr['name']}\t{cfr['uuid']}\t{cfr['size_in_bytes']}\t{cfr['size_human_readable']}"
- )
-
-
-if __name__ == "__main__":
- app()
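
The `sizeof_fmt` helper in the script above produces the human-readable sizes recorded in `file_references_to_convert.tsv`. A quick standalone check (the function is repeated verbatim so the snippet runs on its own; the expected strings are taken from the tsv earlier in this diff):
```python
def sizeof_fmt(num, suffix="B"):
    # Same implementation as in the deleted script above
    for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
        if abs(num) < 1024.0:
            return f"{num:3.1f}{unit}{suffix}"
        num /= 1024.0
    return f"{num:.1f}Yi{suffix}"

assert sizeof_fmt(522428) == "510.2KiB"    # WT/WT-028.tif
assert sizeof_fmt(6589973632) == "6.1GiB"  # SR1482_40X_HE_T333_02.czi
```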
diff --git a/bia-converter-light/test/data/test_files_for_study_component_2/im06.png b/bia-converter-light/test/data/test_files_for_study_component_2/im06.png
deleted file mode 100644
index fd76f6cf..00000000
Binary files a/bia-converter-light/test/data/test_files_for_study_component_2/im06.png and /dev/null differ
diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zattrs b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zattrs
deleted file mode 100644
index 80da91b3..00000000
--- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zattrs
+++ /dev/null
@@ -1,3 +0,0 @@
-{
- "bioformats2raw.layout" : 3
-}
diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zgroup b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zgroup
deleted file mode 100644
index 03087769..00000000
--- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zgroup
+++ /dev/null
@@ -1,3 +0,0 @@
-{
- "zarr_format" : 2
-}
diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zattrs b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zattrs
deleted file mode 100644
index e5c3d03c..00000000
--- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zattrs
+++ /dev/null
@@ -1,80 +0,0 @@
-{
- "multiscales" : [ {
- "metadata" : {
- "method" : "loci.common.image.SimpleImageScaler",
- "version" : "Bio-Formats 6.13.0"
- },
- "axes" : [ {
- "name" : "t",
- "type" : "time"
- }, {
- "name" : "c",
- "type" : "channel"
- }, {
- "name" : "z",
- "type" : "space"
- }, {
- "name" : "y",
- "type" : "space"
- }, {
- "name" : "x",
- "type" : "space"
- } ],
- "name" : "im06.png",
- "datasets" : [ {
- "path" : "0",
- "coordinateTransformations" : [ {
- "scale" : [ 1.0, 1.0, 1.0, 1.0, 1.0 ],
- "type" : "scale"
- } ]
- } ],
- "version" : "0.4"
- } ],
- "omero" : {
- "channels" : [ {
- "color" : "FF0000",
- "coefficient" : 1,
- "active" : true,
- "label" : "Channel 0",
- "window" : {
- "min" : 255.0,
- "max" : 255.0,
- "start" : 255.0,
- "end" : 255.0
- },
- "family" : "linear",
- "inverted" : false
- }, {
- "color" : "00FF00",
- "coefficient" : 1,
- "active" : true,
- "label" : "Channel 1",
- "window" : {
- "min" : 255.0,
- "max" : 255.0,
- "start" : 255.0,
- "end" : 255.0
- },
- "family" : "linear",
- "inverted" : false
- }, {
- "color" : "0000FF",
- "coefficient" : 1,
- "active" : true,
- "label" : "Channel 2",
- "window" : {
- "min" : 255.0,
- "max" : 255.0,
- "start" : 255.0,
- "end" : 255.0
- },
- "family" : "linear",
- "inverted" : false
- } ],
- "rdefs" : {
- "defaultT" : 0,
- "model" : "color",
- "defaultZ" : 0
- }
- }
-}
diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zgroup b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zgroup
deleted file mode 100644
index 03087769..00000000
--- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zgroup
+++ /dev/null
@@ -1,3 +0,0 @@
-{
- "zarr_format" : 2
-}
diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/.zarray b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/.zarray
deleted file mode 100644
index e03ace41..00000000
--- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/.zarray
+++ /dev/null
@@ -1,17 +0,0 @@
-{
- "chunks" : [ 1, 1, 1, 80, 100 ],
- "compressor" : {
- "clevel" : 5,
- "blocksize" : 0,
- "shuffle" : 1,
- "cname" : "lz4",
- "id" : "blosc"
- },
- "dtype" : "|u1",
- "fill_value" : 0,
- "filters" : null,
- "order" : "C",
- "shape" : [ 1, 3, 1, 80, 100 ],
- "dimension_separator" : "/",
- "zarr_format" : 2
-}
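
For orientation, a hedged sketch of inspecting this test fixture with the `zarr` library (declared as a dependency in the deleted pyproject.toml); the path assumes the pre-deletion repository layout and points at the full-resolution array whose `.zarray` is shown above:
```python
import zarr

# Open the level-0 array of the OME-Zarr fixture read-only
arr = zarr.open(
    "bia-converter-light/test/data/test_image_representations/"
    "study_component1/im06.ome.zarr/0/0",
    mode="r",
)
print(arr.shape)   # (1, 3, 1, 80, 100) -> t, c, z, y, x per the multiscales axes
print(arr.dtype)   # uint8 ("|u1" in the .zarray)
print(arr.chunks)  # (1, 1, 1, 80, 100)
```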
diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/0/0/0/0 b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/0/0/0/0
deleted file mode 100644
index 9b845ff5..00000000
Binary files a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/0/0/0/0 and /dev/null differ
diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/1/0/0/0 b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/1/0/0/0
deleted file mode 100644
index 9b845ff5..00000000
Binary files a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/1/0/0/0 and /dev/null differ
diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/2/0/0/0 b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/2/0/0/0
deleted file mode 100644
index 9b845ff5..00000000
Binary files a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/2/0/0/0 and /dev/null differ
diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zattrs b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zattrs
deleted file mode 100644
index 6837bf36..00000000
--- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zattrs
+++ /dev/null
@@ -1,3 +0,0 @@
-{
- "series" : [ "0" ]
-}
diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zgroup b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zgroup
deleted file mode 100644
index 03087769..00000000
--- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zgroup
+++ /dev/null
@@ -1,3 +0,0 @@
-{
- "zarr_format" : 2
-}
diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/METADATA.ome.xml b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/METADATA.ome.xml
deleted file mode 100644
index 5fb45870..00000000
--- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/METADATA.ome.xml
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/bia-converter-light/test/test_cli.py b/bia-converter-light/test/test_cli.py
deleted file mode 100644
index 229187e4..00000000
--- a/bia-converter-light/test/test_cli.py
+++ /dev/null
@@ -1,340 +0,0 @@
-from pathlib import Path
-import shutil
-import pytest
-from typer.testing import CliRunner
-from unittest.mock import MagicMock
-
-from bia_integrator_api import PrivateApi
-from bia_shared_datamodels import bia_data_model
-from bia_test_data.mock_objects.mock_object_constants import accession_id
-from bia_test_data.mock_objects import (
- mock_image_representation,
- mock_image,
- mock_dataset,
- mock_file_reference,
-)
-from bia_ingest.persistence_strategy import (
- persistence_strategy_factory,
- PersistenceStrategy,
-)
-from bia_converter_light import conversion
-from bia_converter_light import cli, utils
-from bia_converter_light.config import settings
-
-
-@pytest.fixture
-def runner() -> CliRunner:
- return CliRunner(mix_stderr=False)
-
-
-@pytest.fixture
-def output_dir_base(tmpdir, monkeypatch):
- odb = Path(tmpdir)
-
- monkeypatch.setattr(
- settings,
- "cache_root_dirpath",
- odb / ".cache",
- )
-
- # Copy the file to be converted to the cache so it does not need to be downloaded
- file_reference = mock_file_reference.get_file_reference()[0]
- src_path = (
- Path(__file__).parent / "data" / "test_files_for_study_component_2" / "im06.png"
- )
- dest_dir = odb / ".cache" / "files"
- if not dest_dir.is_dir():
- dest_dir.mkdir(parents=True)
- dest_path = dest_dir / f"{file_reference.uuid}.png"
- shutil.copy(src_path, dest_path)
-
- return odb
-
-
-@pytest.fixture
-def persister(output_dir_base) -> PersistenceStrategy:
- persister = persistence_strategy_factory(
- persistence_mode="disk",
- output_dir_base=str(output_dir_base),
- accession_id=accession_id,
- )
- return persister
-
-
-@pytest.fixture
-def dataset(persister) -> bia_data_model.Dataset:
- ds = mock_dataset.get_dataset()[1]
- persister.persist(
- [
- ds,
- ]
- )
-
- return ds
-
-
-@pytest.fixture
-def image(persister) -> bia_data_model.Image:
- im = mock_image.get_image_with_one_file_reference()
- persister.persist(
- [
- im,
- ]
- )
-
- return im
-
-
-@pytest.fixture
-def file_reference(persister) -> bia_data_model.FileReference:
- file_ref = mock_file_reference.get_file_reference()[0]
- persister.persist(
- [
- file_ref,
- ]
- )
-
- return file_ref
-
-
-@pytest.fixture
-def conversion_details_path(output_dir_base, dataset, file_reference) -> Path:
- """Write tsv files with details of file references to convert"""
-
- path_to_conversion_details = output_dir_base / "file_references_to_convert.tsv"
- study_uuid = f"{dataset.submitted_in_study_uuid}"
- # Get details of file references in study component 2 of mock study
- size_human_readable = f"{file_reference.size_in_bytes}B"
- conversion_details = "\t".join(
- [
- "accession_id",
- "study_uuid",
- "name",
- "file_reference_uuid",
- "size_in_bytes",
- "size_human_readable",
- ]
- )
- conversion_details += "\n"
- conversion_details += "\t".join(
- [
- accession_id,
- study_uuid,
- file_reference.file_path,
- f"{file_reference.uuid}",
- f"{file_reference.size_in_bytes}",
- size_human_readable,
- ]
- )
- path_to_conversion_details.write_text(conversion_details)
- return path_to_conversion_details
-
-
-@pytest.fixture
-def mock_api_client(monkeypatch, persister, output_dir_base):
- """Mock api_client functions for getting and saving model objects used in test"""
-
- def mock_get_image(uuid):
- return persister.fetch_by_uuid(
- [
- uuid,
- ],
- bia_data_model.Image,
- )[0]
-
- def mock_get_image_representation(uuid):
- return persister.fetch_by_uuid(
- [
- uuid,
- ],
- bia_data_model.ImageRepresentation,
- )[0]
-
- def mock_get_dataset(uuid):
- return persister.fetch_by_uuid(
- [
- uuid,
- ],
- bia_data_model.Dataset,
- )[0]
-
- def mock_get_file_reference(uuid):
- return persister.fetch_by_uuid(
- [
- uuid,
- ],
- bia_data_model.FileReference,
- )[0]
-
- def mock_get_image_representation_linking_image(image_uuid, page_size):
- image_representations_path = output_dir_base / "image_representation"
- representation_uuids = [
- p.stem for p in image_representations_path.rglob("*/*.json")
- ]
- all_image_representations = persister.fetch_by_uuid(
- representation_uuids, bia_data_model.ImageRepresentation
- )
-
- return [
- r
- for r in all_image_representations
- if f"{r.representation_of_uuid}" == f"{image_uuid}"
- ]
-
- def mock_post_object(obj):
- persister.persist(
- [
- obj,
- ]
- )
-
- mock_api_client_object = MagicMock()
- mock_api_client_object.get_image_representation = mock_get_image_representation
- mock_api_client_object.get_dataset = mock_get_dataset
- mock_api_client_object.get_file_reference = mock_get_file_reference
- mock_api_client_object.get_image = mock_get_image
- mock_api_client_object.get_image_representation_linking_image = (
- mock_get_image_representation_linking_image
- )
- mock_api_client_object.post_dataset = mock_post_object
- mock_api_client_object.post_image_representation = mock_post_object
- mock_api_client_object.__class__ = PrivateApi
- monkeypatch.setattr(
- conversion,
- "api_client",
- mock_api_client_object,
- )
- monkeypatch.setattr(
- cli,
- "api_client",
- mock_api_client_object,
- )
- monkeypatch.setattr(
- utils,
- "api_client",
- mock_api_client_object,
- )
- return mock_api_client_object
-
-
-@pytest.fixture
-def mock_copy_local_to_s3(monkeypatch):
- """Return s3 url without actual copy to s3"""
-
- def _mock_copy_local_to_s3(src_fpath, dst_key):
- endpoint_url = settings.endpoint_url
- # bucket_name = settings.bucket_name
- bucket_name = "test-bucket"
-
- return f"{endpoint_url}/{bucket_name}/{dst_key}"
-
- monkeypatch.setattr(
- conversion,
- "copy_local_to_s3",
- _mock_copy_local_to_s3,
- )
-
-
-def test_cli_convert_image(
- runner,
- output_dir_base,
- conversion_details_path,
- mock_api_client,
- image,
- file_reference,
- mock_copy_local_to_s3,
- persister,
-):
- image_representation = (
- mock_image_representation.get_image_representation_of_interactive_display()
- )
- persister.persist(
- [
- image_representation,
- ]
- )
-
- result = runner.invoke(
- cli.app,
- [
- "convert-image",
- # "--accession-ids",
- # accession_id,
- "--conversion-details-path",
- conversion_details_path,
- ],
- catch_exceptions=False,
- )
-
- assert result.exit_code == 0
-
- # Check all zarr and pngs were created
- # TODO: Compare all output files vs expected?
- for use_type in (
- "thumbnail",
- "static_display",
- "interactive_display",
- ):
- func = f"get_image_representation_of_{use_type}"
- representation = getattr(mock_image_representation, func)()
-
- if use_type == "interactive_display":
- expected_path = (
- output_dir_base / ".cache" / "zarr" / f"{representation.uuid}.ome.zarr"
- )
- assert expected_path.is_dir(), f"Did not find expected dir for {use_type}"
- else:
- expected_path = (
- output_dir_base
- / ".cache"
- / "other_converted_images"
- / f"{representation.uuid}.png"
- )
- assert expected_path.is_file(), f"Did not find expected file for {use_type}"
-
- # TODO: Check the file_uris of the created image representations properly
- created_representation = persister.fetch_by_uuid(
- [representation.uuid],
- bia_data_model.ImageRepresentation,
- )[0]
- assert created_representation.file_uri[0].startswith("http")
- assert f"{representation.uuid}" in created_representation.file_uri[0]
-
-
-def test_cli_update_example_image_uri_for_dataset(
- runner, mock_api_client, output_dir_base, persister, dataset
-):
- image_representation = (
- mock_image_representation.get_image_representation_of_static_display()
- )
- persister.persist(
- [
- image_representation,
- ]
- )
-
- bia_image = mock_image.get_image_with_one_file_reference()
- persister.persist(
- [
- bia_image,
- ]
- )
-
- result = runner.invoke(
- cli.app,
- [
- "update-example-image-uri-for-dataset",
- str(image_representation.uuid),
- ],
- )
-
- assert result.exit_code == 0
- # cli.update_example_image_uri_for_dataset(image_representation.uuid)
- modified_dataset = persister.fetch_by_uuid(
- [
- dataset.uuid,
- ],
- bia_data_model.Dataset,
- )[0]
- assert dataset.example_image_uri == []
- assert modified_dataset.example_image_uri == image_representation.file_uri
diff --git a/bia-converter-light/test/test_conversion_util_funcs.py b/bia-converter-light/test/test_conversion_util_funcs.py
deleted file mode 100644
index ed378f7d..00000000
--- a/bia-converter-light/test/test_conversion_util_funcs.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from bia_converter_light import utils
-
-
-def test_merge_dicts():
- dict_list = [
- {"key1": "value1", "key2": "value2"},
- {"key1": "value3", "key4": "value4"},
- ]
-
- assert utils.merge_dicts(dict_list) == {
- "key1": ["value1", "value3"],
- "key2": "value2",
- "key4": "value4",
- }
diff --git a/bia-converter-light/test/test_create_thumbnails.py b/bia-converter-light/test/test_create_thumbnails.py
deleted file mode 100644
index 55e4f1ce..00000000
--- a/bia-converter-light/test/test_create_thumbnails.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from pathlib import Path
-from bia_converter_light.rendering import generate_padded_thumbnail_from_ngff_uri
-
-
-def test_generate_padded_thumbnail_from_ngff_uri():
- """Test function runs without errors. NOT that correct values produced"""
-
- local_path_to_zarr = (
- Path(__file__).parent
- / "data"
- / "test_image_representations"
- / "study_component1"
- / "im06.ome.zarr"
- )
- thumbnail = generate_padded_thumbnail_from_ngff_uri(local_path_to_zarr)
- assert thumbnail.size == (256, 256)