diff --git a/src/tiled_ingestor/ingest.py b/src/tiled_ingestor/ingest.py index b3df05a..bad1f75 100644 --- a/src/tiled_ingestor/ingest.py +++ b/src/tiled_ingestor/ingest.py @@ -2,19 +2,44 @@ import logging import sys +import h5py from tiled.catalog.register import identity, register from tiled.catalog import from_uri import tiled.config +from tiled.adapters.hdf5 import HDF5Adapter, SWMR_DEFAULT +from tiled.structures.core import Spec +from tiled.utils import path_from_uri logger = logging.getLogger(__name__) - logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) +DLS_TOMO_SPEC = Spec("DLSNXTomo", "1") + +# In a future version of tiled, the register method will move from +# server to client. This will allow us to remove a lot of code here, including +# looking for the server config, connecting to the database, writing an +# adapter that serves just to mark files with a particular spec. +# See https://github.com/bluesky/tiled/pull/661 + def get_tiled_config(config_path: str): return tiled.config.parse_configs(config_path) +def diamond_tomo_h5_read_adapter( + data_uri, swmr=SWMR_DEFAULT, libver="latest", specs=None, **kwargs +): + # Minimal adapter used only at registration time: it opens the HDF5 file + # read-only, tags it with DLS_TOMO_SPEC, and delegates to HDF5Adapter. + # `specs` is copied before appending so a caller-supplied list is never + # mutated. NOTE(review): no NXtomo layout check is visible here - confirm. 
+ specs = list(specs or []) + specs.append(DLS_TOMO_SPEC) + filepath = path_from_uri(data_uri) + file = h5py.File(filepath, "r", swmr=swmr, libver=libver) + return HDF5Adapter.from_file(file, specs=specs, **kwargs) + + async def process_file( file_path: str, tiled_config: dict, @@ -57,11 +82,10 @@ async def process_file( matching_tree["tree"] == "catalog" ), f"Matching tiled tree {tiled_config_tree_path} is not a catalog" - # using thre tree in the configuration, generate a catalog(adapter) + # using the tree in the configuration, generate a catalog(adapter) catalog_adapter = from_uri( matching_tree["args"]["uri"], readable_storage=matching_tree["args"]["readable_storage"], - adapters_by_mimetype=matching_tree["args"].get("adapters_by_mimetype"), ) # Register with tiled. This writes entries into the database for all of the nodes down to the data node @@ -71,6 +95,9 @@ async def process_file( path=file_path, prefix=path_prefix, overwrite=False, + adapters_by_mimetype={ + "application/x-hdf5": "tiled_ingestor.ingest:diamond_tomo_h5_read_adapter" + }, ) @@ -84,9 +111,9 @@ async def process_file( tiled_config = get_tiled_config("../mlex_tomo_framework/tiled/deploy/config") asyncio.run( process_file( - "../mlex_tomo_framework/data/tiled_storage/beamlines/8.3.2/recons/rec20240207_120829_test_no_xrays_n1313", + "../mlex_tomo_framework/data/tiled_storage/recons/nexus-example.nxs", tiled_config, - path_prefix="/beamlines/8.3.2/recons/", + path_prefix="/recons", ) ) else: @@ -94,13 +121,11 @@ async def process_file( import os pprint(os.environ) - tiled_config = get_tiled_config( - "/tiled_storage/beamlines/8.3.2/recons/rec20240207_120829_test_no_xrays_n1313" - ) + tiled_config = get_tiled_config("../mlex_tomo_framework/tiled/deploy/config") asyncio.run( process_file( - # "/tiled_storage/beamlines/8.3.2/recons/rec20240207_120550_test_no_xrays_n257", + "/tiled_storage/recons/rec20240207_120550_test_no_xrays_n257", tiled_config, - path_prefix="/beamlines/8.3.2/recons/", + 
path_prefix="/recons", ) )