From d6a1969d4bf7ba1bdb798cea078b4e7490ab1854 Mon Sep 17 00:00:00 2001 From: Dylan McReynolds Date: Tue, 20 Feb 2024 10:16:14 -0800 Subject: [PATCH 1/2] first working spec addition --- src/tiled_ingestor/ingest.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/tiled_ingestor/ingest.py b/src/tiled_ingestor/ingest.py index b3df05a..6c54cc7 100644 --- a/src/tiled_ingestor/ingest.py +++ b/src/tiled_ingestor/ingest.py @@ -2,9 +2,13 @@ import logging import sys +import h5py from tiled.catalog.register import identity, register from tiled.catalog import from_uri import tiled.config +from tiled.adapters.hdf5 import HDF5Adapter, SWMR_DEFAULT +from tiled.structures.core import Spec +from tiled.utils import path_from_uri logger = logging.getLogger(__name__) @@ -15,6 +19,14 @@ def get_tiled_config(config_path: str): return tiled.config.parse_configs(config_path) +def diamond_tomo_h5(data_uri, swmr=SWMR_DEFAULT, libver="latest", specs=None, **kwargs): + specs = specs or [] + specs.append(Spec("a_spec", "a_version")) + filepath = path_from_uri(data_uri) + file = h5py.File(filepath, "r", swmr=swmr, libver=libver) + return HDF5Adapter.from_file(file, specs=specs, **kwargs) + + async def process_file( file_path: str, tiled_config: dict, @@ -57,11 +69,11 @@ async def process_file( matching_tree["tree"] == "catalog" ), f"Matching tiled tree {tiled_config_tree_path} is not a catalog" - # using thre tree in the configuration, generate a catalog(adapter) + # using the tree in the configuration, generate a catalog(adapter) catalog_adapter = from_uri( matching_tree["args"]["uri"], readable_storage=matching_tree["args"]["readable_storage"], - adapters_by_mimetype=matching_tree["args"].get("adapters_by_mimetype"), + ) # Register with tiled. 
This writes entries into the database for all of the nodes down to the data node @@ -71,6 +83,7 @@ async def process_file( path=file_path, prefix=path_prefix, overwrite=False, + adapters_by_mimetype=matching_tree["args"].get("adapters_by_mimetype") ) @@ -84,9 +97,10 @@ async def process_file( tiled_config = get_tiled_config("../mlex_tomo_framework/tiled/deploy/config") asyncio.run( process_file( - "../mlex_tomo_framework/data/tiled_storage/beamlines/8.3.2/recons/rec20240207_120829_test_no_xrays_n1313", + "../mlex_tomo_framework/data/tiled_storage/recons/nexus-example.nxs", tiled_config, - path_prefix="/beamlines/8.3.2/recons/", + path_prefix="/recons", + # specs=[{"name": "ANexus", "version": "sdfsdf"}] ) ) else: @@ -94,13 +108,11 @@ async def process_file( import os pprint(os.environ) - tiled_config = get_tiled_config( - "/tiled_storage/beamlines/8.3.2/recons/rec20240207_120829_test_no_xrays_n1313" - ) + tiled_config = get_tiled_config("../mlex_tomo_framework/tiled/deploy/config") asyncio.run( process_file( - # "/tiled_storage/beamlines/8.3.2/recons/rec20240207_120550_test_no_xrays_n257", + "/tiled_storage/recons/rec20240207_120550_test_no_xrays_n257", tiled_config, - path_prefix="/beamlines/8.3.2/recons/", + path_prefix="/recons", ) ) From cb18ccfd853fe065044281f780e43c56e3b59c8d Mon Sep 17 00:00:00 2001 From: Dylan McReynolds Date: Thu, 22 Feb 2024 16:25:35 -0800 Subject: [PATCH 2/2] add partial adapter, comments --- src/tiled_ingestor/ingest.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/tiled_ingestor/ingest.py b/src/tiled_ingestor/ingest.py index 6c54cc7..bad1f75 100644 --- a/src/tiled_ingestor/ingest.py +++ b/src/tiled_ingestor/ingest.py @@ -11,17 +11,30 @@ from tiled.utils import path_from_uri logger = logging.getLogger(__name__) - logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) +DLS_TOMO_SPEC = Spec("DLSNXTomo", "1") + +# In a future version of tiled, the register method will move from +# 
server to client. This will allow us to remove a lot of code here, including # looking for the server config, connecting to the database, writing an # adapter that serves just to mark files with a particular spec. # See https://github.com/bluesky/tiled/pull/661 + def get_tiled_config(config_path: str): return tiled.config.parse_configs(config_path) -def diamond_tomo_h5(data_uri, swmr=SWMR_DEFAULT, libver="latest", specs=None, **kwargs): +def diamond_tomo_h5_read_adapter( + data_uri, swmr=SWMR_DEFAULT, libver="latest", specs=None, **kwargs +): + # this serves as enough of an adapter to read a file, verify that it's + # the right type, and register a spec for it. It's like a piece of an + # adapter that wouldn't work for reading a dataset, but works for adding + # a spec to a dataset when registered externally. specs = specs or [] - specs.append(Spec("a_spec", "a_version")) + specs.append(DLS_TOMO_SPEC) filepath = path_from_uri(data_uri) file = h5py.File(filepath, "r", swmr=swmr, libver=libver) return HDF5Adapter.from_file(file, specs=specs, **kwargs) @@ -73,7 +86,6 @@ async def process_file( catalog_adapter = from_uri( matching_tree["args"]["uri"], readable_storage=matching_tree["args"]["readable_storage"], - ) # Register with tiled. This writes entries into the database for all of the nodes down to the data node @@ -83,7 +95,9 @@ async def process_file( path=file_path, prefix=path_prefix, overwrite=False, - adapters_by_mimetype=matching_tree["args"].get("adapters_by_mimetype") + adapters_by_mimetype={ + "application/x-hdf5": "tiled_ingestor.ingest:diamond_tomo_h5_read_adapter" + }, ) @@ -100,7 +114,6 @@ async def process_file( "../mlex_tomo_framework/data/tiled_storage/recons/nexus-example.nxs", tiled_config, path_prefix="/recons", - # specs=[{"name": "ANexus", "version": "sdfsdf"}] ) ) else: