def convert_uns_to_cxg_group(cxg_container, metadata_dict, group_metadata_name="cxg_group_metadata", ctx=None):
    """
    Saves the "spatial" entries of a metadata mapping as TileDB array metadata inside the CXG container.

    A one-element placeholder array is created at `<cxg_container>/<group_metadata_name>` and every entry of
    `metadata_dict` whose key starts with "spatial" is attached to it as array metadata. All other entries are
    ignored. Dictionary values are serialized with `pickle` before being stored; any other value is stored as-is.

    Writing is best-effort: a value that cannot be stored is logged at ERROR level and skipped, so one bad entry
    does not abort the conversion.

    :param cxg_container: path/URI of the CXG output directory
    :param metadata_dict: mapping of metadata keys to values (presumably an AnnData `uns` mapping -- confirm
        against the caller)
    :param group_metadata_name: name of the array, relative to `cxg_container`, that carries the metadata
    :param ctx: TileDB context used when opening the array for writing
    """

    array_name = f"{cxg_container}/{group_metadata_name}"

    # The array only exists to carry metadata. Close the handle returned by from_numpy right away instead of
    # leaking it until garbage collection.
    with tiledb.from_numpy(array_name, np.zeros((1,))):
        pass

    with tiledb.open(array_name, mode="w", ctx=ctx) as metadata_array:
        for key, value in metadata_dict.items():
            # Non-string keys cannot carry the "spatial" prefix, so skip them instead of failing on .startswith.
            if not (isinstance(key, str) and key.startswith("spatial")):
                continue

            logging.debug("Adding metadata %s (%s) to %s", key, type(value).__name__, array_name)

            # NOTE(security): nested dictionaries are stored as pickled bytes. Whoever reads this metadata back
            # must only call pickle.loads on containers from a trusted source.
            stored_value = pickle.dumps(value) if isinstance(value, dict) else value

            try:
                metadata_array.meta[key] = stored_value
            except Exception as e:
                # Deliberately broad: the set of value types TileDB rejects is not known here.
                logging.error(f"Error adding metadata {key} to {array_name}: {e}")
def make_cxg(local_filename):
    """
    Convert the uploaded H5AD file to the CXG format servicing the cellxgene Explorer.

    The output container path is derived from `local_filename` by replacing ".h5ad" with ".cxg".

    :param local_filename: path of the H5AD file to convert
    :return: path of the generated CXG container
    :raises RuntimeError: if loading the H5AD file or writing the CXG fails; the underlying exception is
        chained as `__cause__`
    """
    # Imported at call time so that defining this function does not require the backend package.
    from backend.layers.processing.h5ad_data_file import H5ADDataFile

    cxg_output_container = local_filename.replace(".h5ad", ".cxg")
    try:
        h5ad_data_file = H5ADDataFile(local_filename, var_index_column_name="feature_name")
        h5ad_data_file.to_cxg(cxg_output_container, sparse_threshold=25.0)
    except Exception as ex:
        # TODO use a specialized exception
        msg = "CXG conversion failed."
        # A second `raise ex` used to follow this statement; it could never execute and has been removed.
        raise RuntimeError(msg) from ex

    return cxg_output_container


if __name__ == "__main__":
    # Still runs when executed as a notebook cell or script, but not when the code is imported.
    make_cxg("UXR_0bb15784-1cea-47e1-9a00-57dcd127746c.h5ad")