From eeadf417862c7a4d3c6dadab16b080d1e20b140e Mon Sep 17 00:00:00 2001 From: Sidney Bell Date: Mon, 18 Dec 2023 21:49:31 +0530 Subject: [PATCH 1/6] Copy updates (#6364) --- frontend/src/views/CensusDirectory/index.tsx | 21 +++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/frontend/src/views/CensusDirectory/index.tsx b/frontend/src/views/CensusDirectory/index.tsx index ef03329039197..7f6532842da7d 100644 --- a/frontend/src/views/CensusDirectory/index.tsx +++ b/frontend/src/views/CensusDirectory/index.tsx @@ -42,8 +42,11 @@ function CensusDirectory() { provide feedback!

- {/* TODO: add link to notebooks once available */} - Please see these tutorials for usage details. + Please{" "} + + see these tutorials + {" "} + for usage details.

If you’d like to have your project featured here, please{" "} @@ -52,15 +55,15 @@ function CensusDirectory() { {maintainedProjects.length > 0 && ( - CELL×GENE Maintained Projects + CELL×GENE Collaboration Projects - These models and their output embeddings are maintained and - regularly re-trained by CELL×GENE in close collaboration with their - creators. Embeddings are accessible via the Census API; - corresponding models are available via CELL×GENE-maintained links. + These models and their output embeddings are ongoing collaborations. + CZI and the partner labs are improving the models as the Census + resource grows. Embeddings are accessible via the Census API; + corresponding models are available for download.
Please{" "} - + contact the CELL×GENE team with feedback . @@ -80,7 +83,7 @@ function CensusDirectory() { available).
For issues accessing these embeddings, please{" "} - + contact the CELL×GENE team . For feedback on the embeddings themselves, please contact the From e1fb59fe46a1132c042a44f87cfcd67c7e38b006 Mon Sep 17 00:00:00 2001 From: pablo-gar Date: Mon, 18 Dec 2023 10:48:40 -0600 Subject: [PATCH 2/6] feat(census-models): fix Geneformer description (#6363) --- frontend/census-projects.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/census-projects.json b/frontend/census-projects.json index d22f1251627fd..edfdc6a6b5a48 100644 --- a/frontend/census-projects.json +++ b/frontend/census-projects.json @@ -2,7 +2,7 @@ { "tier": "maintained", "title": "Geneformer embeddings fine-tuned for CELLxGENE Census cell subclass classification", - "description": "Geneformer is a foundation transformer model pretrained on a large-scale corpus of ~30 million single cell transcriptomes to enable context-aware predictions in settings with limited data in network biology.\nThese cell embeddings are derived from a Geneformer model CZI fine-tuned for cell subclass classification. As the fine-tuning procedure remains experimental and wasn’t performed by the Geneformer authors, these embeddings should not be used to assess performance of the Geneformer ", + "description": "Geneformer is a foundation transformer model pretrained on a large-scale corpus of ~30 million single cell transcriptomes to enable context-aware predictions in settings with limited data in network biology.\nThese cell embeddings are derived from a Geneformer model CZI fine-tuned for cell subclass classification. 
As the fine-tuning procedure remains experimental and wasn’t performed by the Geneformer authors, these embeddings should not be used to assess performance of the pre-trained Geneformer model.", "primary_contact": { "name": "CELLxGENE Discover Team", "email": "soma@chanzuckerberg.com", From d2db23fb277955b073644216ed1eeeaeffec10a9 Mon Sep 17 00:00:00 2001 From: Trent Smith <1429913+Bento007@users.noreply.github.com> Date: Mon, 18 Dec 2023 10:48:13 -0800 Subject: [PATCH 3/6] chore: remove parameterize dependency (#6325) - remove the `parameterized` dependency and use `pytest.mark.parametrize`. - change test_rollup to use `pytest.mark.parametrize`. --- requirements-backend.txt | 1 - .../utils/test_type_conversion_utils.py | 62 +- .../backend/wmg/api/common/test_rollup.py | 646 +++++++++--------- 3 files changed, 353 insertions(+), 356 deletions(-) diff --git a/requirements-backend.txt b/requirements-backend.txt index b6cce4ec6b951..4be816affaf37 100644 --- a/requirements-backend.txt +++ b/requirements-backend.txt @@ -26,7 +26,6 @@ numba==0.56.2 # required for where's my gene numpy==1.23.5 # required for where's my gene owlready2==0.40.0 pandas==1.5.3 # required for where's my gene -parameterized psutil==5.9.5 psycopg2-binary>=2.8.5 pyarrow==12.0.0 # required for where's my gene diff --git a/tests/unit/backend/layers/utils/test_type_conversion_utils.py b/tests/unit/backend/layers/utils/test_type_conversion_utils.py index 7725a9c0836ef..f59a92fa7ea79 100644 --- a/tests/unit/backend/layers/utils/test_type_conversion_utils.py +++ b/tests/unit/backend/layers/utils/test_type_conversion_utils.py @@ -3,8 +3,8 @@ import numpy as np import pandas as pd +import pytest from pandas import DataFrame, Series -from parameterized import parameterized_class from scipy import sparse from backend.common.utils.type_conversion_utils import ( @@ -292,33 +292,35 @@ def __exit__(self, exc_type, exc_val, exc_tb): ] -@parameterized_class(test_cases) -class TestTypeInference(unittest.TestCase, 
AssertNoLog): - def test_type_inference(self): - throws = getattr(self, "throws", None) - if throws: - with self.assertRaises(throws): - get_dtype_and_schema_of_array(self.data) - with self.assertRaises(throws): - get_encoding_dtype_of_array(self.data) - with self.assertRaises(throws): - get_schema_type_hint_of_array(self.data) - +@pytest.mark.parametrize("parameters", test_cases) +def test_type_inference(parameters, caplog): + throws = parameters.get("throws", None) + if throws: + with pytest.raises(throws): + get_dtype_and_schema_of_array(parameters["data"]) + with pytest.raises(throws): + get_dtype_and_schema_of_array(parameters["data"]) + with pytest.raises(throws): + get_encoding_dtype_of_array(parameters["data"]) + with pytest.raises(throws): + get_schema_type_hint_of_array(parameters["data"]) + + else: + logs = parameters.get("logs", None) + if logs is not None: + # + with caplog.at_level(logs["level"]): + encoding_dtype, schema_hint = get_dtype_and_schema_of_array(parameters["data"]) + assert encoding_dtype == parameters["expected_encoding_dtype"] + assert schema_hint == parameters["expected_schema_hint"] + assert logs["output"] in caplog.messages[0] else: - logs = getattr(self, "logs", None) - if logs is not None: - with self.assertLogs(level=logs["level"]) as logger: - encoding_dtype, schema_hint = get_dtype_and_schema_of_array(self.data) - self.assertEqual(encoding_dtype, self.expected_encoding_dtype) - self.assertEqual(schema_hint, self.expected_schema_hint) - self.assertIn(logs["output"], logger.output[0]) - - else: - with self.assertNoLogs(logging.getLogger(), logging.WARNING): - encoding_dtype, schema_hint = get_dtype_and_schema_of_array(self.data) - self.assertEqual(encoding_dtype, self.expected_encoding_dtype) - self.assertEqual(schema_hint, self.expected_schema_hint) - - # also test the other public API - self.assertEqual(get_encoding_dtype_of_array(self.data), self.expected_encoding_dtype) - 
self.assertEqual(get_schema_type_hint_of_array(self.data), self.expected_schema_hint) + with caplog.at_level(logging.WARNING): + encoding_dtype, schema_hint = get_dtype_and_schema_of_array(parameters["data"]) + assert encoding_dtype == parameters["expected_encoding_dtype"] + assert schema_hint == parameters["expected_schema_hint"] + assert len(caplog.messages) == 0 + + # also test the other public API + assert get_encoding_dtype_of_array(parameters["data"]) == parameters["expected_encoding_dtype"] + assert get_schema_type_hint_of_array(parameters["data"]) == parameters["expected_schema_hint"] diff --git a/tests/unit/backend/wmg/api/common/test_rollup.py b/tests/unit/backend/wmg/api/common/test_rollup.py index c30d6ab07c625..977d5c2702cd8 100644 --- a/tests/unit/backend/wmg/api/common/test_rollup.py +++ b/tests/unit/backend/wmg/api/common/test_rollup.py @@ -2,28 +2,28 @@ In detail, this module tests the public and private functions defined in `backend.wmg.api.common.rollup` module. """ -import unittest +from typing import List import pandas as pd +import pytest from pandas import DataFrame from pandas.testing import assert_frame_equal -from parameterized import parameterized from backend.wmg.api.common.rollup import rollup -def _create_cell_counts_df_helper(cell_counts_rows: list[list], columns: list[str], index_cols: list[str]) -> DataFrame: +def _create_cell_counts_df_helper(cell_counts_rows: List[list], columns: List[str], index_cols: List[str]) -> DataFrame: cell_counts_df = pd.DataFrame(cell_counts_rows, columns=columns) cell_counts_df = cell_counts_df.set_index(index_cols, verify_integrity=True) return cell_counts_df -def _create_gene_expression_df_helper(gene_expr_rows: list[list], columns: list[str]) -> DataFrame: +def _create_gene_expression_df_helper(gene_expr_rows: List[list], columns: List[str]) -> DataFrame: gene_expr_df = pd.DataFrame(gene_expr_rows, columns=columns) return gene_expr_df -def _cell_counts_df_without_compare_dim(cell_counts_rows: 
list[list]) -> DataFrame: +def _cell_counts_df_without_compare_dim(cell_counts_rows: List[list]) -> DataFrame: cell_counts_col_names = ["tissue_ontology_term_id", "cell_type_ontology_term_id", "n_cells_cell_type"] cell_counts_index_col_names = ["tissue_ontology_term_id", "cell_type_ontology_term_id"] return _create_cell_counts_df_helper( @@ -31,7 +31,7 @@ def _cell_counts_df_without_compare_dim(cell_counts_rows: list[list]) -> DataFra ) -def _cell_counts_df_with_ethnicity_compare_dim(cell_counts_rows: list[list]) -> DataFrame: +def _cell_counts_df_with_ethnicity_compare_dim(cell_counts_rows: List[list]) -> DataFrame: cell_counts_col_names = [ "tissue_ontology_term_id", "cell_type_ontology_term_id", @@ -50,7 +50,7 @@ def _cell_counts_df_with_ethnicity_compare_dim(cell_counts_rows: list[list]) -> ) -def _gene_expression_df_without_compare_dim(gene_expr_rows: list[list]) -> DataFrame: +def _gene_expression_df_without_compare_dim(gene_expr_rows: List[list]) -> DataFrame: gene_expr_col_names = [ "gene_ontology_term_id", "tissue_ontology_term_id", @@ -63,7 +63,7 @@ def _gene_expression_df_without_compare_dim(gene_expr_rows: list[list]) -> DataF return _create_gene_expression_df_helper(gene_expr_rows, columns=gene_expr_col_names) -def _gene_expression_df_with_ethnicity_compare_dim(gene_expr_rows: list[list]) -> DataFrame: +def _gene_expression_df_with_ethnicity_compare_dim(gene_expr_rows: List[list]) -> DataFrame: gene_expr_col_names = [ "gene_ontology_term_id", "tissue_ontology_term_id", @@ -78,7 +78,294 @@ def _gene_expression_df_with_ethnicity_compare_dim(gene_expr_rows: list[list]) - return _create_gene_expression_df_helper(gene_expr_rows, columns=gene_expr_col_names) -class TestHighLevelRollupFunction(unittest.TestCase): +def _rollup_testcases(): + """ + TODO: convert this to use pytest.mark.parametrize to remove the dependecy on parameterized + Testcases for the `rollup` function. 
+ + An important note about how the expected values are laid out in the testcases: + + 1. Expected values for rows in the rolled up cell counts dataframe are sorted by + (tissue_ontology_term_id, cell_type_ontology_term_id, ) + + 2. Expected values for rows in the rolled up gene expression dataframe are sorted by + (tissue_ontology_term_id, cell_type_ontology_term_id, , gene_ontology_term_id) + """ + tests = [ + { + "name": "no_compare_dim_all_tissues_have_all_cell_types", + "input_cell_counts": [ + ["UBERON:0000955", "CL:0000127", 300], + ["UBERON:0000955", "CL:0000644", 70], + ["UBERON:0000955", "CL:0002605", 80], + ["UBERON:0000955", "CL:0002627", 90], + ["UBERON:0002113", "CL:0000127", 300], + ["UBERON:0002113", "CL:0000644", 70], + ["UBERON:0002113", "CL:0002605", 80], + ["UBERON:0002113", "CL:0002627", 90], + ], + "expected_rolled_up_cell_counts": [ + ["UBERON:0000955", "CL:0000127", 540], + ["UBERON:0000955", "CL:0000644", 70], + ["UBERON:0000955", "CL:0002605", 80], + ["UBERON:0000955", "CL:0002627", 90], + ["UBERON:0002113", "CL:0000127", 540], + ["UBERON:0002113", "CL:0000644", 70], + ["UBERON:0002113", "CL:0002605", 80], + ["UBERON:0002113", "CL:0002627", 90], + ], + "input_gene_expression": [ + ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0002113", "CL:0002627", 1, 1, 90, 1000], + ], + "expected_rolled_up_gene_expression": [ + ["ENSG00000085265", "UBERON:0000955", "CL:0000127", 2, 2, 150, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0000127", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], + 
["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0000127", 2, 2, 150, 1000], + ["ENSG00000169429", "UBERON:0002113", "CL:0000127", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0002113", "CL:0002627", 1, 1, 90, 1000], + ], + "cell_counts_func": _cell_counts_df_without_compare_dim, + "gene_expression_func": _gene_expression_df_without_compare_dim, + }, + { + "name": "no_compare_dim_one_ancestor_cell_type_missing_in_one_tissue_but_exists_in_all_others", + # Tissue: "UBERON:0000955" MISSING cell type: "CL:0000127" in input cell counts + "input_cell_counts": [ + ["UBERON:0000955", "CL:0000644", 70], + ["UBERON:0000955", "CL:0002605", 80], + ["UBERON:0000955", "CL:0002627", 90], + ["UBERON:0002113", "CL:0000127", 300], + ["UBERON:0002113", "CL:0000644", 70], + ["UBERON:0002113", "CL:0002605", 80], + ["UBERON:0002113", "CL:0002627", 90], + ], + # cell count for cell type: "CL:0000127" in Tissue: "UBERON:0000955" GETS AGGREGATED because + # "CL:0000127" has non-zero cell count for at least one tissue in the input AND at least one + # descendant of "CL:0000127" has non-zero cell count for "UBERON:0000955" + "expected_rolled_up_cell_counts": [ + ["UBERON:0000955", "CL:0000127", 240], + ["UBERON:0000955", "CL:0000644", 70], + ["UBERON:0000955", "CL:0002605", 80], + ["UBERON:0000955", "CL:0002627", 90], + ["UBERON:0002113", "CL:0000127", 540], + ["UBERON:0002113", "CL:0000644", 70], + ["UBERON:0002113", "CL:0002605", 80], + ["UBERON:0002113", "CL:0002627", 90], + ], + "input_gene_expression": [ + ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], 
+ ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0002113", "CL:0002627", 1, 1, 90, 1000], + ], + "expected_rolled_up_gene_expression": [ + ["ENSG00000085265", "UBERON:0000955", "CL:0000127", 2, 2, 150, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0000127", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0000127", 2, 2, 150, 1000], + ["ENSG00000169429", "UBERON:0002113", "CL:0000127", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0002113", "CL:0002627", 1, 1, 90, 1000], + ], + "cell_counts_func": _cell_counts_df_without_compare_dim, + "gene_expression_func": _gene_expression_df_without_compare_dim, + }, + { + "name": "no_compare_dim_gene_expressed_in_one_tissue_but_not_other", + "input_cell_counts": [ + ["UBERON:0000955", "CL:0000127", 300], + ["UBERON:0000955", "CL:0000644", 70], + ["UBERON:0000955", "CL:0002605", 80], + ["UBERON:0000955", "CL:0002627", 90], + ["UBERON:0002113", "CL:0000127", 300], + ["UBERON:0002113", "CL:0000644", 70], + ["UBERON:0002113", "CL:0002605", 80], + ["UBERON:0002113", "CL:0002627", 90], + ], + "expected_rolled_up_cell_counts": [ + ["UBERON:0000955", "CL:0000127", 540], + ["UBERON:0000955", "CL:0000644", 70], + ["UBERON:0000955", "CL:0002605", 80], + ["UBERON:0000955", "CL:0002627", 90], + ["UBERON:0002113", "CL:0000127", 540], + ["UBERON:0002113", "CL:0000644", 70], + ["UBERON:0002113", "CL:0002605", 80], + ["UBERON:0002113", "CL:0002627", 90], + ], + # Gene "ENSG00000169429" expressed in Tissue "UBERON:0000955" but not expressed + # in Tissue "UBERON:0002113" + "input_gene_expression": [ + ["ENSG00000085265", 
"UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], + ], + "expected_rolled_up_gene_expression": [ + ["ENSG00000085265", "UBERON:0000955", "CL:0000127", 2, 2, 150, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0000127", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0000127", 2, 2, 150, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], + ], + "cell_counts_func": _cell_counts_df_without_compare_dim, + "gene_expression_func": _gene_expression_df_without_compare_dim, + }, + { + "name": "no_compare_dim_one_of_the_tissues_has_no_gene_expressions_at_all", + "input_cell_counts": [ + ["UBERON:0000955", "CL:0000127", 300], + ["UBERON:0000955", "CL:0000644", 70], + ["UBERON:0000955", "CL:0002605", 80], + ["UBERON:0000955", "CL:0002627", 90], + ["UBERON:0002113", "CL:0000127", 300], + ["UBERON:0002113", "CL:0000644", 70], + ["UBERON:0002113", "CL:0002605", 80], + ["UBERON:0002113", "CL:0002627", 90], + ], + "expected_rolled_up_cell_counts": [ + ["UBERON:0000955", "CL:0000127", 540], + ["UBERON:0000955", "CL:0000644", 70], + ["UBERON:0000955", "CL:0002605", 80], + ["UBERON:0000955", "CL:0002627", 90], + ["UBERON:0002113", "CL:0000127", 540], + ["UBERON:0002113", "CL:0000644", 70], + ["UBERON:0002113", "CL:0002605", 80], + ["UBERON:0002113", "CL:0002627", 90], + ], + # Tissue issue "UBERON:0002113" has no gene expressions + "input_gene_expression": [ + ["ENSG00000085265", 
"UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], + ], + "expected_rolled_up_gene_expression": [ + ["ENSG00000085265", "UBERON:0000955", "CL:0000127", 2, 2, 150, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0000127", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], + ], + "cell_counts_func": _cell_counts_df_without_compare_dim, + "gene_expression_func": _gene_expression_df_without_compare_dim, + }, + { + "name": "with_ethnicity_compare_dim_on_single_tissue", + "input_cell_counts": [ + ["UBERON:0000955", "CL:0000127", "unknown", 300], + ["UBERON:0000955", "CL:0000644", "unknown", 70], + ["UBERON:0000955", "CL:0002605", "HANCESTRO:0005", 10], + ["UBERON:0000955", "CL:0002605", "HANCESTRO:0008", 30], + ["UBERON:0000955", "CL:0002605", "multiethnic", 40], + ["UBERON:0000955", "CL:0002627", "HANCESTRO:0006", 10], + ["UBERON:0000955", "CL:0002627", "HANCESTRO:0008", 20], + ["UBERON:0000955", "CL:0002627", "multiethnic", 30], + ["UBERON:0000955", "CL:0002627", "unknown", 40], + ], + "expected_rolled_up_cell_counts": [ + ["UBERON:0000955", "CL:0000127", "HANCESTRO:0008", 50], + ["UBERON:0000955", "CL:0000127", "multiethnic", 70], + ["UBERON:0000955", "CL:0000127", "unknown", 410], + ["UBERON:0000955", "CL:0000644", "unknown", 70], + ["UBERON:0000955", "CL:0002605", "HANCESTRO:0005", 10], + ["UBERON:0000955", "CL:0002605", "HANCESTRO:0008", 30], + ["UBERON:0000955", "CL:0002605", "multiethnic", 40], + ["UBERON:0000955", "CL:0002627", "HANCESTRO:0006", 10], + ["UBERON:0000955", "CL:0002627", "HANCESTRO:0008", 20], + ["UBERON:0000955", "CL:0002627", "multiethnic", 30], + ["UBERON:0000955", "CL:0002627", "unknown", 40], + ], + "input_gene_expression": [ + 
["ENSG00000085265", "UBERON:0000955", "CL:0000644", "unknown", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", "HANCESTRO:0008", 1, 1, 30, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", "multiethnic", 1, 1, 40, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002627", "HANCESTRO:0008", 1, 1, 20, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0002627", "multiethnic", 1, 1, 30, 1000], + ], + "expected_rolled_up_gene_expression": [ + ["ENSG00000085265", "UBERON:0000955", "CL:0000127", "HANCESTRO:0008", 2, 2, 50, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0000127", "multiethnic", 1, 1, 40, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0000127", "multiethnic", 1, 1, 30, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0000127", "unknown", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0000644", "unknown", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", "HANCESTRO:0008", 1, 1, 30, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", "multiethnic", 1, 1, 40, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002627", "HANCESTRO:0008", 1, 1, 20, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0002627", "multiethnic", 1, 1, 30, 1000], + ], + "cell_counts_func": _cell_counts_df_with_ethnicity_compare_dim, + "gene_expression_func": _gene_expression_df_with_ethnicity_compare_dim, + }, + { + "name": "no_compare_dim_all_tissues_have_all_cell_types_except_root_cell_type", + "input_cell_counts": [ + ["UBERON:0000955", "CL:0000644", 70], + ["UBERON:0000955", "CL:0002605", 80], + ["UBERON:0000955", "CL:0002627", 90], + ["UBERON:0002113", "CL:0000644", 70], + ["UBERON:0002113", "CL:0002605", 80], + ["UBERON:0002113", "CL:0002627", 90], + ], + "expected_rolled_up_cell_counts": [ + ["UBERON:0000955", "CL:0000644", 70], + ["UBERON:0000955", "CL:0002605", 80], + ["UBERON:0000955", "CL:0002627", 90], + ["UBERON:0002113", "CL:0000644", 70], + ["UBERON:0002113", "CL:0002605", 80], + 
["UBERON:0002113", "CL:0002627", 90], + ], + "input_gene_expression": [ + ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0002113", "CL:0002627", 1, 1, 90, 1000], + ], + "expected_rolled_up_gene_expression": [ + ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], + ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], + ["ENSG00000169429", "UBERON:0002113", "CL:0002627", 1, 1, 90, 1000], + ], + "cell_counts_func": _cell_counts_df_without_compare_dim, + "gene_expression_func": _gene_expression_df_without_compare_dim, + }, + ] + for test in tests: + yield ( + test["name"], + test["cell_counts_func"](test["input_cell_counts"]), + test["cell_counts_func"](test["expected_rolled_up_cell_counts"]), + test["gene_expression_func"](test["input_gene_expression"]), + test["gene_expression_func"](test["expected_rolled_up_gene_expression"]), + ) + + +@pytest.mark.parametrize( + "name,input_cell_counts_df,expected_cell_counts_df,input_gene_expr_df," "expected_gene_expr_df", _rollup_testcases() +) +def test__rollup(name, input_cell_counts_df, expected_cell_counts_df, input_gene_expr_df, expected_gene_expr_df): """ Test that the `rollup` function correctly accumulates (or rolls up) gene-expression values FOR EACH expressed gene and cell count values up the cell type ANCESTOR paths @@ -115,324 +402,33 @@ class TestHighLevelRollupFunction(unittest.TestCase): 5. 
Assert that the cell counts in the rolled up cell counts dataframe hold the correct rolled up values. """ + # Arrange + cell_counts_df_index_list = list(input_cell_counts_df.index.names) - @staticmethod - def _rollup_testcases(): - """ - Testcases for the `rollup` function. - - An important note about how the expected values are laid out in the testcases: - - 1. Expected values for rows in the rolled up cell counts dataframe are sorted by - (tissue_ontology_term_id, cell_type_ontology_term_id, ) - - 2. Expected values for rows in the rolled up gene expression dataframe are sorted by - (tissue_ontology_term_id, cell_type_ontology_term_id, , gene_ontology_term_id) - """ - tests = [ - { - "name": "no_compare_dim_all_tissues_have_all_cell_types", - "input_cell_counts": [ - ["UBERON:0000955", "CL:0000127", 300], - ["UBERON:0000955", "CL:0000644", 70], - ["UBERON:0000955", "CL:0002605", 80], - ["UBERON:0000955", "CL:0002627", 90], - ["UBERON:0002113", "CL:0000127", 300], - ["UBERON:0002113", "CL:0000644", 70], - ["UBERON:0002113", "CL:0002605", 80], - ["UBERON:0002113", "CL:0002627", 90], - ], - "expected_rolled_up_cell_counts": [ - ["UBERON:0000955", "CL:0000127", 540], - ["UBERON:0000955", "CL:0000644", 70], - ["UBERON:0000955", "CL:0002605", 80], - ["UBERON:0000955", "CL:0002627", 90], - ["UBERON:0002113", "CL:0000127", 540], - ["UBERON:0002113", "CL:0000644", 70], - ["UBERON:0002113", "CL:0002605", 80], - ["UBERON:0002113", "CL:0002627", 90], - ], - "input_gene_expression": [ - ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0002113", "CL:0002627", 1, 1, 90, 1000], - ], - "expected_rolled_up_gene_expression": [ - ["ENSG00000085265", 
"UBERON:0000955", "CL:0000127", 2, 2, 150, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0000127", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0000127", 2, 2, 150, 1000], - ["ENSG00000169429", "UBERON:0002113", "CL:0000127", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0002113", "CL:0002627", 1, 1, 90, 1000], - ], - "cell_counts_func": _cell_counts_df_without_compare_dim, - "gene_expression_func": _gene_expression_df_without_compare_dim, - }, - { - "name": "no_compare_dim_one_ancestor_cell_type_missing_in_one_tissue_but_exists_in_all_others", - # Tissue: "UBERON:0000955" MISSING cell type: "CL:0000127" in input cell counts - "input_cell_counts": [ - ["UBERON:0000955", "CL:0000644", 70], - ["UBERON:0000955", "CL:0002605", 80], - ["UBERON:0000955", "CL:0002627", 90], - ["UBERON:0002113", "CL:0000127", 300], - ["UBERON:0002113", "CL:0000644", 70], - ["UBERON:0002113", "CL:0002605", 80], - ["UBERON:0002113", "CL:0002627", 90], - ], - # cell count for cell type: "CL:0000127" in Tissue: "UBERON:0000955" GETS AGGREGATED because - # "CL:0000127" has non-zero cell count for at least one tissue in the input AND at least one - # descendant of "CL:0000127" has non-zero cell count for "UBERON:0000955" - "expected_rolled_up_cell_counts": [ - ["UBERON:0000955", "CL:0000127", 240], - ["UBERON:0000955", "CL:0000644", 70], - ["UBERON:0000955", "CL:0002605", 80], - ["UBERON:0000955", "CL:0002627", 90], - ["UBERON:0002113", "CL:0000127", 540], - ["UBERON:0002113", "CL:0000644", 70], - ["UBERON:0002113", "CL:0002605", 80], - ["UBERON:0002113", "CL:0002627", 90], - ], - "input_gene_expression": [ - 
["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0002113", "CL:0002627", 1, 1, 90, 1000], - ], - "expected_rolled_up_gene_expression": [ - ["ENSG00000085265", "UBERON:0000955", "CL:0000127", 2, 2, 150, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0000127", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0000127", 2, 2, 150, 1000], - ["ENSG00000169429", "UBERON:0002113", "CL:0000127", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0002113", "CL:0002627", 1, 1, 90, 1000], - ], - "cell_counts_func": _cell_counts_df_without_compare_dim, - "gene_expression_func": _gene_expression_df_without_compare_dim, - }, - { - "name": "no_compare_dim_gene_expressed_in_one_tissue_but_not_other", - "input_cell_counts": [ - ["UBERON:0000955", "CL:0000127", 300], - ["UBERON:0000955", "CL:0000644", 70], - ["UBERON:0000955", "CL:0002605", 80], - ["UBERON:0000955", "CL:0002627", 90], - ["UBERON:0002113", "CL:0000127", 300], - ["UBERON:0002113", "CL:0000644", 70], - ["UBERON:0002113", "CL:0002605", 80], - ["UBERON:0002113", "CL:0002627", 90], - ], - "expected_rolled_up_cell_counts": [ - ["UBERON:0000955", "CL:0000127", 540], - ["UBERON:0000955", "CL:0000644", 70], - ["UBERON:0000955", "CL:0002605", 80], - ["UBERON:0000955", "CL:0002627", 90], - ["UBERON:0002113", "CL:0000127", 540], - 
["UBERON:0002113", "CL:0000644", 70], - ["UBERON:0002113", "CL:0002605", 80], - ["UBERON:0002113", "CL:0002627", 90], - ], - # Gene "ENSG00000169429" expressed in Tissue "UBERON:0000955" but not expressed - # in Tissue "UBERON:0002113" - "input_gene_expression": [ - ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], - ], - "expected_rolled_up_gene_expression": [ - ["ENSG00000085265", "UBERON:0000955", "CL:0000127", 2, 2, 150, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0000127", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0000127", 2, 2, 150, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], - ], - "cell_counts_func": _cell_counts_df_without_compare_dim, - "gene_expression_func": _gene_expression_df_without_compare_dim, - }, - { - "name": "no_compare_dim_one_of_the_tissues_has_no_gene_expressions_at_all", - "input_cell_counts": [ - ["UBERON:0000955", "CL:0000127", 300], - ["UBERON:0000955", "CL:0000644", 70], - ["UBERON:0000955", "CL:0002605", 80], - ["UBERON:0000955", "CL:0002627", 90], - ["UBERON:0002113", "CL:0000127", 300], - ["UBERON:0002113", "CL:0000644", 70], - ["UBERON:0002113", "CL:0002605", 80], - ["UBERON:0002113", "CL:0002627", 90], - ], - "expected_rolled_up_cell_counts": [ - ["UBERON:0000955", "CL:0000127", 540], - ["UBERON:0000955", "CL:0000644", 70], - ["UBERON:0000955", "CL:0002605", 80], - ["UBERON:0000955", 
"CL:0002627", 90], - ["UBERON:0002113", "CL:0000127", 540], - ["UBERON:0002113", "CL:0000644", 70], - ["UBERON:0002113", "CL:0002605", 80], - ["UBERON:0002113", "CL:0002627", 90], - ], - # Tissue issue "UBERON:0002113" has no gene expressions - "input_gene_expression": [ - ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], - ], - "expected_rolled_up_gene_expression": [ - ["ENSG00000085265", "UBERON:0000955", "CL:0000127", 2, 2, 150, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0000127", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], - ], - "cell_counts_func": _cell_counts_df_without_compare_dim, - "gene_expression_func": _gene_expression_df_without_compare_dim, - }, - { - "name": "with_ethnicity_compare_dim_on_single_tissue", - "input_cell_counts": [ - ["UBERON:0000955", "CL:0000127", "unknown", 300], - ["UBERON:0000955", "CL:0000644", "unknown", 70], - ["UBERON:0000955", "CL:0002605", "HANCESTRO:0005", 10], - ["UBERON:0000955", "CL:0002605", "HANCESTRO:0008", 30], - ["UBERON:0000955", "CL:0002605", "multiethnic", 40], - ["UBERON:0000955", "CL:0002627", "HANCESTRO:0006", 10], - ["UBERON:0000955", "CL:0002627", "HANCESTRO:0008", 20], - ["UBERON:0000955", "CL:0002627", "multiethnic", 30], - ["UBERON:0000955", "CL:0002627", "unknown", 40], - ], - "expected_rolled_up_cell_counts": [ - ["UBERON:0000955", "CL:0000127", "HANCESTRO:0008", 50], - ["UBERON:0000955", "CL:0000127", "multiethnic", 70], - ["UBERON:0000955", "CL:0000127", "unknown", 410], - ["UBERON:0000955", "CL:0000644", "unknown", 70], - ["UBERON:0000955", "CL:0002605", "HANCESTRO:0005", 10], - ["UBERON:0000955", "CL:0002605", "HANCESTRO:0008", 30], - 
["UBERON:0000955", "CL:0002605", "multiethnic", 40], - ["UBERON:0000955", "CL:0002627", "HANCESTRO:0006", 10], - ["UBERON:0000955", "CL:0002627", "HANCESTRO:0008", 20], - ["UBERON:0000955", "CL:0002627", "multiethnic", 30], - ["UBERON:0000955", "CL:0002627", "unknown", 40], - ], - "input_gene_expression": [ - ["ENSG00000085265", "UBERON:0000955", "CL:0000644", "unknown", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", "HANCESTRO:0008", 1, 1, 30, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", "multiethnic", 1, 1, 40, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002627", "HANCESTRO:0008", 1, 1, 20, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0002627", "multiethnic", 1, 1, 30, 1000], - ], - "expected_rolled_up_gene_expression": [ - ["ENSG00000085265", "UBERON:0000955", "CL:0000127", "HANCESTRO:0008", 2, 2, 50, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0000127", "multiethnic", 1, 1, 40, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0000127", "multiethnic", 1, 1, 30, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0000127", "unknown", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0000644", "unknown", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", "HANCESTRO:0008", 1, 1, 30, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", "multiethnic", 1, 1, 40, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002627", "HANCESTRO:0008", 1, 1, 20, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0002627", "multiethnic", 1, 1, 30, 1000], - ], - "cell_counts_func": _cell_counts_df_with_ethnicity_compare_dim, - "gene_expression_func": _gene_expression_df_with_ethnicity_compare_dim, - }, - { - "name": "no_compare_dim_all_tissues_have_all_cell_types_except_root_cell_type", - "input_cell_counts": [ - ["UBERON:0000955", "CL:0000644", 70], - ["UBERON:0000955", "CL:0002605", 80], - ["UBERON:0000955", "CL:0002627", 90], - ["UBERON:0002113", "CL:0000644", 70], - 
["UBERON:0002113", "CL:0002605", 80], - ["UBERON:0002113", "CL:0002627", 90], - ], - "expected_rolled_up_cell_counts": [ - ["UBERON:0000955", "CL:0000644", 70], - ["UBERON:0000955", "CL:0002605", 80], - ["UBERON:0000955", "CL:0002627", 90], - ["UBERON:0002113", "CL:0000644", 70], - ["UBERON:0002113", "CL:0002605", 80], - ["UBERON:0002113", "CL:0002627", 90], - ], - "input_gene_expression": [ - ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0002113", "CL:0002627", 1, 1, 90, 1000], - ], - "expected_rolled_up_gene_expression": [ - ["ENSG00000085265", "UBERON:0000955", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0000955", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0000955", "CL:0002627", 1, 1, 90, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0000644", 1, 1, 70, 1000], - ["ENSG00000085265", "UBERON:0002113", "CL:0002605", 1, 1, 80, 1000], - ["ENSG00000169429", "UBERON:0002113", "CL:0002627", 1, 1, 90, 1000], - ], - "cell_counts_func": _cell_counts_df_without_compare_dim, - "gene_expression_func": _gene_expression_df_without_compare_dim, - }, - ] - - return [ - ( - test["name"], - test["cell_counts_func"](test["input_cell_counts"]), - test["cell_counts_func"](test["expected_rolled_up_cell_counts"]), - test["gene_expression_func"](test["input_gene_expression"]), - test["gene_expression_func"](test["expected_rolled_up_gene_expression"]), - ) - for test in tests - ] - - @parameterized.expand(_rollup_testcases) - def test__rollup(self, _, input_cell_counts_df, expected_cell_counts_df, input_gene_expr_df, expected_gene_expr_df): - # Arrange - cell_counts_df_index_list = 
list(input_cell_counts_df.index.names) - - # Act - - # Note that we are creating copies of the input dataframes before passing them as - # arguments to the `rollup` function so that if the `rollup` function mutates the - # argument values, the input to the test is not affected. - rolled_up_gene_expr_df, rolled_up_cell_counts_df = rollup( - input_gene_expr_df.copy(), input_cell_counts_df.copy() - ) + # Act - # Assert - rolled_up_cell_counts_df.reset_index(inplace=True) - expected_cell_counts_df.reset_index(inplace=True) + # Note that we are creating copies of the input dataframes before passing them as + # arguments to the `rollup` function so that if the `rollup` function mutates the + # argument values, the input to the test is not affected. + rolled_up_gene_expr_df, rolled_up_cell_counts_df = rollup(input_gene_expr_df.copy(), input_cell_counts_df.copy()) - assert_frame_equal( - rolled_up_cell_counts_df.reset_index(drop=True), - expected_cell_counts_df.reset_index(drop=True), - check_dtype=False, - ) + # Assert + rolled_up_cell_counts_df.reset_index(inplace=True) + expected_cell_counts_df.reset_index(inplace=True) - # sort the rolled up gene expression dataframe so that the correct rows are compared with - # the expected gene expression rows in the assert call - sort_columns_for_rolled_gene_expr_df = list(cell_counts_df_index_list) + ["gene_ontology_term_id"] - rolled_up_gene_expr_df.sort_values(sort_columns_for_rolled_gene_expr_df, inplace=True) + assert_frame_equal( + rolled_up_cell_counts_df.reset_index(drop=True), + expected_cell_counts_df.reset_index(drop=True), + check_dtype=False, + ) - assert_frame_equal( - rolled_up_gene_expr_df.reset_index(drop=True), - expected_gene_expr_df.reset_index(drop=True), - check_dtype=False, - ) + # sort the rolled up gene expression dataframe so that the correct rows are compared with + # the expected gene expression rows in the assert call + sort_columns_for_rolled_gene_expr_df = list(cell_counts_df_index_list) + 
["gene_ontology_term_id"] + rolled_up_gene_expr_df.sort_values(sort_columns_for_rolled_gene_expr_df, inplace=True) + + assert_frame_equal( + rolled_up_gene_expr_df.reset_index(drop=True), + expected_gene_expr_df.reset_index(drop=True), + check_dtype=False, + ) From e52f53c7c4f2e4c1e694cccbbeb61825015bf96d Mon Sep 17 00:00:00 2001 From: Severiano Badajoz Date: Mon, 18 Dec 2023 12:10:30 -0800 Subject: [PATCH 4/6] feat(census-models): clobber and differentiate projects with same title, add r embedding for maintained, analytics (#6360) Co-authored-by: Sidney Bell Co-authored-by: pablo-gar --- frontend/census-projects.json | 71 +++++++------- frontend/src/common/analytics/events.ts | 1 + .../Header/components/Nav/index.tsx | 74 +-------------- frontend/src/types/census-projects.d.ts | 2 +- .../components/EmbeddingButton/connect.ts | 74 +++++++++++---- .../components/EmbeddingButton/index.tsx | 27 +++--- .../components/EmbeddingButton/style.ts | 0 .../components/EmbeddingButton/types.ts | 7 ++ .../components/ModelButton/index.tsx | 20 ++-- .../Project/ProjectButtons/index.tsx | 83 ++++++++++++++++ .../Project/ProjectButtons/style.tsx | 26 +++++ .../components/EmbeddingButton/types.ts | 6 -- .../components/Project/connect.ts | 66 ++++++++++--- .../components/Project/index.tsx | 94 +++++++++---------- .../components/Project/types.ts | 10 +- frontend/src/views/CensusDirectory/index.tsx | 57 +++++++---- frontend/src/views/CensusDirectory/style.ts | 16 +--- frontend/src/views/CensusDirectory/utils.ts | 69 +++++++++++++- 18 files changed, 450 insertions(+), 253 deletions(-) rename frontend/src/views/CensusDirectory/components/Project/{ => ProjectButtons}/components/EmbeddingButton/connect.ts (56%) rename frontend/src/views/CensusDirectory/components/Project/{ => ProjectButtons}/components/EmbeddingButton/index.tsx (84%) rename frontend/src/views/CensusDirectory/components/Project/{ => ProjectButtons}/components/EmbeddingButton/style.ts (100%) create mode 100644 
frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/types.ts rename frontend/src/views/CensusDirectory/components/Project/{ => ProjectButtons}/components/ModelButton/index.tsx (72%) create mode 100644 frontend/src/views/CensusDirectory/components/Project/ProjectButtons/index.tsx create mode 100644 frontend/src/views/CensusDirectory/components/Project/ProjectButtons/style.tsx delete mode 100644 frontend/src/views/CensusDirectory/components/Project/components/EmbeddingButton/types.ts diff --git a/frontend/census-projects.json b/frontend/census-projects.json index edfdc6a6b5a48..3d7a04d0a5e6d 100644 --- a/frontend/census-projects.json +++ b/frontend/census-projects.json @@ -1,37 +1,4 @@ [ - { - "tier": "maintained", - "title": "Geneformer embeddings fine-tuned for CELLxGENE Census cell subclass classification", - "description": "Geneformer is a foundation transformer model pretrained on a large-scale corpus of ~30 million single cell transcriptomes to enable context-aware predictions in settings with limited data in network biology.\nThese cell embeddings are derived from a Geneformer model CZI fine-tuned for cell subclass classification. As the fine-tuning procedure remains experimental and wasn’t performed by the Geneformer authors, these embeddings should not be used to assess performance of the pre-trained Geneformer model.", - "primary_contact": { - "name": "CELLxGENE Discover Team", - "email": "soma@chanzuckerberg.com", - "affiliation": "CZI" - }, - "DOI": "10.1038/s41586-023-06139-9", - "publication_info": "", - "publication_link": "", - "project_page": "", - "additional_information": "Beginning with the geneformer-12L-30M pretrained model published by Theodoris et al. (huggingface.co/ctheodoris/Geneformer), a BertForSequenceClassification model was trained to predict cell subclass (as annotated in CELLxGENE Discover see https://cellxgene.cziscience.com/collections). 
Embeddings were then generated using Geneformer’s EmbExtractor module with emb_layer=0.\nFor full details and a reproducible workflow please see: https://github.com/chanzuckerberg/cellxgene-census/blob/main/tools/models/geneformer/README.md", - "model_link": "s3://cellxgene-contrib-public/models/geneformer/2023-12-15/homo_sapiens/fined-tuned-model/", - "data_type": "obs_embedding", - "obsm_layer": "geneformer", - "census_version": "2023-12-15", - "experiment_name": "homo_sapiens", - "measurement_name": "RNA", - "n_cells": 62998417, - "n_columns": 512, - "n_features": 512, - "notebook_links": [ - [ - "Using trained model", - "https://chanzuckerberg.github.io/cellxgene-census/notebooks/analysis_demo/comp_bio_geneformer_prediction.html" - ] - ], - "submission_date": "2023-11-06", - "last_updated": null, - "revised_by": null - }, { "tier": "maintained", "title": "scVI integrated-embeddings with explicit modeling of batch effects", @@ -130,6 +97,40 @@ "last_updated": null, "revised_by": null }, + { + "tier": "maintained", + "title": "Geneformer embeddings fine-tuned for CELLxGENE Census cell subclass classification", + "description": "Geneformer is a foundation transformer model pretrained on a large-scale corpus of ~30 million single cell transcriptomes to enable context-aware predictions in settings with limited data in network biology.\nThese cell embeddings are derived from a Geneformer model CZI fine-tuned for cell subclass classification. 
As the fine-tuning procedure remains experimental and wasn’t performed by the Geneformer authors, these embeddings should not be used to assess performance of the pre-trained Geneformer model.", + "primary_contact": { + "name": "CELLxGENE Discover Team", + "email": "soma@chanzuckerberg.com", + "affiliation": "CZI" + }, + "DOI": "10.1038/s41586-023-06139-9", + "publication_info": "", + "publication_link": "", + "project_page": "", + "additional_information": "Beginning with the geneformer-12L-30M pretrained model published by Theodoris et al. (huggingface.co/ctheodoris/Geneformer), a BertForSequenceClassification model was trained to predict cell subclass (as annotated in CELLxGENE Discover see https://cellxgene.cziscience.com/collections). Embeddings were then generated using Geneformer’s EmbExtractor module with emb_layer=0.\nFor full details and a reproducible workflow please see: https://github.com/chanzuckerberg/cellxgene-census/blob/main/tools/models/geneformer/README.md", + "model_link": "s3://cellxgene-contrib-public/models/geneformer/2023-12-15/homo_sapiens/fined-tuned-model/", + "data_type": "obs_embedding", + "obsm_layer": "geneformer", + "census_version": "2023-12-15", + "experiment_name": "homo_sapiens", + "measurement_name": "RNA", + "n_cells": 62998417, + "n_columns": 512, + "n_features": 512, + "notebook_links": [ + [ + "Using trained model", + "https://chanzuckerberg.github.io/cellxgene-census/notebooks/analysis_demo/comp_bio_geneformer_prediction.html" + ] + ], + "submission_date": "2023-11-06", + "last_updated": null, + "revised_by": null + }, + { "tier": "community", "title": "PINNACLE: Contextual AI Model for Single-Cell Protein Biology", @@ -213,12 +214,12 @@ "additional_contacts": [ { "name": "Jialong Jiang", - "email": "jiangjl@caltech.edu" , + "email": "jiangjl@caltech.edu", "affiliation": "Thomson Lab, Caltech" }, { "name": "Yingying Gong", - "email": "ygong@caltech.edu" , + "email": "ygong@caltech.edu", "affiliation": "Thomson Lab, 
Caltech" } ], diff --git a/frontend/src/common/analytics/events.ts b/frontend/src/common/analytics/events.ts index 3caf14324404c..cc5824ed36bcb 100644 --- a/frontend/src/common/analytics/events.ts +++ b/frontend/src/common/analytics/events.ts @@ -60,6 +60,7 @@ export enum EVENTS { CENSUS_EMBEDDING_COPIED = "CENSUS_EMBEDDING_COPIED", CENSUS_PROJECT_LINK_CLICKED = "CENSUS_PROJECT_LINK_CLICKED", CENSUS_EMBEDDING_NOTEBOOK_CLICKED = "CENSUS_EMBEDDING_NOTEBOOK_CLICKED", + CENSUS_MODELS_TUTORIALS_CLICKED = "CENSUS_MODELS_TUTORIALS_CLICKED", DATASETS_CLICK_NAV = "DATASETS_CLICK_NAV", COLLECTIONS_CLICK_NAV = "COLLECTIONS_CLICK_NAV", DOCUMENTATION_CLICK_NAV = "DOCUMENTATION_CLICK_NAV", diff --git a/frontend/src/components/Header/components/Nav/index.tsx b/frontend/src/components/Header/components/Nav/index.tsx index 04cf29f1b74eb..a073d03275976 100644 --- a/frontend/src/components/Header/components/Nav/index.tsx +++ b/frontend/src/components/Header/components/Nav/index.tsx @@ -18,79 +18,7 @@ import { CENSUS_LINK } from "./constants"; import { Props } from "./types"; export default function Nav({ className, pathname }: Props): JSX.Element { - const isCensusDirectory = isRouteActive(pathname, ROUTES.CENSUS_DIRECTORY); - - return !isCensusDirectory ? 
( - - - - { - track(EVENTS.COLLECTIONS_CLICK_NAV); - }} - text="Collections" - /> - - - - - { - track(EVENTS.DATASETS_CLICK_NAV); - }} - text="Datasets" - /> - - - - - { - track(EVENTS.WMG_CLICK_NAV); - }} - text="Gene Expression" - /> - - - - - { - track(EVENTS.CELL_GUIDE_CLICK_NAV); - }} - text="Cell Guide" - /> - - - - - - { - track(EVENTS.CENSUS_DOCUMENTATION_CLICK_NAV); - }} - rel="noopener" - target="_self" - text="Census" - /> - - - ) : ( + return ( <> diff --git a/frontend/src/types/census-projects.d.ts b/frontend/src/types/census-projects.d.ts index f905e0e95a467..236fc7b71d099 100644 --- a/frontend/src/types/census-projects.d.ts +++ b/frontend/src/types/census-projects.d.ts @@ -3,7 +3,7 @@ declare module "census-projects.json" { extends Partial { notebook_links?: [string, string][]; tier: "community" | "maintained"; - obs_matrix: string; + obsm_layer: string; project_page: string; } const content: StaticProject[]; diff --git a/frontend/src/views/CensusDirectory/components/Project/components/EmbeddingButton/connect.ts b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/connect.ts similarity index 56% rename from frontend/src/views/CensusDirectory/components/Project/components/EmbeddingButton/connect.ts rename to frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/connect.ts index 57777acae45e7..726a426f1c18a 100644 --- a/frontend/src/views/CensusDirectory/components/Project/components/EmbeddingButton/connect.ts +++ b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/connect.ts @@ -1,10 +1,9 @@ -import { useCallback, useState } from "react"; +import { useCallback, useEffect, useState } from "react"; import { track } from "src/common/analytics"; import { EVENTS } from "src/common/analytics/events"; import { EmbeddingButtonProps } from "./types"; -import { Project } from "src/common/queries/censusDirectory"; -import { 
StaticProject } from "census-projects.json"; import { getProjectTier } from "src/views/CensusDirectory/utils"; +import { UnionProject } from "../../../types"; // The div contains two lines of the word copy const NUMBER_OF_EXTRA_LINES = 2; @@ -13,13 +12,18 @@ const NUMBER_OF_PADDING_LINES = 1; // Total amount of padding around the highlighted line const LINE_HIGHLIGHT_BACKGROUND_PADDING = 8; -function pythonCodeSnippet(project: StaticProject | Project): string { +const MAINTAINED_PYTHON_NOTEBOOK_LINK = + "https://chanzuckerberg.github.io/cellxgene-census/notebooks/api_demo/census_access_maintained_embeddings.html"; +const MAINTAINED_R_NOTEBOOK_LINK = + "https://chanzuckerberg.github.io/cellxgene-census/r/articles/census_access_maintained_embeddings.html"; +const HOSTED_PYTHON_NOTEBOOK_LINK = + "https://chanzuckerberg.github.io/cellxgene-census/notebooks/api_demo/census_embedding.html"; + +function pythonCodeSnippet(project: UnionProject, uri: string): string { const censusVersion = project.census_version; const organism = project.experiment_name; const measurement = project.measurement_name; - const uri = `"s3://cellxgene-contrib-archive/contrib/cell-census/${project.id}"`; - return project.tier === "maintained" ? 
` import cellxgene_census @@ -28,25 +32,46 @@ function pythonCodeSnippet(project: StaticProject | Project): string { census, organism = "${organism}", measurement_name = "${measurement}", - obs_value_filter = "tissue == 'tongue'", - obsm_layers = "${project.obs_matrix}" + obs_value_filter = "tissue_general == 'central nervous system'", + obsm_layers = ["${project.obsm_layer}"] )` : ` import cellxgene_census from cellxgene_census.experimental import get_embedding - embedding_uri = ${uri} + embedding_uri = \\ + "${uri}" census = cellxgene_census.open_soma(census_version="${censusVersion}") adata = cellxgene_census.get_anndata( census, organism = "${organism}", measurement_name = "${measurement}", - obs_value_filter = "tissue == 'tongue'", + obs_value_filter = "tissue_general == 'central nervous system'", ) - embeddings = get_embedding("${censusVersion}", embedding_uri, adata.obs["soma_joinid"]) + embeddings = get_embedding("${censusVersion}", embedding_uri, adata.obs["soma_joinid"]).to_numpy()) adata.obsm["emb"] = embeddings`; } +function rCodeSnippet(project: UnionProject): string { + const censusVersion = project.census_version; + const organism = project.experiment_name; + + return project.tier === "maintained" + ? ` library("cellxgene.census") + library("Seurat") + + census <- open_soma(census_version = "${censusVersion}") + seurat_obj <- get_seurat( + census, + organism = "${organism}", + obs_value_filter = "tissue_general == 'central nervous system'", + obs_column_names = c("cell_type"), + obsm_layers = c("${project.obsm_layer}") + ) + ` + : ""; +} + export const useConnect = ({ project }: EmbeddingButtonProps) => { const [isOpen, setIsOpen] = useState(false); const [isCopied, setIsCopied] = useState(false); @@ -65,10 +90,12 @@ export const useConnect = ({ project }: EmbeddingButtonProps) => { setIsOpen(!isOpen); }, [isOpen, projectTier, project.title]); - const codeSnippet = language === "python" ? 
pythonCodeSnippet(project) : ""; + const uri = `s3://cellxgene-contrib-public/contrib/cell-census/soma/${project.census_version}/${project.id}`; - // These can be derived from the static S3 namespace + the accessor_id or will be a static url provided in json blob - const uri = `s3://cellxgene-contrib-archive/contrib/cell-census/${project.id}`; + const codeSnippet = + language === "python" + ? pythonCodeSnippet(project, uri) + : rCodeSnippet(project); const codeSnippetRef = useCallback( (node: HTMLDivElement) => { @@ -83,8 +110,7 @@ export const useConnect = ({ project }: EmbeddingButtonProps) => { const lineIndex = lines.findIndex((line: string) => line.includes(uri)); setURITopPosition( - newLineHeight * (lineIndex + 1) + - NUMBER_OF_PADDING_LINES + + newLineHeight * (lineIndex + 1 + NUMBER_OF_PADDING_LINES) + LINE_HIGHLIGHT_BACKGROUND_PADDING / 2 ); setLineHeight(newLineHeight + LINE_HIGHLIGHT_BACKGROUND_PADDING); @@ -93,6 +119,21 @@ export const useConnect = ({ project }: EmbeddingButtonProps) => { [uri] ); + const [notebookLink, setNotebookLink] = useState(""); + useEffect(() => { + if (projectTier === "maintained") { + if (language === "python") { + setNotebookLink(MAINTAINED_PYTHON_NOTEBOOK_LINK); + } else { + setNotebookLink(MAINTAINED_R_NOTEBOOK_LINK); + } + } else { + if (language === "python") { + setNotebookLink(HOSTED_PYTHON_NOTEBOOK_LINK); + } + } + }, [language, projectTier]); + const handleCopyMouseEnter = () => setIsCopied(false); return { @@ -104,6 +145,7 @@ export const useConnect = ({ project }: EmbeddingButtonProps) => { uri, uriTopPosition, lineHeight, + notebookLink, codeSnippetRef, setLanguage, handleButtonClick, diff --git a/frontend/src/views/CensusDirectory/components/Project/components/EmbeddingButton/index.tsx b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/index.tsx similarity index 84% rename from frontend/src/views/CensusDirectory/components/Project/components/EmbeddingButton/index.tsx 
rename to frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/index.tsx index b2c9232166339..4b0a9e251a65f 100644 --- a/frontend/src/views/CensusDirectory/components/Project/components/EmbeddingButton/index.tsx +++ b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/index.tsx @@ -8,18 +8,18 @@ import { track } from "src/common/analytics"; import { EVENTS } from "src/common/analytics/events"; import Highlight from "react-highlight"; import { RadioGroup } from "@mui/material"; -import Link from "next/link"; import { StyledDialogContent, Label, CodeSnippet, Break } from "./style"; -import { StyledButton } from "src/components/CreateCollectionModal/style"; +import { StyledButton } from "../../style"; function EmbeddingButton(props: EmbeddingButtonProps) { - const { project } = props; + const { project, uniqueMetadata } = props; const { isOpen, language, codeSnippet, projectTier, uri, + notebookLink, codeSnippetRef, uriTopPosition, lineHeight, @@ -49,7 +49,11 @@ function EmbeddingButton(props: EmbeddingButtonProps) { row > - +

@@ -67,6 +71,7 @@ function EmbeddingButton(props: EmbeddingButtonProps) { project: project.title, category: projectTier, version: language, + ...uniqueMetadata, }) } /> @@ -79,6 +84,7 @@ function EmbeddingButton(props: EmbeddingButtonProps) { project: project.title, category: projectTier, version: "URI", + ...uniqueMetadata, }) } /> @@ -93,22 +99,19 @@ function EmbeddingButton(props: EmbeddingButtonProps) { { "If you'd like to see more advanced access patterns, explore this " } - track(EVENTS.CENSUS_EMBEDDING_NOTEBOOK_CLICKED, { project: project.title, category: projectTier, version: language, + ...uniqueMetadata, }) } > - Jupyter Notebook - + {language === "python" ? "Jupyter" : "R"} Notebook + !
diff --git a/frontend/src/views/CensusDirectory/components/Project/components/EmbeddingButton/style.ts b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/style.ts similarity index 100% rename from frontend/src/views/CensusDirectory/components/Project/components/EmbeddingButton/style.ts rename to frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/style.ts diff --git a/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/types.ts b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/types.ts new file mode 100644 index 0000000000000..c212dad7715f8 --- /dev/null +++ b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/types.ts @@ -0,0 +1,7 @@ +import { ClobberedProjects } from "src/views/CensusDirectory/utils"; +import { UnionProject } from "../../../types"; + +export interface EmbeddingButtonProps { + project: UnionProject; + uniqueMetadata?: ClobberedProjects[number][0]; +} diff --git a/frontend/src/views/CensusDirectory/components/Project/components/ModelButton/index.tsx b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/ModelButton/index.tsx similarity index 72% rename from frontend/src/views/CensusDirectory/components/Project/components/ModelButton/index.tsx rename to frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/ModelButton/index.tsx index daae24aa01da6..f8f2a5a6e20ce 100644 --- a/frontend/src/views/CensusDirectory/components/Project/components/ModelButton/index.tsx +++ b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/ModelButton/index.tsx @@ -1,12 +1,17 @@ -import { StaticProject } from "census-projects.json"; -import Link from "next/link"; import { track } from "src/common/analytics"; import { EVENTS } from "src/common/analytics/events"; -import 
{ Project } from "src/common/queries/censusDirectory"; -import { StyledButton } from "src/views/CensusDirectory/style"; import Toast from "src/views/Collection/components/Toast"; +import { UnionProject } from "../../../types"; +import { StyledButton } from "../../style"; +import { ClobberedProjects } from "src/views/CensusDirectory/utils"; -const ModelButton = ({ project }: { project: StaticProject | Project }) => { +const ModelButton = ({ + project, + uniqueMetadata, +}: { + project: UnionProject; + uniqueMetadata?: ClobberedProjects[number][0]; +}) => { if (!project.model_link) return null; return project.model_link.startsWith("s3") ? ( { track(EVENTS.CENSUS_MODEL_COPIED, { project: project.title, category: project.tier, + ...uniqueMetadata, }); // copy URI to clipboard navigator.clipboard.writeText(project.model_link || ""); @@ -29,7 +35,7 @@ const ModelButton = ({ project }: { project: StaticProject | Project }) => { Copy Model URI ) : ( - + { > Model Page - + ); }; diff --git a/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/index.tsx b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/index.tsx new file mode 100644 index 0000000000000..7b59042895fe4 --- /dev/null +++ b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/index.tsx @@ -0,0 +1,83 @@ +import { track } from "src/common/analytics"; +import { EVENTS } from "src/common/analytics/events"; +import { ClobberedProjects } from "src/views/CensusDirectory/utils"; +import ModelButton from "./components/ModelButton"; +import EmbeddingButton from "./components/EmbeddingButton"; +import { ButtonsColumn, ButtonsRow, StyledButton } from "./style"; +import DetailItem from "../../DetailItem"; + +const IGNORE_DIFFERENT_METADATA_KEYS = ["model_link", "id"]; +const ATTRIBUTE_TO_LABEL: Record = { + experiment_name: "Organism", + n_cells: "Cells", +}; + +const ProjectButtons = ({ + clobberedProjects, +}: { + clobberedProjects: ClobberedProjects[number]; +}) => { + 
const sharedProject = clobberedProjects[0]; + const projects = clobberedProjects[1]; + if (projects.length === 1) { + return ( + + {"project_page" in sharedProject && !!sharedProject.project_page && ( + + { + track(EVENTS.CENSUS_PROJECT_LINK_CLICKED, { + project: sharedProject.title, + category: sharedProject.tier, + }); + }} + > + Project Page + + + )} + + + + ); + } + + return ( + + {projects.map((project) => { + const uniqueMetadata = Object.fromEntries( + Object.entries(project).filter(([key, value]) => { + return !(key in sharedProject) && value; + }) + ); + + return ( + + {Object.entries(uniqueMetadata) + .filter(([key]) => !IGNORE_DIFFERENT_METADATA_KEYS.includes(key)) + .map(([key, value]) => { + return ( + + {value} + + ); + })} + + + + ); + })} + + ); +}; + +export default ProjectButtons; diff --git a/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/style.tsx b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/style.tsx new file mode 100644 index 0000000000000..805dc1d8df4a7 --- /dev/null +++ b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/style.tsx @@ -0,0 +1,26 @@ +import { Button } from "@czi-sds/components"; +import styled from "@emotion/styled"; +import { spacesDefault, spacesXl } from "src/common/theme"; + +export const ButtonsColumn = styled.div` + display: flex; + flex-direction: column; + justify-content: center; + align-items: center; +`; + +export const ButtonsRow = styled.div` + display: flex; + flex-direction: row; + gap: ${spacesDefault}px; + margin-bottom: ${spacesXl}px; + + & > div { + margin-right: ${spacesXl}px; + } +`; + +export const StyledButton = styled(Button)` + font-weight: 500; + min-width: 80px; +`; diff --git a/frontend/src/views/CensusDirectory/components/Project/components/EmbeddingButton/types.ts b/frontend/src/views/CensusDirectory/components/Project/components/EmbeddingButton/types.ts deleted file mode 100644 index b3a3abc3dfaa9..0000000000000 --- 
a/frontend/src/views/CensusDirectory/components/Project/components/EmbeddingButton/types.ts +++ /dev/null @@ -1,6 +0,0 @@ -import { type StaticProject } from "census-projects.json"; -import { type Project } from "src/common/queries/censusDirectory"; - -export interface EmbeddingButtonProps { - project: StaticProject | Project; -} diff --git a/frontend/src/views/CensusDirectory/components/Project/connect.ts b/frontend/src/views/CensusDirectory/components/Project/connect.ts index 65f104a1bd3e5..709c054d3331b 100644 --- a/frontend/src/views/CensusDirectory/components/Project/connect.ts +++ b/frontend/src/views/CensusDirectory/components/Project/connect.ts @@ -1,26 +1,58 @@ -import { getProjectTier } from "../../utils"; +/* eslint-disable sonarjs/cognitive-complexity */ import { ProjectProps } from "./types"; import notebookLinks from "census-notebook-links.json"; -export const useConnect = ({ project, id }: ProjectProps) => { - const date = new Date( - project.last_updated || project.submission_date || "" - ).toLocaleDateString("en-US", { +const DEFAULT_EPOCH_TIME = 0; + +export const useConnect = ({ clobberedProjects }: ProjectProps) => { + const sharedProject = clobberedProjects[0]; + + let date = new Date( + sharedProject.last_updated || + sharedProject.submission_date || + DEFAULT_EPOCH_TIME + ); + + if (date.getDate() === DEFAULT_EPOCH_TIME) { + clobberedProjects[1].forEach((project) => { + const projectDate = new Date( + project.last_updated || project.submission_date || DEFAULT_EPOCH_TIME + ); + // check if the project date is more recent than the current date + if (projectDate > date) { + date = projectDate; + } + }); + } + + const formattedDate = date.toLocaleDateString("en-US", { dateStyle: "long", }); - const projectNotebookLinks: [string, string][] | undefined = - "notebook_links" in project - ? project.notebook_links - : notebookLinks[id ?? ""]; + const projectNotebookLinks: [string, string][] = + ("notebook_links" in sharedProject + ? 
sharedProject.notebook_links + : notebookLinks[sharedProject.id ?? ""]) ?? []; + + if (projectNotebookLinks.length === 0) { + clobberedProjects[1].forEach((project) => { + projectNotebookLinks?.push( + ...(("notebook_links" in project + ? project.notebook_links + : notebookLinks[project.id ?? ""]) ?? []) + ); + }); + } - const projectTier = getProjectTier(project); + const projectTier = sharedProject.tier; let authorsString = ""; - const primary_affiliation = project.primary_contact?.affiliation || ""; + const primary_affiliation = sharedProject.primary_contact?.affiliation || ""; const affiliations = new Map(); - affiliations.set(primary_affiliation, [project.primary_contact?.name || ""]); - project.additional_contacts?.forEach((contact) => { + affiliations.set(primary_affiliation, [ + sharedProject.primary_contact?.name || "", + ]); + sharedProject.additional_contacts?.forEach((contact) => { const affiliationNames = affiliations.get(contact.affiliation) || []; affiliationNames.push(contact.name); affiliations.set(contact.affiliation, affiliationNames); @@ -37,5 +69,11 @@ export const useConnect = ({ project, id }: ProjectProps) => { } }); - return { date, projectNotebookLinks, projectTier, authorsString }; + return { + date: formattedDate, + projectNotebookLinks, + projectTier, + authorsString, + sharedProject, + }; }; diff --git a/frontend/src/views/CensusDirectory/components/Project/index.tsx b/frontend/src/views/CensusDirectory/components/Project/index.tsx index 0bf0db1d00441..ad32134498735 100644 --- a/frontend/src/views/CensusDirectory/components/Project/index.tsx +++ b/frontend/src/views/CensusDirectory/components/Project/index.tsx @@ -1,4 +1,3 @@ -import Link from "next/link"; import { type Project } from "src/common/queries/censusDirectory"; import { ProjectContainer, @@ -7,69 +6,79 @@ import { ProjectSubmitter, ProjectDescription, DetailsContainer, - ProjectButtons, - StyledButton, } from "../../style"; import DetailItem from "../DetailItem"; -import 
EmbeddingButton from "./components/EmbeddingButton"; - import { ProjectProps } from "./types"; import { useConnect } from "./connect"; import { track } from "src/common/analytics"; import { EVENTS } from "src/common/analytics/events"; -import ModelButton from "./components/ModelButton"; +import ProjectButtons from "./ProjectButtons"; -const Project = ({ project, id }: ProjectProps) => { - const { date, projectNotebookLinks, projectTier, authorsString } = useConnect( - { - project, - id, - } - ); +const DATA_TYPE_TO_EMBEDDING: { [key: string]: string } = { + obs_embedding: "obs", + var_embedding: "var", +}; + +const Project = ({ clobberedProjects }: ProjectProps) => { + const { + date, + projectNotebookLinks, + projectTier, + authorsString, + sharedProject, + } = useConnect({ + clobberedProjects, + }); return ( - + - {project.title} + {sharedProject.title} {authorsString} - {project.description} + {sharedProject.description} { track(EVENTS.CENSUS_CONTACT_CLICKED, { - project: project.title, - contact: project.primary_contact?.name, + project: sharedProject.title, + contact: sharedProject.primary_contact?.name, }); }} > - {project.primary_contact?.name} + {sharedProject.primary_contact?.name} { track(EVENTS.CENSUS_PUBLICATION_CLICKED, { - publication: project.publication_info, - project: project.title, + publication: sharedProject.publication_info, + project: sharedProject.title, }); }} > - {project.publication_info} + {sharedProject.publication_info} {date} - {project.census_version} + {sharedProject.census_version} + + + {sharedProject.experiment_name} - {project.experiment_name} - {project.measurement_name} + {sharedProject.measurement_name} + + + {sharedProject.data_type + ? 
DATA_TYPE_TO_EMBEDDING[sharedProject.data_type] + : undefined} - {project.data_type} {projectNotebookLinks?.map((link) => ( { key={link[1]} onClick={() => { track(EVENTS.CENSUS_NOTEBOOK_CLICKED, { - project: project.title, + project: sharedProject.title, category: projectTier, notebook: link[0], }); @@ -86,31 +95,12 @@ const Project = ({ project, id }: ProjectProps) => { {link[0]} ))} - {project.n_cells} - {project.n_genes} - {project.n_columns} + {sharedProject.n_cells} + {sharedProject.n_genes} + {sharedProject.n_columns} - - {"project_page" in project && !!project.project_page && ( - - { - track(EVENTS.CENSUS_PROJECT_LINK_CLICKED, { - project: project.title, - category: projectTier, - }); - }} - > - Project Page - - - )} - - - + ); }; diff --git a/frontend/src/views/CensusDirectory/components/Project/types.ts b/frontend/src/views/CensusDirectory/components/Project/types.ts index 42573943d9425..72db90fd4f71e 100644 --- a/frontend/src/views/CensusDirectory/components/Project/types.ts +++ b/frontend/src/views/CensusDirectory/components/Project/types.ts @@ -1,10 +1,8 @@ import { type StaticProject } from "census-projects.json"; -import { - type Project, - type ProjectResponse, -} from "src/common/queries/censusDirectory"; +import { type Project } from "src/common/queries/censusDirectory"; +import { type ClobberedProjects } from "../../utils"; +export type UnionProject = StaticProject | Project; export interface ProjectProps { - project: StaticProject | Project; - id?: keyof ProjectResponse; + clobberedProjects: ClobberedProjects[number]; } diff --git a/frontend/src/views/CensusDirectory/index.tsx b/frontend/src/views/CensusDirectory/index.tsx index 7f6532842da7d..480726bdd9c81 100644 --- a/frontend/src/views/CensusDirectory/index.tsx +++ b/frontend/src/views/CensusDirectory/index.tsx @@ -15,19 +15,28 @@ import { TierDescription, } from "./style"; import Project from "./components/Project"; +import { clobberAndDifferentiateProjectMetadata } from "./utils"; +import { 
track } from "src/common/analytics"; +import { EVENTS } from "src/common/analytics/events"; function CensusDirectory() { const { data: projects } = useProjects(); - const hostedProjects = Object.entries( - projects ?? ({} as ProjectType[]) - ).filter(([_, project]) => !project.revised_by); + const hostedProjects = clobberAndDifferentiateProjectMetadata( + Object.values(projects ?? ({} as ProjectType[])).filter( + (project) => !project.revised_by + ) + ); - const communityProjects = Object.values(staticProjects).filter( - (project) => project.tier === "community" + const communityProjects = clobberAndDifferentiateProjectMetadata( + Object.values(staticProjects).filter( + (project) => project.tier === "community" + ) ); - const maintainedProjects = Object.values(staticProjects).filter( - (project) => project.tier === "maintained" + const maintainedProjects = clobberAndDifferentiateProjectMetadata( + Object.values(staticProjects).filter( + (project) => project.tier === "maintained" + ) ); return ( @@ -43,14 +52,21 @@ function CensusDirectory() {

Please{" "} - + { + track(EVENTS.CENSUS_MODELS_TUTORIALS_CLICKED); + }} + > see these tutorials - {" "} + {" "} for usage details.

If you’d like to have your project featured here, please{" "} - get in touch. + get in touch.

{maintainedProjects.length > 0 && ( @@ -68,8 +84,11 @@ function CensusDirectory() { . - {maintainedProjects.map((project) => ( - + {maintainedProjects.map((clobberedProjects) => ( + ))} )} @@ -89,8 +108,11 @@ function CensusDirectory() { . For feedback on the embeddings themselves, please contact the creators. - {hostedProjects.map(([id, project]) => ( - + {hostedProjects.map((clobberedProjects) => ( + ))} )} @@ -104,8 +126,11 @@ function CensusDirectory() {
Please contact their creators with questions or feedback. - {communityProjects.map((project) => ( - + {communityProjects.map((clobberedProjects) => ( + ))} )} diff --git a/frontend/src/views/CensusDirectory/style.ts b/frontend/src/views/CensusDirectory/style.ts index 4b40334be3769..1d18704595f76 100644 --- a/frontend/src/views/CensusDirectory/style.ts +++ b/frontend/src/views/CensusDirectory/style.ts @@ -1,5 +1,4 @@ import { - Button, fontBodyS, fontCapsXxxs, fontHeaderL, @@ -10,7 +9,6 @@ import styled from "@emotion/styled"; import { RadioGroup } from "@mui/material"; import { fontWeightBold, - fontWeightMedium, fontWeightRegular, fontWeightSemibold, gray400, @@ -83,16 +81,13 @@ export const ProjectContainer = styled.div` display: flex; flex-direction: row; justify-content: space-between; - margin-top: 20px; -`; -export const ProjectButtons = styled.div` - display: flex; - flex-direction: row; - gap: ${spacesDefault}px; + margin-top: 40px; `; + export const ProjectDetails = styled.div` display: flex; flex-direction: column; + max-width: 720px; `; export const DetailsContainer = styled.div` display: flex; @@ -101,11 +96,6 @@ export const DetailsContainer = styled.div` margin-top: ${spacesL}px; `; -export const StyledButton = styled(Button)` - font-weight: ${fontWeightMedium}; - min-width: 80px; -`; - export const ItemContainer = styled.div` display: flex; flex-direction: column; diff --git a/frontend/src/views/CensusDirectory/utils.ts b/frontend/src/views/CensusDirectory/utils.ts index f2e090661c434..4dd26e1073b43 100644 --- a/frontend/src/views/CensusDirectory/utils.ts +++ b/frontend/src/views/CensusDirectory/utils.ts @@ -1,8 +1,73 @@ -import { ProjectProps } from "./components/Project/types"; +import { UnionProject } from "./components/Project/types"; -export const getProjectTier = (project: ProjectProps["project"]) => { +export const getProjectTier = (project: UnionProject) => { let projectTier = "hosted"; if ("tier" in project) projectTier = project.tier; 
return projectTier; }; + +export type ClobberedProjects = [Partial, UnionProject[]][]; + +function isEqual(obj1: any, obj2: any): boolean { + Object.keys(obj1).forEach((key) => { + if (typeof obj1[key] === "object" && typeof obj2[key] === "object") { + if (!isEqual(obj1[key], obj2[key])) { + return false; + } + } else if (obj1[key] !== obj2[key]) { + return false; + } + }); + return true; +} + +// Iterates over all projects and returns a list of projects that have the same title +// and the overlapping data itself +export const clobberAndDifferentiateProjectMetadata = ( + projects: UnionProject[] +): ClobberedProjects => { + const clobberedProjects: ClobberedProjects = []; + // get unique project titles and the projects that have that title + const titlesToProjects = projects.reduce( + (acc, project) => { + if (!project.title) return acc; + if (!acc[project.title]) acc[project.title] = []; + acc[project.title].push(project); + return acc; + }, + {} as Record + ); + + Object.values(titlesToProjects).forEach((projects) => { + if (projects.length > 1) { + const clobberedProject = projects.reduce( + (acc, project) => { + ( + Object.keys(project) as unknown as Array + ).forEach((key) => { + if ( + acc[key] && + typeof acc[key] === "object" && + typeof project[key] === "object" + ) { + if (!isEqual(acc[key], project[key])) { + delete acc[key]; + } + } else if (acc[key] && acc[key] !== project[key]) { + delete acc[key]; + } + }); + return acc; + }, + { ...projects[0] } + ); + + clobberedProjects.push([clobberedProject, projects]); + } else { + clobberedProjects.push([projects[0], projects]); + } + }); + + return clobberedProjects; +}; From 046d5a504f3eeeeacffc0ccf2e6a1e50c960d46d Mon Sep 17 00:00:00 2001 From: Nayib Gloria <55710092+nayib-jose-gloria@users.noreply.github.com> Date: Mon, 18 Dec 2023 16:20:46 -0500 Subject: [PATCH 5/6] chore: Schema 4 doc updates (#6365) --- backend/curation/api/curation-api.yml | 46 +++++++++---------- .../032__Contribute and Publish 
Data.mdx | 22 ++++++++- .../4_2_2__Cell Type and Gene Ordering.mdx | 2 +- .../components/CopyCaption/index.tsx | 4 +- 4 files changed, 46 insertions(+), 28 deletions(-) diff --git a/backend/curation/api/curation-api.yml b/backend/curation/api/curation-api.yml index cfd8f44892611..34b3d83cbc4ba 100644 --- a/backend/curation/api/curation-api.yml +++ b/backend/curation/api/curation-api.yml @@ -709,7 +709,7 @@ components: batch_condition: description: | These keys define the batches that a normalization or integration algorithm should be aware of. - [batch condition schema](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#batch_condition) + [batch condition schema](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#batch_condition) type: array items: @@ -992,7 +992,7 @@ components: type: integer cell_type: description: | - [cell type label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#cell_type) + [cell type label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#cell_type) default: [] items: $ref: "#/components/schemas/ontology_element" @@ -1007,7 +1007,7 @@ components: $ref: "#/components/schemas/default_embedding" development_stage: description: | - [development stage label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#development_stage) + [development stage label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#development_stage) default: [] items: $ref: "#/components/schemas/ontology_element" @@ -1034,7 +1034,7 @@ components: type: number title: description: | - [title](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#title) + [title](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#title) nullable: true type: string organism: @@ -1061,14 +1061,14 @@ 
components: type: string self_reported_ethnicity: description: | - [self reported ethnicity label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#self_reported_ethnicity) + [self reported ethnicity label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#self_reported_ethnicity) default: [] items: $ref: "#/components/schemas/ontology_element" type: array sex: description: | - [sex label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#sex) + [sex label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#sex) default: [] items: $ref: "#/components/schemas/ontology_element" @@ -1098,7 +1098,7 @@ components: type: integer cell_type: description: | - [cell type label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#cell_type) + [cell type label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#cell_type) default: [] items: $ref: "#/components/schemas/ontology_element" @@ -1117,7 +1117,7 @@ components: $ref: "#/components/schemas/dataset_version_id" development_stage: description: | - [development stage label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#development_stage) + [development stage label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#development_stage) default: [] items: $ref: "#/components/schemas/ontology_element" @@ -1136,7 +1136,7 @@ components: type: number title: description: | - [title](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#title) + [title](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#title) nullable: true type: string organism: @@ -1160,14 +1160,14 @@ components: type: string self_reported_ethnicity: description: | - [self reported ethnicity 
label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#self_reported_ethnicity) + [self reported ethnicity label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#self_reported_ethnicity) default: [] items: $ref: "#/components/schemas/ontology_element" type: array sex: description: | - [sex label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#sex) + [sex label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#sex) default: [] items: $ref: "#/components/schemas/ontology_element" @@ -1197,7 +1197,7 @@ components: type: integer cell_type: description: | - [cell type label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#cell_type) + [cell type label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#cell_type) default: [] items: $ref: "#/components/schemas/ontology_element" @@ -1219,7 +1219,7 @@ components: $ref: "#/components/schemas/default_embedding" development_stage: description: | - [development stage label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#development_stage) + [development stage label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#development_stage) default: [] items: $ref: "#/components/schemas/ontology_element" @@ -1244,7 +1244,7 @@ components: type: number title: description: | - [title](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#title) + [title](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#title) nullable: true type: string organism: @@ -1261,14 +1261,14 @@ components: type: string self_reported_ethnicity: description: | - [self reported ethnicity 
label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#self_reported_ethnicity) + [self reported ethnicity label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#self_reported_ethnicity) default: [] items: $ref: "#/components/schemas/ontology_element" type: array sex: description: | - [sex label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#sex) + [sex label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#sex) default: [] items: $ref: "#/components/schemas/ontology_element" @@ -1282,28 +1282,28 @@ components: type: object dataset_assay: description: | - [assay label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#assay) + [assay label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#assay) default: [] items: $ref: "#/components/schemas/ontology_element" type: array dataset_disease: description: | - [disease label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#disease) + [disease label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#disease) default: [] items: $ref: "#/components/schemas/ontology_element" type: array dataset_organism: description: | - [organism label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#organism) + [organism label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#organism) default: [] items: $ref: "#/components/schemas/ontology_element" type: array dataset_tissue: description: | - [tissue label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#tissue) + [tissue label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#tissue) default: [] items: allOf: @@ -1351,7 
+1351,7 @@ components: CELLxGENE Discover runs a heuristic to detect the approximate distribution of the data in X so that it can accurately calculate statistical properties of the data. This field enables the curator to override this heuristic and - specify the data distribution explicitly. [x_approximate_distribution](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#x_approximate_distribution) + specify the data distribution explicitly. [x_approximate_distribution](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#x_approximate_distribution) enum: - COUNT - NORMAL @@ -1407,7 +1407,7 @@ components: nullable: true is_primary_data: description: | - [is_primary_data](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#is_primary_data) + [is_primary_data](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#is_primary_data) Describes whether cellular observations for this Dataset are all canonical (True), all non-canonical (False), or contain a mixture (True, False). @@ -1524,7 +1524,7 @@ components: - raw.X suspension_type: description: | - [suspension_type](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#suspension_type) + [suspension_type](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#suspension_type) List of unique suspension types represented in the dataset, corresponding to dataset's assay(s). Possible item values are 'nucleus', 'cell', and/or 'na'. diff --git a/frontend/doc-site/032__Contribute and Publish Data.mdx b/frontend/doc-site/032__Contribute and Publish Data.mdx index fc340b1d16ede..a85cd3c32bfe0 100644 --- a/frontend/doc-site/032__Contribute and Publish Data.mdx +++ b/frontend/doc-site/032__Contribute and Publish Data.mdx @@ -33,6 +33,23 @@ We need the following collection metadata (i.e. 
details associated with your pub - Contact: name and email - Publication/preprint DOI: can be added later - URLs: any additional URLs for related data or resources, such as GEO or protocols.io - can be added later + - Consortia: optional, and can be added later. Can be one or more of: + - Allen Institute for Brain Science + - BRAIN Initiative + - CZ Biohub + - CZI Neurodegeneration Challenge Network + - CZI Single-Cell Biology + - European Union’s Horizon 2020 + - GenitoUrinary Development Molecular Anatomy Project (GUDMAP) + - Gut Cell Atlas + - Human BioMolecular Atlas Program (HuBMAP) + - Human Cell Atlas (HCA) + - Human Pancreas Analysis Program (HPAP) + - Human Tumor Atlas Network (HTAN) + - Kidney Precision Medicine Project (KPMP) + - LungMAP + - SEA-AD + - Wellcome HCA Strategic Science Support Each dataset needs the following information added to a single h5ad (AnnData 0.8) format file: @@ -49,12 +66,13 @@ Each dataset needs the following information added to a single h5ad (AnnData 0.8 - donor_id: free-text identifier that distinguishes the unique individual that data were derived from. It is encouraged to be something not likely to be used in other studies (e.g. donor_1 is likely to not be unique in the data corpus) - development_stage_ontology_term_id: [HsapDv](https://www.ebi.ac.uk/ols/ontologies/hsapdv) if human, [MmusDv](https://www.ebi.ac.uk/ols/ontologies/mmusdv) if mouse, `unknown` if information unavailable - sex_ontology_term_id: `PATO:0000384` for male, `PATO:0000383` for female, or `unknown` if unavailable - - self_reported_ethnicity_ontology_term_id: [HANCESTRO](https://www.ebi.ac.uk/ols/ontologies/hancestro) use `multiethnic` if more than one ethnicity is reported. If human and information unavailable, use `unknown`. Use `na` if non-human. + - self_reported_ethnicity_ontology_term_id: [HANCESTRO](https://www.ebi.ac.uk/ols/ontologies/hancestro) multiple comma-separated terms may be used if more than one ethnicity is reported. 
If human and information unavailable, use `unknown`. Use `na` if non-human. - disease_ontology_term_id: [MONDO](https://www.ebi.ac.uk/ols/ontologies/mondo) or `PATO:0000461` for 'normal' + - tissue_type: `tissue`, `organoid`, or `cell culture` - tissue_ontology_term_id: [UBERON](https://www.ebi.ac.uk/ols/ontologies/uberon) - cell_type_ontology_term_id: [CL](https://www.ebi.ac.uk/ols/ontologies/cl) - assay_ontology_term_id: [EFO](https://www.ebi.ac.uk/ols/ontologies/efo) - - suspension_type: `cell`, `nucleus`, or `na`, as corresponding to assay. Use [this table](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#suspension_type) defined in the data schema for guidance. If the assay does not appear in this table, the most appropriate value MUST be selected and the [curation team informed](mailto:cellxgene@chanzuckerberg.com) during submission so that the assay can be added to the table. + - suspension_type: `cell`, `nucleus`, or `na`, as corresponding to assay. Use [this table](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#suspension_type) defined in the data schema for guidance. If the assay does not appear in this table, the most appropriate value MUST be selected and the [curation team informed](mailto:cellxgene@chanzuckerberg.com) during submission so that the assay can be added to the table. 
- **Embeddings in obsm**: - One or more two-dimensional embeddings, prefixed with 'X\_' - **Features in var & raw.var (if present)**: diff --git a/frontend/doc-site/04__Analyze Public Data/4_2__Gene Expression Documentation/4_2_2__Cell Type and Gene Ordering.mdx b/frontend/doc-site/04__Analyze Public Data/4_2__Gene Expression Documentation/4_2_2__Cell Type and Gene Ordering.mdx index 5cdd3cb2842ee..04d96ccc5b338 100644 --- a/frontend/doc-site/04__Analyze Public Data/4_2__Gene Expression Documentation/4_2_2__Cell Type and Gene Ordering.mdx +++ b/frontend/doc-site/04__Analyze Public Data/4_2__Gene Expression Documentation/4_2_2__Cell Type and Gene Ordering.mdx @@ -4,7 +4,7 @@ Cell types and genes shown in a dot plot can be arranged in different ways as de # Cell Type Ordering -Cell types are annotated by the original data contributors and mapped to the closest cell ontology (CL) term as defined in the [data schema](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.0.0/schema.md#cell_type_ontology_term_id). +Cell types are annotated by the original data contributors and mapped to the closest cell ontology (CL) term as defined in the [data schema](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#cell_type_ontology_term_id). In some cases there are cell types annotated with a high-level term whereas in some other cases they can be very granularly annotated. For example, there are some cells annotated as "T cells" and others annotated with children terms like "effector CD8-positive, alpha-beta T cell". All of these cell types are shown in the dot plot and they should not be interpreted as one being a subset of the other. 
diff --git a/frontend/src/components/Collections/components/Dataset/components/DownloadDataset/components/Content/components/DownloadLink/components/CopyCaption/index.tsx b/frontend/src/components/Collections/components/Dataset/components/DownloadDataset/components/Content/components/DownloadLink/components/CopyCaption/index.tsx index d1b0662877935..a1c882a639041 100644 --- a/frontend/src/components/Collections/components/Dataset/components/DownloadDataset/components/Content/components/DownloadLink/components/CopyCaption/index.tsx +++ b/frontend/src/components/Collections/components/Dataset/components/DownloadDataset/components/Content/components/DownloadLink/components/CopyCaption/index.tsx @@ -4,9 +4,9 @@ import { DATASET_ASSET_FORMAT } from "src/common/entities"; const DISCOVER_API_URL = "https://api.cellxgene.cziscience.com/curation/ui/#/"; const SCHEMA_URL = - "https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md"; + "https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md"; const SEURAT_SCHEMA_URL = - "https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/seurat_encoding.md"; + "https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/seurat_encoding.md"; interface Props { selectedFormat: DATASET_ASSET_FORMAT | ""; From 69777dcc97ddfdcf97d7561368faffff5dcb5a21 Mon Sep 17 00:00:00 2001 From: Severiano Badajoz Date: Mon, 18 Dec 2023 13:32:52 -0800 Subject: [PATCH 6/6] fix: small changes (#6367) --- .../components/CopyButton/index.tsx | 8 +- .../components/EmbeddingButton/connect.ts | 73 +++++++++---------- .../components/EmbeddingButton/index.tsx | 2 + .../components/Project/index.tsx | 2 +- 4 files changed, 44 insertions(+), 41 deletions(-) diff --git a/frontend/src/components/Collections/components/Dataset/components/DownloadDataset/components/Content/components/DownloadLink/components/CopyButton/index.tsx 
b/frontend/src/components/Collections/components/Dataset/components/DownloadDataset/components/Content/components/DownloadLink/components/CopyButton/index.tsx index 7325a14bd5e59..02f1e04e17e58 100644 --- a/frontend/src/components/Collections/components/Dataset/components/DownloadDataset/components/Content/components/DownloadLink/components/CopyButton/index.tsx +++ b/frontend/src/components/Collections/components/Dataset/components/DownloadDataset/components/Content/components/DownloadLink/components/CopyButton/index.tsx @@ -7,11 +7,13 @@ import { ANIMATION, ANIMATION_STEP } from "./constants"; interface Props { downloadLink: string; handleAnalytics: () => void; + label?: string; } export default function CopyButton({ downloadLink, handleAnalytics, + label = "Copy", }: Props): JSX.Element { const [animationStep, setAnimationStep] = useState( ANIMATION_STEP.IDLE @@ -61,7 +63,7 @@ export default function CopyButton({ onExited={onUpdateAnimationStep} timeout={animation.timeout} > - {getButtonText(animationStep)} + {getButtonText(animationStep, label)} ); @@ -72,14 +74,14 @@ export default function CopyButton({ * @param step - Current animation step. * @returns button text. 
*/ -function getButtonText(step: number): string { +function getButtonText(step: number, label: string): string { if ( step === ANIMATION_STEP.COPIED_ENTER || step === ANIMATION_STEP.COPIED_EXIT ) { return "Copied"; } - return "Copy"; + return label; } /** diff --git a/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/connect.ts b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/connect.ts index 726a426f1c18a..9eca948b00793 100644 --- a/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/connect.ts +++ b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/connect.ts @@ -25,31 +25,31 @@ function pythonCodeSnippet(project: UnionProject, uri: string): string { const measurement = project.measurement_name; return project.tier === "maintained" - ? ` import cellxgene_census - - census = cellxgene_census.open_soma(census_version="${project.census_version}") - adata = cellxgene_census.get_anndata( - census, - organism = "${organism}", - measurement_name = "${measurement}", - obs_value_filter = "tissue_general == 'central nervous system'", - obsm_layers = ["${project.obsm_layer}"] - )` - : ` import cellxgene_census - from cellxgene_census.experimental import get_embedding - - embedding_uri = \\ - "${uri}" - census = cellxgene_census.open_soma(census_version="${censusVersion}") - - adata = cellxgene_census.get_anndata( - census, - organism = "${organism}", - measurement_name = "${measurement}", - obs_value_filter = "tissue_general == 'central nervous system'", - ) - embeddings = get_embedding("${censusVersion}", embedding_uri, adata.obs["soma_joinid"]).to_numpy()) - adata.obsm["emb"] = embeddings`; + ? 
`import cellxgene_census + +census = cellxgene_census.open_soma(census_version="${project.census_version}") +adata = cellxgene_census.get_anndata( + census, + organism = "${organism}", + measurement_name = "${measurement}", + obs_value_filter = "tissue_general == 'central nervous system'", + obsm_layers = ["${project.obsm_layer}"] +)` + : `import cellxgene_census +from cellxgene_census.experimental import get_embedding + +embedding_uri = \\ + "${uri}" +census = cellxgene_census.open_soma(census_version="${censusVersion}") + +adata = cellxgene_census.get_anndata( + census, + organism = "${organism}", + measurement_name = "${measurement}", + obs_value_filter = "tissue_general == 'central nervous system'", +) +embeddings = get_embedding("${censusVersion}", embedding_uri, adata.obs["soma_joinid"].to_numpy()) +adata.obsm["emb"] = embeddings`; } function rCodeSnippet(project: UnionProject): string { @@ -57,18 +57,17 @@ function rCodeSnippet(project: UnionProject): string { const organism = project.experiment_name; return project.tier === "maintained" - ? ` library("cellxgene.census") - library("Seurat") - - census <- open_soma(census_version = "${censusVersion}") - seurat_obj <- get_seurat( - census, - organism = "${organism}", - obs_value_filter = "tissue_general == 'central nervous system'", - obs_column_names = c("cell_type"), - obsm_layers = c("${project.obsm_layer}") - ) - ` + ? 
`library("cellxgene.census") +library("Seurat") + +census <- open_soma(census_version = "${censusVersion}") +seurat_obj <- get_seurat( + census, + organism = "${organism}", + obs_value_filter = "tissue_general == 'central nervous system'", + obs_column_names = c("cell_type"), + obsm_layers = c("${project.obsm_layer}") +)` : ""; } diff --git a/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/index.tsx b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/index.tsx index 4b0a9e251a65f..b36aaa2ff8975 100644 --- a/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/index.tsx +++ b/frontend/src/views/CensusDirectory/components/Project/ProjectButtons/components/EmbeddingButton/index.tsx @@ -66,6 +66,7 @@ function EmbeddingButton(props: EmbeddingButtonProps) { {codeSnippet} track(EVENTS.CENSUS_EMBEDDING_COPIED, { project: project.title, @@ -79,6 +80,7 @@ function EmbeddingButton(props: EmbeddingButtonProps) {
track(EVENTS.CENSUS_EMBEDDING_COPIED, { project: project.title, diff --git a/frontend/src/views/CensusDirectory/components/Project/index.tsx b/frontend/src/views/CensusDirectory/components/Project/index.tsx index ad32134498735..ccaddb1d77b7d 100644 --- a/frontend/src/views/CensusDirectory/components/Project/index.tsx +++ b/frontend/src/views/CensusDirectory/components/Project/index.tsx @@ -40,7 +40,7 @@ const Project = ({ clobberedProjects }: ProjectProps) => { { track(EVENTS.CENSUS_CONTACT_CLICKED, { project: sharedProject.title,