diff --git a/bionty/base/_public_ontology.py b/bionty/base/_public_ontology.py index e2016a1..80ede5c 100644 --- a/bionty/base/_public_ontology.py +++ b/bionty/base/_public_ontology.py @@ -473,14 +473,14 @@ def standardize( return_mapper: `bool = False` If `True`, returns `{input_synonym1: standardized_name1}`. case_sensitive: `bool = False` Whether the mapping is case sensitive. - keep: `Literal["first", "last", False] = "first"` When a synonym maps to - multiple names, determines which duplicates to mark as - `pd.DataFrame.duplicated` + keep: {'first', 'last', False}, default 'first'. + When a synonym maps to multiple standardized values, determines + which duplicates to mark as `pandas.DataFrame.duplicated`. + + - "first": returns the first mapped standardized value + - "last": returns the last mapped standardized value + - False: returns all mapped standardized values mute: Whether to mute logging. Defaults to False. - Keep: Which standardized name to keep. - - "first": returns the first mapped standardized name - - "last": returns the last mapped standardized name - - `False`: returns all mapped standardized name synonyms_field: `str = "synonyms"` A field containing the concatenated synonyms. Returns: diff --git a/bionty/models.py b/bionty/models.py index e429c5c..e76e4b7 100644 --- a/bionty/models.py +++ b/bionty/models.py @@ -554,6 +554,9 @@ class Gene(BioRecord, TracksRun, TracksUpdates): Bulk create Gene records via :meth:`~docs:lamindb.core.CanValidate.from_values`. Map legacy ensembl IDs to current ensembl IDs using :meth:`bionty.base.Gene.map_legacy_ids`. + We discourage validating gene symbols and recommend working with unique identifiers such as ENSEMBL IDs instead. + For more details, see :doc:`docs:faq/symbol-mapping`. + Examples: >>> record = bionty.Gene.from_source(symbol="TCF7", organism="human") """