From feaedcc6772ec8fcada10e1c3b093d98bb72d5a4 Mon Sep 17 00:00:00 2001 From: Daniel Ji Date: Wed, 31 Jul 2024 10:28:08 -0700 Subject: [PATCH 1/4] Update gjensen dataset configs with latest fixes (#177) * gjensen_config.py: remove whitespace from dataset_title and remove null objects * gjensen_config.py strip dataset_description as well * update gjensen configs with new czii-data-portal-processing fixes --- .../dataset_configs/gjensen/10021.yaml | 4 +- .../dataset_configs/gjensen/10038.yaml | 4 +- .../dataset_configs/gjensen/10078.yaml | 4 +- .../dataset_configs/gjensen/10099.yaml | 4 +- .../dataset_configs/gjensen/10115.yaml | 4 +- .../dataset_configs/gjensen/10116.yaml | 2 - .../dataset_configs/gjensen/10117.yaml | 2 - .../dataset_configs/gjensen/10122.yaml | 4 +- .../dataset_configs/gjensen/10125.yaml | 4 +- .../dataset_configs/gjensen/10126.yaml | 4 +- .../dataset_configs/gjensen/10159.yaml | 3 +- .../dataset_configs/gjensen/10172.yaml | 2 +- .../dataset_configs/gjensen/10173.yaml | 2 +- .../dataset_configs/gjensen/10174.yaml | 2 +- .../dataset_configs/gjensen/10175.yaml | 2 +- .../dataset_configs/gjensen/10176.yaml | 2 +- .../dataset_configs/gjensen/10177.yaml | 2 +- .../dataset_configs/gjensen/10178.yaml | 2 +- .../dataset_configs/gjensen/10179.yaml | 2 +- .../dataset_configs/gjensen/10180.yaml | 2 +- .../dataset_configs/gjensen/10181.yaml | 2 +- .../dataset_configs/gjensen/10182.yaml | 2 +- .../dataset_configs/gjensen/10183.yaml | 2 +- .../dataset_configs/gjensen/10184.yaml | 2 +- .../dataset_configs/gjensen/10185.yaml | 2 +- .../dataset_configs/gjensen/10186.yaml | 2 +- .../dataset_configs/gjensen/10187.yaml | 2 +- .../dataset_configs/gjensen/10188.yaml | 2 +- .../dataset_configs/gjensen/10197.yaml | 3 +- .../dataset_configs/gjensen/10242.yaml | 4 +- .../dataset_configs/gjensen/10262.yaml | 3 +- .../dataset_configs/gjensen/10287.yaml | 4 +- .../dataset_configs/gjensen/10288.yaml | 5 +- .../dataset_configs/gjensen/10300.yaml | 4 +- 
schema/v1.1.0/api_models_materialized.yaml | 28 ++++---- .../v1.1.0/dataset_config_materialized.yaml | 70 +++++++++---------- schema/v1.1.0/dataset_config_validate.py | 4 +- .../metadata-docs/affiliation_address.md | 2 +- .../metadata-docs/affiliation_identifier.md | 2 +- .../v1.1.0/metadata-docs/affiliation_name.md | 2 +- schema/v1.1.0/metadata-docs/authors.md | 6 +- schema/v1.1.0/metadata-docs/binning.md | 2 +- schema/v1.1.0/metadata-docs/cell_component.md | 2 +- schema/v1.1.0/metadata-docs/cell_strain.md | 2 +- schema/v1.1.0/metadata-docs/cell_type.md | 2 +- .../corresponding_author_status.md | 2 +- .../v1.1.0/metadata-docs/cross_references.md | 4 +- schema/v1.1.0/metadata-docs/dates.md | 2 +- schema/v1.1.0/metadata-docs/email.md | 2 +- schema/v1.1.0/metadata-docs/file_format.md | 6 +- schema/v1.1.0/metadata-docs/filter_value.md | 2 +- schema/v1.1.0/metadata-docs/funding.md | 2 +- schema/v1.1.0/metadata-docs/glob_string.md | 6 +- schema/v1.1.0/metadata-docs/glob_strings.md | 6 +- .../v1.1.0/metadata-docs/grid_preparation.md | 2 +- schema/v1.1.0/metadata-docs/id.md | 4 +- .../metadata-docs/is_visualization_default.md | 6 +- schema/v1.1.0/metadata-docs/manufacturer.md | 2 +- schema/v1.1.0/metadata-docs/model.md | 2 +- schema/v1.1.0/metadata-docs/name.md | 12 ++-- schema/v1.1.0/metadata-docs/order.md | 2 +- schema/v1.1.0/metadata-docs/organism.md | 2 +- schema/v1.1.0/metadata-docs/other_setup.md | 2 +- .../metadata-docs/primary_author_status.md | 2 +- .../metadata-docs/sample_preparation.md | 2 +- schema/v1.1.0/metadata-docs/sample_type.md | 2 +- schema/v1.1.0/metadata-docs/tissue.md | 2 +- schema/v1.1.0/metadata_materialized.yaml | 30 ++++---- 68 files changed, 162 insertions(+), 160 deletions(-) diff --git a/ingestion_tools/dataset_configs/gjensen/10021.yaml b/ingestion_tools/dataset_configs/gjensen/10021.yaml index 48226a382..e3d457bc7 100644 --- a/ingestion_tools/dataset_configs/gjensen/10021.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10021.yaml @@ 
-20,8 +20,8 @@ datasets: cross_references: publications: 10.1038/s41467-022-32584-7 related_database_entries: EMD-27654, EMPIAR-11125 - dataset_description: ' and is a part of Caltech Jensen lab etdb. Carboxysomes - purified from H. Neapolitanus, WT, high resolution' + dataset_description: This is a single dataset studying Halothiobacillus neapolitanus + collected by LaurenAnn Metskas and is a part of Caltech Jensen lab etdb. dataset_identifier: 10021 dataset_title: H. neapolitanus Carboxysomes dates: &id002 diff --git a/ingestion_tools/dataset_configs/gjensen/10038.yaml b/ingestion_tools/dataset_configs/gjensen/10038.yaml index c28631c54..cd8625745 100644 --- a/ingestion_tools/dataset_configs/gjensen/10038.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10038.yaml @@ -93,8 +93,8 @@ tiltseries: tilt_alignment_software: '{ts-tilt_alignment_software}' tilt_axis: 84.3 tilt_range: - max: -50.0 - min: 50.0 + max: 50.0 + min: -50.0 tilt_series_quality: int {ts-tilt_series_quality} tilt_step: 2.0 tilting_scheme: min to max tilt diff --git a/ingestion_tools/dataset_configs/gjensen/10078.yaml b/ingestion_tools/dataset_configs/gjensen/10078.yaml index c5f3816a6..b7014a45b 100644 --- a/ingestion_tools/dataset_configs/gjensen/10078.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10078.yaml @@ -19,9 +19,9 @@ datasets: name: Philadelphia-1_Lp02 cross_references: publications: 10.1038/s41564-019-0427-4 - dataset_description: 'This is 21th of 41 sets of data studying Legionella pneumophila + dataset_description: This is 21th of 41 sets of data studying Legionella pneumophila collected by Debnath Ghosal and is a part of Caltech Jensen lab etdb. JV7091 - is a double mutant that lacks dotE and dotP (Lp02). ' + is a double mutant that lacks dotE and dotP (Lp02). dataset_identifier: 10078 dataset_title: L. 
pneumophila JV7091 dates: &id002 diff --git a/ingestion_tools/dataset_configs/gjensen/10099.yaml b/ingestion_tools/dataset_configs/gjensen/10099.yaml index fa7c0e150..7fbd3e087 100644 --- a/ingestion_tools/dataset_configs/gjensen/10099.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10099.yaml @@ -14,8 +14,8 @@ datasets: - corresponding_author_status: true name: Grant Jensen primary_author_status: false - dataset_description: ' and is a part of Caltech Jensen lab etdb. Archaella and - plate' + dataset_description: This is a single dataset studying Halobacterium salinarum + collected by Debnath Ghosal and is a part of Caltech Jensen lab etdb. dataset_identifier: 10099 dataset_title: Halobacterium salinarum dates: &id002 diff --git a/ingestion_tools/dataset_configs/gjensen/10115.yaml b/ingestion_tools/dataset_configs/gjensen/10115.yaml index 27515385a..05dff200b 100644 --- a/ingestion_tools/dataset_configs/gjensen/10115.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10115.yaml @@ -14,8 +14,8 @@ datasets: - corresponding_author_status: true name: Grant Jensen primary_author_status: false - dataset_description: ' and is a part of Caltech Jensen lab etdb. search for ESCRT - in dividing cells' + dataset_description: This is a single dataset studying Sulfolobus acidocaldarius + collected by Rasika Ramdasi and is a part of Caltech Jensen lab etdb. dataset_identifier: 10115 dataset_title: S. 
acidocaldarius Sulfolobus cells synchronized fo dates: &id002 diff --git a/ingestion_tools/dataset_configs/gjensen/10116.yaml b/ingestion_tools/dataset_configs/gjensen/10116.yaml index 6c8e5dba8..8aac5907a 100644 --- a/ingestion_tools/dataset_configs/gjensen/10116.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10116.yaml @@ -14,8 +14,6 @@ datasets: - corresponding_author_status: true name: Grant Jensen primary_author_status: false - cell_component: - id: GO:0051301,GO:0043093 dataset_description: This is 1st of 2 sets of data studying Nitrosopumilus maritimus collected by Rasika Ramdasi and is a part of Caltech Jensen lab etdb. Looking for cell division related CDV proteins. diff --git a/ingestion_tools/dataset_configs/gjensen/10117.yaml b/ingestion_tools/dataset_configs/gjensen/10117.yaml index 1a9214011..3c0bc42b1 100644 --- a/ingestion_tools/dataset_configs/gjensen/10117.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10117.yaml @@ -14,8 +14,6 @@ datasets: - corresponding_author_status: true name: Grant Jensen primary_author_status: false - cell_component: - id: GO:0051301 dataset_description: This is 2nd of 2 sets of data studying Nitrosopumilus maritimus collected by Rasika Ramdasi and is a part of Caltech Jensen lab etdb. Looking for cell division and ESCRt related proteins. diff --git a/ingestion_tools/dataset_configs/gjensen/10122.yaml b/ingestion_tools/dataset_configs/gjensen/10122.yaml index f8c89d410..245db3779 100644 --- a/ingestion_tools/dataset_configs/gjensen/10122.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10122.yaml @@ -19,9 +19,9 @@ datasets: name: deltaQ 167 cross_references: publications: 10.1128/mBio.01898-15 - dataset_description: 'This is 5th of 17 sets of data studying Magnetospirillum + dataset_description: This is 5th of 17 sets of data studying Magnetospirillum magneticum collected by Poorna Subramanian and is a part of Caltech Jensen lab - etdb. This is the strain in which dQ and dK have been deactivated and the ' + etdb. 
This is the strain in which dQ and dK have been deactivated and the dataset_identifier: 10122 dataset_title: M. magneticum dK203_3h_induced dates: &id002 diff --git a/ingestion_tools/dataset_configs/gjensen/10125.yaml b/ingestion_tools/dataset_configs/gjensen/10125.yaml index 2e6958b23..5145b4067 100644 --- a/ingestion_tools/dataset_configs/gjensen/10125.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10125.yaml @@ -19,9 +19,9 @@ datasets: name: deltaQ 167 cross_references: publications: 10.1128/mBio.01898-15 - dataset_description: 'This is 8th of 17 sets of data studying Magnetospirillum + dataset_description: This is 8th of 17 sets of data studying Magnetospirillum magneticum collected by Poorna Subramanian and is a part of Caltech Jensen lab - etdb. This is the strain in which dQ has been deactivated and an inducible ' + etdb. This is the strain in which dQ has been deactivated and an inducible dataset_identifier: 10125 dataset_title: M. magneticum dQ167_10h_induced dates: &id002 diff --git a/ingestion_tools/dataset_configs/gjensen/10126.yaml b/ingestion_tools/dataset_configs/gjensen/10126.yaml index 2561950a6..8d451c8fd 100644 --- a/ingestion_tools/dataset_configs/gjensen/10126.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10126.yaml @@ -19,10 +19,10 @@ datasets: name: deltaQ 167 cross_references: publications: 10.1128/mBio.01898-15 - dataset_description: 'This is 9th of 17 sets of data studying Magnetospirillum + dataset_description: This is 9th of 17 sets of data studying Magnetospirillum magneticum collected by Poorna Subramanian and is a part of Caltech Jensen lab etdb. plasmid is present for dQdQ167 is the strain in which dQ has been mutated - and an inducible ' + and an inducible dataset_identifier: 10126 dataset_title: M. 
magneticum dQ167_14h_induced dates: &id002 diff --git a/ingestion_tools/dataset_configs/gjensen/10159.yaml b/ingestion_tools/dataset_configs/gjensen/10159.yaml index 083d387d5..4dae12a7a 100644 --- a/ingestion_tools/dataset_configs/gjensen/10159.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10159.yaml @@ -17,7 +17,8 @@ datasets: cell_strain: id: null name: G27 - dataset_description: ' and is a part of Caltech Jensen lab etdb.' + dataset_description: This is a single dataset studying Helicobacter pylori collected + by Yi-Wei Chang and is a part of Caltech Jensen lab etdb. dataset_identifier: 10159 dataset_title: Helicobacter pylori G27 dates: &id002 diff --git a/ingestion_tools/dataset_configs/gjensen/10172.yaml b/ingestion_tools/dataset_configs/gjensen/10172.yaml index 0d38b3917..102063b9f 100644 --- a/ingestion_tools/dataset_configs/gjensen/10172.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10172.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein dataset_description: This is 1st of 17 sets of data studying HUVECs collected by Cora Woodward and is a part of Caltech Jensen lab etdb. 
dataset_identifier: 10172 diff --git a/ingestion_tools/dataset_configs/gjensen/10173.yaml b/ingestion_tools/dataset_configs/gjensen/10173.yaml index 8d88d0274..bb9cba263 100644 --- a/ingestion_tools/dataset_configs/gjensen/10173.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10173.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 2nd of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10174.yaml b/ingestion_tools/dataset_configs/gjensen/10174.yaml index d33eade14..fd2dc0f44 100644 --- a/ingestion_tools/dataset_configs/gjensen/10174.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10174.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 3rd of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10175.yaml b/ingestion_tools/dataset_configs/gjensen/10175.yaml index 494bdb329..653e6fa43 100644 --- a/ingestion_tools/dataset_configs/gjensen/10175.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10175.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein dataset_description: This is 4th of 17 sets of data studying HUVECs collected by Cora Woodward and is a part of Caltech Jensen lab etdb. 
HUVECs- no treatment dataset_identifier: 10175 diff --git a/ingestion_tools/dataset_configs/gjensen/10176.yaml b/ingestion_tools/dataset_configs/gjensen/10176.yaml index b73d247a7..aa092853e 100644 --- a/ingestion_tools/dataset_configs/gjensen/10176.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10176.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 5th of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10177.yaml b/ingestion_tools/dataset_configs/gjensen/10177.yaml index 3d519d937..e77150a37 100644 --- a/ingestion_tools/dataset_configs/gjensen/10177.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10177.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 6th of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10178.yaml b/ingestion_tools/dataset_configs/gjensen/10178.yaml index 4f2916044..3e5c679ce 100644 --- a/ingestion_tools/dataset_configs/gjensen/10178.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10178.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 7th of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10179.yaml b/ingestion_tools/dataset_configs/gjensen/10179.yaml index cf82a7982..ff0b9fbbb 100644 --- a/ingestion_tools/dataset_configs/gjensen/10179.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10179.yaml 
@@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein dataset_description: This is 8th of 17 sets of data studying HUVECs collected by Cora Woodward and is a part of Caltech Jensen lab etdb. HUVECs transfected with CHMP4A+VPS4DN constructs using Nucleofector Kit. Sample# C58-2 diff --git a/ingestion_tools/dataset_configs/gjensen/10180.yaml b/ingestion_tools/dataset_configs/gjensen/10180.yaml index 247df26a5..64b4d433f 100644 --- a/ingestion_tools/dataset_configs/gjensen/10180.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10180.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 9th of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10181.yaml b/ingestion_tools/dataset_configs/gjensen/10181.yaml index bb60a1679..5cb424fd3 100644 --- a/ingestion_tools/dataset_configs/gjensen/10181.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10181.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 10th of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10182.yaml b/ingestion_tools/dataset_configs/gjensen/10182.yaml index c11d22b3d..1b240d902 100644 --- a/ingestion_tools/dataset_configs/gjensen/10182.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10182.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 
dataset_description: This is 11th of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10183.yaml b/ingestion_tools/dataset_configs/gjensen/10183.yaml index 6b568e620..23713a53f 100644 --- a/ingestion_tools/dataset_configs/gjensen/10183.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10183.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 12th of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10184.yaml b/ingestion_tools/dataset_configs/gjensen/10184.yaml index 6fd26b512..ab47830bd 100644 --- a/ingestion_tools/dataset_configs/gjensen/10184.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10184.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 13th of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10185.yaml b/ingestion_tools/dataset_configs/gjensen/10185.yaml index 94043cbbb..a226f3c3b 100644 --- a/ingestion_tools/dataset_configs/gjensen/10185.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10185.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 14th of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10186.yaml b/ingestion_tools/dataset_configs/gjensen/10186.yaml index 941fd5e38..34d24a2cb 100644 --- a/ingestion_tools/dataset_configs/gjensen/10186.yaml +++ 
b/ingestion_tools/dataset_configs/gjensen/10186.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 15th of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10187.yaml b/ingestion_tools/dataset_configs/gjensen/10187.yaml index 1eac1e928..34d79c4ba 100644 --- a/ingestion_tools/dataset_configs/gjensen/10187.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10187.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 16th of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10188.yaml b/ingestion_tools/dataset_configs/gjensen/10188.yaml index 2fd5d96ee..6fdccf390 100644 --- a/ingestion_tools/dataset_configs/gjensen/10188.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10188.yaml @@ -16,7 +16,7 @@ datasets: primary_author_status: false cell_type: id: CL:0002618 - name: umbilical vein endothelial cell + name: endothelial cell of umbilical vein cross_references: publications: 10.1128/JVI.02997-14 dataset_description: This is 17th of 17 sets of data studying HUVECs collected diff --git a/ingestion_tools/dataset_configs/gjensen/10197.yaml b/ingestion_tools/dataset_configs/gjensen/10197.yaml index 8c4fab4aa..f095ef39b 100644 --- a/ingestion_tools/dataset_configs/gjensen/10197.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10197.yaml @@ -17,7 +17,8 @@ datasets: cell_strain: id: null name: 2740-80 - dataset_description: ' and is a part of Caltech Jensen lab etdb.' 
+ dataset_description: This is a single dataset studying Vibrio cholerae collected + by Martin Pilhofer and is a part of Caltech Jensen lab etdb. dataset_identifier: 10197 dataset_title: V. cholerae dates: &id002 diff --git a/ingestion_tools/dataset_configs/gjensen/10242.yaml b/ingestion_tools/dataset_configs/gjensen/10242.yaml index a43f972b6..3e9cf4028 100644 --- a/ingestion_tools/dataset_configs/gjensen/10242.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10242.yaml @@ -14,8 +14,8 @@ datasets: - corresponding_author_status: true name: Grant Jensen primary_author_status: false - dataset_description: ' and is a part of Caltech Jensen lab etdb. ribbon near - membrane, possibly something between ribbon and membrane' + dataset_description: This is a single dataset studying Spiroplasma melliferum + collected by Morgan Beeby and is a part of Caltech Jensen lab etdb. dataset_identifier: 10242 dataset_title: Spiroplasma melliferum dates: &id002 diff --git a/ingestion_tools/dataset_configs/gjensen/10262.yaml b/ingestion_tools/dataset_configs/gjensen/10262.yaml index dc321c12d..81f731e5d 100644 --- a/ingestion_tools/dataset_configs/gjensen/10262.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10262.yaml @@ -17,7 +17,8 @@ datasets: cross_references: publications: 10.15252/embr.201744070 related_database_entries: EMD-8603 - dataset_description: ' and is a part of Caltech Jensen lab etdb.' + dataset_description: This is a single dataset studying Thermococcus kodakarensis + collected by Ariane Briegel and is a part of Caltech Jensen lab etdb. 
dataset_identifier: 10262 dataset_title: Thermococcus kodakarensis dates: &id002 diff --git a/ingestion_tools/dataset_configs/gjensen/10287.yaml b/ingestion_tools/dataset_configs/gjensen/10287.yaml index 588b2ae11..3b2b2c846 100644 --- a/ingestion_tools/dataset_configs/gjensen/10287.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10287.yaml @@ -17,8 +17,8 @@ datasets: cell_strain: id: null name: ATCC 51449 - dataset_description: ' and is a part of Caltech Jensen lab etdb. Helicobacter - hepaticus cell poles' + dataset_description: This is a single dataset studying Helicobacter hepaticus + collected by Ariane Briegel and is a part of Caltech Jensen lab etdb. dataset_identifier: 10287 dataset_title: H.hepaticus dates: &id002 diff --git a/ingestion_tools/dataset_configs/gjensen/10288.yaml b/ingestion_tools/dataset_configs/gjensen/10288.yaml index 951358a86..07b1ddb11 100644 --- a/ingestion_tools/dataset_configs/gjensen/10288.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10288.yaml @@ -16,7 +16,8 @@ datasets: primary_author_status: false cross_references: publications: 10.1091/mbc.E12-11-0785 - dataset_description: ' and is a part of Caltech Jensen lab etdb.' + dataset_description: This is a single dataset studying CHMP1B tubes collected + by Zhiheng Yu and is a part of Caltech Jensen lab etdb. dataset_identifier: 10288 dataset_title: CHMP1B tubes dates: &id002 @@ -28,7 +29,7 @@ datasets: name: Homo sapiens taxonomy_id: 9606 sample_preparation: null - sample_type: in_vitro + sample_type: organism sources: - literal: value: diff --git a/ingestion_tools/dataset_configs/gjensen/10300.yaml b/ingestion_tools/dataset_configs/gjensen/10300.yaml index b174ebcb4..9a6dbae1e 100644 --- a/ingestion_tools/dataset_configs/gjensen/10300.yaml +++ b/ingestion_tools/dataset_configs/gjensen/10300.yaml @@ -17,8 +17,8 @@ datasets: cell_strain: id: null name: NA1000 - dataset_description: ' and is a part of Caltech Jensen lab etdb. 
high-order DNA, - H-NS, nucleoid, asymmetric cell division' + dataset_description: This is a single dataset studying Caulobacter crescentus + collected by Jian Shi and is a part of Caltech Jensen lab etdb. dataset_identifier: 10300 dataset_title: C. crescentus Calubactor EB session22 27.5KX dates: &id002 diff --git a/schema/v1.1.0/api_models_materialized.yaml b/schema/v1.1.0/api_models_materialized.yaml index 1feca95eb..1eafefc32 100644 --- a/schema/v1.1.0/api_models_materialized.yaml +++ b/schema/v1.1.0/api_models_materialized.yaml @@ -1045,20 +1045,19 @@ classes: name: annotation_files description: Files associated with an annotation from_schema: cdp-dataset-config - multivalued: true alias: annotation_files owner: Annotation domain_of: - Annotation inverse: AnnotationFile.annotation range: AnnotationFile + multivalued: true inlined: true inlined_as_list: true authors: name: authors description: Author of an annotation from_schema: cdp-dataset-config - multivalued: true alias: authors owner: Annotation domain_of: @@ -1067,6 +1066,7 @@ classes: - Tomograms inverse: AnnotationAuthor.annotation range: AnnotationAuthor + multivalued: true inlined: true inlined_as_list: true deposition: @@ -1667,20 +1667,19 @@ classes: name: funding_sources description: Information about how a dataset was funded from_schema: cdp-dataset-config - multivalued: true alias: funding_sources owner: Dataset domain_of: - Dataset inverse: DatasetFunding.dataset range: DatasetFunding + multivalued: true inlined: true inlined_as_list: true authors: name: authors description: An author of a dataset from_schema: cdp-dataset-config - multivalued: true alias: authors owner: Dataset domain_of: @@ -1689,31 +1688,32 @@ classes: - Tomograms inverse: DatasetAuthor.dataset range: DatasetAuthor + multivalued: true inlined: true inlined_as_list: true runs: name: runs from_schema: cdp-dataset-config - multivalued: true alias: runs owner: Dataset domain_of: - Dataset inverse: Run.dataset range: Run + 
multivalued: true inlined: true inlined_as_list: true dataset_authors: name: dataset_authors description: An author of a dataset from_schema: cdp-dataset-config - multivalued: true alias: dataset_authors owner: Dataset domain_of: - Dataset inverse: DatasetAuthor.dataset range: DatasetAuthor + multivalued: true inlined: true inlined_as_list: true title: @@ -2110,20 +2110,19 @@ classes: name: datasets description: An author of a dataset from_schema: cdp-dataset-config - multivalued: true alias: datasets owner: Deposition domain_of: - Deposition inverse: Dataset.deposition range: Dataset + multivalued: true inlined: true inlined_as_list: true annotations: name: annotations description: Metadata about an annotation for a run from_schema: cdp-dataset-config - multivalued: true alias: annotations owner: Deposition domain_of: @@ -2131,13 +2130,13 @@ classes: - TomogramVoxelSpacing inverse: Annotation.deposition range: Annotation + multivalued: true inlined: true inlined_as_list: true tomograms: name: tomograms description: Metadata describing a tomogram. 
from_schema: cdp-dataset-config - multivalued: true alias: tomograms owner: Deposition domain_of: @@ -2145,6 +2144,7 @@ classes: - TomogramVoxelSpacing inverse: Tomogram.deposition range: Tomograms + multivalued: true inlined: true inlined_as_list: true id: @@ -2190,26 +2190,26 @@ classes: tiltseries: name: tiltseries from_schema: cdp-dataset-config - multivalued: true alias: tiltseries owner: Run domain_of: - Run inverse: Tiltseries.run range: Tiltseries + multivalued: true inlined: true inlined_as_list: true tomogram_voxel_spacings: name: tomogram_voxel_spacings description: Voxel spacings for a run from_schema: cdp-dataset-config - multivalued: true alias: tomogram_voxel_spacings owner: Run domain_of: - Run inverse: TomogramVoxelSpacing.run range: TomogramVoxelSpacing + multivalued: true inlined: true inlined_as_list: true name: @@ -3037,7 +3037,6 @@ classes: tomograms: name: tomograms from_schema: cdp-dataset-config - multivalued: true alias: tomograms owner: TomogramVoxelSpacing domain_of: @@ -3045,13 +3044,13 @@ classes: - TomogramVoxelSpacing inverse: Tomogram.tomogram_voxel_spacing range: Tomogram + multivalued: true inlined: true inlined_as_list: true annotations: name: annotations description: Metadata about an annotation for a run from_schema: cdp-dataset-config - multivalued: true alias: annotations owner: TomogramVoxelSpacing domain_of: @@ -3059,6 +3058,7 @@ classes: - TomogramVoxelSpacing inverse: Annotation.tomogram_voxel_spacing range: Annotation + multivalued: true inlined: true inlined_as_list: true voxel_spacing: @@ -3166,7 +3166,6 @@ classes: name: authors description: Author of a tomogram from_schema: cdp-dataset-config - multivalued: true alias: authors owner: Tomograms domain_of: @@ -3175,6 +3174,7 @@ classes: - Tomograms inverse: TomogramAuthor.tomogram range: TomogramAuthor + multivalued: true inlined: true inlined_as_list: true name: diff --git a/schema/v1.1.0/dataset_config_materialized.yaml 
b/schema/v1.1.0/dataset_config_materialized.yaml index 7cdcccb84..0e0afcf0e 100644 --- a/schema/v1.1.0/dataset_config_materialized.yaml +++ b/schema/v1.1.0/dataset_config_materialized.yaml @@ -536,37 +536,37 @@ classes: name: annotations description: An annotation entity. from_schema: cdp-dataset-config - multivalued: true alias: annotations owner: Container domain_of: - Container range: AnnotationEntity + multivalued: true inlined: true inlined_as_list: true dataset_keyphotos: name: dataset_keyphotos description: A dataset key photo entity. from_schema: cdp-dataset-config - multivalued: true alias: dataset_keyphotos owner: Container domain_of: - Container range: DatasetKeyPhotoEntity + multivalued: true inlined: true inlined_as_list: true datasets: name: datasets description: A dataset entity. from_schema: cdp-dataset-config - multivalued: true alias: datasets owner: Container domain_of: - Container range: DatasetEntity required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -574,25 +574,25 @@ classes: name: deposition_keyphotos description: A deposition key photo entity. from_schema: cdp-dataset-config - multivalued: true alias: deposition_keyphotos owner: Container domain_of: - Container range: DepositionKeyPhotoEntity + multivalued: true inlined: true inlined_as_list: true depositions: name: depositions description: A deposition entity. from_schema: cdp-dataset-config - multivalued: true alias: depositions owner: Container domain_of: - Container range: DepositionEntity required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -600,61 +600,61 @@ classes: name: frames description: A frame entity. from_schema: cdp-dataset-config - multivalued: true alias: frames owner: Container domain_of: - Container range: FrameEntity + multivalued: true inlined: true inlined_as_list: true gains: name: gains description: A gain entity. 
from_schema: cdp-dataset-config - multivalued: true alias: gains owner: Container domain_of: - Container range: GainEntity + multivalued: true inlined: true inlined_as_list: true key_images: name: key_images description: A key image entity. from_schema: cdp-dataset-config - multivalued: true alias: key_images owner: Container domain_of: - Container range: KeyImageEntity + multivalued: true inlined: true inlined_as_list: true rawtilts: name: rawtilts description: A raw tilt entity. from_schema: cdp-dataset-config - multivalued: true alias: rawtilts owner: Container domain_of: - Container range: RawTiltEntity + multivalued: true inlined: true inlined_as_list: true runs: name: runs description: A run entity. from_schema: cdp-dataset-config - multivalued: true alias: runs owner: Container domain_of: - Container range: RunEntity required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -674,37 +674,37 @@ classes: name: tiltseries description: A tilt series entity. from_schema: cdp-dataset-config - multivalued: true alias: tiltseries owner: Container domain_of: - Container range: TiltSeriesEntity + multivalued: true inlined: true inlined_as_list: true tomograms: name: tomograms description: A tomogram entity. from_schema: cdp-dataset-config - multivalued: true alias: tomograms owner: Container domain_of: - Container range: TomogramEntity + multivalued: true inlined: true inlined_as_list: true voxel_spacings: name: voxel_spacings description: A voxel spacing entity. from_schema: cdp-dataset-config - multivalued: true alias: voxel_spacings owner: Container domain_of: - Container range: VoxelSpacingEntity required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -851,13 +851,13 @@ classes: name: list_globs description: The globs for the file. 
from_schema: cdp-dataset-config - multivalued: true alias: list_globs owner: SourceMultiGlob domain_of: - SourceMultiGlob range: string required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -959,61 +959,61 @@ classes: name: annotation description: Include or exclude annotations. from_schema: cdp-dataset-config - multivalued: true alias: annotation owner: SourceParent domain_of: - SourceParent range: string + multivalued: true inlined: true inlined_as_list: true dataset: name: dataset description: Include or exclude datasets. from_schema: cdp-dataset-config - multivalued: true alias: dataset owner: SourceParent domain_of: - SourceParent range: string + multivalued: true inlined: true inlined_as_list: true run: name: run description: Include or exclude runs. from_schema: cdp-dataset-config - multivalued: true alias: run owner: SourceParent domain_of: - SourceParent range: string + multivalued: true inlined: true inlined_as_list: true tomogram: name: tomogram description: Include or exclude tomograms. from_schema: cdp-dataset-config - multivalued: true alias: tomogram owner: SourceParent domain_of: - SourceParent range: string + multivalued: true inlined: true inlined_as_list: true voxel_spacing: name: voxel_spacing description: Include or exclude voxel spacings. from_schema: cdp-dataset-config - multivalued: true alias: voxel_spacing owner: SourceParent domain_of: - Tomogram - SourceParent range: string + multivalued: true inlined: true inlined_as_list: true DefaultLiteralEntity: @@ -1044,7 +1044,6 @@ classes: name: value description: A placeholder for any type of data. from_schema: cdp-dataset-config - multivalued: true alias: value owner: DefaultLiteral domain_of: @@ -1053,6 +1052,7 @@ classes: - VoxelSpacingLiteral range: Any required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -1065,7 +1065,6 @@ classes: name: sources description: A key photo source. 
from_schema: cdp-dataset-config - multivalued: true alias: sources owner: KeyPhotoEntity domain_of: @@ -1083,6 +1082,7 @@ classes: - VoxelSpacingEntity range: KeyPhotoSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -1162,7 +1162,6 @@ classes: name: sources description: An annotation source. from_schema: cdp-dataset-config - multivalued: true alias: sources owner: AnnotationEntity domain_of: @@ -1180,6 +1179,7 @@ classes: - VoxelSpacingEntity range: AnnotationSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -1284,7 +1284,6 @@ classes: name: sources description: A dataset source. from_schema: cdp-dataset-config - multivalued: true alias: sources owner: DatasetEntity domain_of: @@ -1302,6 +1301,7 @@ classes: - VoxelSpacingEntity range: DatasetSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -1372,7 +1372,6 @@ classes: name: sources description: A key photo source. from_schema: cdp-dataset-config - multivalued: true alias: sources owner: DatasetKeyPhotoEntity domain_of: @@ -1390,6 +1389,7 @@ classes: - VoxelSpacingEntity range: KeyPhotoSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -1403,7 +1403,6 @@ classes: name: sources description: A key photo source. from_schema: cdp-dataset-config - multivalued: true alias: sources owner: DepositionKeyPhotoEntity domain_of: @@ -1421,6 +1420,7 @@ classes: - VoxelSpacingEntity range: KeyPhotoSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -1448,7 +1448,6 @@ classes: name: sources description: A deposition source. 
from_schema: cdp-dataset-config - multivalued: true alias: sources owner: DepositionEntity domain_of: @@ -1466,6 +1465,7 @@ classes: - VoxelSpacingEntity range: DepositionSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -1535,7 +1535,6 @@ classes: name: sources description: A frame source. from_schema: cdp-dataset-config - multivalued: true alias: sources owner: FrameEntity domain_of: @@ -1553,6 +1552,7 @@ classes: - VoxelSpacingEntity range: FrameSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -1635,7 +1635,6 @@ classes: name: sources description: A gain source. from_schema: cdp-dataset-config - multivalued: true alias: sources owner: GainEntity domain_of: @@ -1653,6 +1652,7 @@ classes: - VoxelSpacingEntity range: GainSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -1735,7 +1735,6 @@ classes: name: sources description: A key image source. from_schema: cdp-dataset-config - multivalued: true alias: sources owner: KeyImageEntity domain_of: @@ -1753,6 +1752,7 @@ classes: - VoxelSpacingEntity range: KeyImageSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -1835,7 +1835,6 @@ classes: name: sources description: A raw tilt source. from_schema: cdp-dataset-config - multivalued: true alias: sources owner: RawTiltEntity domain_of: @@ -1853,6 +1852,7 @@ classes: - VoxelSpacingEntity range: RawTiltSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -1935,7 +1935,6 @@ classes: name: sources description: A run source. 
from_schema: cdp-dataset-config - multivalued: true alias: sources owner: RunEntity domain_of: @@ -1953,6 +1952,7 @@ classes: - VoxelSpacingEntity range: RunSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -2123,7 +2123,6 @@ classes: name: sources description: A tilt series source. from_schema: cdp-dataset-config - multivalued: true alias: sources owner: TiltSeriesEntity domain_of: @@ -2141,6 +2140,7 @@ classes: - VoxelSpacingEntity range: TiltSeriesSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -2238,7 +2238,6 @@ classes: name: sources description: A tomogram source. from_schema: cdp-dataset-config - multivalued: true alias: sources owner: TomogramEntity domain_of: @@ -2256,6 +2255,7 @@ classes: - VoxelSpacingEntity range: TomogramSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -2338,7 +2338,6 @@ classes: name: sources description: A voxel spacing source. from_schema: cdp-dataset-config - multivalued: true alias: sources owner: VoxelSpacingEntity domain_of: @@ -2356,6 +2355,7 @@ classes: - VoxelSpacingEntity range: VoxelSpacingSource required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -2435,7 +2435,6 @@ classes: name: value description: The value for the voxel spacing literal. 
from_schema: cdp-dataset-config - multivalued: true alias: value owner: VoxelSpacingLiteral domain_of: @@ -2444,6 +2443,7 @@ classes: - VoxelSpacingLiteral range: float required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 diff --git a/schema/v1.1.0/dataset_config_validate.py b/schema/v1.1.0/dataset_config_validate.py index 632cfe394..3ade81071 100644 --- a/schema/v1.1.0/dataset_config_validate.py +++ b/schema/v1.1.0/dataset_config_validate.py @@ -168,6 +168,8 @@ def main( validation_exclusions = json.load(f) logger.info("Using validation exclusions file: %s", validation_exclusions_file) + if output_dir[-1] != "/": + output_dir += "/" # Remove existing dir if os.path.exists(output_dir): logging.warning("Removing existing %s directory.", output_dir) @@ -211,7 +213,7 @@ def main( with open(os.path.join(output_dir, "dataset_config_validate_errors.json"), "w") as f: json.dump(dict(sorted(errors.items())), f, indent=2, default=str) - logger.error("Validation failed. See dataset_config_validate_errors.json for details.") + logger.error("Validation failed. 
See %s for errors.", output_dir) exit(1) diff --git a/schema/v1.1.0/metadata-docs/affiliation_address.md b/schema/v1.1.0/metadata-docs/affiliation_address.md index bc793b718..973bfe8f6 100644 --- a/schema/v1.1.0/metadata-docs/affiliation_address.md +++ b/schema/v1.1.0/metadata-docs/affiliation_address.md @@ -16,8 +16,8 @@ URI: [cdp-meta:affiliation_address](metadataaffiliation_address) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | | [Author](Author.md) | Author of a scientific data entity | no | +| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | diff --git a/schema/v1.1.0/metadata-docs/affiliation_identifier.md b/schema/v1.1.0/metadata-docs/affiliation_identifier.md index cdca5c467..d9d684bfd 100644 --- a/schema/v1.1.0/metadata-docs/affiliation_identifier.md +++ b/schema/v1.1.0/metadata-docs/affiliation_identifier.md @@ -16,8 +16,8 @@ URI: [cdp-meta:affiliation_identifier](metadataaffiliation_identifier) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | | [Author](Author.md) | Author of a scientific data entity | no | +| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | diff --git a/schema/v1.1.0/metadata-docs/affiliation_name.md b/schema/v1.1.0/metadata-docs/affiliation_name.md index 3f3b517bf..d865a139f 100644 --- a/schema/v1.1.0/metadata-docs/affiliation_name.md +++ b/schema/v1.1.0/metadata-docs/affiliation_name.md @@ -16,8 +16,8 @@ URI: [cdp-meta:affiliation_name](metadataaffiliation_name) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | | [Author](Author.md) | Author of a scientific data entity | no | +| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | diff --git a/schema/v1.1.0/metadata-docs/authors.md b/schema/v1.1.0/metadata-docs/authors.md index 11484f93d..58a194eb4 
100644 --- a/schema/v1.1.0/metadata-docs/authors.md +++ b/schema/v1.1.0/metadata-docs/authors.md @@ -16,11 +16,11 @@ URI: [cdp-meta:authors](metadataauthors) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AuthoredEntity](AuthoredEntity.md) | An entity with associated authors | no | -| [Tomogram](Tomogram.md) | Metadata describing a tomogram | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | -| [Deposition](Deposition.md) | Metadata describing a deposition | no | | [Annotation](Annotation.md) | Metadata describing an annotation | no | +| [Tomogram](Tomogram.md) | Metadata describing a tomogram | no | +| [Deposition](Deposition.md) | Metadata describing a deposition | no | +| [AuthoredEntity](AuthoredEntity.md) | An entity with associated authors | no | diff --git a/schema/v1.1.0/metadata-docs/binning.md b/schema/v1.1.0/metadata-docs/binning.md index f4abaf427..65615f7c8 100644 --- a/schema/v1.1.0/metadata-docs/binning.md +++ b/schema/v1.1.0/metadata-docs/binning.md @@ -16,8 +16,8 @@ URI: [cdp-meta:binning](metadatabinning) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | diff --git a/schema/v1.1.0/metadata-docs/cell_component.md b/schema/v1.1.0/metadata-docs/cell_component.md index 02979edfa..67015dde6 100644 --- a/schema/v1.1.0/metadata-docs/cell_component.md +++ b/schema/v1.1.0/metadata-docs/cell_component.md @@ -16,8 +16,8 @@ URI: [cdp-meta:cell_component](metadatacell_component) | 
Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata-docs/cell_strain.md b/schema/v1.1.0/metadata-docs/cell_strain.md index 2f9ad3712..de2601565 100644 --- a/schema/v1.1.0/metadata-docs/cell_strain.md +++ b/schema/v1.1.0/metadata-docs/cell_strain.md @@ -16,8 +16,8 @@ URI: [cdp-meta:cell_strain](metadatacell_strain) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata-docs/cell_type.md b/schema/v1.1.0/metadata-docs/cell_type.md index 6ad5e3274..d90ff4802 100644 --- a/schema/v1.1.0/metadata-docs/cell_type.md +++ b/schema/v1.1.0/metadata-docs/cell_type.md @@ -16,8 +16,8 @@ URI: [cdp-meta:cell_type](metadatacell_type) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... 
| no | diff --git a/schema/v1.1.0/metadata-docs/corresponding_author_status.md b/schema/v1.1.0/metadata-docs/corresponding_author_status.md index dc1be7e47..08cc9b7d1 100644 --- a/schema/v1.1.0/metadata-docs/corresponding_author_status.md +++ b/schema/v1.1.0/metadata-docs/corresponding_author_status.md @@ -16,8 +16,8 @@ URI: [cdp-meta:corresponding_author_status](metadatacorresponding_author_status) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | | [Author](Author.md) | Author of a scientific data entity | no | +| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | diff --git a/schema/v1.1.0/metadata-docs/cross_references.md b/schema/v1.1.0/metadata-docs/cross_references.md index da96b6902..5cfac3b8c 100644 --- a/schema/v1.1.0/metadata-docs/cross_references.md +++ b/schema/v1.1.0/metadata-docs/cross_references.md @@ -16,9 +16,9 @@ URI: [cdp-meta:cross_references](metadatacross_references) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | -| [Deposition](Deposition.md) | Metadata describing a deposition | no | | [CrossReferencedEntity](CrossReferencedEntity.md) | An entity with associated cross-references to other databases and publication... 
| no | +| [Deposition](Deposition.md) | Metadata describing a deposition | no | +| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata-docs/dates.md b/schema/v1.1.0/metadata-docs/dates.md index a764357af..4b2ea1c86 100644 --- a/schema/v1.1.0/metadata-docs/dates.md +++ b/schema/v1.1.0/metadata-docs/dates.md @@ -16,10 +16,10 @@ URI: [cdp-meta:dates](metadatadates) | Name | Description | Modifies Slot | | --- | --- | --- | -| [DateStampedEntity](DateStampedEntity.md) | An entity with associated deposition, release and last modified dates | no | | [Annotation](Annotation.md) | Metadata describing an annotation | no | | [Deposition](Deposition.md) | Metadata describing a deposition | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [DateStampedEntity](DateStampedEntity.md) | An entity with associated deposition, release and last modified dates | no | diff --git a/schema/v1.1.0/metadata-docs/email.md b/schema/v1.1.0/metadata-docs/email.md index d589e8c4c..8c1f16329 100644 --- a/schema/v1.1.0/metadata-docs/email.md +++ b/schema/v1.1.0/metadata-docs/email.md @@ -16,8 +16,8 @@ URI: [cdp-meta:email](metadataemail) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | | [Author](Author.md) | Author of a scientific data entity | no | +| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | diff --git a/schema/v1.1.0/metadata-docs/file_format.md b/schema/v1.1.0/metadata-docs/file_format.md index b730c0a13..c9fae7667 100644 --- a/schema/v1.1.0/metadata-docs/file_format.md +++ b/schema/v1.1.0/metadata-docs/file_format.md @@ -16,12 +16,12 @@ URI: [cdp-meta:file_format](metadatafile_format) | Name | Description | Modifies Slot | | --- | --- | --- | +| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | +| 
[AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationSemanticSegmentationMaskFile](AnnotationSemanticSegmentationMaskFile.md) | File and sourcing data for a semantic segmentation mask annotation | no | +| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | -| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | | [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | -| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | diff --git a/schema/v1.1.0/metadata-docs/filter_value.md b/schema/v1.1.0/metadata-docs/filter_value.md index dd4d29ef9..dade1978c 100644 --- a/schema/v1.1.0/metadata-docs/filter_value.md +++ b/schema/v1.1.0/metadata-docs/filter_value.md @@ -16,8 +16,8 @@ URI: [cdp-meta:filter_value](metadatafilter_value) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | diff --git a/schema/v1.1.0/metadata-docs/funding.md b/schema/v1.1.0/metadata-docs/funding.md index ea5b99bef..3d42d19c5 100644 --- 
a/schema/v1.1.0/metadata-docs/funding.md +++ b/schema/v1.1.0/metadata-docs/funding.md @@ -16,8 +16,8 @@ URI: [cdp-meta:funding](metadatafunding) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | | [FundedEntity](FundedEntity.md) | An entity with associated funding sources | no | +| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata-docs/glob_string.md b/schema/v1.1.0/metadata-docs/glob_string.md index 1ff37ea08..42e0efe37 100644 --- a/schema/v1.1.0/metadata-docs/glob_string.md +++ b/schema/v1.1.0/metadata-docs/glob_string.md @@ -16,12 +16,12 @@ URI: [cdp-meta:glob_string](metadataglob_string) | Name | Description | Modifies Slot | | --- | --- | --- | +| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationSemanticSegmentationMaskFile](AnnotationSemanticSegmentationMaskFile.md) | File and sourcing data for a semantic segmentation mask annotation | no | +| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | -| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | | [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | -| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | diff --git 
a/schema/v1.1.0/metadata-docs/glob_strings.md b/schema/v1.1.0/metadata-docs/glob_strings.md index 0c41fa5a5..9a8531f59 100644 --- a/schema/v1.1.0/metadata-docs/glob_strings.md +++ b/schema/v1.1.0/metadata-docs/glob_strings.md @@ -16,12 +16,12 @@ URI: [cdp-meta:glob_strings](metadataglob_strings) | Name | Description | Modifies Slot | | --- | --- | --- | +| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationSemanticSegmentationMaskFile](AnnotationSemanticSegmentationMaskFile.md) | File and sourcing data for a semantic segmentation mask annotation | no | +| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | -| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | | [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | -| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | diff --git a/schema/v1.1.0/metadata-docs/grid_preparation.md b/schema/v1.1.0/metadata-docs/grid_preparation.md index 6af98bced..e12dc162c 100644 --- a/schema/v1.1.0/metadata-docs/grid_preparation.md +++ b/schema/v1.1.0/metadata-docs/grid_preparation.md @@ -16,8 +16,8 @@ URI: [cdp-meta:grid_preparation](metadatagrid_preparation) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and 
sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata-docs/id.md b/schema/v1.1.0/metadata-docs/id.md index 65fd8df4c..7104b3e92 100644 --- a/schema/v1.1.0/metadata-docs/id.md +++ b/schema/v1.1.0/metadata-docs/id.md @@ -16,11 +16,11 @@ URI: [cdp-meta:id](metadataid) | Name | Description | Modifies Slot | | --- | --- | --- | -| [TissueDetails](TissueDetails.md) | The type of tissue from which the sample was derived | no | | [AnnotationObject](AnnotationObject.md) | Metadata describing the object being annotated | no | | [CellStrain](CellStrain.md) | The strain or cell line from which the sample was derived | no | -| [CellType](CellType.md) | The cell type from which the sample was derived | no | | [CellComponent](CellComponent.md) | The cellular component from which the sample was derived | no | +| [TissueDetails](TissueDetails.md) | The type of tissue from which the sample was derived | no | +| [CellType](CellType.md) | The cell type from which the sample was derived | no | diff --git a/schema/v1.1.0/metadata-docs/is_visualization_default.md b/schema/v1.1.0/metadata-docs/is_visualization_default.md index 36cee7c45..6b558b697 100644 --- a/schema/v1.1.0/metadata-docs/is_visualization_default.md +++ b/schema/v1.1.0/metadata-docs/is_visualization_default.md @@ -16,12 +16,12 @@ URI: [cdp-meta:is_visualization_default](metadatais_visualization_default) | Name | Description | Modifies Slot | | --- | --- | --- | +| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | 
[AnnotationSemanticSegmentationMaskFile](AnnotationSemanticSegmentationMaskFile.md) | File and sourcing data for a semantic segmentation mask annotation | no | +| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | -| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | | [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | -| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | diff --git a/schema/v1.1.0/metadata-docs/manufacturer.md b/schema/v1.1.0/metadata-docs/manufacturer.md index b23e3b78f..85141ebd4 100644 --- a/schema/v1.1.0/metadata-docs/manufacturer.md +++ b/schema/v1.1.0/metadata-docs/manufacturer.md @@ -16,8 +16,8 @@ URI: [cdp-meta:manufacturer](metadatamanufacturer) | Name | Description | Modifies Slot | | --- | --- | --- | -| [CameraDetails](CameraDetails.md) | The camera used to collect the tilt series | no | | [MicroscopeDetails](MicroscopeDetails.md) | The microscope used to collect the tilt series | no | +| [CameraDetails](CameraDetails.md) | The camera used to collect the tilt series | no | diff --git a/schema/v1.1.0/metadata-docs/model.md b/schema/v1.1.0/metadata-docs/model.md index 844a8d02b..e695080ca 100644 --- a/schema/v1.1.0/metadata-docs/model.md +++ b/schema/v1.1.0/metadata-docs/model.md @@ -16,8 +16,8 @@ URI: [cdp-meta:model](metadatamodel) | Name | Description | Modifies Slot | | --- | --- | --- | -| [CameraDetails](CameraDetails.md) | The camera used to collect the tilt series | no | | 
[MicroscopeDetails](MicroscopeDetails.md) | The microscope used to collect the tilt series | no | +| [CameraDetails](CameraDetails.md) | The camera used to collect the tilt series | no | diff --git a/schema/v1.1.0/metadata-docs/name.md b/schema/v1.1.0/metadata-docs/name.md index c8bedec6d..d76029321 100644 --- a/schema/v1.1.0/metadata-docs/name.md +++ b/schema/v1.1.0/metadata-docs/name.md @@ -16,15 +16,15 @@ URI: [cdp-meta:name](metadataname) | Name | Description | Modifies Slot | | --- | --- | --- | -| [TissueDetails](TissueDetails.md) | The type of tissue from which the sample was derived | no | -| [OrganismDetails](OrganismDetails.md) | The species from which the sample was derived | no | -| [Author](Author.md) | Author of a scientific data entity | no | -| [CellStrain](CellStrain.md) | The strain or cell line from which the sample was derived | no | | [AnnotationObject](AnnotationObject.md) | Metadata describing the object being annotated | no | -| [CellType](CellType.md) | The cell type from which the sample was derived | no | -| [CellComponent](CellComponent.md) | The cellular component from which the sample was derived | no | | [AuthorMixin](AuthorMixin.md) | An entity with author data | no | +| [Author](Author.md) | Author of a scientific data entity | no | +| [CellStrain](CellStrain.md) | The strain or cell line from which the sample was derived | no | +| [OrganismDetails](OrganismDetails.md) | The species from which the sample was derived | no | | [AnnotationMethodLinks](AnnotationMethodLinks.md) | A set of links to models, sourcecode, documentation, etc referenced by annota... 
| no | +| [CellComponent](CellComponent.md) | The cellular component from which the sample was derived | no | +| [TissueDetails](TissueDetails.md) | The type of tissue from which the sample was derived | no | +| [CellType](CellType.md) | The cell type from which the sample was derived | no | diff --git a/schema/v1.1.0/metadata-docs/order.md b/schema/v1.1.0/metadata-docs/order.md index 0267a3091..a0a9aadcd 100644 --- a/schema/v1.1.0/metadata-docs/order.md +++ b/schema/v1.1.0/metadata-docs/order.md @@ -16,8 +16,8 @@ URI: [cdp-meta:order](metadataorder) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | diff --git a/schema/v1.1.0/metadata-docs/organism.md b/schema/v1.1.0/metadata-docs/organism.md index 97abe1618..09e3139ef 100644 --- a/schema/v1.1.0/metadata-docs/organism.md +++ b/schema/v1.1.0/metadata-docs/organism.md @@ -16,8 +16,8 @@ URI: [cdp-meta:organism](metadataorganism) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... 
| no | diff --git a/schema/v1.1.0/metadata-docs/other_setup.md b/schema/v1.1.0/metadata-docs/other_setup.md index 394ce8ce5..1932482d2 100644 --- a/schema/v1.1.0/metadata-docs/other_setup.md +++ b/schema/v1.1.0/metadata-docs/other_setup.md @@ -16,8 +16,8 @@ URI: [cdp-meta:other_setup](metadataother_setup) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata-docs/primary_author_status.md b/schema/v1.1.0/metadata-docs/primary_author_status.md index 3ce412ffc..b46aa989c 100644 --- a/schema/v1.1.0/metadata-docs/primary_author_status.md +++ b/schema/v1.1.0/metadata-docs/primary_author_status.md @@ -16,8 +16,8 @@ URI: [cdp-meta:primary_author_status](metadataprimary_author_status) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | | [Author](Author.md) | Author of a scientific data entity | no | +| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | diff --git a/schema/v1.1.0/metadata-docs/sample_preparation.md b/schema/v1.1.0/metadata-docs/sample_preparation.md index 8d8ff8e50..339ac7e57 100644 --- a/schema/v1.1.0/metadata-docs/sample_preparation.md +++ b/schema/v1.1.0/metadata-docs/sample_preparation.md @@ -16,8 +16,8 @@ URI: [cdp-meta:sample_preparation](metadatasample_preparation) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... 
| no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata-docs/sample_type.md b/schema/v1.1.0/metadata-docs/sample_type.md index 4ee08059b..6309bf7fc 100644 --- a/schema/v1.1.0/metadata-docs/sample_type.md +++ b/schema/v1.1.0/metadata-docs/sample_type.md @@ -16,8 +16,8 @@ URI: [cdp-meta:sample_type](metadatasample_type) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata-docs/tissue.md b/schema/v1.1.0/metadata-docs/tissue.md index 6fa5a0109..750ac8d31 100644 --- a/schema/v1.1.0/metadata-docs/tissue.md +++ b/schema/v1.1.0/metadata-docs/tissue.md @@ -16,8 +16,8 @@ URI: [cdp-meta:tissue](metadatatissue) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata_materialized.yaml b/schema/v1.1.0/metadata_materialized.yaml index 39de99e81..3d3174ec8 100644 --- a/schema/v1.1.0/metadata_materialized.yaml +++ b/schema/v1.1.0/metadata_materialized.yaml @@ -818,7 +818,6 @@ classes: name: authors description: Author of a scientific data entity. 
from_schema: metadata - multivalued: true list_elements_ordered: true alias: authors owner: AuthoredEntity @@ -826,6 +825,7 @@ classes: - AuthoredEntity range: Author required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -840,7 +840,6 @@ classes: description: A funding source for a scientific data entity (base for JSON and DB representation). from_schema: metadata - multivalued: true list_elements_ordered: true alias: funding owner: FundedEntity @@ -848,6 +847,7 @@ classes: - FundedEntity range: FundingDetails recommended: true + multivalued: true inlined: true inlined_as_list: true CrossReferences: @@ -1359,7 +1359,6 @@ classes: name: authors description: Author of a scientific data entity. from_schema: metadata - multivalued: true list_elements_ordered: true alias: authors owner: Dataset @@ -1367,6 +1366,7 @@ classes: - AuthoredEntity range: Author required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -1375,7 +1375,6 @@ classes: description: A funding source for a scientific data entity (base for JSON and DB representation). from_schema: metadata - multivalued: true list_elements_ordered: true alias: funding owner: Dataset @@ -1383,6 +1382,7 @@ classes: - FundedEntity range: FundingDetails recommended: true + multivalued: true inlined: true inlined_as_list: true cross_references: @@ -1568,13 +1568,13 @@ classes: from_schema: metadata exact_mappings: - cdp-common:deposition_types - multivalued: true alias: deposition_types owner: Deposition domain_of: - Deposition range: deposition_types_enum required: true + multivalued: true inlined: true inlined_as_list: true pattern: (^annotation$)|(^dataset$)|(^tomogram$) @@ -1596,7 +1596,6 @@ classes: name: authors description: Author of a scientific data entity. 
from_schema: metadata - multivalued: true list_elements_ordered: true alias: authors owner: Deposition @@ -1604,6 +1603,7 @@ classes: - AuthoredEntity range: Author required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -2428,7 +2428,6 @@ classes: name: authors description: Author of a scientific data entity. from_schema: metadata - multivalued: true list_elements_ordered: true alias: authors owner: Tomogram @@ -2436,6 +2435,7 @@ classes: - AuthoredEntity range: Author required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 @@ -2612,13 +2612,13 @@ classes: from_schema: metadata exact_mappings: - cdp-common:annotation_source_file_glob_strings - multivalued: true alias: glob_strings owner: AnnotationSourceFile domain_of: - AnnotationSourceFile range: string required: false + multivalued: true inlined: true inlined_as_list: true is_visualization_default: @@ -2726,13 +2726,13 @@ classes: from_schema: metadata exact_mappings: - cdp-common:annotation_source_file_glob_strings - multivalued: true alias: glob_strings owner: AnnotationOrientedPointFile domain_of: - AnnotationSourceFile range: string required: false + multivalued: true inlined: true inlined_as_list: true is_visualization_default: @@ -2840,13 +2840,13 @@ classes: from_schema: metadata exact_mappings: - cdp-common:annotation_source_file_glob_strings - multivalued: true alias: glob_strings owner: AnnotationInstanceSegmentationFile domain_of: - AnnotationSourceFile range: string required: false + multivalued: true inlined: true inlined_as_list: true is_visualization_default: @@ -2953,13 +2953,13 @@ classes: from_schema: metadata exact_mappings: - cdp-common:annotation_source_file_glob_strings - multivalued: true alias: glob_strings owner: AnnotationPointFile domain_of: - AnnotationSourceFile range: string required: false + multivalued: true inlined: true inlined_as_list: true is_visualization_default: @@ -3021,13 +3021,13 @@ classes: 
from_schema: metadata exact_mappings: - cdp-common:annotation_source_file_glob_strings - multivalued: true alias: glob_strings owner: AnnotationSegmentationMaskFile domain_of: - AnnotationSourceFile range: string required: false + multivalued: true inlined: true inlined_as_list: true is_visualization_default: @@ -3103,13 +3103,13 @@ classes: from_schema: metadata exact_mappings: - cdp-common:annotation_source_file_glob_strings - multivalued: true alias: glob_strings owner: AnnotationSemanticSegmentationMaskFile domain_of: - AnnotationSourceFile range: string required: false + multivalued: true inlined: true inlined_as_list: true is_visualization_default: @@ -3206,13 +3206,13 @@ classes: description: File and sourcing data for an annotation. Represents an entry in annotation.sources. from_schema: metadata - multivalued: true list_elements_ordered: true alias: files owner: Annotation domain_of: - Annotation range: AnnotationSourceFile + multivalued: true inlined: true inlined_as_list: true ground_truth_status: @@ -3304,7 +3304,6 @@ classes: name: authors description: Author of a scientific data entity. 
from_schema: metadata - multivalued: true list_elements_ordered: true alias: authors owner: Annotation @@ -3312,6 +3311,7 @@ classes: - AuthoredEntity range: Author required: true + multivalued: true inlined: true inlined_as_list: true minimum_cardinality: 1 From 0cc191793c8f3b90fda89b4dee83f109b5553fe2 Mon Sep 17 00:00:00 2001 From: Manasa Venkatakrishnan <14958785+manasaV3@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:25:19 -0700 Subject: [PATCH 2/4] fix: Segmentation mask output datatype to int (#170) * fix: Segmentation mask output datatype to int * Fixing pyramid base --- ingestion_tools/scripts/importers/annotation.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ingestion_tools/scripts/importers/annotation.py b/ingestion_tools/scripts/importers/annotation.py index 85b35d089..f778974c1 100644 --- a/ingestion_tools/scripts/importers/annotation.py +++ b/ingestion_tools/scripts/importers/annotation.py @@ -265,12 +265,25 @@ def get_metadata(self, output_prefix: str) -> list[dict[str, Any]]: class SegmentationMaskAnnotation(VolumeAnnotationSource): shape = "SegmentationMask" # Don't expose SemanticSegmentationMask to the public portal. 
+ mask_label: int + + def __init__( + self, + mask_label: int | None = None, + *args, + **kwargs, + ) -> None: + super().__init__(*args, **kwargs) + if not mask_label: + mask_label = 1 + self.mask_label = mask_label def convert(self, output_prefix: str): return make_pyramids( self.config.fs, self.get_output_filename(output_prefix), self.path, + label=self.mask_label, write_mrc=self.config.write_mrc, write_zarr=self.config.write_zarr, voxel_spacing=self.get_voxel_spacing().as_float(), From c769d537d9b1e624187a18d09bac42c330c38778 Mon Sep 17 00:00:00 2001 From: Daniel Ji Date: Wed, 31 Jul 2024 15:16:15 -0700 Subject: [PATCH 3/4] Refactor dataset config validation to support wormbase strains (#172) * Add dataset config file validation scripts * Update schema and template.yaml with new validation constraints and other small fixes * add LinkML regex matching * Merge main and work towards getting dataset config formatted strings (related to linkml any_of bug) * Work on dataset config Pydantic validation * Update configs to fit new spec, improve / fix validation * Merge branch 'main' into 867-template-schema-update * Resolve merge conflicts, improve dataset config validation script * Fix / improve validation script and validation generation (schema.py) * Update dataset config validation docs, small schema refactor * Add more Pydantic extended validation * Refactor Pydantic schema extended network validation * Work on pydantic validation * Cleanup validation, add more annotation checking * add comments to schema.py * Add annotation_publication validation, both python and LinkML, as well as correct doi entries in configs * Update validation docs, small validation script changes * update ingestion_tool deps * Move validation script to schema folder * Merge network and non-network validation into one file * Continue working on Pydantic validation * Start working on tomogram and tiltseries Pydantic validation * Work more on tomogram and tiltseries Pydantic validation * Add 
dataset config validation to pre-commit (disabled for now), add VSCode IDE intellisense support for config files, add docs on how to setup PycharmIDE intellisense support * Fixes * add depositions to metadata * Validation bugfixes * Fix python deps * fix deps * try to fix workflow * fix workflow * linting * update schema auto-gen'd files * fix config files * refactor crossreferences.dataset_publications -> crossreferences.publications * Update gjensen script with dataset_publications refactor * clean up pydantic dataset validation * Delete schema/v1.1.0/dataset_config_validate_errors/dataset_config_validate_errors.json * dataset config validation warn on http * fix formatting * fix workflow paths * fix workflow * add pdb to annotations.annotation_publications * merge refactor-config-publications into 869-pydantic-validation-datasets * restore dataset_config_merge.py * validation lookup fixes * validation fixes * remove camera manufacturer enum * more validation fixes * validation network cache fixes * fix validation network requests * add depositions, pydantic bug fixes * add authors to tomogram pydantic validation * validation fix * add sources fixes * gjensen_config.py and config fixes * extended validation class name fix * extended validation fixes * gjensen config fix * small validation fixes * add missing validation deps to pyproject.toml * fix dataset config validate logging * improve code style for dataset config extended validation * more code style improvements for dataset config extended validation * add field whitelist feature to dataset config validation * rename from whitelist to excludelist * remove print statements from extended validation * 10000, 10001, 100 02, 10004, 10006, 10007, 10009 * dataset config validation: rename from field excludelist to validation exclusions * 10005, 10006, 10008 * 10301 * fix dataset_config_models_extended with new schema metadata requirements * 10008 * 10009 * 10009 * 10009 tilt axis * 10010 * 10008 strain * remove 
redundant null values * refactor datset config validation to support wormbase strains * update validation file docs * remove mutable default values from yaml_files.py * make ontology checking case-sensitive --------- Co-authored-by: uermel --- ingestion_tools/dataset_configs/10005.yaml | 2 +- ingestion_tools/dataset_configs/10009.yaml | 2 +- ingestion_tools/dataset_configs/10010.yaml | 4 +- ingestion_tools/scripts/common/yaml_files.py | 41 ++++-- schema/v1.1.0/api_models_materialized.yaml | 12 +- schema/v1.1.0/common.yaml | 9 +- .../v1.1.0/dataset_config_materialized.yaml | 6 + schema/v1.1.0/dataset_config_models.py | 10 +- .../v1.1.0/dataset_config_models.schema.json | 13 +- .../v1.1.0/dataset_config_models_extended.py | 129 ++++++++++++------ schema/v1.1.0/dataset_config_validate.py | 28 ++-- .../dataset_config_validation_exclusions.json | 15 +- schema/v1.1.0/docs/dataset_config_validate.md | 4 +- schema/v1.1.0/metadata-docs/CellStrain.md | 15 +- schema/v1.1.0/metadata-docs/WORMBASEID.md | 45 ++++++ schema/v1.1.0/metadata-docs/authors.md | 2 +- schema/v1.1.0/metadata-docs/binning.md | 2 +- schema/v1.1.0/metadata-docs/cell_component.md | 2 +- schema/v1.1.0/metadata-docs/cell_strain.md | 2 +- schema/v1.1.0/metadata-docs/cell_type.md | 2 +- .../v1.1.0/metadata-docs/cross_references.md | 2 +- .../v1.1.0/metadata-docs/dataset_citations.md | 2 +- schema/v1.1.0/metadata-docs/file_format.md | 4 +- schema/v1.1.0/metadata-docs/filter_value.md | 2 +- schema/v1.1.0/metadata-docs/glob_string.md | 4 +- schema/v1.1.0/metadata-docs/glob_strings.md | 4 +- .../v1.1.0/metadata-docs/grid_preparation.md | 2 +- schema/v1.1.0/metadata-docs/id.md | 4 +- schema/v1.1.0/metadata-docs/index.md | 1 + .../metadata-docs/is_visualization_default.md | 4 +- schema/v1.1.0/metadata-docs/manufacturer.md | 2 +- schema/v1.1.0/metadata-docs/model.md | 2 +- schema/v1.1.0/metadata-docs/name.md | 8 +- schema/v1.1.0/metadata-docs/order.md | 2 +- schema/v1.1.0/metadata-docs/organism.md | 2 +- 
schema/v1.1.0/metadata-docs/other_setup.md | 2 +- schema/v1.1.0/metadata-docs/publications.md | 2 +- .../metadata-docs/related_database_entries.md | 2 +- .../metadata-docs/related_database_links.md | 2 +- .../metadata-docs/sample_preparation.md | 2 +- schema/v1.1.0/metadata-docs/sample_type.md | 2 +- schema/v1.1.0/metadata-docs/tissue.md | 2 +- schema/v1.1.0/metadata_materialized.yaml | 12 +- schema/v1.1.0/metadata_models.py | 10 +- 44 files changed, 288 insertions(+), 138 deletions(-) create mode 100644 schema/v1.1.0/metadata-docs/WORMBASEID.md diff --git a/ingestion_tools/dataset_configs/10005.yaml b/ingestion_tools/dataset_configs/10005.yaml index 22f1f3e08..149ff9342 100644 --- a/ingestion_tools/dataset_configs/10005.yaml +++ b/ingestion_tools/dataset_configs/10005.yaml @@ -83,7 +83,7 @@ datasets: name: C57BI/6J cell_type: id: CL:0000019 - name: Sperm + name: sperm cross_references: publications: doi:10.1101/2022.09.28.510016 dataset_description: Cryo-electron tomograms of mouse sperm flagella from FIB-SEM diff --git a/ingestion_tools/dataset_configs/10009.yaml b/ingestion_tools/dataset_configs/10009.yaml index 6a68d126b..2b55576bf 100644 --- a/ingestion_tools/dataset_configs/10009.yaml +++ b/ingestion_tools/dataset_configs/10009.yaml @@ -230,7 +230,7 @@ datasets: name: Benjamin D. 
Engel cell_component: id: GO:0005929 - name: Cilium + name: cilium cell_strain: name: CC-3994 cross_references: diff --git a/ingestion_tools/dataset_configs/10010.yaml b/ingestion_tools/dataset_configs/10010.yaml index db0694061..e2335e1ff 100644 --- a/ingestion_tools/dataset_configs/10010.yaml +++ b/ingestion_tools/dataset_configs/10010.yaml @@ -96,10 +96,10 @@ datasets: primary_author_status: false cell_component: id: GO:0005741 - name: Mitochondrial Outer Membrane + name: mitochondrial outer membrane cell_type: id: CL:2000042 - name: Embryonic fibroblast + name: embryonic fibroblast cross_references: publications: 10.1083/jcb.202204093, 10.1101/2022.01.23.477440 related_database_entries: EMPIAR-11370 diff --git a/ingestion_tools/scripts/common/yaml_files.py b/ingestion_tools/scripts/common/yaml_files.py index 15e9ee5cb..ee9ace899 100644 --- a/ingestion_tools/scripts/common/yaml_files.py +++ b/ingestion_tools/scripts/common/yaml_files.py @@ -2,7 +2,7 @@ import os import re -DATASET_CONFIGS_DIR = "../dataset_configs/" +DATASET_CONFIGS_DIR = os.path.normpath(os.path.join(os.path.dirname(__file__), "../../dataset_configs/")) EXCLUDE_LIST = ["template.yaml", "dataset_config_merged.yaml"] EXCLUDE_KEYWORDS_LIST = ["draft"] YAML_EXTENSIONS = (".yaml", ".yml") @@ -12,9 +12,10 @@ def get_yaml_config_files( + input_files: list[str] = None, include_glob: str = None, - exclude_keywords_list: list[str] = EXCLUDE_KEYWORDS_LIST, - dataset_configs_dir: str = DATASET_CONFIGS_DIR, + exclude_keywords_list: list[str] = None, + dataset_configs_dir: str = None, verbose: bool = False, ) -> list: """ @@ -23,28 +24,38 @@ def get_yaml_config_files( if verbose: logger.setLevel(logging.DEBUG) - if exclude_keywords_list[0] != "": - logger.info("Excluding files that contain any of the following keywords: %s", exclude_keywords_list) - else: - exclude_keywords_list = [] + if not exclude_keywords_list: + exclude_keywords_list = EXCLUDE_KEYWORDS_LIST - # Get all YAML files in the dataset_configs 
directory - all_files = [] - for root, _, files in os.walk(dataset_configs_dir): - for file in files: - if file.endswith(YAML_EXTENSIONS): - all_files.append(os.path.join(root, file)) + logger.info("Excluding files that contain any of the following keywords: %s", exclude_keywords_list) + + # in the case that a None is passed in, set the default value + if not dataset_configs_dir: + dataset_configs_dir = DATASET_CONFIGS_DIR + + # If input_files is not defined, Get all files in the dataset_configs_dir + files_to_filter = ( + input_files + if input_files + else [ + os.path.join(dirpath, f) + for (dirpath, dirnames, filenames) in os.walk(dataset_configs_dir) + for f in filenames + ] + ) # Filter files based on the exclude list files_to_validate = [] - for file in all_files: + for file in files_to_filter: filename = os.path.basename(file) + if not filename.endswith(YAML_EXTENSIONS): + continue if filename in EXCLUDE_LIST: continue if any(keyword in filename for keyword in exclude_keywords_list): logger.debug("Excluding %s because it contains an exclude keyword", file) continue - files_to_validate.append(file) + files_to_validate.append(os.path.normpath(os.path.join(dataset_configs_dir, file))) # Filter files based on the include glob if include_glob: diff --git a/schema/v1.1.0/api_models_materialized.yaml b/schema/v1.1.0/api_models_materialized.yaml index 1eafefc32..e8570be1a 100644 --- a/schema/v1.1.0/api_models_materialized.yaml +++ b/schema/v1.1.0/api_models_materialized.yaml @@ -295,6 +295,12 @@ types: from_schema: cdp-dataset-config base: str pattern: ^GO:[0-9]{7}$ + WORMBASE_ID: + name: WORMBASE_ID + description: A WormBase identifier + from_schema: cdp-dataset-config + base: str + pattern: WBStrain[0-9]{8}$ ONTOLOGY_ID: name: ONTOLOGY_ID description: An ontology identifier @@ -1859,11 +1865,13 @@ classes: owner: Dataset domain_of: - Dataset - range: ONTOLOGY_ID recommended: true inlined: true inlined_as_list: true - pattern: ^[a-zA-Z]+:[0-9]+$ + pattern: 
(WBStrain[0-9]{8}$)|(^[a-zA-Z]+:[0-9]+$) + any_of: + - range: WORMBASE_ID + - range: ONTOLOGY_ID sample_preparation: name: sample_preparation description: Describes how the sample was prepared. diff --git a/schema/v1.1.0/common.yaml b/schema/v1.1.0/common.yaml index 4ec131d48..a427ec743 100644 --- a/schema/v1.1.0/common.yaml +++ b/schema/v1.1.0/common.yaml @@ -242,7 +242,9 @@ slots: description: >- Cell line or strain for the sample. cell_strain_id: - range: ONTOLOGY_ID + any_of: + - range: WORMBASE_ID + - range: ONTOLOGY_ID recommended: true description: >- Link to more information about the cell strain. @@ -956,6 +958,11 @@ types: base: str pattern: '^GO:[0-9]{7}$' + WORMBASE_ID: + description: A WormBase identifier + base: str + pattern: 'WBStrain[0-9]{8}$' + ONTOLOGY_ID: description: An ontology identifier base: str diff --git a/schema/v1.1.0/dataset_config_materialized.yaml b/schema/v1.1.0/dataset_config_materialized.yaml index 0e0afcf0e..4b3550d2e 100644 --- a/schema/v1.1.0/dataset_config_materialized.yaml +++ b/schema/v1.1.0/dataset_config_materialized.yaml @@ -296,6 +296,12 @@ types: from_schema: cdp-dataset-config base: str pattern: ^GO:[0-9]{7}$ + WORMBASE_ID: + name: WORMBASE_ID + description: A WormBase identifier + from_schema: cdp-dataset-config + base: str + pattern: WBStrain[0-9]{8}$ ONTOLOGY_ID: name: ONTOLOGY_ID description: An ontology identifier diff --git a/schema/v1.1.0/dataset_config_models.py b/schema/v1.1.0/dataset_config_models.py index 39a49f80b..5bd0a4886 100644 --- a/schema/v1.1.0/dataset_config_models.py +++ b/schema/v1.1.0/dataset_config_models.py @@ -176,6 +176,13 @@ def __contains__(self, key: str) -> bool: "minimum_value": 0, "name": "VersionString", }, + "WORMBASE_ID": { + "base": "str", + "description": "A WormBase identifier", + "from_schema": "cdp-dataset-config", + "name": "WORMBASE_ID", + "pattern": "WBStrain[0-9]{8}$", + }, "boolean": { "base": "Bool", "description": "A binary (true or false) value", @@ -997,6 +1004,7 @@ 
class CellStrain(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "id", + "any_of": [{"range": "WORMBASE_ID"}, {"range": "ONTOLOGY_ID"}], "domain_of": ["TissueDetails", "CellType", "CellStrain", "CellComponent", "AnnotationObject"], "exact_mappings": ["cdp-common:cell_strain_id"], "recommended": True, @@ -1006,7 +1014,7 @@ class CellStrain(ConfiguredBaseModel): @field_validator("id") def pattern_id(cls, v): - pattern = re.compile(r"^[a-zA-Z]+:[0-9]+$") + pattern = re.compile(r"(WBStrain[0-9]{8}$)|(^[a-zA-Z]+:[0-9]+$)") if isinstance(v, list): for element in v: if not pattern.match(element): diff --git a/schema/v1.1.0/dataset_config_models.schema.json b/schema/v1.1.0/dataset_config_models.schema.json index 76a64c4ce..a658f9b9c 100644 --- a/schema/v1.1.0/dataset_config_models.schema.json +++ b/schema/v1.1.0/dataset_config_models.schema.json @@ -581,9 +581,18 @@ "description": "The strain or cell line from which the sample was derived.", "properties": { "id": { + "anyOf": [ + { + "pattern": "WBStrain[0-9]{8}$", + "type": "string" + }, + { + "pattern": "^[a-zA-Z]+:[0-9]+$", + "type": "string" + } + ], "description": "Link to more information about the cell strain.", - "pattern": "^[a-zA-Z]+:[0-9]+$", - "type": "string" + "pattern": "(WBStrain[0-9]{8}$)|(^[a-zA-Z]+:[0-9]+$)" }, "name": { "description": "Cell line or strain for the sample.", diff --git a/schema/v1.1.0/dataset_config_models_extended.py b/schema/v1.1.0/dataset_config_models_extended.py index 756b53bb7..51f417132 100644 --- a/schema/v1.1.0/dataset_config_models_extended.py +++ b/schema/v1.1.0/dataset_config_models_extended.py @@ -105,7 +105,7 @@ # ============================================================================== # Helper Functions # ============================================================================== -def check_skip_validation(obj: BaseModel, field_name: str, case_sensitive: bool = True) -> bool: +def skip_validation(obj: BaseModel, field_name: str, case_sensitive: 
bool = True) -> bool: # Check if the original class name is in the validation exclusions global validation_exclusions @@ -127,8 +127,8 @@ def check_skip_validation(obj: BaseModel, field_name: str, case_sensitive: bool field_name, field_value, ) - return False - return True + return True + return False # ============================================================================== @@ -227,9 +227,9 @@ def valid_dates(self) -> Self: # ID Object Validation # ============================================================================== @alru_cache -async def validate_id(id: str) -> Tuple[dict, bool]: +async def validate_id(id: str) -> Tuple[List[str], bool]: """ - Returns a tuple of the ID data and whether or not it is valid. + Returns a tuple of the ID names (including original label) and whether or not it is valid. """ # Encode the IRI iri = f"http://purl.obolibrary.org/obo/{id.replace(':', '_')}" @@ -241,8 +241,14 @@ async def validate_id(id: str) -> Tuple[dict, bool]: logger.debug("Getting ID %s at %s", id, url) async with aiohttp.ClientSession() as session, session.get(url) as response: - data = await response.json() if response.status == 200 else {} - return data, data.get("page", {}).get("totalElements", 0) > 0 + if response.status >= 400: + return [], False + data = await response.json() + names = [] + for entry in data["_embedded"]["terms"]: + names.append(entry["label"]) + names += entry["synonyms"] + return names, True @alru_cache @@ -268,11 +274,45 @@ async def is_id_ancestor(id_ancestor: str, id: str) -> bool: return response.status == 200 and id_ancestor in ancestor_ids +@alru_cache +async def validate_wormbase_id(id: str) -> Tuple[List[str], bool]: + """ + Returns a tuple of the ID names (including original label) and whether or not it is valid. 
+ """ + + url = f"http://rest.wormbase.org/rest/field/strain/{id}/name" + + logger.debug("Getting ID %s at %s", id, url) + + names = [] + async with aiohttp.ClientSession() as session, session.get(url) as response: + if response.status >= 400: + return [], False + data = await response.json() + if label := data.get("name", {}).get("data", {}).get("label", ""): + names.append(label) + + names_url = f"http://rest.wormbase.org/rest/field/strain/{id}/other_names" + + async with aiohttp.ClientSession() as session, session.get(names_url) as response: + if response.status >= 400: + return names, True + data = await response.json() + if other_names := data.get("other_names", {}).get("data", []): + names += other_names + + return names, True + + def validate_id_name_object( + self: Union[AnnotationObject, CellComponent, CellStrain, CellType, OrganismDetails, TissueDetails], id: str, name: str, + id_field_name: str = "id", validate_name: bool = True, ancestor: str | None = None, + validate_id_function: callable = validate_id, + validate_ancestor_function: callable = is_id_ancestor, ) -> None: """ Validates the ID and name, ensuring that: @@ -280,24 +320,29 @@ def validate_id_name_object( - The name matches the ID - The name is an ancestor of the ancestor ID (if provided) """ + if ( + not running_network_validation + or getattr(self, id_field_name) is None + or skip_validation(self, id_field_name, case_sensitive=True) + ): + return + logger.debug("Validating %s with ID %s", name, id) - id_data, valid_id = asyncio.run(validate_id(id)) + id = id.strip() + name = name.strip() + retrieved_names, valid_id = asyncio.run(validate_id_function(id)) if not valid_id: raise ValueError(f"Invalid ID {id}") # return here since if name validation is not done, we don't need to check ancestors - if not validate_name: + if not validate_name or skip_validation(self, "name", case_sensitive=True): return logger.debug("Valid ID, now checking if name '%s' matches ID: %s", name, id) - # check if the 
name matches the ID's label or any of its synonyms - valid_name = False - for entry in id_data["_embedded"]["terms"]: - if name == entry["label"] or name in entry["synonyms"]: - valid_name = True - break + + valid_name = any(name == retrieved_name for retrieved_name in retrieved_names) if not valid_name: raise ValueError(f"name '{name}' does not match id: {id}") @@ -306,38 +351,21 @@ def validate_id_name_object( return logger.debug("Valid name, now checking if %s is an ancestor of %s", name, ancestor) - if not asyncio.run(is_id_ancestor(ancestor, id)): + if not asyncio.run(validate_ancestor_function(ancestor, id)): raise ValueError(f"'{name}' is not a descendant of {ancestor}") -def validate_ontology_object( - self: Union[AnnotationObject, CellComponent, CellStrain, CellType, TissueDetails], - ancestor: str = None, -) -> CellType: +def validate_cell_strain_object(self: CellStrain) -> CellStrain: """ - Validates a typical object with an ontology ID and name + Validates a cell strain object, with slightly different validation (also looking at wormbase cell strain IDs) """ if not running_network_validation or self.id is None: return self - validate_object = check_skip_validation(self, "id") - if not validate_object: - return self - - validate_name = check_skip_validation(self, "name") - validate_id_name_object(self.id.strip(), self.name.strip(), validate_name, ancestor) - - return self - - -def validate_organism_object(self: OrganismDetails) -> OrganismDetails: - """ - Validates an organism object, with slightly different validation (taxonomy_id, but needs to be prefixed with NCBITaxon) - """ - if not running_network_validation or self.taxonomy_id is None: - return self - - validate_id_name_object(f"NCBITaxon:{self.taxonomy_id}", self.name.strip(), validate_name=False) + if self.id.startswith("WBStrain"): + validate_id_name_object(self, self.id, self.name, validate_id_function=validate_wormbase_id) + else: + validate_id_name_object(self, self.id, self.name) return 
self @@ -541,7 +569,8 @@ def validate_sources( class ExtendedValidationAnnotationObject(AnnotationObject): @model_validator(mode="after") def validate_annotation_object(self) -> Self: - return validate_ontology_object(self, CELLULAR_COMPONENT_GO_ID) + validate_id_name_object(self, self.id, self.name, ancestor=CELLULAR_COMPONENT_GO_ID) + return self # ============================================================================== @@ -688,31 +717,41 @@ def valid_sources(cls: Self, source_list: List[KeyPhotoSource]) -> List[KeyPhoto class ExtendedValidationCellComponent(CellComponent): @model_validator(mode="after") def validate_cell_component(self) -> Self: - return validate_ontology_object(self, CELLULAR_COMPONENT_GO_ID) + validate_id_name_object(self, self.id, self.name, ancestor=CELLULAR_COMPONENT_GO_ID) + return self class ExtendedValidationCellStrain(CellStrain): @model_validator(mode="after") def validate_cell_strain(self) -> Self: - return validate_ontology_object(self) + return validate_cell_strain_object(self) class ExtendedValidationCellType(CellType): @model_validator(mode="after") def validate_cell_type(self) -> Self: - return validate_ontology_object(self) + validate_id_name_object(self, self.id, self.name) + return self class ExtendedValidationTissue(TissueDetails): @model_validator(mode="after") def validate_tissue(self) -> Self: - return validate_ontology_object(self) + validate_id_name_object(self, self.id, self.name) + return self class ExtendedValidationOrganism(OrganismDetails): @model_validator(mode="after") def validate_organism(self) -> Self: - return validate_organism_object(self) + validate_id_name_object( + self, + f"NCBITaxon:{self.taxonomy_id}", + self.name, + id_field_name="taxonomy_id", + validate_name=False, + ) + return self class ExtendedValidationDataset(Dataset): diff --git a/schema/v1.1.0/dataset_config_validate.py b/schema/v1.1.0/dataset_config_validate.py index 3ade81071..e4abedc34 100644 --- 
a/schema/v1.1.0/dataset_config_validate.py +++ b/schema/v1.1.0/dataset_config_validate.py @@ -101,14 +101,14 @@ def replace_formatted_strings(config_data: dict, depth: int, permitted_parent: b "--include-glob", type=str, default=None, - help="Include only files that match the given glob pattern, used in conjunction with --input-dir.", + help="Include only files that match the given glob pattern, can be used for both --input-dir and input files.", ) @click.option( "--exclude-keywords", type=str, default=EXCLUDE_KEYWORDS_LIST, multiple=True, - help="Exclude files that contain the following keywords in the filename, used in conjunction with --input-dir. Repeat the flag for multiple keywords.", + help="Exclude files that contain the following keywords in the filename, can be used for both --input-dir and input files.", ) @click.option( "--validation-exclusions-file", @@ -132,7 +132,7 @@ def main( input_files: str, input_dir: str, include_glob: str, - exclude_keywords: str, + exclude_keywords: list[str], validation_exclusions_file: str, output_dir: str, network_validation: bool, @@ -144,21 +144,17 @@ def main( if verbose: logger.setLevel(logging.DEBUG) - files_to_validate = [] if input_files and input_dir: logger.error("Provide input files or --input-dir, not both.") exit(1) - elif input_files: - files_to_validate = input_files - if include_glob: - logger.warning("Ignoring --include-glob option because input files were provided.") - if exclude_keywords: - logger.warning("Ignoring --exclude-keywords option because input files were provided.") - elif input_dir: - files_to_validate = get_yaml_config_files(include_glob, exclude_keywords, input_dir, verbose) - else: - logger.info("No input files or directory provided. 
Using default input directory: %s", DATASET_CONFIGS_DIR) - files_to_validate = get_yaml_config_files(include_glob, exclude_keywords, DATASET_CONFIGS_DIR, verbose) + + files_to_validate = get_yaml_config_files( + input_files=input_files, + include_glob=include_glob, + exclude_keywords_list=exclude_keywords, + dataset_configs_dir=input_dir, + verbose=verbose, + ) if not files_to_validate: logger.warning("No files to validate.") @@ -172,7 +168,7 @@ def main( output_dir += "/" # Remove existing dir if os.path.exists(output_dir): - logging.warning("Removing existing %s directory.", output_dir) + logger.warning("Removing existing %s directory.", output_dir) shutil.rmtree(output_dir) os.makedirs(output_dir, exist_ok=True) diff --git a/schema/v1.1.0/dataset_config_validation_exclusions.json b/schema/v1.1.0/dataset_config_validation_exclusions.json index bf30fee0d..48bab23b3 100644 --- a/schema/v1.1.0/dataset_config_validation_exclusions.json +++ b/schema/v1.1.0/dataset_config_validation_exclusions.json @@ -4,11 +4,11 @@ "name": [ "microtubule doublet 48 nm repeat", "microtubule doublet 96 nm repeat", - "sars-cov2 spike protein", - "hydrogen-dependent co2 reductase filament", + "SARS-CoV2 spike protein", + "hydrogen-dependent CO2 reductase filament", "stellate", - "mtd sleeve", - "dynein-1b" + "MTD sleeve", + "Dynein-1b" ] }, "CellComponent": { @@ -16,9 +16,7 @@ "name": [] }, "CellStrain": { - "id": [ - "WBStrain00050783" - ], + "id": [], "name": [] }, "CellType": { @@ -26,8 +24,7 @@ "name": [] }, "OrganismDetails": { - "id": [], - "name": [] + "taxonomy_id": [] }, "TissueDetails": { "id": [], diff --git a/schema/v1.1.0/docs/dataset_config_validate.md b/schema/v1.1.0/docs/dataset_config_validate.md index 93d2eb452..077507860 100644 --- a/schema/v1.1.0/docs/dataset_config_validate.md +++ b/schema/v1.1.0/docs/dataset_config_validate.md @@ -12,6 +12,8 @@ python3 -m venv .venv # create a virtualenv source .venv/bin/activate # activate the virtualenv python3 -m pip install 
poetry # Install the poetry package manager poetry install # Use poetry to install this package's dependencies +cd ../ingestion_tools/ # Move to the ingestion_tools directory +poetry install # Use poetry to install this package's dependencies ``` ## Running the script @@ -42,7 +44,7 @@ python dataset_config_validate.py --include-glob ".*104[0-9]{2}.*" ``` ### `--exclude-keywords` -Exclude files that contain the following keywords in the filename, used in conjunction with --input-dir. Repeat the flag for multiple keywords. By default, any file containing "draft" will be excluded. Note that this exclude option is applied BEFORE the include option. +Exclude files that contain the following keywords in the filename. Repeat the flag for multiple keywords. By default, any file containing "draft" will be excluded. Note that this exclude option takes precedence over the include option. Example: ```bash diff --git a/schema/v1.1.0/metadata-docs/CellStrain.md b/schema/v1.1.0/metadata-docs/CellStrain.md index dbdeb7a71..253d0f507 100644 --- a/schema/v1.1.0/metadata-docs/CellStrain.md +++ b/schema/v1.1.0/metadata-docs/CellStrain.md @@ -38,7 +38,7 @@ URI: [cdp-meta:CellStrain](metadataCellStrain) | Name | Cardinality and Range | Description | Inheritance | | --- | --- | --- | --- | | [name](name.md) | 1
[String](String.md) | Cell line or strain for the sample | direct | -| [id](id.md) | 0..1 _recommended_
[ONTOLOGYID](ONTOLOGYID.md) | Link to more information about the cell strain | direct | +| [id](id.md) | 0..1 _recommended_
[String](String.md) or 
[WORMBASEID](WORMBASEID.md) or 
[ONTOLOGYID](ONTOLOGYID.md) | Link to more information about the cell strain | direct | @@ -133,11 +133,13 @@ attributes: - CellStrain - CellComponent - AnnotationObject - range: ONTOLOGY_ID recommended: true inlined: true inlined_as_list: true - pattern: ^[a-zA-Z]+:[0-9]+$ + pattern: (WBStrain[0-9]{8}$)|(^[a-zA-Z]+:[0-9]+$) + any_of: + - range: WORMBASE_ID + - range: ONTOLOGY_ID ``` @@ -186,11 +188,14 @@ attributes: - CellStrain - CellComponent - AnnotationObject - range: ONTOLOGY_ID + range: string recommended: true inlined: true inlined_as_list: true - pattern: ^[a-zA-Z]+:[0-9]+$ + pattern: (WBStrain[0-9]{8}$)|(^[a-zA-Z]+:[0-9]+$) + any_of: + - range: WORMBASE_ID + - range: ONTOLOGY_ID ``` diff --git a/schema/v1.1.0/metadata-docs/WORMBASEID.md b/schema/v1.1.0/metadata-docs/WORMBASEID.md new file mode 100644 index 000000000..a92ec2ce3 --- /dev/null +++ b/schema/v1.1.0/metadata-docs/WORMBASEID.md @@ -0,0 +1,45 @@ +# Type: WORMBASEID + + + + +_A WormBase identifier_ + + + +URI: [cdp-meta:WORMBASE_ID](metadataWORMBASE_ID) + +* [base](https://w3id.org/linkml/base): str + + + + +* [pattern](https://w3id.org/linkml/pattern): `WBStrain[0-9]{8}$` + + + + + + +## Identifier and Mapping Information + + + + + + + +### Schema Source + + +* from schema: metadata + + + + +## Mappings + +| Mapping Type | Mapped Value | +| --- | --- | +| self | cdp-meta:WORMBASE_ID | +| native | cdp-meta:WORMBASE_ID | diff --git a/schema/v1.1.0/metadata-docs/authors.md b/schema/v1.1.0/metadata-docs/authors.md index 58a194eb4..715dedbb2 100644 --- a/schema/v1.1.0/metadata-docs/authors.md +++ b/schema/v1.1.0/metadata-docs/authors.md @@ -17,10 +17,10 @@ URI: [cdp-meta:authors](metadataauthors) | Name | Description | Modifies Slot | | --- | --- | --- | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | -| [Annotation](Annotation.md) | Metadata describing an annotation | no | | [Tomogram](Tomogram.md) | Metadata describing a tomogram | no | | [Deposition](Deposition.md) | 
Metadata describing a deposition | no | | [AuthoredEntity](AuthoredEntity.md) | An entity with associated authors | no | +| [Annotation](Annotation.md) | Metadata describing an annotation | no | diff --git a/schema/v1.1.0/metadata-docs/binning.md b/schema/v1.1.0/metadata-docs/binning.md index 65615f7c8..f4abaf427 100644 --- a/schema/v1.1.0/metadata-docs/binning.md +++ b/schema/v1.1.0/metadata-docs/binning.md @@ -16,8 +16,8 @@ URI: [cdp-meta:binning](metadatabinning) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | | [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | +| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | | [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | diff --git a/schema/v1.1.0/metadata-docs/cell_component.md b/schema/v1.1.0/metadata-docs/cell_component.md index 67015dde6..02979edfa 100644 --- a/schema/v1.1.0/metadata-docs/cell_component.md +++ b/schema/v1.1.0/metadata-docs/cell_component.md @@ -16,8 +16,8 @@ URI: [cdp-meta:cell_component](metadatacell_component) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | | [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... 
| no | +| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata-docs/cell_strain.md b/schema/v1.1.0/metadata-docs/cell_strain.md index de2601565..2f9ad3712 100644 --- a/schema/v1.1.0/metadata-docs/cell_strain.md +++ b/schema/v1.1.0/metadata-docs/cell_strain.md @@ -16,8 +16,8 @@ URI: [cdp-meta:cell_strain](metadatacell_strain) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | | [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | +| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata-docs/cell_type.md b/schema/v1.1.0/metadata-docs/cell_type.md index d90ff4802..6ad5e3274 100644 --- a/schema/v1.1.0/metadata-docs/cell_type.md +++ b/schema/v1.1.0/metadata-docs/cell_type.md @@ -16,8 +16,8 @@ URI: [cdp-meta:cell_type](metadatacell_type) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | | [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | +| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata-docs/cross_references.md b/schema/v1.1.0/metadata-docs/cross_references.md index 5cfac3b8c..89e6bc20e 100644 --- a/schema/v1.1.0/metadata-docs/cross_references.md +++ b/schema/v1.1.0/metadata-docs/cross_references.md @@ -16,8 +16,8 @@ URI: [cdp-meta:cross_references](metadatacross_references) | Name | Description | Modifies Slot | | --- | --- | --- | -| [CrossReferencedEntity](CrossReferencedEntity.md) | An entity with associated cross-references to other databases and publication... 
| no | | [Deposition](Deposition.md) | Metadata describing a deposition | no | +| [CrossReferencedEntity](CrossReferencedEntity.md) | An entity with associated cross-references to other databases and publication... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata-docs/dataset_citations.md b/schema/v1.1.0/metadata-docs/dataset_citations.md index 1e5beac9e..094e8ede0 100644 --- a/schema/v1.1.0/metadata-docs/dataset_citations.md +++ b/schema/v1.1.0/metadata-docs/dataset_citations.md @@ -16,8 +16,8 @@ URI: [cdp-meta:dataset_citations](metadatadataset_citations) | Name | Description | Modifies Slot | | --- | --- | --- | -| [CrossReferences](CrossReferences.md) | A set of cross-references to other databases and publications | no | | [CrossReferencesMixin](CrossReferencesMixin.md) | A set of cross-references to other databases and publications | no | +| [CrossReferences](CrossReferences.md) | A set of cross-references to other databases and publications | no | diff --git a/schema/v1.1.0/metadata-docs/file_format.md b/schema/v1.1.0/metadata-docs/file_format.md index c9fae7667..d744b7b39 100644 --- a/schema/v1.1.0/metadata-docs/file_format.md +++ b/schema/v1.1.0/metadata-docs/file_format.md @@ -16,12 +16,12 @@ URI: [cdp-meta:file_format](metadatafile_format) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | +| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | | [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationSemanticSegmentationMaskFile](AnnotationSemanticSegmentationMaskFile.md) | File and sourcing data for a semantic segmentation mask annotation | no | | [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and 
sourcing data for a segmentation mask annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | -| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | +| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | diff --git a/schema/v1.1.0/metadata-docs/filter_value.md b/schema/v1.1.0/metadata-docs/filter_value.md index dade1978c..dd4d29ef9 100644 --- a/schema/v1.1.0/metadata-docs/filter_value.md +++ b/schema/v1.1.0/metadata-docs/filter_value.md @@ -16,8 +16,8 @@ URI: [cdp-meta:filter_value](metadatafilter_value) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | | [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | +| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | diff --git a/schema/v1.1.0/metadata-docs/glob_string.md b/schema/v1.1.0/metadata-docs/glob_string.md index 42e0efe37..1e5259104 100644 --- a/schema/v1.1.0/metadata-docs/glob_string.md +++ b/schema/v1.1.0/metadata-docs/glob_string.md @@ -16,12 +16,12 @@ URI: [cdp-meta:glob_string](metadataglob_string) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | +| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | | [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationSemanticSegmentationMaskFile](AnnotationSemanticSegmentationMaskFile.md) | File and sourcing data for a 
semantic segmentation mask annotation | no | | [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | -| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | +| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | diff --git a/schema/v1.1.0/metadata-docs/glob_strings.md b/schema/v1.1.0/metadata-docs/glob_strings.md index 9a8531f59..c93d58393 100644 --- a/schema/v1.1.0/metadata-docs/glob_strings.md +++ b/schema/v1.1.0/metadata-docs/glob_strings.md @@ -16,12 +16,12 @@ URI: [cdp-meta:glob_strings](metadataglob_strings) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | +| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | | [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationSemanticSegmentationMaskFile](AnnotationSemanticSegmentationMaskFile.md) | File and sourcing data for a semantic segmentation mask annotation | no | | [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | -| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | +| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | diff --git a/schema/v1.1.0/metadata-docs/grid_preparation.md b/schema/v1.1.0/metadata-docs/grid_preparation.md index 
e12dc162c..6af98bced 100644 --- a/schema/v1.1.0/metadata-docs/grid_preparation.md +++ b/schema/v1.1.0/metadata-docs/grid_preparation.md @@ -16,8 +16,8 @@ URI: [cdp-meta:grid_preparation](metadatagrid_preparation) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | | [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | +| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata-docs/id.md b/schema/v1.1.0/metadata-docs/id.md index 7104b3e92..1fdf9e430 100644 --- a/schema/v1.1.0/metadata-docs/id.md +++ b/schema/v1.1.0/metadata-docs/id.md @@ -16,10 +16,10 @@ URI: [cdp-meta:id](metadataid) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationObject](AnnotationObject.md) | Metadata describing the object being annotated | no | -| [CellStrain](CellStrain.md) | The strain or cell line from which the sample was derived | no | | [CellComponent](CellComponent.md) | The cellular component from which the sample was derived | no | +| [CellStrain](CellStrain.md) | The strain or cell line from which the sample was derived | no | | [TissueDetails](TissueDetails.md) | The type of tissue from which the sample was derived | no | +| [AnnotationObject](AnnotationObject.md) | Metadata describing the object being annotated | no | | [CellType](CellType.md) | The cell type from which the sample was derived | no | diff --git a/schema/v1.1.0/metadata-docs/index.md b/schema/v1.1.0/metadata-docs/index.md index 0b7a47176..7b86cb888 100644 --- a/schema/v1.1.0/metadata-docs/index.md +++ b/schema/v1.1.0/metadata-docs/index.md @@ -231,6 +231,7 @@ Name: cdp-meta | [Uriorcurie](Uriorcurie.md) | a URI or a CURIE | | [URLorS3URI](URLorS3URI.md) | A URL or S3 URI | | [VersionString](VersionString.md) | A version number (only major, minor versions) | +| 
[WORMBASEID](WORMBASEID.md) | A WormBase identifier | ## Subsets diff --git a/schema/v1.1.0/metadata-docs/is_visualization_default.md b/schema/v1.1.0/metadata-docs/is_visualization_default.md index 6b558b697..6eed74948 100644 --- a/schema/v1.1.0/metadata-docs/is_visualization_default.md +++ b/schema/v1.1.0/metadata-docs/is_visualization_default.md @@ -16,12 +16,12 @@ URI: [cdp-meta:is_visualization_default](metadatais_visualization_default) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | +| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | | [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationSemanticSegmentationMaskFile](AnnotationSemanticSegmentationMaskFile.md) | File and sourcing data for a semantic segmentation mask annotation | no | | [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | -| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | +| [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | diff --git a/schema/v1.1.0/metadata-docs/manufacturer.md b/schema/v1.1.0/metadata-docs/manufacturer.md index 85141ebd4..b23e3b78f 100644 --- a/schema/v1.1.0/metadata-docs/manufacturer.md +++ b/schema/v1.1.0/metadata-docs/manufacturer.md @@ -16,8 +16,8 @@ URI: [cdp-meta:manufacturer](metadatamanufacturer) | Name | Description | Modifies Slot | | --- | --- | --- | -| [MicroscopeDetails](MicroscopeDetails.md) | The microscope used to collect the tilt series | no | | [CameraDetails](CameraDetails.md) | The 
camera used to collect the tilt series | no | +| [MicroscopeDetails](MicroscopeDetails.md) | The microscope used to collect the tilt series | no | diff --git a/schema/v1.1.0/metadata-docs/model.md b/schema/v1.1.0/metadata-docs/model.md index e695080ca..844a8d02b 100644 --- a/schema/v1.1.0/metadata-docs/model.md +++ b/schema/v1.1.0/metadata-docs/model.md @@ -16,8 +16,8 @@ URI: [cdp-meta:model](metadatamodel) | Name | Description | Modifies Slot | | --- | --- | --- | -| [MicroscopeDetails](MicroscopeDetails.md) | The microscope used to collect the tilt series | no | | [CameraDetails](CameraDetails.md) | The camera used to collect the tilt series | no | +| [MicroscopeDetails](MicroscopeDetails.md) | The microscope used to collect the tilt series | no | diff --git a/schema/v1.1.0/metadata-docs/name.md b/schema/v1.1.0/metadata-docs/name.md index d76029321..1f8079748 100644 --- a/schema/v1.1.0/metadata-docs/name.md +++ b/schema/v1.1.0/metadata-docs/name.md @@ -16,15 +16,15 @@ URI: [cdp-meta:name](metadataname) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationObject](AnnotationObject.md) | Metadata describing the object being annotated | no | | [AuthorMixin](AuthorMixin.md) | An entity with author data | no | -| [Author](Author.md) | Author of a scientific data entity | no | -| [CellStrain](CellStrain.md) | The strain or cell line from which the sample was derived | no | -| [OrganismDetails](OrganismDetails.md) | The species from which the sample was derived | no | | [AnnotationMethodLinks](AnnotationMethodLinks.md) | A set of links to models, sourcecode, documentation, etc referenced by annota... 
| no | +| [CellStrain](CellStrain.md) | The strain or cell line from which the sample was derived | no | | [CellComponent](CellComponent.md) | The cellular component from which the sample was derived | no | | [TissueDetails](TissueDetails.md) | The type of tissue from which the sample was derived | no | +| [AnnotationObject](AnnotationObject.md) | Metadata describing the object being annotated | no | +| [Author](Author.md) | Author of a scientific data entity | no | | [CellType](CellType.md) | The cell type from which the sample was derived | no | +| [OrganismDetails](OrganismDetails.md) | The species from which the sample was derived | no | diff --git a/schema/v1.1.0/metadata-docs/order.md b/schema/v1.1.0/metadata-docs/order.md index a0a9aadcd..0267a3091 100644 --- a/schema/v1.1.0/metadata-docs/order.md +++ b/schema/v1.1.0/metadata-docs/order.md @@ -16,8 +16,8 @@ URI: [cdp-meta:order](metadataorder) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | | [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | +| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | diff --git a/schema/v1.1.0/metadata-docs/organism.md b/schema/v1.1.0/metadata-docs/organism.md index 09e3139ef..97abe1618 100644 --- a/schema/v1.1.0/metadata-docs/organism.md +++ b/schema/v1.1.0/metadata-docs/organism.md @@ -16,8 +16,8 @@ URI: [cdp-meta:organism](metadataorganism) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | | [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... 
| no | +| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata-docs/other_setup.md b/schema/v1.1.0/metadata-docs/other_setup.md index 1932482d2..394ce8ce5 100644 --- a/schema/v1.1.0/metadata-docs/other_setup.md +++ b/schema/v1.1.0/metadata-docs/other_setup.md @@ -16,8 +16,8 @@ URI: [cdp-meta:other_setup](metadataother_setup) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | | [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | +| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata-docs/publications.md b/schema/v1.1.0/metadata-docs/publications.md index d016823f2..313a8c39a 100644 --- a/schema/v1.1.0/metadata-docs/publications.md +++ b/schema/v1.1.0/metadata-docs/publications.md @@ -16,8 +16,8 @@ URI: [cdp-meta:publications](metadatapublications) | Name | Description | Modifies Slot | | --- | --- | --- | -| [CrossReferences](CrossReferences.md) | A set of cross-references to other databases and publications | no | | [CrossReferencesMixin](CrossReferencesMixin.md) | A set of cross-references to other databases and publications | no | +| [CrossReferences](CrossReferences.md) | A set of cross-references to other databases and publications | no | diff --git a/schema/v1.1.0/metadata-docs/related_database_entries.md b/schema/v1.1.0/metadata-docs/related_database_entries.md index 2afd9cec9..8a09c428e 100644 --- a/schema/v1.1.0/metadata-docs/related_database_entries.md +++ b/schema/v1.1.0/metadata-docs/related_database_entries.md @@ -16,8 +16,8 @@ URI: [cdp-meta:related_database_entries](metadatarelated_database_entries) | Name | Description | Modifies Slot | | --- | --- | --- | -| [CrossReferences](CrossReferences.md) | A set of cross-references to other databases and publications | no | | 
[CrossReferencesMixin](CrossReferencesMixin.md) | A set of cross-references to other databases and publications | no | +| [CrossReferences](CrossReferences.md) | A set of cross-references to other databases and publications | no | diff --git a/schema/v1.1.0/metadata-docs/related_database_links.md b/schema/v1.1.0/metadata-docs/related_database_links.md index 4f8147af5..69a529ee5 100644 --- a/schema/v1.1.0/metadata-docs/related_database_links.md +++ b/schema/v1.1.0/metadata-docs/related_database_links.md @@ -16,8 +16,8 @@ URI: [cdp-meta:related_database_links](metadatarelated_database_links) | Name | Description | Modifies Slot | | --- | --- | --- | -| [CrossReferences](CrossReferences.md) | A set of cross-references to other databases and publications | no | | [CrossReferencesMixin](CrossReferencesMixin.md) | A set of cross-references to other databases and publications | no | +| [CrossReferences](CrossReferences.md) | A set of cross-references to other databases and publications | no | diff --git a/schema/v1.1.0/metadata-docs/sample_preparation.md b/schema/v1.1.0/metadata-docs/sample_preparation.md index 339ac7e57..8d8ff8e50 100644 --- a/schema/v1.1.0/metadata-docs/sample_preparation.md +++ b/schema/v1.1.0/metadata-docs/sample_preparation.md @@ -16,8 +16,8 @@ URI: [cdp-meta:sample_preparation](metadatasample_preparation) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | | [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... 
| no | +| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata-docs/sample_type.md b/schema/v1.1.0/metadata-docs/sample_type.md index 6309bf7fc..4ee08059b 100644 --- a/schema/v1.1.0/metadata-docs/sample_type.md +++ b/schema/v1.1.0/metadata-docs/sample_type.md @@ -16,8 +16,8 @@ URI: [cdp-meta:sample_type](metadatasample_type) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | | [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | +| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata-docs/tissue.md b/schema/v1.1.0/metadata-docs/tissue.md index 750ac8d31..6fa5a0109 100644 --- a/schema/v1.1.0/metadata-docs/tissue.md +++ b/schema/v1.1.0/metadata-docs/tissue.md @@ -16,8 +16,8 @@ URI: [cdp-meta:tissue](metadatatissue) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | | [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... 
| no | +| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata_materialized.yaml b/schema/v1.1.0/metadata_materialized.yaml index 3d3174ec8..6316e58e3 100644 --- a/schema/v1.1.0/metadata_materialized.yaml +++ b/schema/v1.1.0/metadata_materialized.yaml @@ -312,6 +312,12 @@ types: from_schema: metadata base: str pattern: ^GO:[0-9]{7}$ + WORMBASE_ID: + name: WORMBASE_ID + description: A WormBase identifier + from_schema: metadata + base: str + pattern: WBStrain[0-9]{8}$ ONTOLOGY_ID: name: ONTOLOGY_ID description: An ontology identifier @@ -1117,11 +1123,13 @@ classes: - CellStrain - CellComponent - AnnotationObject - range: ONTOLOGY_ID recommended: true inlined: true inlined_as_list: true - pattern: ^[a-zA-Z]+:[0-9]+$ + pattern: (WBStrain[0-9]{8}$)|(^[a-zA-Z]+:[0-9]+$) + any_of: + - range: WORMBASE_ID + - range: ONTOLOGY_ID CellComponent: name: CellComponent description: The cellular component from which the sample was derived. diff --git a/schema/v1.1.0/metadata_models.py b/schema/v1.1.0/metadata_models.py index 961df93cc..529e54dbf 100644 --- a/schema/v1.1.0/metadata_models.py +++ b/schema/v1.1.0/metadata_models.py @@ -184,6 +184,13 @@ def __contains__(self, key: str) -> bool: "minimum_value": 0, "name": "VersionString", }, + "WORMBASE_ID": { + "base": "str", + "description": "A WormBase identifier", + "from_schema": "metadata", + "name": "WORMBASE_ID", + "pattern": "WBStrain[0-9]{8}$", + }, "boolean": { "base": "Bool", "description": "A binary (true or false) value", @@ -1005,6 +1012,7 @@ class CellStrain(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "id", + "any_of": [{"range": "WORMBASE_ID"}, {"range": "ONTOLOGY_ID"}], "domain_of": ["TissueDetails", "CellType", "CellStrain", "CellComponent", "AnnotationObject"], "exact_mappings": ["cdp-common:cell_strain_id"], "recommended": True, @@ -1014,7 +1022,7 @@ class CellStrain(ConfiguredBaseModel): @field_validator("id") def pattern_id(cls, 
v): - pattern = re.compile(r"^[a-zA-Z]+:[0-9]+$") + pattern = re.compile(r"(WBStrain[0-9]{8}$)|(^[a-zA-Z]+:[0-9]+$)") if isinstance(v, list): for element in v: if not pattern.match(element): From 62f8383f84e28850844e84867159af00bf9164b9 Mon Sep 17 00:00:00 2001 From: Daniel Ji Date: Wed, 31 Jul 2024 17:34:29 -0700 Subject: [PATCH 4/4] Remove TODO statements, fix dataset_config_merge.py cmd line arg, remake docs / schema models.py with correct LinkML version (#179) * Small schema & dataset config fixes * remove dataset_config_merge global variable * add black as schema dep --- .../scripts/dataset_config_merge.py | 62 +++++++------- schema/poetry.lock | 84 ++++++++++++++++++- schema/pyproject.toml | 1 + schema/v1.1.0/dataset_config.yaml | 1 - .../v1.1.0/dataset_config_models_extended.py | 2 - .../metadata-docs/affiliation_address.md | 2 +- .../metadata-docs/affiliation_identifier.md | 2 +- .../v1.1.0/metadata-docs/affiliation_name.md | 2 +- schema/v1.1.0/metadata-docs/authors.md | 4 +- schema/v1.1.0/metadata-docs/binning.md | 2 +- schema/v1.1.0/metadata-docs/cell_component.md | 2 +- schema/v1.1.0/metadata-docs/cell_strain.md | 2 +- schema/v1.1.0/metadata-docs/cell_type.md | 2 +- .../corresponding_author_status.md | 2 +- .../v1.1.0/metadata-docs/cross_references.md | 2 +- .../v1.1.0/metadata-docs/dataset_citations.md | 2 +- schema/v1.1.0/metadata-docs/dates.md | 2 +- .../v1.1.0/metadata-docs/deposition_date.md | 2 +- schema/v1.1.0/metadata-docs/email.md | 2 +- schema/v1.1.0/metadata-docs/file_format.md | 8 +- schema/v1.1.0/metadata-docs/filter_value.md | 2 +- schema/v1.1.0/metadata-docs/glob_string.md | 8 +- schema/v1.1.0/metadata-docs/glob_strings.md | 8 +- .../v1.1.0/metadata-docs/grid_preparation.md | 2 +- schema/v1.1.0/metadata-docs/id.md | 4 +- .../metadata-docs/is_visualization_default.md | 8 +- .../metadata-docs/last_modified_date.md | 2 +- schema/v1.1.0/metadata-docs/name.md | 12 +-- schema/v1.1.0/metadata-docs/order.md | 2 +- 
schema/v1.1.0/metadata-docs/organism.md | 2 +- schema/v1.1.0/metadata-docs/other_setup.md | 2 +- .../metadata-docs/primary_author_status.md | 2 +- schema/v1.1.0/metadata-docs/publications.md | 2 +- .../metadata-docs/related_database_entries.md | 2 +- .../metadata-docs/related_database_links.md | 2 +- schema/v1.1.0/metadata-docs/release_date.md | 2 +- .../metadata-docs/sample_preparation.md | 2 +- schema/v1.1.0/metadata-docs/sample_type.md | 2 +- schema/v1.1.0/metadata-docs/tissue.md | 2 +- schema/v1.1.0/metadata-docs/x.md | 2 +- schema/v1.1.0/metadata-docs/y.md | 2 +- schema/v1.1.0/metadata-docs/z.md | 2 +- 42 files changed, 174 insertions(+), 88 deletions(-) diff --git a/ingestion_tools/scripts/dataset_config_merge.py b/ingestion_tools/scripts/dataset_config_merge.py index 99b2f8d1d..b88851620 100644 --- a/ingestion_tools/scripts/dataset_config_merge.py +++ b/ingestion_tools/scripts/dataset_config_merge.py @@ -1,6 +1,6 @@ """ Usage (from ingestion_tools/scripts directory): -python dataset_config_merge.py (--unique_values) +python dataset_config_merge.py (--unique-values) Will write to OUTPUT_FILE (see below) @@ -37,18 +37,18 @@ DATASET_CONFIGS_FOLDER = "../dataset_configs/" OUTPUT_FILE = DATASET_CONFIGS_FOLDER + "dataset_config_merged.yaml" -unique_values = False - """ Merges two lists together, keeping only values that are of unique types (values are arbitrary, as long as they are unique). -If unique_values is true, not only are unique types kept, but unique values are kept as well. +If keep_unique_values is true, not only are unique types kept, but unique values are kept as well. 
""" -def keep_unique_datatypes(original_list: list, new_value: Union[int, float, str, bool, list, datetime.date]) -> list: - global unique_values - +def keep_unique_datatypes( + original_list: list, + new_value: Union[int, float, str, bool, list, datetime.date, None], + keep_unique_values: bool, +) -> list: if new_value is None: return original_list @@ -59,7 +59,7 @@ def keep_unique_datatypes(original_list: list, new_value: Union[int, float, str, return [new_value] new_list = original_list.copy() - if unique_values and new_value not in original_list: + if keep_unique_values and new_value not in original_list: new_list.append(new_value) elif type(new_value) not in [type(value) for value in original_list]: new_list.append(new_value) @@ -71,7 +71,7 @@ def keep_unique_datatypes(original_list: list, new_value: Union[int, float, str, """ Runs a data check on non-dict items and reports any warnings about potential different-type attributes across files. -If unique_values is true, not only are unique types kept, but unique values are kept as well. +If keep_unique_values is true, not only are unique types kept, but unique values are kept as well. Returns False when there are conflicting datatypes, otherwise true. 
""" @@ -80,8 +80,8 @@ def keep_unique_datatypes(original_list: list, new_value: Union[int, float, str, def primitive_data_check( original_value: Union[int, float, str, bool, list, datetime.date], new_value: Union[int, float, str, bool, list, datetime.date], + keep_unique_values: bool, ) -> bool: - global unique_values if original_value is None: return True @@ -96,7 +96,7 @@ def primitive_data_check( print(f"Warning: Data type conflict: {original_value} | {new_value}") return False - if unique_values and original_value != new_value: + if keep_unique_values and original_value != new_value: return False return True @@ -112,7 +112,12 @@ def primitive_data_check( """ -def recursive_dict_update_list_helper(current_entries: dict, key: str, new_entry_values: list) -> dict: +def recursive_dict_update_list_helper( + current_entries: dict, + key: str, + new_entry_values: list, + keep_unique_values: bool, +) -> dict: # nothing to update with if len(new_entry_values) == 0: return current_entries @@ -142,10 +147,14 @@ def recursive_dict_update_list_helper(current_entries: dict, key: str, new_entry f"Unresolvable conflict for {current_entries[key]} (type: {type(current_entries[key])}), {new_entry_values[i]} (type: {type(new_entry_values[i])})", ) - current_entries[key] = recursive_dict_update(corresponding_entry, new_entry_values[i]) + current_entries[key] = recursive_dict_update(corresponding_entry, new_entry_values[i], keep_unique_values) else: - primitive_data_check(current_entries.get(key), new_entry_values) - current_entries[key] = keep_unique_datatypes(current_entries.get(key, []), new_entry_values[i]) + primitive_data_check(current_entries.get(key), new_entry_values, keep_unique_values) + current_entries[key] = keep_unique_datatypes( + current_entries.get(key, []), + new_entry_values[i], + keep_unique_values, + ) # if new_entry_values was a list of dictionaries, the current_entries[key] will be a dictionary instead of a list (recursive_dict_update returns a dictionary) # 
so we need to convert it back to a list if not isinstance(current_entries[key], list): @@ -162,7 +171,7 @@ def recursive_dict_update_list_helper(current_entries: dict, key: str, new_entry """ -def recursive_dict_update(current_entries: dict, new_entries: dict) -> dict: +def recursive_dict_update(current_entries: dict, new_entries: dict, keep_unique_values: bool) -> dict: for key, new_value in new_entries.items(): # Regular scenarios if new_value is None: @@ -171,19 +180,19 @@ def recursive_dict_update(current_entries: dict, new_entries: dict) -> dict: if isinstance(new_value, dict) and ( isinstance(current_entries.get(key), dict) or current_entries.get(key) is None ): - current_entries[key] = recursive_dict_update(current_entries.get(key, {}), new_value) + current_entries[key] = recursive_dict_update(current_entries.get(key, {}), new_value, keep_unique_values) # current value: list and new value: list situation elif isinstance(new_value, list) and ( isinstance(current_entries.get(key), list) or current_entries.get(key) is None ): - current_entries = recursive_dict_update_list_helper(current_entries, key, new_value) + current_entries = recursive_dict_update_list_helper(current_entries, key, new_value, keep_unique_values) # current value: dict and new value: non-dict (list or primitive) elif isinstance(current_entries.get(key), dict) and not isinstance(new_value, dict): # edge case: current value: dict and a new value: list, add the dict to the list if isinstance(new_value, list): new_list = new_value + [current_entries.get(key)] # and then now it is list, list situation - current_entries = recursive_dict_update_list_helper(current_entries, key, new_list) + current_entries = recursive_dict_update_list_helper(current_entries, key, new_list, keep_unique_values) # edge case: current value: dict and a new value: primitive, just keep the dict and print a warning else: print("type conflict:") @@ -196,13 +205,13 @@ def recursive_dict_update(current_entries: dict, 
new_entries: dict) -> dict: if isinstance(new_value, dict): new_list = [new_value] + current_entries.get(key) # and then now it is list, list situation - current_entries = recursive_dict_update_list_helper(current_entries, key, new_list) + current_entries = recursive_dict_update_list_helper(current_entries, key, new_list, keep_unique_values) # edge case: current-value: list and a new value: primitive, just keep the list and print a warning else: - current_entries[key] = keep_unique_datatypes(current_entries.get(key), new_value) + current_entries[key] = keep_unique_datatypes(current_entries.get(key), new_value, keep_unique_values) # non-dict, non-list (primitive) and non-dict, non-list (primitive) else: - datatypes_match = primitive_data_check(current_entries.get(key, None), new_value) + datatypes_match = primitive_data_check(current_entries.get(key, None), new_value, keep_unique_values) # if the primitive datatypes don't match, create a new list representing multiple datatypes (but not multivalued attribute necessarily) if not datatypes_match: current_entries[key] = [new_value, current_entries[key]] @@ -214,14 +223,11 @@ def recursive_dict_update(current_entries: dict, new_entries: dict) -> dict: @click.command() @click.option( - "--keep_unique_values", + "--unique-values", is_flag=True, help="If set, not only are unique types kept, but unique values are kept as well. 
Note that this works only for primitive types, and non-multivalued attributes may display as multivalued (because they are represented as a list of unique values).", ) -def main(keep_unique_values: bool): - global unique_values - unique_values = keep_unique_values - +def main(unique_values: bool): all_files: list[str] = [ os.path.join(directory_path, file) for directory_path, _, filename in os.walk(os.path.expanduser(DATASET_CONFIGS_FOLDER)) @@ -240,7 +246,7 @@ def main(keep_unique_values: bool): config_file: dict = yaml.safe_load(stream) # a temp is created to avoid a half-merged config file after an exception since recursive_dict_update modifies in-place temp_unified_config = unified_config.copy() - recursive_dict_update(temp_unified_config, config_file) + recursive_dict_update(temp_unified_config, config_file, keep_unique_values=unique_values) unified_config = temp_unified_config except yaml.YAMLError as exc: print(exc) diff --git a/schema/poetry.lock b/schema/poetry.lock index 02f3a2704..ee02a81d8 100644 --- a/schema/poetry.lock +++ b/schema/poetry.lock @@ -179,6 +179,50 @@ tests = ["attrs[tests-no-zope]", "zope-interface"] tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +[[package]] +name = "black" +version = "24.4.2" +description = "The uncompromising code formatter." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "black-24.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dd1b5a14e417189db4c7b64a6540f31730713d173f0b63e55fabd52d61d8fdce"}, + {file = "black-24.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e537d281831ad0e71007dcdcbe50a71470b978c453fa41ce77186bbe0ed6021"}, + {file = "black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaea3008c281f1038edb473c1aa8ed8143a5535ff18f978a318f10302b254063"}, + {file = "black-24.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:7768a0dbf16a39aa5e9a3ded568bb545c8c2727396d063bbaf847df05b08cd96"}, + {file = "black-24.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:257d724c2c9b1660f353b36c802ccece186a30accc7742c176d29c146df6e474"}, + {file = "black-24.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bdde6f877a18f24844e381d45e9947a49e97933573ac9d4345399be37621e26c"}, + {file = "black-24.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e151054aa00bad1f4e1f04919542885f89f5f7d086b8a59e5000e6c616896ffb"}, + {file = "black-24.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:7e122b1c4fb252fd85df3ca93578732b4749d9be076593076ef4d07a0233c3e1"}, + {file = "black-24.4.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:accf49e151c8ed2c0cdc528691838afd217c50412534e876a19270fea1e28e2d"}, + {file = "black-24.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:88c57dc656038f1ab9f92b3eb5335ee9b021412feaa46330d5eba4e51fe49b04"}, + {file = "black-24.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be8bef99eb46d5021bf053114442914baeb3649a89dc5f3a555c88737e5e98fc"}, + {file = "black-24.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:415e686e87dbbe6f4cd5ef0fbf764af7b89f9057b97c908742b6008cc554b9c0"}, + {file = "black-24.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bf10f7310db693bb62692609b397e8d67257c55f949abde4c67f9cc574492cc7"}, + {file = 
"black-24.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:98e123f1d5cfd42f886624d84464f7756f60ff6eab89ae845210631714f6db94"}, + {file = "black-24.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48a85f2cb5e6799a9ef05347b476cce6c182d6c71ee36925a6c194d074336ef8"}, + {file = "black-24.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:b1530ae42e9d6d5b670a34db49a94115a64596bc77710b1d05e9801e62ca0a7c"}, + {file = "black-24.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:37aae07b029fa0174d39daf02748b379399b909652a806e5708199bd93899da1"}, + {file = "black-24.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:da33a1a5e49c4122ccdfd56cd021ff1ebc4a1ec4e2d01594fef9b6f267a9e741"}, + {file = "black-24.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef703f83fc32e131e9bcc0a5094cfe85599e7109f896fe8bc96cc402f3eb4b6e"}, + {file = "black-24.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:b9176b9832e84308818a99a561e90aa479e73c523b3f77afd07913380ae2eab7"}, + {file = "black-24.4.2-py3-none-any.whl", hash = "sha256:d36ed1124bb81b32f8614555b34cc4259c3fbc7eec17870e8ff8ded335b58d8c"}, + {file = "black-24.4.2.tar.gz", hash = "sha256:c872b53057f000085da66a19c55d68f6f8ddcac2642392ad3a355878406fbd4d"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + [[package]] name = "certifi" version = "2024.7.4" @@ -1028,6 +1072,17 @@ files = [ {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, ] +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + [[package]] name = "numpy" version = "1.26.4" @@ -1109,6 +1164,33 @@ files = [ {file = "parse-1.20.2.tar.gz", hash = "sha256:b41d604d16503c79d81af5165155c0b20f6c8d6c559efa66b4b695c3e5a0a0ce"}, ] +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + +[[package]] +name = "platformdirs" +version = "4.2.2" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, + {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] +type = ["mypy (>=1.8)"] + [[package]] name = "pluggy" version = "1.5.0" @@ -2233,4 +2315,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "a4af0ee9b4816ebd60fea2480c2bc0978e3127e9351ec755e949fc9bfddf4935" +content-hash = "a549bd2abe8fe853005098d5abb7c2c002c08a9b19c97c54e02520cbbf28c0b6" diff --git a/schema/pyproject.toml b/schema/pyproject.toml index e0b2ba5d9..97b0b06d1 100644 --- a/schema/pyproject.toml +++ b/schema/pyproject.toml @@ -17,6 +17,7 @@ numpy = "^1.26.4" pydantic = "^2.8.2" typing-extensions = "^4.12.2" pyyaml = "^6.0.1" +black = "^24.4.2" [tool.black] line-length = 120 diff --git a/schema/v1.1.0/dataset_config.yaml b/schema/v1.1.0/dataset_config.yaml index 92b9c9608..7708bd9be 100644 --- a/schema/v1.1.0/dataset_config.yaml +++ b/schema/v1.1.0/dataset_config.yaml @@ -119,7 +119,6 @@ classes: range: string required: true - # TODO: LinkML doesn't support all the validation for sources, so we'll need to extend it more in Python / JSON Schema DefaultSource: description: A generalized source class with glob finders. 
attributes: diff --git a/schema/v1.1.0/dataset_config_models_extended.py b/schema/v1.1.0/dataset_config_models_extended.py index 51f417132..441563939 100644 --- a/schema/v1.1.0/dataset_config_models_extended.py +++ b/schema/v1.1.0/dataset_config_models_extended.py @@ -805,7 +805,6 @@ class ExtendedValidationDepositionKeyPhotoEntity(DepositionKeyPhotoEntity): @field_validator("sources") @classmethod def valid_sources(cls: Self, source_list: List[KeyPhotoSource]) -> List[KeyPhotoSource]: - # TODO: change "deposition_keyphoto" to the correct importer type when it gets implemented return validate_sources_parent_filters(source_list, "deposition_keyphoto") @@ -833,7 +832,6 @@ class ExtendedValidationDepositionEntity(DepositionEntity): @field_validator("sources") @classmethod def valid_sources(cls: Self, source_list: List[DepositionSource]) -> List[DepositionSource]: - # TODO: change "deposition" to the correct importer type return validate_sources(source_list, "deposition", skip_parent_filters=True) diff --git a/schema/v1.1.0/metadata-docs/affiliation_address.md b/schema/v1.1.0/metadata-docs/affiliation_address.md index 973bfe8f6..bc793b718 100644 --- a/schema/v1.1.0/metadata-docs/affiliation_address.md +++ b/schema/v1.1.0/metadata-docs/affiliation_address.md @@ -16,8 +16,8 @@ URI: [cdp-meta:affiliation_address](metadataaffiliation_address) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Author](Author.md) | Author of a scientific data entity | no | | [AuthorMixin](AuthorMixin.md) | An entity with author data | no | +| [Author](Author.md) | Author of a scientific data entity | no | diff --git a/schema/v1.1.0/metadata-docs/affiliation_identifier.md b/schema/v1.1.0/metadata-docs/affiliation_identifier.md index d9d684bfd..cdca5c467 100644 --- a/schema/v1.1.0/metadata-docs/affiliation_identifier.md +++ b/schema/v1.1.0/metadata-docs/affiliation_identifier.md @@ -16,8 +16,8 @@ URI: [cdp-meta:affiliation_identifier](metadataaffiliation_identifier) | Name | 
Description | Modifies Slot | | --- | --- | --- | -| [Author](Author.md) | Author of a scientific data entity | no | | [AuthorMixin](AuthorMixin.md) | An entity with author data | no | +| [Author](Author.md) | Author of a scientific data entity | no | diff --git a/schema/v1.1.0/metadata-docs/affiliation_name.md b/schema/v1.1.0/metadata-docs/affiliation_name.md index d865a139f..3f3b517bf 100644 --- a/schema/v1.1.0/metadata-docs/affiliation_name.md +++ b/schema/v1.1.0/metadata-docs/affiliation_name.md @@ -16,8 +16,8 @@ URI: [cdp-meta:affiliation_name](metadataaffiliation_name) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Author](Author.md) | Author of a scientific data entity | no | | [AuthorMixin](AuthorMixin.md) | An entity with author data | no | +| [Author](Author.md) | Author of a scientific data entity | no | diff --git a/schema/v1.1.0/metadata-docs/authors.md b/schema/v1.1.0/metadata-docs/authors.md index 715dedbb2..042c4f321 100644 --- a/schema/v1.1.0/metadata-docs/authors.md +++ b/schema/v1.1.0/metadata-docs/authors.md @@ -16,11 +16,11 @@ URI: [cdp-meta:authors](metadataauthors) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | | [Tomogram](Tomogram.md) | Metadata describing a tomogram | no | | [Deposition](Deposition.md) | Metadata describing a deposition | no | -| [AuthoredEntity](AuthoredEntity.md) | An entity with associated authors | no | | [Annotation](Annotation.md) | Metadata describing an annotation | no | +| [AuthoredEntity](AuthoredEntity.md) | An entity with associated authors | no | +| [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | diff --git a/schema/v1.1.0/metadata-docs/binning.md b/schema/v1.1.0/metadata-docs/binning.md index f4abaf427..65615f7c8 100644 --- a/schema/v1.1.0/metadata-docs/binning.md +++ b/schema/v1.1.0/metadata-docs/binning.md @@ -16,8 +16,8 @@ URI: [cdp-meta:binning](metadatabinning) | Name | 
Description | Modifies Slot | | --- | --- | --- | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | diff --git a/schema/v1.1.0/metadata-docs/cell_component.md b/schema/v1.1.0/metadata-docs/cell_component.md index 02979edfa..67015dde6 100644 --- a/schema/v1.1.0/metadata-docs/cell_component.md +++ b/schema/v1.1.0/metadata-docs/cell_component.md @@ -16,8 +16,8 @@ URI: [cdp-meta:cell_component](metadatacell_component) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata-docs/cell_strain.md b/schema/v1.1.0/metadata-docs/cell_strain.md index 2f9ad3712..de2601565 100644 --- a/schema/v1.1.0/metadata-docs/cell_strain.md +++ b/schema/v1.1.0/metadata-docs/cell_strain.md @@ -16,8 +16,8 @@ URI: [cdp-meta:cell_strain](metadatacell_strain) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... 
| no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata-docs/cell_type.md b/schema/v1.1.0/metadata-docs/cell_type.md index 6ad5e3274..d90ff4802 100644 --- a/schema/v1.1.0/metadata-docs/cell_type.md +++ b/schema/v1.1.0/metadata-docs/cell_type.md @@ -16,8 +16,8 @@ URI: [cdp-meta:cell_type](metadatacell_type) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata-docs/corresponding_author_status.md b/schema/v1.1.0/metadata-docs/corresponding_author_status.md index 08cc9b7d1..dc1be7e47 100644 --- a/schema/v1.1.0/metadata-docs/corresponding_author_status.md +++ b/schema/v1.1.0/metadata-docs/corresponding_author_status.md @@ -16,8 +16,8 @@ URI: [cdp-meta:corresponding_author_status](metadatacorresponding_author_status) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Author](Author.md) | Author of a scientific data entity | no | | [AuthorMixin](AuthorMixin.md) | An entity with author data | no | +| [Author](Author.md) | Author of a scientific data entity | no | diff --git a/schema/v1.1.0/metadata-docs/cross_references.md b/schema/v1.1.0/metadata-docs/cross_references.md index 89e6bc20e..a218af64a 100644 --- a/schema/v1.1.0/metadata-docs/cross_references.md +++ b/schema/v1.1.0/metadata-docs/cross_references.md @@ -17,8 +17,8 @@ URI: [cdp-meta:cross_references](metadatacross_references) | Name | Description | Modifies Slot | | --- | --- | --- | | [Deposition](Deposition.md) | 
Metadata describing a deposition | no | -| [CrossReferencedEntity](CrossReferencedEntity.md) | An entity with associated cross-references to other databases and publication... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [CrossReferencedEntity](CrossReferencedEntity.md) | An entity with associated cross-references to other databases and publication... | no | diff --git a/schema/v1.1.0/metadata-docs/dataset_citations.md b/schema/v1.1.0/metadata-docs/dataset_citations.md index 094e8ede0..1e5beac9e 100644 --- a/schema/v1.1.0/metadata-docs/dataset_citations.md +++ b/schema/v1.1.0/metadata-docs/dataset_citations.md @@ -16,8 +16,8 @@ URI: [cdp-meta:dataset_citations](metadatadataset_citations) | Name | Description | Modifies Slot | | --- | --- | --- | -| [CrossReferencesMixin](CrossReferencesMixin.md) | A set of cross-references to other databases and publications | no | | [CrossReferences](CrossReferences.md) | A set of cross-references to other databases and publications | no | +| [CrossReferencesMixin](CrossReferencesMixin.md) | A set of cross-references to other databases and publications | no | diff --git a/schema/v1.1.0/metadata-docs/dates.md b/schema/v1.1.0/metadata-docs/dates.md index 4b2ea1c86..2720ef1a5 100644 --- a/schema/v1.1.0/metadata-docs/dates.md +++ b/schema/v1.1.0/metadata-docs/dates.md @@ -16,10 +16,10 @@ URI: [cdp-meta:dates](metadatadates) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Annotation](Annotation.md) | Metadata describing an annotation | no | | [Deposition](Deposition.md) | Metadata describing a deposition | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | | [DateStampedEntity](DateStampedEntity.md) | An entity with associated deposition, release and last modified dates | no | +| [Annotation](Annotation.md) | Metadata describing an annotation | no | diff --git a/schema/v1.1.0/metadata-docs/deposition_date.md 
b/schema/v1.1.0/metadata-docs/deposition_date.md index b35201f44..83f47deb9 100644 --- a/schema/v1.1.0/metadata-docs/deposition_date.md +++ b/schema/v1.1.0/metadata-docs/deposition_date.md @@ -16,8 +16,8 @@ URI: [cdp-meta:deposition_date](metadatadeposition_date) | Name | Description | Modifies Slot | | --- | --- | --- | -| [DateStampedEntityMixin](DateStampedEntityMixin.md) | A set of dates at which a data item was deposited, published and last modifie... | no | | [DateStamp](DateStamp.md) | A set of dates at which a data item was deposited, published and last modifie... | no | +| [DateStampedEntityMixin](DateStampedEntityMixin.md) | A set of dates at which a data item was deposited, published and last modifie... | no | diff --git a/schema/v1.1.0/metadata-docs/email.md b/schema/v1.1.0/metadata-docs/email.md index 8c1f16329..d589e8c4c 100644 --- a/schema/v1.1.0/metadata-docs/email.md +++ b/schema/v1.1.0/metadata-docs/email.md @@ -16,8 +16,8 @@ URI: [cdp-meta:email](metadataemail) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Author](Author.md) | Author of a scientific data entity | no | | [AuthorMixin](AuthorMixin.md) | An entity with author data | no | +| [Author](Author.md) | Author of a scientific data entity | no | diff --git a/schema/v1.1.0/metadata-docs/file_format.md b/schema/v1.1.0/metadata-docs/file_format.md index d744b7b39..48ea3125e 100644 --- a/schema/v1.1.0/metadata-docs/file_format.md +++ b/schema/v1.1.0/metadata-docs/file_format.md @@ -16,12 +16,12 @@ URI: [cdp-meta:file_format](metadatafile_format) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationSemanticSegmentationMaskFile](AnnotationSemanticSegmentationMaskFile.md) | File and sourcing data for a semantic 
segmentation mask annotation | no | -| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | -| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | | [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | +| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | +| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | +| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | diff --git a/schema/v1.1.0/metadata-docs/filter_value.md b/schema/v1.1.0/metadata-docs/filter_value.md index dd4d29ef9..dade1978c 100644 --- a/schema/v1.1.0/metadata-docs/filter_value.md +++ b/schema/v1.1.0/metadata-docs/filter_value.md @@ -16,8 +16,8 @@ URI: [cdp-meta:filter_value](metadatafilter_value) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | diff --git a/schema/v1.1.0/metadata-docs/glob_string.md b/schema/v1.1.0/metadata-docs/glob_string.md index 1e5259104..2021b253a 100644 --- a/schema/v1.1.0/metadata-docs/glob_string.md +++ b/schema/v1.1.0/metadata-docs/glob_string.md @@ -16,12 +16,12 @@ URI: 
[cdp-meta:glob_string](metadataglob_string) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationSemanticSegmentationMaskFile](AnnotationSemanticSegmentationMaskFile.md) | File and sourcing data for a semantic segmentation mask annotation | no | -| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | -| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | | [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | +| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | +| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | +| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | diff --git a/schema/v1.1.0/metadata-docs/glob_strings.md b/schema/v1.1.0/metadata-docs/glob_strings.md index c93d58393..59c90f886 100644 --- a/schema/v1.1.0/metadata-docs/glob_strings.md +++ b/schema/v1.1.0/metadata-docs/glob_strings.md @@ -16,12 +16,12 @@ URI: [cdp-meta:glob_strings](metadataglob_strings) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an 
instance segmentation annotation | no | | [AnnotationSemanticSegmentationMaskFile](AnnotationSemanticSegmentationMaskFile.md) | File and sourcing data for a semantic segmentation mask annotation | no | -| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | -| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | | [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | +| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | +| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | +| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | diff --git a/schema/v1.1.0/metadata-docs/grid_preparation.md b/schema/v1.1.0/metadata-docs/grid_preparation.md index 6af98bced..e12dc162c 100644 --- a/schema/v1.1.0/metadata-docs/grid_preparation.md +++ b/schema/v1.1.0/metadata-docs/grid_preparation.md @@ -16,8 +16,8 @@ URI: [cdp-meta:grid_preparation](metadatagrid_preparation) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... 
| no | diff --git a/schema/v1.1.0/metadata-docs/id.md b/schema/v1.1.0/metadata-docs/id.md index 1fdf9e430..0e4fa14e0 100644 --- a/schema/v1.1.0/metadata-docs/id.md +++ b/schema/v1.1.0/metadata-docs/id.md @@ -16,10 +16,10 @@ URI: [cdp-meta:id](metadataid) | Name | Description | Modifies Slot | | --- | --- | --- | +| [AnnotationObject](AnnotationObject.md) | Metadata describing the object being annotated | no | +| [TissueDetails](TissueDetails.md) | The type of tissue from which the sample was derived | no | | [CellComponent](CellComponent.md) | The cellular component from which the sample was derived | no | | [CellStrain](CellStrain.md) | The strain or cell line from which the sample was derived | no | -| [TissueDetails](TissueDetails.md) | The type of tissue from which the sample was derived | no | -| [AnnotationObject](AnnotationObject.md) | Metadata describing the object being annotated | no | | [CellType](CellType.md) | The cell type from which the sample was derived | no | diff --git a/schema/v1.1.0/metadata-docs/is_visualization_default.md b/schema/v1.1.0/metadata-docs/is_visualization_default.md index 6eed74948..2d4ba7647 100644 --- a/schema/v1.1.0/metadata-docs/is_visualization_default.md +++ b/schema/v1.1.0/metadata-docs/is_visualization_default.md @@ -16,12 +16,12 @@ URI: [cdp-meta:is_visualization_default](metadatais_visualization_default) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationSemanticSegmentationMaskFile](AnnotationSemanticSegmentationMaskFile.md) | File and sourcing data for a semantic segmentation mask annotation | no | -| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | -| 
[AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | | [AnnotationSourceFile](AnnotationSourceFile.md) | File and sourcing data for an annotation | no | +| [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | +| [AnnotationSegmentationMaskFile](AnnotationSegmentationMaskFile.md) | File and sourcing data for a segmentation mask annotation | no | +| [AnnotationPointFile](AnnotationPointFile.md) | File and sourcing data for a point annotation | no | diff --git a/schema/v1.1.0/metadata-docs/last_modified_date.md b/schema/v1.1.0/metadata-docs/last_modified_date.md index 9fd06b7bb..df2ed6694 100644 --- a/schema/v1.1.0/metadata-docs/last_modified_date.md +++ b/schema/v1.1.0/metadata-docs/last_modified_date.md @@ -16,8 +16,8 @@ URI: [cdp-meta:last_modified_date](metadatalast_modified_date) | Name | Description | Modifies Slot | | --- | --- | --- | -| [DateStampedEntityMixin](DateStampedEntityMixin.md) | A set of dates at which a data item was deposited, published and last modifie... | no | | [DateStamp](DateStamp.md) | A set of dates at which a data item was deposited, published and last modifie... | no | +| [DateStampedEntityMixin](DateStampedEntityMixin.md) | A set of dates at which a data item was deposited, published and last modifie... 
| no | diff --git a/schema/v1.1.0/metadata-docs/name.md b/schema/v1.1.0/metadata-docs/name.md index 1f8079748..c10c0e9f8 100644 --- a/schema/v1.1.0/metadata-docs/name.md +++ b/schema/v1.1.0/metadata-docs/name.md @@ -16,15 +16,15 @@ URI: [cdp-meta:name](metadataname) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | -| [AnnotationMethodLinks](AnnotationMethodLinks.md) | A set of links to models, sourcecode, documentation, etc referenced by annota... | no | -| [CellStrain](CellStrain.md) | The strain or cell line from which the sample was derived | no | -| [CellComponent](CellComponent.md) | The cellular component from which the sample was derived | no | -| [TissueDetails](TissueDetails.md) | The type of tissue from which the sample was derived | no | | [AnnotationObject](AnnotationObject.md) | Metadata describing the object being annotated | no | +| [TissueDetails](TissueDetails.md) | The type of tissue from which the sample was derived | no | | [Author](Author.md) | Author of a scientific data entity | no | -| [CellType](CellType.md) | The cell type from which the sample was derived | no | +| [AuthorMixin](AuthorMixin.md) | An entity with author data | no | +| [CellComponent](CellComponent.md) | The cellular component from which the sample was derived | no | | [OrganismDetails](OrganismDetails.md) | The species from which the sample was derived | no | +| [CellStrain](CellStrain.md) | The strain or cell line from which the sample was derived | no | +| [AnnotationMethodLinks](AnnotationMethodLinks.md) | A set of links to models, sourcecode, documentation, etc referenced by annota... 
| no | +| [CellType](CellType.md) | The cell type from which the sample was derived | no | diff --git a/schema/v1.1.0/metadata-docs/order.md b/schema/v1.1.0/metadata-docs/order.md index 0267a3091..a0a9aadcd 100644 --- a/schema/v1.1.0/metadata-docs/order.md +++ b/schema/v1.1.0/metadata-docs/order.md @@ -16,8 +16,8 @@ URI: [cdp-meta:order](metadataorder) | Name | Description | Modifies Slot | | --- | --- | --- | -| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | | [AnnotationOrientedPointFile](AnnotationOrientedPointFile.md) | File and sourcing data for an oriented point annotation | no | +| [AnnotationInstanceSegmentationFile](AnnotationInstanceSegmentationFile.md) | File and sourcing data for an instance segmentation annotation | no | diff --git a/schema/v1.1.0/metadata-docs/organism.md b/schema/v1.1.0/metadata-docs/organism.md index 97abe1618..09e3139ef 100644 --- a/schema/v1.1.0/metadata-docs/organism.md +++ b/schema/v1.1.0/metadata-docs/organism.md @@ -16,8 +16,8 @@ URI: [cdp-meta:organism](metadataorganism) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... 
| no | diff --git a/schema/v1.1.0/metadata-docs/other_setup.md b/schema/v1.1.0/metadata-docs/other_setup.md index 394ce8ce5..1932482d2 100644 --- a/schema/v1.1.0/metadata-docs/other_setup.md +++ b/schema/v1.1.0/metadata-docs/other_setup.md @@ -16,8 +16,8 @@ URI: [cdp-meta:other_setup](metadataother_setup) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata-docs/primary_author_status.md b/schema/v1.1.0/metadata-docs/primary_author_status.md index b46aa989c..3ce412ffc 100644 --- a/schema/v1.1.0/metadata-docs/primary_author_status.md +++ b/schema/v1.1.0/metadata-docs/primary_author_status.md @@ -16,8 +16,8 @@ URI: [cdp-meta:primary_author_status](metadataprimary_author_status) | Name | Description | Modifies Slot | | --- | --- | --- | -| [Author](Author.md) | Author of a scientific data entity | no | | [AuthorMixin](AuthorMixin.md) | An entity with author data | no | +| [Author](Author.md) | Author of a scientific data entity | no | diff --git a/schema/v1.1.0/metadata-docs/publications.md b/schema/v1.1.0/metadata-docs/publications.md index 313a8c39a..d016823f2 100644 --- a/schema/v1.1.0/metadata-docs/publications.md +++ b/schema/v1.1.0/metadata-docs/publications.md @@ -16,8 +16,8 @@ URI: [cdp-meta:publications](metadatapublications) | Name | Description | Modifies Slot | | --- | --- | --- | -| [CrossReferencesMixin](CrossReferencesMixin.md) | A set of cross-references to other databases and publications | no | | [CrossReferences](CrossReferences.md) | A set of cross-references to other databases and publications | no | +| [CrossReferencesMixin](CrossReferencesMixin.md) | A set of 
cross-references to other databases and publications | no | diff --git a/schema/v1.1.0/metadata-docs/related_database_entries.md b/schema/v1.1.0/metadata-docs/related_database_entries.md index 8a09c428e..2afd9cec9 100644 --- a/schema/v1.1.0/metadata-docs/related_database_entries.md +++ b/schema/v1.1.0/metadata-docs/related_database_entries.md @@ -16,8 +16,8 @@ URI: [cdp-meta:related_database_entries](metadatarelated_database_entries) | Name | Description | Modifies Slot | | --- | --- | --- | -| [CrossReferencesMixin](CrossReferencesMixin.md) | A set of cross-references to other databases and publications | no | | [CrossReferences](CrossReferences.md) | A set of cross-references to other databases and publications | no | +| [CrossReferencesMixin](CrossReferencesMixin.md) | A set of cross-references to other databases and publications | no | diff --git a/schema/v1.1.0/metadata-docs/related_database_links.md b/schema/v1.1.0/metadata-docs/related_database_links.md index 69a529ee5..4f8147af5 100644 --- a/schema/v1.1.0/metadata-docs/related_database_links.md +++ b/schema/v1.1.0/metadata-docs/related_database_links.md @@ -16,8 +16,8 @@ URI: [cdp-meta:related_database_links](metadatarelated_database_links) | Name | Description | Modifies Slot | | --- | --- | --- | -| [CrossReferencesMixin](CrossReferencesMixin.md) | A set of cross-references to other databases and publications | no | | [CrossReferences](CrossReferences.md) | A set of cross-references to other databases and publications | no | +| [CrossReferencesMixin](CrossReferencesMixin.md) | A set of cross-references to other databases and publications | no | diff --git a/schema/v1.1.0/metadata-docs/release_date.md b/schema/v1.1.0/metadata-docs/release_date.md index 54c8905e1..64f0fa0be 100644 --- a/schema/v1.1.0/metadata-docs/release_date.md +++ b/schema/v1.1.0/metadata-docs/release_date.md @@ -16,8 +16,8 @@ URI: [cdp-meta:release_date](metadatarelease_date) | Name | Description | Modifies Slot | | --- | --- | --- | -| 
[DateStampedEntityMixin](DateStampedEntityMixin.md) | A set of dates at which a data item was deposited, published and last modifie... | no | | [DateStamp](DateStamp.md) | A set of dates at which a data item was deposited, published and last modifie... | no | +| [DateStampedEntityMixin](DateStampedEntityMixin.md) | A set of dates at which a data item was deposited, published and last modifie... | no | diff --git a/schema/v1.1.0/metadata-docs/sample_preparation.md b/schema/v1.1.0/metadata-docs/sample_preparation.md index 8d8ff8e50..339ac7e57 100644 --- a/schema/v1.1.0/metadata-docs/sample_preparation.md +++ b/schema/v1.1.0/metadata-docs/sample_preparation.md @@ -16,8 +16,8 @@ URI: [cdp-meta:sample_preparation](metadatasample_preparation) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata-docs/sample_type.md b/schema/v1.1.0/metadata-docs/sample_type.md index 4ee08059b..6309bf7fc 100644 --- a/schema/v1.1.0/metadata-docs/sample_type.md +++ b/schema/v1.1.0/metadata-docs/sample_type.md @@ -16,8 +16,8 @@ URI: [cdp-meta:sample_type](metadatasample_type) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... 
| no | diff --git a/schema/v1.1.0/metadata-docs/tissue.md b/schema/v1.1.0/metadata-docs/tissue.md index 6fa5a0109..750ac8d31 100644 --- a/schema/v1.1.0/metadata-docs/tissue.md +++ b/schema/v1.1.0/metadata-docs/tissue.md @@ -16,8 +16,8 @@ URI: [cdp-meta:tissue](metadatatissue) | Name | Description | Modifies Slot | | --- | --- | --- | -| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | | [Dataset](Dataset.md) | High-level description of a cryoET dataset | no | +| [ExperimentMetadata](ExperimentMetadata.md) | Metadata describing sample and sample preparation methods used in a cryoET da... | no | diff --git a/schema/v1.1.0/metadata-docs/x.md b/schema/v1.1.0/metadata-docs/x.md index 1334542ee..ec4e28a24 100644 --- a/schema/v1.1.0/metadata-docs/x.md +++ b/schema/v1.1.0/metadata-docs/x.md @@ -16,8 +16,8 @@ URI: [cdp-meta:x](metadatax) | Name | Description | Modifies Slot | | --- | --- | --- | -| [TomogramOffset](TomogramOffset.md) | The offset of a tomogram in voxels in each dimension relative to the canonica... | no | | [TomogramSize](TomogramSize.md) | The size of a tomogram in voxels in each dimension | no | +| [TomogramOffset](TomogramOffset.md) | The offset of a tomogram in voxels in each dimension relative to the canonica... | no | diff --git a/schema/v1.1.0/metadata-docs/y.md b/schema/v1.1.0/metadata-docs/y.md index 7abde3482..aa0b51ebb 100644 --- a/schema/v1.1.0/metadata-docs/y.md +++ b/schema/v1.1.0/metadata-docs/y.md @@ -16,8 +16,8 @@ URI: [cdp-meta:y](metadatay) | Name | Description | Modifies Slot | | --- | --- | --- | -| [TomogramOffset](TomogramOffset.md) | The offset of a tomogram in voxels in each dimension relative to the canonica... | no | | [TomogramSize](TomogramSize.md) | The size of a tomogram in voxels in each dimension | no | +| [TomogramOffset](TomogramOffset.md) | The offset of a tomogram in voxels in each dimension relative to the canonica... 
| no | diff --git a/schema/v1.1.0/metadata-docs/z.md b/schema/v1.1.0/metadata-docs/z.md index 2ddd30456..0b0298979 100644 --- a/schema/v1.1.0/metadata-docs/z.md +++ b/schema/v1.1.0/metadata-docs/z.md @@ -16,8 +16,8 @@ URI: [cdp-meta:z](metadataz) | Name | Description | Modifies Slot | | --- | --- | --- | -| [TomogramOffset](TomogramOffset.md) | The offset of a tomogram in voxels in each dimension relative to the canonica... | no | | [TomogramSize](TomogramSize.md) | The size of a tomogram in voxels in each dimension | no | +| [TomogramOffset](TomogramOffset.md) | The offset of a tomogram in voxels in each dimension relative to the canonica... | no |