Skip to content

Commit

Permalink
fix annotation publications list, add SIMULATED to microscope manufac…
Browse files Browse the repository at this point in the history
…turer enum, and add UniProtKB id support to annotation object
  • Loading branch information
daniel-ji committed Aug 21, 2024
1 parent f21d4f3 commit 57e99e0
Show file tree
Hide file tree
Showing 12 changed files with 259 additions and 120 deletions.
2 changes: 1 addition & 1 deletion ingestion_tools/dataset_configs/template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ annotations: OPTIONAL
annotation_method: REQUIRED, STRING
annotation_object: REQUIRED
description: OPTIONAL, STRING
id: REQUIRED, STRING (GO_ID)
id: REQUIRED, STRING (GO_ID / UNIPROT_ID)
name: REQUIRED, STRING
state: OPTIONAL, STRING
annotation_publications: OPTIONAL, STRING (DOI / EMPIAR / EMDB / PDB IDs)
Expand Down
33 changes: 22 additions & 11 deletions schema/api/v1.0.0/codegen/api_models_materialized.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,12 @@ types:
from_schema: cdp-api-models
base: str
pattern: ^GO:[0-9]{7}$
# UniProtKB accession written with the `UniProtKB:` prefix.
# The two accession forms are wrapped in a single group so that the
# `^UniProtKB:` prefix and the `$` end anchor apply to BOTH alternatives.
# Ungrouped, the first alternative had no end anchor (trailing junk was
# accepted) and the second had neither the prefix nor a start anchor.
# The possessive quantifier `{1,2}+` is replaced by plain `{1,2}`: it
# matches the same strings here and is not supported by every regex engine
# (e.g. ECMAScript, Python `re` before 3.11).
# NOTE(review): this file lives under codegen/ and looks generated; apply
# the same pattern fix to the schema source it is materialized from.
UNIPROT_ID:
  name: UNIPROT_ID
  description: A UniProt identifier
  from_schema: cdp-api-models
  base: str
  pattern: '^UniProtKB:([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$'
WORMBASE_ID:
name: WORMBASE_ID
description: A WormBase identifier
Expand Down Expand Up @@ -336,19 +342,19 @@ types:
description: A Protein Data Bank identifier
from_schema: cdp-api-models
base: str
pattern: ^pdb[0-9a-zA-Z]{4,8}$
pattern: ^PDB-[0-9a-zA-Z]{4,8}$
EMPIAR_EMDB_PDB_LIST:
name: EMPIAR_EMDB_PDB_LIST
description: A list of EMPIAR, EMDB, and PDB identifiers
from_schema: cdp-api-models
base: str
pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$
pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$
EMPIAR_EMDB_DOI_PDB_LIST:
name: EMPIAR_EMDB_DOI_PDB_LIST
description: A list of EMPIAR, EMDB, DOI, and PDB identifiers
from_schema: cdp-api-models
base: str
pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|pdb[0-9a-zA-Z]{4,8}))*$
pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8}))*$
enums:
tomogram_type:
name: tomogram_type
Expand Down Expand Up @@ -489,6 +495,9 @@ enums:
JEOL:
text: JEOL
description: JEOL Ltd.
# Permissible value added alongside FEI / TFS / JEOL; per its description it
# marks simulated data rather than a hardware vendor.
# NOTE(review): the slot-level pattern that enumerates these values must list
# SIMULATED as well — confirm both stay in sync when regenerating.
SIMULATED:
text: SIMULATED
description: Simulated data
fiducial_alignment_status_enum:
name: fiducial_alignment_status_enum
description: Fiducial Alignment method
Expand Down Expand Up @@ -1118,8 +1127,8 @@ classes:
inlined_as_list: true
annotation_publication:
name: annotation_publication
description: List of publication IDs (EMPIAR, EMDB, DOI) that describe this
annotation method. Comma separated.
description: List of publication IDs (EMPIAR, EMDB, DOI, PDB) that describe
this annotation method. Comma separated.
from_schema: cdp-api-models
exact_mappings:
- cdp-common:annotation_publications
Expand All @@ -1130,7 +1139,7 @@ classes:
range: EMPIAR_EMDB_DOI_PDB_LIST
inlined: true
inlined_as_list: true
pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|pdb[0-9a-zA-Z]{4,8}))*$
pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8}))*$
annotation_method:
name: annotation_method
description: Describe how the annotation is made (e.g. Manual, crYoLO, Positive
Expand Down Expand Up @@ -1173,11 +1182,13 @@ classes:
owner: Annotation
domain_of:
- Annotation
range: GO_ID
required: true
inlined: true
inlined_as_list: true
pattern: ^GO:[0-9]{7}$
pattern: (^GO:[0-9]{7}$)|(^UniProtKB:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}+$)
any_of:
- range: GO_ID
- range: UNIPROT_ID
object_name:
name: object_name
description: Name of the object being annotated (e.g. ribosome, nuclear pore
Expand Down Expand Up @@ -2055,7 +2066,7 @@ classes:
recommended: true
inlined: true
inlined_as_list: true
pattern: (^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$)
pattern: (^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)
related_database_links:
name: related_database_links
description: Comma-separated list of related database links for the dataset.
Expand Down Expand Up @@ -2487,7 +2498,7 @@ classes:
required: true
inlined: true
inlined_as_list: true
pattern: (^FEI$)|(^TFS$)|(^JEOL$)
pattern: (^FEI$)|(^TFS$)|(^JEOL$)|(^SIMULATED$)
microscope_model:
name: microscope_model
description: Microscope model name
Expand Down Expand Up @@ -3682,7 +3693,7 @@ classes:
recommended: true
inlined: true
inlined_as_list: true
pattern: (^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$)
pattern: (^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)
related_database_links:
name: related_database_links
description: Comma-separated list of related database links for the dataset.
Expand Down
65 changes: 38 additions & 27 deletions schema/api/v2.0.0/codegen/api_models_materialized.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,12 @@ types:
from_schema: cdp-api
base: string
pattern: ^GO:[0-9]{7}$
# UniProtKB accession written with the `UniProtKB:` prefix.
# The two accession forms are wrapped in a single group so that the
# `^UniProtKB:` prefix and the `$` end anchor apply to BOTH alternatives.
# Ungrouped, the first alternative had no end anchor (trailing junk was
# accepted) and the second had neither the prefix nor a start anchor.
# The possessive quantifier `{1,2}+` is replaced by plain `{1,2}`: it
# matches the same strings here and is not supported by every regex engine
# (e.g. ECMAScript, Python `re` before 3.11).
# NOTE(review): neighbouring types in this schema version declare
# `base: string` while this one declares `base: str` — confirm which is
# intended; left unchanged here.
UNIPROT_ID:
  name: UNIPROT_ID
  description: A UniProt identifier
  from_schema: cdp-api
  base: str
  pattern: '^UniProtKB:([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$'
WORMBASE_ID:
name: WORMBASE_ID
description: A WormBase identifier
Expand Down Expand Up @@ -336,19 +342,19 @@ types:
description: A Protein Data Bank identifier
from_schema: cdp-api
base: string
pattern: ^pdb[0-9a-zA-Z]{4,8}$
pattern: ^PDB-[0-9a-zA-Z]{4,8}$
EMPIAR_EMDB_PDB_LIST:
name: EMPIAR_EMDB_PDB_LIST
description: A list of EMPIAR, EMDB, and PDB identifiers
from_schema: cdp-api
base: string
pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$
pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$
EMPIAR_EMDB_DOI_PDB_LIST:
name: EMPIAR_EMDB_DOI_PDB_LIST
description: A list of EMPIAR, EMDB, DOI, and PDB identifiers
from_schema: cdp-api
base: string
pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|pdb[0-9a-zA-Z]{4,8}))*$
pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8}))*$
enums:
annotation_file_source_enum:
name: annotation_file_source_enum
Expand Down Expand Up @@ -492,6 +498,9 @@ enums:
JEOL:
text: JEOL
description: JEOL Ltd.
# Permissible value added alongside FEI / TFS / JEOL; per its description it
# marks simulated data rather than a hardware vendor.
# NOTE(review): the slot-level pattern that enumerates these values must list
# SIMULATED as well — confirm both stay in sync when regenerating.
SIMULATED:
text: SIMULATED
description: Simulated data
fiducial_alignment_status_enum:
name: fiducial_alignment_status_enum
description: Fiducial Alignment method
Expand Down Expand Up @@ -737,7 +746,7 @@ classes:
owner: AuthorEntityMixin
domain_of:
- AuthorMixin
range: string
range: boolean
inlined: true
inlined_as_list: true
primary_author_status:
Expand All @@ -751,7 +760,7 @@ classes:
owner: AuthorEntityMixin
domain_of:
- AuthorMixin
range: string
range: boolean
inlined: true
inlined_as_list: true
APIDateStampedEntityMixin:
Expand Down Expand Up @@ -1232,7 +1241,7 @@ classes:
owner: AnnotationAuthor
domain_of:
- AuthorMixin
range: string
range: boolean
inlined: true
inlined_as_list: true
primary_author_status:
Expand All @@ -1246,7 +1255,7 @@ classes:
owner: AnnotationAuthor
domain_of:
- AuthorMixin
range: string
range: boolean
inlined: true
inlined_as_list: true
AnnotationFile:
Expand Down Expand Up @@ -1537,8 +1546,8 @@ classes:
inlined_as_list: true
annotation_publication:
name: annotation_publication
description: List of publication IDs (EMPIAR, EMDB, DOI) that describe this
annotation method. Comma separated.
description: List of publication IDs (EMPIAR, EMDB, DOI, PDB) that describe
this annotation method. Comma separated.
from_schema: cdp-api
exact_mappings:
- cdp-common:annotation_publications
Expand All @@ -1549,7 +1558,7 @@ classes:
range: EMPIAR_EMDB_DOI_PDB_LIST
inlined: true
inlined_as_list: true
pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|pdb[0-9a-zA-Z]{4,8}))*$
pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8}))*$
annotation_method:
name: annotation_method
description: Describe how the annotation is made (e.g. Manual, crYoLO, Positive
Expand Down Expand Up @@ -1592,11 +1601,13 @@ classes:
owner: Annotation
domain_of:
- Annotation
range: GO_ID
required: true
inlined: true
inlined_as_list: true
pattern: ^GO:[0-9]{7}$
pattern: (^GO:[0-9]{7}$)|(^UniProtKB:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}+$)
any_of:
- range: GO_ID
- range: UNIPROT_ID
object_name:
name: object_name
description: Name of the object being annotated (e.g. ribosome, nuclear pore
Expand Down Expand Up @@ -1957,7 +1968,7 @@ classes:
owner: DatasetAuthor
domain_of:
- AuthorMixin
range: string
range: boolean
inlined: true
inlined_as_list: true
primary_author_status:
Expand All @@ -1971,7 +1982,7 @@ classes:
owner: DatasetAuthor
domain_of:
- AuthorMixin
range: string
range: boolean
inlined: true
inlined_as_list: true
DatasetFunding:
Expand Down Expand Up @@ -2458,7 +2469,7 @@ classes:
recommended: true
inlined: true
inlined_as_list: true
pattern: (^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$)
pattern: (^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)
related_database_links:
name: related_database_links
description: Comma-separated list of related database links for the dataset.
Expand Down Expand Up @@ -2663,7 +2674,7 @@ classes:
owner: DepositionAuthor
domain_of:
- AuthorMixin
range: string
range: boolean
inlined: true
inlined_as_list: true
primary_author_status:
Expand All @@ -2677,7 +2688,7 @@ classes:
owner: DepositionAuthor
domain_of:
- AuthorMixin
range: string
range: boolean
inlined: true
inlined_as_list: true
Deposition:
Expand Down Expand Up @@ -2861,7 +2872,7 @@ classes:
recommended: true
inlined: true
inlined_as_list: true
pattern: (^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$)
pattern: (^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)
related_database_links:
name: related_database_links
description: Comma-separated list of related database links for the dataset.
Expand Down Expand Up @@ -3828,7 +3839,7 @@ classes:
required: true
inlined: true
inlined_as_list: true
pattern: (^FEI$)|(^TFS$)|(^JEOL$)
pattern: (^FEI$)|(^TFS$)|(^JEOL$)|(^SIMULATED$)
microscope_model:
name: microscope_model
description: Microscope model name
Expand Down Expand Up @@ -4333,7 +4344,7 @@ classes:
owner: TomogramAuthor
domain_of:
- AuthorMixin
range: string
range: boolean
inlined: true
inlined_as_list: true
primary_author_status:
Expand All @@ -4347,7 +4358,7 @@ classes:
owner: TomogramAuthor
domain_of:
- AuthorMixin
range: string
range: boolean
inlined: true
inlined_as_list: true
TomogramVoxelSpacing:
Expand Down Expand Up @@ -5036,7 +5047,7 @@ classes:
domain_of:
- DateStampedEntityMixin
- APIDateStampedEntityMixin
range: string
range: date
required: true
inlined: true
inlined_as_list: true
Expand All @@ -5051,7 +5062,7 @@ classes:
domain_of:
- DateStampedEntityMixin
- APIDateStampedEntityMixin
range: string
range: date
required: true
inlined: true
inlined_as_list: true
Expand All @@ -5067,7 +5078,7 @@ classes:
domain_of:
- DateStampedEntityMixin
- APIDateStampedEntityMixin
range: string
range: date
required: true
inlined: true
inlined_as_list: true
Expand Down Expand Up @@ -5103,7 +5114,7 @@ classes:
recommended: true
inlined: true
inlined_as_list: true
pattern: (^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|pdb[0-9a-zA-Z]{4,8}))*$)
pattern: (^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)
related_database_links:
name: related_database_links
description: Comma-separated list of related database links for the dataset.
Expand Down Expand Up @@ -5212,7 +5223,7 @@ classes:
owner: AuthorMixin
domain_of:
- AuthorMixin
range: string
range: boolean
inlined: true
inlined_as_list: true
primary_author_status:
Expand All @@ -5226,7 +5237,7 @@ classes:
owner: AuthorMixin
domain_of:
- AuthorMixin
range: string
range: boolean
inlined: true
inlined_as_list: true
source_file: api/v2.0.0/api_models.yaml
Loading

0 comments on commit 57e99e0

Please sign in to comment.