diff --git a/generated/jsonschema/allianceModel.schema.json b/generated/jsonschema/allianceModel.schema.json index f3984a5b5..df4580371 100644 --- a/generated/jsonschema/allianceModel.schema.json +++ b/generated/jsonschema/allianceModel.schema.json @@ -13620,7 +13620,7 @@ "type": "boolean" }, "note_type": { - "description": "The type of note: e.g., cytology, comment, summary. Permissible values for 'note_type' currently = disease_summary, disease_note", + "description": "The type of note: e.g., cytology, comment, summary. Permissible values for 'note_type' currently = disease_summary, disease_note, molecular_mutation -- please see the VocabularyTerm class for the full list.", "type": "string" }, "obsolete": { @@ -16575,7 +16575,7 @@ "type": "string" }, "source_variant_locations": { - "description": "Location of the variant within genomic entities,as described in the source references.", + "description": "Location of the variant within genomic entities, as described in the source references.", "items": { "$ref": "#/$defs/SourceVariantLocation" }, @@ -16590,7 +16590,7 @@ "type": "string" }, "variant_genome_locations": { - "description": "Location of the variant in a genomic context. multiple variant_genome_locations represent multiple assemblies.", + "description": "Location of the variant in a genomic context. multiple variant_genome_locations represent multiple assemblies. 
because we want the variant to stay consistent between assemblies, we must have a single variant object apart from its location.", "items": { "$ref": "#/$defs/VariantGenomeLocation" }, @@ -16621,7 +16621,6 @@ }, "required": [ "variant_type", - "variant_genome_locations", "curie", "taxon", "data_provider", @@ -16751,6 +16750,14 @@ "format": "date-time", "type": "string" }, + "deleted_sequence": { + "description": "The sequence of DNA base pairs removed from the reference sequence by the variant.", + "type": "string" + }, + "dna_mutation_type": { + "description": "The type of DNA mutation, for example, insertion, deletion, substitution, or indel.", + "type": "string" + }, "end_position": { "description": "End position of variant on genomic entity.", "type": "integer" @@ -16758,18 +16765,50 @@ "evidence_code": { "type": "string" }, + "exon_number": { + "description": "The exon number of the variant location. If the variant is in an intron, this slot is not populated. Primarily used for zebrafish.", + "type": "integer" + }, + "gene_localization_type": { + "description": "The type of gene localization, for example, intronic, intergenic, or intragenic.", + "type": "string" + }, + "inserted_sequence": { + "description": "The sequence of DNA base pairs inserted into the reference sequence by the variant.", + "type": "string" + }, "internal": { "description": "Classifies the entity as private (for internal use) or not (for public use).", "type": "boolean" }, + "intron_number": { + "description": "The intron number of the variant location. If the variant is in an exon, this slot is not populated. Primarily used for zebrafish.", + "type": "integer" + }, + "number_additional_dna_base_pairs": { + "description": "The number of additional DNA base pairs inserted into the reference sequence by the variant.", + "type": "integer" + }, + "number_removed_dna_base_pairs": { + "description": "The number of DNA base pairs removed from the reference sequence by the variant. 
", + "type": "integer" + }, "obsolete": { "description": "Entity is no longer current.", "type": "boolean" }, + "padded_base": { + "description": "", + "type": "string" + }, "reference_sequence": { "description": "Reference sequence of genome or genomic entity at position of aligned variant.", "type": "string" }, + "sequence_of_reference_accession_number": { + "description": "The accession number that describes the assembly and chromosome of the reference sequence.", + "type": "string" + }, "single_reference": { "description": "holds between an object and a single reference", "type": "string" @@ -16785,6 +16824,10 @@ "variant_sequence": { "description": "Sequence that differs from the reference sequence of genome or genomic entity at position of variant, as specified by curator.", "type": "string" + }, + "variation_strand": { + "description": "The strand on which the variant is located. This is the strand of the reference sequence, not the variant sequence.", + "type": "string" } }, "required": [ @@ -16793,6 +16836,84 @@ "title": "VariantGenomeLocation", "type": "object" }, + "VariantLocation": { + "additionalProperties": false, + "description": "Base class linking a variant to a position on a genomic entity and the resulting consequence to the sequence and/or function of that genomic entity. Slots are provided for data taken from a source publication or data load and for data resulting from manual curation. Where the values are the same, the curator has confirmed the information from the source. 
In other cases, the curator's analysis has resulted in different values, for instance, if the assembly is different, the source did not specify the transcript or protein isoform, the definition of the transcript or protein isoform used by the source has changed, or if there was an error in the source data.", + "properties": { + "consequence": { + "description": "SOTerm (child of SO:0001576 - transcript_variant) that describes the consequence of the variant, as stated in the source reference. In practice source consequence will be associated with locations at any or all of VariantGenomeLocation, VariantTranscriptLocation, and VariantPolypeptideLocation.", + "type": "string" + }, + "created_by": { + "description": "The individual that created the entity.", + "type": "string" + }, + "date_created": { + "description": "The date on which an entity was created. This can be applied to nodes or edges.", + "format": "date-time", + "type": "string" + }, + "date_updated": { + "description": "Date on which an entity was last modified.", + "format": "date-time", + "type": "string" + }, + "db_date_created": { + "description": "The date on which an entity was created in the Alliance database. This is distinct from date_created, which represents the date when the entity was originally created (i.e. at the MOD for imported data).", + "format": "date-time", + "type": "string" + }, + "db_date_updated": { + "description": "Date on which an entity was last modified in the Alliance database. 
This is distinct from date_updated, which represents the date when the entity was last modified and may predate import into the Alliance database.", + "format": "date-time", + "type": "string" + }, + "end_position": { + "description": "End position of variant on genomic entity.", + "type": "integer" + }, + "evidence_code": { + "type": "string" + }, + "internal": { + "description": "Classifies the entity as private (for internal use) or not (for public use).", + "type": "boolean" + }, + "obsolete": { + "description": "Entity is no longer current.", + "type": "boolean" + }, + "reference_sequence": { + "description": "Reference sequence of genome or genomic entity at position of aligned variant.", + "type": "string" + }, + "sequence_of_reference_accession_number": { + "description": "The accession number that describes the assembly and chromosome of the reference sequence.", + "type": "string" + }, + "single_reference": { + "description": "holds between an object and a single reference", + "type": "string" + }, + "start_position": { + "description": "Start position of variant on genomic entity.", + "type": "integer" + }, + "updated_by": { + "description": "The individual that last modified the entity.", + "type": "string" + }, + "variant_sequence": { + "description": "Sequence that differs from the reference sequence of genome or genomic entity at position of variant, as specified by curator.", + "type": "string" + } + }, + "required": [ + "internal" + ], + "title": "VariantLocation", + "type": "object" + }, "VariantPolypeptideLocation": { "additionalProperties": false, "description": "Links a variant to a position on a specified polypeptide and the resulting consequence to the sequence and/or function of that polypeptide.", @@ -16843,6 +16964,14 @@ "description": "Classifies the entity as private (for internal use) or not (for public use).", "type": "boolean" }, + "number_amino_acids_inserted": { + "description": "The number of amino acids added to the polypeptide 
as a result of the variant.", + "type": "integer" + }, + "number_amino_acids_removed": { + "description": "The number of amino acids removed from the polypeptide as a result of the variant.", + "type": "integer" + }, "obsolete": { "description": "Entity is no longer current.", "type": "boolean" @@ -16855,6 +16984,10 @@ "description": "Reference sequence of genome or genomic entity at position of aligned variant.", "type": "string" }, + "sequence_of_reference_accession_number": { + "description": "The accession number that describes the assembly and chromosome of the reference sequence.", + "type": "string" + }, "single_reference": { "description": "holds between an object and a single reference", "type": "string" @@ -17056,10 +17189,18 @@ "evidence_code": { "type": "string" }, + "exon_number": { + "description": "The exon number of the variant location. If the variant is in an intron, this slot is not populated. Primarily used for zebrafish.", + "type": "integer" + }, "internal": { "description": "Classifies the entity as private (for internal use) or not (for public use).", "type": "boolean" }, + "intron_number": { + "description": "The intron number of the variant location. If the variant is in an exon, this slot is not populated. 
Primarily used for zebrafish.", + "type": "integer" + }, "obsolete": { "description": "Entity is no longer current.", "type": "boolean" @@ -17068,6 +17209,10 @@ "description": "Reference sequence of genome or genomic entity at position of aligned variant.", "type": "string" }, + "sequence_of_reference_accession_number": { + "description": "The accession number that describes the assembly and chromosome of the reference sequence.", + "type": "string" + }, "single_reference": { "description": "holds between an object and a single reference", "type": "string" diff --git a/model/schema/core.yaml b/model/schema/core.yaml index 077caeab0..634ad5b5e 100644 --- a/model/schema/core.yaml +++ b/model/schema/core.yaml @@ -540,7 +540,8 @@ slots: note_type: description: >- - The type of note: e.g., cytology, comment, summary. Permissible values for 'note_type' currently = disease_summary, disease_note + The type of note: e.g., cytology, comment, summary. Permissible values for 'note_type' currently = disease_summary, disease_note, + molecular_mutation -- please see the VocabularyTerm class for the full list. 
range: VocabularyTerm note_type_name: diff --git a/model/schema/variation.yaml b/model/schema/variation.yaml index 1976a3a7e..ec5a49cf3 100644 --- a/model/schema/variation.yaml +++ b/model/schema/variation.yaml @@ -60,16 +60,8 @@ classes: - variant_status exact_mappings: - SO:0001059 - rules: - - postconditions: - any_of: - - slot_conditions: - variant_genome_locations: - required: true - - slot_conditions: - variant_status: - equals_string: private - description: a variant must have a genome location, or must be private + notes: >- + a variant must have a genome location, or must be private/incomplete SourceVariantLocation: is_a: AuditedObject @@ -84,7 +76,6 @@ classes: VariantLocation: is_a: AuditedObject - abstract: true description: >- Base class linking a variant to a position on a genomic entity and the resulting consequence to the sequence and/or function of that genomic entity. Slots are provided for data taken from a source publication or data @@ -101,9 +92,7 @@ classes: - reference_sequence - variant_sequence - consequence - slot_usage: - evidence_code: - required: false + - sequence_of_reference_accession_number VariantGenomeLocation: is_a: VariantLocation @@ -114,6 +103,16 @@ classes: slots: - assembly - chromosome + - exon_number + - intron_number + - variation_strand + - number_additional_dna_base_pairs + - number_removed_dna_base_pairs + - inserted_sequence + - deleted_sequence + - padded_base + - dna_mutation_type + - gene_localization_type VariantTranscriptLocation: is_a: VariantLocation @@ -122,6 +121,9 @@ classes: function of that transcript. slots: - transcript + - exon_number + - intron_number + VariantPolypeptideLocation: is_a: VariantLocation @@ -131,11 +133,125 @@ classes: slots: - polypeptide - associated_transcripts + - number_amino_acids_removed + - number_amino_acids_inserted + # Slots slots: + number_amino_acids_removed: + description: >- + The number of amino acids removed from the polypeptide as a result of the variant. 
+ range: integer + required: false + multivalued: false + examples: + - value: 1 + - value: 2 + - value: 3 + exact_mappings: + - SO:0001889 + + number_amino_acids_inserted: + description: >- + The number of amino acids added to the polypeptide as a result of the variant. + range: integer + required: false + multivalued: false + examples: + - value: 1 + - value: 2 + - value: 3 + exact_mappings: + - SO:0001889 + + dna_mutation_type: + description: >- + The type of DNA mutation, for example, insertion, deletion, substitution, or indel. + range: uriorcurie + examples: + - value: SO:1000027 # G_to_T_transversion + + gene_localization_type: + description: >- + The type of gene localization, for example, intronic, intergenic, or intragenic. + range: uriorcurie + examples: + - value: SO:0000147 # exon + + padded_base: + description: >- + range: string + multivalued: false + examples: + - value: 'A' + + variation_strand: + description: >- + The strand on which the variant is located. This is the strand of the reference sequence, not the variant + sequence. + range: string + multivalued: false + required: false + examples: + - value: '+' + - value: '-' + + number_additional_dna_base_pairs: + description: >- + The number of additional DNA base pairs inserted into the reference sequence by the variant. + range: integer + multivalued: false + required: false + examples: + - value: 1 + - value: 2 + + number_removed_dna_base_pairs: + description: >- + The number of DNA base pairs removed from the reference + sequence by the variant. + range: integer + multivalued: false + required: false + examples: + - value: 1 + - value: 2 + + inserted_sequence: + description: >- + The sequence of DNA base pairs inserted into the reference sequence by the variant. + range: string + multivalued: false + required: false + examples: + - value: 'A' + - value: 'AT' + + deleted_sequence: + description: >- + The sequence of DNA base pairs removed from the reference sequence by the variant. 
+ range: string + multivalued: false + required: false + examples: + - value: 'A' + - value: 'AT' + + exon_number: + range: integer + description: >- + The exon number of the variant location. If the variant is in an intron, this slot is not populated. + Primarily used for zebrafish. + + intron_number: + range: integer + description: >- + The intron number of the variant location. If the variant is in an exon, this slot is not populated. + Primarily used for zebrafish. + variant_curie: range: string @@ -154,6 +270,7 @@ slots: range: SOTerm source_general_consequence: + aliases: ['transcript_consequence'] description: >- SOTerm (child of SO:0001576 - transcript_variant) that describes the consequence of the variant, as stated in the source reference when no @@ -165,6 +282,13 @@ slots: domain: Variant range: SOTerm + sequence_of_reference_accession_number: + description: >- + The accession number that describes the assembly and chromosome of the reference sequence. + multivalued: false + domain: VariantLocation + range: string + consequence: description: >- SOTerm (child of SO:0001576 - transcript_variant) that describes the @@ -194,13 +318,15 @@ slots: multivalued: true domain: Variant range: VariantLocation + inlined: true + inlined_as_list: true variant_genome_locations: is_a: variant_locations description: >- Location of the variant in a genomic context. multiple variant_genome_locations represent multiple - assemblies. - required: true + assemblies. because we want the variant to stay consistent between assemblies, we must have a single + variant object apart from its location. 
multivalued: true domain: Variant range: VariantGenomeLocation @@ -215,6 +341,8 @@ slots: multivalued: true domain: Variant range: VariantPolypeptideLocation + inlined: true + inlined_as_list: true variant_transcript_locations: is_a: variant_locations @@ -224,15 +352,19 @@ slots: multivalued: true domain: Variant range: VariantTranscriptLocation + inlined: true + inlined_as_list: true source_variant_locations: description: >- - Location of the variant within genomic entities,as described in the source + Location of the variant within genomic entities, as described in the source references. required: false multivalued: true domain: Variant range: SourceVariantLocation + inlined: true + inlined_as_list: true hgvs: description: >- diff --git a/test/data/variant_test.json b/test/data/variant_test.json index 725fb66e9..fb0375df8 100644 --- a/test/data/variant_test.json +++ b/test/data/variant_test.json @@ -7,6 +7,17 @@ "created_by": "ZFIN", "updated_by": "ZFIN", "internal": false, + "variant_transcript_locations": [ + { + "single_reference": "ZFIN:ZDB-PUB-000412-1", + "reference_sequence": "C", + "variant_sequence": "T", + "internal": false, + "sequence_of_reference_accession_number": "NC_006088.4", + "transcript": "ZFIN:ZDB-TSCRIPT-000412-2", + "exon_number": 3 + } + ], "data_provider": { "source_organization": { "abbreviation" : "ZFIN", @@ -16,19 +27,58 @@ }, "internal" : false }, - "variant_genome_locations": [ +"variant_genome_locations": [ { + "single_reference": "ZFIN:ZDB-PUB-000412-1", "assembly": "GRCz11", "chromosome": "7", "start_position": 30341642, "end_position": 30341642, "reference_sequence": "C", "variant_sequence": "T", - "internal": false + "internal": false, + "sequence_of_reference_accession_number": "NC_006088.4" + } + ], + "variant_polypeptide_locations": [ + { + "single_reference": "ZFIN:ZDB-PUB-000412-1", + "internal": false, + "associated_transcripts": [ + "ZFIN:ZDB-TSCRIPT-000412-1" + ], + "polypeptide": "UNIPROT:Q9Y6X7", + 
"number_amino_acids_removed": 3 + } + ], + "source_variant_locations": [ + { + "single_reference": "ZFIN:ZDB-PUB-000412-1", + "internal": false, + "variant_locations": [ + { + "single_reference": "ZFIN:ZDB-PUB-000412-1", + "start_position": 30341642, + "end_position": 30341642, + "reference_sequence": "C", + "variant_sequence": "T", + "internal": false, + "sequence_of_reference_accession_number": "NC_006088.4" + } + ] } ], "variant_status": "public", "variant_type": "SO:1000008" + }, + { + "curie": "ZFIN:ZDB-ALT-000412-9", + "taxon": "NCBITaxon:7955", + "created_by": "ZFIN", + "updated_by": "ZFIN", + "internal": false, + "variant_status": "private", + "variant_type": "SO:1000008" } ] }