schema_version |
character |
The version of the JSON schema that the server uses to validate the object. |
[optional] [Pattern: ^\d+(\.\d+)*$] |
uuid |
character |
The unique identifier associated with every object. |
[optional] |
notes |
character |
DACC internal notes. |
[optional] [Pattern: ^(\S+(\s\|\S)*\S+\|\S)$] |
aliases |
set[character] |
Lab specific identifiers to reference an object. |
[optional] |
creation_timestamp |
character |
The date the object was created. |
[optional] |
submitted_by |
character |
The user who submitted the object. |
[optional] |
submitter_comment |
character |
Additional information specified by the submitter to be displayed as a comment on the portal. |
[optional] [Pattern: ^(\S+(\s\|\S)*\S+\|\S)$] |
description |
character |
A plain text description of the object. |
[optional] [Pattern: ^(\S+(\s\|\S)*\S+\|\S)$] |
status |
character |
The status of the metadata object. |
[optional] [Enum: [in progress, preview, released, deleted, replaced, revoked, archived]] |
user |
character |
The user that is assigned to this access key. |
[optional] |
access_key_id |
character |
An access key. |
[optional] |
secret_access_key_hash |
character |
A secret access key. |
[optional] |
@id |
character |
|
[optional] |
@type |
array[character] |
|
[optional] |
summary |
character |
A summary of the object. |
[optional] |
release_timestamp |
character |
The date the object was released. |
[optional] |
lab |
character |
Lab associated with the submission. |
[optional] |
award |
character |
Grant associated with the submission. |
[optional] |
analysis_step_types |
set[character] |
The classification of the software. |
[optional] [Enum: ] |
step_label |
character |
Unique lowercased label of the analysis step that includes the relevant assays, the software used, and the purpose of the step, e.g. rampage-grit-peak-calling-step |
[optional] [Pattern: ^[a-z0-9-]+-step$] |
title |
character |
The full name of the user. |
[optional] |
workflow |
character |
The workflow used to produce this file. |
[optional] |
parents |
set[character] |
The precursor steps. |
[optional] |
input_content_types |
set[character] |
The content types used as input for the analysis step. |
[optional] [Enum: ] |
output_content_types |
set[character] |
The content types produced as output by the analysis step. |
[optional] [Enum: ] |
name |
character |
The preferred viewable name of the workflow. |
[optional] [Pattern: ^(\S+(\s\|\S)*\S+\|\S)$] |
analysis_step_versions |
set[character] |
The analysis step versions associated with this analysis step. |
[optional] |
analysis_step |
character |
The analysis step which this version belongs to. |
[optional] |
software_versions |
set[character] |
The software versions associated with this publication. |
[optional] |
url |
character |
An external resource with additional information. |
[optional] |
start_date |
character |
The date when the award begins. |
[optional] |
end_date |
character |
The date when the award concludes. |
[optional] |
pis |
set[character] |
Principal Investigator(s) of the grant. |
[optional] |
contact_pi |
character |
The contact Principal Investigator of the grant. |
[optional] |
project |
character |
The collection of biological data related to a single initiative, originating from a consortium. |
[optional] [Enum: [community, ENCODE, IGVF, IGVF affiliate]] |
viewing_group |
character |
The group that determines which set of data the user has permission to view. |
[optional] [Enum: [community, IGVF]] |
component |
character |
The project component the award is associated with. |
[optional] [Enum: [affiliate, data analysis, data coordination, functional characterization, mapping, networks, predictive modeling]] |
classification |
character |
Sample specific biomarker. |
[optional] [Enum: [cell surface protein, marker gene]] |
quantification |
character |
The biomarker association to the biosample, disease or other condition. This can be the absence of the biomarker or the presence of the biomarker in some low, intermediate or high quantity. |
[optional] [Enum: [negative, positive, low, intermediate, high]] |
synonyms |
set[character] |
Synonyms for the term that have been recorded in an ontology. |
[optional] |
gene |
character |
Biomarker gene. |
[optional] |
name_quantification |
character |
A concatenation of the name and quantification of the biomarker. |
[optional] |
biomarker_for |
set[character] |
The samples which have been confirmed to have this biomarker. |
[optional] |
attachment |
Attachment |
|
[optional] |
document_type |
character |
The category that best describes the document. |
[optional] [Enum: [cell fate change protocol, characterization, computational protocol, experimental protocol, file format specification, image, model source data, plate map, plasmid map, plasmid sequence, quality control report, standards]] |
characterization_method |
character |
The method used for the characterization. |
[optional] [Enum: [FACS, immunoblot, immunofluorescence, immunoprecipitation, mass spectrometry, PCR, restriction digest, RT-qPCR, sequencing]] |
urls |
set[character] |
Link to the institutional certification form. |
[optional] |
taxa |
character |
The species of the organism. |
[optional] [Enum: [Homo sapiens, Mus musculus]] |
publications |
set[character] |
The publications associated with this object. |
[optional] |
documents |
set[character] |
Documents that provide additional information (not data file). |
[optional] |
accession |
character |
A unique identifier to be used to reference the object prefixed with IGVF. |
[optional] |
alternate_accessions |
set[character] |
Accessions previously assigned to objects that have been merged with this object. |
[optional] |
collections |
set[character] |
Some samples are part of particular data collections. |
[optional] [Enum: ] |
revoke_detail |
character |
Explanation of why an object was transitioned to the revoked status. |
[optional] [Pattern: ^(\S+(\s\|\S)*\S+\|\S)$] |
dbxrefs |
set[character] |
Biosample identifiers from external resources, such as Biosample database or Cellosaurus. |
[optional] |
sex |
character |
|
[optional] [Enum: [female, male, mixed, unspecified]] |
phenotypic_features |
set[character] |
A list of associated phenotypic features of the donor. |
[optional] |
virtual |
character |
Virtual samples are not representing actual physical entities from experiments, but rather capturing metadata about hypothetical samples that the reported analysis results are relevant for. |
[optional] |
related_donors |
set[RelatedDonor] |
Familial relations of this donor. |
[optional] |
ethnicities |
set[character] |
Ethnicity of the donor. |
[optional] [Enum: ] |
human_donor_identifiers |
set[character] |
Identifiers of this human donor. |
[optional] |
sources |
set[character] |
The originating lab(s) or vendor(s). |
[optional] |
lot_id |
character |
The lot identifier provided by the originating lab or vendor. |
[optional] [Pattern: ^(\S+(\s\|\S)*\S+\|\S)$] |
product_id |
character |
The product identifier provided by the originating lab or vendor. |
[optional] [Pattern: ^(\S+(\s\|\S)*\S+\|\S)$] |
strain_background |
character |
The specific parent strain designation of a non-human donor. |
[optional] [Enum: [A/J (AJ), B6129S1F1/J, B6AF1/J, B6CASTF1/J, B6NODF1/J, B6NZOF1/J, B6PWKF1/J, B6WSBF1/J, C57BL/6J (B6), 129S1/SvImJ (129), NOD/ShiLtJ (NOD), NZO/H1LtJ (NZO), CAST/EiJ (CAST), PWK/PhJ (PWK), WSB/EiJ (WSB), CAST (M. m. castaneus), WSB (M. m. domesticus), PWK (M. m. musculus)]] |
strain |
character |
The specific strain designation of a non-human donor. |
[optional] |
genotype |
character |
The genotype of the strain according to accepted nomenclature conventions. |
[optional] |
individual_rodent |
character |
This rodent donor represents an individual rodent. |
[optional] |
rodent_identifier |
character |
The identifier for this individual rodent donor. |
[optional] |
controlled_access |
character |
Indicator of whether the samples are under controlled access. |
[optional] |
anvil_url |
character |
URL linking to the controlled access file that has been deposited at AnVIL workspace. |
[optional] |
transcriptome_annotation |
character |
The annotation and version of the reference resource. |
[optional] [Enum: [GENCODE 32, GENCODE 40, GENCODE 41, GENCODE 42, GENCODE 43, GENCODE 44, GENCODE 45, GENCODE 47, GENCODE Cast - M32, GENCODE M30, GENCODE M31, GENCODE M32, GENCODE M33, GENCODE M34, GENCODE M36]] |
assembly |
character |
Genome assembly applicable for the tabular data. |
[optional] [Enum: [Cast - GRCm39, GRCh38, GRCm39, custom]] |
reference_files |
set[character] |
Link to the reference files used to generate this file. |
[optional] |
filtered |
character |
Indicates whether the file has gone through some filtering step, for example, removal of PCR duplicates or filtering based on significance calling. |
[optional] |
analysis_step_version |
character |
The analysis step version of the file. |
[optional] |
content_md5sum |
character |
The MD5sum of the uncompressed file. |
[optional] [Pattern: [a-f\d]{32}\|[A-F\d]{32}] |
content_type |
character |
The type of content in the file. |
[optional] |
derived_from |
set[character] |
The files participating as inputs into software to produce this output file. |
[optional] |
derived_manually |
character |
A boolean indicating whether the file has been derived manually without automated computational methods. |
[optional] |
file_format |
character |
The file format or extension of the file. |
[optional] [Enum: [bed, csv, gtf, tsv, vcf]] |
file_format_specifications |
set[character] |
Documents that describe the file format and fields of this file. |
[optional] |
file_set |
character |
The file set that this file belongs to. |
[optional] |
file_size |
integer |
File size specified in bytes. |
[optional] [Min: 0] |
md5sum |
character |
The md5sum of the file being transferred. |
[optional] [Pattern: [a-f\d]{32}\|[A-F\d]{32}] |
submitted_file_name |
character |
Original name of the file. |
[optional] |
upload_status |
character |
The upload/validation status of the file. |
[optional] [Enum: [pending, file not found, invalidated, validated, validation exempted]] |
validation_error_detail |
character |
Explanation of why the file failed the automated content checks. |
[optional] |
checkfiles_version |
character |
The Checkfiles GitHub version release the file was validated with. |
[optional] |
read_count |
integer |
Number of reads in a fastq file. |
[optional] [Min: 0] |
redacted |
character |
Indicates whether the alignments data have been sanitized (redacted) to prevent leakage of private and potentially identifying genomic information. |
[optional] |
integrated_in |
set[character] |
Construct library set(s) that this file was used for in insert design. |
[optional] |
input_file_for |
set[character] |
The files which are derived from this file. |
[optional] |
gene_list_for |
set[character] |
File Set(s) that this file is a gene list for. |
[optional] |
loci_list_for |
set[character] |
File Set(s) that this file is a loci list for. |
[optional] |
assay_titles |
set[character] |
Title(s) of assays that produced data analyzed in the analysis set. |
[optional] |
href |
character |
The download path to obtain the file. |
[optional] |
s3_uri |
character |
The S3 URI of the public file object. |
[optional] |
upload_credentials |
object |
The upload credentials for S3 to submit the file content. |
[optional] |
content_summary |
character |
A summary of the data in the signal file. |
[optional] |
seqspec_of |
set[character] |
Sequence files this file is a seqspec of. |
[optional] |
cell_type_annotation |
character |
The inferred cell type this file is associated with based on single-cell expression profiling. |
[optional] |
file_format_type |
character |
The subtype of bed files. |
[optional] [Enum: [bed12, bed3, bed3+, bed5, bed6, bed6+, bed9, bed9+, mpra_starr]] |
principal_dimension |
character |
The principal dimension of the matrix. |
[optional] [Enum: [cell, fragment, gene, time, treatment, variant, genomic position, spot barcode]] |
secondary_dimensions |
set[character] |
The secondary, tertiary, ..., n-th levels of dimensions of the matrix. |
[optional] [Enum: ] |
externally_hosted |
character |
|
[optional] |
external_host_url |
character |
A link to the resource where the file is externally hosted. |
[optional] |
source_url |
character |
An external resource to the code base of the workflow in github. |
[optional] [Pattern: ^https?://github\.com/(\S+)$] |
external |
character |
Indicates whether the file was obtained from an external, non-IGVF source. |
[optional] |
flowcell_id |
character |
The alphanumeric identifier for the flowcell of a sequencing machine. |
[optional] [Pattern: ^[a-zA-Z0-9-]+$] |
lane |
integer |
An integer identifying the lane of a sequencing machine. |
[optional] [Min: 1] |
minimum_read_length |
integer |
For high-throughput sequencing, the minimum number of contiguous nucleotides determined by sequencing. |
[optional] [Max: 300000000] [Min: 0] |
maximum_read_length |
integer |
For high-throughput sequencing, the maximum number of contiguous nucleotides determined by sequencing. |
[optional] [Max: 300000000] [Min: 0] |
mean_read_length |
numeric |
For high-throughput sequencing, the mean number of contiguous nucleotides determined by sequencing. |
[optional] [Max: 300000000] [Min: 0] |
sequencing_platform |
character |
The measurement device used to produce sequencing data. |
[optional] |
sequencing_kit |
character |
A reagent kit used with a library to prepare it for sequencing. |
[optional] [Enum: [AVITI 2x75 Sequencing Kit Cloudbreak High Output, AVITI 2x150 Sequencing Kit Cloudbreak High Output, HiSeq SBS Kit v4, HiSeq SR Cluster Kit v4-cBot-HS, HiSeq PE Cluster Kit v4-cBot-HS, HiSeq SR Rapid Cluster Kit v2, HiSeq PE Rapid Cluster Kit v2, HiSeq Rapid SBS Kit v2, HiSeq 3000/4000 SBS Kit, HiSeq 3000/4000 SR Cluster Kit, HiSeq 3000/4000 PE Cluster Kit, MiSeq Reagent Kit v2, NextSeq 500 Mid Output Kit, NextSeq 500 High Output Kit, NextSeq 500 Mid Output v2 Kit, NextSeq 500 High Output v2 Kit, NextSeq 500/550 Mid-Output v2.5 Kit, NextSeq 500/550 High-Output v2.5 Kit, TG NextSeq 500/550 Mid-Output Kit v2.5, TG NextSeq 500/550 High-Output Kit v2.5, NextSeq 1000/2000 P1 Reagent Kit, NextSeq 1000/2000 P2 Reagent Kit, NextSeq 1000/2000 P3 Reagent Kit, NextSeq 1000/2000 P1 XLEAP-SBS Reagent Kit, NextSeq 1000/2000 P2 XLEAP-SBS Reagent Kit, NextSeq 2000 P3 XLEAP-SBS Reagent Kit, NextSeq 2000 P4 XLEAP-SBS Reagent Kit, NovaSeq 6000 SP Reagent Kit v1.5, NovaSeq 6000 S1 Reagent Kit v1.5, NovaSeq 6000 S2 Reagent Kit v1.5, NovaSeq 6000 S4 Reagent Kit v1.5, NovaSeq X Series 1.5B Reagent Kit, NovaSeq X Series 10B Reagent Kit, NovaSeq X Series 25B Reagent Kit, ONT Ligation Sequencing Kit V14, Sequel sequencing kit 3.0, Sequel II sequencing kit 2.0, Singular G4 F2 Reagent Kit]] |
sequencing_run |
integer |
An ordinal number indicating which sequencing run of the associated library that the file belongs to. |
[optional] [Min: 1] |
illumina_read_type |
character |
The read type of the file. Relevant only for files produced using an Illumina sequencing platform. |
[optional] [Enum: [R1, R2, R3, I1, I2]] |
index |
character |
An Illumina index associated with the file. |
[optional] |
base_modifications |
set[character] |
The chemical modifications to bases in a DNA sequence that are detected in this file. |
[optional] [Enum: ] |
read_names |
set[character] |
The read names of a sequence file based on how it will be used by uniform pipelines. |
[optional] [Enum: ] |
seqspecs |
set[character] |
Link(s) to the associated seqspec YAML configuration file(s). |
[optional] |
strand_specificity |
character |
The strandedness of the signal file: plus, minus, or unstranded. |
[optional] [Enum: [plus, minus, unstranded]] |
normalized |
character |
Indicates if the signal file is normalized. |
[optional] |
start_view_position |
character |
The 0-based coordinate for the default starting position when viewing the signal in a genome browser. |
[optional] [Pattern: ^(chr(X |
barcode_map_for |
set[character] |
Link(s) to the Multiplexed samples using this file as barcode map. |
[optional] |
input_file_sets |
set[character] |
The file set(s) required for this prediction set. |
[optional] |
control_type |
character |
The type of control this file set represents. |
[optional] |
samples |
set[character] |
The samples associated with this publication. |
[optional] |
donors |
set[character] |
Donor(s) the sample was derived from. |
[optional] |
file_set_type |
character |
The category that best describes this prediction set. |
[optional] [Enum: [activity level, binding effect, functional effect, pathogenicity, protein stability]] |
external_image_data_url |
character |
Links to the external site where images and related data produced by this analysis are stored. |
[optional] [Pattern: ^https://cellpainting-gallery\.s3\.amazonaws\.com(\S+)$] |
demultiplexed_sample |
character |
The sample associated with this analysis set inferred through demultiplexing. |
[optional] |
files |
set[character] |
The files associated with this file set. |
[optional] |
control_for |
set[character] |
The file sets for which this file set is a control. |
[optional] |
submitted_files_timestamp |
character |
The timestamp the first file object in the file_set or associated auxiliary sets was created. |
[optional] |
input_for |
set[character] |
The file sets that use this file set as an input. |
[optional] |
protocols |
set[character] |
Links to the protocol(s) for preparing the samples on Protocols.io. |
[optional] |
sample_summary |
character |
A summary of the samples associated with input file sets of this analysis set. |
[optional] |
functional_assay_mechanisms |
set[character] |
The biological processes measured by this functional assay. For example, a VAMP-seq (MultiSTEP) assay measures the effects of variants on protein carboxylation and secretion processes. |
[optional] |
workflows |
set[character] |
The workflows associated with this publication. |
[optional] |
barcode_map |
character |
The link to the barcode mapping tabular file. |
[optional] |
measurement_sets |
set[character] |
The measurement sets that link to this auxiliary set. |
[optional] |
control_file_sets |
set[character] |
File sets that can serve as scientific controls for this file set. |
[optional] |
small_scale_loci_list |
set[Locus1] |
A small scale (<=100) list of specific chromosomal region(s) whose functionality is investigated in this prediction set. This property describes the input variables of the prediction set. For example, this list consists of the genetic variants whose functionality is predicted in this prediction set. |
[optional] |
large_scale_loci_list |
character |
A large scale list (>100) of specific chromosomal regions whose functionality is investigated in this prediction set. This property describes the input variables of the prediction set. For example, this list consists of the genetic variants whose functionality is predicted in this prediction set. |
[optional] |
small_scale_gene_list |
set[character] |
The specific, small scale list of (<=100) gene(s) whose functionality is investigated in this prediction set. This property describes the input variables of the prediction set. For example, this list consists of the genes whose expression level is predicted in this prediction set. It differs from assessed_genes (see more information under assessed_genes). |
[optional] |
large_scale_gene_list |
character |
The large scale list of (>100 genes) whose functionality is investigated in this prediction set. This property describes the input variables of the prediction set. For example, this list consists of the genes whose expression level is predicted in this prediction set. It differs from assessed_genes (see more information under assessed_genes). |
[optional] |
scope |
character |
The scope or scale that this prediction set is designed to target. |
[optional] [Enum: [genes, loci, genome-wide]] |
selection_criteria |
set[character] |
The criteria used to select the sequence material cloned into the library. |
[optional] [Enum: ] |
integrated_content_files |
set[character] |
The files containing sequence material of interest either used for insert design or directly cloned into vectors in this library. |
[optional] |
associated_phenotypes |
set[character] |
Ontological terms for diseases or phenotypes associated with the sequence material cloned in this construct library. |
[optional] |
orf_list |
set[character] |
List of Open Reading Frames this construct library was designed to target. |
[optional] |
exon |
character |
An identifier in plain text for the specific exon in an expression vector library. The associated gene must be listed in the small_scale_gene_list property. |
[optional] [Pattern: ^(\S+(\s\|\S)*\S+\|\S)$] |
tile |
Tile |
|
[optional] |
guide_type |
character |
The design of guides used in a CRISPR library, paired-guide (pgRNA) or single-guide (sgRNA). |
[optional] [Enum: [sgRNA, pgRNA]] |
tiling_modality |
character |
The tiling modality of guides across elements or loci in a CRISPR library. |
[optional] [Enum: [peak tiling, full tiling, sparse peaks]] |
average_guide_coverage |
numeric |
The average number of guides targeting each element of interest in the library. |
[optional] [Min: 0] |
lower_bound_guide_coverage |
integer |
Lower bound of the number of guides targeting each element of interest in the library. |
[optional] |
upper_bound_guide_coverage |
integer |
Upper bound of the number of guides targeting each element of interest in the library. |
[optional] |
average_insert_size |
numeric |
The average size of the inserts cloned into vectors in the library. |
[optional] [Min: 0] |
lower_bound_insert_size |
integer |
Lower bound of the size of the inserts cloned in vectors in the library. |
[optional] |
upper_bound_insert_size |
integer |
Upper bound of the size of the inserts cloned in vectors in the library. |
[optional] |
targeton |
character |
An identifier in plain text for the specific targeton in an editing template library. The associated gene must be listed in the small_scale_gene_list property. |
[optional] [Pattern: ^(\S+(\s\|\S)*\S+\|\S)$] |
applied_to_samples |
set[character] |
The samples that link to this construct library set. |
[optional] |
assemblies |
set[character] |
The genome assemblies that the referencing files in the file set utilize (e.g., GRCh38). |
[optional] |
transcriptome_annotations |
set[character] |
The annotation versions of the reference resource. |
[optional] |
assay_term |
character |
The assay used to produce data in this measurement set. |
[optional] |
preferred_assay_title |
character |
The custom lab preferred label for the experiment performed in this measurement set. |
[optional] [Enum: [RNA-seq, scRNA-seq, snRNA-seq, scNT-seq, scNT-seq2, Parse SPLiT-seq, ATAC-seq, varACCESS, scATAC-seq, snATAC-seq, scMito-seq, DOGMA-seq, 10x multiome, 10x multiome with MULTI-seq, MULTI-seq, SHARE-seq, Histone ChIP-seq, TF ChIP-seq, MPRA, MPRA (scQer), electroporated MPRA, AAV-MPRA, lentiMPRA, STARR-seq, SUPERSTARR, Cell painting, Variant painting via fluorescence, Variant painting via immunostaining, smFISH, MERFISH, Proliferation CRISPR screen, Growth CRISPR screen, Migration CRISPR screen, CRISPR FlowFISH screen, CRISPR FACS screen, CRISPR MACS screen, CRISPR mCherry screen, HCR-FlowFISH screen, scCRISPR screen, Perturb-seq, CERES-seq, TAP-seq, Variant-EFFECTS, SGE, MIAA, snmC-Seq2, snMCT-seq, snM3C-seq, mN2H, semi-qY2H, Y2H, yN2H, VAMP-seq, VAMP-seq (MultiSTEP), Hi-C, HiCAR, Spatial transcriptomics, HT-recruit, ONT dRNA, ONT Fiber-seq, ONT direct WGS, WGS]] |
multiome_size |
integer |
The number of datasets included in the multiome experiment this measurement set is a part of. |
[optional] [Min: 2] |
sequencing_library_types |
set[character] |
Description of the libraries sequenced in this measurement set. |
[optional] [Enum: ] |
auxiliary_sets |
set[character] |
The auxiliary sets of files produced alongside raw data from this measurement set. |
[optional] |
external_image_url |
character |
Links to the external site where images produced by this measurement are stored. |
[optional] [Pattern: ^https://cellpainting-gallery\.s3\.amazonaws\.com(\S+)$] |
targeted_genes |
set[character] |
A list of genes targeted in this assay. For example, TF ChIP-seq attempts to identify binding sites of a protein encoded by a specific gene. In CRISPR FlowFISH, the modified samples are sorted based on expression of a specific gene. This property differs from small_scale_gene_list in Construct Library Set, which describes genes targeted by the content integrated in the constructs (such as guide RNAs.) |
[optional] |
onlist_method |
character |
The method by which the onlist files will be combined by the seqspec onlist tool to generate the final barcode inclusion list for the single cell uniform pipeline. |
[optional] [Enum: [no combination, product, multi]] |
onlist_files |
set[character] |
The barcode region onlist files listed in associated seqspec yaml files. |
[optional] |
related_multiome_datasets |
set[character] |
Related datasets included in the multiome experiment this measurement set is a part of. |
[optional] |
model_name |
character |
The custom lab name given to this predictive model set. |
[optional] |
model_version |
character |
The semantic version number for this predictive model set. |
[optional] [Pattern: ^v(?!0\.0\.0$)[0-9]+\.[0-9]+\.[0-9]+$] |
prediction_objects |
set[character] |
The objects this predictive model set is targeting. |
[optional] [Enum: ] |
model_zoo_location |
character |
The link to the model on the Kipoi repository. |
[optional] [Pattern: ^https?://kipoi\.org/models/(\S+)$] |
assessed_genes |
set[character] |
A list of gene(s) assessed in this prediction set. This property is used to describe the gene(s) being investigated, especially how the input variables in the prediction set affect some critical functionality of the gene(s). For example, the effect could be predicted from genetic variants on the binding affinity of a transcription factor encoded by a gene (assessed_genes). It differs from small_scale_gene_list and large_scale_gene_list, as these are used when the input variables of the prediction set are genes. |
[optional] |
external_input_data |
character |
A tabular file with links to external data utilized for this model. |
[optional] |
geneid |
character |
ENSEMBL GeneID of official nomenclature approved gene. The GeneID does not include the current version number suffix. |
[optional] [Pattern: ^ENS[A-Z]*G\d{11}(_PAR_Y)?$] |
symbol |
character |
Gene symbol approved by the official nomenclature. |
[optional] |
locations |
set[GeneLocation1] |
Gene locations specified using 1-based, closed coordinates for different versions of reference genome assemblies. |
[optional] |
version_number |
character |
Current ENSEMBL GeneID version number of the gene. |
[optional] [Pattern: ^\d+?] |
geneid_with_version |
character |
The ENSEMBL GeneID concatenated with its version number. |
[optional] |
caption |
character |
The caption of the image. |
[optional] |
thumb_nail |
character |
Image URL. |
[optional] |
download_url |
character |
Download URL. |
[optional] |
certificate_identifier |
character |
A unique identifier for the certificate. |
[optional] [Pattern: ^IP\d{3}-\d{2}$] |
data_use_limitation |
character |
Code indicating the limitations on data use for data generated from the applicable samples. GRU (General research use): Use of the data is limited only by the terms of the Data Use Certification: these data will be added to the dbGaP Collection. HMB (Health/medical/biomedical): Use of the data is limited to health/medical/biomedical purposes, does not include the study of population origins or ancestry. DS (Disease specific): Use of the data must be related to the specified disease. Other: any other customized limitation. |
[optional] [Enum: [DS, GRU, HMB, other]] |
data_use_limitation_modifiers |
set[character] |
Code indicating a modifier on the limitations on data use for data generated from the applicable samples. COL: Requestor must provide a letter of collaboration with the primary study investigator(s). GSO: Use of the data is limited to genetic studies only. IRB (IRB approval required): Requestor must provide documentation of local IRB approval. MDS: Use of the data includes methods development research (e.g., development and testing of software or algorithms). NPU: Use of the data is limited to not-for-profit organizations. PUB: Requestor agrees to make results of studies using the data available to the larger scientific community. |
[optional] [Enum: ] |
pi |
character |
Principal Investigator of the lab. |
[optional] |
awards |
set[character] |
Grants associated with the lab. |
[optional] |
institute_label |
character |
An abbreviation for the institute the lab is associated with. |
[optional] [Pattern: ^(\S+(\s\|\S)*\S+\|\S)$] |
activated |
character |
A boolean indicating whether the modification has been activated by a chemical agent. |
[optional] |
activating_agent_term_id |
character |
The CHEBI identifier for the activating agent of the modification. |
[optional] [Pattern: ^CHEBI:[0-9]{1,7}$] |
activating_agent_term_name |
character |
The CHEBI name for the activating agent of the modification. |
[optional] |
modality |
character |
The purpose or intended effect of a modification. |
[optional] [Enum: [degradation]] |
tagged_proteins |
set[character] |
The tagged proteins which are targeted for degradation. |
[optional] |
cas |
character |
The name of the CRISPR associated protein used in the modification. |
[optional] [Enum: [Cas9, Cas12a, Cas13, dCas9, nCas9, SpG, SpRY]] |
fused_domain |
character |
The name of the molecule fused to a Cas protein. |
[optional] [Enum: [2xVP64, 3xVP64, ABE8e, ABE8.20, ANTI-FLAG, BE4, BE4max, eA3A, eA3A-T31A, eA3A-T44D-S45A, KOX1-KRAB, M-MLV RT (PE2), p300, TdCBE, TdCGBE, TdDE, VPH, VP64, VP64-p65-Rta (VPR), ZIM3-KRAB]] |
cas_species |
character |
The originating species of the Cas nuclease. |
[optional] [Enum: [Streptococcus pyogenes (Sp), Staphylococcus aureus (Sa), Campylobacter jejuni (Cj), Neisseria meningitidis (Nm)]] |
biosamples_modified |
set[character] |
The biosamples which have been modified with this modification. |
[optional] |
degron_system |
character |
The type of degron system implemented. |
[optional] [Enum: [AID, AlissAid, ssAID]] |
term_id |
character |
An ontology identifier describing a biological sample. |
[optional] [Pattern: ^(UBERON |
term_name |
character |
Ontology term describing a biological sample, assay, trait, or disease. |
[optional] [Pattern: ^(?![\s"'])[\S |
deprecated_ntr_terms |
set[character] |
A list of deprecated NTR terms previously associated with this ontology term. |
[optional] |
is_a |
set[character] |
A list of ontology terms which are the nearest ancestor to this ontology term. |
[optional] |
preferred_assay_titles |
set[character] |
The custom lab preferred labels that this assay term may be associated with. |
[optional] [Enum: ] |
ancestors |
set[character] |
List of term names of ontological terms that precede the given term in the ontological tree. These ancestor terms are typically more general ontological terms under which the term is classified. |
[optional] |
ontology |
character |
The ontology in which the term is recorded. |
[optional] |
assay_slims |
set[character] |
A broad categorization of the assay term. |
[optional] |
category_slims |
set[character] |
The type of feature or interaction measured by the assay. |
[optional] |
objective_slims |
set[character] |
The purpose of the assay. |
[optional] |
company |
character |
The company that developed and sells the instrument. |
[optional] [Enum: [10X Genomics, Element Biosciences, Illumina, Life Technologies, Oxford Nanopore Technologies, Pacific Biosciences, Parse Biosciences, Roche, Singular Genomics]] |
sequencing_kits |
set[character] |
The available sequencing kits for this platform. |
[optional] [Enum: ] |
organ_slims |
set[character] |
Organs associated with the sample term. |
[optional] |
cell_slims |
set[character] |
Cells associated with the sample term. |
[optional] |
developmental_slims |
set[character] |
Developmental stages associated with the sample term. |
[optional] |
system_slims |
set[character] |
Organ systems associated with the sample term. |
[optional] |
orf_id |
character |
Open reading frame ID. |
[optional] [Pattern: ^CCSBORF[1-9][0-9]*$] |
genes |
set[character] |
ENSEMBL GeneIDs of official nomenclature approved genes. The GeneIDs do not include the current version number suffix. |
[optional] |
protein_id |
character |
ENSEMBL ProteinID of official nomenclature approved protein. The ProteinID does not include the current version number suffix. |
[optional] [Pattern: ^ENSP\d{11}.?\d*?$] |
pct_identical_protein |
numeric |
The percentage of identical matches to Ensembl protein. |
[optional] [Max: 100] [Min: 0] |
pct_coverage_protein |
numeric |
The percentage of ORF covered by Ensembl protein. |
[optional] [Max: 100] [Min: 0] |
pct_coverage_orf |
numeric |
The percentage of Ensembl protein covered by ORF. |
[optional] [Max: 100] [Min: 0] |
parent |
character |
The parent page associated with this page. |
[optional] |
layout |
PageLayout |
|
[optional] |
canonical_uri |
character |
The path of the page. |
[optional] |
feature |
character |
The phenotypic feature observed for the donor. |
[optional] |
quantity |
numeric |
A quantity associated with the phenotypic feature, if applicable. |
[optional] |
quantity_units |
character |
The unit of measurement for a quantity associated with the phenotypic feature. |
[optional] [Enum: [meter, micromole, nanogram, microgram, milligram, gram, kilogram, milli-International Unit per milliliter, picogram per milliliter, nanogram per milliliter, milligram per deciliter]] |
quality |
character |
A quality assessment associated with the phenotypic feature, such as a categorical description. |
[optional] [Enum: [none, sparse, moderate, frequent, unknown, I, II, III, IV, V, VI, 2/2, 2/3, 2/4, 3/3, 3/4, 4/4]] |
observation_date |
character |
The date the feature was observed or measured. |
[optional] |
abstract |
character |
Abstract of the publication or communication. |
[optional] |
authors |
character |
The authors of the publication. |
[optional] |
date_published |
character |
The date the publication or communication was published; must be in YYYY-MM-DD format. |
[optional] |
date_revised |
character |
The date the publication was revised. |
[optional] |
issue |
character |
The issue of the publication. |
[optional] |
page |
character |
Pagination of the reference. |
[optional] |
volume |
character |
The volume of the publication. |
[optional] |
journal |
character |
The journal of the publication. |
[optional] |
publication_identifiers |
set[character] |
The publication identifiers associated with this publication object. |
[optional] |
published_by |
set[character] |
The affiliation of the lab with a larger organization, such as IGVF. |
[optional] [Enum: ] |
publication_year |
integer |
The year the publication was published. |
[optional] |
file_sets |
set[character] |
The file sets linked to this sample. |
[optional] |
software |
character |
Unique name of the software package. |
[optional] |
lower_bound_age |
numeric |
Lower bound of age of the organism at the time of collection of the sample. |
[optional] |
upper_bound_age |
numeric |
Upper bound of age of the organism at the time of collection of the sample. |
[optional] |
age_units |
character |
The units of time associated with age of the biosample. |
[optional] [Enum: [minute, hour, day, week, month, year]] |
sample_terms |
set[character] |
Ontology terms identifying a biosample. |
[optional] |
disease_terms |
set[character] |
Ontology term of the disease associated with the biosample. |
[optional] |
pooled_from |
set[character] |
The biosamples this biosample is pooled from. |
[optional] |
part_of |
character |
Links to a biosample which represents a larger sample from which this sample was taken regardless of whether it is a tissue taken from an organism or smaller slices of a piece of tissue or aliquots of a cell growth. |
[optional] |
originated_from |
character |
Links to the biosample from which this sample originated through differentiation, dedifferentiation, reprogramming, or the introduction of a genetic modification. |
[optional] |
treatments |
set[character] |
A list of treatments applied to the biosample with the purpose of perturbation. |
[optional] |
biomarkers |
set[character] |
Biological markers that are associated with this sample. |
[optional] |
embryonic |
character |
Biosample is embryonic. |
[optional] |
modifications |
set[character] |
Links to modifications applied to this biosample. |
[optional] |
cellular_sub_pool |
character |
Cellular sub-pool fraction of the sample. Also known as PKR and sub-library. |
[optional] [Pattern: ^[a-zA-Z\d_.()-]+(?:\s[a-zA-Z\d_.()-]+)*$] |
starting_amount |
numeric |
The initial quantity of samples obtained. |
[optional] |
starting_amount_units |
character |
The units used to quantify the amount of samples obtained. |
[optional] [Enum: [cells, cells/ml, g, items, mg, whole animals, whole embryos, μg, ng]] |
date_obtained |
character |
The date the sample was harvested, dissected or created, depending on the type of the sample. |
[optional] |
sorted_from |
character |
Links to a larger sample from which this sample was obtained through sorting. |
[optional] |
sorted_from_detail |
character |
Detail for sample sorted into fractions capturing information about sorting. |
[optional] |
construct_library_sets |
set[character] |
The construct library sets of vectors introduced to this sample prior to performing an assay. |
[optional] |
moi |
numeric |
The actual multiplicity of infection (MOI) for vectors introduced to this sample. At least one construct library set must be specified in order to specify MOI. This property should capture the actual MOI, and not the targeted MOI. |
[optional] [Min: 0] |
nucleic_acid_delivery |
character |
Method of introduction of nucleic acid into the cell. |
[optional] [Enum: [transfection, adenoviral transduction, lentiviral transduction]] |
time_post_library_delivery |
numeric |
The time that elapsed past the time-point when the construct library sets were introduced. |
[optional] |
time_post_library_delivery_units |
character |
The units of time that elapsed past the point when the construct library sets were introduced. |
[optional] [Enum: [minute, hour, day, week, month]] |
classifications |
set[character] |
The general category of this type of sample. |
[optional] |
time_post_change |
numeric |
The time that elapsed past the time-point when the cell fate change treatments were introduced. |
[optional] |
time_post_change_units |
character |
The units of time that elapsed past the point when the cell fate change treatments were introduced. |
[optional] [Enum: [minute, hour, day, week, month]] |
cell_fate_change_treatments |
set[character] |
A list of treatments applied to the biosample with the purpose of differentiation, dedifferentiation, or reprogramming. |
[optional] |
cell_fate_change_protocol |
character |
A protocol applied to the biosample with the purpose of differentiation, dedifferentiation, or reprogramming. |
[optional] |
demultiplexed_from |
character |
The biosample this in vitro system sample was demultiplexed from using computational methods. |
[optional] |
passage_number |
integer |
Number of passages including the passages from the source. |
[optional] [Min: 0] |
targeted_sample_term |
character |
Ontology term identifying the targeted endpoint biosample resulting from differentiation or reprogramming. |
[optional] |
growth_medium |
character |
A growth medium of the in vitro system. |
[optional] [Enum: [DMEM with serum, DMEM without serum, SMBM with serum, SMBM without serum]] |
biosample_qualifiers |
set[character] |
An array of various cell states. This property provides additional information about a cell at a finer-grained level compared to what ontologies currently capture. For example, exhausted T-cells. |
[optional] [Enum: ] |
multiplexed_in |
set[character] |
The multiplexed samples in which this sample is included. |
[optional] |
sorted_fractions |
set[character] |
The fractions into which this sample has been sorted. |
[optional] |
origin_of |
set[character] |
The samples which originate from this sample, such as through a process of cell differentiation. |
[optional] |
institutional_certificates |
set[character] |
The institutional certificates under which use of this sample is approved. |
[optional] |
age |
character |
Age of organism at the time of collection of the sample. |
[optional] [Pattern: ^((\d+(\.[1-9])?(\-\d+(\.[1-9])?)?) |
upper_bound_age_in_hours |
numeric |
Upper bound of age of organism in hours at the time of collection of the sample. |
[optional] |
lower_bound_age_in_hours |
numeric |
Lower bound of age of organism in hours at the time of collection of the sample. |
[optional] |
parts |
set[character] |
The parts into which this sample has been divided. |
[optional] |
pooled_in |
set[character] |
The pooled samples in which this sample is included. |
[optional] |
demultiplexed_to |
set[character] |
The parts into which this sample has been demultiplexed. |
[optional] |
multiplexed_samples |
set[character] |
The samples multiplexed together to produce this sample. |
[optional] |
multiplexing_methods |
set[character] |
The methods used for multiplexing and demultiplexing. |
[optional] [Enum: ] |
sample_material |
character |
|
[optional] [Enum: [undefined, inorganic, synthetic, organic]] |
pmi |
integer |
The amount of time elapsed since death. |
[optional] [Min: 1] |
pmi_units |
character |
The unit in which the PMI time was reported. |
[optional] [Enum: [second, minute, hour, day, week]] |
ccf_id |
character |
HubMap Common Coordinate Framework unique identifier corresponding to the organ, biological structure, and spatial location of the tissue specimen. |
[optional] |
preservation_method |
character |
The method by which the tissue was preserved: cryopreservation (slow-freeze) or flash-freezing. |
[optional] [Enum: [cryopreservation, flash-freezing]] |
used_by |
set[character] |
The component(s) of the IGVF consortium that utilize this software. |
[optional] [Enum: ] |
versions |
set[character] |
A list of versions that have been released for this software. |
[optional] |
version |
character |
The version of a particular software. |
[optional] [Pattern: ^v(?!0\.0\.0$)([0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)? |
download_id |
character |
The MD5 checksum, SHA-1 commit ID, image hash, or similar permanent identifier of the particular version of software used. |
[optional] |
amount |
numeric |
Specific quantity of the applied treatment (used in conjunction with amount_units). |
[optional] |
amount_units |
character |
A unit for an amount other than those for time or temperature. |
[optional] [Enum: [mg/kg, mg/mL, mM, ng/mL, nM, percent, μg/kg, μg/kg, μg/mL, μM, kpa]] |
duration |
numeric |
Duration indicates the time elapsed between the start and end of the treatment. |
[optional] |
duration_units |
character |
A unit of time. |
[optional] [Enum: [second, minute, hour, day]] |
pH |
numeric |
Final pH of the solution containing a chemical compound (if applicable). |
[optional] |
purpose |
character |
The intended purpose for treating the samples. |
[optional] [Enum: [activation, agonist, antagonist, control, differentiation, de-differentiation, perturbation, selection, stimulation]] |
post_treatment_time |
numeric |
Post treatment time, in conjunction with post treatment time units, specifies the time that passed between the point when the biosamples were removed from the treatment solution and the point when they were sampled or treated with the next treatment. |
[optional] |
post_treatment_time_units |
character |
A unit of time. |
[optional] [Enum: [minute, hour, day, week, month]] |
temperature |
numeric |
The temperature in Celsius to which the sample was exposed. |
[optional] |
temperature_units |
character |
A unit of temperature. |
[optional] [Enum: [Celsius]] |
treatment_type |
character |
The classification of treatment agent that specifies its exact molecular nature. |
[optional] [Enum: [chemical, protein, environmental]] |
treatment_term_id |
character |
Ontology identifier describing a component in the treatment. |
[optional] [Pattern: ^((CHEBI:[0-9]{1,7}) |
treatment_term_name |
character |
Ontology term describing a component in the treatment that is the principal component affecting the biosample being treated. Examples: interferon gamma, interleukin-4, Fibroblast growth factor 2, 20-hydroxyecdysone, 5-bromouridine etc. |
[optional] |
depletion |
character |
Treatment is depleted. |
[optional] |
biosamples_treated |
set[character] |
The samples which have been treated using this treatment. |
[optional] |
email |
character |
The email associated with the user's account. |
[optional] [Pattern: ^[^A-Z\\s@]+@[^A-Z\\s@]+\.[^A-Z\\s@]+$] |
first_name |
character |
The user's first (given) name. |
[optional] |
last_name |
character |
The user's last (family) name. |
[optional] |
submits_for |
set[character] |
Labs user is authorized to submit data for. |
[optional] |
groups |
set[character] |
Additional access control groups. |
[optional] [Enum: ] |
viewing_groups |
set[character] |
The group that determines which set of data the user has permission to view. |
[optional] [Enum: ] |
job_title |
character |
The role of the user in their lab or organization. |
[optional] [Enum: [Principal Investigator, Co-Investigator, Project Manager, Submitter, Post Doc, Data Wrangler, Scientist, Computational Scientist, Software Developer, NHGRI staff member, Other]] |
workflow_repositories |
set[character] |
Resources hosting the workflow. |
[optional] |
standards_page |
character |
A link to a page describing the standards for this workflow. |
[optional] |
workflow_version |
integer |
The version of this workflow. |
[optional] [Min: 1] |
uniform_pipeline |
character |
Indicates whether the pipeline is developed by the IGVF consortium. |
[optional] |
analysis_steps |
set[character] |
The analysis steps which are part of this workflow. |
[optional] |