Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create database migration to add WGS entities #109

Merged
merged 2 commits into from
Nov 9, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
316 changes: 316 additions & 0 deletions entities/database_migrations/versions/20231109_103318_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
"""

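Create database migration to add WGS entities.

Revision ID: 20231109_103318
Revises: 20230929_110616
Create Date: 2023-11-09 18:33:19.405053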

"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "20231109_103318"
down_revision = "20230929_110616"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the ``entity`` and ``file`` tables plus one table per entity type.

    Every per-type table uses ``entity_id`` as its primary key and as a foreign
    key to ``entity.id``. Tables are created in dependency order: a table is
    only created after every table its foreign keys point at already exists,
    so the order of the ``op.create_table`` calls below must be preserved.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "entity",
        sa.Column("id", sa.UUID(), nullable=False),
        sa.Column("type", sa.String(), nullable=False),
        sa.Column("producing_run_id", sa.Integer(), nullable=True),
        sa.Column("owner_user_id", sa.Integer(), nullable=False),
        sa.Column("collection_id", sa.Integer(), nullable=False),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_entity")),
    )
    op.create_table(
        "file",
        sa.Column("id", sa.UUID(), nullable=False),
        sa.Column("entity_id", sa.UUID(), nullable=True),
        sa.Column("entity_field_name", sa.String(), nullable=False),
        # NOTE(review): unlike the other enums in this migration, this one does not
        # pass native_enum=False, so backends with native enum support get a
        # separate `filestatus` type.
        sa.Column("status", sa.Enum("SUCCESS", "FAILED", "PENDING", name="filestatus"), nullable=False),
        sa.Column("protocol", sa.String(), nullable=False),
        sa.Column("namespace", sa.String(), nullable=False),
        sa.Column("path", sa.String(), nullable=False),
        sa.Column("file_format", sa.String(), nullable=False),
        sa.Column("compression_type", sa.String(), nullable=True),
        # NOTE(review): presumably a size in bytes; if so, a 32-bit Integer caps at
        # ~2 GiB and BigInteger may be needed -- confirm against the model.
        sa.Column("size", sa.Integer(), nullable=True),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_file_entity_id_entity")),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_file")),
    )
    op.create_table(
        "metadata_field",
        sa.Column("field_name", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=False),
        sa.Column("field_type", sa.String(), nullable=False),
        sa.Column("is_required", sa.Boolean(), nullable=False),
        sa.Column("options", sa.String(), nullable=True),
        sa.Column("default_value", sa.String(), nullable=True),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_metadata_field_entity_id_entity")),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_metadata_field")),
    )
    op.create_table(
        "upstream_database",
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_upstream_database_entity_id_entity")),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_upstream_database")),
    )
    op.create_table(
        "coverage_viz",
        sa.Column("accession_id", sa.String(), nullable=False),
        sa.Column("coverage_viz_file_id", sa.UUID(), nullable=False),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(
            ["coverage_viz_file_id"], ["file.id"], name=op.f("fk_coverage_viz_coverage_viz_file_id_file")
        ),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_coverage_viz_entity_id_entity")),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_coverage_viz")),
    )
    op.create_table(
        "metadata_field_project",
        sa.Column("project_id", sa.Integer(), nullable=False),
        sa.Column("metadata_field_id", sa.UUID(), nullable=False),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_metadata_field_project_entity_id_entity")),
        sa.ForeignKeyConstraint(
            ["metadata_field_id"],
            ["metadata_field.entity_id"],
            name=op.f("fk_metadata_field_project_metadata_field_id_metadata_field"),
        ),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_metadata_field_project")),
    )
    op.create_table(
        "taxon",
        sa.Column("wikipedia_id", sa.String(), nullable=True),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column("common_name", sa.String(), nullable=True),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("is_phage", sa.Boolean(), nullable=False),
        sa.Column("upstream_database_id", sa.UUID(), nullable=False),
        sa.Column("upstream_database_identifier", sa.String(), nullable=False),
        sa.Column("level", sa.Enum("species", "genus", "family", name="taxonlevel", native_enum=False), nullable=False),
        sa.Column("tax_id", sa.Integer(), nullable=False),
        sa.Column("tax_id_parent", sa.Integer(), nullable=False),
        sa.Column("tax_id_species", sa.Integer(), nullable=False),
        sa.Column("tax_id_genus", sa.Integer(), nullable=False),
        sa.Column("tax_id_family", sa.Integer(), nullable=False),
        sa.Column("tax_id_order", sa.Integer(), nullable=False),
        sa.Column("tax_id_class", sa.Integer(), nullable=False),
        sa.Column("tax_id_phylum", sa.Integer(), nullable=False),
        sa.Column("tax_id_kingdom", sa.Integer(), nullable=False),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_taxon_entity_id_entity")),
        sa.ForeignKeyConstraint(
            ["upstream_database_id"],
            ["upstream_database.entity_id"],
            name=op.f("fk_taxon_upstream_database_id_upstream_database"),
        ),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_taxon")),
    )
    op.create_table(
        "reference_genome",
        sa.Column("file_id", sa.UUID(), nullable=False),
        sa.Column("file_index_id", sa.UUID(), nullable=True),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=False),
        sa.Column("taxon_id", sa.UUID(), nullable=False),
        sa.Column("accession_id", sa.String(), nullable=True),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_reference_genome_entity_id_entity")),
        sa.ForeignKeyConstraint(["file_id"], ["file.id"], name=op.f("fk_reference_genome_file_id_file")),
        sa.ForeignKeyConstraint(["file_index_id"], ["file.id"], name=op.f("fk_reference_genome_file_index_id_file")),
        sa.ForeignKeyConstraint(["taxon_id"], ["taxon.entity_id"], name=op.f("fk_reference_genome_taxon_id_taxon")),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_reference_genome")),
    )
    op.create_table(
        "sample",
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("sample_type", sa.String(), nullable=False),
        sa.Column("water_control", sa.Boolean(), nullable=False),
        sa.Column("collection_date", sa.DateTime(), nullable=True),
        sa.Column("collection_location", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column("host_taxon_id", sa.UUID(), nullable=True),
        # entity_id is this table's primary key, so it is declared NOT NULL like
        # the entity_id primary key of every other table in this migration.
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_sample_entity_id_entity")),
        sa.ForeignKeyConstraint(["host_taxon_id"], ["taxon.entity_id"], name=op.f("fk_sample_host_taxon_id_taxon")),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_sample")),
    )
    op.create_table(
        "genomic_range",
        sa.Column("reference_genome_id", sa.UUID(), nullable=False),
        sa.Column("file_id", sa.UUID(), nullable=False),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_genomic_range_entity_id_entity")),
        sa.ForeignKeyConstraint(["file_id"], ["file.id"], name=op.f("fk_genomic_range_file_id_file")),
        sa.ForeignKeyConstraint(
            ["reference_genome_id"],
            ["reference_genome.entity_id"],
            name=op.f("fk_genomic_range_reference_genome_id_reference_genome"),
        ),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_genomic_range")),
    )
    op.create_table(
        "metadatum",
        sa.Column("sample_id", sa.UUID(), nullable=False),
        sa.Column("metadata_field_id", sa.UUID(), nullable=False),
        sa.Column("value", sa.String(), nullable=False),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_metadatum_entity_id_entity")),
        sa.ForeignKeyConstraint(
            ["metadata_field_id"],
            ["metadata_field.entity_id"],
            name=op.f("fk_metadatum_metadata_field_id_metadata_field"),
        ),
        sa.ForeignKeyConstraint(["sample_id"], ["sample.entity_id"], name=op.f("fk_metadatum_sample_id_sample")),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_metadatum")),
    )
    op.create_table(
        "sequence_alignment_index",
        sa.Column("index_file_id", sa.UUID(), nullable=False),
        sa.Column("reference_genome_id", sa.UUID(), nullable=False),
        sa.Column(
            "tool", sa.Enum("bowtie2", "minimap2", "ncbi", name="alignmenttool", native_enum=False), nullable=False
        ),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(
            ["entity_id"], ["entity.id"], name=op.f("fk_sequence_alignment_index_entity_id_entity")
        ),
        sa.ForeignKeyConstraint(
            ["index_file_id"], ["file.id"], name=op.f("fk_sequence_alignment_index_index_file_id_file")
        ),
        sa.ForeignKeyConstraint(
            ["reference_genome_id"],
            ["reference_genome.entity_id"],
            name=op.f("fk_sequence_alignment_index_reference_genome_id_reference_genome"),
        ),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_sequence_alignment_index")),
    )
    op.create_table(
        "sequencing_read",
        sa.Column("sample_id", sa.UUID(), nullable=True),
        sa.Column(
            "protocol",
            sa.Enum("MNGS", "TARGETED", "MSSPE", name="sequencingprotocol", native_enum=False),
            nullable=False,
        ),
        sa.Column("r1_file_id", sa.UUID(), nullable=False),
        sa.Column("r2_file_id", sa.UUID(), nullable=True),
        # NOTE(review): "techonology" looks like a misspelling of "technology". It is
        # kept byte-for-byte because renaming the column here would change the
        # schema; presumably the ORM model uses the same spelling -- confirm.
        sa.Column(
            "techonology",
            sa.Enum("Illumina", "Nanopore", name="sequencingtechnology", native_enum=False),
            nullable=False,
        ),
        sa.Column("nucleic_acid", sa.Enum("RNA", "DNA", name="nucleicacid", native_enum=False), nullable=False),
        sa.Column("has_ercc", sa.Boolean(), nullable=False),
        sa.Column("taxon_id", sa.UUID(), nullable=True),
        sa.Column("primer_file_id", sa.UUID(), nullable=True),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_sequencing_read_entity_id_entity")),
        sa.ForeignKeyConstraint(["primer_file_id"], ["file.id"], name=op.f("fk_sequencing_read_primer_file_id_file")),
        sa.ForeignKeyConstraint(["r1_file_id"], ["file.id"], name=op.f("fk_sequencing_read_r1_file_id_file")),
        sa.ForeignKeyConstraint(["r2_file_id"], ["file.id"], name=op.f("fk_sequencing_read_r2_file_id_file")),
        sa.ForeignKeyConstraint(["sample_id"], ["sample.entity_id"], name=op.f("fk_sequencing_read_sample_id_sample")),
        sa.ForeignKeyConstraint(["taxon_id"], ["taxon.entity_id"], name=op.f("fk_sequencing_read_taxon_id_taxon")),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_sequencing_read")),
    )
    op.create_table(
        "consensus_genome",
        sa.Column("taxon_id", sa.UUID(), nullable=False),
        sa.Column("sequence_read_id", sa.UUID(), nullable=False),
        sa.Column("genomic_range_id", sa.UUID(), nullable=False),
        sa.Column("reference_genome_id", sa.UUID(), nullable=False),
        sa.Column("sequence_id", sa.UUID(), nullable=False),
        sa.Column("is_reverse_complement", sa.Boolean(), nullable=False),
        sa.Column("intermediate_outputs_id", sa.UUID(), nullable=True),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_consensus_genome_entity_id_entity")),
        sa.ForeignKeyConstraint(
            ["genomic_range_id"],
            ["genomic_range.entity_id"],
            name=op.f("fk_consensus_genome_genomic_range_id_genomic_range"),
        ),
        sa.ForeignKeyConstraint(
            ["intermediate_outputs_id"], ["file.id"], name=op.f("fk_consensus_genome_intermediate_outputs_id_file")
        ),
        sa.ForeignKeyConstraint(
            ["reference_genome_id"],
            ["reference_genome.entity_id"],
            name=op.f("fk_consensus_genome_reference_genome_id_reference_genome"),
        ),
        sa.ForeignKeyConstraint(["sequence_id"], ["file.id"], name=op.f("fk_consensus_genome_sequence_id_file")),
        sa.ForeignKeyConstraint(
            ["sequence_read_id"],
            ["sequencing_read.entity_id"],
            name=op.f("fk_consensus_genome_sequence_read_id_sequencing_read"),
        ),
        sa.ForeignKeyConstraint(["taxon_id"], ["taxon.entity_id"], name=op.f("fk_consensus_genome_taxon_id_taxon")),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_consensus_genome")),
    )
    op.create_table(
        "contig",
        sa.Column("sequencing_read_id", sa.UUID(), nullable=True),
        sa.Column("sequence", sa.String(), nullable=False),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_contig_entity_id_entity")),
        sa.ForeignKeyConstraint(
            ["sequencing_read_id"],
            ["sequencing_read.entity_id"],
            name=op.f("fk_contig_sequencing_read_id_sequencing_read"),
        ),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_contig")),
    )
    op.create_table(
        "metric_consensus_genome",
        sa.Column("consensus_genome_id", sa.UUID(), nullable=False),
        sa.Column("coverage_depth", sa.Float(), nullable=True),
        sa.Column("reference_genome_length", sa.Float(), nullable=True),
        sa.Column("percent_genome_called", sa.Float(), nullable=True),
        sa.Column("percent_identity", sa.Float(), nullable=True),
        sa.Column("gc_percent", sa.Float(), nullable=True),
        sa.Column("total_reads", sa.Integer(), nullable=True),
        sa.Column("mapped_reads", sa.Integer(), nullable=True),
        sa.Column("ref_snps", sa.Integer(), nullable=True),
        sa.Column("n_actg", sa.Integer(), nullable=True),
        sa.Column("n_missing", sa.Integer(), nullable=True),
        sa.Column("n_ambiguous", sa.Integer(), nullable=True),
        sa.Column("coverage_viz_summary_file_id", sa.UUID(), nullable=False),
        sa.Column("entity_id", sa.UUID(), nullable=False),
        sa.ForeignKeyConstraint(
            ["consensus_genome_id"],
            ["consensus_genome.entity_id"],
            name=op.f("fk_metric_consensus_genome_consensus_genome_id_consensus_genome"),
        ),
        sa.ForeignKeyConstraint(
            ["coverage_viz_summary_file_id"],
            ["file.id"],
            name=op.f("fk_metric_consensus_genome_coverage_viz_summary_file_id_file"),
        ),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"], name=op.f("fk_metric_consensus_genome_entity_id_entity")),
        sa.PrimaryKeyConstraint("entity_id", name=op.f("pk_metric_consensus_genome")),
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    """Drop every table created by ``upgrade()`` and the ``filestatus`` enum type.

    Tables are dropped in the reverse of their creation order so that a table
    is removed before any table its foreign keys reference.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table("metric_consensus_genome")
    op.drop_table("contig")
    op.drop_table("consensus_genome")
    op.drop_table("sequencing_read")
    op.drop_table("sequence_alignment_index")
    op.drop_table("metadatum")
    op.drop_table("genomic_range")
    op.drop_table("sample")
    op.drop_table("reference_genome")
    op.drop_table("taxon")
    op.drop_table("metadata_field_project")
    op.drop_table("coverage_viz")
    op.drop_table("upstream_database")
    op.drop_table("metadata_field")
    op.drop_table("file")
    op.drop_table("entity")
    # ### end Alembic commands ###

    # `file.status` is a native enum named "filestatus". On backends with native
    # enum types (e.g. PostgreSQL) dropping the table leaves the type behind, and
    # a later upgrade() would then fail because the type already exists. On
    # backends without native enums this call is a no-op. The other enums in this
    # migration use native_enum=False and therefore create no standalone type.
    sa.Enum("SUCCESS", "FAILED", "PENDING", name="filestatus").drop(op.get_bind(), checkfirst=True)
Loading