Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create database migration to add WGS entities #109

Merged
merged 2 commits into from
Nov 9, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
238 changes: 238 additions & 0 deletions entities/database_migrations/versions/20231109_103318_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
"""

Create Date: 2023-11-09 18:33:19.405053

"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
# `revision` identifies this migration; `down_revision` names the migration
# this one is applied on top of (and returns to on downgrade).
revision = '20231109_103318'
down_revision = '20230929_110616'
# No branch label or cross-branch dependency is declared for this revision.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the ``entity`` and ``file`` tables plus one table per entity type.

    Tables are created in dependency order: every foreign key declared below
    targets a table that an earlier ``op.create_table`` call in this function
    has already created.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # Base table: every table below except `file` uses an `entity_id` foreign
    # key to `entity.id` as its primary key.
    op.create_table('entity',
    sa.Column('id', sa.UUID(), nullable=False),
    sa.Column('type', sa.String(), nullable=False),
    sa.Column('producing_run_id', sa.Integer(), nullable=True),
    sa.Column('owner_user_id', sa.Integer(), nullable=False),
    sa.Column('collection_id', sa.Integer(), nullable=False),
    sa.PrimaryKeyConstraint('id', name=op.f('pk_entity'))
    )
    # `file` has its own `id` primary key; its link to `entity` is nullable.
    op.create_table('file',
    sa.Column('id', sa.UUID(), nullable=False),
    sa.Column('entity_id', sa.UUID(), nullable=True),
    sa.Column('entity_field_name', sa.String(), nullable=False),
    # NOTE(review): unlike every other Enum in this migration, this one does
    # not pass native_enum=False. On backends with native enum types this
    # presumably creates a standalone 'filestatus' type that dropping the
    # table will not remove -- confirm downgrade() cleans it up.
    sa.Column('status', sa.Enum('SUCCESS', 'FAILED', 'PENDING', name='filestatus'), nullable=False),
    sa.Column('protocol', sa.String(), nullable=False),
    sa.Column('namespace', sa.String(), nullable=False),
    sa.Column('path', sa.String(), nullable=False),
    sa.Column('file_format', sa.String(), nullable=False),
    sa.Column('compression_type', sa.String(), nullable=True),
    sa.Column('size', sa.Integer(), nullable=True),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_file_entity_id_entity')),
    sa.PrimaryKeyConstraint('id', name=op.f('pk_file'))
    )
    # References: entity.
    op.create_table('metadata_field',
    sa.Column('field_name', sa.String(), nullable=False),
    sa.Column('description', sa.String(), nullable=False),
    sa.Column('field_type', sa.String(), nullable=False),
    sa.Column('is_required', sa.Boolean(), nullable=False),
    sa.Column('options', sa.String(), nullable=True),
    sa.Column('default_value', sa.String(), nullable=True),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_metadata_field_entity_id_entity')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_metadata_field'))
    )
    # References: entity.
    op.create_table('upstream_database',
    sa.Column('name', sa.String(), nullable=False),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_upstream_database_entity_id_entity')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_upstream_database'))
    )
    # References: file, entity.
    op.create_table('coverage_viz',
    sa.Column('accession_id', sa.String(), nullable=False),
    sa.Column('coverage_viz_file_id', sa.UUID(), nullable=False),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['coverage_viz_file_id'], ['file.id'], name=op.f('fk_coverage_viz_coverage_viz_file_id_file')),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_coverage_viz_entity_id_entity')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_coverage_viz'))
    )
    # References: entity, metadata_field.
    op.create_table('metadata_field_project',
    sa.Column('project_id', sa.Integer(), nullable=False),
    sa.Column('metadata_field_id', sa.UUID(), nullable=False),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_metadata_field_project_entity_id_entity')),
    sa.ForeignKeyConstraint(['metadata_field_id'], ['metadata_field.entity_id'], name=op.f('fk_metadata_field_project_metadata_field_id_metadata_field')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_metadata_field_project'))
    )
    # References: entity, upstream_database.
    op.create_table('taxon',
    sa.Column('wikipedia_id', sa.String(), nullable=True),
    sa.Column('description', sa.String(), nullable=True),
    sa.Column('common_name', sa.String(), nullable=True),
    sa.Column('name', sa.String(), nullable=False),
    sa.Column('is_phage', sa.Boolean(), nullable=False),
    sa.Column('upstream_database_id', sa.UUID(), nullable=False),
    sa.Column('upstream_database_identifier', sa.String(), nullable=False),
    sa.Column('level', sa.Enum('species', 'genus', 'family', name='taxonlevel', native_enum=False), nullable=False),
    sa.Column('tax_id', sa.Integer(), nullable=False),
    sa.Column('tax_id_parent', sa.Integer(), nullable=False),
    sa.Column('tax_id_species', sa.Integer(), nullable=False),
    sa.Column('tax_id_genus', sa.Integer(), nullable=False),
    sa.Column('tax_id_family', sa.Integer(), nullable=False),
    sa.Column('tax_id_order', sa.Integer(), nullable=False),
    sa.Column('tax_id_class', sa.Integer(), nullable=False),
    sa.Column('tax_id_phylum', sa.Integer(), nullable=False),
    sa.Column('tax_id_kingdom', sa.Integer(), nullable=False),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_taxon_entity_id_entity')),
    sa.ForeignKeyConstraint(['upstream_database_id'], ['upstream_database.entity_id'], name=op.f('fk_taxon_upstream_database_id_upstream_database')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_taxon'))
    )
    # References: entity, file (twice: file_id and the optional file_index_id), taxon.
    op.create_table('reference_genome',
    sa.Column('file_id', sa.UUID(), nullable=False),
    sa.Column('file_index_id', sa.UUID(), nullable=True),
    sa.Column('name', sa.String(), nullable=False),
    sa.Column('description', sa.String(), nullable=False),
    sa.Column('taxon_id', sa.UUID(), nullable=False),
    sa.Column('accession_id', sa.String(), nullable=True),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_reference_genome_entity_id_entity')),
    sa.ForeignKeyConstraint(['file_id'], ['file.id'], name=op.f('fk_reference_genome_file_id_file')),
    sa.ForeignKeyConstraint(['file_index_id'], ['file.id'], name=op.f('fk_reference_genome_file_index_id_file')),
    sa.ForeignKeyConstraint(['taxon_id'], ['taxon.entity_id'], name=op.f('fk_reference_genome_taxon_id_taxon')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_reference_genome'))
    )
    # References: entity, taxon (optional host taxon).
    op.create_table('sample',
    sa.Column('name', sa.String(), nullable=False),
    sa.Column('sample_type', sa.String(), nullable=False),
    sa.Column('water_control', sa.Boolean(), nullable=False),
    sa.Column('collection_date', sa.DateTime(), nullable=True),
    sa.Column('collection_location', sa.String(), nullable=False),
    sa.Column('description', sa.String(), nullable=True),
    sa.Column('host_taxon_id', sa.UUID(), nullable=True),
    # NOTE(review): declared nullable=True although it is this table's primary
    # key (see the PrimaryKeyConstraint below); every other table declares
    # entity_id nullable=False -- confirm against the model.
    sa.Column('entity_id', sa.UUID(), nullable=True),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_sample_entity_id_entity')),
    sa.ForeignKeyConstraint(['host_taxon_id'], ['taxon.entity_id'], name=op.f('fk_sample_host_taxon_id_taxon')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_sample'))
    )
    # References: entity, file, reference_genome.
    op.create_table('genomic_range',
    sa.Column('reference_genome_id', sa.UUID(), nullable=False),
    sa.Column('file_id', sa.UUID(), nullable=False),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_genomic_range_entity_id_entity')),
    sa.ForeignKeyConstraint(['file_id'], ['file.id'], name=op.f('fk_genomic_range_file_id_file')),
    sa.ForeignKeyConstraint(['reference_genome_id'], ['reference_genome.entity_id'], name=op.f('fk_genomic_range_reference_genome_id_reference_genome')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_genomic_range'))
    )
    # References: entity, metadata_field, sample.
    op.create_table('metadatum',
    sa.Column('sample_id', sa.UUID(), nullable=False),
    sa.Column('metadata_field_id', sa.UUID(), nullable=False),
    sa.Column('value', sa.String(), nullable=False),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_metadatum_entity_id_entity')),
    sa.ForeignKeyConstraint(['metadata_field_id'], ['metadata_field.entity_id'], name=op.f('fk_metadatum_metadata_field_id_metadata_field')),
    sa.ForeignKeyConstraint(['sample_id'], ['sample.entity_id'], name=op.f('fk_metadatum_sample_id_sample')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_metadatum'))
    )
    # References: entity, file, reference_genome.
    op.create_table('sequence_alignment_index',
    sa.Column('index_file_id', sa.UUID(), nullable=False),
    sa.Column('reference_genome_id', sa.UUID(), nullable=False),
    sa.Column('tool', sa.Enum('bowtie2', 'minimap2', 'ncbi', name='alignmenttool', native_enum=False), nullable=False),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_sequence_alignment_index_entity_id_entity')),
    sa.ForeignKeyConstraint(['index_file_id'], ['file.id'], name=op.f('fk_sequence_alignment_index_index_file_id_file')),
    sa.ForeignKeyConstraint(['reference_genome_id'], ['reference_genome.entity_id'], name=op.f('fk_sequence_alignment_index_reference_genome_id_reference_genome')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_sequencing_read'))
    )
    # References: entity, file (r1, optional r2, optional primer), sample, taxon.
    op.create_table('sequencing_read',
    sa.Column('sample_id', sa.UUID(), nullable=True),
    sa.Column('protocol', sa.Enum('MNGS', 'TARGETED', 'MSSPE', name='sequencingprotocol', native_enum=False), nullable=False),
    sa.Column('r1_file_id', sa.UUID(), nullable=False),
    sa.Column('r2_file_id', sa.UUID(), nullable=True),
    # NOTE(review): the column name is spelled 'techonology'; presumably it
    # mirrors the model attribute -- confirm before renaming anything.
    sa.Column('techonology', sa.Enum('Illumina', 'Nanopore', name='sequencingtechnology', native_enum=False), nullable=False),
    sa.Column('nucleic_acid', sa.Enum('RNA', 'DNA', name='nucleicacid', native_enum=False), nullable=False),
    sa.Column('has_ercc', sa.Boolean(), nullable=False),
    sa.Column('taxon_id', sa.UUID(), nullable=True),
    sa.Column('primer_file_id', sa.UUID(), nullable=True),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_sequencing_read_entity_id_entity')),
    sa.ForeignKeyConstraint(['primer_file_id'], ['file.id'], name=op.f('fk_sequencing_read_primer_file_id_file')),
    sa.ForeignKeyConstraint(['r1_file_id'], ['file.id'], name=op.f('fk_sequencing_read_r1_file_id_file')),
    sa.ForeignKeyConstraint(['r2_file_id'], ['file.id'], name=op.f('fk_sequencing_read_r2_file_id_file')),
    sa.ForeignKeyConstraint(['sample_id'], ['sample.entity_id'], name=op.f('fk_sequencing_read_sample_id_sample')),
    sa.ForeignKeyConstraint(['taxon_id'], ['taxon.entity_id'], name=op.f('fk_sequencing_read_taxon_id_taxon')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_sequencing_read'))
    )
    # References: entity, file, genomic_range, reference_genome,
    # sequencing_read, taxon.
    op.create_table('consensus_genome',
    sa.Column('taxon_id', sa.UUID(), nullable=False),
    # NOTE(review): named 'sequence_read_id' here but 'sequencing_read_id' in
    # the `contig` table; both point at sequencing_read.entity_id.
    sa.Column('sequence_read_id', sa.UUID(), nullable=False),
    sa.Column('genomic_range_id', sa.UUID(), nullable=False),
    sa.Column('reference_genome_id', sa.UUID(), nullable=False),
    sa.Column('sequence_id', sa.UUID(), nullable=False),
    sa.Column('is_reverse_complement', sa.Boolean(), nullable=False),
    sa.Column('intermediate_outputs_id', sa.UUID(), nullable=True),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_consensus_genome_entity_id_entity')),
    sa.ForeignKeyConstraint(['genomic_range_id'], ['genomic_range.entity_id'], name=op.f('fk_consensus_genome_genomic_range_id_genomic_range')),
    sa.ForeignKeyConstraint(['intermediate_outputs_id'], ['file.id'], name=op.f('fk_consensus_genome_intermediate_outputs_id_file')),
    sa.ForeignKeyConstraint(['reference_genome_id'], ['reference_genome.entity_id'], name=op.f('fk_consensus_genome_reference_genome_id_reference_genome')),
    sa.ForeignKeyConstraint(['sequence_id'], ['file.id'], name=op.f('fk_consensus_genome_sequence_id_file')),
    sa.ForeignKeyConstraint(['sequence_read_id'], ['sequencing_read.entity_id'], name=op.f('fk_consensus_genome_sequence_read_id_sequencing_read')),
    sa.ForeignKeyConstraint(['taxon_id'], ['taxon.entity_id'], name=op.f('fk_consensus_genome_taxon_id_taxon')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_consensus_genome'))
    )
    # References: entity, sequencing_read (optional).
    op.create_table('contig',
    sa.Column('sequencing_read_id', sa.UUID(), nullable=True),
    sa.Column('sequence', sa.String(), nullable=False),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_contig_entity_id_entity')),
    sa.ForeignKeyConstraint(['sequencing_read_id'], ['sequencing_read.entity_id'], name=op.f('fk_contig_sequencing_read_id_sequencing_read')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_contig'))
    )
    # References: consensus_genome, file, entity. All metric columns are nullable.
    op.create_table('metric_consensus_genome',
    sa.Column('consensus_genome_id', sa.UUID(), nullable=False),
    sa.Column('coverage_depth', sa.Float(), nullable=True),
    sa.Column('reference_genome_length', sa.Float(), nullable=True),
    sa.Column('percent_genome_called', sa.Float(), nullable=True),
    sa.Column('percent_identity', sa.Float(), nullable=True),
    sa.Column('gc_percent', sa.Float(), nullable=True),
    sa.Column('total_reads', sa.Integer(), nullable=True),
    sa.Column('mapped_reads', sa.Integer(), nullable=True),
    sa.Column('ref_snps', sa.Integer(), nullable=True),
    sa.Column('n_actg', sa.Integer(), nullable=True),
    sa.Column('n_missing', sa.Integer(), nullable=True),
    sa.Column('n_ambiguous', sa.Integer(), nullable=True),
    sa.Column('coverage_viz_summary_file_id', sa.UUID(), nullable=False),
    sa.Column('entity_id', sa.UUID(), nullable=False),
    sa.ForeignKeyConstraint(['consensus_genome_id'], ['consensus_genome.entity_id'], name=op.f('fk_metric_consensus_genome_consensus_genome_id_consensus_genome')),
    sa.ForeignKeyConstraint(['coverage_viz_summary_file_id'], ['file.id'], name=op.f('fk_metric_consensus_genome_coverage_viz_summary_file_id_file')),
    sa.ForeignKeyConstraint(['entity_id'], ['entity.id'], name=op.f('fk_metric_consensus_genome_entity_id_entity')),
    sa.PrimaryKeyConstraint('entity_id', name=op.f('pk_metric_consensus_genome'))
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    """Drop every table created by ``upgrade()`` and the ``filestatus`` enum type.

    Tables are dropped in the reverse of their creation order, so each
    referencing table is removed before the tables its foreign keys target.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table('metric_consensus_genome')
    op.drop_table('contig')
    op.drop_table('consensus_genome')
    op.drop_table('sequencing_read')
    op.drop_table('sequence_alignment_index')
    op.drop_table('metadatum')
    op.drop_table('genomic_range')
    op.drop_table('sample')
    op.drop_table('reference_genome')
    op.drop_table('taxon')
    op.drop_table('metadata_field_project')
    op.drop_table('coverage_viz')
    op.drop_table('upstream_database')
    op.drop_table('metadata_field')
    op.drop_table('file')
    op.drop_table('entity')
    # ### end Alembic commands ###

    # `file.status` is the only Enum in upgrade() declared without
    # native_enum=False, so on backends with native enum types creating the
    # table also created a standalone `filestatus` type. Dropping the table
    # does not remove that type, and a later re-upgrade would then fail because
    # the type already exists. Drop it explicitly; on backends without native
    # enum types this call emits nothing.
    # checkfirst=False matches the unconditional drop_table calls above and
    # avoids an existence query against the bind.
    sa.Enum('SUCCESS', 'FAILED', 'PENDING', name='filestatus').drop(op.get_bind(), checkfirst=False)
Loading