Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consensus Genomes Readiness #115

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions workflows/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,16 @@ class WorkflowRunner:


@strawberry.input
class WorkflowInput:
class EntityInput:
name: str
value: str
entityType: str
entityId: str


@strawberry.input
class RawInput:
name: str
value: typing.Any


@strawberry.type
Expand Down Expand Up @@ -148,7 +155,8 @@ async def add_run(
self,
project_id: int,
workflow_version_id: int,
workflow_inputs: typing.List[WorkflowInput],
entity_inputs: typing.List[EntityInput],
raw_inputs: typing.List[RawInput],
workflow_runner: str = default_workflow_runner_name,
session: AsyncSession = Depends(get_db_session, use_cache=False),
event_bus: EventBus = Depends(get_event_bus),
Expand Down
10 changes: 10 additions & 0 deletions workflows/database/models/workflow.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import enum

import strawberry
from manifest.manifest import Manifest
from platformics.database.models.base import Base
from sqlalchemy import Column, DateTime, Enum, ForeignKey, Integer, String, func
from sqlalchemy.orm import Mapped, relationship
Expand All @@ -24,8 +25,17 @@ class WorkflowVersion(Base):
workflow_id = Column(Integer, ForeignKey("workflow.id"), nullable=False)
workflow: Mapped[Workflow] = relationship(Workflow, back_populates="versions", foreign_keys=[workflow_id])
runs = relationship("Run", back_populates="workflow_version", foreign_keys="Run.workflow_version_id")
workflow_definition_uri = Column(String, nullable=False)
manifest = Column(String, nullable=False)

_parsed_manifest = None # Placeholder for memoized data

@property
def parsed_manifest(self):
if self._parsed_manifest is None:
self._parsed_manifest = Manifest.model_validate_json(str(self.manifest))
return self._parsed_manifest


@strawberry.enum
class RunStatus(enum.Enum):
Expand Down
2 changes: 1 addition & 1 deletion workflows/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ x-redis-variables: &redis-variables
x-workflow-variables: &workflow-variables
? ENVIRONMENT
? BOTO_ENDPOINT_URL=http://motoserver.czidnet:4000
? ENTITY_SERVICE_URL=http://entities.czidnet:8008
? ENTITY_SERVICE_URL=http://entities.czidnet:8008/graphql
? DEFAULT_UPLOAD_BUCKET=local-bucket
? ENTITY_SERVICE_AUTH_TOKEN=eyJhbGciOiJFQ0RILUVTIiwiZW5jIjoiQTI1NkNCQy1IUzUxMiIsImVwayI6eyJjcnYiOiJQLTM4NCIsImt0eSI6IkVDIiwieCI6Ik5Nc3ZKbXVuYnBXY0VsdVlJTmRVeVVIcUkzbjZCR2VQd2V3ajRXS0pVdEt0QXhmUUtrVE81M2kzQ2dSZkZYVEQiLCJ5IjoiYmp6TkJuZjExWjRIV3dBVm95UVpSOGRWSERicW9wTjhvQkJZYnIxQlBiU1llZHdaWkVuYzJqS21rY0xxcloxTiJ9LCJraWQiOiItQmx2bF9wVk5LU2JRQ2N5dGV4UzNfMk5MaHBia2J6LVk5VFFjbkY5S1drIiwidHlwIjoiSldFIn0..Ymjmtj6nXp8r8AFe8AgI1g.e_39w7OXGJaOVKL_QkP38rvlcEeSgFQsxT0rTdCgI5E-b328zlVHagLSFZ_Iqvqiy6Z8KcU4pLJ3NTaW3Ys_YQsnUn6yUEvwqOY2KESB0mT0Bp3qpNYRBZJVA8PW43YAbOnO7h7ZTwQZJfwMzwhcaaYeZW8pN9rvcNtQX3rgBubSuR-LHKL6k4uAMPh9A8ZxXKZgpI6tpJPxE-uspvYi-foW8VyjZtwOUMvMp3lfZPyL1oQIv_rEUhOGNO_lfi339QcT6F7DwBjXK6C_7U65F-dFZScnReLnVczPfHhJ7z3NnVt46sFcddgZpLIpQyzA6puCcDoRm5ZZCVvm8h-LHVy-9dGWLVxBRhGRdBwBhbiVu2O_CNeVabXl8JhAs3oeV2zDgYfOj_-kkHWsbgHZ0y-tc-HtgoXzsUqaRP1IXQ3g3VDES7UjsaKsfxgURH5EIsrdWwFrWHGoLNfGwwPSwTBI5Mul7LT10-Pg_uBBCiHfQIDqerRQeADRFhV_07GYatBDt-RxwNL4bO59V8ewCzhpdCYRpL363HGldT1Pic-SpTk2NsY2t8MA6__FhJU9JSKYwJpeKMaGLUHA_40PEQ.gb5q-WZTU-ZKpV7WYFbMGMEF2CZIBrFlCUeaZ5ffPDU
? DEFAULT_UPLOAD_PROTOCOL=S3 # don't need this in workflows
Expand Down
222 changes: 207 additions & 15 deletions workflows/entity_interface.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from abc import ABC
import asyncio
from datetime import datetime
from dataclasses import dataclass, field, fields
import enum
import os
import sys
from typing import Generic, Optional
import typing
from uuid import UUID
Expand Down Expand Up @@ -53,7 +56,8 @@ def gql_create_mutation(self) -> str:
(_snake_to_camel(entity_field.name), _type_name_to_graphql_type[entity_field.type.__name__])
)

# field_name_types.append(("userId", "Int"))
field_name_types.append(("producingRunId", "Int"))
field_name_types.append(("ownerUserId", "Int"))
field_name_types.append(("collectionId", "Int"))

type_signature = ", ".join([f"${field_name}: {field_type}!" for field_name, field_type in field_name_types])
Expand Down Expand Up @@ -91,20 +95,15 @@ async def create_if_not_exists(self, user_id: int, collection_id: int, client: C
dependent_entity_futures.append(entity_ref.create_if_not_exists(user_id, collection_id, client))
await asyncio.gather(*dependent_entity_futures)

print(self.gql_create_mutation(), file=sys.stderr)
variables = self.gql_variables()
# variables["userId"] = user_id
variables["userId"] = user_id
variables["collectionId"] = collection_id
response = await client.execute_async(gql(self.gql_create_mutation()), variable_values=variables)
entity_id = response.get(self._mutation_name(), {}).get("id")
self.entity_id = entity_id


@dataclass
class Sample(Entity):
name: str
location: str


T = typing.TypeVar("T", bound=Entity)


Expand All @@ -124,22 +123,215 @@ async def create_if_not_exists(self, user_id: int, collection_id: int, client: C
def exists(self) -> bool:
return self.entity_id is not None

async def load(self) -> None:
pass
async def load(self) -> T:
raise Exception("Not implemented")


class FileStatus(enum.Enum):
SUCCESS = "SUCCESS"
FAILED = "FAILED"
PENDING = "PENDING"


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this file generated, do you have codegen of some kind?

@dataclass
class SequencingRead(Entity):
nucleotide: str
sequence: str
class File:
entity_field_name: str
entity: EntityReference[Entity] = field(metadata={"id_name": "entityId"})
status: FileStatus
protocol: str
sample: Optional[EntityReference[Sample]] = field(metadata={"id_name": "sampleId"})
namespace: str
path: str
file_format: str
compression_type: Optional[str]
size: Optional[int]


class FileReference:
file_id: Optional[UUID]
file: Optional[File]

async def load(self) -> File:
raise Exception("Not implemented")


@dataclass
class GenomicRange(Entity):
reference_genome: EntityReference["ReferenceGenome"]
file: FileReference
consensus_genomes: list[EntityReference["ConsensusGenome"]]


@dataclass
class MetricConsensusGenome(Entity):
coverage_depth: Optional[int]
reference_genome_length: Optional[int]
percent_genome_called: Optional[int]
percent_identity: Optional[int]
gc_percent: Optional[int]
total_reads: Optional[int]
mapped_reads: Optional[int]
ref_snps: Optional[int]
n_actg: Optional[int]
n_missing: Optional[int]
n_ambiguous: Optional[int]
consensus_genome: EntityReference["ConsensusGenome"] = field(metadata={"id_name": "consensusGenomeId"})
coverage_viz_summary_file: FileReference = field(metadata={"id_name": "coverageVizSummaryFileId"})


@dataclass
class ConsensusGenome(Entity):
is_reverse_complement: bool
taxon: EntityReference["Taxon"] = field(metadata={"id_name": "taxonId"})
sequence_read: EntityReference["SequencingRead"] = field(metadata={"id_name": "sequenceReadId"})
genomic_range: EntityReference[GenomicRange] = field(metadata={"id_name": "genomicRangeId"})
reference_genome: EntityReference["ReferenceGenome"] = field(metadata={"id_name": "referenceGenomeId"})
sequence: FileReference = field(metadata={"id_name": "sequenceId"})
intermediate_outputs: FileReference = field(metadata={"id_name": "intermediateOutputsId"})
metrics: list[EntityReference[MetricConsensusGenome]]


@dataclass
class Contig(Entity):
sequence: str
sequencing_read: Optional[EntityReference[SequencingRead]] = field(metadata={"id_name": "sequencingReadId"})
sequencing_read: Optional[EntityReference["SequencingRead"]] = field(metadata={"id_name": "sequencingReadId"})


@dataclass
class CoverageViz(Entity):
accession_id: str
coverage_viz_file: FileReference = field(metadata={"id_name": "coverageVizId"})


@dataclass
class MetadataField(Entity):
field_group: list[EntityReference["MetadataFieldProject"]] = field(metadata={"id_name": "metadataFieldId"})
field_name: str
description: str
field_type: str
is_required: bool
options: Optional[str]
default_value: Optional[str]
metadatas: list[EntityReference["Metadatum"]]


@dataclass
class MetadataFieldProject(Entity):
project_id: int
metadata_field: EntityReference[MetadataField] = field(metadata={"id_name": "metadataFieldId"})


@dataclass
class Metadatum(Entity):
sample: EntityReference["Sample"] = field(metadata={"id_name": "sampleId"})
metadata_field: EntityReference[MetadataField] = field(metadata={"id_name": "metadataFieldId"})
value: str


@dataclass
class ReferenceGenome(Entity):
file: FileReference = field(metadata={"id_name": "fileId"})
file_index: Optional[FileReference] = field(metadata={"id_name": "fileIndexId"})
name: str
description: str
taxon: EntityReference["Taxon"] = field(metadata={"id_name": "taxonId"})
accession_id: Optional[str]
sequence_alignment_indices: list[EntityReference["SequenceAlignmentIndex"]]
consensus_genomes: list[EntityReference[ConsensusGenome]]
genomic_ranges: list[EntityReference[GenomicRange]]


@dataclass
class Sample(Entity):
name: str
sample_type: str
water_control: bool
collection_date: Optional[datetime]
collection_location: str
description: Optional[str]
host_taxon: EntityReference["Taxon"] = field(metadata={"id_name": "hostTaxonId"})
sequencing_reads: list[EntityReference["SequencingRead"]] = field(metadata={"id_name": "sampleId"})
metadatas: list[EntityReference[Metadatum]]


class AlignmentTool(enum.Enum):
bowtie2 = "bowtie2"
minimap2 = "minimap2"
ncbi = "ncbi"


@dataclass
class SequenceAlignmentIndex(Entity):
index_file: FileReference = field(metadata={"id_name": "indexFileId"})
reference_genome: EntityReference[ReferenceGenome] = field(metadata={"id_name": "referenceGenomeId"})
tool: AlignmentTool


class NucleicAcid(enum.Enum):
RNA = "RNA"
DNA = "DNA"


class SequencingTechnology(enum.Enum):
Illumina = "Illumina"
Nanopore = "Nanopore"


class SequencingProtocol(enum.Enum):
MNGS = "MNGS"
TARGETED = "TARGETED"
MSSPE = "MSSPE"


@dataclass
class SequencingRead(Entity):
sample: Optional[EntityReference[Sample]] = field(metadata={"id_name": "sampleId"})
protocol: SequencingProtocol
r1_file: FileReference = field(metadata={"id_name": "r1FileId"})
r2_file: Optional[FileReference] = field(metadata={"id_name": "r2FileId"})
techonology: SequencingTechnology
nucleic_acid: NucleicAcid
has_ercc: bool
taxon: Optional[EntityReference["Taxon"]] = field(metadata={"id_name": "taxonId"})
primer_file: Optional[FileReference] = field(metadata={"id_name": "primerFileId"})
consensus_genomes: list[EntityReference[ConsensusGenome]]
contigs: list[EntityReference[Contig]]


class TaxonLevel(enum.Enum):
species = "species"
genus = "genus"
family = "family"


@dataclass
class Taxon(Entity):
wikipedia_id: Optional[str]
description: Optional[str]
common_name: Optional[str]
name: str
is_phage: bool
upstream_database: EntityReference["UpstreamDatabase"] = field(metadata={"id_name": "upstreamDatabaseId"})
upstream_database_identifier: str
level: TaxonLevel
tax_id: int
tax_id_parent: int
tax_id_species: int
tax_id_genus: int
tax_id_family: int
tax_id_order: int
tax_id_class: int
tax_id_phylum: int
tax_id_kingdom: int
consensus_genomes: list[EntityReference[ConsensusGenome]]
reference_genomes: list[EntityReference[ReferenceGenome]]
sequencing_reads: list[EntityReference[SequencingRead]]
samples: list[EntityReference[Sample]]


@dataclass
class UpstreamDatabase(Entity):
name: str
taxa: list[EntityReference[Taxon]]


async def create_entities(user_id: int, collection_id: int, entities: list[Entity]) -> None:
Expand Down
File renamed without changes.
Loading