Merge pull request #6063 from chanzuckerberg/staging

chore: prod deploy 10/23

atarashansky authored Oct 23, 2023
2 parents 2152552 + 1f8b142 commit f8f10c7
Showing 188 changed files with 3,363 additions and 5,630 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/build-images-and-create-deployment.yml
@@ -4,10 +4,9 @@ on:
push:
branches:
- main
- staging
- prod
repository_dispatch:
types: [build-images]
types: [build-images-for-staging]
env:
# Force using BuildKit instead of normal Docker, required so that metadata
# is written/read to allow us to use layers of previous builds as cache.
21 changes: 0 additions & 21 deletions .github/workflows/test-receiving-repository-dispatch.yml

This file was deleted.

19 changes: 0 additions & 19 deletions .github/workflows/test-sending-repository-dispatch.yml

This file was deleted.

25 changes: 0 additions & 25 deletions .github/workflows/test-workflow-run-after-push-tests-pass.yml

This file was deleted.

@@ -51,7 +51,7 @@ jobs:
uses: peter-evans/repository-dispatch@v2
with:
token: ${{ secrets.GITHUB_TOKEN }}
event-type: build-images
event-type: build-images-for-staging
client-payload: '{"ref": "refs/heads/staging"}'

- name: Send slack notification if main not merged into staging
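
Note that the renamed event type must match on both ends of the dispatch: the sender in this hunk and the receiving workflow's repository_dispatch `types` filter shown earlier. As a hedged illustration, the REST call that peter-evans/repository-dispatch@v2 performs is roughly the following sketch; OWNER/REPO and the token are placeholders.

import json
import urllib.request

def send_repository_dispatch(token: str) -> None:
    # POST /repos/{owner}/{repo}/dispatches fires a repository_dispatch event;
    # "event_type" must match the receiving workflow's `types` filter.
    req = urllib.request.Request(
        "https://api.github.com/repos/OWNER/REPO/dispatches",
        data=json.dumps(
            {
                "event_type": "build-images-for-staging",
                "client_payload": {"ref": "refs/heads/staging"},
            }
        ).encode("utf-8"),
        headers={
            "Accept": "application/vnd.github+json",
            "Authorization": f"Bearer {token}",
        },
        method="POST",
    )
    urllib.request.urlopen(req)  # GitHub returns 204 No Content on success
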
4 changes: 2 additions & 2 deletions .happy/terraform/envs/dev/main.tf
@@ -16,8 +16,8 @@ module stack {
batch_container_memory_limit = 28000
wmg_batch_container_memory_limit = 248000
wmg_desired_vcpus = 128
cg_desired_vcpus = 48
cg_batch_container_memory_limit = 92000
cg_desired_vcpus = 128
cg_batch_container_memory_limit = 248000
backend_memory = 8192
frontend_memory = 4096
backend_instance_count = 4
4 changes: 2 additions & 2 deletions .happy/terraform/envs/prod/main.tf
@@ -16,8 +16,8 @@ module stack {
batch_container_memory_limit = 63500
wmg_batch_container_memory_limit = 248000
wmg_desired_vcpus = 128
cg_desired_vcpus = 48
cg_batch_container_memory_limit = 92000
cg_desired_vcpus = 128
cg_batch_container_memory_limit = 248000
backend_memory = 30 * 1024
frontend_memory = 4096
backend_instance_count = 6
4 changes: 2 additions & 2 deletions .happy/terraform/envs/stage/main.tf
@@ -16,8 +16,8 @@ module stack {
batch_container_memory_limit = 63500
wmg_batch_container_memory_limit = 248000
wmg_desired_vcpus = 128
cg_batch_container_memory_limit = 92000
cg_desired_vcpus = 48
cg_batch_container_memory_limit = 248000
cg_desired_vcpus = 128
backend_memory = 8192
frontend_memory = 4096
backend_instance_count = 4
53 changes: 53 additions & 0 deletions .happy/terraform/modules/schema_migration/main.tf
@@ -113,6 +113,59 @@ resource aws_batch_job_definition schema_migrations {
})
}

resource aws_batch_job_definition publish_revisions {
type = "container"
name = "dp-${var.deployment_stage}-${var.custom_stack_name}-${local.name}-publish-revisions"
container_properties = jsonencode({
command = ["python3",
"-m",
"backend.layers.processing.publish_revisions",
],
jobRoleArn= var.batch_role_arn,
image= var.image,
environment= [
{
name= "ARTIFACT_BUCKET",
value= var.artifact_bucket
},
{
name= "DEPLOYMENT_STAGE",
value= var.deployment_stage
},
{
name= "AWS_DEFAULT_REGION",
value= data.aws_region.current.name
},
{
name= "REMOTE_DEV_PREFIX",
value= var.remote_dev_prefix
},
{
name= "DATASETS_BUCKET",
value= var.datasets_bucket
},
],
resourceRequirements = [
{
type= "VCPU",
value= "2"
},
{
type= "MEMORY",
value= "4096"
}
]
logConfiguration= {
logDriver= "awslogs",
options= {
awslogs-group= aws_cloudwatch_log_group.batch_cloud_watch_logs_group.id,
awslogs-region= data.aws_region.current.name
}
}
})
}
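
A minimal sketch of how this job definition could be submitted out of band, assuming boto3; the queue name and the rendered definition name below are placeholders that follow the naming pattern above.

import boto3

batch = boto3.client("batch")
batch.submit_job(
    jobName="publish-revisions-adhoc",
    jobQueue="dp-prod-schema-migration",  # placeholder queue name
    # Rendered from "dp-${deployment_stage}-${custom_stack_name}-${local.name}-publish-revisions"
    jobDefinition="dp-prod-stack-schema-migration-publish-revisions",
)
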


resource aws_sfn_state_machine sfn_schema_migration {
name = "dp-${var.deployment_stage}-${var.custom_stack_name}-${local.name}-sfn"
role_arn = var.sfn_role_arn
6 changes: 5 additions & 1 deletion .happy/terraform/modules/sfn/main.tf
@@ -58,7 +58,11 @@ resource "aws_sfn_state_machine" "state_machine" {
},
{
"Name": "DATASET_ID",
"Value.$": "$.dataset_id"
"Value.$": "$.dataset_id"
},
{
"Name": "COLLECTION_ID",
"Value.$": "$.collection_id"
},
{
"Name": "STEP_NAME",
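
The new COLLECTION_ID container override resolves against the state machine's input, so executions now need a collection_id key alongside dataset_id. A hedged sketch, with a placeholder ARN and placeholder IDs:

import json
import boto3

sfn = boto3.client("stepfunctions")
sfn.start_execution(
    stateMachineArn="arn:aws:states:us-west-2:123456789012:stateMachine:example-sfn",
    input=json.dumps(
        {
            "dataset_id": "example-dataset-id",
            "collection_id": "example-collection-id",  # consumed by "Value.$": "$.collection_id"
        }
    ),
)
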
4 changes: 3 additions & 1 deletion Dockerfile.wmg_pipeline
@@ -19,6 +19,8 @@ ADD backend/wmg/__init__.py backend/wmg/__init__.py
ADD backend/wmg/config.py backend/wmg/config.py
ADD backend/wmg/data backend/wmg/data
ADD backend/wmg/pipeline backend/wmg/pipeline
ADD backend/wmg/api backend/wmg/api
ADD backend/cellguide/pipeline backend/cellguide/pipeline
ADD backend/layers backend/layers
ADD backend/common backend/common

@@ -29,4 +31,4 @@ LABEL commit=${HAPPY_COMMIT}
ENV COMMIT_SHA=${HAPPY_COMMIT}
ENV COMMIT_BRANCH=${HAPPY_BRANCH}

CMD ["python3", "-m", "backend.wmg.pipeline.cube_pipeline"]
CMD ["python3", "-m", "backend.wmg.pipeline"]
@@ -1,7 +1,9 @@
import logging

from backend.cellguide.pipeline.computational_marker_genes.computational_markers import MarkerGenesCalculator
from backend.cellguide.pipeline.computational_marker_genes.constants import MARKER_SCORE_THRESHOLD
from backend.cellguide.pipeline.computational_marker_genes.computational_markers import (
MARKER_SCORE_THRESHOLD,
MarkerGenesCalculator,
)
from backend.cellguide.pipeline.constants import COMPUTATIONAL_MARKER_GENES_FOLDERNAME, MARKER_GENE_PRESENCE_FILENAME
from backend.cellguide.pipeline.ontology_tree import get_ontology_tree_builder
from backend.cellguide.pipeline.ontology_tree.tree_builder import OntologyTreeBuilder
@@ -10,9 +10,6 @@
from dask.diagnostics import ProgressBar
from tqdm import tqdm

from backend.cellguide.pipeline.computational_marker_genes.constants import (
MARKER_SCORE_THRESHOLD,
)
from backend.cellguide.pipeline.computational_marker_genes.types import ComputationalMarkerGenes
from backend.cellguide.pipeline.computational_marker_genes.utils import (
bootstrap_rows_percentiles,
@@ -43,6 +40,8 @@
or any arbitrary combinations of metadata dimensions.
"""

MARKER_SCORE_THRESHOLD = 0.5


class MarkerGenesCalculator:
def __init__(self, *, snapshot: WmgSnapshot, all_cell_type_ids_in_corpus: list[str], groupby_terms: list[str]):
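
For orientation, a score threshold like the relocated MARKER_SCORE_THRESHOLD is typically used to filter candidate markers. Illustrative only; the gene IDs and variable names below are hypothetical.

MARKER_SCORE_THRESHOLD = 0.5

candidate_scores = {"ENSG00000010610": 0.81, "ENSG00000142156": 0.32}
marker_genes = [
    gene for gene, score in candidate_scores.items() if score >= MARKER_SCORE_THRESHOLD
]  # keeps only the first gene in this toy example
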

This file was deleted.

@@ -1,16 +1,16 @@
from backend.cellguide.pipeline.canonical_marker_genes.utils import format_citation_dp
from backend.cellguide.pipeline.source_collections.types import SourceCollectionsData
from backend.common.utils.rollup import descendants
from backend.wmg.data.utils import get_collections_from_curation_api, get_datasets_from_curation_api
from backend.wmg.data.utils import get_collections_from_discover_api, get_datasets_from_discover_api


def generate_source_collections_data(all_cell_type_ids_in_corpus: list[str]) -> dict[str, list[SourceCollectionsData]]:
"""
For each cell type id in the corpus, we want to generate a SourceCollectionsData object, which contains
metadata about the source data for each cell type
"""
all_datasets = get_datasets_from_curation_api()
all_collections = get_collections_from_curation_api()
all_datasets = get_datasets_from_discover_api()
all_collections = get_collections_from_discover_api()

collections_dict = {collection["collection_id"]: collection for collection in all_collections}
datasets_dict = {dataset["dataset_id"]: dataset for dataset in all_datasets}
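
The renamed helpers point at the Discover API as the data source. A hypothetical sketch of such a helper; the base URL and endpoint path are assumptions, not taken from this diff.

import requests

def get_datasets_from_discover_api() -> list[dict]:
    # Hypothetical endpoint; the real helper lives in backend/wmg/data/utils.py.
    resp = requests.get("https://api.cellxgene.cziscience.com/curation/v1/datasets")
    resp.raise_for_status()
    return resp.json()
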
4 changes: 0 additions & 4 deletions backend/common/utils/exceptions.py
@@ -23,9 +23,5 @@ class NonExistentDatasetException(CorporaException):
pass


class CubeValidationException(Exception):
pass


class MarkerGeneCalculationException(Exception):
pass
5 changes: 2 additions & 3 deletions backend/common/utils/result_notification.py
@@ -85,7 +85,7 @@ def gen_wmg_pipeline_failure_message(failure_info: str) -> dict:
}


def gen_wmg_pipeline_success_message(snapshot_path: str, dataset_count: int, cell_count: int, gene_count: int) -> dict:
def gen_wmg_pipeline_success_message(snapshot_path: str, dataset_count: int, cell_count: int) -> dict:
return {
"blocks": [
{
@@ -101,8 +101,7 @@ def gen_wmg_pipeline_success_message(snapshot_path: str, dataset_count: int, cel
"text": {
"type": "mrkdwn",
"text": f"\n* WMG snapshot stored in {snapshot_path}"
f"\n* The cube contains {cell_count} cells from {dataset_count} "
f"\n datasets, with expression scores across {gene_count} genes.",
f"\n* The cube contains {cell_count} cells from {dataset_count} datasets.",
},
},
]
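
With gene_count dropped from the signature, callers build the Slack payload from three values. A minimal usage sketch, assuming a placeholder incoming-webhook URL and placeholder counts:

import requests

from backend.common.utils.result_notification import gen_wmg_pipeline_success_message

payload = gen_wmg_pipeline_success_message(
    snapshot_path="s3://example-bucket/snapshots/1698000000",  # placeholder path
    dataset_count=500,
    cell_count=30_000_000,
)
requests.post("https://hooks.slack.com/services/T000/B000/XXXX", json=payload)  # placeholder webhook
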
22 changes: 21 additions & 1 deletion backend/curation/api/curation-api.yml
@@ -716,6 +716,14 @@ components:
type: string
nullable: true
example: ["patient", "seqBatch"]
citation:
description: |
Citation that includes a downloadable permalink to the h5ad artifact for this dataset, a permalink to the
collection it belongs to in CZ CELLxGENE Discover, and--if applicable--the Publication DOI associated with the dataset.
See details about the exact format in the
[schema definition](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#citation)
type: string
nullable: true
collection_list:
description: Collection metadata
properties:
@@ -988,6 +996,8 @@ components:
items:
$ref: "#/components/schemas/ontology_element"
type: array
citation:
$ref: "#/components/schemas/citation"
dataset_id:
$ref: "#/components/schemas/dataset_id"
dataset_version_id:
@@ -1092,6 +1102,8 @@ components:
items:
$ref: "#/components/schemas/ontology_element"
type: array
citation:
$ref: "#/components/schemas/citation"
collection_doi:
$ref: "#/components/schemas/doi"
collection_id:
@@ -1189,6 +1201,8 @@ components:
items:
$ref: "#/components/schemas/ontology_element"
type: array
citation:
$ref: "#/components/schemas/citation"
collection_id:
$ref: "#/components/schemas/collection_id"
collection_version_id:
@@ -1291,7 +1305,13 @@ components:
[tissue label](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/3.1.0/schema.md#tissue)
default: []
items:
$ref: "#/components/schemas/ontology_element"
allOf:
- $ref: "#/components/schemas/ontology_element"
- type: object
properties:
tissue_type:
type: string
nullable: true
type: array
dataset_tombstone:
description: When True, this Dataset was withdrawn from data curation at the request of its submitter.
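
For illustration, a citation value shaped like the linked schema definition might read as below; the URLs and IDs are placeholders, and the exact wording is an unverified paraphrase of the schema.

citation = (
    "Publication: https://doi.org/10.1000/example "
    "Dataset Version: https://datasets.cellxgene.cziscience.com/<dataset_version_id>.h5ad "
    "curated and distributed by CZ CELLxGENE Discover in Collection: "
    "https://cellxgene.cziscience.com/collections/<collection_id>"
)
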
5 changes: 5 additions & 0 deletions backend/curation/api/v1/curation/collections/common.py
@@ -227,6 +227,10 @@ def reshape_dataset_for_curation_api(
if col is not None:
ds[column] = col

if ds.get("tissue") is not None and CorporaConfig().schema_4_feature_flag.lower() == "false":
for tissue in ds["tissue"]:
del tissue["tissue_type"]

ds["dataset_id"] = dataset_version.dataset_id.id
ds["dataset_version_id"] = dataset_version.version_id.id
# Get none preview specific dataset fields
@@ -318,6 +322,7 @@ class EntityColumns:
"mean_genes_per_cell",
"schema_version",
"donor_id",
"citation",
]

dataset_metadata_cols = [
8 changes: 7 additions & 1 deletion backend/layers/common/entities.py
@@ -158,13 +158,18 @@ class OntologyTermId:
ontology_term_id: str


@dataclass
class TissueOntologyTermId(OntologyTermId):
tissue_type: Optional[str] = None


@dataclass_json
@dataclass
class DatasetMetadata:
name: str
schema_version: str
organism: List[OntologyTermId]
tissue: List[OntologyTermId]
tissue: List[TissueOntologyTermId]
assay: List[OntologyTermId]
disease: List[OntologyTermId]
sex: List[OntologyTermId]
@@ -178,6 +183,7 @@ class DatasetMetadata:
donor_id: List[str]
is_primary_data: str
x_approximate_distribution: Optional[str]
citation: Optional[str] = None
default_embedding: Optional[str] = None
embeddings: Optional[List[str]] = None
feature_biotype: Optional[List[str]] = None
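
A small usage sketch of the new dataclass, assuming OntologyTermId also defines a label field (that line is not visible in this hunk):

from backend.layers.common.entities import TissueOntologyTermId

tissue = TissueOntologyTermId(
    label="lung",                       # assumed inherited field, not shown above
    ontology_term_id="UBERON:0002048",
    tissue_type="tissue",               # new Optional[str] field, defaults to None
)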