Skip to content

Commit

Permalink
Merge pull request #6361 from chanzuckerberg/staging
Browse files Browse the repository at this point in the history
chore: prod <- staging; 12/15/23
  • Loading branch information
nayib-jose-gloria authored Dec 15, 2023
2 parents 1fccc1b + 95f5ffa commit 5687754
Show file tree
Hide file tree
Showing 16 changed files with 5,159 additions and 2,516 deletions.
77 changes: 77 additions & 0 deletions .happy/terraform/modules/batch/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,83 @@ resource aws_batch_job_definition dataset_metadata_update {
"maxSwap": 800000,
"swappiness": 60
},
"retryStrategy": {
"attempts": 3,
"evaluateOnExit": [
{
"action": "RETRY",
"onReason": "Task failed to start"
},
{
"action": "EXIT",
"onReason": "*"
}
]
},
"logConfiguration": {
"logDriver": "awslogs",
"options": {
"awslogs-group": "${aws_cloudwatch_log_group.cloud_watch_logs_group.id}",
"awslogs-region": "${data.aws_region.current.name}"
}
}
})
}

resource aws_batch_job_definition reprocess_dataset_metadata {
# this was used to reprocess dataset metadata in place when an error was found after publishing cellxgene schema 4.0
# TODO: can be removed after 4.0 migration is complete
type = "container"
name = "dp-${var.deployment_stage}-${var.custom_stack_name}-reprocess-dataset-metadata"
container_properties = jsonencode({
"command": ["python3", "-m", "backend.layers.processing.reprocess_dataset_metadata"],
"jobRoleArn": "${var.batch_role_arn}",
"image": "${var.image}",
"memory": var.batch_container_memory_limit,
"environment": [
{
"name": "ARTIFACT_BUCKET",
"value": "${var.artifact_bucket}"
},
{
"name": "CELLXGENE_BUCKET",
"value": "${var.cellxgene_bucket}"
},
{
"name": "DATASETS_BUCKET",
"value": "${var.datasets_bucket}"
},
{
"name": "DEPLOYMENT_STAGE",
"value": "${var.deployment_stage}"
},
{
"name": "AWS_DEFAULT_REGION",
"value": "${data.aws_region.current.name}"
},
{
"name": "REMOTE_DEV_PREFIX",
"value": "${var.remote_dev_prefix}"
}
],
"vcpus": 8,
"linuxParameters": {
"maxSwap": 800000,
"swappiness": 60
},
"retryStrategy": {
"attempts": 3,
"evaluateOnExit": [
{
"action": "RETRY",
"onReason": "Task failed to start"
},
{
"action": "EXIT",
"onReason": "*"
}
]
},
"logConfiguration": {
"logDriver": "awslogs",
"options": {
Expand Down
Binary file modified backend/common/ontology_files/all_ontology.json.gz
Binary file not shown.
Binary file modified backend/common/ontology_files/genes_ercc.csv.gz
Binary file not shown.
Binary file modified backend/common/ontology_files/genes_homo_sapiens.csv.gz
Binary file not shown.
Binary file modified backend/common/ontology_files/genes_mus_musculus.csv.gz
Binary file not shown.
Binary file modified backend/common/ontology_files/genes_sars_cov_2.csv.gz
Binary file not shown.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions backend/layers/common/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ class DatasetArtifactMetadataUpdate:
title: Optional[str] = None
schema_version: Optional[str] = None
citation: Optional[str] = None
schema_reference: Optional[str] = None

def as_dict_without_none_values(self):
    """Return this dataclass instance as a dict, leaving out fields that are None."""
    populated = {}
    for field_name, field_value in asdict(self).items():
        if field_value is None:
            continue
        populated[field_name] = field_value
    return populated
Expand Down
11 changes: 10 additions & 1 deletion backend/layers/processing/make_seurat.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,18 @@ require(devtools)

# First trailing command-line argument: path of the .h5ad file to convert.
h5adPath <- commandArgs(trailingOnly = TRUE)[1]

# Keys passed to sceasy::convertFormat() as its `target_uns_keys` argument.
target_uns_keys <- c(
  "schema_version",
  "title",
  "batch_condition",
  "default_embedding",
  "X_approximate_distribution",
  "citation",
  "schema_reference"
)

# Convert the AnnData file at `h5adPath` to Seurat format. The output path is
# the input path with ".h5ad" replaced by ".rds".
# `fixed = TRUE` makes gsub() treat ".h5ad" as a literal string; as a regex the
# leading "." matches any character, so a path such as "/data/myh5ad/x.h5ad"
# would otherwise be rewritten to "/data/.rds/x.rds".
sceasy::convertFormat(
  h5adPath,
  from = "anndata",
  to = "seurat",
  outFile = gsub(".h5ad", ".rds", h5adPath, fixed = TRUE),
  main_layer = "data",
  target_uns_keys = target_uns_keys
)
Loading

0 comments on commit 5687754

Please sign in to comment.