chore: staging -> prod (#6292)
Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: Joyce Yan <[email protected]>
Co-authored-by: Timmy Huang <[email protected]>
Co-authored-by: Ronen <[email protected]>
Co-authored-by: Trent Smith <[email protected]>
Co-authored-by: Mim Hastie <[email protected]>
Co-authored-by: Daniel Hegeman <[email protected]>
Co-authored-by: atarashansky <[email protected]>
Co-authored-by: pablo-gar <[email protected]>
Co-authored-by: Fran McDade <[email protected]>
Co-authored-by: Fran McDade <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: SethFeingold <[email protected]>
13 people authored Dec 1, 2023
1 parent 0a898a5 commit a9e7206
Showing 87 changed files with 3,664 additions and 1,522 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deploy-happy-stack.yml
@@ -18,7 +18,7 @@ jobs:
     steps:
       - uses: actions/setup-node@v2
         with:
-          node-version: 15
+          node-version: "16.14.2"
       - name: Configure AWS Prod Credentials
         uses: aws-actions/configure-aws-credentials@v2
         if: github.event.deployment.environment == 'prod'
12 changes: 10 additions & 2 deletions .github/workflows/push-tests.yml
@@ -39,12 +39,20 @@ jobs:
         with:
           fetch-depth: 2
       - uses: actions/setup-python@v3
+
       - name: check backend
         uses: pre-commit/[email protected]
+      - uses: actions/setup-node@v4
+        with:
+          node-version-file: "frontend/.nvmrc"
+          cache: "npm"
+          cache-dependency-path: "frontend/package-lock.json"
       - name: check frontend
         run: |
-          cp frontend/src/configs/local.js frontend/src/configs/configs.js
-          docker-compose run --no-deps --rm frontend make lint
+          cd frontend
+          npm ci
+          cp src/configs/local.js src/configs/configs.js
+          npm run lint
       - uses: 8398a7/action-slack@v3
         with:
           status: ${{ job.status }}
57 changes: 51 additions & 6 deletions .happy/terraform/modules/sfn/main.tf
@@ -281,8 +281,7 @@ resource "aws_sfn_state_machine" "state_machine" {
         "BackoffRate": 2.0
       } ],
       "Next": "DeregisterJobDefinition",
-      "ResultPath": null,
-      "OutputPath": "$.[0]"
+      "ResultPath": null
     },
     "HandleErrors": {
       "Type": "Task",
@@ -300,16 +299,62 @@
         "MaxAttempts": 3,
         "BackoffRate": 2.0
       } ],
-      "Next": "DeregisterJobDefinition",
+      "Next": "DeregisterJobDefinitionAfterHandleErrors",
       "ResultPath": null
     },
-    "DeregisterJobDefinition": {
+    "DeregisterJobDefinitionAfterHandleErrors": {
       "Type": "Task",
-      "End": true,
+      "Next": "CheckForErrors",
       "Parameters": {
         "JobDefinition.$": "$.batch.JobDefinitionName"
       },
-      "Resource": "arn:aws:states:::aws-sdk:batch:deregisterJobDefinition"
+      "Resource": "arn:aws:states:::aws-sdk:batch:deregisterJobDefinition",
+      "ResultPath": null
+    },
+    "DeregisterJobDefinition": {
+      "Type": "Task",
+      "Next": "CheckForErrors",
+      "Parameters": {
+        "JobDefinition.$": "$[0].batch.JobDefinitionName"
+      },
+      "Resource": "arn:aws:states:::aws-sdk:batch:deregisterJobDefinition",
+      "ResultPath": null
+    },
+    "CheckForErrors": {
+      "Type": "Choice",
+      "Choices": [
+        {
+          "Variable": "$.error",
+          "IsPresent": true,
+          "Next": "DownloadValidateError"
+        },
+        {
+          "Or": [
+            {
+              "Variable": "$[0].error",
+              "IsPresent": true
+            },
+            {
+              "Variable": "$[1].error",
+              "IsPresent": true
+            }
+          ],
+          "Next": "ConversionError"
+        }
+      ],
+      "Default": "EndPass"
+    },
+    "ConversionError": {
+      "Type": "Fail",
+      "Cause": "CXG and/or Seurat conversion failed."
+    },
+    "DownloadValidateError": {
+      "Type": "Fail",
+      "Cause": "An error occurred during Download/Validate."
+    },
+    "EndPass": {
+      "Type": "Pass",
+      "End": true
+    }
   }
 }
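
The routing added by the new CheckForErrors choice state is easiest to read as plain code. A minimal Python sketch of its semantics, assuming the state input is either a dict coming off the download/validate path or a two-element list coming off the parallel CXG/Seurat conversion branches (illustrative only, not part of the commit):

def check_for_errors(state_input):
    # "$.error" IsPresent on a dict input routes to DownloadValidateError.
    if isinstance(state_input, dict) and "error" in state_input:
        return "DownloadValidateError"
    # "$[0].error" or "$[1].error" IsPresent on a list input routes to ConversionError.
    if isinstance(state_input, list) and any("error" in branch for branch in state_input[:2]):
        return "ConversionError"
    # Default: EndPass, a terminal Pass state, so the execution succeeds.
    return "EndPass"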
21 changes: 0 additions & 21 deletions backend/layers/business/business.py
@@ -11,7 +11,6 @@
     CollectionMetadataUpdate,
     CollectionQueryFilter,
     DatasetArtifactDownloadData,
-    DeprecatedDatasetArtifactDownloadData,
 )
 from backend.layers.business.exceptions import (
     ArtifactNotFoundException,
@@ -544,26 +543,6 @@ def get_dataset_artifact_download_data(

         return DatasetArtifactDownloadData(file_size, url)

-    # TODO: Superseded by get_dataset_artifact_download_data. Remove with #5697.
-    def get_dataset_artifact_download_data_deprecated(
-        self, dataset_version_id: DatasetVersionId, artifact_id: DatasetArtifactId
-    ) -> DeprecatedDatasetArtifactDownloadData:
-        """
-        Returns download data for an artifact, including a presigned URL
-        """
-        artifacts = self.get_dataset_artifacts(dataset_version_id)
-        artifact = next((a for a in artifacts if a.id == artifact_id), None)
-
-        if not artifact:
-            raise ArtifactNotFoundException(f"Artifact {artifact_id} not found in dataset {dataset_version_id}")
-
-        file_name = artifact.uri.split("/")[-1]
-        file_type = artifact.type
-        file_size = self.s3_provider.get_file_size(artifact.uri)
-        presigned_url = self.s3_provider.generate_presigned_url(artifact.uri)
-
-        return DeprecatedDatasetArtifactDownloadData(file_name, file_type, file_size, presigned_url)
-
     def get_dataset_status(self, dataset_version_id: DatasetVersionId) -> DatasetStatus:
         """
         Returns the dataset status for a specific dataset version
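
With the deprecated path removed, callers get download data only from get_dataset_artifact_download_data, which carries just a size and a URL. A hypothetical call-site sketch (the business-logic object and ids are assumed; the fields match the DatasetArtifactDownloadData construction shown above):

download_data = business_logic.get_dataset_artifact_download_data(dataset_version_id, artifact_id)
# DatasetArtifactDownloadData now exposes only file_size and url;
# file_name, file_type, and presigned_url went away with the deprecated entity.
print(download_data.file_size, download_data.url)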
7 changes: 0 additions & 7 deletions backend/layers/business/business_interface.py
@@ -4,7 +4,6 @@
     CollectionMetadataUpdate,
     CollectionQueryFilter,
     DatasetArtifactDownloadData,
-    DeprecatedDatasetArtifactDownloadData,
 )
 from backend.layers.common.entities import (
     CanonicalCollection,
@@ -136,12 +135,6 @@ def get_dataset_artifact_download_data(
     ) -> DatasetArtifactDownloadData:
         pass

-    # TODO: Superseded by get_dataset_artifact_download_data. Remove with #5697.
-    def get_dataset_artifact_download_data_deprecated(
-        self, dataset_version_id: DatasetVersionId, artifact_id: DatasetArtifactId
-    ) -> DeprecatedDatasetArtifactDownloadData:
-        pass
-
     def update_dataset_version_status(
         self,
         dataset_version_id: DatasetVersionId,
11 changes: 1 addition & 10 deletions backend/layers/business/entities.py
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 from typing import List, Optional

-from backend.layers.common.entities import DatasetArtifactType, Link
+from backend.layers.common.entities import Link


 @dataclass
@@ -18,15 +18,6 @@ class DatasetArtifactDownloadData:
     url: str


-# TODO: Superseded by DatasetArtifactDownloadData. Remove with #5697.
-@dataclass
-class DeprecatedDatasetArtifactDownloadData:
-    file_name: str
-    file_type: DatasetArtifactType
-    file_size: int
-    presigned_url: str
-
-
 @dataclass
 class CollectionMetadataUpdate:
     """
2 changes: 1 addition & 1 deletion backend/layers/processing/schema_migration.py
@@ -123,7 +123,7 @@ def dataset_migrate(
         existing_dataset_version_id=DatasetVersionId(dataset_version_id),
         start_step_function=False,  # The schema_migration sfn will start the ingest sfn
     )
-    sfn_name = sfn_name_generator(dataset_version_id, prefix="migrate")
+    sfn_name = sfn_name_generator(new_dataset_version_id, prefix="migrate")
    return {
        "collection_version_id": collection_version_id,
        "dataset_version_id": new_dataset_version_id.id,
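
The effect of the one-line fix above is that the migration step function is named after the dataset version it creates rather than the one it replaces. A hypothetical sketch of the naming helper (the real sfn_name_generator lives elsewhere in the codebase; this body is assumed):

def sfn_name_generator(dataset_version_id, prefix: str = "") -> str:
    # Assumed implementation: execution names are keyed on the version id,
    # so using the stale id would label the run with the wrong version.
    return f"{prefix}_{dataset_version_id.id}"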
26 changes: 26 additions & 0 deletions backend/portal/api/enrichment.py
@@ -5,6 +5,8 @@

 from collections import OrderedDict

+from backend.common.feature_flag import FeatureFlagService, FeatureFlagValues
+

 def enrich_dataset_with_ancestors(dataset, key, ontology_mapping):
     """
@@ -15,6 +17,16 @@ def enrich_dataset_with_ancestors(dataset, key, ontology_mapping):

     terms = [e["ontology_term_id"] for e in dataset[key]]

+    is_schema_4 = FeatureFlagService.is_enabled(FeatureFlagValues.SCHEMA_4)
+    is_tissue = key == "tissue"
+    if is_tissue and is_schema_4:
+        # TODO remove is_schema_4 condition once Schema 4 is rolled out and
+        # feature flag is removed (#6266). "tissue" must include "tissue_type"
+        # when generating ancestors; "cell_type" and "development_stage" do not.
+        terms = [generate_tagged_tissue_ontology_id(e) for e in dataset[key]]
+    else:
+        terms = [e["ontology_term_id"] for e in dataset[key]]
+
     if not terms:
         return

@@ -23,3 +35,17 @@
     unique_ancestors = list(OrderedDict.fromkeys(flattened_ancestors))
     if unique_ancestors:
         dataset[f"{key}_ancestors"] = unique_ancestors
+
+
+def generate_tagged_tissue_ontology_id(tissue):
+    """
+    Generate ontology ID tagged with tissue_type for the given tissue. For
+    example, UBERON:1234567 (organoid).
+    """
+    tissue_id = tissue["ontology_term_id"]
+    # Handle possible None for tissue_type (possible during migration): default
+    # to "tissue".
+    tissue_type = tissue["tissue_type"] or "tissue"
+    if tissue_type == "tissue":
+        return tissue_id
+    return f"{tissue_id} ({tissue_type})"
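
A quick usage sketch of the new helper, reusing the docstring's illustrative UBERON id:

generate_tagged_tissue_ontology_id({"ontology_term_id": "UBERON:1234567", "tissue_type": "organoid"})
# -> "UBERON:1234567 (organoid)"
generate_tagged_tissue_ontology_id({"ontology_term_id": "UBERON:1234567", "tissue_type": None})
# -> "UBERON:1234567" (None falls back to "tissue", which stays untagged)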
36 changes: 0 additions & 36 deletions backend/portal/api/portal-api.yml
@@ -581,42 +581,6 @@ paths:
             $ref: "#/components/responses/403"
           "404":
             $ref: "#/components/responses/404"
-    post:
-      tags:
-        - datasets
-      summary: Request to download a dataset
-      description: >-
-        Request to download a file which on success will generate a pre-signed URL to download the dataset.
-      operationId: backend.portal.api.portal_api.post_dataset_asset
-      parameters:
-        - $ref: "#/components/parameters/path_dataset_id"
-        - name: asset_id
-          in: path
-          required: true
-          schema:
-            type: string
-      responses:
-        "200":
-          description: OK
-          content:
-            application/json:
-              schema:
-                type: object
-                properties:
-                  dataset_id:
-                    $ref: "#/components/schemas/dataset_id"
-                  presigned_url:
-                    type: string
-                  file_name:
-                    type: string
-                  file_size:
-                    type: number
-        "401":
-          $ref: "#/components/responses/401"
-        "403":
-          $ref: "#/components/responses/403"
-        "404":
-          $ref: "#/components/responses/404"

   /v1/datasets/{dataset_id}/status:
     get:
32 changes: 0 additions & 32 deletions backend/portal/api/portal_api.py
@@ -678,38 +678,6 @@ def get_dataset_asset(dataset_id: str, asset_id: str):
     return make_response(response, 200)


-def post_dataset_asset(dataset_id: str, asset_id: str):
-    """
-    Requests to download a dataset asset, by generating a presigned_url.
-    """
-
-    version = get_business_logic().get_dataset_version(DatasetVersionId(dataset_id))
-    if version is None:
-        raise NotFoundHTTPException(detail=f"'dataset/{dataset_id}' not found.")
-
-    try:
-        download_data = get_business_logic().get_dataset_artifact_download_data_deprecated(
-            DatasetVersionId(dataset_id), DatasetArtifactId(asset_id)
-        )
-    except ArtifactNotFoundException:
-        raise NotFoundHTTPException(detail=f"'dataset/{dataset_id}/asset/{asset_id}' not found.") from None
-
-    if download_data.file_size is None:
-        raise ServerErrorHTTPException() from None
-
-    if download_data.presigned_url is None:
-        raise ServerErrorHTTPException()
-
-    response = {
-        "dataset_id": dataset_id,
-        "file_name": download_data.file_name,
-        "file_size": download_data.file_size,
-        "presigned_url": download_data.presigned_url,
-    }
-
-    return make_response(response, 200)
-
-
 def get_dataset_assets(dataset_id: str):
     """
     Returns a list of all the artifacts registered to a dataset.
11 changes: 11 additions & 0 deletions backend/wmg/api/common/rollup.py
@@ -74,6 +74,17 @@ def _rollup_gene_expression(gene_expression_df, universal_set_cell_counts_df) ->
     Augments the input gene expression dataframe to include
     (gene_ontology_term_id, tissue_ontology_term_id, cell_type_ontology_term_id, <compare_dimension>)
     combinations for which numeric expression values should be aggregated during the rollup operation.
+    Then proceeds to perform rollup.
+
+    Specifically, this function pivots the tidy gene expression dataframe to get dense 2D arrays for each
+    numeric column (nnz, sum, sqsum). The rows of these 2D arrays are the (tissue, cell_type, <compare_dimension>)
+    combinations. The columns of these 2D arrays are the genes. The 2D arrays are then stacked into a 3D array.
+
+    Because gene expressions are sparse, we need to add missing data corresponding to (tissue, cell_type, <compare_dimension>)
+    combinations present in the cell counts dataframe and not present in the gene expression dataframe. This is done by
+    vertically stacking (axis=0) empty arrays corresponding to the missing combinations. The 3D array is then rolled up
+    along the first dimension, which aggregates the rows across cell type descendants for cell types present in the same
+    group (tissue, <compare_dimension>). The resulting non-zero values in the 3D array are then converted back to a tidy dataframe.

     Parameters
     ----------
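
The pivot-and-stack step described in the new docstring can be sketched in a few lines of Python. This is simplified (no <compare_dimension>, no padding of missing combinations from the cell counts dataframe) and the column names are assumed:

import numpy as np
import pandas as pd

def pivot_and_stack(tidy_df: pd.DataFrame) -> np.ndarray:
    # Build one dense 2D array per numeric column, then stack into 3D.
    planes = []
    for metric in ("nnz", "sum", "sqsum"):
        # Rows: (tissue, cell_type) groups; columns: genes; absent pairs -> 0.
        plane = tidy_df.pivot_table(
            index=["tissue_ontology_term_id", "cell_type_ontology_term_id"],
            columns="gene_ontology_term_id",
            values=metric,
            fill_value=0,
        )
        planes.append(plane.to_numpy())
    return np.stack(planes, axis=-1)  # shape: (groups, genes, metrics)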
8 changes: 6 additions & 2 deletions frontend/census-projects.json
@@ -19,7 +19,9 @@
     "publication_info": "Cul et al. (2023) bioRxiv",
     "publication_link": "",
     "project_page": "https://example.com/",
-    "notebook_links": [["link A", "https://example.com/"]]
+    "notebook_links": [["link A", "https://example.com/"]],
+    "n_cells": 6523,
+    "n_columns": 8721
   },
   {
     "tier": "maintained",
@@ -40,6 +42,8 @@
     "DOI": null,
     "publication_info": "Cul et al. (2023) bioRxiv",
     "publication_link": "",
-    "notebook_links": [["link A", "https://example.com/"]]
+    "notebook_links": [["link A", "https://example.com/"]],
+    "n_genes": 1312,
+    "n_columns": 2312
   }
 ]
8 changes: 4 additions & 4 deletions frontend/doc-site/03__Download Published Data.mdx
@@ -1,11 +1,11 @@
 # Downloading Published Data on CZ CELLxGENE Discover

-Clicking the download button launches a dialog that enables a dataset to be downloaded in h5ad (AnnData v0.8) and rds (Seurat v4) formats. All datasets adhere to the CELLxGENE single cell annotated data schema. Datasets can either be downloaded via the browser by clicking the blue download button, or via the command line by pasting the provided curl command.
-
-1. Select the data set you wish to download.
+Clicking the download button launches a dialog that enables a dataset to be downloaded in h5ad (AnnData v0.8) and rds (Seurat v4) formats. All datasets adhere to the CELLxGENE single cell annotated data schema.

 <Image src={"/doc-site/datasetHighlight.png"} />

-2. Make your selections and hit the download button
+Click the white Download button for the dataset that you wish to download.

 <Image src={"/doc-site/downloadDialog.png"} />
+
+Select either the h5ad (AnnData v0.8) or rds (Seurat v4) download format. Click the blue Download button to download the dataset via the browser. The permanent download link can also be copied, shared, and pasted into a browser address bar.
