-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
17 changed files
with
418 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,5 @@ tmp/ | |
.nextflow* | ||
stack-outputs.json | ||
test_data | ||
|
||
build/cloudformation/packaged.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Tagged version 2.1.0
# https://github.com/aqlaboratory/openfold/blob/v.2.1.0/Dockerfile

# Modifications have been made to work within the aws-samples/drug-discovery-workflows repository
# Mostly addition of git clone and comment out COPY statements

FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04

# metainformation
# Written as key="value" with no spaces around "=": with spaces, the legacy
# "LABEL key value" form applies and the stored value becomes `= "1.0.0"`.
# base.name is kept in sync with the FROM line above.
LABEL org.opencontainers.image.version="1.0.0" \
      org.opencontainers.image.authors="Gustaf Ahdritz" \
      org.opencontainers.image.source="https://github.com/aqlaboratory/openfold" \
      org.opencontainers.image.licenses="Apache License 2.0" \
      org.opencontainers.image.base.name="docker.io/nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04"

# Replace the CUDA apt repository signing key shipped in the base image.
RUN apt-key del 7fa2af80
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub

# OS packages. The apt lists are removed in the same layer so they do not
# persist in the image.
RUN apt-get update && apt-get install -y \
        cuda-minimal-build-11-3 \
        git \
        gzip \
        libcublas-dev-11-3 \
        libcusolver-dev-11-3 \
        libcusparse-dev-11-3 \
        libxml2 \
        tree \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Miniforge (conda + mamba) installed to /opt/conda; the installer is deleted
# in the same layer.
RUN wget -P /tmp \
        "https://github.com/conda-forge/miniforge/releases/download/23.3.1-1/Miniforge3-Linux-x86_64.sh" \
    && bash /tmp/Miniforge3-Linux-x86_64.sh -b -p /opt/conda \
    && rm /tmp/Miniforge3-Linux-x86_64.sh
ENV PATH=/opt/conda/bin:$PATH

# Tagged version 2.1.0
# The checkout is pinned to a commit hash so rebuilds get the same sources.
RUN git clone https://github.com/aqlaboratory/openfold.git /opt/openfold \
    && git -C /opt/openfold checkout f434a2786b5a6b39171f358fb3470ad9f4fd2a58

WORKDIR /opt/openfold

# The git clone does this already
# COPY environment.yml /opt/openfold/environment.yml

# installing into the base environment since the docker container wont do anything other than run openfold
RUN mamba env update -n base --file /opt/openfold/environment.yml && mamba clean --all --yes

# ENV rather than `RUN export ...`: a variable exported inside RUN is gone when
# that layer's shell exits. The conda prefix is spelled out because no
# instruction in this Dockerfile defines CONDA_PREFIX.
ENV LD_LIBRARY_PATH=/opt/conda/lib:${LD_LIBRARY_PATH}

# The git clone does this already
# COPY openfold /opt/openfold/openfold
# COPY scripts /opt/openfold/scripts
# COPY run_pretrained_openfold.py /opt/openfold/run_pretrained_openfold.py
# COPY train_openfold.py /opt/openfold/train_openfold.py
# COPY setup.py /opt/openfold/setup.py

# Overwrite the cloned prep script with the locally modified version.
COPY ./prep_mmseqs_dbs.mod.sh /opt/openfold/scripts/prep_mmseqs_dbs.sh

RUN ./scripts/install_third_party_dependencies.sh

# from scripts/install_third_party_dependencies.sh
# RUN wget -q -P /opt/openfold/openfold/resources \
#     https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
# WORKDIR /opt/openfold
# RUN python3 setup.py install
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#!/bin/bash
#
# Copyright 2021 AlQuraishi Laboratory
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Extracts every *.tar* archive found in the download directory into
# <download_dir>/mmseqs_dbs, deletes the archive, then converts the extracted
# data with `mmseqs tsv2exprofiledb` and indexes it with `mmseqs createindex`.
#
# Usage: bash prep_mmseqs_dbs.sh /path/to/download/directory [/path/to/tmp/directory]
#
# Arguments:
#   $1  directory containing the *.tar* archives (required)
#   $2  scratch directory handed to `mmseqs createindex`
#       (optional; defaults to <download_dir>/tmp)
set -e

DOWNLOAD_DIR="${1:?usage: $0 DOWNLOAD_DIR [TMP_DIR]}"
TMP_DIR="${2:-${DOWNLOAD_DIR}/tmp}"
ROOT_DIR="${DOWNLOAD_DIR}/mmseqs_dbs"
mkdir -p "${ROOT_DIR}"

# Make the scratch path absolute before any `cd`: a relative TMP_DIR would
# otherwise be resolved against ROOT_DIR instead of the caller's directory.
mkdir -p "${TMP_DIR}"
TMP_DIR="$(cd "${TMP_DIR}" && pwd)"

# Iterate with a glob rather than parsing `ls`, so paths containing spaces
# survive intact.
for f in "${DOWNLOAD_DIR}"/*.tar*
do
    # With no matching archive the pattern stays literal; skip it.
    [ -e "${f}" ] || continue

    tar --extract --verbose --file="${f}" \
        --directory="${ROOT_DIR}"
    rm "${f}"

    # Take the file name first and strip extensions second. Stripping on the
    # full path would cut at the first dot of any directory component.
    ARCHIVE_NAME="$(basename "${f}")"
    BASENAME="${ARCHIVE_NAME%%.*}"
    DB_NAME="${BASENAME}_db"

    # Subshell keeps the directory change local to the two mmseqs calls.
    (
        cd "${ROOT_DIR}"
        mmseqs tsv2exprofiledb "${BASENAME}" "${DB_NAME}"
        mmseqs createindex "${DB_NAME}" "${TMP_DIR}"
    )
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
https://wwwuser.gwdguser.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
s3://omics-{{REGION}}/alphafold_multimer/mgy/ | ||
https://storage.googleapis.com/alphafold-databases/v2.3/mgy_clusters_2022_05.fa.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
s3://openfold/openfold_params/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
This repository helps you set up and run OpenFold Monomer on AWS HealthOmics. At the end of the configuration, you should be able to run a full end-to-end inference. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/bin/bash
#
# Logs in to Amazon ECR, builds the image from ./openfold for linux/amd64 and
# pushes it to the `openfold` repository of the given account and region.
#
# Usage: <this script> REGION ACCOUNT [TAG]
#   REGION   AWS region of the ECR registry (required)
#   ACCOUNT  AWS account id owning the registry (required)
#   TAG      image tag (optional, defaults to "latest")

# pipefail: a failing `aws ecr get-login-password` must fail the login pipeline.
set -ex -o pipefail

REGION="${1:?usage: $0 REGION ACCOUNT [TAG]}"
ACCOUNT="${2:?usage: $0 REGION ACCOUNT [TAG]}"
TAG="${3:-latest}"

REGISTRY="${ACCOUNT}.dkr.ecr.${REGION}.amazonaws.com"
IMAGE="${REGISTRY}/openfold:${TAG}"

aws ecr get-login-password --region "${REGION}" | docker login --username AWS --password-stdin "${REGISTRY}"

# build openfold
cd openfold
docker build --platform linux/amd64 -t "${IMAGE}" .
docker push "${IMAGE}"
cd ..
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Workflow definition for the OpenFold pipeline (Nextflow engine, entry point main.nf).
name: OpenFold
description: "Predict single-chain protein structures with OpenFold"
engine: NEXTFLOW
main: main.nf

# Parameters accepted by the workflow. Only fasta_dir is mandatory.
parameterTemplate:
  # Reference databases
  mmseq_db:
    optional: true
    description: "mmseq db"
  uniref90:
    optional: true
    description: "uniref90"
  uniref30:
    optional: true
    description: "uniref30"
  mgnify:
    optional: true
    description: "mgnify"
  pdb70:
    optional: true
    description: "pdb70"
  uniclust30:
    optional: true
    description: "uniclust30"
  bfd:
    optional: true
    description: "bfd"

  # Model weights
  openfold_checkpoint:
    optional: true
    description: "openfold_checkpoint"

  # Inputs
  fasta_dir:
    optional: false
    description: "fasta_dir"
  pdb_mmcif_files:
    optional: true
    description: "pdb_mmcif_files"

# NOTE(review): unit of storageCapacity is not stated here; confirm before changing.
storageCapacity: 4800
tags:
  Name: "OpenFold"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
nextflow.enable.dsl = 2

/*
 * Entry workflow:
 *   1. prepare the MMseqs database from params.mmseq_db,
 *   2. compute alignments once per *.fasta file found in params.fasta_dir,
 *   3. copy every per-file alignment result into one "merged_alignments" directory,
 *   4. run OpenFold inference against that merged directory.
 */
workflow {

    prepped = PrepMMseqDatabases(params.mmseq_db)

    // One channel item per FASTA file in the input directory
    fasta_ch = Channel.fromPath("${params.fasta_dir}/*.fasta")

    // One PrecomputeAlignments task per FASTA file
    per_fasta = PrecomputeAlignments(
        fasta_ch,
        prepped.prepped_mmseq_db,
        params.pdb70
    )

    // Shared directory that collects the alignment output of every task;
    // created up front so the copies below have a destination.
    merge_target = file("merged_alignments")
    merge_target.mkdirs()

    merged_ch = per_fasta.alignments
        // Expand each task's "output" directory into its entries
        .flatMap { out_dir -> out_dir.listFiles() }
        // Copy each entry into the shared directory and emit that directory
        .map { entry ->
            entry.copyTo(merge_target)
            merge_target
        }
        // Keep only the final emission, i.e. after every copy has happened
        .last()

    // Runs once, on the merged alignment directory
    PretrainedOpenFold(
        params.fasta_dir,
        params.pdb_mmcif_files,
        merged_ch,
        params.openfold_checkpoint
    )

}
|
||
/*
 * Runs /opt/openfold/scripts/prep_mmseqs_dbs.sh on the staged database
 * directory, using ./prep-tmp/ as the script's second (temporary directory)
 * argument.
 *
 * input:  data_dir         - directory handed to the prep script
 * output: prepped_mmseq_db - everything found under data_dir afterwards
 */
process PrepMMseqDatabases {
    label 'openfold'
    accelerator 1, type: 'nvidia-tesla-a10g'
    memory '128 GB'
    cpus 32

    input:
        path data_dir

    output:
        path "${data_dir}/*", emit: prepped_mmseq_db

    script:
    // The `tree` calls only log the directory layout before and after the prep step.
    """
    set -euxo pipefail
    tree .
    tree ${data_dir}
    mkdir -p prep-tmp/
    bash /opt/openfold/scripts/prep_mmseqs_dbs.sh ${data_dir} prep-tmp/
    tree .
    tree ${data_dir}
    """
}
|
||
/*
 * Computes alignments for one FASTA file by calling OpenFold's
 * scripts/precompute_alignments_mmseqs.py from /opt/openfold.
 *
 * input:  fasta            - a single FASTA file
 *         prepped_mmseq_db - prepared MMseqs database location
 *         pdb70            - directory expected to contain the "pdb70" database prefix
 * output: alignments       - the task's ./output directory (also published)
 */
process PrecomputeAlignments {
    label 'openfold'
    accelerator 1, type: 'nvidia-tesla-a10g'
    memory '128 GB'
    cpus 32

    publishDir "/mnt/workflow/pubdir"

    input:
        path fasta
        path prepped_mmseq_db
        path pdb70

    output:
        path 'output', emit: alignments

    script:
    // Inputs are resolved to absolute paths first because the python script is
    // launched after `pushd /opt/openfold`, where task-relative paths no longer
    // resolve. `pwd` and `tree` are diagnostic logging only.
    """
    set -euxo pipefail
    mkdir -p ./output
    pwd
    tree .
    WORK="\$(realpath .)"
    FASTA="\$(realpath ${fasta})"
    MMSEQ_DB="\$(realpath ${prepped_mmseq_db})"
    PDB_70="\$(realpath ${pdb70})"
    pushd /opt/openfold
    pwd
    python3 /opt/openfold/scripts/precompute_alignments_mmseqs.py \$FASTA \
        \$MMSEQ_DB \
        colabfold_envdb_202108_db \
        \$WORK/output \
        /opt/conda/bin/mmseqs \
        --hhsearch_binary_path /opt/conda/bin/hhsearch \
        --env_db colabfold_envdb_202108_db \
        --pdb70 \$PDB_70/pdb70
    tree \$WORK
    pwd
    popd
    pwd
    """
}
|
||
/*
 * Runs OpenFold inference (run_pretrained_openfold.py) with the model_1_ptm
 * preset and the finetuning_ptm_2.pt checkpoint on cuda:0, using precomputed
 * alignments.
 *
 * input:  fasta_dir           - directory of FASTA files to predict
 *         pdb_mmcif_files     - template directory; any *.tar archives in it
 *                               are unpacked in place before inference
 *         alignment_dir       - directory of precomputed alignments
 *         openfold_checkpoint - directory containing finetuning_ptm_2.pt
 * output: results             - everything written under ./output (also published)
 */
process PretrainedOpenFold {
    label 'openfold'
    cpus 32
    memory '128 GB'
    accelerator 1, type: 'nvidia-tesla-a10g'

    publishDir "/mnt/workflow/pubdir"

    input:
        // inputs
        path fasta_dir
        path pdb_mmcif_files
        path alignment_dir

        // ref data
        path openfold_checkpoint

    output:
        path 'output/*', emit: results

    script:
    // nullglob makes the extraction loop a no-op when the template directory
    // holds no *.tar archive. Without it the literal pattern "*.tar" is passed
    // to tar, which fails and aborts the task under `set -e`.
    // The `tree` calls are diagnostic logging only.
    """
    set -euxo pipefail
    mkdir -p ./output
    tree .
    tree ${fasta_dir}
    tree ${pdb_mmcif_files}
    tree ${openfold_checkpoint}
    pushd ${pdb_mmcif_files}
    shopt -s nullglob
    for file in *.tar; do
        tar -xf "\$file"
    done
    shopt -u nullglob
    popd
    tree ${pdb_mmcif_files}
    python3 /opt/openfold/run_pretrained_openfold.py \
        ${fasta_dir} \
        ${pdb_mmcif_files} \
        --use_precomputed_alignments ${alignment_dir} \
        --config_preset model_1_ptm \
        --output_dir ./output \
        --model_device cuda:0 \
        --openfold_checkpoint_path ${openfold_checkpoint}/finetuning_ptm_2.pt
    tree .
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// Default reference-data locations. {{S3_BUCKET_NAME}} is a template
// placeholder and must be substituted before this file is used.
params {
    // Sequence / structure databases
    mmseq_db            = "s3://{{S3_BUCKET_NAME}}/ref-data/colabfold_envdb/"
    uniref90            = "s3://{{S3_BUCKET_NAME}}/ref-data/uniref90/uniref90.fasta"
    uniref30            = "s3://{{S3_BUCKET_NAME}}/ref-data/uniref30/"
    mgnify              = "s3://{{S3_BUCKET_NAME}}/ref-data/mgy/mgy_clusters_2022_05.fa.gz"
    pdb70               = "s3://{{S3_BUCKET_NAME}}/ref-data/pdb70/"
    uniclust30          = "s3://{{S3_BUCKET_NAME}}/ref-data/uniclust30/uniclust30_2018_08_hhsuite.tar.gz"
    bfd                 = "s3://{{S3_BUCKET_NAME}}/ref-data/bfd/"

    // Model weights and template structures
    openfold_checkpoint = "s3://{{S3_BUCKET_NAME}}/ref-data/openfold/"
    pdb_mmcif_files     = "s3://{{S3_BUCKET_NAME}}/ref-data/pdb_mmcif/"
}
|
||
// Every process labelled 'openfold' runs in the OpenFold container image.
// {{openfold:latest}} is a template placeholder for the image URI.
process {
    withLabel: openfold {
        container = "{{openfold:latest}}"
    }
}
Oops, something went wrong.