Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release for AlphaFold2 Multimer Parallel #29

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ tmp/
.nextflow*
stack-outputs.json
test_data
linter-rules-for-nextflow
build/cloudformation/packaged.yaml
68 changes: 47 additions & 21 deletions assets/containers/alphafold-data/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

FROM public.ecr.aws/amazonlinux/amazonlinux:latest as build
FROM public.ecr.aws/amazonlinux/amazonlinux:latest AS build

RUN yum upgrade -y \
&& yum install -y \
Expand All @@ -19,26 +19,52 @@ RUN yum upgrade -y \
wget \
zstd \
&& yum clean all \
&& rm -rf /var/cache/yum \
&& pushd /tmp \
&& git clone https://github.com/soedinglab/hh-suite.git \
&& cd hh-suite && mkdir build && cd build \
&& cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \
&& make -j 4 && make install \
&& popd \
&& pushd /tmp \
&& wget http://msa.sbc.su.se/downloads/kalign/current.tar.gz --no-check-certificate \
&& mkdir -p /tmp/kalign2/build \
&& tar -xvzf current.tar.gz -C /tmp/kalign2 \
&& pushd /tmp/kalign2 \
&& ./configure \
&& make && make install \
&& popd \
&& rm -rf /tmp/kalign2 \
&& popd \
&& mkdir -p /tmp/hmmer && wget -O hmmer.tar.gz http://eddylab.org/software/hmmer/hmmer-3.4.tar.gz \
&& tar xvzf hmmer.tar.gz -C /tmp/hmmer \
&& pushd /tmp/hmmer/hmmer-* \
&& rm -rf /var/cache/yum

# ADD hh-suite.tar.gz /tmp/hh-suite
# RUN pushd /tmp/hh-suite \
# && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite . \
# && make && make install \
# && popd
# Build HH-suite from source and install it under /opt/hhsuite.
# The clone is pinned to the v3.3.0 release tag (the same tag the
# alphafold-predict image builds) so that image builds are reproducible
# instead of tracking whatever the default branch contains at build time.
# --depth 1 skips the history, which is not needed for a build.
RUN pushd /tmp && \
    git clone --branch v3.3.0 --depth 1 https://github.com/soedinglab/hh-suite.git && \
    cd hh-suite && mkdir build && cd build && \
    cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. && \
    make -j 4 && make install && \
    popd

# ADD kalign.tar.gz /tmp/kalign-3.4.3
# RUN pushd /tmp/kalign2 \
# && ./configure \
# && make && make install \
# && popd
# RUN pushd /tmp && \
# wget https://github.com/TimoLassmann/kalign/archive/refs/tags/v3.4.0.tar.gz && \
# tar -xvzf v3.4.0.tar.gz && \
# cd kalign-3.4.0 && \
# mkdir build && \
# cd build && \
# cmake3 .. && \
# make -j 4 && make test && \
# make install && \
# popd

# Build kalign2 from the upstream "current" source tarball and install it
# with the default ./configure prefix. The unpacked sources in /tmp/kalign2
# are removed once the install step has finished.
RUN pushd /tmp && \
    wget http://msa.sbc.su.se/downloads/kalign/current.tar.gz --no-check-certificate && \
    mkdir -p /tmp/kalign2/build && \
    tar -xvzf current.tar.gz -C /tmp/kalign2 && \
    pushd /tmp/kalign2 && \
    ./configure && \
    make && \
    make install && \
    popd && \
    rm -rf /tmp/kalign2 && \
    popd

# ADD hmmer.tar.gz /tmp/hmmer
# Download the HMMER 3.4 source tarball and unpack it under /tmp/hmmer.
RUN mkdir -p /tmp/hmmer && \
    wget -O hmmer.tar.gz http://eddylab.org/software/hmmer/hmmer-3.4.tar.gz && \
    tar xvzf hmmer.tar.gz -C /tmp/hmmer
# Configure, build and install HMMER from the unpacked hmmer-* directory.
RUN pushd /tmp/hmmer/hmmer-* && \
    ./configure && \
    make && \
    make install && \
    popd
Expand Down
33 changes: 23 additions & 10 deletions assets/containers/alphafold-data/update_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,35 @@

from alphafold.data.pipeline_multimer import int_id_to_str_id


# Example file_lists:
#
# [4ZQK.1_uniref90_hits.sto 4ZQK.2_uniref90_hits.sto]
# [4ZQK.2_mgnify_hits.sto 4ZQK.1_mgnify_hits.sto]
# [4ZQK.1_uniprot_hits.sto 4ZQK.2_uniprot_hits.sto]
# [4ZQK.1_bfd_hits.a3m 4ZQK.2_bfd_hits.a3m]
# [4ZQK.1_pdb_hits.sto 4ZQK.2_pdb_hits.sto]

# target_dir = msa
def update_locations(target_dir, file_list):
    """Copy indexed MSA result files into per-chain sub-directories.

    Every entry of ``file_list`` is expected to be named
    ``<record_id>_<outfile>`` where ``record_id`` ends with a numeric chain
    index after a dot, for example ``5nl6.1_uniref90_hits.sto`` gives
    ``record_id = "5nl6.1"`` and ``outfile = "uniref90_hits.sto"``.  The
    index is turned into a chain ID with ``int_id_to_str_id`` and the file
    is copied to ``<target_dir>/<chain>/<outfile>``.

    Args:
        target_dir: Root directory under which one sub-directory per chain
            is created (e.g. ``msa``).
        file_list: Iterable of paths to the indexed files to copy.

    Raises:
        ValueError: If the chain index of a file name is not an integer.
    """
    for filename in file_list:
        # Parse the file name only, so that a directory component that
        # happens to contain "_" cannot be mistaken for the separator.
        basename = os.path.basename(filename)
        record_id, _sep, outfile = basename.partition("_")

        # Use everything after the last "." as the index so that indices
        # with more than one digit (e.g. "4ZQK.12") are read in full rather
        # than truncated to their last character.
        _prefix, dot, index_text = record_id.rpartition(".")
        if not dot:
            # No dot in the record id: keep the previous behaviour of
            # reading the final character as the index.
            index_text = record_id[-1:]
        record_inx = int(index_text)

        chain = int_id_to_str_id(record_inx)
        chain_dir_path = pathlib.Path(target_dir) / chain

        # exist_ok avoids a check-then-create race when several files of
        # the same chain are processed.
        chain_dir_path.mkdir(parents=True, exist_ok=True)

        target = os.path.join(chain_dir_path, outfile)
        print(f"COPY {filename} -> {target}")
        shutil.copy(filename, target, follow_symlinks=True)


if __name__ == "__main__":
    # Usage: update_locations.py <target_dir> <file> [<file> ...]
    update_locations(target_dir=sys.argv[1], file_list=sys.argv[2:])
83 changes: 47 additions & 36 deletions assets/containers/alphafold-predict/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
# SPDX-License-Identifier: Apache-2.0

# ARG CUDA=11.1.1
ARG CUDA=12.2.2
# ARG ALPHAFOLD2_VERSION=v2.3.2
ARG ALPHAFOLD2_VERSION=f251de6613cb478207c732bf9627b1e853c99c2f
FROM nvcr.io/nvidia/cuda:${CUDA}-cudnn8-runtime-ubuntu20.04
ARG CUDA=11.6.0
ARG ALPHAFOLD2_VERSION=v2.3.2
FROM nvcr.io/nvidia/cuda:${CUDA}-cudnn8-runtime-ubuntu18.04
# FROM directive resets ARGS, so we specify again (the value is retained if
# previously set).
ARG CUDA
Expand All @@ -15,19 +14,18 @@ ARG ALPHAFOLD2_VERSION
# Use bash to support string substitution.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
build-essential \
cmake \
cuda-command-line-tools-$(cut -f1,2 -d- <<< ${CUDA//./-}) \
git \
hmmer \
kalign \
tzdata \
wget \
awscli \
jq \
unzip \
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
build-essential \
cmake \
cuda-command-line-tools-$(cut -f1,2 -d- <<< ${CUDA//./-}) \
git \
hmmer \
kalign \
tzdata \
wget \
awscli \
jq \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get autoremove -y \
&& apt-get clean
Expand All @@ -36,7 +34,7 @@ RUN apt-get update \
RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \
&& mkdir /tmp/hh-suite/build \
&& pushd /tmp/hh-suite/build \
&& cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \
&& cmake -DHAVE_AVX2=1 -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \
&& make -j 4 && make install \
&& ln -s /opt/hhsuite/bin/* /usr/bin \
&& popd \
Expand All @@ -50,18 +48,18 @@ RUN wget -q -P /tmp \

# Install conda packages.
ENV PATH="/opt/conda/bin:$PATH"
ENV LD_LIBRARY_PATH="/opt/conda/lib:$LD_LIBRARY_PATH"
RUN conda install -qy conda==24.5.0 pip python=3.11 \
&& conda install -y -c nvidia/label/cuda-${CUDA} cuda \
&& conda install -y -c conda-forge openmm=8.0.0 pdbfixer \
&& conda clean --all --force-pkgs-dirs --yes
# RUN conda install -qy conda==4.13.0
# && conda install -y -c conda-forge
RUN conda install -y -c conda-forge \
openmm=7.5.1 \
cudatoolkit=${CUDA_VERSION} \
pdbfixer=1.7 \
pip \
python=3.9.16 \
&& conda clean --all --force-pkgs-dirs --yes

# Install AlphaFold
RUN wget -q -P /tmp \
https://github.com/google-deepmind/alphafold/archive/${ALPHAFOLD2_VERSION}.zip \
&& mkdir -p /app/alphafold \
&& unzip /tmp/f251de6613cb478207c732bf9627b1e853c99c2f.zip -d /tmp \
&& mv /tmp/alphafold-f251de6613cb478207c732bf9627b1e853c99c2f/* /app/alphafold

RUN git clone --branch ${ALPHAFOLD2_VERSION} --depth 1 https://github.com/deepmind/alphafold.git /app/alphafold

RUN wget -q -P /app/alphafold/alphafold/common/ \
https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
Expand All @@ -70,20 +68,33 @@ RUN wget -q -P /app/alphafold/alphafold/common/ \
RUN pip3 install --upgrade pip --no-cache-dir \
&& pip3 install -r /app/alphafold/requirements.txt --no-cache-dir \
&& pip3 install --upgrade --no-cache-dir \
jax==0.4.26 \
jaxlib==0.4.26+cuda12.cudnn89 \
jax==0.3.25 \
jaxlib==0.3.25+cuda11.cudnn805 \
-f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html

RUN pip3 install --upgrade --no-cache-dir \
matplotlib==3.9.2
matplotlib==3.6.3 \
numpy==1.24.3

# Apply OpenMM patch.
WORKDIR /opt/conda/lib/python3.9/site-packages
RUN patch -p0 < /app/alphafold/docker/openmm.patch

# Add SETUID bit to the ldconfig binary so that non-root users can run it.
RUN chmod u+s /sbin/ldconfig.real

# Currently needed to avoid undefined_symbol error.
RUN ln -sf /usr/lib/x86_64-linux-gnu/libffi.so.7 /opt/conda/lib/libffi.so.7

# We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk
# with Debian. See https://github.com/NVIDIA/nvidia-docker/issues/1399 for
# details.
# ENTRYPOINT does not support easily running multiple commands, so instead we
# write a shell script to wrap them up.
WORKDIR /app/alphafold
COPY predict.py /app/alphafold/
# COPY run.sh /app/alphafold/run.sh
# RUN echo $'#!/bin/bash\n\
# ldconfig\n\
# python /app/alphafold/run_alphafold.py "$@"' > /app/run_alphafold.sh \
# && chmod +x /app/run_alphafold.sh /app/alphafold/run.sh

ENTRYPOINT []
# ENTRYPOINT ["bash", "/app/alphafold/run.sh"]
ENTRYPOINT ["bash"]
28 changes: 13 additions & 15 deletions assets/containers/protein-utils/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,27 +1,25 @@
FROM public.ecr.aws/amazonlinux/amazonlinux:2023 as build
FROM public.ecr.aws/amazonlinux/amazonlinux:2 as build

WORKDIR /home

COPY code /home/putils
COPY requirements.txt /home
COPY code /tmp/putils

# Install python and other dependencies
RUN yum update \
RUN amazon-linux-extras install python3.8 \
&& yum upgrade -y \
&& yum install -y \
python3.11 \
unzip-6.0 \
wget-1.21.3 \
&& python3.11 -m venv /opt/venv \
wget-1.14 \
&& python3.8 -m venv /opt/venv \
&& source /opt/venv/bin/activate \
&& pip install -U pip \
&& pip install -q --no-cache-dir -r /home/requirements.txt \
&& pip install -q --no-cache-dir /home/putils \
&& yum autoremove -y \
&& pip install -q --no-cache-dir \
pandas==2.0.0 \
numpy==1.24.2 \
biopython==1.81 \
/tmp/putils \
&& yum clean all \
&& rm -rf /var/cache/yum
&& rm -rf /var/cache/yum \
&& rm -rf /tmp/putils

ENV VIRTUAL_ENV="/opt/venv"
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

ENTRYPOINT []
WORKDIR /home
1 change: 1 addition & 0 deletions assets/containers/protein-utils/code/resources.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id": "3D06", "seq_length": 200, "seq_count": 1, "template_search_resources": {"vcpu": 2, "memory": "4 GiB", "gpu": "False"}, "feature_gen_resources": {"vcpu": 2, "memory": "4 GiB", "gpu": "False"}, "predict_resources": {"vcpu": 8, "memory": "32 GiB", "gpu": "True"}, "uniref90_msa_resources": {"vcpu": 8, "memory": "16 GiB", "gpu": "False"}, "mgnify_msa_resources": {"vcpu": 8, "memory": "16 GiB", "gpu": "False"}, "bfd_msa_resources": {"vcpu": 16, "memory": "32 GiB", "gpu": "False"}}
1 change: 1 addition & 0 deletions assets/containers/protein-utils/code/seq_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id": "2022", "seq_length": "100", "seq_count": "1"}
2 changes: 1 addition & 1 deletion assets/containers/protein-utils/code/setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
# SPDX-License-Identifier: Apache-2.0

from setuptools import setup, find_packages

Expand Down
Loading