Skip to content

Commit

Permalink
v2.12.0
Browse files Browse the repository at this point in the history
  • Loading branch information
brianloyal committed Jan 3, 2025
1 parent 2b12d5c commit fabbd8a
Show file tree
Hide file tree
Showing 33 changed files with 590 additions and 542 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ tmp/
.nextflow*
stack-outputs.json
test_data
linter-rules-for-nextflow
build/cloudformation/packaged.yaml
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

---

## [2.12.0] - 2025-01-03

### 2.12.0 Added

- Added MMseqs2 workflow

### 2.12.0 Changed

- Updated AlphaFold2-Multimer workflow to support multiple input fasta files

---

## [2.11.0] - 2024-12-18

### 2.11.0 Added
Expand Down
68 changes: 47 additions & 21 deletions assets/containers/alphafold-data/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

FROM public.ecr.aws/amazonlinux/amazonlinux:latest as build
FROM public.ecr.aws/amazonlinux/amazonlinux:latest AS build

RUN yum upgrade -y \
&& yum install -y \
Expand All @@ -19,26 +19,52 @@ RUN yum upgrade -y \
wget \
zstd \
&& yum clean all \
&& rm -rf /var/cache/yum \
&& pushd /tmp \
&& git clone https://github.com/soedinglab/hh-suite.git \
&& cd hh-suite && mkdir build && cd build \
&& cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \
&& make -j 4 && make install \
&& popd \
&& pushd /tmp \
&& wget http://msa.sbc.su.se/downloads/kalign/current.tar.gz --no-check-certificate \
&& mkdir -p /tmp/kalign2/build \
&& tar -xvzf current.tar.gz -C /tmp/kalign2 \
&& pushd /tmp/kalign2 \
&& ./configure \
&& make && make install \
&& popd \
&& rm -rf /tmp/kalign2 \
&& popd \
&& mkdir -p /tmp/hmmer && wget -O hmmer.tar.gz http://eddylab.org/software/hmmer/hmmer-3.4.tar.gz \
&& tar xvzf hmmer.tar.gz -C /tmp/hmmer \
&& pushd /tmp/hmmer/hmmer-* \
&& rm -rf /var/cache/yum

# ADD hh-suite.tar.gz /tmp/hh-suite
# RUN pushd /tmp/hh-suite \
# && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite . \
# && make && make install \
# && popd
RUN pushd /tmp && \
git clone https://github.com/soedinglab/hh-suite.git && \
cd hh-suite && mkdir build && cd build && \
cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. && \
make -j 4 && make install && \
popd

# ADD kalign.tar.gz /tmp/kalign-3.4.3
# RUN pushd /tmp/kalign2 \
# && ./configure \
# && make && make install \
# && popd
# RUN pushd /tmp && \
# wget https://github.com/TimoLassmann/kalign/archive/refs/tags/v3.4.0.tar.gz && \
# tar -xvzf v3.4.0.tar.gz && \
# cd kalign-3.4.0 && \
# mkdir build && \
# cd build && \
# cmake3 .. && \
# make -j 4 && make test && \
# make install && \
# popd

# Compile kalign2 from source
RUN pushd /tmp && \
wget http://msa.sbc.su.se/downloads/kalign/current.tar.gz --no-check-certificate \
&& mkdir -p /tmp/kalign2/build \
&& tar -xvzf current.tar.gz -C /tmp/kalign2 \
&& pushd /tmp/kalign2 \
&& ./configure \
&& make && make install \
&& popd \
&& rm -rf /tmp/kalign2 && \
popd

# ADD hmmer.tar.gz /tmp/hmmer
RUN mkdir -p /tmp/hmmer && wget -O hmmer.tar.gz http://eddylab.org/software/hmmer/hmmer-3.4.tar.gz \
&& tar xvzf hmmer.tar.gz -C /tmp/hmmer
RUN pushd /tmp/hmmer/hmmer-* \
&& ./configure \
&& make && make install \
&& popd
Expand Down
52 changes: 40 additions & 12 deletions assets/containers/alphafold-data/update_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,50 @@

from alphafold.data.pipeline_multimer import int_id_to_str_id


def update_locations(target_dir, file_list):
# Example file_lists:
#
# [4ZQK.1_uniref90_hits.sto 4ZQK.2_uniref90_hits.sto]
# [4ZQK.2_mgnify_hits.sto 4ZQK.1_mgnify_hits.sto]
# [4ZQK.1_uniprot_hits.sto 4ZQK.2_uniprot_hits.sto]
# [4ZQK.1_bfd_hits.a3m 4ZQK.2_bfd_hits.a3m]
# [4ZQK.1_pdb_hits.sto 4ZQK.2_pdb_hits.sto]
# or
# 4ZQK_simple.1_uniref90_hits.sto 4ZQK_simple.2_uniref90_hits.sto
# 4ZQK_simple.1_mgnify_hits.sto 4ZQK_simple.2_mgnify_hits.sto
# 4ZQK_simple.2_uniprot_hits.sto 4ZQK_simple.1_uniprot_hits.sto
# 4ZQK_simple.2_bfd_hits.a3m 4ZQK_simple.1_bfd_hits.a3m
# 4ZQK_simple.1_pdb_hits.sto 4ZQK_simple.2_pdb_hits.sto

def strip_suffix_str(s: str, suffix: str):
    """Return ``s`` with a trailing ``suffix`` removed, or ``None`` if absent.

    Args:
        s: String to trim, e.g. ``"4ZQK_simple.1_uniref90_hits.sto"``.
        suffix: Trailing text to remove, e.g. ``"_uniref90_hits.sto"``.

    Returns:
        The part of ``s`` that precedes ``suffix`` when ``s`` ends with
        ``suffix``; ``None`` when it does not. An empty ``suffix`` returns
        ``s`` unchanged.
    """
    if not s.endswith(suffix):
        return None
    # Slice with an explicit end index: ``s[:-len(suffix)]`` degenerates to
    # ``s[:0]`` (the empty string) when ``suffix`` is empty.
    return s[:len(s) - len(suffix)]

# target_dir = msa
def update_locations(target_dir, strip_suffix, file_list):
for filename in file_list:
index, _null, outfile = filename.partition("_")
index = index.split(".")[1]
# filename = 4ZQK_simple.1_uniref90_hits.sto
# strip_suffix = _uniref90_hits.sto

stripped_filename = strip_suffix_str(filename, strip_suffix)
if stripped_filename == None:
raise Exception(f"Suffix {strip_suffix} not in {filename}")

chain = int_id_to_str_id(int(index))
print(f'file: {filename} index: {index} chain: {chain} outfile:{outfile}')
chain = os.path.join(target_dir, chain)
path = pathlib.Path(chain)
# stripped_filename = 4ZQK_simple.1
record_inx = int(stripped_filename[-1]) # 1
outfile = strip_suffix[1:] # uniref90_hits.sto

chain = int_id_to_str_id(record_inx)

if not path.exists():
path.mkdir(parents=True)
shutil.copy(filename, os.path.join(chain, outfile), follow_symlinks=True)
chain_dir_path = pathlib.Path(os.path.join(target_dir, chain))

if not chain_dir_path.exists():
chain_dir_path.mkdir(parents=True)

target = os.path.join(chain_dir_path, outfile)
print(f"COPY {filename} -> {target}")
shutil.copy(filename, target, follow_symlinks=True)


if __name__ == "__main__":
update_locations(sys.argv[1], sys.argv[2:])
update_locations(sys.argv[1], sys.argv[2], sys.argv[3:])
83 changes: 47 additions & 36 deletions assets/containers/alphafold-predict/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
# SPDX-License-Identifier: Apache-2.0

# ARG CUDA=11.1.1
ARG CUDA=12.2.2
# ARG ALPHAFOLD2_VERSION=v2.3.2
ARG ALPHAFOLD2_VERSION=f251de6613cb478207c732bf9627b1e853c99c2f
FROM nvcr.io/nvidia/cuda:${CUDA}-cudnn8-runtime-ubuntu20.04
ARG CUDA=11.6.0
ARG ALPHAFOLD2_VERSION=v2.3.2
FROM nvcr.io/nvidia/cuda:${CUDA}-cudnn8-runtime-ubuntu18.04
# FROM directive resets ARGS, so we specify again (the value is retained if
# previously set).
ARG CUDA
Expand All @@ -15,19 +14,18 @@ ARG ALPHAFOLD2_VERSION
# Use bash to support string substitution.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
build-essential \
cmake \
cuda-command-line-tools-$(cut -f1,2 -d- <<< ${CUDA//./-}) \
git \
hmmer \
kalign \
tzdata \
wget \
awscli \
jq \
unzip \
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
build-essential \
cmake \
cuda-command-line-tools-$(cut -f1,2 -d- <<< ${CUDA//./-}) \
git \
hmmer \
kalign \
tzdata \
wget \
awscli \
jq \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get autoremove -y \
&& apt-get clean
Expand All @@ -36,7 +34,7 @@ RUN apt-get update \
RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \
&& mkdir /tmp/hh-suite/build \
&& pushd /tmp/hh-suite/build \
&& cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \
&& cmake -DHAVE_AVX2=1 -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \
&& make -j 4 && make install \
&& ln -s /opt/hhsuite/bin/* /usr/bin \
&& popd \
Expand All @@ -50,18 +48,18 @@ RUN wget -q -P /tmp \

# Install conda packages.
ENV PATH="/opt/conda/bin:$PATH"
ENV LD_LIBRARY_PATH="/opt/conda/lib:$LD_LIBRARY_PATH"
RUN conda install -qy conda==24.5.0 pip python=3.11 \
&& conda install -y -c nvidia/label/cuda-${CUDA} cuda \
&& conda install -y -c conda-forge openmm=8.0.0 pdbfixer \
&& conda clean --all --force-pkgs-dirs --yes
# RUN conda install -qy conda==4.13.0
# && conda install -y -c conda-forge
RUN conda install -y -c conda-forge \
openmm=7.5.1 \
cudatoolkit=${CUDA_VERSION} \
pdbfixer=1.7 \
pip \
python=3.9.16 \
&& conda clean --all --force-pkgs-dirs --yes

# Install AlphaFold
RUN wget -q -P /tmp \
https://github.com/google-deepmind/alphafold/archive/${ALPHAFOLD2_VERSION}.zip \
&& mkdir -p /app/alphafold \
&& unzip /tmp/f251de6613cb478207c732bf9627b1e853c99c2f.zip -d /tmp \
&& mv /tmp/alphafold-f251de6613cb478207c732bf9627b1e853c99c2f/* /app/alphafold

RUN git clone --branch ${ALPHAFOLD2_VERSION} --depth 1 https://github.com/deepmind/alphafold.git /app/alphafold

RUN wget -q -P /app/alphafold/alphafold/common/ \
https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
Expand All @@ -70,20 +68,33 @@ RUN wget -q -P /app/alphafold/alphafold/common/ \
RUN pip3 install --upgrade pip --no-cache-dir \
&& pip3 install -r /app/alphafold/requirements.txt --no-cache-dir \
&& pip3 install --upgrade --no-cache-dir \
jax==0.4.26 \
jaxlib==0.4.26+cuda12.cudnn89 \
jax==0.3.25 \
jaxlib==0.3.25+cuda11.cudnn805 \
-f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html

RUN pip3 install --upgrade --no-cache-dir \
matplotlib==3.9.2
matplotlib==3.6.3 \
numpy==1.24.3

# Apply OpenMM patch.
WORKDIR /opt/conda/lib/python3.9/site-packages
RUN patch -p0 < /app/alphafold/docker/openmm.patch

# Add SETUID bit to the ldconfig binary so that non-root users can run it.
RUN chmod u+s /sbin/ldconfig.real

# Currently needed to avoid undefined_symbol error.
RUN ln -sf /usr/lib/x86_64-linux-gnu/libffi.so.7 /opt/conda/lib/libffi.so.7

# We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk
# with Debian. See https://github.com/NVIDIA/nvidia-docker/issues/1399 for
# details.
# ENTRYPOINT does not support easily running multiple commands, so instead we
# write a shell script to wrap them up.
WORKDIR /app/alphafold
COPY predict.py /app/alphafold/
# COPY run.sh /app/alphafold/run.sh
# RUN echo $'#!/bin/bash\n\
# ldconfig\n\
# python /app/alphafold/run_alphafold.py "$@"' > /app/run_alphafold.sh \
# && chmod +x /app/run_alphafold.sh /app/alphafold/run.sh

ENTRYPOINT []
# ENTRYPOINT ["bash", "/app/alphafold/run.sh"]
ENTRYPOINT ["bash"]
4 changes: 4 additions & 0 deletions assets/containers/mmseqs2/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
FROM ghcr.io/soedinglab/mmseqs2:master-cuda12

# Run with /usr/local/bin/entrypoint
ENTRYPOINT []
28 changes: 13 additions & 15 deletions assets/containers/protein-utils/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,27 +1,25 @@
FROM public.ecr.aws/amazonlinux/amazonlinux:2023 as build
FROM public.ecr.aws/amazonlinux/amazonlinux:2 as build

WORKDIR /home

COPY code /home/putils
COPY requirements.txt /home
COPY code /tmp/putils

# Install python and other dependencies
RUN yum update \
RUN amazon-linux-extras install python3.8 \
&& yum upgrade -y \
&& yum install -y \
python3.11 \
unzip-6.0 \
wget-1.21.3 \
&& python3.11 -m venv /opt/venv \
wget-1.14 \
&& python3.8 -m venv /opt/venv \
&& source /opt/venv/bin/activate \
&& pip install -U pip \
&& pip install -q --no-cache-dir -r /home/requirements.txt \
&& pip install -q --no-cache-dir /home/putils \
&& yum autoremove -y \
&& pip install -q --no-cache-dir \
pandas==2.0.0 \
numpy==1.24.2 \
biopython==1.81 \
/tmp/putils \
&& yum clean all \
&& rm -rf /var/cache/yum
&& rm -rf /var/cache/yum \
&& rm -rf /tmp/putils

ENV VIRTUAL_ENV="/opt/venv"
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

ENTRYPOINT []
WORKDIR /home
1 change: 1 addition & 0 deletions assets/containers/protein-utils/code/resources.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id": "3D06", "seq_length": 200, "seq_count": 1, "template_search_resources": {"vcpu": 2, "memory": "4 GiB", "gpu": "False"}, "feature_gen_resources": {"vcpu": 2, "memory": "4 GiB", "gpu": "False"}, "predict_resources": {"vcpu": 8, "memory": "32 GiB", "gpu": "True"}, "uniref90_msa_resources": {"vcpu": 8, "memory": "16 GiB", "gpu": "False"}, "mgnify_msa_resources": {"vcpu": 8, "memory": "16 GiB", "gpu": "False"}, "bfd_msa_resources": {"vcpu": 16, "memory": "32 GiB", "gpu": "False"}}
1 change: 1 addition & 0 deletions assets/containers/protein-utils/code/seq_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id": "2022", "seq_length": "100", "seq_count": "1"}
2 changes: 1 addition & 1 deletion assets/containers/protein-utils/code/setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
# SPDX-License-Identifier: Apache-2.0

from setuptools import setup, find_packages

Expand Down
Loading

0 comments on commit fabbd8a

Please sign in to comment.