From fabbd8a23a6525204898e5ea178ee9d558829e1d Mon Sep 17 00:00:00 2001 From: Brian Loyal Date: Fri, 3 Jan 2025 14:24:42 -0600 Subject: [PATCH] v2.12.0 --- .gitignore | 2 + CHANGELOG.md | 12 + assets/containers/alphafold-data/Dockerfile | 68 ++++-- .../alphafold-data/update_locations.py | 52 ++++- .../containers/alphafold-predict/Dockerfile | 83 ++++--- assets/containers/mmseqs2/Dockerfile | 4 + assets/containers/protein-utils/Dockerfile | 28 ++- .../protein-utils/code/resources.json | 1 + .../protein-utils/code/seq_info.json | 1 + assets/containers/protein-utils/code/setup.py | 2 +- .../src/putils/check_and_validate_inputs.py | 45 ++-- .../code/src/putils/split_fasta.py | 221 ------------------ .../containers/protein-utils/requirements.txt | 7 - assets/data/swissprot.txt | 1 + assets/data/uniref100.txt | 1 + assets/data/uniref50.txt | 1 + assets/workflows/abodybuilder3/main.nf | 3 +- .../workflows/abodybuilder3/nextflow.config | 2 +- .../workflows/alphafold2-multimer/README.md | 33 +-- .../alphafold2-multimer/build_containers.sh | 27 +++ .../workflows/alphafold2-multimer/config.yaml | 3 - assets/workflows/alphafold2-multimer/main.nf | 209 ++++++++++------- .../alphafold2-multimer/nextflow.config | 6 +- .../parameter-template.json | 6 + .../workflows/alphafold2-multimer/params.json | 3 + .../workflows/alphafold2-multimer/searches.nf | 160 +++++++------ .../workflows/alphafold2-multimer/unpack.nf | 20 +- assets/workflows/mmseqs2/README.md | 5 + assets/workflows/mmseqs2/config.yaml | 15 ++ assets/workflows/mmseqs2/main.nf | 85 +++++++ assets/workflows/mmseqs2/nextflow.config | 12 + build/buildspec/buildspec_data.yaml | 8 +- scripts/testrun.sh | 6 +- 33 files changed, 590 insertions(+), 542 deletions(-) create mode 100644 assets/containers/mmseqs2/Dockerfile create mode 100644 assets/containers/protein-utils/code/resources.json create mode 100644 assets/containers/protein-utils/code/seq_info.json delete mode 100644 assets/containers/protein-utils/code/src/putils/split_fasta.py delete mode 100644 assets/containers/protein-utils/requirements.txt create mode 100644 assets/data/swissprot.txt create mode 100644 assets/data/uniref100.txt create mode 100644 assets/data/uniref50.txt create mode 100755 assets/workflows/alphafold2-multimer/build_containers.sh create mode 100644 assets/workflows/alphafold2-multimer/parameter-template.json create mode 100644 assets/workflows/alphafold2-multimer/params.json create mode 100644 assets/workflows/mmseqs2/README.md create mode 100644 assets/workflows/mmseqs2/config.yaml create mode 100644 assets/workflows/mmseqs2/main.nf create mode 100644 assets/workflows/mmseqs2/nextflow.config diff --git a/.gitignore b/.gitignore index 46fe94f..ad4468b 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ tmp/ .nextflow* stack-outputs.json test_data +linter-rules-for-nextflow +build/cloudformation/packaged.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 0acd5fc..50b9c76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 --- +## [2.12.0] - 2025-01-03 + +### 2.12.0 Added + +- Added MMseqs2 workflow + +### 2.12.0 Changed + +- Updated AlphaFold2-Multimer workflow to support multiple input fasta files + +--- + ## [2.11.0] - 2024-12-18 ### 2.11.0 Added diff --git a/assets/containers/alphafold-data/Dockerfile b/assets/containers/alphafold-data/Dockerfile index 19970db..4eaeca7 100644 --- a/assets/containers/alphafold-data/Dockerfile +++ b/assets/containers/alphafold-data/Dockerfile @@ 
-1,7 +1,7 @@ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -FROM public.ecr.aws/amazonlinux/amazonlinux:latest as build +FROM public.ecr.aws/amazonlinux/amazonlinux:latest AS build RUN yum upgrade -y \ && yum install -y \ @@ -19,26 +19,52 @@ RUN yum upgrade -y \ wget \ zstd \ && yum clean all \ - && rm -rf /var/cache/yum \ - && pushd /tmp \ - && git clone https://github.com/soedinglab/hh-suite.git \ - && cd hh-suite && mkdir build && cd build \ - && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \ - && make -j 4 && make install \ - && popd \ - && pushd /tmp \ - && wget http://msa.sbc.su.se/downloads/kalign/current.tar.gz --no-check-certificate \ - && mkdir -p /tmp/kalign2/build \ - && tar -xvzf current.tar.gz -C /tmp/kalign2 \ - && pushd /tmp/kalign2 \ - && ./configure \ - && make && make install \ - && popd \ - && rm -rf /tmp/kalign2 \ - && popd \ - && mkdir -p /tmp/hmmer && wget -O hmmer.tar.gz http://eddylab.org/software/hmmer/hmmer-3.4.tar.gz \ - && tar xvzf hmmer.tar.gz -C /tmp/hmmer \ - && pushd /tmp/hmmer/hmmer-* \ + && rm -rf /var/cache/yum + +# ADD hh-suite.tar.gz /tmp/hh-suite +# RUN pushd /tmp/hh-suite \ +# && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite . \ +# && make && make install \ +# && popd +RUN pushd /tmp && \ +git clone https://github.com/soedinglab/hh-suite.git && \ +cd hh-suite && mkdir build && cd build && \ +cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. && \ +make -j 4 && make install && \ +popd + +# ADD kalign.tar.gz /tmp/kalign-3.4.3 +# RUN pushd /tmp/kalign2 \ +# && ./configure \ +# && make && make install \ +# && popd +# RUN pushd /tmp && \ +# wget https://github.com/TimoLassmann/kalign/archive/refs/tags/v3.4.0.tar.gz && \ +# tar -xvzf v3.4.0.tar.gz && \ +# cd kalign-3.4.0 && \ +# mkdir build && \ +# cd build && \ +# cmake3 .. 
&& \ +# make -j 4 && make test && \ +# make install && \ +# popd + +# Compile kalign2 from source +RUN pushd /tmp && \ +wget http://msa.sbc.su.se/downloads/kalign/current.tar.gz --no-check-certificate \ +&& mkdir -p /tmp/kalign2/build \ +&& tar -xvzf current.tar.gz -C /tmp/kalign2 \ +&& pushd /tmp/kalign2 \ +&& ./configure \ +&& make && make install \ +&& popd \ +&& rm -rf /tmp/kalign2 && \ +popd + +# ADD hmmer.tar.gz /tmp/hmmer +RUN mkdir -p /tmp/hmmer && wget -O hmmer.tar.gz http://eddylab.org/software/hmmer/hmmer-3.4.tar.gz \ +&& tar xvzf hmmer.tar.gz -C /tmp/hmmer +RUN pushd /tmp/hmmer/hmmer-* \ && ./configure \ && make && make install \ && popd diff --git a/assets/containers/alphafold-data/update_locations.py b/assets/containers/alphafold-data/update_locations.py index 35cfa43..8a76baf 100644 --- a/assets/containers/alphafold-data/update_locations.py +++ b/assets/containers/alphafold-data/update_locations.py @@ -8,22 +8,50 @@ from alphafold.data.pipeline_multimer import int_id_to_str_id - -def update_locations(target_dir, file_list): +# Example file_lists: +# +# [4ZQK.1_uniref90_hits.sto 4ZQK.2_uniref90_hits.sto] +# [4ZQK.2_mgnify_hits.sto 4ZQK.1_mgnify_hits.sto] +# [4ZQK.1_uniprot_hits.sto 4ZQK.2_uniprot_hits.sto] +# [4ZQK.1_bfd_hits.a3m 4ZQK.2_bfd_hits.a3m] +# [4ZQK.1_pdb_hits.sto 4ZQK.2_pdb_hits.sto] +# or +# 4ZQK_simple.1_uniref90_hits.sto 4ZQK_simple.2_uniref90_hits.sto +# 4ZQK_simple.1_mgnify_hits.sto 4ZQK_simple.2_mgnify_hits.sto +# 4ZQK_simple.2_uniprot_hits.sto 4ZQK_simple.1_uniprot_hits.sto +# 4ZQK_simple.2_bfd_hits.a3m 4ZQK_simple.1_bfd_hits.a3m +# 4ZQK_simple.1_pdb_hits.sto 4ZQK_simple.2_pdb_hits.sto + +def strip_suffix_str(s: str, suffix: str): + if s.endswith(suffix): + return s[:-len(suffix)] + return None + +# target_dir = msa +def update_locations(target_dir, strip_suffix, file_list): for filename in file_list: - index, _null, outfile = filename.partition("_") - index = index.split(".")[1] + # filename = 4ZQK_simple.1_uniref90_hits.sto + # strip_suffix = _uniref90_hits.sto + + stripped_filename = strip_suffix_str(filename, strip_suffix) + if stripped_filename == None: + raise Exception(f"Suffix {strip_suffix} not in {filename}") - chain = int_id_to_str_id(int(index)) - print(f'file: {filename} index: {index} chain: {chain} outfile:{outfile}') - chain = os.path.join(target_dir, chain) - path = pathlib.Path(chain) + # stripped_filename = 4ZQK_simple.1 + record_inx = int(stripped_filename[-1]) # 1 + outfile = strip_suffix[1:] # uniref90_hits.sto + chain = int_id_to_str_id(record_inx) - if not path.exists(): - path.mkdir(parents=True) - shutil.copy(filename, os.path.join(chain, outfile), follow_symlinks=True) + chain_dir_path = pathlib.Path(os.path.join(target_dir, chain)) + + if not chain_dir_path.exists(): + chain_dir_path.mkdir(parents=True) + target = os.path.join(chain_dir_path, outfile) + print(f"COPY {filename} -> {target}") + shutil.copy(filename, target, follow_symlinks=True) + if __name__ == "__main__": - update_locations(sys.argv[1], sys.argv[2:]) + update_locations(sys.argv[1], sys.argv[2], sys.argv[3:]) diff --git a/assets/containers/alphafold-predict/Dockerfile b/assets/containers/alphafold-predict/Dockerfile index 30644af..cc483f5 100644 --- a/assets/containers/alphafold-predict/Dockerfile +++ b/assets/containers/alphafold-predict/Dockerfile @@ -3,10 +3,9 @@ # SPDX-License-Identifier: Apache-2.0 # ARG CUDA=11.1.1 -ARG CUDA=12.2.2 -# ARG ALPHAFOLD2_VERSION=v2.3.2 -ARG ALPHAFOLD2_VERSION=f251de6613cb478207c732bf9627b1e853c99c2f -FROM 
nvcr.io/nvidia/cuda:${CUDA}-cudnn8-runtime-ubuntu20.04 +ARG CUDA=11.6.0 +ARG ALPHAFOLD2_VERSION=v2.3.2 +FROM nvcr.io/nvidia/cuda:${CUDA}-cudnn8-runtime-ubuntu18.04 # FROM directive resets ARGS, so we specify again (the value is retained if # previously set). ARG CUDA @@ -15,19 +14,18 @@ ARG ALPHAFOLD2_VERSION # Use bash to support string substitution. SHELL ["/bin/bash", "-o", "pipefail", "-c"] -RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ - build-essential \ - cmake \ - cuda-command-line-tools-$(cut -f1,2 -d- <<< ${CUDA//./-}) \ - git \ - hmmer \ - kalign \ - tzdata \ - wget \ - awscli \ - jq \ - unzip \ +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + build-essential \ + cmake \ + cuda-command-line-tools-$(cut -f1,2 -d- <<< ${CUDA//./-}) \ + git \ + hmmer \ + kalign \ + tzdata \ + wget \ + awscli \ + jq \ && rm -rf /var/lib/apt/lists/* \ && apt-get autoremove -y \ && apt-get clean @@ -36,7 +34,7 @@ RUN apt-get update \ RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ && mkdir /tmp/hh-suite/build \ && pushd /tmp/hh-suite/build \ - && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \ + && cmake -DHAVE_AVX2=1 -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \ && make -j 4 && make install \ && ln -s /opt/hhsuite/bin/* /usr/bin \ && popd \ @@ -50,18 +48,18 @@ RUN wget -q -P /tmp \ # Install conda packages. ENV PATH="/opt/conda/bin:$PATH" -ENV LD_LIBRARY_PATH="/opt/conda/lib:$LD_LIBRARY_PATH" -RUN conda install -qy conda==24.5.0 pip python=3.11 \ - && conda install -y -c nvidia/label/cuda-${CUDA} cuda \ - && conda install -y -c conda-forge openmm=8.0.0 pdbfixer \ - && conda clean --all --force-pkgs-dirs --yes +# RUN conda install -qy conda==4.13.0 +# && conda install -y -c conda-forge +RUN conda install -y -c conda-forge \ + openmm=7.5.1 \ + cudatoolkit=${CUDA_VERSION} \ + pdbfixer=1.7 \ + pip \ + python=3.9.16 \ + && conda clean --all --force-pkgs-dirs --yes -# Install AlphaFold -RUN wget -q -P /tmp \ - https://github.com/google-deepmind/alphafold/archive/${ALPHAFOLD2_VERSION}.zip \ - && mkdir -p /app/alphafold \ - && unzip /tmp/f251de6613cb478207c732bf9627b1e853c99c2f.zip -d /tmp \ - && mv /tmp/alphafold-f251de6613cb478207c732bf9627b1e853c99c2f/* /app/alphafold + +RUN git clone --branch ${ALPHAFOLD2_VERSION} --depth 1 https://github.com/deepmind/alphafold.git /app/alphafold RUN wget -q -P /app/alphafold/alphafold/common/ \ https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt @@ -70,20 +68,33 @@ RUN wget -q -P /app/alphafold/alphafold/common/ \ RUN pip3 install --upgrade pip --no-cache-dir \ && pip3 install -r /app/alphafold/requirements.txt --no-cache-dir \ && pip3 install --upgrade --no-cache-dir \ - jax==0.4.26 \ - jaxlib==0.4.26+cuda12.cudnn89 \ + jax==0.3.25 \ + jaxlib==0.3.25+cuda11.cudnn805 \ -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html RUN pip3 install --upgrade --no-cache-dir \ - matplotlib==3.9.2 + matplotlib==3.6.3 \ + numpy==1.24.3 + +# Apply OpenMM patch. +WORKDIR /opt/conda/lib/python3.9/site-packages +RUN patch -p0 < /app/alphafold/docker/openmm.patch # Add SETUID bit to the ldconfig binary so that non-root users can run it. RUN chmod u+s /sbin/ldconfig.real -# Currently needed to avoid undefined_symbol error. 
-RUN ln -sf /usr/lib/x86_64-linux-gnu/libffi.so.7 /opt/conda/lib/libffi.so.7 - +# We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk +# with Debian. See https://github.com/NVIDIA/nvidia-docker/issues/1399 for +# details. +# ENTRYPOINT does not support easily running multiple commands, so instead we +# write a shell script to wrap them up. WORKDIR /app/alphafold COPY predict.py /app/alphafold/ +# COPY run.sh /app/alphafold/run.sh +# RUN echo $'#!/bin/bash\n\ +# ldconfig\n\ +# python /app/alphafold/run_alphafold.py "$@"' > /app/run_alphafold.sh \ +# && chmod +x /app/run_alphafold.sh /app/alphafold/run.sh -ENTRYPOINT [] +# ENTRYPOINT ["bash", "/app/alphafold/run.sh"] +ENTRYPOINT ["bash"] diff --git a/assets/containers/mmseqs2/Dockerfile b/assets/containers/mmseqs2/Dockerfile new file mode 100644 index 0000000..4e862f4 --- /dev/null +++ b/assets/containers/mmseqs2/Dockerfile @@ -0,0 +1,4 @@ +FROM ghcr.io/soedinglab/mmseqs2:master-cuda12 + +# Run with /usr/local/bin/entrypoint +ENTRYPOINT [] \ No newline at end of file diff --git a/assets/containers/protein-utils/Dockerfile b/assets/containers/protein-utils/Dockerfile index 4ccee87..32eb420 100644 --- a/assets/containers/protein-utils/Dockerfile +++ b/assets/containers/protein-utils/Dockerfile @@ -1,27 +1,25 @@ -FROM public.ecr.aws/amazonlinux/amazonlinux:2023 as build +FROM public.ecr.aws/amazonlinux/amazonlinux:2 as build -WORKDIR /home - -COPY code /home/putils -COPY requirements.txt /home +COPY code /tmp/putils # Install python and other dependencies -RUN yum update \ +RUN amazon-linux-extras install python3.8 \ && yum upgrade -y \ && yum install -y \ - python3.11 \ unzip-6.0 \ - wget-1.21.3 \ - && python3.11 -m venv /opt/venv \ + wget-1.14 \ + && python3.8 -m venv /opt/venv \ && source /opt/venv/bin/activate \ - && pip install -U pip \ - && pip install -q --no-cache-dir -r /home/requirements.txt \ - && pip install -q --no-cache-dir /home/putils \ - && yum autoremove -y \ + && pip install -q --no-cache-dir \ + pandas==2.0.0 \ + numpy==1.24.2 \ + biopython==1.81 \ + /tmp/putils \ && yum clean all \ - && rm -rf /var/cache/yum + && rm -rf /var/cache/yum \ + && rm -rf /tmp/putils ENV VIRTUAL_ENV="/opt/venv" ENV PATH="$VIRTUAL_ENV/bin:$PATH" -ENTRYPOINT [] \ No newline at end of file +WORKDIR /home \ No newline at end of file diff --git a/assets/containers/protein-utils/code/resources.json b/assets/containers/protein-utils/code/resources.json new file mode 100644 index 0000000..a69de4e --- /dev/null +++ b/assets/containers/protein-utils/code/resources.json @@ -0,0 +1 @@ +{"id": "3D06", "seq_length": 200, "seq_count": 1, "template_search_resources": {"vcpu": 2, "memory": "4 GiB", "gpu": "False"}, "feature_gen_resources": {"vcpu": 2, "memory": "4 GiB", "gpu": "False"}, "predict_resources": {"vcpu": 8, "memory": "32 GiB", "gpu": "True"}, "uniref90_msa_resources": {"vcpu": 8, "memory": "16 GiB", "gpu": "False"}, "mgnify_msa_resources": {"vcpu": 8, "memory": "16 GiB", "gpu": "False"}, "bfd_msa_resources": {"vcpu": 16, "memory": "32 GiB", "gpu": "False"}} \ No newline at end of file diff --git a/assets/containers/protein-utils/code/seq_info.json b/assets/containers/protein-utils/code/seq_info.json new file mode 100644 index 0000000..9104a31 --- /dev/null +++ b/assets/containers/protein-utils/code/seq_info.json @@ -0,0 +1 @@ +{"id": "2022", "seq_length": "100", "seq_count": "1"} \ No newline at end of file diff --git a/assets/containers/protein-utils/code/setup.py b/assets/containers/protein-utils/code/setup.py index 
de9ab47..852f912 100644 --- a/assets/containers/protein-utils/code/setup.py +++ b/assets/containers/protein-utils/code/setup.py @@ -1,5 +1,5 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# SPDX-License-Identifier: MIT-0 +# SPDX-License-Identifier: Apache-2.0 from setuptools import setup, find_packages diff --git a/assets/containers/protein-utils/code/src/putils/check_and_validate_inputs.py b/assets/containers/protein-utils/code/src/putils/check_and_validate_inputs.py index 3828a00..fd735d4 100644 --- a/assets/containers/protein-utils/code/src/putils/check_and_validate_inputs.py +++ b/assets/containers/protein-utils/code/src/putils/check_and_validate_inputs.py @@ -1,9 +1,7 @@ import argparse -import logging -# from numpy.polynomial import Polynomial from Bio import SeqIO import json -import re +import logging logging.basicConfig( format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", @@ -11,26 +9,22 @@ level=logging.INFO, ) + def write_seq_file(seq, filename): with open(filename, "w") as out_fh: SeqIO.write(seq, out_fh, "fasta") -def split_and_get_sequence_metrics(target_id, seq_list, output_prefix): + +def split_and_get_sequence_metrics(seq_list, output_prefix="input"): seq_length = 0 seq_count = 0 total_length = 0 - if output_prefix: - output_prefix = output_prefix + "_" - else: - output_prefix = "input_" - for seq_record in seq_list: seq_length += len(seq_record.seq) seq_count += 1 - # id = seq_record.id - write_seq_file(seq_list, "inputs.fasta") + write_seq_file(seq_list, f"{output_prefix}.fasta") total_length += seq_length return seq_count, total_length @@ -40,46 +34,37 @@ def check_inputs(target_id, fasta_path, output_prefix): with open(fasta_path, "r") as in_fh: seq_list = list(SeqIO.parse(in_fh, "fasta")) - seq_count, total_length = split_and_get_sequence_metrics(target_id, seq_list, output_prefix) + seq_count, total_length = split_and_get_sequence_metrics(seq_list, output_prefix) seq_info = { "target_id": str(target_id), "total_length": str(total_length), - "seq_count": str(seq_count) + "seq_count": str(seq_count), } - # write the sequence info to a json file + # write the sequence info to a json file with open("seq_info.json", "w") as out_fh: json.dump(seq_info, out_fh) - # return seq_info - # return f'{total_length}\n{seq_count}\n' return total_length if __name__ == "__main__": - parser = argparse.ArgumentParser() parser.add_argument( - "--target_id", - help="The ID of the target", - type=str, - required=True - ) + "--target_id", help="The ID of the target", type=str, required=True + ) parser.add_argument( - "--fasta_path", - help="Path to input FASTA file", - type=str, - required=True - ) + "--fasta_path", help="Path to input FASTA file", type=str, required=True + ) parser.add_argument( "--output_prefix", help="(Optional) file name prefix for the sequence files", - default=None, + default="input", type=str, - required=False + required=False, ) args = parser.parse_args() output = check_inputs(args.target_id, args.fasta_path, args.output_prefix) - print(output) + print(f"Total length is {output}") diff --git a/assets/containers/protein-utils/code/src/putils/split_fasta.py b/assets/containers/protein-utils/code/src/putils/split_fasta.py deleted file mode 100644 index a5a8396..0000000 --- a/assets/containers/protein-utils/code/src/putils/split_fasta.py +++ /dev/null @@ -1,221 +0,0 @@ -import argparse -import logging -import os -import pyfastx -import random -import shutil -import tempfile -import tqdm -from urllib.parse import urlparse - 
-logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - level=logging.INFO, -) - - -def parse_args(): - """Parse the arguments.""" - logging.info("Parsing arguments") - parser = argparse.ArgumentParser() - - parser.add_argument( - "source", - type=str, - help="Path to input .fasta or .fasta.gz file, e.g. s3://myfasta.fa, http://myfasta.fasta.gz, ~/myfasta.fasta, etc", - ) - - parser.add_argument( - "--max_records_per_partition", - type=int, - default=2000000, - help="Max number of sequence records per csv partition", - ) - parser.add_argument( - "--output_dir", - type=str, - default=os.getcwd(), - help="Output dir for processed files", - ) - parser.add_argument( - "--save_csv", - "-c", - action="store_true", - default=False, - help="Save csv files to output dir?", - ) - parser.add_argument( - "-f", - "--save_fasta", - action="store_true", - default=False, - help="Save FASTA file to output dir?", - ) - parser.add_argument( - "--shuffle", - "-s", - action="store_true", - default=True, - help="Shuffle the records in each csv partition?", - ) - - args, _ = parser.parse_known_args() - return args - - -def main(args): - """Transform fasta file into dataset""" - - if not os.path.exists(args.output_dir): - os.makedirs(args.output_dir) - - tmp_dir = tempfile.TemporaryDirectory(dir=os.getcwd()) - input_file = os.path.join(tmp_dir.name, "input.fa") - input_path = download(args.source, input_file) - - output_path = split_fasta( - fasta_file=input_path, - output_dir=args.output_dir, - max_records_per_partition=args.max_records_per_partition, - shuffle=args.shuffle, - save_fasta=args.save_fasta, - save_csv=args.save_csv, - ) - - tmp_dir.cleanup() - logging.info(f"Files saved to {args.output_dir}") - - return output_path - - -def download(source: str, filename: str) -> str: - output_dir = os.path.dirname(filename) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - if source.startswith("s3"): - import boto3 - - logging.info(f"Downloading {source} to {filename}") - s3 = boto3.client("s3") - parsed = urlparse(source, allow_fragments=False) - bucket = parsed.netloc - key = parsed.path[1:] - total = s3.head_object(Bucket=bucket, Key=key)["ContentLength"] - tqdm_params = { - "desc": source, - "total": total, - "miniters": 1, - "unit": "B", - "unit_scale": True, - "unit_divisor": 1024, - } - with tqdm.tqdm(**tqdm_params) as pb: - s3.download_file( - parsed.netloc, - parsed.path[1:], - filename, - Callback=lambda bytes_transferred: pb.update(bytes_transferred), - ) - elif source.startswith("http"): - import requests - - logging.info(f"Downloading {source} to {filename}") - - with open(filename, "wb") as f: - with requests.get(source, stream=True, timeout=60) as r: - r.raise_for_status() - total = int(r.headers.get("content-length", 0)) - - tqdm_params = { - "desc": source, - "total": total, - "miniters": 1, - "unit": "B", - "unit_scale": True, - "unit_divisor": 1024, - } - with tqdm.tqdm(**tqdm_params) as pb: - for chunk in r.iter_content(chunk_size=8192): - pb.update(len(chunk)) - f.write(chunk) - elif os.path.isfile(source): - logging.info(f"Copying {source} to {filename}") - shutil.copyfile(source, filename) - else: - raise ValueError(f"Invalid source: {source}") - - return filename - - -def split_fasta( - fasta_file: str, - output_dir: str = os.getcwd(), - max_records_per_partition=2000000, - shuffle=True, - save_fasta: bool = True, - save_csv: bool = False, -) -> list: - """Split a .fasta or .fasta.gz file into multiple files.""" - - # if 
save_fasta and not os.path.exists(os.path.join(output_dir, "fasta")): - # os.makedirs(os.path.join(output_dir, "fasta")) - - # if save_csv and not os.path.exists(os.path.join(output_dir, "csv")): - # os.makedirs(os.path.join(output_dir, "csv")) - - print(f"Splitting {fasta_file}") - fasta_list = [] - fasta_idx = 0 - - for i, seq in tqdm.tqdm( - enumerate( - pyfastx.Fasta(fasta_file, build_index=False, uppercase=True, full_name=True) - ) - ): - fasta_list.append(seq) - - if (i + 1) % max_records_per_partition == 0: - if shuffle: - random.shuffle(fasta_list) - fasta_idx = int(i / max_records_per_partition) - if save_fasta: - write_seq_record_to_fasta(fasta_list, output_dir, fasta_idx) - if save_csv: - write_seq_record_to_csv(fasta_list, output_dir, fasta_idx) - fasta_list = [] - else: - if save_fasta: - write_seq_record_to_fasta(fasta_list, output_dir, fasta_idx + 1) - if save_csv: - write_seq_record_to_csv(fasta_list, output_dir, fasta_idx + 1) - return output_dir - - -def write_seq_record_to_fasta(content_list, output_dir, index): - output_path = os.path.join( - output_dir, - f"x{str(index).rjust(3, '0')}.fasta", - ) - logging.info(f"Writing {output_path}") - - with open(output_path, "w") as f: - for record in content_list: - f.write(f">{record[0]}\n{record[1]}\n") - return output_path - - -def write_seq_record_to_csv(content_list, output_dir, index): - output_path = os.path.join(output_dir, f"x{str(index).rjust(3, '0')}.csv") - logging.info(f"Writing {output_path}") - with open(output_path, "w") as f: - f.write(f"id,text\n") - for record in content_list: - f.write(f"{record[0].replace(',','')},{record[1].replace(',','')}\n") - return output_path - - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/assets/containers/protein-utils/requirements.txt b/assets/containers/protein-utils/requirements.txt deleted file mode 100644 index 2f03224..0000000 --- a/assets/containers/protein-utils/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -biopython -biotite -jsonlines -numpy -pandas -pyfastx -tqdm \ No newline at end of file diff --git a/assets/data/swissprot.txt b/assets/data/swissprot.txt new file mode 100644 index 0000000..b53ca11 --- /dev/null +++ b/assets/data/swissprot.txt @@ -0,0 +1 @@ +https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz diff --git a/assets/data/uniref100.txt b/assets/data/uniref100.txt new file mode 100644 index 0000000..5ca09c0 --- /dev/null +++ b/assets/data/uniref100.txt @@ -0,0 +1 @@ +https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.fasta.gz diff --git a/assets/data/uniref50.txt b/assets/data/uniref50.txt new file mode 100644 index 0000000..a747a35 --- /dev/null +++ b/assets/data/uniref50.txt @@ -0,0 +1 @@ +https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz diff --git a/assets/workflows/abodybuilder3/main.nf b/assets/workflows/abodybuilder3/main.nf index 488745e..dffa3c2 100644 --- a/assets/workflows/abodybuilder3/main.nf +++ b/assets/workflows/abodybuilder3/main.nf @@ -37,9 +37,8 @@ process ABodyBuilder3Task { """ set -euxo pipefail mkdir output - tar -xzvf $model_parameters /opt/conda/bin/python /home/scripts/abb3_inference.py $fasta_path \ - --model_path plddt-loss/best_second_stage.ckpt + --model_path $model_parameters """ } diff --git a/assets/workflows/abodybuilder3/nextflow.config b/assets/workflows/abodybuilder3/nextflow.config index ce78f68..2621110 100644 --- a/assets/workflows/abodybuilder3/nextflow.config +++ 
b/assets/workflows/abodybuilder3/nextflow.config @@ -1,5 +1,5 @@ params { - model_parameters = 's3://{{S3_BUCKET_NAME}}/ref-data/abodybuilder3_parameters/output.tar.gz' + model_parameters = 's3://{{S3_BUCKET_NAME}}/ref-data/abodybuilder3_parameters/plddt-loss/best_second_stage.ckpt' } process { diff --git a/assets/workflows/alphafold2-multimer/README.md b/assets/workflows/alphafold2-multimer/README.md index fce0b4f..c42d638 100644 --- a/assets/workflows/alphafold2-multimer/README.md +++ b/assets/workflows/alphafold2-multimer/README.md @@ -1,56 +1,38 @@ # AlphaFold Multimer -This repository helps you set up and run AlphaFold Multimer on AWS HealthOmics. At the end of the configuration, you should be able to run a full end-to-end inference. +This repository helps you set up and run AlphaFold Multimer on AWS HealthOmics. -AlphaFold-Multimer requires several steps: at a high level they bundle into: - -1. Download and prepare the data -2. Multisequence alignment (MSA) -3. Inference - -Traditionally, the download and prepare data stage will download `tar.gz` files and unpack. This workflow has a series of optimizations that are designed to improve data staging times and reduce the time and cost of inference while improving scale (>2500 residues). All corresponding reference data is hosted by AWS HealthOmics, so there is no charge to customers to host that data. +The setup steps below assume you are starting from scratch and prefer to use the command line. This repository will also have 1-click build capabilities at the root of the repo. ## Running a workflow -Pick your favorite small fasta file to run your fist end-to-end test. The following command can be done from the terminal or you can navigate to the AWS console. - -### Inputs - -`target_id`: The ID of the target you wish to predict -`fasta_path`: S3 URI to a single FASTA file that is in multi-FASTA format. Currently supports 1-chain per record. +Pick your favorite small fasta file to run your first end-to-end test. The following command can be run from the terminal, or you can navigate to the AWS console. Note that AlphaFold likely will work best using `STATIC` run storage due to low data volumes and faster startup times. ### Example params.json -``` - +```json { - "fasta_path":"s3://mybucket/input/multimer/7unl.fasta", - "target_id": "7unl" + "fasta_path":"s3://mybucket/alphafold-multimer/" } ``` - ### Running the Workflow Replace `$ROLEARN`, `$OUTPUTLOC`, `$PARAMS`, `$WFID` as appropriate. Also modify the `params.json` to point to where your FASTA resides. -``` - +```bash WFID=1234567 ROLEARN=arn:aws:iam::0123456789012:role/omics-workflow-role-0123456789012-us-east-1 -OUTPUTLOC=s3://mybuckets/run_outputs/alphafold +OUTPUTLOC=s3://mybuckets/run_outputs/alphafold2-multimer PARAMS=./params.json aws omics start-run --workflow-id $WFID --role-arn $ROLEARN --output-uri $OUTPUTLOC --storage-type STATIC --storage-capacity 4800 --parameters file://$PARAMS --name alphafold-multimer ``` - All results are written to a location defined within `$OUTPUTLOC` above. To get to the root directory of the outputs, you can use the `GetRun` API, which provides the path as `runOutputUri`. Alternatively, this location is available in the console. ## Citation - AlphaFold Multimer was developed by DeepMind. The original source code can be found [here](https://github.com/google-deepmind/alphafold). The algorithm is presented in the following papers. 
``` - @Article{AlphaFold2021, author = {Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'\i}dek, Augustin and Potapenko, Anna and Bridgland, Alex and Meyer, Clemens and Kohl, Simon A A and Ballard, Andrew J and Cowie, Andrew and Romera-Paredes, Bernardino and Nikolov, Stanislav and Jain, Rishub and Adler, Jonas and Back, Trevor and Petersen, Stig and Reiman, David and Clancy, Ellen and Zielinski, Michal and Steinegger, Martin and Pacholska, Michalina and Berghammer, Tamas and Bodenstein, Sebastian and Silver, David and Vinyals, Oriol and Senior, Andrew W and Kavukcuoglu, Koray and Kohli, Pushmeet and Hassabis, Demis}, journal = {Nature}, @@ -64,7 +46,6 @@ AlphaFold Multimer was developed by DeepMind. The original source code can be fo ``` ``` - @article {AlphaFold-Multimer2021, author = {Evans, Richard and O{\textquoteright}Neill, Michael and Pritzel, Alexander and Antropova, Natasha and Senior, Andrew and Green, Tim and {\v{Z}}{\'\i}dek, Augustin and Bates, Russ and Blackwell, Sam and Yim, Jason and Ronneberger, Olaf and Bodenstein, Sebastian and Zielinski, Michal and Bridgland, Alex and Potapenko, Anna and Cowie, Andrew and Tunyasuvunakool, Kathryn and Jain, Rishub and Clancy, Ellen and Kohli, Pushmeet and Jumper, John and Hassabis, Demis}, journal = {bioRxiv}, diff --git a/assets/workflows/alphafold2-multimer/build_containers.sh b/assets/workflows/alphafold2-multimer/build_containers.sh new file mode 100755 index 0000000..4658921 --- /dev/null +++ b/assets/workflows/alphafold2-multimer/build_containers.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +set -ex + +REGION=$1 +ACCOUNT=$2 +TAG=${3:-latest} + +aws ecr get-login-password --region $REGION | docker login --username AWS --password-stdin $ACCOUNT.dkr.ecr.$REGION.amazonaws.com + +# build protein-utils +cd protein-utils +docker build --platform linux/amd64 -t $ACCOUNT.dkr.ecr.$REGION.amazonaws.com/protein-utils:$TAG . +docker push $ACCOUNT.dkr.ecr.$REGION.amazonaws.com/protein-utils:$TAG +cd .. + +# build alphafold-data +cd alphafold-data +docker build --platform linux/amd64 -t $ACCOUNT.dkr.ecr.$REGION.amazonaws.com/alphafold-data:$TAG . +docker push $ACCOUNT.dkr.ecr.$REGION.amazonaws.com/alphafold-data:$TAG +cd .. + +# build alphafold-predict +cd alphafold-predict +docker build --platform linux/amd64 -t $ACCOUNT.dkr.ecr.$REGION.amazonaws.com/alphafold-predict:$TAG . +docker push $ACCOUNT.dkr.ecr.$REGION.amazonaws.com/alphafold-predict:$TAG +cd .. diff --git a/assets/workflows/alphafold2-multimer/config.yaml b/assets/workflows/alphafold2-multimer/config.yaml index 1db3c7d..3e38011 100644 --- a/assets/workflows/alphafold2-multimer/config.yaml +++ b/assets/workflows/alphafold2-multimer/config.yaml @@ -3,9 +3,6 @@ description: "Predict multi-chain protein structures with AlphaFold2-Multimer" engine: NEXTFLOW main: main.nf parameterTemplate: - target_id: - description: "The ID of the target being run." - optional: false fasta_path: description: "Input file in multi-FASTA format." 
optional: false diff --git a/assets/workflows/alphafold2-multimer/main.nf b/assets/workflows/alphafold2-multimer/main.nf index 49da065..4e96410 100644 --- a/assets/workflows/alphafold2-multimer/main.nf +++ b/assets/workflows/alphafold2-multimer/main.nf @@ -1,34 +1,61 @@ -/* groovylint-disable DuplicateNumberLiteral */ nextflow.enable.dsl = 2 -params.fasta_path = '' +params.fasta_path = "" // static data files are in nextflow.config include { - SearchUniref90 - SearchMgnify - SearchBFD - SearchTemplatesTask - SearchUniprot - CombineSearchResults -} from './searches' + SearchUniref90; + SearchMgnify; + SearchBFD; + SearchTemplatesTask; + SearchUniprot; + CombineSearchResults; +} from './searches.nf' include { - UnpackBFD - UnpackPdb70nSeqres - UnpackMMCIF -} from './unpack' + UnpackBFD; + UnpackPdb70nSeqres; + UnpackMMCIF; +} from './unpack.nf' -workflow AlphaFold2Multimer { - CheckAndValidateInputsTask(params.target_id, params.fasta_path) - // split fasta run parallel searches (Scatter) - split_seqs = CheckAndValidateInputsTask.out.fasta - .splitFasta(file: true) - .map { filename -> tuple(filename.toString().split('/')[-1].split('.fasta')[0], filename) } +workflow { + + // Convert to one or many files + if (params.fasta_path[-1] == "/") { + fasta_path = params.fasta_path + "*" + } else { + fasta_path = params.fasta_path + } + + // [5nl6, 5nl6.fasta] + // [5mlq, 5mlq.fasta] + fasta_files = Channel + .fromPath(fasta_path) + .map { filename -> tuple ( filename.toString().split("/")[-1].split(".fa")[0], filename) } + + // 5nl6.fasta + // 5mlq.fasta + CheckAndValidateInputsTask(fasta_files) + + // Explode/scatter the fasta files into channel items per contained record ID + // Write the exploded fasta records to their own file, include in tuple that contains original fasta file basename + // [5nl6, 5nl6.1, 5nl6.1.fasta] + // [5nl6, 5nl6.2, 5nl6.2.fasta] + // [5mlq, 5mlq.1, 5mlq.1.fasta] + // [5mlq, 5mlq.2, 5mlq.2.fasta] + split_seqs = CheckAndValidateInputsTask.out.fasta.map { fastaFile -> + def fastaBaseName = fastaFile.baseName + def records = fastaFile.splitFasta( file: true ) + + def fastaRecordTupleList = [] + records.forEach { record -> + fastaRecordTupleList.add(tuple (fastaBaseName, record.getBaseName(), record)) + } + return fastaRecordTupleList + } | flatMap - uniref30 = Channel.fromPath(params.uniref30_database_src).first() alphafold_model_parameters = Channel.fromPath(params.alphafold_model_parameters).first() // Unpack the databases @@ -39,88 +66,107 @@ workflow AlphaFold2Multimer { params.bfd_database_hhm_ffdata, params.bfd_database_hhm_ffindex) UnpackPdb70nSeqres(params.pdb70_src, params.pdb_seqres_src, params.db_pathname) - UnpackMMCIF(params.pdb_mmcif_src1, - params.pdb_mmcif_src2, - params.pdb_mmcif_src3, - params.pdb_mmcif_src4, - params.pdb_mmcif_src5, - params.pdb_mmcif_src6, - params.pdb_mmcif_src7, - params.pdb_mmcif_src8, - params.pdb_mmcif_src9, + UnpackMMCIF(params.pdb_mmcif_src1, + params.pdb_mmcif_src2, + params.pdb_mmcif_src3, + params.pdb_mmcif_src4, + params.pdb_mmcif_src5, + params.pdb_mmcif_src6, + params.pdb_mmcif_src7, + params.pdb_mmcif_src8, + params.pdb_mmcif_src9, params.pdb_mmcif_obsolete) + // Searches are call for each fastas * records SearchUniref90(split_seqs, params.uniref90_database_src) SearchMgnify(split_seqs, params.mgnify_database_src) SearchUniprot(split_seqs, params.uniprot_database_src) SearchBFD(split_seqs, UnpackBFD.out.db_folder, params.uniref30_database_src) - SearchTemplatesTask(SearchUniref90.out.msa_with_id, 
UnpackPdb70nSeqres.out.db_folder) - - // Gather - CombineSearchResults(SearchUniref90.out.msa.collect(), - SearchUniprot.out.msa.collect(), - SearchMgnify.out.msa.collect(), - SearchBFD.out.msa.collect(), - SearchTemplatesTask.out.msa.collect()) - - GenerateFeaturesTask(CheckAndValidateInputsTask.out.fasta, - CombineSearchResults.out.msa_path, - UnpackMMCIF.out.db_folder, - UnpackMMCIF.out.db_obsolete) + SearchTemplatesTask(SearchUniref90.out.fasta_basename_with_record_id_and_msa, UnpackPdb70nSeqres.out.db_folder) + + // [5nl6, 5nl6.fasta, [output_5nl6.1/5nl6.1_uniref90_hits.sto, output_5nl6.2/5nl6.2_uniref90_hits.sto], [output_5nl6.2/5nl6.2_mgnify_hits.sto, output_5nl6.1/5nl6.1_mgnify_hits.sto], ...] + // [5mlq, 5mlq.fasta, [output_5mlq.1/5mlq.1_uniref90_hits.sto, output_5mlq.2/5mlq.2_uniref90_hits.sto], [output_5mlq.1/5mlq.1_mgnify_hits.sto, output_5mlq.2/5mlq.2_mgnify_hits.sto], ...] + // + // Combine/gather the search results into channels per original fasta file + msa_tuples = fasta_files + .join(SearchUniref90.out.fasta_basename_with_msa.groupTuple()) + .join(SearchMgnify.out.fasta_basename_with_msa.groupTuple()) + .join(SearchUniprot.out.fasta_basename_with_msa.groupTuple()) + .join(SearchBFD.out.fasta_basename_with_msa.groupTuple()) + .join(SearchTemplatesTask.out.fasta_basename_with_msa.groupTuple()) + + // Per original fasta file, move all of the search result files (ArrayList of files) into single directory structure: msa/A, msa/B, ... + // Emit the first two elements of msa_tuples, and a single merged msa/ directory + CombineSearchResults(msa_tuples) + + // Called per original fasta input file + GenerateFeaturesTask(CombineSearchResults.out.fasta_basename_fasta_and_msa_path, + UnpackMMCIF.out.db_folder, + UnpackMMCIF.out.db_obsolete) + // Predict. 
Five separate models - model_nums = Channel.of(0, 1, 2, 3, 4) - AlphaFoldMultimerInference(params.target_id, - GenerateFeaturesTask.out.features, - params.alphafold_model_parameters, - model_nums, params.random_seed, - params.run_relax) - - MergeRankings(AlphaFoldMultimerInference.out.results.collect()) + model_nums = Channel.of(0,1,2,3,4) + features = GenerateFeaturesTask.out.fasta_basename_with_features.combine(model_nums) + AlphaFoldMultimerInference(features, alphafold_model_parameters, params.random_seed, params.run_relax) + + MergeRankings(AlphaFoldMultimerInference.out.results.groupTuple(by: 0)) } // Check the inputs and get size etc process CheckAndValidateInputsTask { + tag "${fasta_basename}" label 'protutils' cpus 2 memory '4 GB' - publishDir '/mnt/workflow/pubdir/inputs' + publishDir "/mnt/workflow/pubdir/${fasta_basename}/inputs" input: - val target_id - path fasta_path + tuple val(fasta_basename), path(fasta_path) output: stdout - path 'seq_info.json', emit: seq_info - path 'inputs.fasta', emit: fasta + path "seq_info.json", emit: seq_info + path "${fasta_basename}.fasta", emit: fasta + val "${fasta_basename}", emit: fasta_basename script: """ set -euxo pipefail + + echo ">>>>>>>>>>>>>>>>>>>" + echo $fasta_basename + echo $fasta_path + echo "<<<<<<<<<<<<<<<<<<<" + ls -alR + /opt/venv/bin/python \ - /home/putils/src/putils/check_and_validate_inputs.py \ - --target_id=$target_id --fasta_path=$fasta_path + /opt/venv/lib/python3.8/site-packages/putils/check_and_validate_inputs.py \ + --target_id=$fasta_basename --fasta_path=$fasta_path --output_prefix=$fasta_basename + + ls -alR + """ } // Generate features from the searches process GenerateFeaturesTask { + tag "${fasta_basename}" label 'data' cpus 4 memory '16 GB' - publishDir '/mnt/workflow/pubdir/features' + publishDir "/mnt/workflow/pubdir/${fasta_basename}/features" input: - path fasta_paths - path msa_dir + tuple val(fasta_basename), path(fasta_path), path(msa_dir) path pdb_mmcif_folder path mmcif_obsolete_path output: - path 'output/features.pkl', emit: features - path 'output/generate_features_metrics.json', emit: metrics + tuple val(fasta_basename), path("output/features.pkl"), emit: fasta_basename_with_features + path "output/features.pkl", emit: features + path "output/generate_features_metrics.json", emit: metrics script: """ @@ -133,14 +179,18 @@ process GenerateFeaturesTask { echo "***********************" /opt/venv/bin/python /opt/generate_features.py \ - --fasta_paths=$fasta_paths \ + --fasta_paths=$fasta_path \ --msa_dir=$msa_dir \ --template_mmcif_dir="$pdb_mmcif_folder" \ --obsolete_pdbs_path="$mmcif_obsolete_path" \ --template_hits="$msa_dir/pdb_hits.sto" \ --model_preset=multimer \ --output_dir=output \ - --max_template_date=2023-01-01 + --max_template_date=2023-01-01 + + echo "***********************" + ls -alR output/ + echo "***********************" mv output/metrics.json output/generate_features_metrics.json """ @@ -148,24 +198,23 @@ process GenerateFeaturesTask { // AlphaFold Multimer process AlphaFoldMultimerInference { + tag "${fasta_basename}_${modelnum}" errorStrategy 'retry' label 'predict' cpus { 4 * Math.pow(2, task.attempt) } memory { 16.GB * Math.pow(2, task.attempt) } accelerator 1, type: 'nvidia-tesla-a10g' maxRetries 2 - publishDir '/mnt/workflow/pubdir' + publishDir "/mnt/workflow/pubdir/${fasta_basename}/prediction_${modelnum}" input: - val target_id - path features + tuple val(fasta_basename), path (features), val(modelnum) path alphafold_model_parameters - val modelnum val random_seed val 
run_relax output: - path "output_model_${modelnum}/", emit: results - + tuple val(fasta_basename), path("output_model_${modelnum}/"), emit: results + script: """ set -euxo pipefail @@ -174,7 +223,7 @@ process AlphaFoldMultimerInference { export XLA_PYTHON_CLIENT_MEM_FRACTION=4.0 export TF_FORCE_UNIFIED_MEMORY=1 /opt/conda/bin/python /app/alphafold/predict.py \ - --target_id=$target_id --features_path=$features --model_preset=multimer \ + --target_id=$fasta_basename --features_path=$features --model_preset=multimer \ --model_dir=model --random_seed=$random_seed --output_dir=output_model_${modelnum} \ --run_relax=${run_relax} --use_gpu_relax=${run_relax} --model_num=$modelnum @@ -182,20 +231,22 @@ process AlphaFoldMultimerInference { """ } -//Merge Rankings + +// Merge Rankings process MergeRankings { + tag "${id}" cpus 2 memory 4.GB - publishDir '/mnt/workflow/pubdir' + publishDir "/mnt/workflow/pubdir/${id}" label 'data' input: - path results + tuple val(id), path(results) output: - path 'rankings.json', emit: rankings - path 'top_hit*', emit: top_hit - + path "rankings.json", emit: rankings + path "top_hit*", emit: top_hit + script: """ mkdir -p output @@ -206,7 +257,3 @@ process MergeRankings { mv output/rankings.json . """ } - -workflow { - AlphaFold2Multimer() -} diff --git a/assets/workflows/alphafold2-multimer/nextflow.config b/assets/workflows/alphafold2-multimer/nextflow.config index 16e3e32..c10b538 100644 --- a/assets/workflows/alphafold2-multimer/nextflow.config +++ b/assets/workflows/alphafold2-multimer/nextflow.config @@ -36,7 +36,7 @@ params { } process { - withLabel: protutils { container = "{{protein-utils:latest}}"} - withLabel: data { container = "{{alphafold-data:latest}}"} - withLabel: predict { container = "{{alphafold-predict:latest}}"} + withLabel: protutils { container = '{{protein-utils:latest}}'} + withLabel: data { container = '{{alphafold-data:latest}}'} + withLabel: predict { container = '{{alphafold-predict:latest}}'} } \ No newline at end of file diff --git a/assets/workflows/alphafold2-multimer/parameter-template.json b/assets/workflows/alphafold2-multimer/parameter-template.json new file mode 100644 index 0000000..a239744 --- /dev/null +++ b/assets/workflows/alphafold2-multimer/parameter-template.json @@ -0,0 +1,6 @@ +{ + "fasta_path": { + "description": "Input file in multi-FASTA format.", + "optional": false + } +} \ No newline at end of file diff --git a/assets/workflows/alphafold2-multimer/params.json b/assets/workflows/alphafold2-multimer/params.json new file mode 100644 index 0000000..46ffce8 --- /dev/null +++ b/assets/workflows/alphafold2-multimer/params.json @@ -0,0 +1,3 @@ +{ + "fasta_path":"s3://example-bucket/alphafold2-multimer/" +} \ No newline at end of file diff --git a/assets/workflows/alphafold2-multimer/searches.nf b/assets/workflows/alphafold2-multimer/searches.nf index dedc650..e430a37 100644 --- a/assets/workflows/alphafold2-multimer/searches.nf +++ b/assets/workflows/alphafold2-multimer/searches.nf @@ -1,201 +1,219 @@ nextflow.enable.dsl = 2 process SearchUniref90 { + tag "${record_id}" label 'data' - cpus 8 - memory '32 GB' - publishDir '/mnt/workflow/pubdir/msa' + cpus { 8 * Math.pow(2, task.attempt) } + memory { 32.GB * Math.pow(2, task.attempt) } + maxRetries 3 + publishDir "/mnt/workflow/pubdir/${fasta_basename}/msa" input: - tuple val(id), path(fasta_path) + tuple val(fasta_basename), val(record_id), path(fasta_record_path) path database_path output: - tuple val(id), path("output/${id}_uniref90_hits.sto"), emit: msa_with_id - 
path "output/${id}_uniref90_hits.sto", emit: msa - path "output/${id}_uniref90_metrics.json", emit: metrics + tuple val(fasta_basename), val(record_id), path("output_${record_id}/${record_id}_uniref90_hits.sto"), emit: fasta_basename_with_record_id_and_msa + tuple val(fasta_basename), path("output_${record_id}/${record_id}_uniref90_hits.sto"), emit: fasta_basename_with_msa + path "output_${record_id}/${record_id}_uniref90_hits.sto", emit: msa + path "output_${record_id}/${record_id}_uniref90_metrics.json", emit: metrics script: """ set -euxo pipefail + cat $fasta_record_path - mkdir -p output + mkdir -p output_${record_id} /opt/venv/bin/python /opt/create_msa_monomer.py \ - --fasta_path=$fasta_path \ + --fasta_path=$fasta_record_path \ --database_type=uniref90 \ --database_path=$database_path \ - --output_dir=output \ + --output_dir=output_${record_id} \ --cpu=$task.cpus - mv output/uniref90_hits.sto output/${id}_uniref90_hits.sto - mv output/metrics.json output/${id}_uniref90_metrics.json + mv output_${record_id}/uniref90_hits.sto output_${record_id}/${record_id}_uniref90_hits.sto + mv output_${record_id}/metrics.json output_${record_id}/${record_id}_uniref90_metrics.json """ } -process SearchUniprot { +process SearchMgnify { + tag "${record_id}" label 'data' - cpus 8 - memory '32 GB' - publishDir '/mnt/workflow/pubdir/msa' + cpus { 8 * Math.pow(2, task.attempt) } + memory { 64.GB * Math.pow(2, task.attempt) } + maxRetries 3 + publishDir "/mnt/workflow/pubdir/${fasta_basename}/msa" input: - tuple val(id), path(fasta_path) + tuple val(fasta_basename), val(record_id), path(fasta_record_path) path database_path output: - path "output/${id}_uniprot_hits.sto", emit: msa - path "output/${id}_uniprot_metrics.json", emit: metrics - val "$id", emit: id + tuple val(fasta_basename), path("output_${record_id}/${record_id}_mgnify_hits.sto"), emit: fasta_basename_with_msa + path "output_${record_id}/${record_id}_mgnify_hits.sto", emit: msa + path "output_${record_id}/${record_id}_mgnify_metrics.json", emit: metrics script: """ set -euxo pipefail - - mkdir -p output + cat $fasta_record_path + + mkdir -p output_${record_id} /opt/venv/bin/python /opt/create_msa_monomer.py \ - --fasta_path=$fasta_path \ - --database_type=uniprot \ + --fasta_path=$fasta_record_path \ + --database_type=mgnify \ --database_path=$database_path \ - --output_dir=output \ + --output_dir=output_${record_id} \ --cpu=$task.cpus - mv output/uniprot_hits.sto output/${id}_uniprot_hits.sto - mv output/metrics.json output/${id}_uniprot_metrics.json + mv output_${record_id}/mgnify_hits.sto output_${record_id}/${record_id}_mgnify_hits.sto + mv output_${record_id}/metrics.json output_${record_id}/${record_id}_mgnify_metrics.json """ } -process SearchMgnify { +process SearchUniprot { + tag "${record_id}" label 'data' cpus 8 - memory '64 GB' - publishDir '/mnt/workflow/pubdir/msa' + memory '32 GB' + publishDir "/mnt/workflow/pubdir/${fasta_basename}/msa" input: - tuple val(id), path(fasta_path) + tuple val(fasta_basename), val(record_id), path(fasta_record_path) path database_path output: - path "output/${id}_mgnify_hits.sto", emit: msa - path "output/${id}_mgnify_metrics.json", emit: metrics + tuple val(fasta_basename), path("output_${record_id}/${record_id}_uniprot_hits.sto"), emit: fasta_basename_with_msa + path "output_${record_id}/${record_id}_uniprot_hits.sto", emit: msa + path "output_${record_id}/${record_id}_uniprot_metrics.json", emit: metrics script: """ set -euxo pipefail + cat $fasta_record_path - mkdir -p output + mkdir -p 
output_${record_id} /opt/venv/bin/python /opt/create_msa_monomer.py \ - --fasta_path=$fasta_path \ - --database_type=mgnify \ + --fasta_path=$fasta_record_path \ + --database_type=uniprot \ --database_path=$database_path \ - --output_dir=output \ + --output_dir=output_${record_id} \ --cpu=$task.cpus - mv output/mgnify_hits.sto output/${id}_mgnify_hits.sto - mv output/metrics.json output/${id}_mgnify_metrics.json + mv output_${record_id}/uniprot_hits.sto output_${record_id}/${record_id}_uniprot_hits.sto + mv output_${record_id}/metrics.json output_${record_id}/${record_id}_uniprot_metrics.json """ } process SearchBFD { + tag "${record_id}" label 'data' + cpus { 8 * Math.pow(2, task.attempt) } memory { 64.GB * Math.pow(2, task.attempt) } - maxRetries 1 + maxRetries 3 errorStrategy 'retry' - publishDir '/mnt/workflow/pubdir/msa' + + publishDir "/mnt/workflow/pubdir/${fasta_basename}/msa" input: - tuple val(id), path(fasta_path) + tuple val(fasta_basename), val(record_id), path(fasta_record_path) path bfd_database_folder path uniref30_database_folder output: - path "output/${id}_bfd_uniref_hits.a3m", emit: msa - path "output/${id}_metrics.json", emit: metrics + tuple val(fasta_basename), path("output_${record_id}/${record_id}_bfd_hits.a3m"), emit: fasta_basename_with_msa + path "output_${record_id}/${record_id}_bfd_hits.a3m", emit: msa + path "output_${record_id}/${record_id}_bfd_metrics.json", emit: metrics script: """ set -euxo pipefail - - mkdir -p output + cat $fasta_record_path + mkdir -p output_${record_id} /opt/venv/bin/python /opt/create_msa_monomer.py \ - --fasta_path=$fasta_path \ + --fasta_path=$fasta_record_path \ --database_type=bfd \ --database_path=$bfd_database_folder \ --database_path_2=$uniref30_database_folder \ - --output_dir=output \ + --output_dir=output_${record_id} \ --cpu=$task.cpus - mv output/bfd_hits.a3m output/${id}_bfd_uniref_hits.a3m - mv output/metrics.json output/${id}_metrics.json + mv output_${record_id}/bfd_hits.a3m output_${record_id}/${record_id}_bfd_hits.a3m + mv output_${record_id}/metrics.json output_${record_id}/${record_id}_bfd_metrics.json """ } process SearchTemplatesTask { + tag "${record_id}" label 'data' cpus 2 memory '8 GB' - publishDir '/mnt/workflow/pubdir/msa' + publishDir "/mnt/workflow/pubdir/${fasta_basename}/msa" input: - tuple val(id), path(msa_path) + tuple val(fasta_basename), val(record_id), path(msa_path) path pdb_db_folder output: - path "output/${id}_pdb_hits.sto", emit: msa - path "output/${id}_metrics.json", emit: metrics + tuple val(fasta_basename), path("output_${record_id}/${record_id}_pdb_hits.sto"), emit: fasta_basename_with_msa + path "output_${record_id}/${record_id}_pdb_metrics.json", emit: metrics script: """ set -euxo pipefail - mkdir -p output + mkdir -p output_${record_id} /opt/venv/bin/python /opt/search_templates.py \ --msa_path=$msa_path \ - --output_dir=output \ + --output_dir=output_${record_id} \ --database_path=$pdb_db_folder \ --model_preset=multimer \ --cpu=$task.cpus - mv output/pdb_hits.sto output/${id}_pdb_hits.sto - mv output/metrics.json output/${id}_metrics.json + mv output_${record_id}/pdb_hits.sto output_${record_id}/${record_id}_pdb_hits.sto + mv output_${record_id}/metrics.json output_${record_id}/${record_id}_pdb_metrics.json """ } // Combine/rename results from parallel searches as AlphaFold expects process CombineSearchResults { + tag "${fasta_basename}" label 'data' cpus 4 memory '8 GB' + publishDir "/mnt/workflow/pubdir/${fasta_basename}/msa" input: - path uniref90_msas - path uniprot_msas - 
path mgnify_msas - path bfd_msas - path template_hits - output: - path 'msa/', emit: msa_path + tuple val(fasta_basename), path(fasta_path), path(uniref90_msas), path(mgnify_msas), path(uniprot_msas), path(bfd_msas), path(template_hits) + + output: + tuple val(fasta_basename), path(fasta_path), path ("msa/"), emit: fasta_basename_fasta_and_msa_path + path "msa/", emit: msa_path script: """ echo ">>>>>>>>>>>>>>>>>>>" + echo $fasta_basename + echo $fasta_path echo $uniref90_msas - echo $uniprot_msas echo $mgnify_msas + echo $uniprot_msas + echo $bfd_msas echo $template_hits echo "<<<<<<<<<<<<<<<<<<<" mkdir -p msa - /opt/venv/bin/python /opt/update_locations.py msa $uniref90_msas - /opt/venv/bin/python /opt/update_locations.py msa $uniprot_msas - /opt/venv/bin/python /opt/update_locations.py msa $mgnify_msas - /opt/venv/bin/python /opt/update_locations.py msa $bfd_msas - /opt/venv/bin/python /opt/update_locations.py msa $template_hits + /opt/venv/bin/python /opt/update_locations.py msa _uniref90_hits.sto $uniref90_msas + /opt/venv/bin/python /opt/update_locations.py msa _mgnify_hits.sto $mgnify_msas + /opt/venv/bin/python /opt/update_locations.py msa _uniprot_hits.sto $uniprot_msas + /opt/venv/bin/python /opt/update_locations.py msa _bfd_hits.a3m $bfd_msas + /opt/venv/bin/python /opt/update_locations.py msa _pdb_hits.sto $template_hits echo "***********************" ls -alR msa/ diff --git a/assets/workflows/alphafold2-multimer/unpack.nf b/assets/workflows/alphafold2-multimer/unpack.nf index b8211c0..146bc14 100644 --- a/assets/workflows/alphafold2-multimer/unpack.nf +++ b/assets/workflows/alphafold2-multimer/unpack.nf @@ -14,7 +14,7 @@ process UnpackBFD { path bfd_database_hhm_ffindex output: - path 'bfd/', emit: db_folder + path "bfd/", emit: db_folder script: """ @@ -31,6 +31,7 @@ process UnpackBFD { """ } + process UnpackUniprot { cpus 4 memory '8 GB' @@ -53,6 +54,7 @@ process UnpackUniprot { """ } + process UnpackPdb70nSeqres { label 'data' cpus 2 @@ -75,13 +77,14 @@ process UnpackPdb70nSeqres { # Templates - pdb70 and seqres mkdir -p $base_database_path/pdb mv $pdb70_src/* $base_database_path/pdb/ - + # filter strange sequences containing 0 /opt/venv/bin/python /opt/filter_pdb.py $pdb_seqres_src $base_database_path/pdb/pdb_seqres.txt ls -laR $base_database_path/pdb/ """ } + process UnpackMMCIF { cpus 2 memory '4 GB' @@ -98,10 +101,10 @@ process UnpackMMCIF { path pdb_mmcif_src8 path pdb_mmcif_src9 path pdb_mmcif_obsolete - + output: - path 'pdb_mmcif/mmcif_files/', emit: db_folder - path 'pdb_mmcif/obsolete.dat', emit: db_obsolete + path "pdb_mmcif/mmcif_files/", emit: db_folder + path "pdb_mmcif/obsolete.dat", emit: db_obsolete script: """ @@ -124,22 +127,23 @@ process UnpackMMCIF { """ } + process UnpackRecords { tag "${id}" label 'protutils' cpus 2 memory '4 GB' publishDir "/mnt/workflow/pubdir/${id}/input" - + input: tuple val(id), val(header), val(seqString) output: - tuple val(id), path('input.fasta'), emit: fasta + tuple val(id), path("input.fasta"), emit: fasta script: """ set -euxo pipefail echo -e ">${header}\n${seqString}" > input.fasta """ -} +} \ No newline at end of file diff --git a/assets/workflows/mmseqs2/README.md b/assets/workflows/mmseqs2/README.md new file mode 100644 index 0000000..973cee0 --- /dev/null +++ b/assets/workflows/mmseqs2/README.md @@ -0,0 +1,5 @@ +# Generate MSAs using MMSeqs2 + +## Summary + + MMseqs2: ultra fast and sensitive search and clustering suite. 
diff --git a/assets/workflows/mmseqs2/config.yaml b/assets/workflows/mmseqs2/config.yaml new file mode 100644 index 0000000..b5018a1 --- /dev/null +++ b/assets/workflows/mmseqs2/config.yaml @@ -0,0 +1,15 @@ +name: MMseqs2 +description: "MMseqs2: ultra fast and sensitive search and clustering suite" +engine: NEXTFLOW +main: main.nf +parameterTemplate: + fasta_path: + description: "FASTA file containing query sequence." + optional: false + database_path: + description: "FASTA file containing target database." + optional: true +storageCapacity: 1200 +tags: + Name: "MMseqs2" +accelerators: GPU diff --git a/assets/workflows/mmseqs2/main.nf b/assets/workflows/mmseqs2/main.nf new file mode 100644 index 0000000..8ef13de --- /dev/null +++ b/assets/workflows/mmseqs2/main.nf @@ -0,0 +1,85 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +workflow MMSeqs2 { + take: + fasta_path + database_path + + main: + + db_channel = Channel.fromPath(database_path) + db_channel.view() + MMSeqs2PrepareDatabaseTask(db_channel) + + // Convert to one or many files + if (params.fasta_path[-1] == "/") { + fasta_path = params.fasta_path + "*" + } else { + fasta_path = params.fasta_path + } + + fasta_channel = Channel.fromPath(fasta_path) + fasta_channel.view() + search_input = fasta_channel.combine(MMSeqs2PrepareDatabaseTask.out) + search_input.view() + MMSeqs2SearchTask( + search_input + ) + + emit: + MMSeqs2SearchTask.out +} + +process MMSeqs2PrepareDatabaseTask { + label 'mmseqs2' + cpus 16 + memory '32 GB' + maxRetries 1 + + input: + path database_path + + output: + path "db", emit: db + + script: + """ + set -euxo pipefail + mkdir db + /usr/local/bin/entrypoint createdb $database_path tmpDB + /usr/local/bin/entrypoint makepaddedseqdb tmpDB db/gpuDB + /usr/local/bin/entrypoint createindex db/gpuDB tmp --index-subset 2 + """ +} + +process MMSeqs2SearchTask { + label 'mmseqs2' + cpus 4 + memory '16 GB' + maxRetries 1 + accelerator 1, type: 'nvidia-tesla-a10g' + publishDir "/mnt/workflow/pubdir/${workflow.sessionId}/${task.process.replace(':', '/')}/${task.index}/${task.attempt}" + + input: + tuple path(fasta_path), path(database_path) + + output: + path "*.a3m", emit: msa + + script: + """ + set -euxo pipefail + /usr/local/bin/entrypoint createdb $fasta_path queryDB + /usr/local/bin/entrypoint search queryDB $database_path/gpuDB result tmp --gpu 1 + /usr/local/bin/entrypoint result2msa queryDB $database_path/gpuDB result ${fasta_path.baseName}.a3m --msa-format-mode 5 + """ +} + +workflow { + MMSeqs2( + params.fasta_path, + params.database_path + ) +} diff --git a/assets/workflows/mmseqs2/nextflow.config b/assets/workflows/mmseqs2/nextflow.config new file mode 100644 index 0000000..124a2e7 --- /dev/null +++ b/assets/workflows/mmseqs2/nextflow.config @@ -0,0 +1,12 @@ +params { + database_path = "s3://{{S3_BUCKET_NAME}}/ref-data/uniref100/uniref100.fasta" +} + +process { + withLabel: mmseqs2 { container = "{{mmseqs2:latest}}" } +} + +docker { + enabled = true + runOptions = "--gpus all" +} diff --git a/build/buildspec/buildspec_data.yaml b/build/buildspec/buildspec_data.yaml index 154b5c5..b616b7f 100644 --- a/build/buildspec/buildspec_data.yaml +++ b/build/buildspec/buildspec_data.yaml @@ -89,7 +89,13 @@ phases: else echo "Downloading ${SOURCE_URI}" wget -nc $SOURCE_URI -P $HOME/s3/$KEY --no-verbose --show-progress --progress=dot:giga - if [[ "$SOURCE_URI" =~ .*\.gz$ ]]; then + if [[ "$SOURCE_URI" =~ .*\.tar.gz$ ]]; then + echo "Extracting file" + tar -xzf $HOME/s3/$KEY/*.tar.gz -C $HOME/s3/$KEY + elif [[ 
"$SOURCE_URI" =~ .*\.tar$ ]]; then + echo "Extracting file" + tar -xf $HOME/s3/$KEY/*.tar -C $HOME/s3/$KEY + elif [[ "$SOURCE_URI" =~ .*\.gz$ ]]; then echo "Extracting file" gunzip -k $HOME/s3/$KEY/*.gz fi; diff --git a/scripts/testrun.sh b/scripts/testrun.sh index 7e38630..26d0b77 100755 --- a/scripts/testrun.sh +++ b/scripts/testrun.sh @@ -51,12 +51,11 @@ else fi # Package the workflow -mkdir -p tmp/assets/workflows/$WORKFLOW_NAME tmp/assets/modules +mkdir -p tmp/assets/workflows/$WORKFLOW_NAME pushd tmp cp -r ../assets/workflows/$WORKFLOW_NAME/* assets/workflows/$WORKFLOW_NAME -cp -r ../assets/modules/* assets/modules sed -i "" -E "s/[0-9]{12}\.dkr\.ecr\.(us-[a-z]*-[0-9])/$ACCOUNT_ID.dkr.ecr.$REGION/g" ./assets/workflows/$WORKFLOW_NAME/*.config assets/workflows/$WORKFLOW_NAME/*.wdl 2>/dev/null || true sed -i "" -E "s/[0-9]{12}\.dkr\.ecr\.(us-[a-z]*-[0-9])/$ACCOUNT_ID.dkr.ecr.$REGION/g" ./assets/workflows/$WORKFLOW_NAME/*.config assets/workflows/$WORKFLOW_NAME/*.nf 2>/dev/null || true @@ -73,7 +72,8 @@ aws omics wait workflow-active --region $REGION --id $workflow_id # Run the workflow start_run_command="aws omics start-run \ --retention-mode REMOVE \ - --storage-type DYNAMIC \ + --storage-type STATIC \ + --storage-capacity 9600 \ --workflow-id $workflow_id \ --name $WORKFLOW_NAME-dev-$TIMESTAMP \ --role-arn \"$OMICS_EXECUTION_ROLE\" \