Skip to content

Commit

Permalink
v2.10.0
Browse files Browse the repository at this point in the history
  • Loading branch information
brianloyal committed Dec 13, 2024
1 parent 620ce55 commit 1e51730
Show file tree
Hide file tree
Showing 9 changed files with 148 additions and 0 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

---

## [2.10.0] - 2024-12-13

### 2.10.0 Added

- Added TemStaPro prediction workflow.

---

## [2.9.0] - 2024-12-13

### 2.9.0 Added
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ A collection of AWS HealthOmics workflows to accelerate drug discovery.
- [Generate Protein Sequence Embeddings](https://github.com/aws-samples/drug-discovery-workflows/tree/main/assets/workflows/generate-protein-seq-embeddings): From Meta. Generate ESM-2 vector embeddings for one or more protein amino acid sequences.
- [NanobodyBuilder2](https://github.com/oxpig/ImmuneBuilder): From Oxford Protein Informatics Group. Predict the 3D structure of single-chain nanobodies.
- [RFDiffusion-ProteinMPNN](https://github.com/aws-samples/drug-discovery-workflows/tree/main/assets/workflows/rfdiffusion-proteinmpnn): From the Institute for Protein Design at the University of Washington. Generate protein backbone structures and sequences given a binding target or other structural context.
- [TemStaPro](https://github.com/ievapudz/TemStaPro): From the Institute of Biotechnology, Life Sciences Center, Vilnius University. Predict protein thermostability using sequence representations from a protein language model.
- [ThermoMPNN](https://github.com/Kuhlman-Lab/ThermoMPNN): From the University of North Carolina School of Medicine. Predict changes in thermodynamic stability for protein point mutants.

### E2E Workflows
Expand Down
29 changes: 29 additions & 0 deletions assets/containers/temstapro/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Container image for the TemStaPro workflow: a PyTorch GPU inference base image
# plus the TemStaPro source tree (pinned to a single commit) and its Python
# dependencies.

# Region of the ECR registry hosting the base image. Declared before FROM so
# that it can be interpolated into the base image URI.
ARG AWS_DEFAULT_REGION=us-east-1

FROM 763104351884.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/pytorch-inference:2.2.0-gpu-py310-cu118-ubuntu20.04-ec2

# TemStaPro commit to download; pinning it keeps image builds reproducible.
ARG COMMIT=db001d2b2479131bad2d0ee170b75001e9fad076

# Update OS packages, install nano, and drop the apt caches in the same layer.
RUN apt-get update \
    && apt-get upgrade -y \
    && apt-get install nano -y \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Download the TemStaPro code at the pinned commit and unpack it into
# /home/TemStaPro. Both the extracted directory and the downloaded archive are
# removed in the same layer so that neither is kept in the final image.
RUN wget -q -P /tmp "https://github.com/ievapudz/TemStaPro/archive/${COMMIT}.zip" \
    && mkdir -p /home/TemStaPro \
    && unzip /tmp/${COMMIT}.zip -d /tmp \
    && mv /tmp/TemStaPro-${COMMIT}/* /home/TemStaPro \
    && rm -rf /tmp/TemStaPro-${COMMIT} /tmp/${COMMIT}.zip

WORKDIR /home/TemStaPro

# Install the pinned Python dependencies; the requirements file is only needed
# at build time, so it is deleted afterwards.
COPY requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -U -r /tmp/requirements.txt \
    && rm /tmp/requirements.txt

ENV TMPDIR="/tmp"
# Put the TemStaPro source tree on the Python module search path.
ENV PYTHONPATH="/home/TemStaPro:${PYTHONPATH}"

# Reset any entrypoint inherited from the base image.
ENTRYPOINT []
2 changes: 2 additions & 0 deletions assets/containers/temstapro/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
transformers==4.24.0
sentencepiece==0.1.96
1 change: 1 addition & 0 deletions assets/data/temstapro
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Rostlab/prot_t5_xl_half_uniref50-enc
5 changes: 5 additions & 0 deletions assets/workflows/temstapro/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Predict protein thermostability using sequence representations from a protein language model

## Summary

Predict protein thermostability with [TemStaPro](https://github.com/ievapudz/TemStaPro), which uses sequence representations from a protein language model. From the Institute of Biotechnology, Life Sciences Center, Vilnius University.
18 changes: 18 additions & 0 deletions assets/workflows/temstapro/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Workflow metadata for TemStaPro: display name, description, engine, and the
# entry file that defines the workflow.
name: TemStaPro
description: "Predict protein thermostability using sequence representations from a protein language model. From Institute of Biotechnology, Life Sciences Center, Vilnius University."
engine: NEXTFLOW
main: main.nf
# One entry per workflow parameter. Only fasta_path is required; the optional
# parameters state their defaults in their descriptions.
parameterTemplate:
fasta_path:
description: "Input file in FASTA format."
optional: false
window_size_predictions:
description: "set the window size for average smoothing of per residue predictions for plotting. Defaults to 81."
optional: true
portion_size:
description: "Maximum size of input sequence divisions. Set to 0 for no division. Defaults to 1000."
optional: true
# NOTE(review): the unit of storageCapacity is not stated in this file - confirm
# against the workflow service documentation.
storageCapacity: 1200
tags:
Name: "TemStaPro"
accelerators: GPU
69 changes: 69 additions & 0 deletions assets/workflows/temstapro/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

// Sub-workflow that runs TemStaProTask on the supplied inputs and exposes the
// task's output channel under the name `results`.
//
// take:
//   fasta_path              - input FASTA file(s)
//   window_size_predictions - forwarded unchanged to TemStaProTask
//   portion_size            - forwarded unchanged to TemStaProTask
//   prot_t5_params          - forwarded unchanged to TemStaProTask
// emit:
//   results - the output channel of TemStaProTask
workflow TemStaPro {
    take:
    fasta_path
    window_size_predictions
    portion_size
    prot_t5_params

    main:
    TemStaProTask(fasta_path, window_size_predictions, portion_size, prot_t5_params)

    emit:
    results = TemStaProTask.out
}

// Runs the TemStaPro command-line program (/home/TemStaPro/temstapro) on one
// FASTA file and collects everything it writes under ./output.
//
// Inputs:
//   fasta_path              - FASTA file passed to --input-fasta
//   window_size_predictions - value passed to --window-size-predictions
//   portion_size            - value passed to --portion-size
//   prot_t5_params          - path passed to --PT-directory
//                             (presumably the ProtT5 model files - confirm)
// Output:
//   every file matching output/*; the script writes mean_output.tsv and
//   per_res_output.tsv there and uses the same directory for per-residue plots.
process TemStaProTask {
label 'temstapro'
// Resource requests: 4 CPUs, 16 GB of memory and one A10G accelerator.
cpus 4
memory '16 GB'
maxRetries 1
accelerator 1, type: 'nvidia-tesla-a10g'
// Publish under a path keyed by session id, process name (':' replaced by '/'),
// task index and attempt number.
publishDir "/mnt/workflow/pubdir/${workflow.sessionId}/${task.process.replace(':', '/')}/${task.index}/${task.attempt}"

input:
path fasta_path
val window_size_predictions
val portion_size
path prot_t5_params

output:
path 'output/*'

// The shell options make the script abort on the first failing command,
// on use of an unset variable, or on a failing pipeline stage, and echo each
// command as it runs.
script:
"""
set -euxo pipefail
mkdir output
/opt/conda/bin/python /home/TemStaPro/temstapro \
--input-fasta $fasta_path \
--PT-directory $prot_t5_params \
--temstapro-directory '/home/TemStaPro' \
--more-thresholds \
--mean-output 'output/mean_output.tsv' \
--per-res-output 'output/per_res_output.tsv' \
--window-size-predictions $window_size_predictions \
--per-residue-plot-dir output \
--portion-size $portion_size
"""
}

// Entry workflow: builds a file channel from params.fasta_path and passes it,
// together with the remaining parameters, to the TemStaPro sub-workflow.
workflow {
    fasta_ch = Channel.fromPath(params.fasta_path)

    TemStaPro(
        fasta_ch,
        params.window_size_predictions,
        params.portion_size,
        params.prot_t5_params
    )
}

15 changes: 15 additions & 0 deletions assets/workflows/temstapro/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Default parameter values for the TemStaPro workflow.
params {
window_size_predictions = 81
portion_size = 1000

// NOTE(review): {{S3_BUCKET_NAME}} looks like a template placeholder that is
// substituted before the workflow is deployed - confirm against the build tooling.
prot_t5_params = "s3://{{S3_BUCKET_NAME}}/ref-data/temstapro/Rostlab/prot_t5_xl_half_uniref50-enc/"
}

// Processes labelled 'temstapro' run in the temstapro container image.
// NOTE(review): {{temstapro:latest}} is presumably resolved to a full image URI
// by the same substitution step - confirm.
process {
withLabel: temstapro { container = "{{temstapro:latest}}" }
}

// Run tasks in Docker and pass "--gpus all" to the container runtime.
docker {
enabled = true
runOptions = "--gpus all"
}

0 comments on commit 1e51730

Please sign in to comment.