-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
620ce55
commit 1e51730
Showing
9 changed files
with
148 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Region of the ECR registry that hosts the base image. Declared before FROM, | ||
# so it is only available to the FROM instruction below. | ||
ARG AWS_DEFAULT_REGION=us-east-1 | ||
|
||
FROM 763104351884.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/pytorch-inference:2.2.0-gpu-py310-cu118-ubuntu20.04-ec2 | ||
# Full commit SHA of the TemStaPro repository archive fetched by the download step. | ||
ARG COMMIT=db001d2b2479131bad2d0ee170b75001e9fad076 | ||
|
||
# Refresh the OS packages and install the nano editor. --no-install-recommends | ||
# keeps optional "recommended" packages out of the image, and the apt caches and | ||
# package lists are removed in the same layer so they do not add to its size. | ||
RUN apt-get update \ | ||
&& apt-get upgrade -y \ | ||
&& apt-get install -y --no-install-recommends nano \ | ||
&& apt-get autoremove -y \ | ||
&& apt-get clean \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
|
||
# Download the TemStaPro code at the pinned commit and unpack it into | ||
# /home/TemStaPro. Both the extracted directory and the downloaded archive are | ||
# deleted in this same layer so neither is left behind in the image. | ||
RUN wget -q -P /tmp "https://github.com/ievapudz/TemStaPro/archive/${COMMIT}.zip" \ | ||
&& mkdir -p /home/TemStaPro \ | ||
&& unzip /tmp/${COMMIT}.zip -d /tmp \ | ||
&& mv /tmp/TemStaPro-${COMMIT}/* /home/TemStaPro \ | ||
&& rm -rf /tmp/TemStaPro-${COMMIT} /tmp/${COMMIT}.zip | ||
|
||
# Use the unpacked TemStaPro directory as the working directory. | ||
WORKDIR /home/TemStaPro | ||
|
||
||
# Install the Python packages listed in requirements.txt without keeping pip's | ||
# download cache, then delete the copied requirements file. | ||
COPY requirements.txt /tmp/requirements.txt | ||
RUN pip install --no-cache-dir -U -r /tmp/requirements.txt \ | ||
&& rm /tmp/requirements.txt | ||
|
||
ENV TMPDIR="/tmp" | ||
# Put the TemStaPro directory on the Python import path. The ":+" form appends the | ||
# previous PYTHONPATH only when it is set, so an unset value does not leave a | ||
# trailing ":" (an empty path entry) behind. | ||
ENV PYTHONPATH="/home/TemStaPro${PYTHONPATH:+:${PYTHONPATH}}" | ||
|
||
# Reset the entrypoint to empty so that no entrypoint inherited from the base image is used. | ||
ENTRYPOINT [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
transformers==4.24.0 | ||
sentencepiece==0.1.96 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Rostlab/prot_t5_xl_half_uniref50-enc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Predict protein thermostability using sequence representations from a protein language model | ||
|
||
## Summary | ||
|
||
Predict protein thermostability with [TemStaPro](https://github.com/ievapudz/TemStaPro), which uses sequence representations from a protein language model. TemStaPro was developed at the Institute of Biotechnology, Life Sciences Center, Vilnius University. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
name: TemStaPro | ||
description: "Predict protein thermostability using sequence representations from a protein language model. From Institute of Biotechnology, Life Sciences Center, Vilnius University." | ||
engine: NEXTFLOW | ||
main: main.nf | ||
parameterTemplate: | ||
fasta_path: | ||
description: "Input file in FASTA format." | ||
optional: false | ||
window_size_predictions: | ||
description: "Set the window size for average smoothing of per-residue predictions for plotting. Defaults to 81." | ||
optional: true | ||
portion_size: | ||
description: "Maximum size of input sequence divisions. Set to 0 for no division. Defaults to 1000." | ||
optional: true | ||
storageCapacity: 1200 | ||
tags: | ||
Name: "TemStaPro" | ||
accelerators: GPU |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
nextflow.enable.dsl = 2 | ||
|
||
// Named sub-workflow: passes its four inputs straight to TemStaProTask and | ||
// emits that process's output under the name `results`. | ||
workflow TemStaPro { | ||
take: | ||
fasta_path | ||
window_size_predictions | ||
portion_size | ||
prot_t5_params | ||
|
||
main: | ||
TemStaProTask(fasta_path, window_size_predictions, portion_size, prot_t5_params) | ||
|
||
emit: | ||
results = TemStaProTask.out | ||
} | ||
|
||
// Runs the TemStaPro script on a FASTA file and collects every file it writes | ||
// into the task's output/ directory. | ||
process TemStaProTask { | ||
// Matched by the `withLabel: temstapro` selector in the Nextflow config, which sets the container. | ||
label 'temstapro' | ||
cpus 4 | ||
memory '16 GB' | ||
maxRetries 1 | ||
accelerator 1, type: 'nvidia-tesla-a10g' | ||
// Publish path is built from the session id, the process name (with ':' replaced by '/'), | ||
// the task index and the attempt number. | ||
publishDir "/mnt/workflow/pubdir/${workflow.sessionId}/${task.process.replace(':', '/')}/${task.index}/${task.attempt}" | ||
|
||
||
input: | ||
// FASTA file passed to --input-fasta | ||
path fasta_path | ||
// Value passed to --window-size-predictions | ||
val window_size_predictions | ||
// Value passed to --portion-size | ||
val portion_size | ||
// Path passed to --PT-directory | ||
path prot_t5_params | ||
|
||
||
output: | ||
path 'output/*' | ||
|
||
||
// The script creates output/ first because the mean, per-residue and plot options | ||
// below all write into it; `set -euxo pipefail` makes the task fail on the first error. | ||
script: | ||
""" | ||
set -euxo pipefail | ||
mkdir output | ||
/opt/conda/bin/python /home/TemStaPro/temstapro \ | ||
--input-fasta $fasta_path \ | ||
--PT-directory $prot_t5_params \ | ||
--temstapro-directory '/home/TemStaPro' \ | ||
--more-thresholds \ | ||
--mean-output 'output/mean_output.tsv' \ | ||
--per-res-output 'output/per_res_output.tsv' \ | ||
--window-size-predictions $window_size_predictions \ | ||
--per-residue-plot-dir output \ | ||
--portion-size $portion_size | ||
""" | ||
} | ||
|
||
// Entry workflow: feeds the pipeline parameters into the TemStaPro workflow. | ||
// The FASTA parameter is turned into a file channel; the others are passed as plain values. | ||
workflow { | ||
TemStaPro( | ||
Channel.fromPath(params.fasta_path), | ||
params.window_size_predictions, | ||
params.portion_size, | ||
params.prot_t5_params | ||
) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// Default values for the pipeline parameters. | ||
params { | ||
window_size_predictions = 81 | ||
portion_size = 1000 | ||
|
||
||
// NOTE(review): {{S3_BUCKET_NAME}} looks like a template placeholder that is | ||
// substituted before the workflow is deployed -- confirm. | ||
prot_t5_params = "s3://{{S3_BUCKET_NAME}}/ref-data/temstapro/Rostlab/prot_t5_xl_half_uniref50-enc/" | ||
} | ||
|
||
||
// Container used by processes labelled `temstapro`. | ||
// NOTE(review): {{temstapro:latest}} is presumably replaced with a real image URI -- confirm. | ||
process { | ||
withLabel: temstapro { container = "{{temstapro:latest}}" } | ||
} | ||
|
||
||
// Run tasks in Docker and pass `--gpus all` to the container runtime. | ||
docker { | ||
enabled = true | ||
runOptions = "--gpus all" | ||
} |