Skip to content

Commit

Permalink
Merge pull request #8 from Australian-Structural-Biology-Computing/add-dbs-variables-to-msa-pipeline
Browse files Browse the repository at this point in the history

Make pipeline work on UNSW Katana
  • Loading branch information
nbtm-sh authored Aug 8, 2024
2 parents e676c33 + 03f2575 commit 632610b
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 21 deletions.
2 changes: 1 addition & 1 deletion conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ process {
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }

executor = 'pbspro'
//executor = 'pbspro'

errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
maxRetries = 1
Expand Down
14 changes: 11 additions & 3 deletions conf/dbs.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@ params {
// Alphafold paths
bfd_path = "${params.alphafold2_db}/bfd/*"
small_bfd_path = "${params.alphafold2_db}/small_bfd/*"
alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*"
//alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*"
alphafold2_params_path = "${params.alphafold2_db}/params/*"
mgnify_path = "${params.alphafold2_db}/mgnify/*"
pdb70_path = "${params.alphafold2_db}/pdb70/**"
pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/**"
uniclust30_path = "${params.alphafold2_db}/uniclust30/**"
//uniclust30_path = "${params.alphafold2_db}/uniclust30/**"
uniclust30_path = "/srv/scratch/sbf/uniclust30/**"
uniref90_path = "${params.alphafold2_db}/uniref90/*"
pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*"
uniprot_path = "${params.alphafold2_db}/uniprot/*"
Expand All @@ -40,11 +42,17 @@ params {
mgnify_variable = "${params.alphafold2_db}/mgnify/"
pdb70_variable = "${params.alphafold2_db}/pdb70/"
pdb_mmcif_variable = "${params.alphafold2_db}/pdb_mmcif/"
uniclust30_variable = "${params.alphafold2_db}/uniclust30/"
//uniclust30_variable = "${params.alphafold2_db}/uniclust30/"
uniclust30_variable = "/srv/scratch/sbf/uniclust30/"
uniref90_variable = "${params.alphafold2_db}/uniref90/"
pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/"
uniprot_variable = "${params.alphafold2_db}/uniprot/"

// Alphafold MSA Variables
mgnify_database_path = "${params.alphafold2_db}/mgnify/"
template_mmcif_dir = "${params.alphafold2_db}/pdb_mmcif/mmcif_files/"
obsolete_pdbs_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat"

// Colabfold links
colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz'
uniref30 = 'https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2202.tar.gz'
Expand Down
25 changes: 14 additions & 11 deletions modules/local/run_alphafold2_msa.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
process RUN_ALPHAFOLD2_MSA {
tag "$meta.id"
label 'process_medium'
debug true

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'docker://nfcore/proteinfold_alphafold2_msa:1.0.0' :
Expand Down Expand Up @@ -34,28 +35,30 @@ process RUN_ALPHAFOLD2_MSA {

script:
def args = task.ext.args ?: ''
def db_preset = db_preset ? "full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=./uniclust30/uniclust30_2018_08/uniclust30_2018_08" :
"reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta"
def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_variable}bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=${params.uniclust30_variable}uniclust30_2018_08" :
"reduced_dbs --small_bfd_database_path=${params.small_bfd_path}bfd-first_non_consensus_sequences.fasta"
if (alphafold2_model_preset == 'multimer') {
alphafold2_model_preset += " --pdb_seqres_database_path=./pdb_seqres/pdb_seqres.txt --uniprot_database_path=./uniprot/uniprot.fasta "
alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_variable}pdb_seqres.txt --uniprot_database_path=${params.uniprot_variable}/uniprot.fasta "
}
else {
alphafold2_model_preset += " --pdb70_database_path=./pdb70/pdb70_from_mmcif_200916/pdb70 "
alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_variable}pdb70 "
}
"""
if [ -f pdb_seqres/pdb_seqres.txt ]
then sed -i "/^\\w*0/d" pdb_seqres/pdb_seqres.txt
fi
#if [ -f pdb_seqres/pdb_seqres.txt ]
# \$PDB_SEQRES_TEMP=\$(mktemp --directory)
# cp ${params.pdb_seqres_variable}pdb_seqres.txt \${PDB_SEQRES_TEMP}/
# then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/pdb_seqres.txt
#fi
python3 /app/alphafold/run_msa.py \
--fasta_paths=${fasta} \
--model_preset=${alphafold2_model_preset} \
--db_preset=${db_preset} \
--output_dir=\$PWD \
--data_dir=\$PWD \
--uniref90_database_path=./uniref90/uniref90.fasta \
--mgnify_database_path=./mgnify/mgy_clusters_2018_12.fa \
--template_mmcif_dir=./pdb_mmcif/mmcif_files \
--obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \
--uniref90_database_path=${params.uniref90_variable}uniref90.fasta \
--mgnify_database_path=${params.mgnify_database_path}/mgy_clusters_2022_05.fa \
--template_mmcif_dir=${params.template_mmcif_dir} \
--obsolete_pdbs_path=${params.obsolete_pdbs_path} \
$args
cp "${fasta.baseName}"/features.pkl ./"${fasta.baseName}".features.pkl
Expand Down
4 changes: 3 additions & 1 deletion modules/local/run_alphafold2_pred.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ process RUN_ALPHAFOLD2_PRED {
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'docker://nfcore/proteinfold_alphafold2_split:1.0.0' :
'nfcore/proteinfold_alphafold2_split:1.0.0' }"
echo 'true'

input:
tuple val(meta), path(fasta)
Expand Down Expand Up @@ -36,7 +37,8 @@ process RUN_ALPHAFOLD2_PRED {
script:
def args = task.ext.args ?: ''
"""
if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi
echo \$PWD
#if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi
python3 /app/alphafold/run_predict.py \
--fasta_paths=${fasta} \
--model_preset=${alphafold2_model_preset} \
Expand Down
5 changes: 3 additions & 2 deletions pf_files/proteinfold_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ module load nextflow/23.04.4 java/11
export SINGULARITY_CACHE_DIR=/srv/scratch/sbf/singularity_cache
export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cache

nextflow run /srv/scratch/z5378336/proteinfold/main.nf \
nextflow run ../main.nf \
--input samplesheet.csv \
--outdir test_out \
--mode alphafold2 \
--alphafold2_db /data/bio/alphafold \
--full_dbs true \
--alphafold2_model_preset monomer \
--alphafold2_mode 'split_msa_prediction' \
--use_gpu true \
-profile singularity
-profile singularity \
3 changes: 1 addition & 2 deletions pf_files/samplesheet.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
sequence,fasta
T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta
T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta
1L2Y,/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/pf_files/1L2Y.fasta
2 changes: 1 addition & 1 deletion pf_files/test_out/pipeline_info/samplesheet.valid.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
sequence,fasta
1L2Y_T1,./1L2Y.fasta
1L2Y_T1,/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/pf_files/1L2Y.fasta

0 comments on commit 632610b

Please sign in to comment.