Skip to content

Commit

Permalink
Merge pull request #9 from Australian-Structural-Biology-Computing/clean-up-code
Browse files Browse the repository at this point in the history

Clean up code
  • Loading branch information
nbtm-sh authored Oct 10, 2024
2 parents 632610b + 9af55b1 commit 8ce0598
Show file tree
Hide file tree
Showing 7 changed files with 165 additions and 69 deletions.
79 changes: 57 additions & 22 deletions conf/dbs.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,37 +21,72 @@ params {
pdb_seqres = 'ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt'
uniprot_sprot = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz'
uniprot_trembl = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz'

bfd_name = params.bfd_prefix ?: 'bfd'
smallbfd_name = params.smallbfd_prefix ?: 'smallbfd'
mgnify_name = params.mgnify_prefix ?: 'mgnify'
pdb70_name = params.pdb70_prefix ?: 'pdb70'
pdb_mmcif_name = params.pdb_mmcif_prefix ?: 'pdb_mmcif'
uniclust30_name = params.uniclust30_prefix ?: 'uniclust30'
uniref90_name = params.uniref90_prefix ?: 'uniref90'
pdb_seqres_name = params.pdb_seq_prefix ?: 'pdb_seqres'
uniprot_name = params.uniprot_prefix ?: 'uniprot'
alphafold_params_name = params.alphafold_params_prefix ?: 'params/alphafold_params_*'
mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/'
mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat'

uniclust30_db_name = params.uniclust30_db ?: 'uniclust30_2018_08'
bfd_first_non_consensus_sequences_name = params.bfd_first_non_consensus_sequences ?: 'bfd-first_non_consensus_sequences.fasta'
uniprot_fasta_name = params.uniprot_fasta ?: 'uniprot.fasta'
pdb_seqres_txt_name = params.pdb_seqres_txt ?: 'pdb_seqres.txt'
bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt ?: 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt'
uniref90_fasta_name = params.uniref90_fasta ?: 'uniref90.fasta'
mgy_clusters_fasta_name = params.mgy_clusters_fasta ?: 'mgy_clusters_2022_05.fa'


// Alphafold paths
bfd_path = "${params.alphafold2_db}/bfd/*"
small_bfd_path = "${params.alphafold2_db}/small_bfd/*"
bfd_path = "${params.alphafold2_db}/${bfd_name}/*"
small_bfd_path = "${params.alphafold2_db}/${smallbfd_name}/*"
//alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*"
alphafold2_params_path = "${params.alphafold2_db}/params/*"
mgnify_path = "${params.alphafold2_db}/mgnify/*"
pdb70_path = "${params.alphafold2_db}/pdb70/**"
pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/**"
//alphafold2_params_path = "${params.alphafold2_db}/params/*"
alphafold2_params_path = "${params.alphafold2_db}/${alphafold_params_name}/*"
mgnify_path = "${params.alphafold2_db}/${mgnify_name}/*"
pdb70_path = "${params.alphafold2_db}/${pdb70_name}/**"
pdb_mmcif_path = "${params.alphafold2_db}/${pdb_mmcif_name}/**"
//uniclust30_path = "${params.alphafold2_db}/uniclust30/**"
uniclust30_path = "/srv/scratch/sbf/uniclust30/**"
uniref90_path = "${params.alphafold2_db}/uniref90/*"
pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*"
uniprot_path = "${params.alphafold2_db}/uniprot/*"
//uniclust30_path = "/srv/scratch/sbf/uniclust30/**"
uniclust30_path = "${params.alphafold2_db}/${uniclust30_name}/**"
uniref90_path = "${params.alphafold2_db}/${uniref90_name}/*"
pdb_seqres_path = "${params.alphafold2_db}/${pdb_seqres_name}/*"
uniprot_path = "${params.alphafold2_db}/${uniprot_name}/*"

// Alphafold variables
bfd_variable = "${params.alphafold2_db}/bfd/"
small_bfd_variable = "${params.alphafold2_db}/smallbfd/"
mgnify_variable = "${params.alphafold2_db}/mgnify/"
pdb70_variable = "${params.alphafold2_db}/pdb70/"
pdb_mmcif_variable = "${params.alphafold2_db}/pdb_mmcif/"
//bfd_variable = "${params.alphafold2_db}/bfd/"
bfd_dir_path = "${params.alphafold2_db}/${bfd_name}/"
//small_bfd_variable = "${params.alphafold2_db}/smallbfd/"
small_bfd_dir_path = "${params.alphafold2_db}/${smallbfd_name}/"
//mgnify_variable = "${params.alphafold2_db}/mgnify/"
mgnify_dir_path = "${params.alphafold2_db}/${mgnify_name}/"
//pdb70_variable = "${params.alphafold2_db}/pdb70/"
pdb70_dir_path = "${params.alphafold2_db}/${pdb70_name}/"
//pdb_mmcif_variable = "${params.alphafold2_db}/pdb_mmcif/"
pdb_mmcif_dir_path = "${params.alphafold2_db}/${pdb_mmcif_name}/"
//uniclust30_variable = "${params.alphafold2_db}/uniclust30/"
uniclust30_variable = "/srv/scratch/sbf/uniclust30/"
uniref90_variable = "${params.alphafold2_db}/uniref90/"
pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/"
uniprot_variable = "${params.alphafold2_db}/uniprot/"
//uniclust30_variable = "/srv/scratch/sbf/uniclust30/"
uniclust30_dir_path = "${params.alphafold2_db}/${uniclust30_name}/"
//uniref90_variable = "${params.alphafold2_db}/uniref90/"
uniref90_dir_path = "${params.alphafold2_db}/${uniref90_name}/"
//pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/"
pdb_seqres_dir_path = "${params.alphafold2_db}/${pdb_seqres_name}/"
//uniprot_variable = "${params.alphafold2_db}/uniprot/"
uniprot_dir_path = "${params.alphafold2_db}/${uniprot_name}/"

// Alphafold MSA Variables
mgnify_database_path = "${params.alphafold2_db}/mgnify/"
template_mmcif_dir = "${params.alphafold2_db}/pdb_mmcif/mmcif_files/"
obsolete_pdbs_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat"
mgnify_database_path = "${params.alphafold2_db}/${mgnify_name}/"
//template_mmcif_dir = "${params.alphafold2_db}/pdb_mmcif/mmcif_files/"
template_mmcif_dir = "${params.alphafold2_db}/${mmcif_files_name}/"
//obsolete_pdbs_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat"
obsolete_pdbs_path = "${params.alphafold2_db}/${mmcif_obsolete_name}"

// Colabfold links
colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz'
Expand Down
2 changes: 1 addition & 1 deletion conf/katana.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ process {
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }

executor = 'pbspro'
//executor = 'pbspro'

errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
maxRetries = 1
Expand Down
38 changes: 24 additions & 14 deletions modules/local/run_alphafold2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@ process RUN_ALPHAFOLD2 {
tag "$meta.id"
label 'process_medium'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'docker://nfcore/proteinfold_alphafold2_standard:1.0.0' :
'nfcore/proteinfold_alphafold2_standard:1.0.0' }"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error("Local RUN_ALPHAFOLD2 module does not support Conda. Please use Docker / Singularity / Podman instead.")
}

container "nf-core/proteinfold_alphafold2_standard:1.1.1"

input:
tuple val(meta), path(fasta)
Expand All @@ -19,7 +22,7 @@ process RUN_ALPHAFOLD2 {
path ('mgnify/*')
path ('pdb70/*')
path ('pdb_mmcif/*')
path ('uniclust30/*')
path ('uniref30/*')
path ('uniref90/*')
path ('pdb_seqres/*')
path ('uniprot/*')
Expand All @@ -34,29 +37,35 @@ process RUN_ALPHAFOLD2 {

script:
def args = task.ext.args ?: ''
def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_variable}bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=${params.uniclust30_variable}uniclust30_2018_08/uniclust30_2018_08" :
"reduced_dbs --small_bfd_database_path=${params.small_bfd_path}bfd-first_non_consensus_sequences.fasta"
def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.alphafold2_db}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=${params.alphafold2_db}/uniref30/UniRef30_2021_03" :
"reduced_dbs --small_bfd_database_path=${params.alphafold2_db}/small_bfd/bfd-first_non_consensus_sequences.fasta"
if (alphafold2_model_preset == 'multimer') {
alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_variable}pdb_seqres.txt --uniprot_database_path=${params.uniprot_variable}uniprot.fasta "
alphafold2_model_preset += " --pdb_seqres_database_path=${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt --uniprot_database_path=${params.alphafold2_db}/uniprot/uniprot.fasta "
}
else {
alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_variable}pdb70 "
alphafold2_model_preset += " --pdb70_database_path=${params.alphafold2_db}/pdb70/pdb70_from_mmcif_200916/pdb70 "
}
"""
if [ -f ${params.pdb_seqres_variable}pdb_seqres.txt ]
then sed -i "/^\\w*0/d" ${params.pdb_seqres_variable}pdb_seqres.txt
# Scratch directory for per-task working copies; deleted by the rm -rf near the end of this script.
RUNTIME_TMP=\$(mktemp -d)
# Best-effort CUDA toolkit / driver diagnostics. Output is echoed and also saved in the
# task working directory instead of a hard-coded home directory, and a missing tool
# must not abort the task, hence the "|| true".
nvcc --version 2>&1 | tee nvcc.txt || true
nvidia-smi 2>&1 | tee nvidia-smi.txt || true
# Only when pdb_seqres.txt exists: copy it to the scratch directory and apply the sed
# filter to the copy, so the shared database file is never edited in place.
# The cp must live inside the then-branch; placed before "then" it becomes part of the
# condition and runs even when the file is absent.
# NOTE(review): the run_alphafold.py call in this script still passes the original
# database location to --pdb_seqres_database_path; confirm whether it should use the
# filtered copy under RUNTIME_TMP instead.
if [ -f "${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt" ]; then
cp "${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt" "\${RUNTIME_TMP}/"
sed -i "/^\\w*0/d" "\${RUNTIME_TMP}/pdb_seqres.txt"
fi
if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi
if [ -d ${params.alphafold2_db}/params/ ]; then ln -r -s ${params.alphafold2_db}/params params; fi
python3 /app/alphafold/run_alphafold.py \
--fasta_paths=${fasta} \
--model_preset=${alphafold2_model_preset} \
--db_preset=${db_preset} \
--output_dir=\$PWD \
--data_dir=\$PWD \
--uniref90_database_path=${params.uniref90_variable}uniref90.fasta \
--template_mmcif_dir=${params.pdb_mmcif_variable}mmcif_files \
--obsolete_pdbs_path=${params.pdb_mmcif_variable}obsolete.dat \
--uniref90_database_path=${params.alphafold2_db}/uniref90/uniref90.fasta \
--mgnify_database_path=${params.alphafold2_db}/mgnify/mgy_clusters_2022_05.fa \
--template_mmcif_dir=${params.alphafold2_db}/pdb_mmcif/mmcif_files \
--obsolete_pdbs_path=${params.alphafold2_db}/pdb_mmcif/obsolete.dat \
--random_seed=53343 \
--use_gpu_relax \
$args
cp "${fasta.baseName}"/ranked_0.pdb ./"${fasta.baseName}".alphafold.pdb
Expand All @@ -69,6 +78,7 @@ process RUN_ALPHAFOLD2 {
echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv
cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv
cd ..
rm -rf "\${RUNTIME_TMP}"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
39 changes: 21 additions & 18 deletions modules/local/run_alphafold2_msa.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
process RUN_ALPHAFOLD2_MSA {
tag "$meta.id"
label 'process_medium'
debug true

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'docker://nfcore/proteinfold_alphafold2_msa:1.0.0' :
'nfcore/proteinfold_alphafold2_msa:1.0.0' }"
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error("Local RUN_ALPHAFOLD2_MSA module does not support Conda. Please use Docker / Singularity / Podman instead.")
}

container "nf-core/proteinfold_alphafold2_msa:1.1.1"

input:
tuple val(meta), path(fasta)
Expand All @@ -20,7 +22,7 @@ process RUN_ALPHAFOLD2_MSA {
path ('mgnify/*')
path ('pdb70/*')
path ('pdb_mmcif/*')
path ('uniclust30/*')
path ('uniref30/*')
path ('uniref90/*')
path ('pdb_seqres/*')
path ('uniprot/*')
Expand All @@ -35,33 +37,34 @@ process RUN_ALPHAFOLD2_MSA {

script:
def args = task.ext.args ?: ''
def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_variable}bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=${params.uniclust30_variable}uniclust30_2018_08" :
"reduced_dbs --small_bfd_database_path=${params.small_bfd_path}bfd-first_non_consensus_sequences.fasta"
def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.alphafold2_db}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=${params.alphafold2_db}/uniref30/UniRef30_2021_03" :
"reduced_dbs --small_bfd_database_path=${params.alphafold2_db}/small_bfd/bfd-first_non_consensus_sequences.fasta"
if (alphafold2_model_preset == 'multimer') {
alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_variable}pdb_seqres.txt --uniprot_database_path=${params.uniprot_variable}/uniprot.fasta "
alphafold2_model_preset += " --pdb_seqres_database_path=${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt --uniprot_database_path=${params.alphafold2_db}/uniprot/uniprot.fasta "
}
else {
alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_variable}pdb70 "
alphafold2_model_preset += " --pdb70_database_path=${params.alphafold2_db}/pdb70/pdb70_from_mmcif_200916/pdb70 "
}
"""
#if [ -f pdb_seqres/pdb_seqres.txt ]
# \$PDB_SEQRES_TEMP=\$(mktemp --directory)
# cp ${params.pdb_seqres_variable}pdb_seqres.txt \${PDB_SEQRES_TEMP}/
# then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/pdb_seqres.txt
#fi
# Scratch directory for per-task working copies; deleted by the rm -rf near the end of this script.
RUNTIME_TMP=\$(mktemp -d)
# Only when pdb_seqres.txt exists: copy it to the scratch directory and apply the sed
# filter to the copy, so the shared database file is never edited in place.
# The cp must live inside the then-branch; placed before "then" it becomes part of the
# condition and runs even when the file is absent.
# NOTE(review): the run_msa.py call in this script still passes the original database
# location to --pdb_seqres_database_path; confirm whether it should use the filtered
# copy under RUNTIME_TMP instead.
if [ -f "${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt" ]; then
cp "${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt" "\${RUNTIME_TMP}/"
sed -i "/^\\w*0/d" "\${RUNTIME_TMP}/pdb_seqres.txt"
fi
python3 /app/alphafold/run_msa.py \
--fasta_paths=${fasta} \
--model_preset=${alphafold2_model_preset} \
--db_preset=${db_preset} \
--output_dir=\$PWD \
--data_dir=\$PWD \
--uniref90_database_path=${params.uniref90_variable}uniref90.fasta \
--mgnify_database_path=${params.mgnify_database_path}/mgy_clusters_2022_05.fa \
--template_mmcif_dir=${params.template_mmcif_dir} \
--obsolete_pdbs_path=${params.obsolete_pdbs_path} \
--uniref90_database_path=${params.alphafold2_db}/uniref90/uniref90.fasta \
--mgnify_database_path=${params.alphafold2_db}/mgnify/mgy_clusters_2022_05.fa \
--template_mmcif_dir=${params.alphafold2_db}/pdb_mmcif/mmcif_files \
--obsolete_pdbs_path=${params.alphafold2_db}/pdb_mmcif/obsolete.dat \
$args
cp "${fasta.baseName}"/features.pkl ./"${fasta.baseName}".features.pkl
rm -rf "\${RUNTIME_TMP}"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
19 changes: 11 additions & 8 deletions modules/local/run_alphafold2_pred.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
*/
process RUN_ALPHAFOLD2_PRED {
tag "$meta.id"
label 'process_medium', 'gpu_compute'
label 'process_medium'
label 'gpu_compute'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'docker://nfcore/proteinfold_alphafold2_split:1.0.0' :
'nfcore/proteinfold_alphafold2_split:1.0.0' }"
echo 'true'
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error("Local RUN_ALPHAFOLD2_PRED module does not support Conda. Please use Docker / Singularity / Podman instead.")
}

container "nf-core/proteinfold_alphafold2_split:1.1.1"

input:
tuple val(meta), path(fasta)
Expand All @@ -20,7 +23,7 @@ process RUN_ALPHAFOLD2_PRED {
path ('mgnify/*')
path ('pdb70/*')
path ('pdb_mmcif/*')
path ('uniclust30/*')
path ('uniref30/*')
path ('uniref90/*')
path ('pdb_seqres/*')
path ('uniprot/*')
Expand All @@ -37,15 +40,15 @@ process RUN_ALPHAFOLD2_PRED {
script:
def args = task.ext.args ?: ''
"""
echo \$PWD
#if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi
if [ -d ${params.alphafold2_db}/params/ ]; then ln -r -s ${params.alphafold2_db}/params params; fi
python3 /app/alphafold/run_predict.py \
--fasta_paths=${fasta} \
--model_preset=${alphafold2_model_preset} \
--output_dir=\$PWD \
--data_dir=\$PWD \
--random_seed=53343 \
--msa_path=${msa} \
--use_gpu_relax \
$args
cp "${fasta.baseName}"/ranked_0.pdb ./"${fasta.baseName}".alphafold.pdb
Expand Down
46 changes: 45 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,48 @@ params {
full_dbs = false // true full_dbs, false reduced_dbs
alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer}
alphafold2_db = null

// Database prefixes
bfd_prefix = null
smallbfd_prefix = null
mgnify_prefix = null
pdb70_prefix = null
pdb_mmcif_prefix = null
uniclust30_prefix = null
uniref90_prefix = null
pdb_seq_prefix = null
uniprot_prefix = null
alphafold_params_prefix = null
mmcif_path = null
mmcif_obsolete = null
uniclust30_db = null
bfd_first_non_consensus_sequences = null
uniprot_fasta = null
pdb_seqres_txt = null
bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt = null
uniref90_fasta = null
mgy_clusters_fasta = null
uniclust30_prefix = null

bfd_name = null
smallbfd_name = null
mgnify_name = null
pdb70_name = null
pdb_mmcif_name = null
uniclust30_name = null
uniref90_name = null
pdb_seqres_name = null
uniprot_name = null
alphafold_params_name = null
mmcif_files_name = null
mmcif_obsolete_name = null
uniclust30_db_name = null
bfd_first_non_consensus_sequences_name = null
uniprot_fasta_name = null
pdb_seqres_txt_name = null
bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = null
uniref90_fasta_name = null
mgy_clusters_fasta_name = null

// Alphafold2 links
bfd = null
Expand Down Expand Up @@ -104,8 +146,10 @@ params {

}

spack.enabled = true

// Load base.config by default for all pipelines
includeConfig 'conf/base.config'
includeConfig 'conf/katana.config'

// Load nf-core custom profiles from different Institutions
try {
Expand Down
11 changes: 6 additions & 5 deletions pf_files/proteinfold_test.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
module load nextflow/23.04.4 java/11
module load nextflow/23.04.4 java/11 cuda/11.8.0

export SINGULARITY_CACHE_DIR=/srv/scratch/sbf/singularity_cache
export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cache
# Per-user Singularity image cache.
#   SINGULARITY_CACHEDIR      - the variable the Singularity CLI reads.
#   NXF_SINGULARITY_CACHEDIR  - the variable Nextflow reads.
#   SINGULARITY_CACHE_DIR     - kept only because this script exported it before;
#                               Singularity itself does not consult this name.
export SINGULARITY_CACHEDIR="/srv/scratch/${USER}/Singularity/cache"
export SINGULARITY_CACHE_DIR="${SINGULARITY_CACHEDIR}"
export NXF_SINGULARITY_CACHEDIR="${SINGULARITY_CACHEDIR}"

nextflow run ../main.nf \
--input samplesheet.csv \
--outdir test_out \
--mode alphafold2 \
--alphafold2_db /data/bio/alphafold \
--alphafold2_db /mnt/af2/ \
--full_dbs true \
--alphafold2_model_preset monomer \
--alphafold2_model_preset multimer \
--alphafold_params_name 'params' \
--alphafold2_mode 'split_msa_prediction' \
--use_gpu true \
-profile singularity \

0 comments on commit 8ce0598

Please sign in to comment.