From 964f5d01c097177cafddab9666770c462fb94ba4 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 16:04:20 +1000 Subject: [PATCH 01/22] feat(conf/dbs): Added variables for database names, and file names --- conf/dbs.config | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/conf/dbs.config b/conf/dbs.config index 4b2d0e4f..78863ffb 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -21,6 +21,28 @@ params { pdb_seqres = 'ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt' uniprot_sprot = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' uniprot_trembl = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' + + def bfd_name = params.bfd ?: 'bfd' + def smallbfd_name = params.smallbfd ?: 'smallbfd' + def mgnify_name = params.mgnify ?: 'mgnify' + def pdb70_name = params.pdb70 ?: 'pdb70' + def pdb_mmcif_name = params.pdb_mmcif ?: 'pdb_mmcif' + def uniclust30_name = params.uniclust30 ?: 'uniclust30' + def uniref90_name = params.uniref90 ?: 'uniref90' + def pdb_seqres_name = params.pdb_seq ?: 'pdb_seqres' + def uniprot_name = params.uniprot ?: 'uniprot' + def alphafold_params_name = params.alphafold_params ?: 'alphafold_params_*' + def mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/' + def mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat' + + def uniclust30_db_name = params.uniclust30_db ?: 'uniclust30_2018_08' + def bfd_first_non_consensus_sequences_name = params.bfd_first_non_consensus_sequences ?: 'bfd-first_non_consensus_sequences.fasta' + def uniprot_fasta_name = params.uniprot_fasta ?: 'uniprot.fasta' + def pdb_seqres_txt_name = params.pdb_seqres_txt ?: 'pdb_seqres.txt' + def bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt ?: 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt' + def 
uniref90_fasta_name = params.uniref90_fasta ?: 'uniref90.fasta' + def mgy_clusters_fasta_name = params.mgy_clusters_fasta ?: 'mgy_clusters_2022_05.fa' + // Alphafold paths bfd_path = "${params.alphafold2_db}/bfd/*" From 2a79fe43af25a20a77eb3b3505075794eeea5efb Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 16:05:27 +1000 Subject: [PATCH 02/22] feat(conf/dbs): Changed config paths to use database variables instead of hardcoded values --- conf/dbs.config | 57 ++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 78863ffb..8f061a7e 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -45,35 +45,48 @@ params { // Alphafold paths - bfd_path = "${params.alphafold2_db}/bfd/*" - small_bfd_path = "${params.alphafold2_db}/small_bfd/*" + bfd_path = "${params.alphafold2_db}/${bfd_name}/*" + small_bfd_path = "${params.alphafold2_db}/${small_bfd_name}/*" //alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" - alphafold2_params_path = "${params.alphafold2_db}/params/*" - mgnify_path = "${params.alphafold2_db}/mgnify/*" - pdb70_path = "${params.alphafold2_db}/pdb70/**" - pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/**" + //alphafold2_params_path = "${params.alphafold2_db}/params/*" + alphafold2_params_path = "${params.alphafold2_db}/${alphafold_params_name}/*" + mgnify_path = "${params.alphafold2_db}/${mgnify_name}/*" + pdb70_path = "${params.alphafold2_db}/${pdb70_name}/**" + pdb_mmcif_path = "${params.alphafold2_db}/${pdb_mmcif_name}/**" //uniclust30_path = "${params.alphafold2_db}/uniclust30/**" - uniclust30_path = "/srv/scratch/sbf/uniclust30/**" - uniref90_path = "${params.alphafold2_db}/uniref90/*" - pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" - uniprot_path = "${params.alphafold2_db}/uniprot/*" + //uniclust30_path = "/srv/scratch/sbf/uniclust30/**" + uniclust30_path = "${params.alphafold2_db}/${uniclust30_name}/**" + 
uniref90_path = "${params.alphafold2_db}/${uniref90_name}/*" + pdb_seqres_path = "${params.alphafold2_db}/${pdb_seqres_name}/*" + uniprot_path = "${params.alphafold2_db}/${uniprot_name}/*" // Alphafold variables - bfd_variable = "${params.alphafold2_db}/bfd/" - small_bfd_variable = "${params.alphafold2_db}/smallbfd/" - mgnify_variable = "${params.alphafold2_db}/mgnify/" - pdb70_variable = "${params.alphafold2_db}/pdb70/" - pdb_mmcif_variable = "${params.alphafold2_db}/pdb_mmcif/" + //bfd_variable = "${params.alphafold2_db}/bfd/" + bfd_dir_path = "${params.alpahfold2_db}/${bfd_name}/" + //small_bfd_variable = "${params.alphafold2_db}/smallbfd/" + small_bfd_dir_path = "${params.alphafold2_db}/${smallbfd_name}/" + //mgnify_variable = "${params.alphafold2_db}/mgnify/" + mgnify_dir_path = "${params.alphafold2_db}/${mgnify_name}/" + //pdb70_variable = "${params.alphafold2_db}/pdb70/" + pdb70_dir_path = "${params.alphafold2_db}/${pdb70_name}/" + //pdb_mmcif_variable = "${params.alphafold2_db}/pdb_mmcif/" + pdb_mmcif_dir_path = "${params.alphafold2_db}/${pdb_mmcif_name}/" //uniclust30_variable = "${params.alphafold2_db}/uniclust30/" - uniclust30_variable = "/srv/scratch/sbf/uniclust30/" - uniref90_variable = "${params.alphafold2_db}/uniref90/" - pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/" - uniprot_variable = "${params.alphafold2_db}/uniprot/" + //uniclust30_variable = "/srv/scratch/sbf/uniclust30/" + uniclust30_dir_path = "${params.alphafold2_db}/${uniclust30_name}/" + //uniref90_variable = "${params.alphafold2_db}/uniref90/" + uniref90_dir_path = "${params.alpahfold2_db}/${uniref90_name}/" + //pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/" + pdb_seqres_dir_path = "${params.alphafold2_db}/${pdb_seqres_name}/" + //uniprot_variable = "${params.alphafold2_db}/uniprot/" + uniprot_dir_path = "${params.alphafold2_db}/${uniprot_name}/" // Alphafold MSA Variables - mgnify_database_path = "${params.alphafold2_db}/mgnify/" - template_mmcif_dir = 
"${params.alphafold2_db}/pdb_mmcif/mmcif_files/" - obsolete_pdbs_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat" + mgnify_database_path = "${params.alphafold2_db}/${mgnify_name}/" + //template_mmcif_dir = "${params.alphafold2_db}/pdb_mmcif/mmcif_files/" + template_mmcif_dir = "${params.alphafold2_db}/${mmcif_files_name}/" + //obsolete_pdbs_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat" + obsolete_pdbs_path = "${params.alphafold2_db}/${mmcif_obsolete_name}" // Colabfold links colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' From c218ad261b2b1d3d0abadfc84002d5dee3ddb829 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 16:06:46 +1000 Subject: [PATCH 03/22] feat(run_alphafold2): Changed hardcoded paths to use variables and updated variable names --- modules/local/run_alphafold2.nf | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 731ad1c1..5cf964dd 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -34,28 +34,30 @@ process RUN_ALPHAFOLD2 { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_variable}bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=${params.uniclust30_variable}uniclust30_2018_08/uniclust30_2018_08" : - "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}bfd-first_non_consensus_sequences.fasta" + def db_preset = db_preset ? 
"full_dbs --bfd_database_path=${params.bfd_dir_path}${params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name} --uniclust30_database_path=${params.uniclust30_dir_path}${params.uniclust30_db_name}" : + "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}${params.bfd_first_non_consensus_sequences_name}" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_variable}pdb_seqres.txt --uniprot_database_path=${params.uniprot_variable}uniprot.fasta " + alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} --uniprot_database_path=${params.uniprot_dir_path}${params.uniprot_fasta_name} " } else { - alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_variable}pdb70 " + alphafold2_model_preset += " --pdb70_database_path=${params.pdb_dir_path}${params.pdb70_name} " } """ - if [ -f ${params.pdb_seqres_variable}pdb_seqres.txt ] - then sed -i "/^\\w*0/d" ${params.pdb_seqres_variable}pdb_seqres.txt + if [ -f ${params.pdb_seqres_dir_path}/${params.pbd_seqres_txt_name} ] + \$PDB_SEQRES_TEMP=\$(mktemp --directory) + cp ${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} \${PDB_SEQRES_TEMP}/ + then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/${params.pdb_seqres_txt_name} fi - if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi + if [ -d ${params.alphafold2_params_path} ]; then ln -r -s ${params.alphafold2_params_path} params/; fi python3 /app/alphafold/run_alphafold.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ - --uniref90_database_path=${params.uniref90_variable}uniref90.fasta \ - --template_mmcif_dir=${params.pdb_mmcif_variable}mmcif_files \ - --obsolete_pdbs_path=${params.pdb_mmcif_variable}obsolete.dat \ + --uniref90_database_path=${params.uniref90_dir_path}uniref90.fasta \ + 
--template_mmcif_dir=${params.template_mmcif_dir} \ + --obsolete_pdbs_path=${params.obsolete_pdbs_path} \ --random_seed=53343 \ $args From edff052c835dbd439e16432e0a2ae1ec3b08221d Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 16:07:42 +1000 Subject: [PATCH 04/22] feat(run_alphafold2_msa): Removed hardcoded paths and changed variables --- modules/local/run_alphafold2_msa.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 10d217ca..2e2bbe9b 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -35,13 +35,13 @@ process RUN_ALPHAFOLD2_MSA { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_variable}bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=${params.uniclust30_variable}uniclust30_2018_08" : - "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}bfd-first_non_consensus_sequences.fasta" + def db_preset = db_preset ? 
"full_dbs --bfd_database_path=${params.bfd_dir_path}${params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name} --uniclust30_database_path=${params.uniclust30_dir_path}${uniclust30_db_name}" : + "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}${params.bfd_first_non_consensus_sequences_name}" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_variable}pdb_seqres.txt --uniprot_database_path=${params.uniprot_variable}/uniprot.fasta " + alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} --uniprot_database_path=${params.uniprot_dir_path}/${params.uniprot_fasta_name} " } else { - alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_variable}pdb70 " + alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_dir_path}${params.pdb70_name} " } """ #if [ -f pdb_seqres/pdb_seqres.txt ] @@ -55,8 +55,8 @@ process RUN_ALPHAFOLD2_MSA { --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ - --uniref90_database_path=${params.uniref90_variable}uniref90.fasta \ - --mgnify_database_path=${params.mgnify_database_path}/mgy_clusters_2022_05.fa \ + --uniref90_database_path=${params.uniref90_dir_path}/${params.uniref90_fasta_name} \ + --mgnify_database_path=${params.mgnify_database_path}/${params.mgy_clusters_fasta_name} \ --template_mmcif_dir=${params.template_mmcif_dir} \ --obsolete_pdbs_path=${params.obsolete_pdbs_path} \ $args From ea4459a713655395514c5db9897554e457d75dfc Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 16:08:19 +1000 Subject: [PATCH 05/22] feat(run_alphafold2_msa): Added code from run_alphafold2.nf so that the script does not attempt to modify the existing database file --- modules/local/run_alphafold2_msa.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 
2e2bbe9b..ac3bd143 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -44,10 +44,10 @@ process RUN_ALPHAFOLD2_MSA { alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_dir_path}${params.pdb70_name} " } """ - #if [ -f pdb_seqres/pdb_seqres.txt ] + #if [ -f ${params.pdb_seqres_dir_path}/${params.pbd_seqres_txt_name} ] # \$PDB_SEQRES_TEMP=\$(mktemp --directory) - # cp ${params.pdb_seqres_variable}pdb_seqres.txt \${PDB_SEQRES_TEMP}/ - # then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/pdb_seqres.txt + # cp ${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} \${PDB_SEQRES_TEMP}/ + # then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/${params.pdb_seqres_txt_name} #fi python3 /app/alphafold/run_msa.py \ --fasta_paths=${fasta} \ From 83ca302ab2222552c07dd5c834c3c5c86808ccb3 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 9 Aug 2024 09:28:30 +1000 Subject: [PATCH 06/22] fix(conf/dbs): Changed variable names to have _prefix on the end to avoid conflicts with existing link variables --- conf/dbs.config | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 8f061a7e..03c06b37 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -22,16 +22,16 @@ params { uniprot_sprot = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' uniprot_trembl = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' - def bfd_name = params.bfd ?: 'bfd' - def smallbfd_name = params.smallbfd ?: 'smallbfd' - def mgnify_name = params.mgnify ?: 'mgnify' - def pdb70_name = params.pdb70 ?: 'pdb70' - def pdb_mmcif_name = params.pdb_mmcif ?: 'pdb_mmcif' - def uniclust30_name = params.uniclust30 ?: 'uniclust30' - def uniref90_name = params.uniref90 ?: 'uniref90' - def pdb_seqres_name = params.pdb_seq ?: 'pdb_seqres' - def uniprot_name = params.uniprot ?: 'uniprot' - def 
alphafold_params_name = params.alphafold_params ?: 'alphafold_params_*' + def bfd_name = params.bfd_prefix ?: 'bfd' + def smallbfd_name = params.smallbfd_prefix ?: 'smallbfd' + def mgnify_name = params.mgnify_prefix ?: 'mgnify' + def pdb70_name = params.pdb70_prefix ?: 'pdb70' + def pdb_mmcif_name = params.pdb_mmcif_prefix ?: 'pdb_mmcif' + def uniclust30_name = params.uniclust30_prefix ?: 'uniclust30' + def uniref90_name = params.uniref90_prefix ?: 'uniref90' + def pdb_seqres_name = params.pdb_seq_prefix ?: 'pdb_seqres' + def uniprot_name = params.uniprot_prefix ?: 'uniprot' + def alphafold_params_name = params.alphafold_params_prefix ?: 'alphafold_params_*' def mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/' def mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat' From faba0abfd77997cfc168a5203db2b98b0abe4df9 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 9 Aug 2024 09:29:18 +1000 Subject: [PATCH 07/22] fix(conf/dbs): Changed existing variables to use new prefix variables --- conf/dbs.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 03c06b37..257b7d18 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -46,7 +46,7 @@ params { // Alphafold paths bfd_path = "${params.alphafold2_db}/${bfd_name}/*" - small_bfd_path = "${params.alphafold2_db}/${small_bfd_name}/*" + small_bfd_path = "${params.alphafold2_db}/${smallbfd_name}/*" //alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" //alphafold2_params_path = "${params.alphafold2_db}/params/*" alphafold2_params_path = "${params.alphafold2_db}/${alphafold_params_name}/*" @@ -62,7 +62,7 @@ params { // Alphafold variables //bfd_variable = "${params.alphafold2_db}/bfd/" - bfd_dir_path = "${params.alpahfold2_db}/${bfd_name}/" + bfd_dir_path = "${params.alphafold2_db}/${bfd_name}/" //small_bfd_variable = "${params.alphafold2_db}/smallbfd/" small_bfd_dir_path = 
"${params.alphafold2_db}/${smallbfd_name}/" //mgnify_variable = "${params.alphafold2_db}/mgnify/" @@ -75,7 +75,7 @@ params { //uniclust30_variable = "/srv/scratch/sbf/uniclust30/" uniclust30_dir_path = "${params.alphafold2_db}/${uniclust30_name}/" //uniref90_variable = "${params.alphafold2_db}/uniref90/" - uniref90_dir_path = "${params.alpahfold2_db}/${uniref90_name}/" + uniref90_dir_path = "${params.alphafold2_db}/${uniref90_name}/" //pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/" pdb_seqres_dir_path = "${params.alphafold2_db}/${pdb_seqres_name}/" //uniprot_variable = "${params.alphafold2_db}/uniprot/" From 04cad9dcd24256d3e5866857049826b7096540b8 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 9 Aug 2024 09:34:32 +1000 Subject: [PATCH 08/22] feat(nextflow.config): Added new param variables and defaults to the config file --- nextflow.config | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/nextflow.config b/nextflow.config index ec71afb5..691ee289 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,6 +20,48 @@ params { full_dbs = false // true full_dbs, false reduced_dbs alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} alphafold2_db = null + + // Database prefixes + bfd_prefix = null + smallbfd_prefix = null + mgnify_prefix = null + pdb70_prefix = null + pdb_mmcif_prefix = null + uniclust30_prefix = null + uniref90_prefix = null + pdb_seq_prefix = null + uniprot_prefix = null + alphafold_params_prefix = null + mmcif_path = null + mmcif_obsolete = null + uniclust30_db = null + bfd_first_non_consensus_sequences = null + uniprot_fasta = null + pdb_seqres_txt = null + bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt = null + uniref90_fasta = null + mgy_clusters_fasta = null + uniclust30_prefix = null + + bfd_name = null + smallbfd_name = null + mgnify_name = null + pdb70_name = null + pdb_mmcif_name = null + uniclust30_name = null + 
uniref90_name = null + pdb_seqres_name = null + uniprot_name = null + alphafold_params_name = null + mmcif_files_name = null + mmcif_obsolete_name = null + uniclust30_db_name = null + bfd_first_non_consensus_sequences_name = null + uniprot_fasta_name = null + pdb_seqres_txt_name = null + bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = null + uniref90_fasta_name = null + mgy_clusters_fasta_name = null // Alphafold2 links bfd = null From afeb1226dd023a6c199976aacd94694c6a033957 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 9 Aug 2024 12:38:11 +1000 Subject: [PATCH 09/22] feat(dbs): Made variables global --- conf/dbs.config | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 257b7d18..5c34c852 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -22,26 +22,26 @@ params { uniprot_sprot = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' uniprot_trembl = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' - def bfd_name = params.bfd_prefix ?: 'bfd' - def smallbfd_name = params.smallbfd_prefix ?: 'smallbfd' - def mgnify_name = params.mgnify_prefix ?: 'mgnify' - def pdb70_name = params.pdb70_prefix ?: 'pdb70' - def pdb_mmcif_name = params.pdb_mmcif_prefix ?: 'pdb_mmcif' - def uniclust30_name = params.uniclust30_prefix ?: 'uniclust30' - def uniref90_name = params.uniref90_prefix ?: 'uniref90' - def pdb_seqres_name = params.pdb_seq_prefix ?: 'pdb_seqres' - def uniprot_name = params.uniprot_prefix ?: 'uniprot' - def alphafold_params_name = params.alphafold_params_prefix ?: 'alphafold_params_*' - def mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/' - def mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat' + bfd_name = params.bfd_prefix ?: 'bfd' + smallbfd_name = params.smallbfd_prefix ?: 'smallbfd' + mgnify_name = 
params.mgnify_prefix ?: 'mgnify' + pdb70_name = params.pdb70_prefix ?: 'pdb70' + pdb_mmcif_name = params.pdb_mmcif_prefix ?: 'pdb_mmcif' + uniclust30_name = params.uniclust30_prefix ?: 'uniclust30' + uniref90_name = params.uniref90_prefix ?: 'uniref90' + pdb_seqres_name = params.pdb_seq_prefix ?: 'pdb_seqres' + uniprot_name = params.uniprot_prefix ?: 'uniprot' + alphafold_params_name = params.alphafold_params_prefix ?: 'alphafold_params_*' + mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/' + mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat' - def uniclust30_db_name = params.uniclust30_db ?: 'uniclust30_2018_08' - def bfd_first_non_consensus_sequences_name = params.bfd_first_non_consensus_sequences ?: 'bfd-first_non_consensus_sequences.fasta' - def uniprot_fasta_name = params.uniprot_fasta ?: 'uniprot.fasta' - def pdb_seqres_txt_name = params.pdb_seqres_txt ?: 'pdb_seqres.txt' - def bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt ?: 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt' - def uniref90_fasta_name = params.uniref90_fasta ?: 'uniref90.fasta' - def mgy_clusters_fasta_name = params.mgy_clusters_fasta ?: 'mgy_clusters_2022_05.fa' + uniclust30_db_name = params.uniclust30_db ?: 'uniclust30_2018_08' + bfd_first_non_consensus_sequences_name = params.bfd_first_non_consensus_sequences ?: 'bfd-first_non_consensus_sequences.fasta' + uniprot_fasta_name = params.uniprot_fasta ?: 'uniprot.fasta' + pdb_seqres_txt_name = params.pdb_seqres_txt ?: 'pdb_seqres.txt' + bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt ?: 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt' + uniref90_fasta_name = params.uniref90_fasta ?: 'uniref90.fasta' + mgy_clusters_fasta_name = params.mgy_clusters_fasta ?: 'mgy_clusters_2022_05.fa' // Alphafold paths From 
3d615b75f76cb6d2bc8dcc991e0378d47f748e79 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:19:28 +1000 Subject: [PATCH 10/22] fix(dbs): Changed database directory default --- conf/dbs.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/dbs.config b/conf/dbs.config index 5c34c852..7d06e41a 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -31,7 +31,7 @@ params { uniref90_name = params.uniref90_prefix ?: 'uniref90' pdb_seqres_name = params.pdb_seq_prefix ?: 'pdb_seqres' uniprot_name = params.uniprot_prefix ?: 'uniprot' - alphafold_params_name = params.alphafold_params_prefix ?: 'alphafold_params_*' + alphafold_params_name = params.alphafold_params_prefix ?: 'params/alphafold_params_*' mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/' mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat' From cb292568f827613e116316a30893741e1e02b36e Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:20:37 +1000 Subject: [PATCH 11/22] feat(katana): Temporarily removed PBS job scheduling --- conf/katana.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/katana.config b/conf/katana.config index a344ffc0..67f310a7 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -19,7 +19,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - executor = 'pbspro' + //executor = 'pbspro' errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' } maxRetries = 1 From 5829301a5cc29d5b2d5b09b8b53e4ca05c975de6 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:22:29 +1000 Subject: [PATCH 12/22] fix(run_alphafold2): Fixed copy command to point to the correct directory --- modules/local/run_alphafold2.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 5cf964dd..30163279 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -43,7 +43,7 @@ process RUN_ALPHAFOLD2 { alphafold2_model_preset += " --pdb70_database_path=${params.pdb_dir_path}${params.pdb70_name} " } """ - if [ -f ${params.pdb_seqres_dir_path}/${params.pbd_seqres_txt_name} ] + if [ -f ${params.pdb_seqres_dir_path}/${params.pdb_seqres_txt_name} ] \$PDB_SEQRES_TEMP=\$(mktemp --directory) cp ${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} \${PDB_SEQRES_TEMP}/ then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/${params.pdb_seqres_txt_name} From 485e400f5198b94bff46f0a90b784d4f1fbddf7b Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:23:33 +1000 Subject: [PATCH 13/22] fix(run_alphafold2): Updated paths to point to the correct uniclust database --- modules/local/run_alphafold2_msa.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index ac3bd143..89667a73 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -35,7 +35,7 @@ process RUN_ALPHAFOLD2_MSA { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_dir_path}${params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name} --uniclust30_database_path=${params.uniclust30_dir_path}${uniclust30_db_name}" : + def db_preset = db_preset ? 
"full_dbs --bfd_database_path=${params.bfd_dir_path}${params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name} --uniclust30_database_path=${params.uniclust30_dir_path}${params.uniclust30_db_name}" : "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}${params.bfd_first_non_consensus_sequences_name}" if (alphafold2_model_preset == 'multimer') { alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} --uniprot_database_path=${params.uniprot_dir_path}/${params.uniprot_fasta_name} " From d008f38feec0e74c8704f252c84883059eb53437 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:26:32 +1000 Subject: [PATCH 14/22] fix(run_alphafold2): Fixed typo --- modules/local/run_alphafold2_msa.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 89667a73..d4fa4f53 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -44,7 +44,7 @@ process RUN_ALPHAFOLD2_MSA { alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_dir_path}${params.pdb70_name} " } """ - #if [ -f ${params.pdb_seqres_dir_path}/${params.pbd_seqres_txt_name} ] + #if [ -f ${params.pdb_seqres_dir_path}/${params.pdb_seqres_txt_name} ] # \$PDB_SEQRES_TEMP=\$(mktemp --directory) # cp ${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} \${PDB_SEQRES_TEMP}/ # then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/${params.pdb_seqres_txt_name} From a0dbd9c7ac3aaa8dc78a0af61aa5ab755d7e82ff Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:37:28 +1000 Subject: [PATCH 15/22] feat(run_alphafold2): Added symlink for params file --- modules/local/run_alphafold2_pred.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index 9f6d20a7..cd7c93eb 100644 --- a/modules/local/run_alphafold2_pred.nf 
+++ b/modules/local/run_alphafold2_pred.nf @@ -38,7 +38,9 @@ process RUN_ALPHAFOLD2_PRED { def args = task.ext.args ?: '' """ echo \$PWD - #if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi + if [ -d ${params.alphafold2_db}/${params.alphafold2_params_path} ]; + then ln -r -s params/alphafold_params_*/* params/ + fi python3 /app/alphafold/run_predict.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ From 598cc269a40c52b7c1316aadffa7e6d34150a0eb Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:42:26 +1000 Subject: [PATCH 16/22] feat(nextflow): Changed default to use GPU --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 691ee289..8612912c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,7 +12,7 @@ params { // Input options input = null mode = 'alphafold2' // {alphafold2, colabfold} - use_gpu = false + use_gpu = true // Alphafold2 parameters alphafold2_mode = "standard" From eba441254ab202113940a48da7e5300d0ded1e69 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:44:15 +1000 Subject: [PATCH 17/22] feat(nextflow): Included katana config --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 8612912c..775eb22a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -147,7 +147,7 @@ params { } // Load base.config by default for all pipelines -includeConfig 'conf/base.config' +includeConfig 'conf/katana.config' // Load nf-core custom profiles from different Institutions try { From 4811d3cc235625b08a9930174332aef0840b44b1 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:44:54 +1000 Subject: [PATCH 18/22] feat(test): Added options to katana tests --- pf_files/proteinfold_test.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh 
index 9e4883ce..3bdbc024 100755 --- a/pf_files/proteinfold_test.sh +++ b/pf_files/proteinfold_test.sh @@ -1,15 +1,16 @@ module load nextflow/23.04.4 java/11 -export SINGULARITY_CACHE_DIR=/srv/scratch/sbf/singularity_cache -export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cache +export SINGULARITY_CACHE_DIR=/srv/scratch/$USER/Singularity/cache +export NXF_SINGULARITY_CACHEDIR=/srv/scratch/$USER/Singularity/cache nextflow run ../main.nf \ --input samplesheet.csv \ --outdir test_out \ --mode alphafold2 \ - --alphafold2_db /data/bio/alphafold \ + --alphafold2_db /mnt/af2/ \ --full_dbs true \ - --alphafold2_model_preset monomer \ + --alphafold2_model_preset multimer \ + --alphafold_params_name 'params' \ --alphafold2_mode 'split_msa_prediction' \ --use_gpu true \ -profile singularity \ From 92121114426ba67855a3c2245fa4d73647c4db59 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:48:56 +1000 Subject: [PATCH 19/22] revert(nextflow): Changed default GPU to false --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 775eb22a..c1a353e1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,7 +12,7 @@ params { // Input options input = null mode = 'alphafold2' // {alphafold2, colabfold} - use_gpu = true + use_gpu = false // Alphafold2 parameters alphafold2_mode = "standard" From 07954691cefc137e7f19ee271b3920c415b9ca84 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 15:31:39 +1000 Subject: [PATCH 20/22] revert(nextflow): Changed config back to base config --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index c1a353e1..691ee289 100644 --- a/nextflow.config +++ b/nextflow.config @@ -147,7 +147,7 @@ params { } // Load base.config by default for all pipelines -includeConfig 'conf/katana.config' +includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions try { 
From 5fe8e1df99ff1df65575a5d0ae757f3f19a2e19b Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 5 Sep 2024 16:02:24 +1000 Subject: [PATCH 21/22] feat(katana): Added katana config --- modules/local/run_alphafold2_pred.nf | 8 ++++---- nextflow.config | 4 +++- pf_files/proteinfold_test.sh | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index cd7c93eb..9b371f86 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -9,7 +9,6 @@ process RUN_ALPHAFOLD2_PRED { 'docker://nfcore/proteinfold_alphafold2_split:1.0.0' : 'nfcore/proteinfold_alphafold2_split:1.0.0' }" echo 'true' - input: tuple val(meta), path(fasta) val db_preset @@ -38,9 +37,10 @@ process RUN_ALPHAFOLD2_PRED { def args = task.ext.args ?: '' """ echo \$PWD - if [ -d ${params.alphafold2_db}/${params.alphafold2_params_path} ]; - then ln -r -s params/alphafold_params_*/* params/ - fi + #if [ -d ${params.alphafold2_params_path} ]; + #then + ln -r -f -s ${params.alphafold2_params_path}* params/ + #fi python3 /app/alphafold/run_predict.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ diff --git a/nextflow.config b/nextflow.config index 691ee289..69cc7ad2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -146,8 +146,10 @@ params { } +spack.enabled = true + // Load base.config by default for all pipelines -includeConfig 'conf/base.config' +includeConfig 'conf/katana.config' // Load nf-core custom profiles from different Institutions try { diff --git a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh index 3bdbc024..396a63f1 100755 --- a/pf_files/proteinfold_test.sh +++ b/pf_files/proteinfold_test.sh @@ -1,4 +1,4 @@ -module load nextflow/23.04.4 java/11 +module load nextflow/23.04.4 java/11 cuda/11.8.0 export SINGULARITY_CACHE_DIR=/srv/scratch/$USER/Singularity/cache export NXF_SINGULARITY_CACHEDIR=/srv/scratch/$USER/Singularity/cache From 
9af55b19269bca2fb9de8565ef1b71cac7d2a279 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 10 Oct 2024 12:24:32 +1100 Subject: [PATCH 22/22] feat(style): pushing uncommitted changes --- modules/local/run_alphafold2.nf | 40 +++++++++++++++++----------- modules/local/run_alphafold2_msa.nf | 39 ++++++++++++++------------- modules/local/run_alphafold2_pred.nf | 23 ++++++++-------- 3 files changed, 57 insertions(+), 45 deletions(-) diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 30163279..013f3ed1 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -5,9 +5,12 @@ process RUN_ALPHAFOLD2 { tag "$meta.id" label 'process_medium' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://nfcore/proteinfold_alphafold2_standard:1.0.0' : - 'nfcore/proteinfold_alphafold2_standard:1.0.0' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2 module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_standard:1.1.1" input: tuple val(meta), path(fasta) @@ -19,7 +22,7 @@ process RUN_ALPHAFOLD2 { path ('mgnify/*') path ('pdb70/*') path ('pdb_mmcif/*') - path ('uniclust30/*') + path ('uniref30/*') path ('uniref90/*') path ('pdb_seqres/*') path ('uniprot/*') @@ -34,31 +37,35 @@ process RUN_ALPHAFOLD2 { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_dir_path}${params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name} --uniclust30_database_path=${params.uniclust30_dir_path}${params.uniclust30_db_name}" : - "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}${params.bfd_first_non_consensus_sequences_name}" + def db_preset = db_preset ? 
"full_dbs --bfd_database_path=${params.alphafold2_db}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=${params.alphafold2_db}/uniref30/UniRef30_2021_03" : + "reduced_dbs --small_bfd_database_path=${params.alphafold2_db}/small_bfd/bfd-first_non_consensus_sequences.fasta" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} --uniprot_database_path=${params.uniprot_dir_path}${params.uniprot_fasta_name} " + alphafold2_model_preset += " --pdb_seqres_database_path=${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt --uniprot_database_path=${params.alphafold2_db}/uniprot/uniprot.fasta " } else { - alphafold2_model_preset += " --pdb70_database_path=${params.pdb_dir_path}${params.pdb70_name} " + alphafold2_model_preset += " --pdb70_database_path=${params.alphafold2_db}/pdb70/pdb70_from_mmcif_200916/pdb70 " } """ - if [ -f ${params.pdb_seqres_dir_path}/${params.pdb_seqres_txt_name} ] - \$PDB_SEQRES_TEMP=\$(mktemp --directory) - cp ${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} \${PDB_SEQRES_TEMP}/ - then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/${params.pdb_seqres_txt_name} + RUNTIME_TMP=\$(mktemp -d) + nvcc --version 2>&1 | tee /home/z3545907/nvcc.txt + nvidia-smi 2>&1 | tee /home/z3545907/nvidia-smi.txt + if [ -f ${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt ] + cp ${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt \${RUNTIME_TMP} + then sed -i "/^\\w*0/d" \${RUNTIME_TMP}/pdb_seqres.txt fi - if [ -d ${params.alphafold2_params_path} ]; then ln -r -s ${params.alphafold2_params_path} params/; fi + if [ -d ${params.alphafold2_db}/params/ ]; then ln -r -s ${params.alphafold2_db}/params params; fi python3 /app/alphafold/run_alphafold.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ - 
--uniref90_database_path=${params.uniref90_dir_path}uniref90.fasta \ - --template_mmcif_dir=${params.template_mmcif_dir} \ - --obsolete_pdbs_path=${params.obsolete_pdbs_path} \ + --uniref90_database_path=${params.alphafold2_db}/uniref90/uniref90.fasta \ + --mgnify_database_path=${params.alphafold2_db}/mgnify/mgy_clusters_2022_05.fa \ + --template_mmcif_dir=${params.alphafold2_db}/pdb_mmcif/mmcif_files \ + --obsolete_pdbs_path=${params.alphafold2_db}/pdb_mmcif/obsolete.dat \ --random_seed=53343 \ + --use_gpu_relax \ $args cp "${fasta.baseName}"/ranked_0.pdb ./"${fasta.baseName}".alphafold.pdb @@ -71,6 +78,7 @@ process RUN_ALPHAFOLD2 { echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv cd .. + rm -rf "\${RUNTIME_TMP}" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index d4fa4f53..7878d9d1 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -4,11 +4,13 @@ process RUN_ALPHAFOLD2_MSA { tag "$meta.id" label 'process_medium' - debug true - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://nfcore/proteinfold_alphafold2_msa:1.0.0' : - 'nfcore/proteinfold_alphafold2_msa:1.0.0' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2_MSA module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_msa:1.1.1" input: tuple val(meta), path(fasta) @@ -20,7 +22,7 @@ process RUN_ALPHAFOLD2_MSA { path ('mgnify/*') path ('pdb70/*') path ('pdb_mmcif/*') - path ('uniclust30/*') + path ('uniref30/*') path ('uniref90/*') path ('pdb_seqres/*') path ('uniprot/*') @@ -35,33 +37,34 @@ process RUN_ALPHAFOLD2_MSA { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_dir_path}${params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name} --uniclust30_database_path=${params.uniclust30_dir_path}${params.uniclust30_db_name}" : - "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}${params.bfd_first_non_consensus_sequences_name}" + def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.alphafold2_db}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=${params.alphafold2_db}/uniref30/UniRef30_2021_03" : + "reduced_dbs --small_bfd_database_path=${params.alphafold2_db}/small_bfd/bfd-first_non_consensus_sequences.fasta" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} --uniprot_database_path=${params.uniprot_dir_path}/${params.uniprot_fasta_name} " + alphafold2_model_preset += " --pdb_seqres_database_path=${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt --uniprot_database_path=${params.alphafold2_db}/uniprot/uniprot.fasta " } else { - alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_dir_path}${params.pdb70_name} " + alphafold2_model_preset += " --pdb70_database_path=${params.alphafold2_db}/pdb70/pdb70_from_mmcif_200916/pdb70 " } """ - #if [ -f ${params.pdb_seqres_dir_path}/${params.pdb_seqres_txt_name} ] - # \$PDB_SEQRES_TEMP=\$(mktemp --directory) - # cp ${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} 
\${PDB_SEQRES_TEMP}/ - # then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/${params.pdb_seqres_txt_name} - #fi + RUNTIME_TMP=\$(mktemp -d) + if [ -f ${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt ] + cp ${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt \${RUNTIME_TMP} + then sed -i "/^\\w*0/d" \${RUNTIME_TMP}/pdb_seqres.txt + fi python3 /app/alphafold/run_msa.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ - --uniref90_database_path=${params.uniref90_dir_path}/${params.uniref90_fasta_name} \ - --mgnify_database_path=${params.mgnify_database_path}/${params.mgy_clusters_fasta_name} \ - --template_mmcif_dir=${params.template_mmcif_dir} \ - --obsolete_pdbs_path=${params.obsolete_pdbs_path} \ + --uniref90_database_path=${params.alphafold2_db}/uniref90/uniref90.fasta \ + --mgnify_database_path=${params.alphafold2_db}/mgnify/mgy_clusters_2022_05.fa \ + --template_mmcif_dir=${params.alphafold2_db}/pdb_mmcif/mmcif_files \ + --obsolete_pdbs_path=${params.alphafold2_db}/pdb_mmcif/obsolete.dat \ $args cp "${fasta.baseName}"/features.pkl ./"${fasta.baseName}".features.pkl + rm -rf "\${RUNTIME_TMP}" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index 9b371f86..3f34c95f 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -3,12 +3,16 @@ */ process RUN_ALPHAFOLD2_PRED { tag "$meta.id" - label 'process_medium', 'gpu_compute' + label 'process_medium' + label 'gpu_compute' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2_PRED module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_split:1.1.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://nfcore/proteinfold_alphafold2_split:1.0.0' : - 'nfcore/proteinfold_alphafold2_split:1.0.0' }" - echo 'true' input: tuple val(meta), path(fasta) val db_preset @@ -19,7 +23,7 @@ process RUN_ALPHAFOLD2_PRED { path ('mgnify/*') path ('pdb70/*') path ('pdb_mmcif/*') - path ('uniclust30/*') + path ('uniref30/*') path ('uniref90/*') path ('pdb_seqres/*') path ('uniprot/*') @@ -36,11 +40,7 @@ process RUN_ALPHAFOLD2_PRED { script: def args = task.ext.args ?: '' """ - echo \$PWD - #if [ -d ${params.alphafold2_params_path} ]; - #then - ln -r -f -s ${params.alphafold2_params_path}* params/ - #fi + if [ -d ${params.alphafold2_db}/params/ ]; then ln -r -s ${params.alphafold2_db}/params params; fi python3 /app/alphafold/run_predict.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ @@ -48,6 +48,7 @@ process RUN_ALPHAFOLD2_PRED { --data_dir=\$PWD \ --random_seed=53343 \ --msa_path=${msa} \ + --use_gpu_relax \ $args cp "${fasta.baseName}"/ranked_0.pdb ./"${fasta.baseName}".alphafold.pdb