From 0962f916e4f523acecb3c7149601adffa74974c7 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Tue, 30 Jul 2024 17:52:03 +1000 Subject: [PATCH 1/9] fix(proteinfold_test.sh): Made path to main.nf rel --- pf_files/proteinfold_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh index 9d66e6dd..10fc0b14 100755 --- a/pf_files/proteinfold_test.sh +++ b/pf_files/proteinfold_test.sh @@ -3,7 +3,7 @@ module load nextflow/23.04.4 java/11 export SINGULARITY_CACHE_DIR=/srv/scratch/sbf/singularity_cache export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cache -nextflow run /srv/scratch/z5378336/proteinfold/main.nf \ +nextflow run ../main.nf \ --input samplesheet.csv \ --outdir test_out \ --mode alphafold2 \ From 992d6d1c90c46039a105073fb3b65c45502174bc Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Tue, 30 Jul 2024 17:54:50 +1000 Subject: [PATCH 2/9] revert(base.config): Changed executor back to local for testing as cluster tooling is incomplete --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index e9b41fad..b7f6a482 100644 --- a/conf/base.config +++ b/conf/base.config @@ -19,7 +19,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - executor = 'pbspro' + //executor = 'pbspro' errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' } maxRetries = 1 From 32d466c73c676566352d536ca7523de3544f6d49 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Tue, 30 Jul 2024 18:00:02 +1000 Subject: [PATCH 3/9] fix(proteinfold_test.sh): Changed mode to 'split_msa_prediction' --- pf_files/proteinfold_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh index 10fc0b14..d405fed6 100755 --- a/pf_files/proteinfold_test.sh +++ b/pf_files/proteinfold_test.sh @@ -10,5 +10,6 @@ nextflow run ../main.nf \ --alphafold2_db /data/bio/alphafold \ --full_dbs true \ --alphafold2_model_preset monomer \ + --alphafold2_mode 'split_msa_prediction' \ --use_gpu true \ -profile singularity From b3140e735c4202a39a5847d37c160b56dc420a8a Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 10:09:51 +1000 Subject: [PATCH 4/9] fix(dbs.conf): Updated dbs.conf to work on UNSW infrastructure --- conf/dbs.config | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index c3617f49..4b2d0e4f 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -25,11 +25,13 @@ params { // Alphafold paths bfd_path = "${params.alphafold2_db}/bfd/*" small_bfd_path = "${params.alphafold2_db}/small_bfd/*" - alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" + //alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" + alphafold2_params_path = "${params.alphafold2_db}/params/*" mgnify_path = "${params.alphafold2_db}/mgnify/*" pdb70_path = "${params.alphafold2_db}/pdb70/**" pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/**" - uniclust30_path = "${params.alphafold2_db}/uniclust30/**" + //uniclust30_path = "${params.alphafold2_db}/uniclust30/**" + uniclust30_path = "/srv/scratch/sbf/uniclust30/**" uniref90_path = "${params.alphafold2_db}/uniref90/*" pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" uniprot_path = "${params.alphafold2_db}/uniprot/*" @@ -40,11 +42,17 @@ 
params { mgnify_variable = "${params.alphafold2_db}/mgnify/" pdb70_variable = "${params.alphafold2_db}/pdb70/" pdb_mmcif_variable = "${params.alphafold2_db}/pdb_mmcif/" - uniclust30_variable = "${params.alphafold2_db}/uniclust30/" + //uniclust30_variable = "${params.alphafold2_db}/uniclust30/" + uniclust30_variable = "/srv/scratch/sbf/uniclust30/" uniref90_variable = "${params.alphafold2_db}/uniref90/" pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/" uniprot_variable = "${params.alphafold2_db}/uniprot/" + // Alphafold MSA Variables + mgnify_database_path = "${params.alphafold2_db}/mgnify/" + template_mmcif_dir = "${params.alphafold2_db}/pdb_mmcif/mmcif_files/" + obsolete_pdbs_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat" + // Colabfold links colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' uniref30 = 'https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2202.tar.gz' From 4047e62432d461d4e5d36f44cb6532093ddb5cd0 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 10:10:27 +1000 Subject: [PATCH 5/9] fix(run_alphafold2_msa): Fixed incorrectly named files --- modules/local/run_alphafold2_msa.nf | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 78278810..10d217ca 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -4,6 +4,7 @@ process RUN_ALPHAFOLD2_MSA { tag "$meta.id" label 'process_medium' + debug true container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'docker://nfcore/proteinfold_alphafold2_msa:1.0.0' : @@ -34,28 +35,30 @@ process RUN_ALPHAFOLD2_MSA { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? 
"full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=./uniclust30/uniclust30_2018_08/uniclust30_2018_08" : - "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" + def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_variable}bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=${params.uniclust30_variable}uniclust30_2018_08" : + "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}bfd-first_non_consensus_sequences.fasta" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=./pdb_seqres/pdb_seqres.txt --uniprot_database_path=./uniprot/uniprot.fasta " + alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_variable}pdb_seqres.txt --uniprot_database_path=${params.uniprot_variable}/uniprot.fasta " } else { - alphafold2_model_preset += " --pdb70_database_path=./pdb70/pdb70_from_mmcif_200916/pdb70 " + alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_variable}pdb70 " } """ - if [ -f pdb_seqres/pdb_seqres.txt ] - then sed -i "/^\\w*0/d" pdb_seqres/pdb_seqres.txt - fi + #if [ -f pdb_seqres/pdb_seqres.txt ] + # \$PDB_SEQRES_TEMP=\$(mktemp --directory) + # cp ${params.pdb_seqres_variable}pdb_seqres.txt \${PDB_SEQRES_TEMP}/ + # then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/pdb_seqres.txt + #fi python3 /app/alphafold/run_msa.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ - --uniref90_database_path=./uniref90/uniref90.fasta \ - --mgnify_database_path=./mgnify/mgy_clusters_2018_12.fa \ - --template_mmcif_dir=./pdb_mmcif/mmcif_files \ - --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ + --uniref90_database_path=${params.uniref90_variable}uniref90.fasta \ + --mgnify_database_path=${params.mgnify_database_path}/mgy_clusters_2022_05.fa \ + 
--template_mmcif_dir=${params.template_mmcif_dir} \ --obsolete_pdbs_path=${params.obsolete_pdbs_path} \ $args cp "${fasta.baseName}"/features.pkl ./"${fasta.baseName}".features.pkl From 93513bca5d2fcb36dab0dc4a95ecdc293bf180c1 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 10:11:10 +1000 Subject: [PATCH 6/9] fix(run_alphafold2_pred): Fixed incorrectly named files --- modules/local/run_alphafold2_pred.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index 7df9578d..9f6d20a7 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -8,6 +8,7 @@ process RUN_ALPHAFOLD2_PRED { container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'docker://nfcore/proteinfold_alphafold2_split:1.0.0' : 'nfcore/proteinfold_alphafold2_split:1.0.0' }" + echo 'true' input: tuple val(meta), path(fasta) @@ -36,7 +37,8 @@ process RUN_ALPHAFOLD2_PRED { script: def args = task.ext.args ?: '' """ - if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi + echo \$PWD + #if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi python3 /app/alphafold/run_predict.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ From a007d5a2ff272879048831c4e24f1fc750fc6018 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 10:11:44 +1000 Subject: [PATCH 7/9] fix(proteinfold_test.sh): Added singularity argument --- pf_files/proteinfold_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh index d405fed6..9e4883ce 100755 --- a/pf_files/proteinfold_test.sh +++ b/pf_files/proteinfold_test.sh @@ -12,4 +12,4 @@ nextflow run ../main.nf \ --alphafold2_model_preset monomer \ --alphafold2_mode 'split_msa_prediction' \ --use_gpu true \ - -profile 
singularity + -profile singularity \ From 232c8c9b1fac0dddf4d862851111f1c39ee0d7c7 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 10:12:24 +1000 Subject: [PATCH 8/9] fix(samplesheet): Changed sample to a much smaller sample --- pf_files/samplesheet.csv | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pf_files/samplesheet.csv b/pf_files/samplesheet.csv index f450a551..10fdfdb9 100644 --- a/pf_files/samplesheet.csv +++ b/pf_files/samplesheet.csv @@ -1,3 +1,2 @@ sequence,fasta -T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta -T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta \ No newline at end of file +1L2Y,/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/pf_files/1L2Y.fasta From 03f257563ce2e14822fee7f0c342cda43185e3bb Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 10:12:50 +1000 Subject: [PATCH 9/9] fix(samplesheet): Changed sample to a smaller sample --- pf_files/test_out/pipeline_info/samplesheet.valid.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pf_files/test_out/pipeline_info/samplesheet.valid.csv b/pf_files/test_out/pipeline_info/samplesheet.valid.csv index 570c3304..b0a380eb 100644 --- a/pf_files/test_out/pipeline_info/samplesheet.valid.csv +++ b/pf_files/test_out/pipeline_info/samplesheet.valid.csv @@ -1,2 +1,2 @@ sequence,fasta -1L2Y_T1,./1L2Y.fasta +1L2Y_T1,/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/pf_files/1L2Y.fasta