From 8cc32e1eb161905bc939e86c98fb68b32aedd2fb Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 15 Nov 2024 13:11:49 +1100 Subject: [PATCH] Updated rosettafold_all_atom files --- conf/dbs.config | 14 +++++++++++- modules/local/run_rosettafold_all_atom.nf | 28 ++++++++++++----------- workflows/rosettafold_all_atom.nf | 2 +- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 4ec169e8..fbbcfd13 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -49,7 +49,7 @@ params { ] // RoseTTAFold links - uniref30 = 'http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz' + uniref30 = 'http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz' pdb100 = 'https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz' // RoseTTAFold paths @@ -57,6 +57,18 @@ params { pdb100_path = "${params.rosettafold_all_atom_db}/pdb100_2021Mar03/*" bfd_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/bfd/*" + // Helixfold3 links + uniclust30_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz' + ccd_preprocessed_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/CCD/ccd_preprocessed_etkdg.pkl.gz' + rfam_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/MSA/Rfam-14.9_rep_seq.fasta' + helixfold3_init_models_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/params/HelixFold3-params-240814.zip' + + // Helixfold3 paths + uniclust30_path = "${params.helixfold3_db}/uniclust30/*" + ccd_preprocessed_path = "${params.helixfold3_db}/ccd_preprocessed_etkdg.pkl.gz" + rfam_path = "${params.helixfold3_db}/Rfam-14.9_rep_seq.fasta" + helixfold3_init_models_path = "${params.helixfold3_db}" + // Esmfold links esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' esm2_t36_3B_UR50D = 'https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt' diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index e3510fa4..7f4fda0a 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -1,10 +1,10 @@ /* - * Run RoseTTAFold_All_Atom + * Run RoseTTAFold_All_Atom */ process RUN_ROSETTAFOLD_ALL_ATOM { tag "$meta.id" - label 'process_medium' label 'gpu_compute' + label 'process_medium' // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { @@ -18,11 +18,12 @@ process RUN_ROSETTAFOLD_ALL_ATOM { path ('bfd/*') path ('UniRef30_2020_06/*') path ('pdb100_2021Mar03/*') - + output: path ("${fasta.baseName}*") - tuple val(meta), path ("*pdb"), emit: pdb - tuple val(meta), path ("*_mqc.tsv"), emit: multiqc + tuple val(meta), path ("${meta.id}_rosettafold_all_atom.pdb") , emit: main_pdb + tuple val(meta), path ("*pdb") , emit: pdb + tuple val(meta), path ("*_mqc.tsv") , emit: multiqc path "versions.yml", emit: versions when: @@ -33,14 +34,15 @@ process RUN_ROSETTAFOLD_ALL_ATOM { ln -s /app/RoseTTAFold-All-Atom/* . mamba run --name RFAA python -m rf2aa.run_inference \ - --config-dir $PWD \ - --config-path $PWD \ + loader_params.MAXCYCLE=1 \ + checkpoint_path="/srv/scratch/sbf/rfaa/RFAA_paper_weights.pt" \ + --config-dir /app/RoseTTAFold-All-Atom/rf2aa/config/inference \ --config-name "${fasta}" - cp "${fasta.baseName}".pdb ./"${fasta.baseName}".rosettafold_all_atom.pdb - awk '{print \$6"\\t"\$11}' "${fasta.baseName}".rosettafold_all_atom.pdb | uniq > plddt.tsv - echo -e Positions"\\t" > header.tsv - cat header.tsv plddt.tsv > "${fasta.baseName}"_plddt_mqc.tsv + cp "${fasta.baseName}".pdb ./"${meta.id}"_rosettafold_all_atom.pdb + awk '{print \$6"\\t"\$11}' "${meta.id}"_rosettafold_all_atom.pdb | uniq > plddt.tsv + echo -e Positions"\\t""${meta.id}"_rosettafold_all_atom.pdb > header.tsv + cat header.tsv plddt.tsv > "${meta.id}"_plddt_mqc.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -50,8 +52,8 @@ process RUN_ROSETTAFOLD_ALL_ATOM { stub: """ - touch ./"${fasta.baseName}".rosettafold_all_atom.pdb - touch ./"${fasta.baseName}"_mqc.tsv + touch ./"${meta.id}"_rosettafold_all_atom.pdb + touch ./"${meta.id}"_mqc.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/workflows/rosettafold_all_atom.nf b/workflows/rosettafold_all_atom.nf index 21026ef7..4861e35d 100644 --- a/workflows/rosettafold_all_atom.nf +++ b/workflows/rosettafold_all_atom.nf @@ -45,7 +45,7 @@ workflow ROSETTAFOLD_ALL_ATOM { main: ch_multiqc_files = Channel.empty() - + // // SUBWORKFLOW: Run Rosettafold_All_Atom //