From 74d296b81576ba2156051780de47644ba3815f64 Mon Sep 17 00:00:00 2001 From: Ekaterina Sakharova Date: Thu, 3 Oct 2024 16:41:02 +0100 Subject: [PATCH] added meta --- assets/methods_description_template.yml | 35 ++ assets/mgnify_logo.png | Bin 0 -> 5916 bytes assets/multiqc_config.yml | 61 ++ configs/conda.config | 40 +- configs/modules.config | 545 +++++++++++++++--- configs/node.config | 30 - modules/local/annotation/main.nf | 8 +- modules/local/assign/main.nf | 12 +- modules/local/balloon/main.nf | 16 +- modules/local/blast/main.nf | 6 +- modules/local/blast_filter/main.nf | 10 +- modules/local/checkv/main.nf | 21 +- modules/local/chromomap/main.nf | 19 +- modules/local/fastp/main.nf | 21 - modules/local/fastqc/main.nf | 15 - modules/local/filter_reads/main.nf | 19 - modules/local/get_db/checkv.nf | 13 +- modules/local/get_db/imgvr.nf | 4 +- modules/local/get_db/kaiju.nf | 3 +- modules/local/get_db/meta.nf | 5 +- modules/local/get_db/ncbi.nf | 4 +- modules/local/get_db/pvogs.nf | 5 +- modules/local/get_db/rvdb.nf | 4 +- modules/local/get_db/viphog.nf | 4 +- modules/local/get_db/virfinder.nf | 4 +- modules/local/get_db/virsorter.nf | 4 +- modules/local/get_db/vogdb.nf | 4 +- modules/local/get_db/vpf.nf | 4 +- modules/local/help.nf | 2 +- modules/local/hmm_postprocessing/main.nf | 8 +- modules/local/hmmscan/main.nf | 18 +- modules/local/kaiju/main.nf | 20 +- modules/local/krona/main.nf | 22 +- modules/local/length_filtering/main.nf | 10 +- modules/local/mashmap/main.nf | 8 +- modules/local/multiqc/main.nf | 15 - modules/local/parse/main.nf | 8 +- modules/local/phanotate/main.nf | 6 +- modules/local/plot_contig_map/main.nf | 6 +- modules/local/pprmeta/main.nf | 11 +- modules/local/prodigal/main.nf | 8 +- modules/local/ratio_evalue/main.nf | 8 +- modules/local/rename/main.nf | 12 +- modules/local/restore/main.nf | 8 +- modules/local/sankey/main.nf | 16 +- modules/local/spades/main.nf | 17 - modules/local/virfinder/main.nf | 10 +- modules/local/virsorter/main.nf | 8 
+- modules/local/write_gff/main.nf | 15 +- .../checkv/endtoend/environment.yml} | 5 +- modules/nf-core/checkv/endtoend/main.nf | 63 ++ modules/nf-core/checkv/endtoend/meta.yml | 107 ++++ .../fastp/environment.yml} | 5 +- modules/nf-core/fastp/main.nf | 125 ++++ modules/nf-core/fastp/meta.yml | 113 ++++ .../fastqc/environment.yml} | 5 +- modules/nf-core/fastqc/main.nf | 64 ++ modules/nf-core/fastqc/meta.yml | 66 +++ .../multiqc/environment.yml} | 5 +- modules/nf-core/multiqc/main.nf | 63 ++ modules/nf-core/multiqc/meta.yml | 78 +++ modules/nf-core/prodigal/environment.yml | 6 + modules/nf-core/prodigal/main.nf | 64 ++ modules/nf-core/prodigal/meta.yml | 79 +++ modules/nf-core/spades/environment.yml | 5 + modules/nf-core/spades/main.nf | 102 ++++ modules/nf-core/spades/meta.yml | 151 +++++ nextflow.config | 59 +- nextflow_schema.json | 21 +- subworkflows/local/annotate.nf | 29 +- subworkflows/local/assemble_illumina.nf | 35 +- subworkflows/local/detect.nf | 12 +- subworkflows/local/preprocess.nf | 6 +- workflows/virify.nf | 42 +- 74 files changed, 1980 insertions(+), 482 deletions(-) create mode 100644 assets/methods_description_template.yml create mode 100644 assets/mgnify_logo.png create mode 100644 assets/multiqc_config.yml delete mode 100644 configs/node.config delete mode 100644 modules/local/fastp/main.nf delete mode 100644 modules/local/fastqc/main.nf delete mode 100644 modules/local/filter_reads/main.nf delete mode 100644 modules/local/multiqc/main.nf delete mode 100644 modules/local/spades/main.nf rename modules/{local/fastp/fastp.yaml => nf-core/checkv/endtoend/environment.yml} (64%) create mode 100644 modules/nf-core/checkv/endtoend/main.nf create mode 100644 modules/nf-core/checkv/endtoend/meta.yml rename modules/{local/spades/spades.yaml => nf-core/fastp/environment.yml} (64%) create mode 100644 modules/nf-core/fastp/main.nf create mode 100644 modules/nf-core/fastp/meta.yml rename modules/{local/multiqc/multiqc.yaml => nf-core/fastqc/environment.yml} 
(63%) create mode 100644 modules/nf-core/fastqc/main.nf create mode 100644 modules/nf-core/fastqc/meta.yml rename modules/{local/fastqc/fastqc.yaml => nf-core/multiqc/environment.yml} (63%) create mode 100644 modules/nf-core/multiqc/main.nf create mode 100644 modules/nf-core/multiqc/meta.yml create mode 100644 modules/nf-core/prodigal/environment.yml create mode 100644 modules/nf-core/prodigal/main.nf create mode 100644 modules/nf-core/prodigal/meta.yml create mode 100644 modules/nf-core/spades/environment.yml create mode 100644 modules/nf-core/spades/main.nf create mode 100644 modules/nf-core/spades/meta.yml diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 0000000..e01bb42 --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,35 @@ +id: "ebi-metagenomics/emg-viral-pipeline-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "ebi-metagenomics/emg-viral-pipeline Methods Description" +section_href: "https://github.com/EBI-Metagenomics/emg-viral-pipeline" +plot_type: "html" +data: | +

Methods

+

Data was processed using ebi-metagenomics/emg-viral-pipeline v${workflow.manifest.version} (${doi_text}; Krakau et al., 2022) of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

${tool_citations}

+

References

+ +
+
Notes:
+ +
diff --git a/assets/mgnify_logo.png b/assets/mgnify_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..fe6112be9062ec35b5fef00300f7bf52ab9e8970 GIT binary patch literal 5916 zcmV+%7vt!OP)pF8FWQhbW?9;ba!ELWdL_~cP?peYja~^aAhuUa%Y?FJQ@H17O6=@ zK~#90?VWpkRMoY|f9uR6frx-Sk~wn{f(Cp9c^HL)5R{i%Yw=MYm!ef{t=DViqwQ5o z`>B4)y>0JpZN2yQe(dMA+SI6MD~OgxX~6^pA%XA^5ZfXWa^_5iS0jlKGIRF2eP% z>1mx@`WD`vR-nJJbLY;$+Beod_T3fVy<_*D-5L>L$j~7V0QhN7>*mt8P*rtFym@m`NlD39drBvl zzKLs`UBIjxX8liYZf--{@6FB4sM=Arv?q0P>3g`w*#*ogp7Z{cDO1*;e{Iv|P1mnp zy?Rbh>EzP)aE-HzxUTrRX9o`+9Bcc%zP{eKb?er9dQvBszJ+U?UBvCT-~QILY17_0 z|Jt^yZ8vY+xbf>xJn_V9t5&UA(Nns)^bK6&>_TQBYDYM;EeY^_uri96xbaUS8gigC88kzJ2@9aJoSOfSRK< zfl+y*YF>W%<=s84r%Ug|O+x4*s;a84dF-*ro?2J7Zd84JeW%YG8yitsRk;wrbKS3< zOJBfsYaND}Y5)HH6B^DmjEkDlOB)&*D3R>Ow>Qq*zJ2@k-Kn2T-@zR@B`GW{^w-zd zUjbkY6pR%Rxe6;;0wR-`24@b;{J3~}@u-E17UmuN;NZwpr%utJ3kG5Q`0>ch$i)8r z`_a(Y(5}te+S<&w%iq2SKzWb+QT-TIRfiMNECDf|h(-g*0%IT))HC=j395mDk6>a? zAQ0GJU0ogRX`MSlK`r=U$~S|S=(=8+t`Y$)P^mx}p31!f;?b}kUY)KE@fbOJWVScz zT?Akrfa?Hc#giL!!5~bWG!a|2YymUJZL6rL2)eE#5{b0Ges$6Dr=NQI>Ps%Ur3=r#WbFM}^m^=1a& z2p}!vR8{pd0FxQ{T0;6xA};V3TzkYOc1Y0@NYerI!1nd<6lO#Q-C zoH}(1(dcI{>FS-;Bi?-L&4&PdCtbaJI|Ld6V3d`d-|x5W(G?XH`NEO#&l&tL01{3m z5|9PpW+LiUJBQMpbIwj%1CIl3IE1EY7X@+y&oFT{gYSfdVU8;thh4jONwr^DRf$Ox zCw02MYxl1C%sjxh!Y*xi!pFj^0Xzu%hS^3OTpHzY^2^u<^YN7&ripjg3;WLI}k35H@VsP_XKiRsSrltV=rr!N8+n+>&N9y>3Y> zUE}l)I6GiQ8uxnu%uG{-F9QAU*F505QqUO}de^mr{r+<((D^D?RDbDhz%BgfFMnh+^;k z=FjjW7K?Sd{&D@sp33c&cLL~=$a8;1Kujh1Y33OK2LT)fFaW?%0G9$7LPWi8YbBdVPK&43UsB zeIDOE@eC6H!r^d@!&*6lBk6m@WZOMGd-m+LXmKsNl@a+Pa=>`X=4}l`^nJhAA2dS7 zVx!jhZ73Ald;G}px+P1NocQvWzWi{0e*Qm_H85=0uyc1Vz4L#)UhkP?a+Q^pw;Vcj zXrxX3`UHkI$gH~hF_Q^i$6h<2Y(nLOq1^KP8C^4@#z zEnV@w6@?|gE4d+2TU*}Ul2cw@{vd!K$m-aK5Y^Sy!dlE0md458K4+MKym9UaaAlIu zHC0tFH4NjgvKdT1Gd&Jmmr+N97~49(2UY`pxk%io(dYgXx)F=I{xz|7*At5&aGJ+GlLaYFpw_x9e#%qxkg z*X4WmLIzq1C{TJ;a)B(e##w>n#tA5x3qZ;^FTVKV!A;FgMG}2FYMfAPrO3S=(nEIx 
zkbcv*8dFK@Ph95Bn^!t_?%apgytd}5L^A9Cv~FDSjN-2Yct%=VmxM5})1!5_AZ45k z-cO(tNWMrv8^AAR+G~n7U1!xdgDVL1kW70r1cSk`44y9~%sA3~uK7;bFics9NW8e} z#Xr9O#_LxlN?>SiZpPlddly2&IE#vke8-O;zaW`REEYRw7)IQ=ZgzHdmf}<9DNtrI zaUmEZ33LH7pCh7U4E`H2ZDHmz!!ScG|(xI@6F50>y$?w z6EW+l55u3Amlt;`)97zBM-Lu-*6KS107X&w$dMzRH|N?UKR`d43|9 zvNy}FUB7<)O*h=@-EK zn25!V)Mhqieh_Jj%n=YZu&N9qAT&+O)AiJsU2^hs@|$DLhhY&|zk{UG`Tonq1);ZaCM}`d>cKVvh*OVobi5!dgs&-U; ztqZ!RU@-WwfcO9{iQ<+cMk{^*OFX9M{a|kJ-_eiu0=F95UnE2o6XsXa=}UFyRZx z=ksAe=76!xZ0QEdlAWEMrE2PHO#E}m8)Zi_nR$h(scVahihNG#*3%HvZ>8jj7NptL z9TGCJm@oEwY2(y&eLsMt8(;-n=h=Fqp1H7ia|nnES-oUXgz`m6Q8Uw*xyH%96H!!D zbg+0vai_Pn-|xrF;+c5z2T#8J?Qeg38WA-)(ckdl!v}f1p0@zV&AY9+IUEW9n?r5& zI75aESqs2=PR*DW2m~zmq{br{3~B%-OY~88?4x55J4pykpm&l95NKLC(90TzRb;r0{f7@9{@BsJlW^^|*OrVNF)|7OSy@?_J$p8ai;J=K-K{9! zRDKZvXO>b39)G6)B@m`N(%xr0qG?+4D1IN|z<~qJ0A7(0CB>`AjDGQua%l-v3fRmL zl9+e;6`zuP3mL(xpjGv$L#IyL(aRFTI2kb}-tK6>7R;J8t8Br71yzd|E#9$w`SObL z@^Wn0umQ1H3>6g>6MyrY-`wR$M~p=PzT`+-?eQ}B(JpAWM_`4eTw7*N$+cx-nfdBJ z1`HVRN5H0Wnq*d3$q*8cjGCs6fpwV*N9UV`g@t|qV`XHCX@`@2%W&T}?pyHG&z_oc zW#N?%XqxtEgg6$9p?X*K5{J4;G;ci#!XJp}M_{ZZ&@>{a`I@Hniz*J`qv2`* zdnH7tW&dJI6a+|PtTAts(C>lk0Z0Wd1cZD}r$dYwF`_NC?BMd{%gc@)J-W#6_hZJ48N041yyDNytPqjY z>#e7lc}3QMtdh0|p^g0f{LE&v`9Uy#0=c5&gM=ccLoV8xGDK9v;Qxs8(Q+hy`Lyu? zfY;-FKt#ve{m%5e)__@3bre(i3V@$k$bCxGj4pu0U`S?*OAFW!hckmW6DZm6JYnSM zk=e&SIu_~liS_zsEP6Kju1%eiVw&b)S^vPpo}|g^UCs3A)1MPU44XW8a_Q2gOJ5)& z@z8^g$dMyW0Dh+Cs;9tsR4P8APSO}!6R#!1*` z)EctY5}pU}6SOQHC8X80l{QYuMQJ}Y3}ai{FSggJh^PXLWX4I}nD=@BFL$`EFxaXj z$quI%|Ait3DU#MHgm5evq-yHFf-r-@g|hcN^hk>#zPk3+Cmvn#=o4zL8U_$|G!Kjn zd^LqytMy>q7!HRm%Rd^m#&c?}x=dzUAAt&N>d|Wng+eD)O)Ud(Yce?kP0r2Dy`;9b z*0Qwh$k8LS5tZ5=S&H*Aqih?eAW@-MouJ+_LbfwbRaGa#qDBu9IbK}Z!=6D*ayz;e zMRD%Mz?cr;78|?tB*BP?}E)}E)lH^86oMqtjzojfh->?X6|2G+z7t~CpS(2 zFxk8Vz<*lFd1Kx?U_C`(4|!fV|AOuPoOMU*-Up+z%i5mKTgW656r7%K0svdSNj`&! 
z`bA(P0a2gFXX{)R5>zcIYPmC{FJaUgWsnPu$?b4rDVKIj$5A+y&DIPB$tx%WqclzH z=;~O_H>2h`)6w}R0FWT*(awi|2wU&?r7Zsg7vz$Rjw^@(v^)?A-oaK zt4ZlgtvgzG&}!bGF2ZjCd;m+=w+%kPam6gS|*#J9rQr9GCDle0eF2PwNF1nZHGnC1*0 zK70@Wt9_o8T`}MMR1c8ce2aRfRA)DlnsKInU!Ujj;lm$;<7XC~Poi8~LVTlbCqwj7 zqV?*OjZ?%VwsjF^XJll|gjM<+TL+lZf*>0O9-HcRppFb3GIW6gWnjD?1}Cj<&;}9x zI8hs|nLKt#&B)04QZ0Q7Mas3E5C{a??NQFp&mY2I%jq1OoujmEq`aFw91b5;HPry1 zChi&2g(5yAi}QJ8M&WcY$y~3nB2}xFyks{mD=w{Dkm90hs?_O6EI1Ylg`66>KDX{^ zIvBOa231p!0?11yM;=-L;D>F$N2AgCpyVll0NytYBjFh>=|$Pta=kN?nC`ZaIY&fY zGT&^Y?r7ZsKr&j%xF2!}-+c=c_KrhyX+tn49^Kj?w2#`d!ZL{m&nNpZE#oARRZs}9 zIDnQw+gsPux|+3;a+j#NYPzK;T>25hE1rc#G6sXk<>cjD0kl+|09zJX7Z7+UnLt{` z$(;V$q{IepM_`kG1-Wb&fbrATK$eROTB{9iv654SvJe1#nZEh3+9|`>Xc$Iq()MiV z*a^chK0tz96S7DZ4s(ETIDC-7G9x_zX#Pz9^XP)(AYA$=a%-N8T12g*wDHpQI^slT zhwK9%k0t<>4(K2S5&go3_^qm@TJAWoOE4I`88StlTzVB|v-x`fR;i$u1%tsa07!Ti zPAi|pnq#k7^_jMDGE0v}@z~Z)^GyJNQER*bdu33*qiX821qB63=gl(Y(w7YZ+J0*ze5x`dDa@pCY zvpdy1HKodRfk0rCs;U32c$FgzUYbf9F1-U*xI{wCvKk=^P8y|cBW>esJy|TJ^7Ks3 z7LS%86bkJDV8^`$gU3PO9W|)_IT#EsvTCY7tko4RfF-`_=B9L9^%#Q?k0gNZm__lbiRTfkVoKr~Y);hW_5(t!l@FY@9R58hv*~AUN zp=0M2w!a)g)U_#lRzxC^V{CFUfWP&m9&uqrATUeU^`wU#xpY6FP)G;k?G$gXj?_l> z%L+SUoS{(YZ?HM`jx$F1*h=_&jk|7!s3 z);NxtKC@%F*rYgSoHnv=0K&T+D~;TO<@qqd8EJnXUM+9oPRk_^X;Q`hysLvmXdn>|0@OoD0D8m}0-aTPJmX5wEV z&HUk3&H#7|gnPY;H^pV9=0o5% z02f(_1p~^LcN~Yq`7Q z!oe3>A7a~zfVfQ&O43RjilW$ZCvC7D5*a}NexFL3s=MnVO$rJMG8!8jFU-iu@HY4w zK5_hPQJ3C|SJl*706A8I0^+(lz0P*3s%xAsE-9eqs`J588~BeJx-l9l#1#5nF<=*$ z?tqvcmXafS5h(`yT;p_cv5A_iE|xgX5&&S5`GU1nd|bEI#l;4}U~ocfcH2-1@ok1~ zOiQ&dcjR<&IgkAO{J6D$va_?ZR84(|i8n#cFhNXDr=yEIa=N&jhpMSzFq(*{hM8MV zIwxcUm;{TX=sQqj=tcp6SgN`>wV%_)rN0mYp@_lxX%1a0k%nOcu3PKk(vhU)Dka%t z=tfDpjkw0?;?kZ}&oOP|6Jojt_Ucr{#dT|4T-u|jac`8tcNrnWkxdis$m!y8K9sUs zdy<%L3hUuDPO0J=r;E$^n6{0x9*n2Fig!#X6x!&dDz01W;&L8-zyCngjQ$ARQbiFh yYnp@5Qrh%m0QCS4gRut+whYV|_-0oX4)}lLX!h9Qosa(j0000 + + This report has been generated by the ebi-metagenomics/emg-viral-pipeline pipeline. 
+ +report_section_order: + "ebi-metagenomics/emg-viral-pipeline-methods-description": + order: -1000 + software_versions: + order: -1001 + "ebi-metagenomics/emg-viral-pipeline-summary": + order: -1002 + +export_plots: true + +data_format: "yaml" + +run_modules: + - fastqc + - fastp + +## Module order +module_order: + - fastqc + - fastp + +## File name cleaning +extra_fn_clean_exts: + - "_fastp" + +## Prettification +custom_logo: "mgnify_logo.png" +custom_logo_url: https://github.com/ebi-metagenomics/emg-viral-pipeline/ +custom_logo_title: "ebi-metagenomics/emg-viral-pipeline" + +## General Stats customisation +table_columns_visible: + "fastp": + pct_duplication: False + after_filtering_q30_rate: False + after_filtering_q30_bases: False + filtering_result_passed_filter_reads: 3300 + after_filtering_gc_content: False + pct_surviving: True + pct_adapter: True + +table_columns_placement: + "fastp": + pct_duplication: 3000 + after_filtering_q30_rate: 3100 + after_filtering_q30_bases: 3200 + filtering_result_passed_filter_reads: 3300 + after_filtering_gc_content: 3400 + pct_surviving: 3500 + pct_adapter: 3600 + +custom_table_header_config: + general_stats_table: + "Total length": + hidden: True + N50: + hidden: True diff --git a/configs/conda.config b/configs/conda.config index 5afceea..a77e536 100644 --- a/configs/conda.config +++ b/configs/conda.config @@ -1,24 +1,20 @@ process { - withLabel: annotation { conda = "$baseDir/envs/python3.yaml" } - withLabel: assign { conda = "$baseDir/envs/python3.yaml" } - withLabel: balloon { conda = "$baseDir/envs/balloon.yaml" } - withLabel: basics { conda = "$baseDir/envs/python3.yaml" } - withLabel: blast { conda = "$baseDir/envs/blast.yaml" } - withLabel: fastp { conda = "$baseDir/envs/fastp.yaml" } - withLabel: fastqc { conda = "$baseDir/envs/fastqc.yaml" } - withLabel: hmmscan { conda = "$baseDir/envs/hmmer.yaml" } - withLabel: kaiju { conda = "$baseDir/envs/kaiju.yaml" } - withLabel: krona { conda = "$baseDir/envs/krona.yaml" } 
- withLabel: plot_contig_map { conda = "$baseDir/envs/r.yaml" } - withLabel: multiqc { conda = "$baseDir/envs/multiqc.yaml" } - withLabel: parse { conda = "$baseDir/envs/python3.yaml" } - withLabel: prodigal { conda = "$baseDir/envs/prodigal.yaml" } - withLabel: phanotate { conda = "$baseDir/envs/phanotate.yaml" } - withLabel: python3 { conda = "$baseDir/envs/python3.yaml" } - withLabel: ratio_evalue { conda = "$baseDir/envs/python3.yaml" } - withLabel: ruby { conda = "$baseDir/envs/ruby.yaml" } - withLabel: spades { conda = "$baseDir/envs/spades.yaml" } - withLabel: virsorter { conda = "$baseDir/envs/virsorter.yaml" } - withLabel: virfinder { conda = "$baseDir/envs/virfinder.yaml" } - withLabel: checkV { conda = "$baseDir/envs/checkv.yaml" } + withNAME: ANNOTATION { conda = "$baseDir/envs/python3.yaml" } + withNAME: ASSIGN { conda = "$baseDir/envs/python3.yaml" } + withNAME: BALLOON { conda = "$baseDir/envs/balloon.yaml" } + withNAME: basics { conda = "$baseDir/envs/python3.yaml" } + withNAME: BLAST { conda = "$baseDir/envs/blast.yaml" } + withNAME: HMMSCAN { conda = "$baseDir/envs/hmmer.yaml" } + withNAME: KAIJU { conda = "$baseDir/envs/kaiju.yaml" } + withNAME: KRONA { conda = "$baseDir/envs/krona.yaml" } + withNAME: PLOT_CONTIG_MAP { conda = "$baseDir/envs/r.yaml" } + withNAME: PARSE { conda = "$baseDir/envs/python3.yaml" } + withNAME: PRODIGAL { conda = "$baseDir/envs/prodigal.yaml" } + withNAME: PHANOTATE { conda = "$baseDir/envs/phanotate.yaml" } + withNAME: python3 { conda = "$baseDir/envs/python3.yaml" } + withNAME: RATIO_EVALUE { conda = "$baseDir/envs/python3.yaml" } + withNAME: ruby { conda = "$baseDir/envs/ruby.yaml" } + withNAME: VIRSORTER { conda = "$baseDir/envs/virsorter.yaml" } + withNAME: VIRFINDER { conda = "$baseDir/envs/virfinder.yaml" } + withNAME: CHECKV { conda = "$baseDir/envs/checkv.yaml" } } \ No newline at end of file diff --git a/configs/modules.config b/configs/modules.config index d0c3d8c..09936f8 100644 --- a/configs/modules.config 
+++ b/configs/modules.config @@ -9,10 +9,19 @@ */ process { - withName: ANNOTATION { + withName: 'ANNOTATION' { publishDir = [ [ - path: "${params.output}/${name}/${params.finaldir}/annotation/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/annotation/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*_annotation.tsv" @@ -20,16 +29,34 @@ process { ] } - withName: ASSIGN { + withName: 'ASSIGN' { publishDir = [ [ - path: "${params.output}/${name}/${params.taxdir}", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.taxdir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*_taxonomy.tsv" ], [ - path: "${params.output}/${name}/${params.finaldir}/taxonomy", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/taxonomy/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*_taxonomy.tsv" @@ -40,7 +67,16 @@ process { withName: BALLOON { publishDir = [ [ - path: "${params.output}/${name}/${params.finaldir}/balloon/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/balloon/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.{pdf,svg}" @@ -49,17 +85,33 @@ process { } withName: BLAST { - errorStrategy 'retry' - maxRetries 1 publishDir = [ [ - path: "${params.output}/${assembly_name}/${params.blastdir}/", + path: "${params.output}", + 
saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.blastdir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.blast" ], [ - path: "${params.output}/${assembly_name}/${params.finaldir}/blast/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/blast/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.filtered.blast" @@ -68,17 +120,33 @@ process { } withName: BLAST_FILTER { - errorStrategy 'retry' - maxRetries 1 publishDir = [ [ - path: "${params.output}/${assembly_name}/${params.blastdir}/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.blastdir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.meta" ], [ - path: "${params.output}/${assembly_name}/${params.finaldir}/blast/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/blast/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.meta" @@ -89,16 +157,19 @@ process { withName: CHECKV { publishDir = [ [ - path: "${params.output}/${name}/${params.checkvdir}/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.checkvdir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${confidence_set_name}" - ], - [ - path: 
"${params.output}/${name}/${params.checkvdir}/", - mode: params.publish_dir_mode, - failOnError: false, - pattern: "*.tsv" + pattern: "*_quality_summary.tsv" ], ] } @@ -106,10 +177,19 @@ process { withName: GENERATE_CHROMOMAP_TABLE { publishDir = [ [ - path: "${params.output}/${name}/${params.finaldir}/chromomap/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/chromomap/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${id}.filtered-*.contigs.txt" + pattern: "*.filtered-*.contigs.txt" ] ] } @@ -117,13 +197,31 @@ process { withName: GENERATE_KRONA_TABLE { publishDir = [ [ - path: "${params.output}/${name}/${params.plotdir}", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.plotdir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.krona.tsv" ], [ - path: "${params.output}/${name}/${params.finaldir}/krona/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/krona/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.krona.tsv" @@ -134,16 +232,34 @@ process { withName: GENERATE_SANKEY_TABLE { publishDir = [ [ - path: "${params.output}/${name}/${params.plotdir}", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.plotdir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${set_name}.sankey.*" + pattern: "*.sankey.*" ], [ - path: 
"${params.output}/${name}/${params.finaldir}/sankey/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/sankey/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${set_name}.sankey.filtered-${params.sankey}.json" + pattern: "*.sankey.filtered-${params.sankey}.json" ] ] } @@ -151,7 +267,16 @@ process { withName: CHROMOMAP { publishDir = [ [ - path: "${params.output}/${name}/${params.finaldir}/chromomap/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/chromomap/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.html" @@ -159,24 +284,22 @@ process { ] } - withName: FILTER_READS { - publishDir = [ - [ - path: "${params.output}/${name}/", - mode: params.publish_dir_mode, - failOnError: false, - pattern: "${name}.filtered.fastq" - ] - ] - } - withName: HMM_POSTPROCESSING { publishDir = [ [ - path: "${params.output}/${name}/${params.hmmerdir}/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.hmmerdir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${set_name}_modified.tsv" + pattern: "*_modified.tsv" ] ] } @@ -184,10 +307,19 @@ process { withName: HMMSCAN { publishDir = [ [ - path: "${params.output}/${name}/${params.hmmerdir}/${params.db}", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.hmmerdir}/${params.databases}/${output_file.name}"; + } + }, mode: 
params.publish_dir_mode, failOnError: false, - pattern: "${set_name}_${params.db}_hmmscan.tbl" + pattern: "*_${params.databases}_hmmscan.tbl" ] ] } @@ -195,10 +327,19 @@ process { withName: KAIJU { publishDir = [ [ - path: "${params.output}/${name}/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}.out" + pattern: "*.out" ] ] } @@ -206,13 +347,31 @@ process { withName: KRONA { publishDir = [ [ - path: "${params.output}/${name}/${params.plotdir}/krona/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.plotdir}/krona/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.krona.html" ], [ - path: "${params.output}/${name}/${params.finaldir}/krona/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/krona/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.krona.html" @@ -223,10 +382,19 @@ process { withName: LENGTH_FILTERING { publishDir = [ [ - path: "${params.output}/${name}/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}*filt*.fasta" + pattern: "*filt*.fasta" ] ] } @@ -234,13 +402,31 @@ process { withName: MASHMAP { publishDir = [ [ - path: "${params.output}/${assembly_name}/", + path: "${params.output}/", + saveAs: { + filename -> { + if ( 
filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.tsv" ], [ - path: "${params.output}/${assembly_name}/${params.finaldir}/mashmap", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/mashmap/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.tsv" @@ -251,10 +437,19 @@ process { withName: MULTIQC { publishDir = [ [ - path: "${params.output}/${name}/${params.assemblydir}", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.assemblydir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}_multiqc_report.html" + pattern: "*_multiqc_report.html" ] ] } @@ -262,22 +457,49 @@ process { withName: PARSE { publishDir = [ [ - path: "${params.output}/${name}/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.fna" ], [ - path: "${params.output}/${name}/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "virsorter_metadata.tsv" ], [ - path: "${params.output}/${name}/${params.finaldir}/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def 
output_file = new File(filename); + return "${meta.id}/${params.finaldir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}_virus_predictions.log" + pattern: "*_virus_predictions.log" ] ] } @@ -285,7 +507,16 @@ process { withName: PHANOTATE { publishDir = [ [ - path: "${params.output}/${name}/${params.phanotatedir}", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.phanotatedir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.faa" @@ -294,21 +525,36 @@ process { } withName: PLOT_CONTIG_MAP { - errorStrategy { - task.exitStatus = 1 ? 'ignore' : 'terminate' - } publishDir = [ [ - path: "${params.output}/${name}/${params.plotdir}/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.plotdir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${set_name}_mapping_results" + pattern: "*_mapping_results" ], [ - path: "${params.output}/${name}/${params.finaldir}/annotation/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/annotation/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${set_name}_prot_ann_table_filtered.tsv" + pattern: "*_prot_ann_table_filtered.tsv" ] ] } @@ -316,27 +562,51 @@ process { withName: PPRMETA { publishDir = [ [ - path: "${params.output}/${name}/${params.virusdir}/pprmeta", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return 
"${meta.id}/${params.virusdir}/pprmeta/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}_pprmeta.csv" + pattern: "*_pprmeta.csv" ] ] } withName: PRODIGAL { - errorStrategy { - task.exitStatus = 18 ? 'ignore' : 'terminate' - } publishDir = [ [ - path: "${params.output}/${assembly_name}/${params.prodigaldir}", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.prodigaldir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.faa" ], [ - path: "${params.output}/${assembly_name}/${params.finaldir}/cds/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/cds/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.faa" @@ -345,15 +615,21 @@ process { } withName: RATIO_EVALUE { - errorStrategy { - task.exitStatus = 1 ? 
'ignore' : 'terminate' - } publishDir = [ [ - path: "${params.output}/${name}/ratio_evalue_tables", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/ratio_evalue_tables/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${set_name}_modified_informative.tsv" + pattern: "*_modified_informative.tsv" ] ] } @@ -361,10 +637,19 @@ process { withName: RENAME { publishDir = [ [ - path: "${params.output}/${name}/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}_renamed.fasta" + pattern: "*_renamed.fasta" ] ] } @@ -372,13 +657,31 @@ process { withName: RESTORE { publishDir = [ [ - path: "${params.output}/${name}/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*_original.fasta" ], [ - path: "${params.output}/${name}/${params.finaldir}/contigs/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/contigs/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*_original.fasta" @@ -389,13 +692,31 @@ process { withName: SANKEY { publishDir = [ [ - path: "${params.output}/${name}/${params.plotdir}", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return 
"${meta.id}/${params.plotdir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.sankey.html" ], [ - path: "${params.output}/${name}/${params.finaldir}/sankey/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/sankey/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.sankey.html" @@ -406,24 +727,39 @@ process { withName: SPADES { publishDir = [ [ - path: "${params.output}/${name}/${params.assemblydir}", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.assemblydir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}.fasta" + pattern: "*.fasta" ] ] } withName: VIRFINDER { - errorStrategy { - task.exitStatus = 1 ? 
'ignore' : 'terminate' - } publishDir = [ [ - path: "${params.output}/${name}/${params.virusdir}/virfinder", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.virusdir}/virfinder/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, - pattern: "${name}.txt" + pattern: "*.txt" ] ] } @@ -431,7 +767,16 @@ process { withName: VIRSORTER { publishDir = [ [ - path: "${params.output}/${name}/${params.virusdir}/", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.virusdir}/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false ] @@ -439,10 +784,18 @@ process { } withName: WRITE_GFF { - errorStrategy 'ignore' publishDir = [ [ - path: "${params.output}/${name}/${params.finaldir}/gff", + path: "${params.output}", + saveAs: { + filename -> { + if ( filename.equals('versions.yml') ) { + return null; + } + def output_file = new File(filename); + return "${meta.id}/${params.finaldir}/gff/${output_file.name}"; + } + }, mode: params.publish_dir_mode, failOnError: false, pattern: "*.gff" diff --git a/configs/node.config b/configs/node.config deleted file mode 100644 index 75da6a8..0000000 --- a/configs/node.config +++ /dev/null @@ -1,30 +0,0 @@ -process { - //errorStrategy = "retry" - //maxRetries = 1 - withName: ANNOTATION { cpus = 1; memory = '4.0 GB' } - withName: ASSIGN { cpus = 1; memory = '4.0 GB' } - withName: BALLOON { cpus = 1; memory = '2.0 GB' } - withLabel: basics { cpus = 1; memory = '4.0 GB' } - withName: BLAST { cpus = 12; memory = '12.0 GB' } - withName: CHROMOMAP { cpus = 1; memory = '4.0 GB' } - withName: CHECKV { cpus = 24; memory = '16.0 GB' } - withName: FASTP { cpus = 12; memory = '12.0 GB' } - withName: FASTQC { cpus = 4; memory = '8.0 
GB' } - withName: HMMSCAN { cpus = 24; memory = '24.0 GB' } - withName: KAIJU { cpus = 12; memory = '40.0 GB' } - withName: KRONA { cpus = 2; memory = '4.0 GB' } - withName: PLOT_CONTIG_MAP { cpus = 1; memory = '4.0 GB' } - withName: PPRMETA { cpus = 8; memory = '16.0 GB' } - withName: MULTIQC { cpus = 4; memory = '8.0 GB' } - withName: PARSE { cpus = 1; memory = '4.0 GB' } - withName: PRODIGAL { cpus = 8; memory = '8.0 GB' } - withName: PHANONATE { cpus = 1; memory = '4.0 GB' } - withLabel: python3 { cpus = 1; memory = '4.0 GB' } - withName: RATIO_EVALUE { cpus = 1; memory = '4.0 GB' } - withLabel: ruby { cpus = 1; memory = '4.0 GB' } - withName: SPADES { cpus = 12; memory = '40.0 GB' } - withName: SANKEY { cpus = 1; memory = '2.0 GB' } - withName: VIRSORTER { cpus = 12; memory = '12.0 GB' } - withName: VIRFINDER { cpus = 1; memory = '12.0 GB' } - withName: MASHMAP { cpus = 4; memory = '4.0 GB' } -} diff --git a/modules/local/annotation/main.nf b/modules/local/annotation/main.nf index 7aedbc2..fa798e4 100644 --- a/modules/local/annotation/main.nf +++ b/modules/local/annotation/main.nf @@ -15,16 +15,16 @@ process ANNOTATION { help="Name of processing .fna file to write correct output name") */ - tag "${name}" - label 'process_low' + tag "${meta.id} ${set_name}" + label 'process_single' container 'quay.io/microbiome-informatics/virify-python3:1.1' input: - tuple val(name), val(set_name), file(tab), file(faa) + tuple val(meta), val(set_name), path(tab), path(faa) output: - tuple val(name), val(set_name), file("*_annotation.tsv") + tuple val(meta), val(set_name), path("*_annotation.tsv"), emit: annotations script: """ diff --git a/modules/local/assign/main.nf b/modules/local/assign/main.nf index 4fda3a2..da4445e 100644 --- a/modules/local/assign/main.nf +++ b/modules/local/assign/main.nf @@ -4,18 +4,18 @@ process ASSIGN { provides the taxonomic lineage of each viral contig, based on the corresponding ViPhOG annotations''' */ - tag "${name}" - label 'process_low' + tag 
"${meta.id} ${set_name}" + label 'process_single' container 'quay.io/microbiome-informatics/virify-python3:1.1' input: - tuple val(name), val(set_name), file(tab) - file(db) - file(factor) + tuple val(meta), val(set_name), path(tab) + path(db) + path(factor) output: - tuple val(name), val(set_name), file("*_taxonomy.tsv") + tuple val(meta), val(set_name), path("*_taxonomy.tsv") script: """ diff --git a/modules/local/balloon/main.nf b/modules/local/balloon/main.nf index 06c8ee6..c0f934c 100644 --- a/modules/local/balloon/main.nf +++ b/modules/local/balloon/main.nf @@ -1,14 +1,14 @@ process BALLOON { - tag "${name}" - label 'process_medium' + tag "${meta.id}" + label 'process_single' container 'nanozoo/r_balloon:3.1.1--64f0f7d' input: - tuple val(name), val(set_name), file(tbl) + tuple val(meta), val(set_name), path(tbl) output: - path ("*.{pdf,svg}") optional true + path("*.{pdf,svg}"), optional: true script: """ @@ -22,16 +22,16 @@ process BALLOON { fi # genus - grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$2!="" && \$2 !~ /^0/){print SAMPLE"\\tgenus\\t"\$2}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' > \$NAME"_summary.tsv" + grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$2!="" && \$2 !~ /^0/){print SAMPLE"\\tgenus\\t"\$2}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' > \$NAME"_summary.tsv" # subfamily - grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$3!="" && \$3 !~ /^0/){print SAMPLE"\\tsubfamily\\t"\$3}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv" + grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$3!="" && \$3 !~ /^0/){print SAMPLE"\\tsubfamily\\t"\$3}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv" # family - grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$4!="" && \$4 !~ 
/^0/){print SAMPLE"\\tfamily\\t"\$4}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv" + grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$4!="" && \$4 !~ /^0/){print SAMPLE"\\tfamily\\t"\$4}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv" # order - grep -v contig_ID tmp.tsv | awk -v SAMPLE="${name}" 'BEGIN{FS="\\t"};{if(\$5!="" && \$5 !~ /^0/){print SAMPLE"\\torder\\t"\$5}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv" + grep -v contig_ID tmp.tsv | awk -v SAMPLE="${meta.id}" 'BEGIN{FS="\\t"};{if(\$5!="" && \$5 !~ /^0/){print SAMPLE"\\torder\\t"\$5}}' | sort | uniq -c | awk '{printf \$2"\\t"\$3"\\t"\$4"\\t"\$1"\\n"}' >> \$NAME"_summary.tsv" if [ -s \$NAME"_summary.tsv" ]; then balloon.R "\${NAME}_summary.tsv" "\${NAME}_balloon.svg" 10 8 diff --git a/modules/local/blast/main.nf b/modules/local/blast/main.nf index 73b162c..48e24e3 100644 --- a/modules/local/blast/main.nf +++ b/modules/local/blast/main.nf @@ -1,15 +1,15 @@ process BLAST { label 'process_high' - tag "${assembly_name}" + tag "${meta.id} ${confidence_set_name}" container 'quay.io/microbiome-informatics/blast:2.9.0' input: - tuple val(assembly_name), val(confidence_set_name), file(fasta) + tuple val(meta), val(confidence_set_name), path(fasta) file(db) output: - tuple val(assembly_name), val(confidence_set_name), file("${confidence_set_name}.blast"), file("${confidence_set_name}.filtered.blast") + tuple val(meta), val(confidence_set_name), path("${confidence_set_name}.blast"), path("${confidence_set_name}.filtered.blast") script: if (task.attempt.toString() == '1') diff --git a/modules/local/blast_filter/main.nf b/modules/local/blast_filter/main.nf index 623aa4d..620a4d3 100644 --- a/modules/local/blast_filter/main.nf +++ b/modules/local/blast_filter/main.nf @@ -1,14 +1,14 @@ process BLAST_FILTER { - label 'process_low' - tag "${assembly_name}" + 
label 'process_single' + tag "${meta.id} ${confidence_set_name}" container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(assembly_name), val(confidence_set_name), file(blast), file(blast_filtered) - file(db) + tuple val(meta), val(confidence_set_name), path(blast), path(blast_filtered) + path(db) output: - tuple val(assembly_name), val(confidence_set_name), file("*.meta") + tuple val(meta), path(confidence_set_name), path("*.meta") script: if (task.attempt.toString() == '1') diff --git a/modules/local/checkv/main.nf b/modules/local/checkv/main.nf index 11bcea9..049ea4e 100644 --- a/modules/local/checkv/main.nf +++ b/modules/local/checkv/main.nf @@ -1,27 +1,22 @@ process CHECKV { - label 'process_medium' - tag "${name}" + label 'process_high' + tag "${meta.id} ${confidence_set_name}" container 'quay.io/microbiome-informatics/checkv:0.8.1__1' input: - tuple val(name), val(confidence_set_name), file(fasta), file(contigs) - file(database) + tuple val(meta), val(confidence_set_name), path(fasta) + path(database) output: - tuple val(name), val(confidence_set_name), file("${confidence_set_name}_quality_summary.tsv"), path("${confidence_set_name}/") + tuple val(meta), val(confidence_set_name), path("${confidence_set_name}_quality_summary.tsv") script: - if (confidence_set_name == 'prophages') { - """ - checkv end_to_end ${fasta} -d ${database} -t ${task.cpus} ${confidence_set_name} - cp ${confidence_set_name}/quality_summary.tsv ${confidence_set_name}_quality_summary.tsv - """ - } else { + """ - checkv end_to_end ${fasta} -d ${database} -t ${task.cpus} ${confidence_set_name} + checkv end_to_end ${fasta} -d ${database} -t ${task.cpus} ${confidence_set_name} cp ${confidence_set_name}/quality_summary.tsv ${confidence_set_name}_quality_summary.tsv """ - } + stub: """ mkdir negative_result_${confidence_set_name}.tsv diff --git a/modules/local/chromomap/main.nf b/modules/local/chromomap/main.nf index 5d7af19..7fb4393 100644 --- 
a/modules/local/chromomap/main.nf +++ b/modules/local/chromomap/main.nf @@ -1,17 +1,17 @@ process GENERATE_CHROMOMAP_TABLE { - label 'process_low' - tag "${name}" + label 'process_single' + tag "${meta.id} ${set_name}" container 'quay.io/microbiome-informatics/bioruby:2.0.1' input: - tuple val(name), val(set_name), file(assembly), file(annotation_table) + tuple val(meta), val(set_name), path(assembly), path(annotation_table) output: - tuple val(name), val(set_name), file("${id}.filtered-*.contigs.txt"), file("${id}.filtered-*.anno.txt") + tuple val(meta), val(set_name), path("${id}.filtered-*.contigs.txt"), path("${id}.filtered-*.anno.txt") script: id = set_name - if (set_name == "all") { id = name } + if (set_name == "all") { id = meta.id } """ # combine if [[ ${set_name} == "all" ]]; then @@ -29,18 +29,19 @@ process GENERATE_CHROMOMAP_TABLE { } process CHROMOMAP { - label 'process_medium' + label 'process_low' + tag "${meta.id} ${set_name}" container 'quay.io/microbiome-informatics/r_chromomap:0.3' input: - tuple val(name), val(set_name), file(contigs), file(annotations) + tuple val(meta), val(set_name), file(contigs), file(annotations) output: - tuple val(name), val(set_name), file("*.html") optional true + tuple val(meta), val(set_name), file("*.html") optional true script: id = set_name - if (set_name == "all") { id = name } + if (set_name == "all") { id = meta.id } """ #!/usr/bin/env Rscript diff --git a/modules/local/fastp/main.nf b/modules/local/fastp/main.nf deleted file mode 100644 index c85d5d4..0000000 --- a/modules/local/fastp/main.nf +++ /dev/null @@ -1,21 +0,0 @@ -process FASTP { - -/* Comments: - -m, --merge - for paired-end input, merge each pair of reads into a single read if they are overlapped. - The merged reads will be written to the file given by --merged_out, the unmerged reads will be - written to the files specified by --out1 and --out2. The merging mode is disabled by default. 
-*/ - tag "${name}" - label 'process_medium' - container 'quay.io/biocontainers/fastp:0.20.1--h8b12597_0' - - input: - tuple val(name), file(reads) - output: - tuple val(name), file("${name}*.fastp.fastq.gz") - script: - """ - fastp -i ${reads[0]} -I ${reads[1]} --thread ${task.cpus} -o ${name}.R1.fastp.fastq.gz -O ${name}.R2.fastp.fastq.gz - """ -} \ No newline at end of file diff --git a/modules/local/fastqc/main.nf b/modules/local/fastqc/main.nf deleted file mode 100644 index 199adb3..0000000 --- a/modules/local/fastqc/main.nf +++ /dev/null @@ -1,15 +0,0 @@ -process FASTQC { - tag "${name}" - label 'process_low' - container 'quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1' - - input: - tuple val(name), file(reads) - output: - tuple val(name), file("fastqc/${name}*fastqc*") - script: - """ - mkdir fastqc - fastqc -t ${task.cpus} -o fastqc *.fastq.gz - """ -} \ No newline at end of file diff --git a/modules/local/filter_reads/main.nf b/modules/local/filter_reads/main.nf deleted file mode 100644 index 5454154..0000000 --- a/modules/local/filter_reads/main.nf +++ /dev/null @@ -1,19 +0,0 @@ -process FILTER_READS { - tag "${name}" - label 'process_low' - - input: - tuple val(name), file(kaiju_filtered), file(fastq) - - output: - tuple val(name), file("${name}.filtered.fastq") - tuple val(name), file("${name}.filtered.fasta") - - script: - """ - sed '/^@/!d;s//>/;N' ${fastq} > ${name}.fasta - faSomeRecords ${name}.fasta ${kaiju_filtered} ${name}.filtered.fasta - faToFastq ${name}.filtered.fasta ${name}.filtered.fastq - rm -f ${name}.fasta - """ -} diff --git a/modules/local/get_db/checkv.nf b/modules/local/get_db/checkv.nf index 315ad8a..6aeff6d 100644 --- a/modules/local/get_db/checkv.nf +++ b/modules/local/get_db/checkv.nf @@ -1,7 +1,14 @@ process checkVGetDB { - label 'noDocker' - if (params.cloudProcess) { publishDir "${params.databases}/checkv", mode: 'copy' } - else { storeDir "${params.databases}/checkv" } + label 'process_low' + container 
'nanozoo/template:3.8--ccd0653' + + if (params.cloudProcess) { + publishDir "${params.databases}/checkv", mode: 'copy' + } + else { + storeDir "${params.databases}/checkv" + } + output: path("checkv-db-v*", type: 'dir') script: diff --git a/modules/local/get_db/imgvr.nf b/modules/local/get_db/imgvr.nf index f97e7f1..635c45a 100644 --- a/modules/local/get_db/imgvr.nf +++ b/modules/local/get_db/imgvr.nf @@ -1,5 +1,7 @@ process imgvrGetDB { - label 'noDocker' + label 'process_low' + container 'nanozoo/template:3.8--ccd0653' + if (params.cloudProcess) { publishDir "${params.databases}/imgvr/", mode: 'copy', pattern: "IMG_VR_2018-07-01_4" } diff --git a/modules/local/get_db/kaiju.nf b/modules/local/get_db/kaiju.nf index ee10fa4..d9cb9e9 100644 --- a/modules/local/get_db/kaiju.nf +++ b/modules/local/get_db/kaiju.nf @@ -1,5 +1,6 @@ process kaijuGetDB { - label 'noDocker' + label 'process_low' + container 'nanozoo/template:3.8--ccd0653' if (params.cloudProcess) { publishDir "${params.databases}/kaiju/", mode: 'copy', pattern: "viruses"//pattern: "nr_euk" } diff --git a/modules/local/get_db/meta.nf b/modules/local/get_db/meta.nf index 59cd1e0..187c847 100644 --- a/modules/local/get_db/meta.nf +++ b/modules/local/get_db/meta.nf @@ -1,6 +1,7 @@ - process metaGetDB { - label 'noDocker' + label 'process_low' + container 'nanozoo/template:3.8--ccd0653' + if (params.cloudProcess) { publishDir "${params.databases}/models", mode: 'copy', pattern: "additional_data_vpHMMs_${params.meta_version}.tsv" } diff --git a/modules/local/get_db/ncbi.nf b/modules/local/get_db/ncbi.nf index e1ef496..d66a0f8 100644 --- a/modules/local/get_db/ncbi.nf +++ b/modules/local/get_db/ncbi.nf @@ -1,5 +1,7 @@ process ncbiGetDB { - label 'noDocker' + label 'process_low' + container 'nanozoo/template:3.8--ccd0653' + if (params.cloudProcess) { publishDir "${params.databases}/ncbi/", mode: 'copy', pattern: "ete3_ncbi_tax.sqlite" } diff --git a/modules/local/get_db/pvogs.nf b/modules/local/get_db/pvogs.nf index 
e1e2e61..bf175cb 100644 --- a/modules/local/get_db/pvogs.nf +++ b/modules/local/get_db/pvogs.nf @@ -1,5 +1,8 @@ process pvogsGetDB { - label 'noDocker' + + label 'process_low' + container 'nanozoo/template:3.8--ccd0653' + if (params.cloudProcess) { publishDir "${params.databases}/", mode: 'copy', pattern: "pvogs" } diff --git a/modules/local/get_db/rvdb.nf b/modules/local/get_db/rvdb.nf index 0f850ce..64626f8 100644 --- a/modules/local/get_db/rvdb.nf +++ b/modules/local/get_db/rvdb.nf @@ -1,5 +1,7 @@ process rvdbGetDB { - label 'noDocker' + label 'process_low' + container 'nanozoo/template:3.8--ccd0653' + if (params.cloudProcess) { publishDir "${params.databases}/", mode: 'copy', pattern: "rvdb" } diff --git a/modules/local/get_db/viphog.nf b/modules/local/get_db/viphog.nf index 3364a2a..993a990 100644 --- a/modules/local/get_db/viphog.nf +++ b/modules/local/get_db/viphog.nf @@ -1,5 +1,7 @@ process viphogGetDB { - label 'noDocker' + label 'process_low' + container 'nanozoo/template:3.8--ccd0653' + if (params.cloudProcess) { publishDir "${params.databases}/", mode: 'copy', pattern: "vpHMM_database_${params.viphog_version}" } diff --git a/modules/local/get_db/virfinder.nf b/modules/local/get_db/virfinder.nf index 6ce014b..21fa3df 100644 --- a/modules/local/get_db/virfinder.nf +++ b/modules/local/get_db/virfinder.nf @@ -1,5 +1,7 @@ process virfinderGetDB { - label 'noDocker' + label 'process_low' + container 'nanozoo/template:3.8--ccd0653' + if (params.cloudProcess) { publishDir "${params.databases}/virfinder/", mode: 'copy', pattern: "VF.modEPV_k8.rda" } diff --git a/modules/local/get_db/virsorter.nf b/modules/local/get_db/virsorter.nf index 9ebccd0..6b43242 100644 --- a/modules/local/get_db/virsorter.nf +++ b/modules/local/get_db/virsorter.nf @@ -1,5 +1,7 @@ process virsorterGetDB { - label 'noDocker' + label 'process_low' + container 'nanozoo/template:3.8--ccd0653' + if (params.cloudProcess) { publishDir "${params.databases}/virsorter/", mode: 'copy', pattern: 
"virsorter-data" } diff --git a/modules/local/get_db/vogdb.nf b/modules/local/get_db/vogdb.nf index 4c7fab1..fa9ba22 100644 --- a/modules/local/get_db/vogdb.nf +++ b/modules/local/get_db/vogdb.nf @@ -1,5 +1,7 @@ process vogdbGetDB { - label 'noDocker' + label 'process_low' + container 'nanozoo/template:3.8--ccd0653' + if (params.cloudProcess) { publishDir "${params.databases}/", mode: 'copy', pattern: "vogdb" } diff --git a/modules/local/get_db/vpf.nf b/modules/local/get_db/vpf.nf index d8432f0..4020154 100644 --- a/modules/local/get_db/vpf.nf +++ b/modules/local/get_db/vpf.nf @@ -1,5 +1,7 @@ process vpfGetDB { - label 'noDocker' + label 'process_low' + container 'nanozoo/template:3.8--ccd0653' + if (params.cloudProcess) { publishDir "${params.databases}/", mode: 'copy', pattern: "vpf" } diff --git a/modules/local/help.nf b/modules/local/help.nf index 42d2739..28e3a97 100644 --- a/modules/local/help.nf +++ b/modules/local/help.nf @@ -81,7 +81,7 @@ def helpMSG() { ${c_yellow}HPC computing:${c_reset} Especially for execution of the workflow on a HPC (LSF, SLURM) adjust the following parameters if needed: - --databases defines the path where databases are stored [default: $params.dbs] + --databases defines the path where databases are stored [default: $params.databases] --workdir defines the path where nextflow writes tmp files [default: $params.workdir] --singularity_cachedir defines the path where images (singularity) are cached [default: $params.singularity_cachedir] diff --git a/modules/local/hmm_postprocessing/main.nf b/modules/local/hmm_postprocessing/main.nf index af99cff..705424d 100644 --- a/modules/local/hmm_postprocessing/main.nf +++ b/modules/local/hmm_postprocessing/main.nf @@ -3,16 +3,16 @@ process HMM_POSTPROCESSING { input: File_hmmer_ViPhOG.tbl output: File_hmmer_ViPhOG_modified.tbl */ - tag "${name}" - label 'process_low' + tag "${meta.id} ${set_name}" + label 'process_single' container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - 
tuple val(name), val(set_name), file(hmmer_tbl), file(faa) + tuple val(meta), val(set_name), path(hmmer_tbl), path(faa) output: - tuple val(name), val(set_name), file("${set_name}_modified.tsv"), file(faa) + tuple val(meta), val(set_name), path("${set_name}_modified.tsv"), path(faa) script: """ diff --git a/modules/local/hmmscan/main.nf b/modules/local/hmmscan/main.nf index 82cf7b7..c8e4473 100644 --- a/modules/local/hmmscan/main.nf +++ b/modules/local/hmmscan/main.nf @@ -1,28 +1,28 @@ process HMMSCAN { - tag "${name}" + tag "${meta.id} ${set_name}" label 'process_high' container 'quay.io/microbiome-informatics/hmmer:3.1b2' input: - tuple val(name), val(set_name), file(faa) - file(db) + tuple val(meta), val(set_name), path(faa) + path(db) output: - tuple val(name), val(set_name), file("${set_name}_${params.db}_hmmscan.tbl"), file(faa) + tuple val(meta), val(set_name), path("${set_name}_${params.databases}_hmmscan.tbl"), path(faa) script: """ - if [[ ${params.db} == "viphogs" ]]; then + if [[ ${params.databases} == "viphogs" ]]; then if [[ ${params.version} == "v1" ]]; then - hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.db}_hmmscan.tbl ${db}/${db}.hmm ${faa} + hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.databases}_hmmscan.tbl ${db}/${db}.hmm ${faa} else - hmmscan --cpu ${task.cpus} --noali --cut_ga --domtblout ${set_name}_${params.db}_hmmscan_cutga.tbl ${db}/${db}.hmm ${faa} + hmmscan --cpu ${task.cpus} --noali --cut_ga --domtblout ${set_name}_${params.databases}_hmmscan_cutga.tbl ${db}/${db}.hmm ${faa} #filter evalue for models that dont have any GA cutoff - awk '{if(\$1 ~ /^#/){print \$0}else{if(\$7<0.001){print \$0}}}' ${set_name}_${params.db}_hmmscan_cutga.tbl > ${set_name}_${params.db}_hmmscan.tbl + awk '{if(\$1 ~ /^#/){print \$0}else{if(\$7<0.001){print \$0}}}' ${set_name}_${params.databases}_hmmscan_cutga.tbl > ${set_name}_${params.db}_hmmscan.tbl fi else - hmmscan --cpu ${task.cpus} --noali 
-E "0.001" --domtblout ${set_name}_${params.db}_hmmscan.tbl ${db}/${db}.hmm ${faa} + hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.databases}_hmmscan.tbl ${db}/${db}.hmm ${faa} fi """ } diff --git a/modules/local/kaiju/main.nf b/modules/local/kaiju/main.nf index c461b2f..c2e2ac9 100644 --- a/modules/local/kaiju/main.nf +++ b/modules/local/kaiju/main.nf @@ -5,29 +5,29 @@ process KAIJU { TODO: include viruses.taxids */ - label 'process_medium' - tag "${name}" + label 'process_high' + tag "${meta.id}" container 'quay.io/biocontainers/kaiju:1.7.2--hdbcaa40_0' input: - tuple val(name), file(fastq) - file(database) + tuple val(meta), path(fastq) + path(database) output: - tuple val(name), file("${name}.out") - tuple val(name), file("${name}.out.krona") + tuple val(meta), path("${meta.id}.out") + tuple val(meta), path("${meta.id}.out.krona") shell: if (params.illumina) { ''' - kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq[0]} -j !{fastq[1]} -o !{name}.out - kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{name}.out -o !{name}.out.krona + kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq[0]} -j !{fastq[1]} -o !{meta.id}.out + kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{meta.id}.out -o !{meta.id}.out.krona ''' } if (params.fasta) { ''' - kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq} -o !{name}.out - kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{name}.out -o !{name}.out.krona + kaiju -z !{task.cpus} -t !{database}/nodes.dmp -f !{database}/!{database}/kaiju_db_!{database}.fmi -i !{fastq} -o !{meta.id}.out + kaiju2krona -t !{database}/nodes.dmp -n !{database}/names.dmp -i !{meta.id}.out -o !{meta.id}.out.krona ''' } } diff --git a/modules/local/krona/main.nf b/modules/local/krona/main.nf index 
deb8c04..ad18021 100644 --- a/modules/local/krona/main.nf +++ b/modules/local/krona/main.nf @@ -1,21 +1,21 @@ process GENERATE_KRONA_TABLE { - label 'process_low' - tag "${name}" + label 'process_single' + tag "${meta.id} ${set_name}" container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(name), val(set_name), file(tbl) + tuple val(meta), val(set_name), path(tbl) output: - tuple val(name), val(set_name), file("*.krona.tsv") + tuple val(meta), val(set_name), path("*.krona.tsv") script: """ if [[ "${set_name}" == "all" ]]; then - grep contig_ID *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq > ${name}.tmp - grep -v "contig_ID" *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq >> ${name}.tmp - cp ${name}.tmp ${name}.tsv - generate_counts_table.py -f ${name}.tsv -o ${name}.krona.tsv + grep contig_ID *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq > ${meta.id}.tmp + grep -v "contig_ID" *.tsv | awk 'BEGIN{FS=":"};{print \$2}' | uniq >> ${meta.id}.tmp + cp ${meta.id}.tmp ${meta.id}.tsv + generate_counts_table.py -f ${meta.id}.tsv -o ${meta.id}.krona.tsv else generate_counts_table.py -f ${tbl} -o ${set_name}.krona.tsv fi @@ -24,18 +24,18 @@ process GENERATE_KRONA_TABLE { process KRONA { label 'process_low' - + tag "${meta.id} ${set_name}" container 'quay.io/microbiome-informatics/krona:2.7.1' input: - tuple val(name), val(set_name), file(krona_file) + tuple val(meta), val(set_name), file(krona_file) output: file("*.krona.html") script: """ if [[ ${set_name} == "all" ]]; then - ktImportText -o ${name}.krona.html ${krona_file} + ktImportText -o ${meta.id}.krona.html ${krona_file} else ktImportText -o ${set_name}.krona.html ${krona_file} fi diff --git a/modules/local/length_filtering/main.nf b/modules/local/length_filtering/main.nf index b7fc090..4c2efd3 100644 --- a/modules/local/length_filtering/main.nf +++ b/modules/local/length_filtering/main.nf @@ -1,18 +1,18 @@ process LENGTH_FILTERING { - label 'process_low' - tag "${name}" + label 'process_single' 
+ tag "${meta.id}" container 'quay.io/biocontainers/biopython:1.75' input: - tuple val(name), file(fasta), file(map) + tuple val(meta), path(fasta), path(map) output: - tuple val(name), file("${name}*filt*.fasta"), env(CONTIGS) + tuple val(meta), path("${meta.id}*filt*.fasta"), env(CONTIGS) script: """ filter_contigs_len.py -f ${fasta} -l ${params.length} -o ./ - CONTIGS=\$(grep ">" ${name}*filt*.fasta | wc -l) + CONTIGS=\$(grep ">" ${meta.id}*filt*.fasta | wc -l) """ } diff --git a/modules/local/mashmap/main.nf b/modules/local/mashmap/main.nf index 5312397..e0fac7e 100644 --- a/modules/local/mashmap/main.nf +++ b/modules/local/mashmap/main.nf @@ -1,14 +1,14 @@ process MASHMAP { label 'process_medium' - tag "${assembly_name}" + tag "${meta.id} ${confidence_set_name}" container 'quay.io/microbiome-informatics/mashmap:2.0' input: - tuple val(assembly_name), val(confidence_set_name), file(fasta) - file(reference) + tuple val(meta), val(confidence_set_name), path(fasta) + path(reference) output: - file("${confidence_set_name}_mashmap_hits.tsv") + path("${confidence_set_name}_mashmap_hits.tsv") script: """ diff --git a/modules/local/multiqc/main.nf b/modules/local/multiqc/main.nf deleted file mode 100644 index da54e9b..0000000 --- a/modules/local/multiqc/main.nf +++ /dev/null @@ -1,15 +0,0 @@ -process MULTIQC { - label 'process_low' - tag "${name}" - container 'quay.io/biocontainers/multiqc:1.9--py_1' - - input: - tuple val(name), file(fastqc) - output: - tuple val(name), file("${name}_multiqc_report.html") - - script: - """ - multiqc -i ${name} . 
- """ -} diff --git a/modules/local/parse/main.nf b/modules/local/parse/main.nf index 41949a7..ec23045 100644 --- a/modules/local/parse/main.nf +++ b/modules/local/parse/main.nf @@ -1,21 +1,21 @@ process PARSE { label 'process_low' - tag "${name}" + tag "${meta.id}" container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(name), file(fasta), val(contig_number), file(virfinder), file(virsorter), file(pprmeta) + tuple val(meta), path(fasta), val(contig_number), path(virfinder), path(virsorter), path(pprmeta) when: contig_number.toInteger() > 0 output: - tuple val(name), file("*.fna"), file('virsorter_metadata.tsv'), file("${name}_virus_predictions.log"), optional: true + tuple val(meta), path("*.fna"), path('virsorter_metadata.tsv'), path("${meta.id}_virus_predictions.log"), optional: true script: """ touch virsorter_metadata.tsv - parse_viral_pred.py -a ${fasta} -f ${virfinder} -p ${pprmeta} -s ${virsorter}/Predicted_viral_sequences/*.fasta &> ${name}_virus_predictions.log + parse_viral_pred.py -a ${fasta} -f ${virfinder} -p ${pprmeta} -s ${virsorter}/Predicted_viral_sequences/*.fasta &> ${meta.id}_virus_predictions.log """ } diff --git a/modules/local/phanotate/main.nf b/modules/local/phanotate/main.nf index 5f72fe2..53b1843 100644 --- a/modules/local/phanotate/main.nf +++ b/modules/local/phanotate/main.nf @@ -1,13 +1,13 @@ process PHANOTATE { label 'process_low' - tag "${name}" + tag "${meta.id}" container 'quay.io/biocontainers/phanotate:1.5.0--h30d9df9_2' input: - tuple val(name), file(fasta) + tuple val(meta), path(fasta) output: - tuple val(name), stdout, file("*.faa") + tuple val(meta), stdout, path("*.faa") script: """ diff --git a/modules/local/plot_contig_map/main.nf b/modules/local/plot_contig_map/main.nf index bc55bf2..f7eeaaf 100644 --- a/modules/local/plot_contig_map/main.nf +++ b/modules/local/plot_contig_map/main.nf @@ -1,14 +1,14 @@ process PLOT_CONTIG_MAP { - tag "${name}" + tag "${meta.id} ${set_name}" label 'process_low' 
container 'quay.io/microbiome-informatics/virify-plot-contig-map:1' input: - tuple val(name), val(set_name), file(tab) + tuple val(meta), val(set_name), path(tab) output: - tuple val(name), val(set_name), file("${set_name}_mapping_results"), file("${set_name}_prot_ann_table_filtered.tsv") + tuple val(meta), val(set_name), path("${set_name}_mapping_results"), path("${set_name}_prot_ann_table_filtered.tsv") script: """ diff --git a/modules/local/pprmeta/main.nf b/modules/local/pprmeta/main.nf index e1f6288..86b5659 100644 --- a/modules/local/pprmeta/main.nf +++ b/modules/local/pprmeta/main.nf @@ -1,22 +1,22 @@ process PPRMETA { label 'process_medium' - tag "${name}" + tag "${meta.id}" container 'quay.io/microbiome-informatics/pprmeta:1.1' input: - tuple val(name), file(fasta), val(contig_number) + tuple val(meta), path(fasta), val(contig_number) path(pprmeta_git) when: contig_number.toInteger() > 0 output: - tuple val(name), file("${name}_pprmeta.csv") + tuple val(meta), path("${meta.id}_pprmeta.csv") script: """ [ -d "pprmeta" ] && cp pprmeta/* . 
- ./PPR_Meta ${fasta} ${name}_pprmeta.csv + ./PPR_Meta ${fasta} ${meta.id}_pprmeta.csv """ } @@ -24,7 +24,8 @@ process PPRMETA { // need to implement this so its fixed process pprmetaGet { - label 'noDocker' + container 'nanozoo/template:3.8--ccd0653' + label 'process_single' if (params.cloudProcess) { publishDir "${params.databases}/pprmeta", mode: 'copy', pattern: "*" } diff --git a/modules/local/prodigal/main.nf b/modules/local/prodigal/main.nf index 5b429f0..44d1875 100644 --- a/modules/local/prodigal/main.nf +++ b/modules/local/prodigal/main.nf @@ -1,13 +1,13 @@ process PRODIGAL { - label 'process_high' - tag "${name}" + label 'process_medium' + tag "${meta.id} ${confidence_set_name}" container 'quay.io/biocontainers/prodigal:2.6.3--hec16e2b_4' input: - tuple val(assembly_name), val(confidence_set_name), file(fasta) + tuple val(meta), val(confidence_set_name), path(fasta) output: - tuple val(assembly_name), val(confidence_set_name), file("*.faa") + tuple val(meta), val(confidence_set_name), path("*.faa") script: """ diff --git a/modules/local/ratio_evalue/main.nf b/modules/local/ratio_evalue/main.nf index 28ff720..9a73c12 100644 --- a/modules/local/ratio_evalue/main.nf +++ b/modules/local/ratio_evalue/main.nf @@ -7,17 +7,17 @@ process RATIO_EVALUE { out PRJNA530103_small_modified_informative.tsv */ - tag "${name}" + tag "${meta.id} ${set_name}" label 'process_low' container 'quay.io/microbiome-informatics/virify-python3:1.1' input: - tuple val(name), val(set_name), file(modified_table), file(faa) - file(model_metadata) + tuple val(meta), val(set_name), path(modified_table), path(faa) + path(model_metadata) output: - tuple val(name), val(set_name), file("${set_name}_modified_informative.tsv"), file(faa), optional: true + tuple val(meta), val(set_name), path("${set_name}_modified_informative.tsv"), path(faa), optional: true script: """ diff --git a/modules/local/rename/main.nf b/modules/local/rename/main.nf index 8a54128..cb4c87a 100644 --- 
a/modules/local/rename/main.nf +++ b/modules/local/rename/main.nf @@ -3,24 +3,26 @@ process RENAME { usage: rename_fasta.py [-h] -i INPUT [-m MAP] -o OUTPUT {rename,restore} ... */ - label 'process_low' - tag "${name}" + label 'process_single' + tag "${meta.id}" container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(name), file(fasta) + tuple val(meta), path(fasta) output: - tuple val(name), file("${name}_renamed.fasta"), file("${name}_map.tsv") + tuple val(meta), path("${meta.id}_renamed.fasta"), path("${meta.id}_map.tsv") script: """ if [[ ${fasta} =~ \\.gz\$ ]]; then zcat ${fasta} > tmp.fasta + echo "compressed" else cp ${fasta} tmp.fasta + echo "uncompressed" fi - rename_fasta.py -i tmp.fasta -m ${name}_map.tsv -o ${name}_renamed.fasta rename + rename_fasta.py -i tmp.fasta -m ${meta.id}_map.tsv -o ${meta.id}_renamed.fasta rename """ } diff --git a/modules/local/restore/main.nf b/modules/local/restore/main.nf index 4327181..a9b9f7d 100644 --- a/modules/local/restore/main.nf +++ b/modules/local/restore/main.nf @@ -2,16 +2,16 @@ process RESTORE { /* usage: rename_fasta.py [-h] -i INPUT [-m MAP] -o OUTPUT {rename,restore} ... 
*/ - tag "${name}" - label 'process_low' + tag "${meta.id}" + label 'process_single' container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(name), file(fasta), file(map) + tuple val(meta), path(fasta), path(map) output: - tuple val(name), env(BN), file("*_original.fasta") + tuple val(meta), env(BN), path("*_original.fasta") script: """ diff --git a/modules/local/sankey/main.nf b/modules/local/sankey/main.nf index b1f2252..0bc7171 100644 --- a/modules/local/sankey/main.nf +++ b/modules/local/sankey/main.nf @@ -1,13 +1,13 @@ process GENERATE_SANKEY_TABLE { label 'process_low' - tag "${name}" + tag "${meta.id} ${set_name}" container 'quay.io/microbiome-informatics/bioruby:2.0.1' input: - tuple val(name), val(set_name), file(krona_table) + tuple val(meta), val(set_name), path(krona_table) output: - tuple val(name), val(set_name), file("${set_name}.sankey.filtered-${params.sankey}.json"), file("${set_name}.sankey.tsv") + tuple val(meta), val(set_name), path("${set_name}.sankey.filtered-${params.sankey}.json"), path("${set_name}.sankey.tsv") script: """ @@ -22,19 +22,19 @@ process GENERATE_SANKEY_TABLE { process SANKEY { - label 'process_medium' - + label 'process_low' + tag "${meta.id} ${set_name}" container 'quay.io/microbiome-informatics/sankeyd3:0.12.3' input: - tuple val(name), val(set_name), file(json), file(tsv) + tuple val(meta), val(set_name), path(json), path(tsv) output: - tuple val(name), val(set_name), file("*.sankey.html") + tuple val(meta), val(set_name), path("*.sankey.html") script: id = set_name - if (set_name == "all") { id = name } + if (set_name == "all") { id = meta.id } """ #!/usr/bin/env Rscript diff --git a/modules/local/spades/main.nf b/modules/local/spades/main.nf deleted file mode 100644 index cdf4483..0000000 --- a/modules/local/spades/main.nf +++ /dev/null @@ -1,17 +0,0 @@ -process SPADES { - - label 'process_medium' - tag "${name}" - container 'quay.io/biocontainers/spades:3.15.5--h95f258a_1' - - input: - tuple 
val(name), file(reads) - output: - tuple val(name), file("${name}.fasta") - - script: - """ - spades.py --meta --only-assembler -1 !{reads[0]} -2 !{reads[1]} -t !{task.cpus} -o assembly - mv assembly/contigs.fasta !{name}.fasta - """ -} \ No newline at end of file diff --git a/modules/local/virfinder/main.nf b/modules/local/virfinder/main.nf index f4d8f96..69e645f 100644 --- a/modules/local/virfinder/main.nf +++ b/modules/local/virfinder/main.nf @@ -1,22 +1,22 @@ process VIRFINDER { - tag "${name}" - label 'process_high' + tag "${meta.id}" + label 'process_medium' container 'quay.io/microbiome-informatics/virfinder:1.1__eb8032e' input: - tuple val(name), file(fasta), val(contig_number) + tuple val(meta), path(fasta), val(contig_number) path model when: contig_number.toInteger() > 0 output: - tuple val(name), file("${name}.txt") + tuple val(meta), path("${meta.id}.txt") script: """ run_virfinder.Rscript ${model} ${fasta} . - awk '{print \$1"\\t"\$2"\\t"\$3"\\t"\$4}' ${name}*.tsv > ${name}.txt + awk '{print \$1"\\t"\$2"\\t"\$3"\\t"\$4}' ${meta.id}*.tsv > ${meta.id}.txt """ } diff --git a/modules/local/virsorter/main.nf b/modules/local/virsorter/main.nf index 3d5a2bd..23b5382 100644 --- a/modules/local/virsorter/main.nf +++ b/modules/local/virsorter/main.nf @@ -1,17 +1,17 @@ process VIRSORTER { - label 'process_high' - tag "${name}" + label 'process_medium' + tag "${meta.id}" container 'quay.io/microbiome-informatics/virsorter:1.0.6_edfeb8c5e72' input: - tuple val(name), file(fasta), val(contig_number) + tuple val(meta), path(fasta), val(contig_number) path(database) when: contig_number.toInteger() > 0 output: - tuple val(name), file("*") + tuple val(meta), path("*") script: if (params.virome) diff --git a/modules/local/write_gff/main.nf b/modules/local/write_gff/main.nf index 198b221..e759530 100644 --- a/modules/local/write_gff/main.nf +++ b/modules/local/write_gff/main.nf @@ -1,17 +1,14 @@ process WRITE_GFF { - tag "${name}" - label 'process_medium' + tag 
"${meta.id}" + label 'process_low' container 'quay.io/microbiome-informatics/virify-python3:1.2' input: - tuple val(name), path(fasta) - path(viphos_annotations) - path(taxonomies) - path(quality_summaries) + tuple val(meta), path(fasta), path(viphos_annotations), path(taxonomies), path(quality_summaries) output: - path("${name}_virify.gff") + path("${meta.id}_virify.gff") script: """ @@ -19,9 +16,9 @@ process WRITE_GFF { -v ${viphos_annotations.join(' ')} \ -c ${quality_summaries.join(' ')} \ -t ${taxonomies.join(' ')} \ - -s ${name} \ + -s ${meta.id} \ -a ${fasta} - gt gff3validator ${name}_virify.gff + gt gff3validator ${meta.id}_virify.gff """ } diff --git a/modules/local/fastp/fastp.yaml b/modules/nf-core/checkv/endtoend/environment.yml similarity index 64% rename from modules/local/fastp/fastp.yaml rename to modules/nf-core/checkv/endtoend/environment.yml index b4df7d8..8646fff 100644 --- a/modules/local/fastp/fastp.yaml +++ b/modules/nf-core/checkv/endtoend/environment.yml @@ -1,6 +1,5 @@ -name: fastp channels: - - bioconda - conda-forge + - bioconda dependencies: - - fastp=0.20.0 + - bioconda::checkv=1.0.1 \ No newline at end of file diff --git a/modules/nf-core/checkv/endtoend/main.nf b/modules/nf-core/checkv/endtoend/main.nf new file mode 100644 index 0000000..635c9fa --- /dev/null +++ b/modules/nf-core/checkv/endtoend/main.nf @@ -0,0 +1,63 @@ +process CHECKV_ENDTOEND { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/checkv:1.0.1--pyhdfd78af_0': + 'biocontainers/checkv:1.0.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + path db + + output: + tuple val(meta), path ("${prefix}/quality_summary.tsv") , emit: quality_summary + tuple val(meta), path ("${prefix}/completeness.tsv") , emit: completeness + tuple val(meta), path ("${prefix}/contamination.tsv") , emit: contamination + tuple val(meta), path ("${prefix}/complete_genomes.tsv"), emit: complete_genomes + tuple val(meta), path ("${prefix}/proviruses.fna") , emit: proviruses + tuple val(meta), path ("${prefix}/viruses.fna") , emit: viruses + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + checkv \\ + end_to_end \\ + $args \\ + -t $task.cpus \\ + -d $db \\ + $fasta \\ + $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + checkv: \$(checkv -h 2>&1 | sed -n 's/^.*CheckV v//; s/: assessing.*//; 1p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ${prefix} + touch ${prefix}/quality_summary.tsv + touch ${prefix}/completeness.tsv + touch ${prefix}/contamination.tsv + touch ${prefix}/complete_genomes.tsv + touch ${prefix}/proviruses.fna + touch ${prefix}/viruses.fna + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + checkv: \$(checkv -h 2>&1 | sed -n 's/^.*CheckV v//; s/: assessing.*//; 1p') + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/checkv/endtoend/meta.yml b/modules/nf-core/checkv/endtoend/meta.yml new file mode 100644 index 0000000..c74d091 --- /dev/null +++ b/modules/nf-core/checkv/endtoend/meta.yml @@ -0,0 +1,107 @@ +name: "checkv_endtoend" +description: Assess the quality of metagenome-assembled viral genomes. 
+keywords: + - checkv + - checkm + - mag + - metagenome + - quality + - isolates + - virus + - completeness + - contamination +tools: + - "checkv": + description: Assess the quality of metagenome-assembled viral genomes. + homepage: https://bitbucket.org/berkeleylab/checkv/src/master/ + documentation: https://bitbucket.org/berkeleylab/checkv/src/master/ + tool_dev_url: https://bitbucket.org/berkeleylab/checkv/src/master/ + doi: "10.1038/s41587-020-00774-7" + licence: ["BSD License"] + identifier: biotools:checkv +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: fasta file + pattern: "*.{fasta,fna,fa}" + - - db: + type: directory + description: Directory pointing to checkV database +output: + - quality_summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample', bin:'1' ] + - ${prefix}/quality_summary.tsv: + type: file + description: CheckV's main output containing integrated results from the three + main modules (contamination, completeness, complete genomes) with overall + quality of contigs + pattern: "${prefix}/quality_summary.tsv" + - completeness: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample', bin:'1' ] + - ${prefix}/completeness.tsv: + type: file + description: CheckV's detailed overview table on estimating completeness + pattern: "${prefix}/completeness.tsv" + - contamination: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample', bin:'1' ] + - ${prefix}/contamination.tsv: + type: file + description: CheckV's detailed overview table on estimating contamination + pattern: "${prefix}/contamination.tsv" + - complete_genomes: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample', bin:'1' ] + - ${prefix}/complete_genomes.tsv: + type: file + description: CheckV's detailed overview table on the identified putative complete + genomes + pattern: "${prefix}/complete_genomes.tsv" + - proviruses: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample', bin:'1' ] + - ${prefix}/proviruses.fna: + type: file + description: CheckV's extracted proviruses contigs + pattern: "${prefix}/proviruses.fna" + - viruses: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample', bin:'1' ] + - ${prefix}/viruses.fna: + type: file + description: CheckV's extracted virus contigs + pattern: "${prefix}/viruses.fna" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" \ No newline at end of file diff --git a/modules/local/spades/spades.yaml b/modules/nf-core/fastp/environment.yml similarity index 64% rename from modules/local/spades/spades.yaml rename to modules/nf-core/fastp/environment.yml index b6db0fd..de9463b 100644 --- a/modules/local/spades/spades.yaml +++ b/modules/nf-core/fastp/environment.yml @@ -1,6 +1,5 @@ -name: spades channels: - - bioconda - conda-forge + - bioconda dependencies: - - spades=3.14 + - bioconda::fastp=0.23.4 \ No newline at end of file diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 0000000..08200cd --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,125 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : + 'biocontainers/fastp:0.23.4--h5f740d0_0' }" + + input: + tuple val(meta), path(reads) + path adapter_fasta + val discard_trimmed_pass + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + def out_fq1 = discard_trimmed_pass ?: ( meta.single_end ? "--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_1.fastp.fastq.gz" ) + def out_fq2 = discard_trimmed_pass ?: "--out2 ${prefix}_2.fastp.fastq.gz" + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --in1 ${prefix}.fastq.gz \\ + $out_fq1 \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz + fastp \\ + --in1 ${prefix}_1.fastq.gz \\ + --in2 ${prefix}_2.fastq.gz \\ + $out_fq1 \\ + $out_fq2 \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? 
"echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : "" + def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? "echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_1.fail.fastq.gz ; echo '' | gzip > ${prefix}_2.fail.fastq.gz" + """ + $touch_reads + $touch_fail_fastq + $touch_merged + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 0000000..bece97e --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,113 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. + documentation: https://github.com/OpenGene/fastp + doi: 10.1093/bioinformatics/bty560 + licence: ["MIT"] + identifier: biotools:fastp +input: + - - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" + - - discard_trimmed_pass: + type: boolean + description: Specify true to not write any reads that pass trimming thresholds. + This can be used to use fastp for the output report only. + - - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds + ending in `*.fail.fastq.gz` + - - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastp.fastq.gz": + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: Results in JSON format + pattern: "*.json" + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: Results in HTML format + pattern: "*.html" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: fastp log file + pattern: "*.log" + - reads_fail: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.fail.fastq.gz": + type: file + description: Reads that failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" \ No newline at end of file diff --git a/modules/local/multiqc/multiqc.yaml b/modules/nf-core/fastqc/environment.yml similarity index 63% rename from modules/local/multiqc/multiqc.yaml rename to modules/nf-core/fastqc/environment.yml index 3bc368b..8b76b92 100644 --- a/modules/local/multiqc/multiqc.yaml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,6 +1,5 @@ -name: multiqc channels: - - bioconda - conda-forge + - bioconda dependencies: - - multiqc=1.8 + - bioconda::fastqc=0.12.1 \ No newline at end of file diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf new file mode 100644 index 0000000..feee5f0 --- /dev/null +++ b/modules/nf-core/fastqc/main.nf @@ -0,0 +1,64 @@ +process FASTQC { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) + // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 + // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) + + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + + fastqc \\ + $args \\ + --threads $task.cpus \\ + --memory $fastqc_memory \\ + $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml new file mode 100644 index 0000000..40c8711 --- /dev/null +++ b/modules/nf-core/fastqc/meta.yml @@ -0,0 +1,66 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] + identifier: biotools:fastqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.zip": + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" \ No newline at end of file diff --git a/modules/local/fastqc/fastqc.yaml b/modules/nf-core/multiqc/environment.yml similarity index 63% rename from modules/local/fastqc/fastqc.yaml rename to modules/nf-core/multiqc/environment.yml index eef652b..e1d226f 100644 --- a/modules/local/fastqc/fastqc.yaml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,6 +1,5 @@ -name: fastqc channels: - - bioconda - conda-forge + - bioconda dependencies: - - fastqc=0.11.8 + - bioconda::multiqc=1.25.1 \ No newline at end of file diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 0000000..a91446d --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,63 @@ +process MULTIQC { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + path(replace_names) + path(sample_names) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $prefix \\ + $extra_config \\ + $logo \\ + $replace \\ + $samples \\ + . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + mkdir multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml new file mode 100644 index 0000000..2621b2a --- /dev/null +++ b/modules/nf-core/multiqc/meta.yml @@ -0,0 +1,78 @@ +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples into + a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. + homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc +input: + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. 
+ pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample-names + argument. + pattern: "*.{tsv}" +output: + - report: + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_plots" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" \ No newline at end of file diff --git a/modules/nf-core/prodigal/environment.yml b/modules/nf-core/prodigal/environment.yml new file mode 100644 index 0000000..b9455d6 --- /dev/null +++ b/modules/nf-core/prodigal/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::prodigal=2.6.3 + - conda-forge::pigz=2.6 \ No newline at end of file diff --git a/modules/nf-core/prodigal/main.nf b/modules/nf-core/prodigal/main.nf new file mode 100644 index 0000000..916f97e --- /dev/null +++ b/modules/nf-core/prodigal/main.nf @@ -0,0 +1,64 @@ +process PRODIGAL { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-2e442ba7b07bfa102b9cf8fac6221263cd746ab8:57f05cfa73f769d6ed6d54144cb3aa2a6a6b17e0-0' : + 'biocontainers/mulled-v2-2e442ba7b07bfa102b9cf8fac6221263cd746ab8:57f05cfa73f769d6ed6d54144cb3aa2a6a6b17e0-0' }" + + input: + tuple val(meta), path(genome) + val(output_format) + + output: + tuple val(meta), path("${prefix}.${output_format}.gz"), emit: gene_annotations + tuple val(meta), path("${prefix}.fna.gz"), emit: nucleotide_fasta + tuple val(meta), path("${prefix}.faa.gz"), emit: amino_acid_fasta + tuple val(meta), path("${prefix}_all.txt.gz"), emit: all_gene_annotations + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + pigz -cdf ${genome} | prodigal \\ + $args \\ + -f $output_format \\ + -d "${prefix}.fna" \\ + -o "${prefix}.${output_format}" \\ + -a "${prefix}.faa" \\ + -s "${prefix}_all.txt" + + pigz -nm ${prefix}.fna + pigz -nm ${prefix}.${output_format} + pigz -nm ${prefix}.faa + pigz -nm ${prefix}_all.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p') + pigz: \$(pigz -V 2>&1 | sed 's/pigz //g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fna.gz + touch ${prefix}.${output_format}.gz + touch ${prefix}.faa.gz + touch ${prefix}_all.txt.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p') + pigz: \$(pigz -V 2>&1 | sed 's/pigz //g') + END_VERSIONS + """ + +} \ No newline at end of file diff --git a/modules/nf-core/prodigal/meta.yml b/modules/nf-core/prodigal/meta.yml new file mode 100644 index 0000000..d59ff5c --- /dev/null +++ b/modules/nf-core/prodigal/meta.yml @@ -0,0 +1,79 @@ +name: prodigal +description: Prodigal (Prokaryotic 
Dynamic Programming Genefinding Algorithm) is a + microbial (bacterial and archaeal) gene finding program +keywords: + - prokaryotes + - gene finding + - microbial +tools: + - prodigal: + description: Prodigal (Prokaryotic Dynamic Programming Genefinding Algorithm) + is a microbial (bacterial and archaeal) gene finding program + homepage: https://github.com/hyattpd/Prodigal + documentation: https://github.com/hyattpd/prodigal/wiki + tool_dev_url: https://github.com/hyattpd/Prodigal + doi: "10.1186/1471-2105-11-119" + licence: ["GPL v3"] + identifier: biotools:prodigal +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - genome: + type: file + description: fasta/fasta.gz file + - - output_format: + type: string + description: Output format ("gbk"/"gff"/"sqn"/"sco") +output: + - gene_annotations: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${output_format}.gz: + type: file + description: gene annotations in output_format given as input + pattern: "*.{output_format}" + - nucleotide_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.fna.gz: + type: file + description: nucleotide sequences file + pattern: "*.{fna}" + - amino_acid_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.faa.gz: + type: file + description: protein translations file + pattern: "*.{faa}" + - all_gene_annotations: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}_all.txt.gz: + type: file + description: complete starts file + pattern: "*.{_all.txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@grst" +maintainers: + - "@grst" \ No newline at end of file diff --git a/modules/nf-core/spades/environment.yml b/modules/nf-core/spades/environment.yml new file mode 100644 index 0000000..569eb73 --- /dev/null +++ b/modules/nf-core/spades/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::spades=4.0.0 \ No newline at end of file diff --git a/modules/nf-core/spades/main.nf b/modules/nf-core/spades/main.nf new file mode 100644 index 0000000..46f11c2 --- /dev/null +++ b/modules/nf-core/spades/main.nf @@ -0,0 +1,102 @@ +process SPADES { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/spades:4.0.0--h5fb382e_1' : + 'biocontainers/spades:4.0.0--h5fb382e_1' }" + + input: + tuple val(meta), path(illumina), path(pacbio), path(nanopore) + path yml + path hmm + + output: + tuple val(meta), path('*.scaffolds.fa.gz') , optional:true, emit: scaffolds + tuple val(meta), path('*.contigs.fa.gz') , optional:true, emit: contigs + tuple val(meta), path('*.transcripts.fa.gz') , optional:true, emit: transcripts + tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters + tuple val(meta), path('*.assembly.gfa.gz') , optional:true, emit: gfa + tuple val(meta), path('*.warnings.log') , optional:true, emit: warnings + tuple val(meta), path('*.spades.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def maxmem = task.memory.toGiga() + def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : "" + def pacbio_reads = pacbio ? "--pacbio $pacbio" : "" + def nanopore_reads = nanopore ? "--nanopore $nanopore" : "" + def custom_hmms = hmm ? "--custom-hmms $hmm" : "" + def reads = yml ? 
"--dataset $yml" : "$illumina_reads $pacbio_reads $nanopore_reads" + """ + spades.py \\ + $args \\ + --threads $task.cpus \\ + --memory $maxmem \\ + $custom_hmms \\ + $reads \\ + -o ./ + mv spades.log ${prefix}.spades.log + + if [ -f scaffolds.fasta ]; then + mv scaffolds.fasta ${prefix}.scaffolds.fa + gzip -n ${prefix}.scaffolds.fa + fi + if [ -f contigs.fasta ]; then + mv contigs.fasta ${prefix}.contigs.fa + gzip -n ${prefix}.contigs.fa + fi + if [ -f transcripts.fasta ]; then + mv transcripts.fasta ${prefix}.transcripts.fa + gzip -n ${prefix}.transcripts.fa + fi + if [ -f assembly_graph_with_scaffolds.gfa ]; then + mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa + gzip -n ${prefix}.assembly.gfa + fi + + if [ -f gene_clusters.fasta ]; then + mv gene_clusters.fasta ${prefix}.gene_clusters.fa + gzip -n ${prefix}.gene_clusters.fa + fi + + if [ -f warnings.log ]; then + mv warnings.log ${prefix}.warnings.log + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def maxmem = task.memory.toGiga() + def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : "" + def pacbio_reads = pacbio ? "--pacbio $pacbio" : "" + def nanopore_reads = nanopore ? "--nanopore $nanopore" : "" + def custom_hmms = hmm ? "--custom-hmms $hmm" : "" + def reads = yml ? 
"--dataset $yml" : "$illumina_reads $pacbio_reads $nanopore_reads" + """ + echo "" | gzip > ${prefix}.scaffolds.fa.gz + echo "" | gzip > ${prefix}.contigs.fa.gz + echo "" | gzip > ${prefix}.transcripts.fa.gz + echo "" | gzip > ${prefix}.gene_clusters.fa.gz + echo "" | gzip > ${prefix}.assembly.gfa.gz + touch ${prefix}.spades.log + touch ${prefix}.warnings.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p') + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/spades/meta.yml b/modules/nf-core/spades/meta.yml new file mode 100644 index 0000000..65d260d --- /dev/null +++ b/modules/nf-core/spades/meta.yml @@ -0,0 +1,151 @@ +name: spades +description: Assembles a small genome (bacterial, fungal, viral) +keywords: + - genome + - assembly + - genome assembler + - small genome + - de novo assembler +tools: + - spades: + description: SPAdes (St. Petersburg genome assembler) is intended for both standard + isolates and single-cell MDA bacteria assemblies. + homepage: http://cab.spbu.ru/files/release3.15.0/manual.html + documentation: http://cab.spbu.ru/files/release3.15.0/manual.html + tool_dev_url: https://github.com/ablab/spades + doi: 10.1089/cmb.2012.0021 + licence: ["GPL v2"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - illumina: + type: file + description: | + List of input FastQ (Illumina or PacBio CCS reads) files + of size 1 and 2 for single-end and paired-end data, + respectively. This input data type is required. + - pacbio: + type: file + description: | + List of input PacBio CLR FastQ files of size 1. + - nanopore: + type: file + description: | + List of input FastQ files of size 1, originating from Oxford Nanopore technology. + - - yml: + type: file + description: | + Path to yml file containing read information. 
+ The raw FASTQ files listed in this YAML file MUST be supplied to the respective illumina/pacbio/nanopore input channel(s) _in addition_ to this YML. + File entries in this yml must contain only the file name and no paths. + pattern: "*.{yml,yaml}" + - - hmm: + type: file + description: File or directory with amino acid HMMs for Spades HMM-guided mode. +output: + - scaffolds: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - "*.scaffolds.fa.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - contigs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - "*.contigs.fa.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - transcripts: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - "*.transcripts.fa.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - gene_clusters: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - "*.gene_clusters.fa.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fa.gz" + - gfa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.gfa.gz" + - "*.assembly.gfa.gz": + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + pattern: "*.gfa.gz" + - warnings: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.warnings.log": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.spades.log" + - "*.spades.log": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.spades.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@JoseEspinosa" + - "@drpatelh" + - "@d4straub" +maintainers: + - "@JoseEspinosa" + - "@drpatelh" + - "@d4straub" \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 34fa99b..8a08d30 100755 --- a/nextflow.config +++ b/nextflow.config @@ -63,15 +63,30 @@ params { finaldir = '08-final' // location for autodownload data like databases - dbs = 'nextflow-autodownload-databases' + databases = 'nextflow-autodownload-databases' // optional profile configurations, mostly necessary for HPC execution [lsf, slurm] workdir = 'work' singularity_cachedir = 'singularity' publish_dir_mode = 'copy' + + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + multiqc_methods_description = null + + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '1.TB' + max_cpus = 32 + max_time = '168.h' // 7 days } +includeConfig 'configs/base.config' +includeConfig 'configs/modules.config' + profiles { //executors @@ -81,33 +96,28 @@ profiles { cpus = params.max_cores } workDir = params.workdir - params.databases = params.dbs params.cloudProcess = false includeConfig 'configs/local.config' } lsf { workDir = params.workdir - params.databases = 
params.dbs executor { name = "lsf" queueSize = 200 } params.cloudProcess = true process.cache = "lenient" - includeConfig 'configs/node.config' } slurm { workDir = params.workdir - params.databases = params.dbs executor { name = "slurm" queueSize = 200 } params.cloudProcess = true process.cache = "lenient" - includeConfig 'configs/node.config' } @@ -137,7 +147,6 @@ profiles { cpus = params.max_cores } workDir = params.workdir - params.databases = params.dbs params.cloudProcess = false includeConfig 'configs/local.config' docker { enabled = true } @@ -152,7 +161,6 @@ profiles { params.cloudProcess = true process.cache = "lenient" - includeConfig 'configs/node.config' singularity { enabled = true @@ -177,13 +185,38 @@ profiles { } params.cloudProcess = true - includeConfig 'configs/node.config' docker { enabled = true } + } +} - // we need a docker also for basic functionalities in the cloud - process { - withLabel: noDocker { cpus = 4; memory = '8.0 GB'; container = 'nanozoo/template:3.8--ccd0653' } +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj } } -} +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 47a5461..3e400e2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -148,19 +148,19 @@ "description": "Input parameters", "properties": { "virome": { - "type": "string", + "type": "boolean", "description": "VirSorter parameter, set when running a data set mostly composed of viruses" }, "chromomap": { - "type": "string", + "type": "boolean", "description": "feature to activate chromomap plot" }, "balloon": { - "type": "string", + "type": "boolean", "description": "feature to activate balloon plot" }, "onlyannotate": { - "type": "string", + "type": "boolean", "description": "Only annotate the input FASTA (no virus prediction, only contig length filtering)" }, "mashmap_len": { @@ -169,7 +169,7 @@ "description": "Mashmap mapping segment length, shorter sequences will be ignored" }, "mashmap": { - "type": "string", + "type": "boolean", "description": "Map the viral contigs against the provided reference" }, "evalue": { @@ -189,7 +189,7 @@ }, "factor": { "type": "string", - "default": "/Users/kates/Desktop/EBI/MGnify/pipelines/emg-viral-pipeline/references/viphogs_cds_per_taxon_cummulative.csv", + "default": "emg-viral-pipeline/references/viphogs_cds_per_taxon_cummulative.csv", "description": "Path to file with viral assemblies metadata, including taxon-specific factors" }, "sankey": { @@ -292,7 +292,7 @@ "fa_icon": "fas fa-dna", "description": "Nextflow arguments", "properties": { - "dbs": { + "databases": { "type": "string", "default": "nextflow-autodownload-databases", "description": "directory path to databases" @@ -307,13 +307,8 @@ "default": "singularity", "description": "singularity folder" }, - "databases": { - "type": "string", - "default": "nextflow-autodownload-databases", - "description": "directory path to databases" - }, "cloudProcess": { - "type": "string", + "type": "boolean", "description": "run on cloud" } } diff --git 
a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf index 57810b2..6c672ce 100644 --- a/subworkflows/local/annotate.nf +++ b/subworkflows/local/annotate.nf @@ -69,10 +69,10 @@ workflow ANNOTATE { ANNOTATION( RATIO_EVALUE.out ) // plot visuals --> PDFs - PLOT_CONTIG_MAP( ANNOTATION.out ) + PLOT_CONTIG_MAP( ANNOTATION.out.annotations ) // assign lineages - ASSIGN( ANNOTATION.out, ncbi_db, factor_file ) + ASSIGN( ANNOTATION.out.annotations, ncbi_db, factor_file ) // blast IMG/VR for more information if (params.blastextend) { @@ -93,25 +93,22 @@ workflow ANNOTATE { } CHECKV( - predicted_contigs.combine( contigs.map { name, fasta -> fasta }), - checkv_db + predicted_contigs, + checkv_db.first() ) - - viphos_annotations = ANNOTATION.out.map { _, __, annotations -> annotations }.collect() - taxonomy_annotations = ASSIGN.out.map { _, __, taxonomy -> taxonomy }.collect() - checkv_results = CHECKV.out.map { _, __, quality_summary, ___ -> quality_summary }.collect() + + viphos_annotations = ANNOTATION.out.annotations.map{meta, type, annotation -> [meta, annotation]}.groupTuple() + taxonomy_annotations = ASSIGN.out.map{meta, type, annotation -> [meta, annotation]}.groupTuple() + checkv_results = CHECKV.out.map{meta, type, quality -> [meta, quality]}.groupTuple() WRITE_GFF( - contigs.first(), - viphos_annotations, - taxonomy_annotations, - checkv_results + contigs.join(viphos_annotations).join(taxonomy_annotations).join(checkv_results) ) - - predicted_contigs_filtered = predicted_contigs.map { id, set_name, fasta -> [set_name, id, fasta] } - plot_contig_map_filtered = PLOT_CONTIG_MAP.out.map { id, set_name, dir, table -> [set_name, table] } + + predicted_contigs_filtered = predicted_contigs.map { meta, set_name, fasta -> [set_name, meta, fasta] } + plot_contig_map_filtered = PLOT_CONTIG_MAP.out.map { meta, set_name, dir, table -> [set_name, table] } chromomap_ch = predicted_contigs_filtered.join(plot_contig_map_filtered).map { set_name, assembly_name, fasta, tsv 
-> [assembly_name, set_name, fasta, tsv]} - + emit: assign_output = ASSIGN.out chromomap = chromomap_ch diff --git a/subworkflows/local/assemble_illumina.nf b/subworkflows/local/assemble_illumina.nf index 7fe36be..d45aa6c 100644 --- a/subworkflows/local/assemble_illumina.nf +++ b/subworkflows/local/assemble_illumina.nf @@ -2,24 +2,39 @@ Optional assembly step, not fully implemented and tested. */ -include { FASTP } from '../../modules/local/fastp' -include { FASTQC } from '../../modules/local/fastqc' -include { MULTIQC } from '../../modules/local/multiqc' -include { SPADES } from '../../modules/local/spades' +include { FASTP } from '../../modules/nf-core/fastp' +include { FASTQC as FASTQC_BEFORE } from '../../modules/nf-core/fastqc' +include { FASTQC as FASTQC_AFTER } from '../../modules/nf-core/fastqc' +include { SPADES } from '../../modules/nf-core/spades' workflow ASSEMBLE_ILLUMINA { take: reads main: + // QC before filtering + FASTQC_BEFORE(reads) + // trimming - FASTP(reads) + FASTP( + reads, + [], + false, + false, + false + ) + + // QC after filtering + FASTQC_AFTER(FASTP.out.reads) - // read QC - MULTIQC(FASTQC(FSATP.out)) - // assembly - SPADES(FASTP.out) + SPADES(FASTP.out.reads.map { meta, trimmed -> [meta, trimmed, [], []] }, [], []) // SPADES expects (meta, illumina, pacbio, nanopore), yml, hmm; only Illumina reads exist here + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix( FASTQC_BEFORE.out.zip.collect{it[1]}.ifEmpty([]) ) + ch_multiqc_files = ch_multiqc_files.mix( FASTP.out.json.collect{it[1]}.ifEmpty([]) ) + ch_multiqc_files = ch_multiqc_files.mix( FASTQC_AFTER.out.zip.collect{it[1]}.ifEmpty([]) ) emit: - assembly = SPADES.out + assembly = SPADES.out.contigs + ch_multiqc_files = ch_multiqc_files } \ No newline at end of file diff --git a/subworkflows/local/detect.nf b/subworkflows/local/detect.nf index bc4a37e..ea9fd64 100644 --- a/subworkflows/local/detect.nf +++ b/subworkflows/local/detect.nf @@ -18,14 +18,12 @@ workflow DETECT { main: - renamed_ch = assembly_renamed_length_filtered.map {name, renamed_fasta, map, _, __ -> { - tuple(name, renamed_fasta, map) - } +
renamed_ch = assembly_renamed_length_filtered.map { + meta, renamed_fasta, map, _, __ -> tuple(meta, renamed_fasta, map) } - length_filtered_ch = assembly_renamed_length_filtered.map { name, _, __, filtered_fasta, contig_number -> { - tuple(name, filtered_fasta, contig_number) - } + length_filtered_ch = assembly_renamed_length_filtered.map { + meta, _, __, filtered_fasta, contig_number -> tuple(meta, filtered_fasta, contig_number) } // virus detection --> VirSorter, VirFinder and PPR-Meta @@ -37,5 +35,5 @@ workflow DETECT { PARSE( length_filtered_ch.join( VIRFINDER.out ).join( VIRSORTER.out ).join( PPRMETA.out ) ) emit: - detect_output = PARSE.out.join(renamed_ch).transpose().map{ name, fasta, vs_meta, log, renamed_fasta, map -> tuple (name, fasta, map) } + detect_output = PARSE.out.join(renamed_ch).transpose().map{ meta, fasta, vs_meta, log, renamed_fasta, map -> tuple (meta, fasta, map) } } \ No newline at end of file diff --git a/subworkflows/local/preprocess.nf b/subworkflows/local/preprocess.nf index 86cb231..efff437 100644 --- a/subworkflows/local/preprocess.nf +++ b/subworkflows/local/preprocess.nf @@ -12,12 +12,12 @@ workflow PREPROCESS { main: - RENAME(assembly) + RENAME(assembly) // out: (meta, renamed.fasta, map) // filter contigs by length - LENGTH_FILTERING(RENAME.out) + LENGTH_FILTERING(RENAME.out) // out: (meta, filt_fasta, env) emit: - // tuple val(name), file("${name}_renamed.fasta"), file("${name}_map.tsv"), file("${name}*filt*.fasta"), env(CONTIGS) + // tuple val(meta), file("${meta.id}_renamed.fasta"), file("${meta.id}_map.tsv"), file("${meta.id}*filt*.fasta"), env(CONTIGS) preprocessed_data = RENAME.out.join(LENGTH_FILTERING.out, by: 0) } \ No newline at end of file diff --git a/workflows/virify.nf b/workflows/virify.nf index a892dc0..3344c2b 100755 --- a/workflows/virify.nf +++ b/workflows/virify.nf @@ -4,19 +4,27 @@ * INPUT CHANNELS **************************/ -input_ch = Channel.empty() -mashmap_ref_ch = Channel.empty() -factor_file = 
Channel.empty() +input_ch = Channel.empty() +mashmap_ref_ch = Channel.empty() +factor_file = Channel.empty() +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.fromPath("$projectDir/assets/mgnify_logo.png") +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + include { samplesheetToList } from 'plugin/nf-schema' if ( params.samplesheet ) { groupReads = { id, assembly, fq1, fq2 -> if (fq1 == []) { - return tuple(id, assembly) + return tuple(["id": id], + assembly + ) } else { if (params.assemble) { - return tuple(id, [fq1, fq2]) + return tuple(["id": id], + [fq1, fq2]) } else { exit 1, "input missing, use [--assemble] flag with raw reads" @@ -26,10 +34,11 @@ if ( params.samplesheet ) { samplesheet = Channel.fromList(samplesheetToList(params.samplesheet, "./assets/schema_input.json")) input_ch = samplesheet.map(groupReads) } + // one sample of assembly if (params.fasta) { input_ch = Channel.fromPath( params.fasta, checkIfExists: true) - .map { file -> tuple(file.simpleName, file) } + .map { file -> tuple(["id": file.simpleName], file) } } // mashmap input @@ -41,6 +50,11 @@ if (params.mashmap) { if (params.factor) { factor_file = file( params.factor, checkIfExists: true) } +/************************** +* MODULES +**************************/ + +include { MULTIQC } from '../modules/nf-core/multiqc' /************************** * SUB WORKFLOWS **************************/ @@ -83,6 +97,7 @@ workflow VIRIFY { } // ----------- rename fasta + length filtering + // out: (meta, renamed_fasta, map, filtered_fasta, env) PREPROCESS( assembly_ch
) // ----------- if --onlyannotate - skip DETECT step @@ -98,11 +113,12 @@ workflow VIRIFY { DOWNLOAD_DATABASES.out.virfinder_db, DOWNLOAD_DATABASES.out.pprmeta_git ) - postprocess_input_ch = DETECT.out + // (meta, fasta, map) + postprocess_input_ch = DETECT.out.detect_output } // ----------- POSTPROCESS: restore fasta file - POSTPROCESS(postprocess_input_ch) + POSTPROCESS(postprocess_input_ch) // out: (meta, type(HC/LC/PP), fasta) // ----------- ANNOTATE ANNOTATE( @@ -126,5 +142,15 @@ workflow VIRIFY { ANNOTATE.out.assign_output, ANNOTATE.out.chromomap ) + + if (params.assemble) { + ch_multiqc_files = ASSEMBLE_ILLUMINA.out.ch_multiqc_files + MULTIQC( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + } }