From 274dcbe71333b6fffae19f48c78dc4a28079cc83 Mon Sep 17 00:00:00 2001 From: chrisjackson Date: Mon, 12 Dec 2022 11:16:20 +1100 Subject: [PATCH] Version 2.1.1; run DIAMOND with native --threads parameter --- README.md | 2 +- change_log.md | 5 +++++ hybpiper/assemble.py | 45 +++++++++++++++++++++++++------------------- setup.py | 2 +- 4 files changed, 33 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index e1d34c7..c09daca 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # HybPiper -Current version: 2.1.0 (December 2022) +Current version: 2.1.1 (December 2022) [![DOI](https://zenodo.org/badge/6513/mossmatters/HybPiper.svg)](https://zenodo.org/badge/latestdoi/6513/mossmatters/HybPiper) diff --git a/change_log.md b/change_log.md index 4d84473..a8b7018 100644 --- a/change_log.md +++ b/change_log.md @@ -1,5 +1,10 @@ # Changelog +**2.1.1** *12th December, 2022* + +- When mapping reads with DIAMOND via `hybpiper assemble --diamond`, remove the `gunzip` step and on-the-fly fastq to fasta conversion (as DIAMOND supports both `*.fastq` and `*.gz` input). Further, pass the value of the `hybpiper assemble` parameter `--cpu` directly to the `--threads` parameter of the `diamond blastx` command; do not run `diamond` via GNU parallel. See issue #104. + + **2.1.0** *1st December, 2022* - The subcommand `hybpiper check_targetfile` now writes a `*.ctl file`; see wiki for details. diff --git a/hybpiper/assemble.py b/hybpiper/assemble.py index b8eb9bb..59babcd 100755 --- a/hybpiper/assemble.py +++ b/hybpiper/assemble.py @@ -1,10 +1,10 @@ #!/usr/bin/env python """ -HybPiper Version 2.1.0 (December 2022) +HybPiper Version 2.1.1 (December 2022) ######################################################################################################################## -############################################## NOTES ON VERSION 2.1.0 ################################################## +############################################## NOTES ON VERSION 2.1.1 ################################################## ######################################################################################################################## After installation of the pipeline, all pipeline commands are now accessed via the main command 'hybpiper', @@ -503,7 +503,7 @@ def blastx(readfiles, targetfile, evalue, basename, cpu=None, max_target_seqs=10 if os.path.isfile(f'{targetfile_basename}.psq'): db_file = targetfile_basename logger.debug(f'Using existing BLAST database. db_file is: {db_file}') - elif os.path.isfile(f'{targetfile_basename}.diamond'): + elif os.path.isfile(f'{targetfile_basename}.dmnd'): db_file = targetfile_basename logger.debug(f'Using existing DIAMOND BLAST database. db_file is: {db_file}') else: @@ -547,24 +547,28 @@ def blastx(readfiles, targetfile, evalue, basename, cpu=None, max_target_seqs=10 read_file = readfiles # Check if read file is gzipped: filename, file_extension = os.path.splitext(read_file) - if file_extension == '.gz': + if file_extension == '.gz' and not diamond: logger.debug(f'Processing gzipped file {os.path.basename(read_file)}') pipe_cmd = f"gunzip -c {read_file} | awk '{{if(NR % 4 == 1 || NR % 4 == 2) {{sub(/@/, \">\"); print; }} " \ f"}}'" - else: + elif not diamond: pipe_cmd = f"cat {read_file} | awk '{{if(NR % 4 == 1 || NR % 4 == 2) {{sub(/@/, \">\"); print; }} }}'" + if diamond and diamond_sensitivity: - blastx_command = f'diamond blastx --db {db_file} --query - --evalue {evalue} --outfmt 6 --max-target-seqs' \ - f' {max_target_seqs} --{diamond_sensitivity}' + blastx_command = f'diamond blastx --threads {cpu} --db {db_file} --query {read_file} --evalue {evalue} ' \ + f'--outfmt 6 --max-target-seqs {max_target_seqs} --{diamond_sensitivity}' elif diamond: - blastx_command = f'diamond blastx --db {db_file} --query - --evalue {evalue} --outfmt 6 --max-target-seqs' \ - f' {max_target_seqs}' + blastx_command = f'diamond blastx --threads {cpu} --db {db_file} --query {read_file} --evalue {evalue} ' \ + f'--outfmt 6 --max-target-seqs {max_target_seqs}' else: blastx_command = f'blastx -db {db_file} -query - -evalue {evalue} -outfmt 6 -max_target_seqs' \ f' {max_target_seqs}' - full_command = f"{pipe_cmd} | parallel -j {cpu} -k --block 200K --recstart '>' --pipe '{blastx_command}' >>" \ - f" {basename}_unpaired.blastx" + if not diamond: + full_command = f"{pipe_cmd} | parallel -j {cpu} -k --block 200K --recstart '>' --pipe '{blastx_command}' " \ + f">> {basename}_unpaired.blastx" + else: + full_command = f"{blastx_command} >> {basename}_unpaired.blastx" fill = utils.fill_forward_slash(f'{"[CMD]:":10} {full_command}', width=90, subsequent_indent=' ' * 11, break_long_words=False, break_on_forward_slash=True) @@ -589,7 +593,7 @@ def blastx(readfiles, targetfile, evalue, basename, cpu=None, max_target_seqs=10 for read_file in readfiles: # Check if read file is gzipped: filename, file_extension = os.path.splitext(read_file) - if file_extension == '.gz': + if file_extension == '.gz' and not diamond: logger.debug(f'Processing gzipped file {os.path.basename(read_file)}') pipe_cmd = f"gunzip -c {read_file} | awk '{{if(NR % 4 == 1 || NR % 4 == 2) {{sub(/@/, \">\"); print; " \ f"}} }}'" @@ -597,17 +601,20 @@ def blastx(readfiles, targetfile, evalue, basename, cpu=None, max_target_seqs=10 pipe_cmd = f"cat {read_file} | awk '{{if(NR % 4 == 1 || NR % 4 == 2) {{sub(/@/, \">\"); print; }} }}'" if diamond and diamond_sensitivity: - blastx_command = f'diamond blastx --db {db_file} --query - --evalue {evalue} --outfmt 6 ' \ - f'--max-target-seqs {max_target_seqs} --{diamond_sensitivity}' + blastx_command = f'diamond blastx --threads {cpu} --db {db_file} --query {read_file} --evalue' \ + f' {evalue} --outfmt 6 --max-target-seqs {max_target_seqs} --{diamond_sensitivity}' elif diamond: - blastx_command = f'diamond blastx --db {db_file} --query - --evalue {evalue} --outfmt 6 ' \ - f'--max-target-seqs {max_target_seqs}' + blastx_command = f'diamond blastx --threads {cpu} --db {db_file} --query {read_file} --evalue' \ + f' {evalue} --outfmt 6 --max-target-seqs {max_target_seqs}' else: blastx_command = f'blastx -db {db_file} -query - -evalue {evalue} -outfmt 6 -max_target_seqs' \ f' {max_target_seqs}' - full_command = f"{pipe_cmd} | parallel -j {cpu} -k --block 200K --recstart '>' --pipe " \ - f"'{blastx_command}' >> {basename}.blastx" + if not diamond: + full_command = f"{pipe_cmd} | parallel -j {cpu} -k --block 200K --recstart '>' --pipe " \ + f"'{blastx_command}' >> {basename}.blastx" + else: + full_command = f"{blastx_command} >> {basename}.blastx" fill = utils.fill_forward_slash(f'{"[CMD]:":10} {full_command}', width=90, subsequent_indent=' ' * 11, break_long_words=False, break_on_forward_slash=True) @@ -1712,7 +1719,7 @@ def parse_arguments(): group_1.add_argument('--version', '-v', dest='version', action='version', - version='%(prog)s 2.1.0', + version='%(prog)s 2.1.1', help='Print the HybPiper version number.') # Add subparsers: diff --git a/setup.py b/setup.py index bd58153..3c52ca4 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ hybpiper_entry_points = {'console_scripts': ['hybpiper = hybpiper.assemble:main']} setuptools.setup(name='hybpiper', - version='2.1.0', + version='2.1.1', packages=setuptools.find_packages(), author='Chris Jackson, Matt Johnson', author_email='chris.jackson@rbg.vic.gov.au',