Skip to content

Commit

Permalink
Version 2.1.1; run DIAMOND with native --threads parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisjackson-pellicle committed Dec 12, 2022
1 parent f5876e6 commit 274dcbe
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 21 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# HybPiper

Current version: 2.1.0 (December 2022)
Current version: 2.1.1 (December 2022)

[![DOI](https://zenodo.org/badge/6513/mossmatters/HybPiper.svg)](https://zenodo.org/badge/latestdoi/6513/mossmatters/HybPiper)

Expand Down
5 changes: 5 additions & 0 deletions change_log.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

**2.1.1** *12th December, 2022*

- When mapping reads with DIAMOND via `hybpiper assemble --diamond`, the `gunzip` step and the on-the-fly FASTQ-to-FASTA conversion have been removed, as DIAMOND accepts both `*.fastq` and `*.gz` input directly. In addition, the value of the `hybpiper assemble` parameter `--cpu` is now passed directly to the `--threads` parameter of the `diamond blastx` command, and `diamond` is no longer run via GNU parallel. See issue #104.


**2.1.0** *1st December, 2022*

- The subcommand `hybpiper check_targetfile` now writes a `*.ctl` file; see the wiki for details.
Expand Down
45 changes: 26 additions & 19 deletions hybpiper/assemble.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/usr/bin/env python

"""
HybPiper Version 2.1.0 (December 2022)
HybPiper Version 2.1.1 (December 2022)
########################################################################################################################
############################################## NOTES ON VERSION 2.1.0 ##################################################
############################################## NOTES ON VERSION 2.1.1 ##################################################
########################################################################################################################
After installation of the pipeline, all pipeline commands are now accessed via the main command 'hybpiper',
Expand Down Expand Up @@ -503,7 +503,7 @@ def blastx(readfiles, targetfile, evalue, basename, cpu=None, max_target_seqs=10
if os.path.isfile(f'{targetfile_basename}.psq'):
db_file = targetfile_basename
logger.debug(f'Using existing BLAST database. db_file is: {db_file}')
elif os.path.isfile(f'{targetfile_basename}.diamond'):
elif os.path.isfile(f'{targetfile_basename}.dmnd'):
db_file = targetfile_basename
logger.debug(f'Using existing DIAMOND BLAST database. db_file is: {db_file}')
else:
Expand Down Expand Up @@ -547,24 +547,28 @@ def blastx(readfiles, targetfile, evalue, basename, cpu=None, max_target_seqs=10
read_file = readfiles
# Check if read file is gzipped:
filename, file_extension = os.path.splitext(read_file)
if file_extension == '.gz':
if file_extension == '.gz' and not diamond:
logger.debug(f'Processing gzipped file {os.path.basename(read_file)}')
pipe_cmd = f"gunzip -c {read_file} | awk '{{if(NR % 4 == 1 || NR % 4 == 2) {{sub(/@/, \">\"); print; }} " \
f"}}'"
else:
elif not diamond:
pipe_cmd = f"cat {read_file} | awk '{{if(NR % 4 == 1 || NR % 4 == 2) {{sub(/@/, \">\"); print; }} }}'"

if diamond and diamond_sensitivity:
blastx_command = f'diamond blastx --db {db_file} --query - --evalue {evalue} --outfmt 6 --max-target-seqs' \
f' {max_target_seqs} --{diamond_sensitivity}'
blastx_command = f'diamond blastx --threads {cpu} --db {db_file} --query {read_file} --evalue {evalue} ' \
f'--outfmt 6 --max-target-seqs {max_target_seqs} --{diamond_sensitivity}'
elif diamond:
blastx_command = f'diamond blastx --db {db_file} --query - --evalue {evalue} --outfmt 6 --max-target-seqs' \
f' {max_target_seqs}'
blastx_command = f'diamond blastx --threads {cpu} --db {db_file} --query {read_file} --evalue {evalue} ' \
f'--outfmt 6 --max-target-seqs {max_target_seqs}'
else:
blastx_command = f'blastx -db {db_file} -query - -evalue {evalue} -outfmt 6 -max_target_seqs' \
f' {max_target_seqs}'

full_command = f"{pipe_cmd} | parallel -j {cpu} -k --block 200K --recstart '>' --pipe '{blastx_command}' >>" \
f" {basename}_unpaired.blastx"
if not diamond:
full_command = f"{pipe_cmd} | parallel -j {cpu} -k --block 200K --recstart '>' --pipe '{blastx_command}' " \
f">> {basename}_unpaired.blastx"
else:
full_command = f"{blastx_command} >> {basename}_unpaired.blastx"

fill = utils.fill_forward_slash(f'{"[CMD]:":10} {full_command}', width=90, subsequent_indent=' ' * 11,
break_long_words=False, break_on_forward_slash=True)
Expand All @@ -589,25 +593,28 @@ def blastx(readfiles, targetfile, evalue, basename, cpu=None, max_target_seqs=10
for read_file in readfiles:
# Check if read file is gzipped:
filename, file_extension = os.path.splitext(read_file)
if file_extension == '.gz':
if file_extension == '.gz' and not diamond:
logger.debug(f'Processing gzipped file {os.path.basename(read_file)}')
pipe_cmd = f"gunzip -c {read_file} | awk '{{if(NR % 4 == 1 || NR % 4 == 2) {{sub(/@/, \">\"); print; " \
f"}} }}'"
else:
pipe_cmd = f"cat {read_file} | awk '{{if(NR % 4 == 1 || NR % 4 == 2) {{sub(/@/, \">\"); print; }} }}'"

if diamond and diamond_sensitivity:
blastx_command = f'diamond blastx --db {db_file} --query - --evalue {evalue} --outfmt 6 ' \
f'--max-target-seqs {max_target_seqs} --{diamond_sensitivity}'
blastx_command = f'diamond blastx --threads {cpu} --db {db_file} --query {read_file} --evalue' \
f' {evalue} --outfmt 6 --max-target-seqs {max_target_seqs} --{diamond_sensitivity}'
elif diamond:
blastx_command = f'diamond blastx --db {db_file} --query - --evalue {evalue} --outfmt 6 ' \
f'--max-target-seqs {max_target_seqs}'
blastx_command = f'diamond blastx --threads {cpu} --db {db_file} --query {read_file} --evalue' \
f' {evalue} --outfmt 6 --max-target-seqs {max_target_seqs}'
else:
blastx_command = f'blastx -db {db_file} -query - -evalue {evalue} -outfmt 6 -max_target_seqs' \
f' {max_target_seqs}'

full_command = f"{pipe_cmd} | parallel -j {cpu} -k --block 200K --recstart '>' --pipe " \
f"'{blastx_command}' >> {basename}.blastx"
if not diamond:
full_command = f"{pipe_cmd} | parallel -j {cpu} -k --block 200K --recstart '>' --pipe " \
f"'{blastx_command}' >> {basename}.blastx"
else:
full_command = f"{blastx_command} >> {basename}.blastx"

fill = utils.fill_forward_slash(f'{"[CMD]:":10} {full_command}', width=90, subsequent_indent=' ' * 11,
break_long_words=False, break_on_forward_slash=True)
Expand Down Expand Up @@ -1712,7 +1719,7 @@ def parse_arguments():
group_1.add_argument('--version', '-v',
dest='version',
action='version',
version='%(prog)s 2.1.0',
version='%(prog)s 2.1.1',
help='Print the HybPiper version number.')

# Add subparsers:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
hybpiper_entry_points = {'console_scripts': ['hybpiper = hybpiper.assemble:main']}

setuptools.setup(name='hybpiper',
version='2.1.0',
version='2.1.1',
packages=setuptools.find_packages(),
author='Chris Jackson, Matt Johnson',
author_email='[email protected]',
Expand Down

0 comments on commit 274dcbe

Please sign in to comment.