Skip to content

Commit

Permalink
add support for unmmapped bams (#98)
Browse files Browse the repository at this point in the history
* add support for unmmapped bams

* fix _validate_bam_file for ubam

* Add tests to make sure proper calls made with unmapped option

---------

Co-authored-by: Yash Patel <[email protected]>
  • Loading branch information
zhuchcn and yashpatel6 authored Jun 25, 2024
1 parent 05ca2ea commit cc1d3d4
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 14 deletions.
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
## [Unreleased]
### Added
- Add validation flowchart

- Add support for unmapped BAM
---

## [5.0.0] - 2024-02-16
Expand Down Expand Up @@ -72,7 +72,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

### Fixed

### Removed
### Removed
- Remove deprecated parameter options from README
- Remove inaccessible design doc link from README
- Remove directory checking
Expand Down
4 changes: 3 additions & 1 deletion pipeval/validate/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def positive_integer(arg):

def add_subparser_validate(subparsers:argparse._SubParsersAction):
""" Parse arguments """
parser = subparsers.add_parser(
parser:argparse.ArgumentParser = subparsers.add_parser(
name = 'validate',
help = 'Validate one or more file(s)',
description = 'Validate one or more file(s)',
Expand All @@ -26,6 +26,8 @@ def add_subparser_validate(subparsers:argparse._SubParsersAction):
parser.add_argument('path', help='One or more paths of files to validate', type=str, nargs='+')
parser.add_argument('-r', '--cram-reference', default=None, \
help='Path to reference file for CRAM')
parser.add_argument('-u', '--unmapped-bam', action='store_true',
help='Input is unmmapped BAM.')
parser.add_argument('-p', '--processes', type=positive_integer, default=1, \
help='Number of processes to run in parallel when validating multiple files')
parser.add_argument('-t', '--test-integrity', action='store_true', \
Expand Down
1 change: 1 addition & 0 deletions pipeval/validate/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def _validate_file(
`args` must contain the following:
`path` is a required argument with a value of list of files
`cram_reference` is a required argument with either a string value or None
`unmapped_bam` is a required argument of boolean variable
'''
_path_exists(path)

Expand Down
2 changes: 1 addition & 1 deletion pipeval/validate/validate_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@

ValidateArgs = namedtuple(
'args',
'path, cram_reference, processes, test_integrity'
'path, cram_reference, unmapped_bam, processes, test_integrity'
)
16 changes: 12 additions & 4 deletions pipeval/validate/validators/bam.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,22 @@

from pipeval.validate.validate_types import ValidateArgs

def _validate_bam_file(path:Path):
def _validate_bam_file(path:Path, unmapped_bam:bool):
'''Validates bam file'''
args = [str(path)]
if unmapped_bam:
args.append('-u')
try:
pysam.quickcheck(str(path))
pysam.quickcheck(*args)
except pysam.SamtoolsError as err:
raise ValueError("samtools bam check failed. " + str(err)) from err

bam_head: pysam.IteratorRowHead = pysam.AlignmentFile(str(path)).head(1)
kwargs = {
'filename': str(path)
}
if unmapped_bam:
kwargs['check_sq'] = False
bam_head: pysam.IteratorRowHead = pysam.AlignmentFile(**kwargs).head(1)
if next(bam_head, None) is None:
raise ValueError("pysam bam check failed. No reads in " + str(path))

Expand All @@ -35,5 +43,5 @@ def _check_bam(path:Path, args:Union[ValidateArgs,Dict[str, Union[str,list]]]):
`args` must contains the following:
`cram_reference` is a required key with either a string value or None
'''
_validate_bam_file(path)
_validate_bam_file(path, args.unmapped_bam)
_check_bam_index(path)
68 changes: 62 additions & 6 deletions test/unit/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# pylint: disable=C0114
from pathlib import Path
from argparse import Namespace, ArgumentTypeError
from unittest.mock import Mock, mock_open
from unittest.mock import Mock, mock_open, MagicMock
import warnings
import zlib
import gzip
Expand Down Expand Up @@ -110,7 +110,13 @@ def test__path_exists__errors_for_non_existing_path(mock_path):
@mock.patch('pipeval.validate.files.Path', autospec=True)
def test__check_compressed__raises_warning_for_uncompressed_path(mock_path, mock_magic):
mock_magic.return_value = 'text/plain'
test_args = ValidateArgs(path=[], cram_reference=None, processes=1, test_integrity=False)
test_args = ValidateArgs(
path=[],
cram_reference=None,
unmapped_bam=False,
processes=1,
test_integrity=False
)

with pytest.warns(UserWarning):
_check_compressed(mock_path, test_args)
Expand All @@ -132,7 +138,13 @@ def test__check_compressed__passes_compression_check(
compression_mime):
mock_magic.return_value = compression_mime
mock_integrity.return_value = None
test_args = ValidateArgs(path=[], cram_reference=None, processes=1, test_integrity=False)
test_args = ValidateArgs(
path=[],
cram_reference=None,
unmapped_bam=False,
processes=1,
test_integrity=False
)

with warnings.catch_warnings():
warnings.filterwarnings("error")
Expand All @@ -147,14 +159,46 @@ def test__validate_bam_file__empty_bam_file(mock_pysam):
test_path = Path('empty/valid/bam')

with pytest.raises(ValueError):
_validate_bam_file(test_path)
_validate_bam_file(test_path, unmapped_bam=False)

@mock.patch('pipeval.validate.validators.bam.pysam')
def test__validate_bam_file__quickcheck_called_with_unmapped(mock_pysam):
mock_alignment_file = Mock()
mock_alignment_file.head.return_value = iter(['read1'])
mock_quickcheck = Mock()

mock_pysam.quickcheck = mock_quickcheck
mock_pysam.AlignmentFile.return_value = mock_alignment_file

test_path = 'empty/valid/bam'
test_unmapped_option = '-u'

_validate_bam_file(test_path, unmapped_bam=True)
mock_quickcheck.assert_called_with(test_path, test_unmapped_option)

@mock.patch('pipeval.validate.validators.bam.pysam')
def test__validate_bam_file__alignmentfile_called_with_unmapped(mock_pysam):
mock_alignment_file = MagicMock()
mock_alignment_file.__iter__.return_value = ['read1']
mock_quickcheck = Mock()

mock_pysam.quickcheck = mock_quickcheck
mock_pysam.AlignmentFile = mock_alignment_file

test_path = 'empty/valid/bam'
test_unmapped_option = False

_validate_bam_file(test_path, unmapped_bam=True)
mock_alignment_file.assert_called_with(
**{'filename': test_path, 'check_sq': test_unmapped_option}
)

@mock.patch('pipeval.validate.validators.bam.Path', autospec=True)
def test__validate_bam_file__quickcheck_fails(mock_path):
mock_path.exists.return_value = False

with pytest.raises(ValueError):
_validate_bam_file(mock_path)
_validate_bam_file(mock_path, unmapped_bam=False)

@mock.patch('pipeval.validate.validators.bam.pysam', autospec=True)
def test__check_bam_index__no_index_file_error(mock_pysam):
Expand Down Expand Up @@ -247,7 +291,13 @@ def test__validate_vcf_file__passes_vcf_validation(mock_call):
_validate_vcf_file('some/file')

def test__run_validate__passes_validation_no_files():
test_args = ValidateArgs(path=[], cram_reference=None, processes=1, test_integrity=False)
test_args = ValidateArgs(
path=[],
cram_reference=None,
unmapped_bam=False,
processes=1,
test_integrity=False
)
run_validate(test_args)

@pytest.mark.parametrize(
Expand All @@ -273,6 +323,7 @@ def test___validation_worker__fails_with_failing_checks(
test_args = ValidateArgs(
path=[test_path],
cram_reference=None,
unmapped_bam=False,
processes=1,
test_integrity=False)
mock_path_resolve.return_value = test_path
Expand All @@ -292,6 +343,7 @@ def test__run_validate__passes_on_all_valid_files(
test_args = ValidateArgs(
path=[test_path],
cram_reference=None,
unmapped_bam=False,
processes=1,
test_integrity=False)

Expand All @@ -309,6 +361,7 @@ def test__run_validate__fails_with_failing_file(
test_args = ValidateArgs(
path=[test_path],
cram_reference=None,
unmapped_bam=False,
processes=1,
test_integrity=False)
expected_code = 1
Expand Down Expand Up @@ -352,6 +405,7 @@ def test__validate_file__checks_compression(
test_args = ValidateArgs(
path=[],
cram_reference=None,
unmapped_bam=False,
processes=1,
test_integrity=False)

Expand All @@ -369,6 +423,7 @@ def test__run_validate__fails_on_unresolvable_symlink(mock_path_resolve):
test_args = ValidateArgs(
path=[test_path],
cram_reference=None,
unmapped_bam=False,
processes=1,
test_integrity=False)

Expand All @@ -394,6 +449,7 @@ def test___validation_worker__passes_proper_validation(
test_args = ValidateArgs(
path=[test_path],
cram_reference=None,
unmapped_bam=False,
processes=1,
test_integrity=False)

Expand Down

0 comments on commit cc1d3d4

Please sign in to comment.