Skip to content

Commit

Permalink
Merge pull request #204 from gymrek-lab/mergestr-filelist
Browse files Browse the repository at this point in the history
Adding option to mergeSTR to give file with list of VCFs
  • Loading branch information
gymreklab authored Jan 4, 2024
2 parents 835ced3 + 77cba23 commit 21c94e4
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 3 deletions.
10 changes: 10 additions & 0 deletions test/cmdline_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,16 @@ FILE2=${EXDATADIR}/NA12891_chr21_popstr.sorted.vcf.gz
FILE3=${EXDATADIR}/NA12892_chr21_popstr.sorted.vcf.gz
runcmd_pass "mergeSTR --vcfs ${FILE1},${FILE2},${FILE3} --out ${TMPDIR}/test_merge_popstr --vcftype popstr"

# Test mergeSTR on a file with list of VCFs
FILE1=${EXDATADIR}/NA12878_chr21_hipstr.sorted.vcf.gz
FILE2=${EXDATADIR}/NA12891_chr21_hipstr.sorted.vcf.gz
FILE3=${EXDATADIR}/NA12892_chr21_hipstr.sorted.vcf.gz
# Write one VCF path per line; expansions are quoted so a path is never
# word-split or glob-expanded before it reaches the list file.
printf '%s\n' "${FILE1}" "${FILE2}" "${FILE3}" > "${TMPDIR}/vcf.list"
runcmd_pass "mergeSTR --vcfs-list ${TMPDIR}/vcf.list --out ${TMPDIR}/test_merge_hipstr_list --vcftype hipstr"
# Supplying both --vcfs and --vcfs-list is expected to fail
runcmd_fail "mergeSTR --vcfs ${FILE1},${FILE2},${FILE3} --vcfs-list ${TMPDIR}/vcf.list --out ${TMPDIR}/test_merge_hipstr_list --vcftype hipstr"

runcmd_pass "statSTR --vcf ${EXDATADIR}/NA12878_chr21_advntr.sorted.vcf.gz --out stdout --afreq"
runcmd_pass "statSTR --vcf ${EXDATADIR}/NA12891_chr21_eh.sorted.vcf.gz --out ${TMPDIR}/stats_eh --numcalled"
runcmd_pass "statSTR --vcf ${EXDATADIR}/trio_chr21_gangstr.sorted.vcf.gz --out ${TMPDIR}/stats_gangstr --numcalled --mean"
Expand Down
3 changes: 2 additions & 1 deletion trtools/mergeSTR/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ To run mergeSTR use the following command::

Required Parameters:

* :code:`--vcf <VCF>`: Comma-separated list of VCF files to merge. All must have been created by the same TR genotyper. Must be bgzipped, sorted, and indexed. (See `Instructions on Compressing and Indexing VCF files`_ below)
* :code:`--vcfs <VCFs>`: Comma-separated list of VCF files to merge. All must have been created by the same TR genotyper. Must be bgzipped, sorted, and indexed. (See `Instructions on Compressing and Indexing VCF files`_ below)
* :code:`--vcfs-list <FILE>`: Alternative to :code:`--vcfs`: a file listing the bgzipped, sorted, and indexed VCF files to merge, one filename per line. Exactly one of :code:`--vcfs` or :code:`--vcfs-list` must be specified.
* :code:`--vcftype <string>`: Type of VCF files being merged. Default = :code:`auto`. Must be one of: :code:`gangstr`, :code:`advntr`, :code:`hipstr`, :code:`eh`, :code:`popstr`.
* :code:`--out <string>`: prefix to name output files

Expand Down
19 changes: 17 additions & 2 deletions trtools/mergeSTR/mergeSTR.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,10 @@ def getargs() -> Any: # pragma: no cover
req_group = parser.add_argument_group("Required arguments")
req_group.add_argument("--vcfs",
help="Comma-separated list of VCF files to merge (must be sorted, bgzipped and indexed)",
type=str, required=True)
type=str, required=False)
req_group.add_argument("--vcfs-list",
help="File containing list of VCF files to merge. Must specify either --vcfs or --vcfs-list",
type=str, required=False)
req_group.add_argument("--out", help="Prefix to name output files", type=str, required=True)
req_group.add_argument("--vcftype", help="Options=%s" % [str(item) for item in trh.VcfTypes.__members__], type=str,
default="auto")
Expand Down Expand Up @@ -579,7 +582,19 @@ def main(args: Any) -> int:
"directory".format(args.out))
return 1

filenames = args.vcfs.split(",")
if args.vcfs is None and args.vcfs_list is None:
common.WARNING("Error: you must specify either --vcfs or --vcfs-list")
return 1

if args.vcfs is not None and args.vcfs_list is not None:
common.WARNING("Error: you cannot specify both --vcfs and --vcfs-list")
return 1

if args.vcfs is not None:
filenames = args.vcfs.split(",")
else:
filenames = [item.strip() for item in open(args.vcfs_list, "r").readlines()]

### Check and Load VCF files ###
vcfreaders = utils.LoadReaders(filenames, checkgz=True)
if vcfreaders is None:
Expand Down
27 changes: 27 additions & 0 deletions trtools/mergeSTR/tests/test_mergeSTR.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
def args(tmpdir):
args = argparse.ArgumentParser()
args.vcfs = None
args.vcfs_list = None
args.out = str(tmpdir / "test")
args.update_sample_from_file = False
args.quiet = False
Expand Down Expand Up @@ -46,6 +47,32 @@ def __init__(self, chrom, pos, ref, alts=None, info=None):
self.info = info if info is not None else {}
self.vcfrecord = DummyRecord(chrom, pos, ref, self.alt_alleles, self.info)

# Test file with list of VCFs
def test_FileList(args, mrgvcfdir, tmpdir):
    """Check that --vcfs-list input gives the same merged VCF as --vcfs.

    The same two GangSTR test files are merged twice: once passed as a
    comma-separated string in ``args.vcfs`` and once through a list file
    (one filename per line) in ``args.vcfs_list``. Both runs must return 0
    and the two output VCFs are then compared.
    """
    fname1 = os.path.join(mrgvcfdir, "test_file_gangstr1.vcf.gz")
    fname2 = os.path.join(mrgvcfdir, "test_file_gangstr2.vcf.gz")
    args.vcftype = "gangstr"

    # Run with files input to vcfs
    nolist_outfile = str(tmpdir / "test-gangstr")
    args.out = nolist_outfile
    args.vcfs = fname1 + "," + fname2
    args.vcfs_list = None
    assert main(args) == 0

    # Run with files input as list
    list_outfile = str(tmpdir / "test-gangstr-list")
    args.out = list_outfile
    listfile = str(tmpdir / "test.list")
    # Context manager guarantees the list file is flushed and closed
    # before main() reads it, even if a write raises.
    with open(listfile, "w") as f:
        f.write(fname1 + "\n")
        f.write(fname2 + "\n")
    args.vcfs_list = listfile
    args.vcfs = None
    assert main(args) == 0
    assert_same_vcf(nolist_outfile + ".vcf", list_outfile + ".vcf")

# Test right files or directory - GangSTR
def test_GangSTRRightFile(args, mrgvcfdir):
fname1 = os.path.join(mrgvcfdir, "test_file_gangstr1.vcf.gz")
Expand Down

0 comments on commit 21c94e4

Please sign in to comment.