diff --git a/test/cmdline_tests.sh b/test/cmdline_tests.sh index 77762115..a4d58038 100755 --- a/test/cmdline_tests.sh +++ b/test/cmdline_tests.sh @@ -171,6 +171,16 @@ FILE2=${EXDATADIR}/NA12891_chr21_popstr.sorted.vcf.gz FILE3=${EXDATADIR}/NA12892_chr21_popstr.sorted.vcf.gz runcmd_pass "mergeSTR --vcfs ${FILE1},${FILE2},${FILE3} --out ${TMPDIR}/test_merge_popstr --vcftype popstr" +# Test mergeSTR on a file with list of VCFs +FILE1=${EXDATADIR}/NA12878_chr21_hipstr.sorted.vcf.gz +FILE2=${EXDATADIR}/NA12891_chr21_hipstr.sorted.vcf.gz +FILE3=${EXDATADIR}/NA12892_chr21_hipstr.sorted.vcf.gz +echo ${FILE1} > ${TMPDIR}/vcf.list +echo ${FILE2} >> ${TMPDIR}/vcf.list +echo ${FILE3} >> ${TMPDIR}/vcf.list +runcmd_pass "mergeSTR --vcfs-list ${TMPDIR}/vcf.list --out ${TMPDIR}/test_merge_hipstr_list --vcftype hipstr" +runcmd_fail "mergeSTR --vcfs ${FILE1},${FILE2},${FILE3} --vcfs-list ${TMPDIR}/vcf.list --out ${TMPDIR}/test_merge_hipstr_list --vcftype hipstr" + runcmd_pass "statSTR --vcf ${EXDATADIR}/NA12878_chr21_advntr.sorted.vcf.gz --out stdout --afreq" runcmd_pass "statSTR --vcf ${EXDATADIR}/NA12891_chr21_eh.sorted.vcf.gz --out ${TMPDIR}/stats_eh --numcalled" runcmd_pass "statSTR --vcf ${EXDATADIR}/trio_chr21_gangstr.sorted.vcf.gz --out ${TMPDIR}/stats_gangstr --numcalled --mean" diff --git a/trtools/mergeSTR/README.rst b/trtools/mergeSTR/README.rst index b648a07f..8c00b677 100644 --- a/trtools/mergeSTR/README.rst +++ b/trtools/mergeSTR/README.rst @@ -26,7 +26,8 @@ To run mergeSTR use the following command:: Required Parameters: -* :code:`--vcf `: Comma-separated list of VCF files to merge. All must have been created by the same TR genotyper. Must be bgzipped, sorted, and indexed. (See `Instructions on Compressing and Indexing VCF files`_ below) +* :code:`--vcfs `: Comma-separated list of VCF files to merge. All must have been created by the same TR genotyper. Must be bgzipped, sorted, and indexed. 
(See `Instructions on Compressing and Indexing VCF files`_ below) +* :code:`--vcfs-list `: As an alternative to :code:`--vcfs`, you can provide a file with a list of bgzipped/sorted/indexed VCF files (one filename per line) to merge. * :code:`--vcftype `: Type of VCF files being merged. Default = :code:`auto`. Must be one of: :code:`gangstr`, :code:`advntr`, :code:`hipstr`, :code:`eh`, :code:`popstr`. * :code:`--out `: prefix to name output files diff --git a/trtools/mergeSTR/mergeSTR.py b/trtools/mergeSTR/mergeSTR.py index 56b2d319..30335d63 100644 --- a/trtools/mergeSTR/mergeSTR.py +++ b/trtools/mergeSTR/mergeSTR.py @@ -537,7 +537,10 @@ def getargs() -> Any: # pragma: no cover req_group = parser.add_argument_group("Required arguments") req_group.add_argument("--vcfs", help="Comma-separated list of VCF files to merge (must be sorted, bgzipped and indexed)", - type=str, required=True) + type=str, required=False) + req_group.add_argument("--vcfs-list", + help="File containing list of VCF files to merge. 
Must specify either --vcfs or --vcfs-list", + type=str, required=False) req_group.add_argument("--out", help="Prefix to name output files", type=str, required=True) req_group.add_argument("--vcftype", help="Options=%s" % [str(item) for item in trh.VcfTypes.__members__], type=str, default="auto") @@ -579,7 +582,21 @@ def main(args: Any) -> int: "directory".format(args.out)) return 1 - filenames = args.vcfs.split(",") + if args.vcfs is None and args.vcfs_list is None: + common.WARNING("Error: you must specify either --vcfs or --vcfs-list") + return 1 + + if args.vcfs is not None and args.vcfs_list is not None: + common.WARNING("Error: you cannot specify both --vcfs and --vcfs-list") + return 1 + + if args.vcfs is not None: + filenames = args.vcfs.split(",") + else: + # Read one VCF path per line; skip blank lines so a stray empty line is not passed on as a filename + with open(args.vcfs_list, "r") as list_file: + filenames = [line.strip() for line in list_file if line.strip()] + ### Check and Load VCF files ### vcfreaders = utils.LoadReaders(filenames, checkgz=True) if vcfreaders is None: diff --git a/trtools/mergeSTR/tests/test_mergeSTR.py b/trtools/mergeSTR/tests/test_mergeSTR.py index 82d813d8..0662339b 100644 --- a/trtools/mergeSTR/tests/test_mergeSTR.py +++ b/trtools/mergeSTR/tests/test_mergeSTR.py @@ -14,6 +14,7 @@ def args(tmpdir): args = argparse.ArgumentParser() args.vcfs = None + args.vcfs_list = None args.out = str(tmpdir / "test") args.update_sample_from_file = False args.quiet = False @@ -46,6 +47,31 @@ def __init__(self, chrom, pos, ref, alts=None, info=None): self.info = info if info is not None else {} self.vcfrecord = DummyRecord(chrom, pos, ref, self.alt_alleles, self.info) +# Test file with list of VCFs +def test_FileList(args, mrgvcfdir, tmpdir): + fname1 = os.path.join(mrgvcfdir, "test_file_gangstr1.vcf.gz") + fname2 = os.path.join(mrgvcfdir, "test_file_gangstr2.vcf.gz") + args.vcftype = "gangstr" + + # Run with files input to vcfs + nolist_outfile = str(tmpdir / "test-gangstr") + args.out = nolist_outfile + args.vcfs = fname1 + "," + fname2 + args.vcfs_list = None + assert 
main(args)==0 + + # Run with files input as list + list_outfile = str(tmpdir / "test-gangstr-list") + args.out = list_outfile + listfile = str(tmpdir / "test.list") + with open(listfile, "w") as f: + f.write(fname1+"\n") + f.write(fname2+"\n") + args.vcfs_list = listfile + args.vcfs = None + assert main(args)==0 + assert_same_vcf(nolist_outfile + ".vcf", list_outfile + ".vcf") + # Test right files or directory - GangSTR def test_GangSTRRightFile(args, mrgvcfdir): fname1 = os.path.join(mrgvcfdir, "test_file_gangstr1.vcf.gz")