Skip to content

Commit

Permalink
Allow empty ancestral alleles
Browse files (browse the repository at this point in the history)
Fixes #884
  • Loading branch information
hyanwong committed Jan 19, 2024
1 parent 7db1d38 commit a769a76
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 3 deletions.
24 changes: 23 additions & 1 deletion tests/test_sgkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,9 +615,31 @@ def test_empty_alleles_not_at_end(self, tmp_path):
)
sgkit.save_dataset(ds, path)
samples = tsinfer.SgkitSampleData(path)
with pytest.raises(ValueError, match="Empty alleles must be at the end"):
with pytest.raises(ValueError, match="empty alleles must be at the end"):
tsinfer.infer(samples)

def test_empty_ancestral_alleles(self, tmp_path):
    """
    Check the alleles reported by ``variants(recode_ancestral=True)`` when
    some sites have an empty ancestral allele.

    A site whose ancestral state is empty is expected to report three
    alleles with the empty one first; any other site is expected to
    report two alleles.
    """
    zarr_path = tmp_path / "data.zarr"
    dataset = sgkit.simulate_genotype_call_dataset(
        n_variant=3, n_sample=3, n_ploidy=1
    )

    # Replace the simulated alleles: the first site starts with an empty allele.
    allele_dims = dataset["variant_allele"].dims
    allele_table = np.array(
        [["", "A", "C", ""], ["A", "C", "", ""], ["A", "C", "", ""]], dtype="S1"
    )
    dataset["variant_allele"] = (allele_dims, allele_table)

    # Two of the three sites are given an empty ancestral allele.
    ancestral_alleles = np.array(["", "A", ""], dtype="S1")
    dataset["variant_ancestral_allele"] = (["variants"], ancestral_alleles)

    sgkit.save_dataset(dataset, zarr_path)
    samples = tsinfer.SgkitSampleData(zarr_path)

    # Accept the empty value in either bytes or str form.
    empty_values = (b"", "")
    for variant in samples.variants(recode_ancestral=True):
        if variant.site.ancestral_state not in empty_values:
            assert len(variant.alleles) == 2
            continue
        assert len(variant.alleles) == 3
        assert variant.alleles[0] in empty_values


class TestSgkitMatchSamplesToDisk:
@pytest.mark.skipif(sys.platform == "win32", reason="No cyvcf2 on windows")
Expand Down
10 changes: 8 additions & 2 deletions tsinfer/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2598,10 +2598,16 @@ def variants(self, sites=None, recode_ancestral=None):
# alleles are at the end of the list, so check this.
non_empty_alleles = []
empty_seen = False
for allele in alleles:
for i, allele in enumerate(alleles):
if allele != b"" and allele != "":
if empty_seen:
raise ValueError("Empty alleles must be at the end")
raise ValueError(
f"Site {site.id} (pos {site.position}): empty alleles "
f"must be at the end, but alleles are {alleles}"
)
non_empty_alleles.append(allele)
elif i == 0 and (site.alleles[aa] == b"" or site.alleles[aa] == ""):
# Single empty allele allowed if it is the starting ancestral allele
non_empty_alleles.append(allele)
else:
empty_seen = True
Expand Down

0 comments on commit a769a76

Please sign in to comment.