Skip to content

Commit

Permalink
issue SACGF/variantgrid_private#3676 - correctly calculate Variant.end
Browse files Browse the repository at this point in the history
  • Loading branch information
davmlaw committed Aug 6, 2024
1 parent a4bfd82 commit 39b2afe
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 11 deletions.
17 changes: 11 additions & 6 deletions snpdb/management/commands/one_off_fix_variant_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db.models import OuterRef, Subquery, F
from django.db.models.functions import Length
from django.db.models.functions import Length, Abs

from annotation.models import AnnotationRangeLock
from library.utils import mk_path
Expand All @@ -28,10 +28,15 @@ def add_arguments(self, parser):
@staticmethod
def update_variants_in_range_fix_end(variant_qs):
# These are all non-symbolic, so can just use ref length
variant_qs = variant_qs.exclude(alt__seq__startswith="<")
calc_end = F("locus__position") + Length("locus__ref__seq")
variant_subquery = Variant.objects.filter(pk=OuterRef("pk")).annotate(calc_end=calc_end).values("calc_end")[:1]
variant_qs.update(end=Subquery(variant_subquery))
non_symbolic_variant_qs = variant_qs.filter(svlen__isnull=True)
calc_end = F("locus__position") + Length("locus__ref__seq") - 1
non_symbolic_variant_subquery = Variant.objects.filter(pk=OuterRef("pk")).annotate(calc_end=calc_end).values("calc_end")[:1]
non_symbolic_variant_qs.update(end=Subquery(non_symbolic_variant_subquery))

symbolic_variant_qs = variant_qs.filter(svlen__isnull=False)
calc_end = F("locus__position") + Abs("svlen")
symbolic_variant_subquery = Variant.objects.filter(pk=OuterRef("pk")).annotate(calc_end=calc_end).values("calc_end")[:1]
symbolic_variant_qs.update(end=Subquery(symbolic_variant_subquery))

def handle(self, *args, **options):
# We want to do this in small batches - so use the variant annotation range locks which are all approx the same
Expand All @@ -43,7 +48,7 @@ def handle(self, *args, **options):
# This can take a few days, so we'll write the variant as we go, so we can resume without any troubles
migrations_dir = os.path.join(settings.PRIVATE_DATA_ROOT, "migrations")
mk_path(migrations_dir)
progress_file = os.path.join(migrations_dir, "one_off_fix_variant_end_progress.txt")
progress_file = os.path.join(migrations_dir, "one_off_fix_variant_end_progress_v2.txt")

highest_av = AnnotationRangeLock.objects.order_by("-max_variant").first()
arl_qs = AnnotationRangeLock.objects.filter(version=highest_av.version)
Expand Down
8 changes: 3 additions & 5 deletions snpdb/models/models_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,11 +252,9 @@ def end(self) -> int:
* <DEL>, <DUP>, <INV>, and <CNV> symbolic structural variant alleles: POS + SVLEN
"""
if self.is_symbolic():
# Insertions add w/o replacing, so their end is the start (ie 1 past start using half-open)
if self.alt == VCFSymbolicAllele.DUP:
return self.position + 1
return self.position + abs(self.svlen) + 1
return self.position + len(self.ref)
# We don't support <INV> so don't need to worry about it
return self.position + abs(self.svlen)
return self.position + len(self.ref) - 1

def __lt__(self, other):
return self.as_tuple < other.as_tuple
Expand Down

0 comments on commit 39b2afe

Please sign in to comment.