Skip to content

Commit

Permalink
issue SACGF/variantgrid_private#3652 - remove bad variant/allele links
Browse files Browse the repository at this point in the history
  • Loading branch information
davmlaw committed Aug 7, 2024
1 parent c7b88b1 commit 590a59f
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -1,28 +1,48 @@
import logging

from django.core.management import BaseCommand

from snpdb.models import Variant, Allele, ClinGenAllele, Contig
from snpdb.models import Variant, Allele, ClinGenAllele, Contig, AlleleLiftover


class Command(BaseCommand):
    """Check indel variants linked to ClinGen alleles against ClinGen's own coordinates.

    Indel representation in g.HGVS doesn't have reference base - so we may have variants
    with different reference bases linked to an allele. For every allele that has a
    ClinGen allele and at least one linked non-SNP variant, each linked variant's
    coordinate is compared with the coordinate ClinGen reports for that genome build.
    Mismatching links (and the allele's liftover records for that build) are deleted
    unless ``--dry-run`` is given.
    """

    def add_arguments(self, parser):
        """Register the ``--dry-run`` flag (report only, delete nothing)."""
        parser.add_argument(
            '--dry-run',
            help="Just report, don't unlink variants from allele if incorrect",
            action='store_true',
        )

    def handle(self, *args, **options):
        """Scan ClinGen-linked indel alleles and unlink variants whose coordinate disagrees.

        Reads ``options["dry_run"]``; when true, mismatches are only logged.
        """
        dry_run = options["dry_run"]

        variants_with_clingen = Variant.objects.filter(variantallele__allele__clingen_allele__isnull=False)
        indel_qs = variants_with_clingen.exclude(Variant.get_snp_q())
        indel_alleles = indel_qs.values_list("variantallele__allele_id", flat=True)

        alleles_qs = Allele.objects.filter(clingen_allele__isnull=False, pk__in=indel_alleles).distinct()
        logging.info("%d indel alleles to check...", alleles_qs.count())
        if dry_run:
            logging.info("Dry-run only - will not unlink incorrect variants...")
        else:
            logging.info("Going to unlink incorrect variants...")

        num_unlinked = 0
        for i, allele in enumerate(alleles_qs):
            if i % 500 == 0:
                logging.info("processed %d alleles", i)
            for va in allele.variantallele_set.all():
                existing_vc = va.variant.coordinate
                try:
                    clingen_vc = allele.clingen_allele.get_variant_coordinate(va.genome_build)
                except (ClinGenAllele.ClinGenBuildNotInResponseError, Contig.ContigNotInBuildError):
                    # ClinGen has no usable coordinate for this build, so there is
                    # nothing to compare against - leave the link alone.
                    continue

                if existing_vc == clingen_vc:
                    continue

                logging.info(
                    "%s has variant %r not matching expected for build %s: %r",
                    allele, existing_vc, va.genome_build, clingen_vc,
                )
                if dry_run:
                    continue

                # Remove the liftover record first so a later liftover run can
                # re-create the link for this build.
                liftover_res = AlleleLiftover.objects.filter(allele=allele, genome_build=va.genome_build).delete()
                logging.info("Removing liftover record: %s", liftover_res)
                va_res = va.delete()
                logging.info("Unlinking variant: %s", va_res)
                # Count each removed link; without this the summary below never fires.
                num_unlinked += 1

        if num_unlinked:
            logging.info("Unlinked %d variants, you'll need to re-run liftover to re-build these", num_unlinked)
1 change: 0 additions & 1 deletion snpdb/migrations/0141_one_off_fix_variant_end2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,4 @@ class Migration(migrations.Migration):
operations = [
ManualOperation(task_id=ManualOperation.task_id_manage(["one_off_fix_variant_end"]),
test=_existing_deploy_has_variants),

]
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Generated by Django 4.2.9 on 2024-08-07 04:05

from django.db import migrations
from django.db.models import Q

from manual.operations.manual_operations import ManualOperation


def _has_clingen_indels(apps):
    """Return True if any variant linked to a ClinGen allele is not a single-base SNP.

    A variant counts as a SNP here when both its reference and alt sequences are
    one of the characters in "GATC"; everything else is treated as an indel.
    Uses the historical ``Variant`` model obtained from ``apps``.
    """
    variant_model = apps.get_model("snpdb", "Variant")
    nucleotides = "GATC"
    ref_is_single_base = Q(locus__ref__seq__in=nucleotides)
    alt_is_single_base = Q(alt__seq__in=nucleotides)
    return (
        variant_model.objects
        .filter(variantallele__allele__clingen_allele__isnull=False)
        .exclude(ref_is_single_base & alt_is_single_base)
        .exists()
    )


class Migration(migrations.Migration):
    """Schedule the ClinGen allele / linked variant reference base check.

    Registers a manual operation for the
    ``clingen_allele_linked_variants_reference_base_check`` management command,
    with ``_has_clingen_indels`` supplied as its ``test`` callable.
    """

    dependencies = [
        ("snpdb", "0141_one_off_fix_variant_end2"),
    ]

    operations = [
        ManualOperation(
            task_id=ManualOperation.task_id_manage(
                ["clingen_allele_linked_variants_reference_base_check"]
            ),
            test=_has_clingen_indels,
        ),
    ]

0 comments on commit 590a59f

Please sign in to comment.