Skip to content

Commit

Permalink
Squashed commit of the following:
Browse files Browse the repository at this point in the history
commit 5a13047
Author: TheMadBug <[email protected]>
Date:   Sun Jul 14 00:50:04 2024 +1000

    Put more progress debug in one_off_fix_symbolic_variants

commit 7e3090d
Author: TheMadBug <[email protected]>
Date:   Sat Jul 13 23:54:08 2024 +1000

    Revert change that upgraded the upgrade script to use sha before sha was in use

commit 9c54168
Author: TheMadBug <[email protected]>
Date:   Sat Jul 13 23:51:14 2024 +1000

    Update dependency of migrations (requires sha to be in place)

commit f75e9a4
Author: TheMadBug <[email protected]>
Date:   Sat Jul 13 20:13:01 2024 +1000

    Update deploy check to handle if Symbolic variants are disabled

commit 1499a26
Author: TheMadBug <[email protected]>
Date:   Sat Jul 13 20:07:51 2024 +1000

    Fix error report on deploy check

commit aaac12f
Author: TheMadBug <[email protected]>
Date:   Sat Jul 13 20:05:08 2024 +1000

    Include error message for hgvs test

commit 0c460bd
Author: Dave Lawrence <[email protected]>
Date:   Fri Jul 12 17:20:31 2024 +0930

    issue #1114 - setting to reject symbolic variants
  • Loading branch information
TheMadBug committed Jul 15, 2024
1 parent dccce96 commit a8419be
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 11 deletions.
8 changes: 8 additions & 0 deletions snpdb/management/commands/one_off_fix_symbolic_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ def add_arguments(self, parser):
def handle(self, *args, **options):
long_sequences = Sequence.objects.all().annotate(seq_length=Length("seq")).filter(seq_length__gte=1000)
long_variants = Variant.objects.filter(Q(locus__ref__in=long_sequences) | Q(alt__in=long_sequences))
print(f"Long variant count = {long_variants.count()}")

base_lookup = {s: Sequence.objects.get(seq=s) for s in ["G", "A", "T", "C", "<DEL>", "<DUP>"]}
not_symbolic = []
dry_run = options["dry_run"]

for genome_build in GenomeBuild.builds_with_annotation():
print(f"Genome build {genome_build}")
self._find_bad_symbolic_via_clinvar(dry_run, genome_build, "<DEL>") # DELISN stored as DEL
self._find_bad_symbolic_via_clinvar(dry_run, genome_build, "<DUP>") # INS stored as DUP
num_deleted = 0
Expand Down Expand Up @@ -107,19 +109,25 @@ def _find_bad_symbolic_via_clinvar(self, dry_run: bool, genome_build, alt_seq):
ClinVar.objects.filter(version__genome_build=genome_build, variant__alt__seq=alt_seq).values_list(
"clinvar_variation_id", flat=True))

print(f"{genome_build} Found clinvar {alt_seq} count = {clinvar_variation_del.count()}")

clinvar_variation_original = {}

for cv in ClinVar.objects.filter(version__genome_build=genome_build,
clinvar_variation_id__in=clinvar_variation_del).exclude(
variant__alt__seq=alt_seq):
clinvar_variation_original[cv.clinvar_variation_id] = cv.variant

print(f"{genome_build} clinvar_variation_original_count = {len(clinvar_variation_original)}")

clinvar_variation_bad = {}
for cv in ClinVar.objects.filter(version__genome_build=genome_build,
clinvar_variation_id__in=clinvar_variation_original,
variant__alt__seq=alt_seq):
clinvar_variation_bad[cv.clinvar_variation_id] = cv.variant

print(f"{genome_build} clinvar_variation_bad = {len(clinvar_variation_bad)}")

for clinvar_variation_id, bad_variant in clinvar_variation_bad.items():
original_variant = clinvar_variation_original[clinvar_variation_id]
print(f"{bad_variant} should have been {original_variant}")
Expand Down
4 changes: 2 additions & 2 deletions snpdb/migrations/0117_one_off_inv_symbolic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from django.db import migrations
from django.db.models import Q

from library.utils import sha256sum_str
from library.utils import sha256sum_str, md5sum_str


def _one_off_inv_symbolic(apps, _schema_editor):
Expand Down Expand Up @@ -41,7 +41,7 @@ def _one_off_inv_symbolic(apps, _schema_editor):
base_lookup = {s: Sequence.objects.get(seq=s) for s in "GATC"}
symbolic_alt = "<INV>"
inv, _ = Sequence.objects.get_or_create(seq=symbolic_alt,
seq_sha256_hash=sha256sum_str(symbolic_alt),
seq_md5_hash=md5sum_str(symbolic_alt),
length=len(symbolic_alt)) # This is wrong, but will remove this eventually

# There are so few of these, that we can just do them 1 by 1
Expand Down
23 changes: 14 additions & 9 deletions variantgrid/deployment_validation/library_version_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from genes.hgvs.hgvs_converter import HGVSConverterType
from snpdb.models import VariantCoordinate
from variantgrid import settings


def check_library_versions() -> dict:
Expand All @@ -15,8 +16,10 @@ def _test_biocommons_hgvs():
matcher.get_variant_coordinate("NC_000006.12:g.49949407_49949408=")

# Check it can handle contig names as chrom names
vc = VariantCoordinate(chrom="NC_000006.12", position=386486, ref="A", alt="<DUP>", svlen=5000)
matcher.variant_coordinate_to_g_hgvs(vc)
if settings.VARIANT_SYMBOLIC_ALT_ENABLED:
# only do this test if symbolic variants are supported
vc = VariantCoordinate(chrom="NC_000006.12", position=386486, ref="A", alt="<DUP>", svlen=5000)
matcher.variant_coordinate_to_g_hgvs(vc)

minimum_versions = {
"cdot": (0, 2, 21),
Expand All @@ -34,12 +37,14 @@ def _test_biocommons_hgvs():
version_str = metadata.version(name)
version = tuple(int(i) for i in version_str.split("."))
assert version >= version_required, "Library %s (%s) requires version >= %s" % (name, version, version_required)
valid = True
except:
valid = False
library_version_valid[name] = {
"valid": valid,
"fix": "Upgrade the library using the version in requirements.txt",
}
library_version_valid[name] = {
"valid": True,
"fix": f"All good",
}
except Exception as ex:
library_version_valid[name] = {
"valid": False,
"fix": f"Upgrade the library using the version in requirements.txt - error {ex}",
}

return library_version_valid

0 comments on commit a8419be

Please sign in to comment.