From a8419be20e2220e2c021deb716eb74cadaba8399 Mon Sep 17 00:00:00 2001 From: TheMadBug Date: Mon, 15 Jul 2024 10:16:24 +1000 Subject: [PATCH] Squashed commit of the following: commit 5a13047fa9353a441c821e2beade68c93242b81e Author: TheMadBug Date: Sun Jul 14 00:50:04 2024 +1000 Put more progress debug in one_off_fix_symbolic_variants commit 7e3090d12e7a087f0c733ec8da287c199c193cc7 Author: TheMadBug Date: Sat Jul 13 23:54:08 2024 +1000 Revert change that updated the upgrade script to use sha before sha was in use commit 9c541687afd6f317d0282073b29885c1451dcdc0 Author: TheMadBug Date: Sat Jul 13 23:51:14 2024 +1000 Update dependency of migrations (requires sha to be in place) commit f75e9a4f7c9244be3944b28dcf6c48641b7dbaf0 Author: TheMadBug Date: Sat Jul 13 20:13:01 2024 +1000 Update deploy check to handle if Symbolic variants are disabled commit 1499a266315aec2d7e615e38d1c266b05d224077 Author: TheMadBug Date: Sat Jul 13 20:07:51 2024 +1000 Fix error report on deploy check commit aaac12fb4b02e636fbfae72d28fc74e16b765847 Author: TheMadBug Date: Sat Jul 13 20:05:08 2024 +1000 Include error message for hgvs test commit 0c460bde073b7b83b3127baa935affe0b3ed746d Author: Dave Lawrence Date: Fri Jul 12 17:20:31 2024 +0930 issue #1114 - setting to reject symbolic variants --- .../commands/one_off_fix_symbolic_variants.py | 8 +++++++ snpdb/migrations/0117_one_off_inv_symbolic.py | 4 ++-- .../library_version_checks.py | 23 +++++++++++-------- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/snpdb/management/commands/one_off_fix_symbolic_variants.py b/snpdb/management/commands/one_off_fix_symbolic_variants.py index 6fee405d7..7caed4b0a 100644 --- a/snpdb/management/commands/one_off_fix_symbolic_variants.py +++ b/snpdb/management/commands/one_off_fix_symbolic_variants.py @@ -19,12 +19,14 @@ def add_arguments(self, parser): def handle(self, *args, **options): long_sequences = Sequence.objects.all().annotate(seq_length=Length("seq")).filter(seq_length__gte=1000) 
long_variants = Variant.objects.filter(Q(locus__ref__in=long_sequences) | Q(alt__in=long_sequences)) + print(f"Long variant count = {long_variants.count()}") base_lookup = {s: Sequence.objects.get(seq=s) for s in ["G", "A", "T", "C", "", ""]} not_symbolic = [] dry_run = options["dry_run"] for genome_build in GenomeBuild.builds_with_annotation(): + print(f"Genome build {genome_build}") self._find_bad_symbolic_via_clinvar(dry_run, genome_build, "") # DELISN stored as DEL self._find_bad_symbolic_via_clinvar(dry_run, genome_build, "") # INS stored as DUP num_deleted = 0 @@ -107,6 +109,8 @@ def _find_bad_symbolic_via_clinvar(self, dry_run: bool, genome_build, alt_seq): ClinVar.objects.filter(version__genome_build=genome_build, variant__alt__seq=alt_seq).values_list( "clinvar_variation_id", flat=True)) + print(f"{genome_build} Found clinvar {alt_seq} count = {clinvar_variation_del.count()}") + clinvar_variation_original = {} for cv in ClinVar.objects.filter(version__genome_build=genome_build, @@ -114,12 +118,16 @@ def _find_bad_symbolic_via_clinvar(self, dry_run: bool, genome_build, alt_seq): variant__alt__seq=alt_seq): clinvar_variation_original[cv.clinvar_variation_id] = cv.variant + print(f"{genome_build} clinvar_variation_original_count = {len(clinvar_variation_original)}") + clinvar_variation_bad = {} for cv in ClinVar.objects.filter(version__genome_build=genome_build, clinvar_variation_id__in=clinvar_variation_original, variant__alt__seq=alt_seq): clinvar_variation_bad[cv.clinvar_variation_id] = cv.variant + print(f"{genome_build} clinvar_variation_bad = {len(clinvar_variation_bad)}") + for clinvar_variation_id, bad_variant in clinvar_variation_bad.items(): original_variant = clinvar_variation_original[clinvar_variation_id] print(f"{bad_variant} should have been {original_variant}") diff --git a/snpdb/migrations/0117_one_off_inv_symbolic.py b/snpdb/migrations/0117_one_off_inv_symbolic.py index 85a219a54..2788e51ac 100644 --- 
a/snpdb/migrations/0117_one_off_inv_symbolic.py +++ b/snpdb/migrations/0117_one_off_inv_symbolic.py @@ -8,7 +8,7 @@ from django.db import migrations from django.db.models import Q -from library.utils import sha256sum_str +from library.utils import sha256sum_str, md5sum_str def _one_off_inv_symbolic(apps, _schema_editor): @@ -41,7 +41,7 @@ def _one_off_inv_symbolic(apps, _schema_editor): base_lookup = {s: Sequence.objects.get(seq=s) for s in "GATC"} symbolic_alt = "" inv, _ = Sequence.objects.get_or_create(seq=symbolic_alt, - seq_sha256_hash=sha256sum_str(symbolic_alt), + seq_md5_hash=md5sum_str(symbolic_alt), length=len(symbolic_alt)) # This is wrong, but will remove this eventually # There are so few of these, that we can just do them 1 by 1 diff --git a/variantgrid/deployment_validation/library_version_checks.py b/variantgrid/deployment_validation/library_version_checks.py index 1f09c9a95..53698cab7 100644 --- a/variantgrid/deployment_validation/library_version_checks.py +++ b/variantgrid/deployment_validation/library_version_checks.py @@ -2,6 +2,7 @@ from genes.hgvs.hgvs_converter import HGVSConverterType from snpdb.models import VariantCoordinate +from variantgrid import settings def check_library_versions() -> dict: @@ -15,8 +16,10 @@ def _test_biocommons_hgvs(): matcher.get_variant_coordinate("NC_000006.12:g.49949407_49949408=") # Check it can handle contig names as chrom names - vc = VariantCoordinate(chrom="NC_000006.12", position=386486, ref="A", alt="", svlen=5000) - matcher.variant_coordinate_to_g_hgvs(vc) + if settings.VARIANT_SYMBOLIC_ALT_ENABLED: + # only do this test if symbolic variants are supported + vc = VariantCoordinate(chrom="NC_000006.12", position=386486, ref="A", alt="", svlen=5000) + matcher.variant_coordinate_to_g_hgvs(vc) minimum_versions = { "cdot": (0, 2, 21), @@ -34,12 +37,14 @@ def _test_biocommons_hgvs(): version_str = metadata.version(name) version = tuple(int(i) for i in version_str.split(".")) assert version >= version_required, 
"Library %s (%s) requires version >= %s" % (name, version, version_required) - valid = True - except: - valid = False - library_version_valid[name] = { - "valid": valid, - "fix": "Upgrade the library using the version in requirements.txt", - } + library_version_valid[name] = { + "valid": True, + "fix": f"All good", + } + except Exception as ex: + library_version_valid[name] = { + "valid": False, + "fix": f"Upgrade the library using the version in requirements.txt - error {ex}", + } return library_version_valid