From cf0f61de3accc2be5ca8d3f3445eeb22d78e8b9a Mon Sep 17 00:00:00 2001 From: Jens Luebeck Date: Tue, 23 May 2023 15:44:32 -0700 Subject: [PATCH] 0.1537.1 filter seeds > 30 Mbp --- PrepareAA.py | 2 +- README.md | 2 +- cnv_prefilter.py | 15 ++++++++++++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/PrepareAA.py b/PrepareAA.py index 9df7965..0c27c72 100755 --- a/PrepareAA.py +++ b/PrepareAA.py @@ -15,7 +15,7 @@ import check_reference import cnv_prefilter -__version__ = "0.1537.0" +__version__ = "0.1537.1" PY3_PATH = "python3" # updated by command-line arg if specified metadata_dict = {} # stores the run metadata (bioinformatic metadata) diff --git a/README.md b/README.md index b46221f..02bc0fd 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Performs preliminary steps (alignment, seed detection, & seed filtering) require AmpliconSuite-pipeline supports hg19, GRCh37, GRCh38 (hg38), and mouse genome mm10 (GRCm38). The tool also supports analysis with a human-viral hybrid reference genome we provide, "GRCh38_viral", which can be used to detect oncoviral hybrid focal amplifications and ecDNA in cancers with oncoviral infections. -**Current version: 0.1537.0** +**Current version: 0.1537.1** [comment]: # (Versioning based on major_version.days_since_initial_commit.minor_version. Initial commit: March 5th, 2019) diff --git a/cnv_prefilter.py b/cnv_prefilter.py index f059750..cbb58b6 100644 --- a/cnv_prefilter.py +++ b/cnv_prefilter.py @@ -159,13 +159,22 @@ def prefilter_bed(bedfile, ref, centromere_dict, chr_sizes, cngain, outdir): init_cns = arm2cns[a] med_cn = compute_cn_median(init_cns, arm2lens[a]) for x in init_cns: + # ignore seeds over 30 Mbp + if x[2] - x[1] > 30000000: + continue + ccg = cngain continuous_high_hits = continuous_high_region_ivald[x[0]][x[1]:x[2]] if continuous_high_hits: + long_seed_region_penalty_mult = 1. for y in continuous_high_hits: - if y.end - y.begin > 10000000: - ccg *= 1.5 - break + if y.end - y.begin > 20000000: + long_seed_region_penalty_mult = max(3.0, long_seed_region_penalty_mult) + + elif y.end - y.begin > 10000000: + long_seed_region_penalty_mult = max(1.5, long_seed_region_penalty_mult) + + ccg*=long_seed_region_penalty_mult if x[3] > med_cn + ccg - 2: cn_filt_entries.append(x)