diff --git a/src/perllib/CiceroUtil.pm b/src/perllib/CiceroUtil.pm index de0a84a..3ba1e68 100755 --- a/src/perllib/CiceroUtil.pm +++ b/src/perllib/CiceroUtil.pm @@ -823,6 +823,20 @@ sub normalizeChromosomeName { return $query; } +# Return 1 if any gene name in $targetgene is a key of the hash referenced by the first argument, otherwise 0. +# $targetgene may list several names separated by ',' or '|'; the hash is read through its reference, not copied. +sub exist_multiplename_checking { + my ($genelist, $targetgene) = @_;#e.g. targetgene UBTF,MIR6782 + return 0 unless(defined($targetgene)); + my @genes = split(/,|\|/, $targetgene); + + foreach my $g1 (@genes) { + return 1 if(exists($genelist->{$g1})); + } + + return 0; +} + 1; =head1 LICENCE AND COPYRIGHT diff --git a/src/scripts/annotate.pl b/src/scripts/annotate.pl index 11af969..4bf6b1b 100755 --- a/src/scripts/annotate.pl +++ b/src/scripts/annotate.pl @@ -17,7 +17,7 @@ use CiceroSCValidator qw($lowqual_cutoff LEFT_CLIP RIGHT_CLIP); use CiceroUtil qw(prepare_reads_file parse_range rev_comp - is_PCR_dup read_fa_file get_discordant_reads get_sclip_reads normalizeChromosomeName); + is_PCR_dup read_fa_file get_discordant_reads get_sclip_reads normalizeChromosomeName exist_multiplename_checking); require CiceroExtTools; @@ -27,6 +27,8 @@ use Gene; use GeneModel; +use constant BADFUSION_DISTANCE_CUTOFF => 200; # both breakpoint positions must differ by less than this from a listed bad fusion to match + my $debug = 0; my $out_header = join("\t", "sample", "geneA", "chrA", "posA", "ortA", "featureA", "geneB", "chrB", "posB", "ortB", "featureB", @@ -257,20 +259,16 @@ } sub is_bad_fusion{ - my $badfusion_distance_cutoff = 200; my ($chrA, $posA, $chrB, $posB) = @_; $chrA = "chr".$chrA unless($chrA =~ /chr/); $chrB = "chr".$chrB unless($chrB =~ /chr/); - #print STDERR " test |".$chrA.":".$posA.":".$chrB.":".$posB."\n"; - my $size = keys %bad_fusions; - #print STDERR " size |".$size."\n"; foreach my $xx (keys %bad_fusions) { my ($chr1, $pos1, $chr2, $pos2) = split(":",$xx); #print STDERR " badfusionlist |".$chr1.":".$pos1.":".$chr2.":".$pos2."\n"; return 1 if($chrA eq $chr1 && $chrB eq $chr2 && - abs($pos1 - $posA) <$badfusion_distance_cutoff && abs($pos2 - $posB) <$badfusion_distance_cutoff);# cutoff is based on the cutoff of merging GTEx false positive fusions 
from CICERO running + abs($pos1 - $posA) < BADFUSION_DISTANCE_CUTOFF && abs($pos2 - $posB) < BADFUSION_DISTANCE_CUTOFF);# cutoff is based on the cutoff of merging GTEx false positive fusions from CICERO running return 1 if($chrA eq $chr2 && $chrB eq $chr1 && - abs($pos2 - $posA) <$badfusion_distance_cutoff && abs($pos1 - $posB) <$badfusion_distance_cutoff); + abs($pos2 - $posA) < BADFUSION_DISTANCE_CUTOFF && abs($pos1 - $posB) < BADFUSION_DISTANCE_CUTOFF); } return 0; } @@ -414,7 +412,7 @@ sub is_bad_fusion{ $second_bp->{tname} = normalizeChromosomeName($seq_ids[0], $second_bp->{tname}); # Ensure that the breakpoint chromosome names match - if(!$internal) {next if(is_bad_fusion($first_bp->{tname}, $first_bp->{tpos}, $second_bp->{tname}, $second_bp->{tpos}));} + next if(!$internal && is_bad_fusion($first_bp->{tname}, $first_bp->{tpos}, $second_bp->{tname}, $second_bp->{tpos})); # Determine the variant type: CTX, Internal_inv, Interal_splicing, Internal_dup, ITX, read_through, DEL, INS my $type = get_type($first_bp, $second_bp, $same_gene); @@ -811,34 +809,6 @@ sub exist_multiplename_pair_checking { return 0; } -sub is_knownfusiongenepair{ - my @genes1_tmp = %{(shift)}; - my @genes2_tmp = %{(shift)}; - - foreach my $g1 (@genes1_tmp) { - foreach my $g2 (@genes2_tmp) { - return 1 if (exists($known_fusion_partners{$g1}{$g2})); - } - } - - return 0; -} - - - -sub exist_multiplename_checking { - my %genelist = %{(shift)}; - my $targetgene = shift;#e.g. 
targetgene UBTF,MIR6782 - my @genes = split(/,|\|/, $targetgene); - - foreach my $g1 (@genes) { - return 1 if(exists($genelist{$g1})); - } - - return 0; -} - - sub is_good_ITD { my($bp1, $bp2) = @_; my @genes = split(/,|\|/, $bp1->{gene}); diff --git a/src/scripts/get_geneInfo.pl b/src/scripts/get_geneInfo.pl index 340cb82..3c9d977 100755 --- a/src/scripts/get_geneInfo.pl +++ b/src/scripts/get_geneInfo.pl @@ -18,7 +18,7 @@ use lib dirname($0); my $script_dir = dirname($0); #custom packages -use CiceroUtil qw(parse_range is_PCR_dup); +use CiceroUtil qw(parse_range is_PCR_dup exist_multiplename_checking); use TdtConfig; use constant FQ_BASE_NUMBER => 33; @@ -172,19 +172,6 @@ sub is_bad_chrom{ return 0; } -sub exist_multiplename_checking { - my %genelist = %{(shift)}; - my $targetgene = shift;#e.g. targetgene UBTF,MIR6782 - my @genes = split(/,|\|/, $targetgene); - - foreach my $g1 (@genes) { - return 1 if(exists($genelist{$g1})); - } - - return 0; -} - - =head1 LICENCE AND COPYRIGHT Copyright 2019 St. 
Jude Children's Research Hospital diff --git a/src/scripts/rank_SVs.pl b/src/scripts/rank_SVs.pl index 626d8a7..d5072e8 100755 --- a/src/scripts/rank_SVs.pl +++ b/src/scripts/rank_SVs.pl @@ -11,6 +11,8 @@ use List::Util qw[min max]; use TdtConfig; +use CiceroUtil qw(exist_multiplename_checking); # shared helper; replaces the local copy removed below + use DelimitedFile; use File::Temp qw/ tempdir /; @@ -299,15 +301,15 @@ sub scoring { my $rating = 'LQ'; $medal = 1 if(exist_multiplename_checking(\%known_fusion_partners,$fg1)); - $medal = 1 if(exist_multiplename_checking(\%known_fusion_partners,$fg2) && $sv->{ort} eq "?"); - $medal = 2 if(exist_multiplename_checking(\%known_fusion_partners,$fg2) && $sv->{ort} eq ">"); - $medal = 3 if(exist_multiplename_checking(\%known_fusion_partners,$fg1) && exist_multiplename_checking(\%known_fusion_partners,$fg2) && $sv->{type} !~ /Internal/); - $medal = 4 if(exists_knownfusionlist_checking(\%known_fusions,$fg1,$fg2)); + $medal = 1 if(exist_multiplename_checking(\%known_fusion_partners,$fg2) && $sv->{ort} eq "?"); + $medal = 2 if(exist_multiplename_checking(\%known_fusion_partners,$fg2) && $sv->{ort} eq ">"); + $medal = 3 if(exist_multiplename_checking(\%known_fusion_partners,$fg1) && exist_multiplename_checking(\%known_fusion_partners,$fg2) && $sv->{type} !~ /Internal/); + $medal = 4 if(exists_knownfusionlist_checking(\%known_fusions,$fg1,$fg2)); - my ($Is_known_ITD, $ITD_left_coor, $ITD_rightt_coor)=match_known_ITD(\%known_ITDs,$fg1); + my ($Is_known_ITD, $ITD_left_coor, $ITD_right_coor)=match_known_ITD(\%known_ITDs,$fg1); if($Is_known_ITD && $type eq 'Internal_dup' && - ($bp1->{tpos} >= $ITD_left_coor) && ($bp1->{tpos} <= $ITD_rightt_coor) && - ($bp2->{tpos} >= $ITD_left_coor) && ($bp2->{tpos} <= $ITD_rightt_coor)){ + ($bp1->{tpos} >= $ITD_left_coor) && ($bp1->{tpos} <= $ITD_right_coor) && + ($bp2->{tpos} >= $ITD_left_coor) && ($bp2->{tpos} <= $ITD_right_coor)){ $rating = 'HQ'; $medal = 4; } 
@@ -382,18 +384,6 @@ sub exists_knownfusionlist_checking { return 0; } -sub exist_multiplename_checking { - my %genelist = %{(shift)}; - my $targetgene = shift;#e.g. targetgene UBTF,MIR6782 - my @genes = split(/,|\|/, $targetgene); - - foreach my $g1 (@genes) { - return 1 if(exists($known_fusion_partners{$g1})); - } - - return 0; -} - sub match_known_ITD { my %knownITDlist = %{(shift)}; my $targetgene = shift;#e.g. targetgene UBTF,MIR6782