forked from The-Sequence-Ontology/GAL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeature_overlaps
executable file
·96 lines (76 loc) · 2.37 KB
/
feature_overlaps
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use Set::IntSpan::Fast;
#-----------------------------------------------------------------------------
#----------------------------------- MAIN ------------------------------------
#-----------------------------------------------------------------------------
# Usage text. Printed for --help and when no input files are given; the
# input-file readability check that follows option parsing also emits it.
my $usage = "
Synopsis:
feature_overlaps file1.gff file2.gff
Description:
Calculate various feature overlap statistics from a set of gff files.
Options:
--pad 1000
Pad the features by the given amount in each direction.
";

# $help : set when --help is given.
# $pad  : integer padding applied to each feature; undef when --pad is absent.
my ($help, $pad);
my $opt_success = GetOptions(
    'help'  => \$help,
    'pad=i' => \$pad,
);

# GetOptions returns false after reporting the offending option on STDERR.
# Stop here with a non-zero exit status instead of continuing with
# half-parsed options (the message previously had no trailing newline and
# execution simply carried on).
if (!$opt_success) {
    die join(' : ', 'FATAL',
                    'command_line_parse_error',
                    'Use gal_feature_overlaps --help to see correct usage')
        . "\n";
}

# Explicit request for help, or nothing to process: show usage and exit
# successfully.
if ($help || !@ARGV) {
    print $usage;
    exit(0);
}
# Every remaining command-line argument is an input GFF file.
my @files = @ARGV;

# Refuse to start if any input cannot be read, and say which one(s).
my @unreadable = grep { !-r $_ } @files;
if (@unreadable) {
    die 'FATAL : unreadable_file : ' . join(', ', @unreadable) . "\n" . $usage;
}

# %file_sets  : file name => { seqid => Set::IntSpan::Fast of the positions
#               covered by that file's features (never padded) }.
# %merged_set : seqid => Set::IntSpan::Fast of the positions covered by the
#               features of all files together, padded by --pad when given.
my %file_sets;
my %merged_set;
for my $file (@files) {
    open(my $IN, '<', $file) or die "Can't open $file for reading\n$!\n";
    my %file_set;
  LINE:
    while (my $line = <$IN>) {
        # The FASTA test has to come before the comment test: '##FASTA'
        # itself starts with '#', so in the opposite order it is skipped as
        # a comment and the sequence lines that follow get parsed as
        # features. A bare '>' header also marks the start of sequence data.
        last LINE if $line =~ /^(?:\#\#FASTA|>)/;
        next LINE if $line =~ /^(\#|\s)/;

        # Strip the line ending (LF or CRLF) so a final column is clean.
        $line =~ s/\r?\n\z//;

        # Only columns 1 (seqid), 4 (start) and 5 (end) are used.
        my ($seqid, undef, undef, $start, $end) = split /\t/, $line;

        # Skip lines that cannot describe a range; passing undefined or
        # non-numeric coordinates on would corrupt the sets or abort the
        # run with an unhelpful message.
        my $is_valid = defined $end
            && length $seqid
            && $start =~ /\A\d+\z/
            && $end   =~ /\A\d+\z/
            && $start <= $end;
        if (!$is_valid) {
            warn "WARN : malformed_feature_line : skipping line $. of $file\n";
            next LINE;
        }

        $file_set{$seqid} ||= Set::IntSpan::Fast->new();
        $file_set{$seqid}->add_range($start, $end);

        if ($pad) {
            $start -= $pad;
            # NOTE(review): GFF coordinates are 1-based, so clamping to 1
            # may be more appropriate; 0 is kept so locus names are unchanged.
            $start = 0 if $start < 0;
            $end += $pad;
        }
        $merged_set{$seqid} ||= Set::IntSpan::Fast->new();
        $merged_set{$seqid}->add_range($start, $end);
    }
    close($IN) or warn "Can't close $file\n$!\n";
    $file_sets{$file} = \%file_set;
}
# %data : "seqid:start:end" of a merged locus => array ref of the files
#         (in sorted file-name order) that have a feature overlapping it.
#         Loci that no file overlaps get no entry.
#
# Each merged locus used to be tested with contains_any($start .. $end),
# which builds a list with one element per base of the locus -- prohibitive
# for wide or heavily padded loci. Both the merged loci and each file's
# runs come back from iterate_runs() as disjoint, ascending ranges, so a
# single forward sweep over the two lists finds every overlap without
# expanding any range.
my %data;
for my $seqid (sort keys %merged_set) {
    # Merged loci of this sequence as [start, end] pairs, ascending.
    my @loci;
    my $next_locus = $merged_set{$seqid}->iterate_runs();
    while (my ($start, $end) = $next_locus->()) {
        push @loci, [$start, $end];
    }

    for my $file (sort keys %file_sets) {
        my $set = $file_sets{$file}{$seqid};
        next unless $set;

        # Index of the first locus that can still overlap the current or
        # any later run; loci before it end left of the current run.
        my $first = 0;
        my $next_run = $set->iterate_runs();
      RUN:
        while (my ($run_start, $run_end) = $next_run->()) {
            $first++ while $first < @loci && $loci[$first][1] < $run_start;
            last RUN if $first >= @loci;

            # Every locus from $first on that starts at or before the end
            # of this run overlaps it.
            for (my $i = $first; $i < @loci && $loci[$i][0] <= $run_end; $i++) {
                my $locus = join ':', ($seqid, @{ $loci[$i] });
                my $hits = $data{$locus} ||= [];

                # Several runs of one file may fall in the same locus;
                # record the file once. Files are processed one at a time,
                # so a repeat can only be the last entry.
                next if @{$hits} && $hits->[-1] eq $file;
                push @{$hits}, $file;
            }
        }
    }
}
# Print one tab-separated line per locus: the locus name (seqid:start:end),
# the number of files overlapping it, and the comma-separated file list.
#
# Locus names are ordered by seqid and then numerically by start and end.
# A plain string sort of the names would place "chr1:10000:20000" ahead of
# "chr1:900:1000". The name is split from the right because a seqid may
# itself contain ':'.
my @report_order =
    map  { $_->[0] }
    sort {
           $a->[1] cmp $b->[1]
        || $a->[2] <=> $b->[2]
        || $a->[3] <=> $b->[3]
        || $a->[0] cmp $b->[0]
    }
    map  {
        my ($seqid, $start, $end) = /\A(.*):(-?\d+):(-?\d+)\z/s;
        # A name that does not parse falls back to ordering by the whole
        # string.
        defined $seqid ? [$_, $seqid, $start, $end] : [$_, $_, 0, 0];
    } keys %data;

for my $locus (@report_order) {
    my @files_hit = @{ $data{$locus} };
    print join("\t", $locus, scalar @files_hit, join(',', @files_hit)), "\n";
}
#-----------------------------------------------------------------------------
#-------------------------------- SUBROUTINES --------------------------------
#-----------------------------------------------------------------------------