-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrebase.pl
84 lines (81 loc) · 1.68 KB
/
rebase.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Author: Lalitha Viswanathan
# Affiliation: Tata Consultancy Services
use strict;
use warnings;

# Convert a tagged, eight-lines-per-record listing (presumably a REBASE
# enzyme file -- confirm against the data source) into a tab-separated table.
#
# Usage: perl rebase.pl <input-file>
#
# Input:  the file named by the first command-line argument.  The first 118
#         lines are read up front; scanning for records starts at the 118th.
#         A record starts at any line containing '<' and spans 8 lines.
# Output: rebaseparsed.txt in the current directory (overwritten).  For every
#         record one row is written: field 1, then the columns derived from
#         field 5 (see format_site).  The other six fields are ignored.
# Dies:   when no argument is given, or when either file cannot be opened,
#         or when the output file cannot be closed cleanly.

my $OUTPUT_FILE      = 'rebaseparsed.txt';
my $HEADER_LINES     = 118;    # lines consumed before scanning for records
my $LINES_PER_RECORD = 8;      # one line per <1> .. <8> tag
my $NAME_FIELD       = 1;      # written verbatim as the first column
my $SITE_FIELD       = 5;      # recognition sequence; parsed by format_site

my $input_file = $ARGV[0];
defined $input_file
    or die "Usage: $0 <input-file>\n";

# Three-argument open with lexical handles: the file name is never
# interpreted as a mode or a command, and failures are reported.
open(my $in, '<', $input_file)
    or die "Cannot open '$input_file' for reading: $!\n";
open(my $out, '>', $OUTPUT_FILE)
    or die "Cannot open '$OUTPUT_FILE' for writing: $!\n";

# Consume the header.  The last line read here is still examined by the
# loop below, exactly as before.
my $line;
for (1 .. $HEADER_LINES) {
    $line = <$in>;
}

while (defined $line) {

    # Anything without a '<' is not the start of a record: skip it.
    if ($line !~ /</) {
        $line = <$in>;
        next;
    }

    for my $field_no (1 .. $LINES_PER_RECORD) {

        # A record cut short by end-of-file is padded with empty fields.
        my $field = defined $line ? $line : '';

        # Drop the <N> tag and every whitespace character (this also
        # removes the line ending, including any carriage return).
        $field =~ s/<\d>|\s//g;

        if ($field_no == $NAME_FIELD) {
            print {$out} "$field\t";
        }
        elsif ($field_no == $SITE_FIELD) {
            print {$out} format_site($field);
        }

        $line = <$in>;
    }
}

close($in);
close($out)
    or die "Cannot close '$OUTPUT_FILE': $!\n";

# Build the remainder of an output row from a cleaned field-5 string.
#
# Parameter: the site string with tag and whitespace already removed.
# Returns:   a newline-terminated, tab-separated string:
#   * site contains '?'            -> 0, 0, site, 0, 0, 0
#   * plain sequence               -> 0, 0, sequence, 0, 0, caret
#   * sequence followed by "(a/b)" -> 0, 0, sequence, a, b, caret
#   * any other shape              -> the pieces in input order, then caret
# "caret" is the 1-based position of the last '^' in the site once
# parentheses, slashes, hyphens and digits are removed (earlier carets
# still count as characters), or 0 when the site has no '^'.
sub format_site {
    my ($site) = @_;

    # Unknown sequence: nothing to analyse.
    return "0\t0\t$site\t0\t0\t0\n" if $site =~ /\?/;

    # Turn "(a/b)SEQ(c/d)" into space-separated pieces.
    # NOTE(review): a site that starts with '(' yields an empty first piece,
    # so such rows carry one extra empty column.  Kept unchanged here;
    # confirm with whatever consumes the file before altering it.
    (my $spaced = $site) =~ s/[()\/]/ /g;
    my @pieces = split / /, $spaced;

    # Keep only the bases and the cut marks for locating the caret.  The
    # literal pair "^M" is deliberately NOT stripped any more: carriage
    # returns are already gone, and stripping it discarded a cut mark that
    # precedes the base letter M (e.g. GT^MKAC), reporting position 0.
    (my $bases = $site) =~ s/[()\/-]|\d//g;
    my $caret = rindex($bases, '^') + 1;    # rindex gives -1 when absent

    my $row = '';
    $row .= "0\t0\t" if @pieces == 1 || @pieces == 3;
    for my $piece (@pieces) {
        (my $clean = $piece) =~ s/\^//g;
        $row .= "$clean\t";
    }
    $row .= "0\t0\t" if @pieces == 1;

    return $row . $caret . "\n";
}