-
Notifications
You must be signed in to change notification settings - Fork 4
/
SetFASTARowLength.pl
executable file
·129 lines (99 loc) · 2.53 KB
/
SetFASTARowLength.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/perl
# This script makes sure that the sequence of a FASTA file is in rows of defined length (or less
# for the last row).
#USAGE: perl SetFASTARowLength.pl [IN FASTA FILE] [OUT FASTA FILE] [ROW LENGTH]
############################################
# UPDATE HISTORY AND LIST OF THINGS TO FIX #
############################################
#14.04.01
# - Wrote script.
#
#15.01.28
# - Added in removal of all blank lines to conform with 'samtools faidx'.
#
#15.03.26
# - Added subroutines to script so it no longer needs a common subroutine file.
#
#15.03.27
# - Added in use of ANSIColor module.
#
###############
# SUBROUTINES #
###############
use Term::ANSIColor;
#fasta2hash(FILE)
# Reads a FASTA file and returns a hash that maps each record ID to its
# complete sequence, joined into a single string without line breaks.
#
#   FILE    - Path of the FASTA file to read.
#   Returns - Hash (flat key/value list) of ID => sequence. The ID is the text
#             between the leading '>' and the first space of the header line.
#             A record without any sequence lines maps to the empty string.
#             If an ID occurs more than once, the last record wins.
#   Dies    - If FILE cannot be opened.
sub fasta2hash {
    my ($file) = @_;
    my %fastahash;
    my $curhead;    #ID of the record being read; undef until the first header.

    open(my $in, '<', $file)
        or die "Cannot open FASTA file '$file' for reading: $!\n";

    #Read line by line so the raw file is never held in memory as a whole.
    while (my $line = <$in>) {
        #Strip LF as well as CRLF endings so that a stray "\r" never ends up
        #inside an ID or a sequence.
        $line =~ s/\r?\n\z//;

        #A header is a line that STARTS with '>'. Starting a new record here
        #(instead of testing whether the previous sequence is empty) keeps
        #records that have no sequence lines.
        if ($line =~ /\A>([^ ]*)/) {
            $curhead = $1;
            $fastahash{$curhead} = "";
            next;
        }

        #Blank lines carry no sequence.
        next if $line eq "";

        #Sequence found before any header cannot be assigned to a record.
        if (!defined $curhead) {
            warn "Ignoring sequence line $. of '$file': no header seen yet.\n";
            next;
        }

        $fastahash{$curhead} .= $line;
    }
    close($in);

    return %fastahash;
}
#Print the usage text and stop when help is requested or when any of the three
#positional arguments (input file, output file, row length) is missing.
#The arguments are read from @ARGV directly because no option variables are
#ever populated in this script.
my $help_requested = grep { /\A--?h(?:elp)?\z/ } @ARGV;
if ($help_requested || (scalar(@ARGV) < 3)) {
    print colored(['bright_red'], '
This script makes sure that the sequence of a FASTA file is in rows of defined length (or
less for the last row).
USAGE: perl SetFASTARowLength.pl [IN FASTA FILE] [OUT FASTA FILE] [ROW LENGTH]
Options and formatting are as follows:
--help or --h
Print this text.
');
    exit;
}
##########
# SCRIPT #
##########
#Re-wrap every sequence of the input FASTA to rows of at most ROW LENGTH
#characters and write the records, sorted by ID, to the output file.
#   $ARGV[0] - input FASTA file
#   $ARGV[1] - output FASTA file
#   $ARGV[2] - row length (positive integer)
my ($in_file, $out_file, $len) = @ARGV;

#A zero, negative or non-numeric row length would silently drop all sequence.
die "Row length must be a positive integer.\n"
    unless defined($len) && $len =~ /\A[1-9][0-9]*\z/;
die "No output file given.\n"
    unless defined($out_file) && $out_file ne "";

#The input is read completely before the output is opened, so the input and
#output may be the same file.
my %fasta = fasta2hash($in_file);

open(my $out, '>', $out_file)
    or die "Cannot open '$out_file' for writing: $!\n";

foreach my $key (sort(keys %fasta)) {
    print {$out} ">$key\n";

    #Emit the sequence in chunks of $len characters; the last chunk may be
    #shorter. Only non-empty chunks are written, so the output never contains
    #blank lines (required by 'samtools faidx') and needs no post-filtering.
    my $seq    = $fasta{$key};
    my $seqlen = length($seq);
    for (my $pos = 0; $pos < $seqlen; $pos += $len) {
        print {$out} substr($seq, $pos, $len), "\n";
    }
}

#Buffered write errors (e.g. disk full) only surface when the handle is closed.
close($out) or die "Error while writing '$out_file': $!\n";