forked from The-Sequence-Ontology/GAL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeature2gff
executable file
·123 lines (96 loc) · 2.94 KB
/
feature2gff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use Bio::DB::Fasta;
use FindBin;
use lib "$FindBin::RealBin/../lib";
use GAL::Parser;
use GAL::Writer::GFF3;
#-----------------------------------------------------------------------------
#----------------------------------- MAIN ------------------------------------
#-----------------------------------------------------------------------------
# Usage text.  It is printed (and the script exits) when --help is given or
# when no input files are named on the command line.
my $usage = "
Synopsis:
feature2gff --parser gigabayes --fasta /path/to/reference gigabayes_output.gff
Description:
Convert various sequence feature formats to GFF3/GVF.
Options:
--help Print this usage message and exit.
--parser The format for the feature file - a subclass of GAL::Parser e.g.
basic_snp (see the Notes below for more).
--fasta Some parsers (such as those producing GVF from minimal input
data) need access to the reference sequence. This argument
points to the path where the reference sequences are located.
--pragma A file that contains pragmas to include at the beginning of the GFF
output.
Notes:
Following are some of the parsers that are a part of GAL, but please see
GAL/lib/GAL/Parser for a complete list.
basic_snp
complete_genomics
cosmic
dbsnp_flat
gff3
illumina_indel
illumina_snp
maq_cns2snp
na18507_sanger_indel
quake
samtools_pileup
soap_indel
soap_snp
solid
template
template_sequence_alteration
venter_indel
venter_snp
watson_cshl
";
# Parse the command line.  $parser, $fasta and $pragma are consumed further
# down the script; whatever GetOptions leaves in @ARGV is treated as the list
# of input files.
my ($help, $parser, $fasta, $pragma);
my $opt_success = GetOptions('help'     => \$help,
                             'parser=s' => \$parser,
                             'fasta=s'  => \$fasta,
                             'pragma=s' => \$pragma,
                            );

# GetOptions returns false when it could not parse the command line.  The
# message is labelled FATAL, so actually stop here (with a non-zero exit
# status via die) instead of carrying on with half-parsed options, and end the
# message with a newline so it is not run together with later output.
if (! $opt_success) {
    die join(' : ', 'FATAL',
             'command_line_parse_error',
             'Use feature2gff --help to see correct usage') . "\n";
}

# Show the usage text when asked for it, or when no input files were given.
if ($help || !@ARGV) {
    print $usage;
    exit(0);
}
# Everything left on the command line after option parsing is an input file.
my @files = @ARGV;

# Without --parser there is nothing to run: list what sits in the directory
# next to GAL/Parser.pm and stop.
if (! $parser) {
    # %INC records the path GAL/Parser.pm was loaded from; dropping the
    # literal '.pm' suffix gives the sibling directory (e.g. lib/GAL/Parser).
    (my $include_path = $INC{'GAL/Parser.pm'}) =~ s/\.pm\z//;

    # Read the directory directly rather than with glob(): glob splits its
    # pattern on whitespace, so an install path containing spaces would
    # produce an empty or bogus list.
    my @parsers;
    if (opendir(my $DIR, $include_path)) {
        for my $entry (readdir $DIR) {
            # '.', '..' and hidden files (a '*' glob never matched these).
            next if $entry =~ /^\./;
            # Editor backup/autosave files.  The test is applied to the file
            # name only, so a '~' or '#' elsewhere in the install path no
            # longer hides every parser.
            next if $entry =~ /[\#\~]/;
            $entry =~ s/\.pm$//;
            push @parsers, $entry;
        }
        closedir $DIR;
    }
    my $list = join "\n", sort @parsers;

    print STDERR join(' : ', 'WARN',
                      'parser_class_not_given',
                      'Check the following list (or GAL/lib/GAL/Parser/).',
                     ), "\n";
    print "$list\n";

    # Exit non-zero with a real message instead of a bare "Died at ...".
    die join(' : ', 'FATAL',
             'parser_class_not_given',
             'The --parser option is required') . "\n";
}
# Load the parser class named by --parser.
#
# $parser comes straight from the command line and is interpolated into a
# string eval below, so first insist that it looks like a (possibly nested)
# package-name fragment.  Anything else (e.g. "x; system(...)") would be run
# as Perl code by the eval.
if ($parser !~ /\A\w+(?:::\w+)*\z/) {
    die join(' : ', 'FATAL',
             'invalid_parser_class',
             "'$parser' is not a valid GAL::Parser subclass name") . "\n";
}
my $class = "GAL::Parser::$parser";

# require only maps Foo::Bar to Foo/Bar.pm for a bareword, hence the string
# eval.  The trailing "1" makes success explicit so the result can be tested
# directly; $@ carries the reason for a failed load.
eval "require $class; 1"
    or die($@ || "Failed to load $class\n");
# Copy the contents of the --pragma file, if one was given, to STDOUT ahead
# of any converted features.
#
# A pragma file that was asked for but cannot be opened is an error: the
# previous "-r" guard skipped it silently, producing output without the
# requested pragmas.  Testing definedness (rather than truth) also lets a
# file literally named "0" through.
if (defined $pragma) {
    open(my $IN, '<', $pragma)
        or die "Can't open $pragma for reading: $!\n";
    # Stream line by line instead of slurping the whole file into a list.
    while (my $line = <$IN>) {
        print $line;
    }
    close $IN;
}
# Convert each input file in turn: build a parser of the class selected with
# --parser, pull feature hashes from it one at a time and hand each one to a
# GAL::Writer::GFF3 writer.
foreach my $input_file (@files) {
    my $feature_parser = $class->new(file => $input_file);

    # Pass the --fasta value on to the parser only when one was supplied.
    if ($fasta) {
        $feature_parser->fasta($fasta);
    }

    # A fresh writer is created for every input file.
    my $gff_writer = GAL::Writer::GFF3->new();

    # The loop ends as soon as next_feature_hash returns a false value.
    while (my $feature_hash = $feature_parser->next_feature_hash) {
        $gff_writer->write_feature($feature_hash);
    }
}
#-----------------------------------------------------------------------------
#------------------------------- SUBROUTINES ---------------------------------
#-----------------------------------------------------------------------------