-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathset_xml_for_lipidmaps_classif.pl
executable file
·291 lines (250 loc) · 9.02 KB
/
set_xml_for_lipidmaps_classif.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
#! perl
use strict ;
no strict "refs" ;
use warnings ;
use Carp qw (cluck croak carp) ;
use Data::Dumper ;
use XML::Twig;
use Getopt::Long ;
use XML::Writer;
use IO::File;
## Initialized values
## NOTE(review): the default -tax / -output values are absolute Windows paths
## ('E:\TESTs\lipidmaps\...'); pass both options explicitly on any other machine.
my ( $OptHelp, $OptTaxFile, $OptLevels, $OptOutput ) = (undef, 'E:\\TESTs\\lipidmaps\\2013_lm_classif.txt' ,3, 'E:\\TESTs\\lipidmaps\\2013_lm_classif_conf.xml' ) ;
#my ( $OptHelp, $OptTaxFile, $OptLevels, $OptOutput ) = (undef, undef, undef, undef ) ;

GetOptions (
    "help|h"     => \$OptHelp,     # HELP
    "tax:s"      => \$OptTaxFile,  ## option : path to the taxonomy file
    "levels:i"   => \$OptLevels,   ## option : level 1 = create the category level | level 2 = create category and class levels | level 3 = create cat/class and subclass levels
    "output|o:s" => \$OptOutput,   ## Output file containing tags for galaxy xml file
) ;

#=============================================================================
#                                EXCEPTIONS
#=============================================================================
$OptHelp and Help() ;    # Help() prints the usage and exits

## Conf file
# N/A

## Counter of emitted categories, kept at file scope because the subroutines
## defined further down in this file may rely on it.
my $i = 0 ;

## Matching lines of the taxonomy file, stored as scalar references,
## one list per hierarchy level.
my ( @cat, @cl, @subcl ) ;

## Check that the input parameters exist :
if ( ( defined $OptTaxFile ) and ( defined $OptLevels ) ) {
    if ( -e $OptTaxFile ) {
        ## three-argument open + lexical handle : the file name can never be
        ## interpreted as an open mode, and $! tells why the open failed
        open ( my $tax_fh, '<', $OptTaxFile ) or die "Can't open $OptTaxFile: $!\n" ;

        while ( my $line = <$tax_fh> ) {
            chomp $line ;
            ## Parsing steps : each catch_* sub returns a reference to the
            ## line when it matches its level, or a reference to undef
            if ( $OptLevels >= 1 ) {
                my $new_cat = catch_categories($line) ;
                if ( defined $$new_cat ) { push ( @cat, $new_cat ) ; }
            }
            if ( $OptLevels >= 2 ) {
                my $new_cl = catch_classes($line) ;
                if ( defined $$new_cl ) { push ( @cl, $new_cl ) ; }
            }
            if ( $OptLevels == 3 ) {
                my $new_subcl = catch_subclasses($line) ;
                if ( defined $$new_subcl ) { push ( @subcl, $new_subcl ) ; }
            }
        } ## end of while
#       print "===> CAT :\n";
#       print Dumper @cat ;
#       print "\n===> CLASSES :\n";
#       print Dumper @cl ;
#       print "\n===> SUBCLASSES :\n";
#       print Dumper @subcl ;
        close ($tax_fh) ;

        write_xml_conf( $OptOutput, \@cat, \@cl, \@subcl ) ;

        if ( -e $OptOutput ) {
            print "End of Generation : please open the file $OptOutput and copy paste content\n" ;
        }
    }
    else {
        croak "No taxonomy file is available in $OptTaxFile\n" ;
    }
}
else {
    ## Report the problem BEFORE calling Help() : Help() exits, so anything
    ## placed after it is never executed.
    print STDERR "Some Parameters are not defined (-tax and -levels )\n" ;
    Help() ;
}
## Function : tell whether a taxonomy line is formatted like a category,
##            i.e. a name followed by an upper-case code in brackets.
##            Ex: Glycerolipids [GL]
## Input    : $entry, one line of the taxonomy file
## Output   : a reference to a scalar holding the whole line when it matches,
##            or a reference to undef when it does not.
##            Croaks when $entry is undefined.
sub catch_categories {
    my $line = $_[0] ;

    ## Guard : nothing to parse at all
    croak "Can't parse any entry for catching any category\n" unless defined $line ;

    ## ex: Glycerolipids [GL]
    my $category = ( $line =~ /([\s|\w]+)\s+\[([A-Z]+)\]/ ) ? $line : undef ;

    return \$category ;
}
### END of SUB
## Function : tell whether a taxonomy line is formatted like a class,
##            i.e. a name followed by a bracketed code made of two word
##            characters and two digits. Ex: Diradylglycerols [GL02]
## Input    : $entry, one line of the taxonomy file
##            (any further argument is ignored)
## Output   : a reference to a scalar holding the whole line when it matches,
##            or a reference to undef when it does not.
##            Croaks when $entry is undefined.
sub catch_classes {
    my ( $line ) = @_ ;

    if ( !defined $line ) {
        croak "Can't parse any entry for catching any class\n" ;
    }

    my $class ;
    ## ex: Diradylglycerols [GL02]
    $class = $line if $line =~ /(.*)\s+\[(\w{2})(\d{2})\]/ ;

    return \$class ;
}
### END of SUB
## Function : tell whether a taxonomy line is formatted like a subclass,
##            i.e. a name followed by a bracketed code made of two word
##            characters and four digits. Ex: 1-acyl,2-alkylglycerols [GL0207]
## Input    : $entry, one line of the taxonomy file
##            (any further argument is ignored)
## Output   : a reference to a scalar holding the whole line when it matches,
##            or a reference to undef when it does not.
##            Croaks when $entry is undefined.
sub catch_subclasses {
    my $line = $_[0] ;

    defined $line
        or croak "Can't parse any entry for catching any subclass\n" ;

    ## ex: 1-acyl,2-alkylglycerols [GL0207]
    my $is_subclass = $line =~ /(.*)\s+\[(\w{2})(\d{4})\]/ ;
    my $subclass    = $is_subclass ? $line : undef ;

    return \$subclass ;
}
### END of SUB
## Function : write the nested <conditional>/<param>/<option>/<when> tags
##            describing the category > class > subclass hierarchy into an
##            XML file.
## Input    : $output - path of the XML file to create
##            $cats   - array ref of scalar refs, category lines (ex: "Glycerolipids [GL]")
##            $cls    - array ref of scalar refs, class lines    (ex: "Diradylglycerols [GL02]")
##            $subcls - array ref of scalar refs, subclass lines (ex: "1-acyl,2-alkylglycerols [GL0207]")
## Output   : nothing (empty list). Croaks when the output file can't be
##            opened or closed.
## Values   : category options are numbered 1..N in list order; a class option
##            is "<category number><2-digit class code>", a subclass option is
##            "<category number><class code><2-digit subclass code>";
##            "NA_..." options stand for "nothing selected / available".
sub write_xml_conf {
    ## Retrieve Values
    my ( $output, $cats, $cls, $subcls ) = @_;

    my ( @cat_ids, @cl_ids ) ;

    ## Explicit mode argument + checked result : a failed open is reported
    ## here instead of leaving the writer without a valid file handle.
    my $xml = IO::File->new( $output, '>' )
        or croak "Can't open $output for writing: $!\n" ;

    my $writer = XML::Writer->new(
        OUTPUT      => $xml,
        DATA_INDENT => 3,        # indentation, three spaces
        DATA_MODE   => 1,        # one element per line
        ENCODING    => 'utf-8',
    );
    $writer->xmlDecl("UTF-8");
    $writer->startTag("conditional", "name" => "select_cat" );

    ## START CAT PART --------------------------------------
    $writer->startTag("param", "name" => "filter_cat", "label" => "Select a Lipid category for your query ", "type" => "select" );
    if ( scalar @{$cats} > 0 ) {
        ## Local counter : option values always start at 1 for each call, so
        ## they stay aligned with the <when> values generated below.
        my $cat_rank = 0 ;
        ## FOREACH CAT
        foreach my $cat ( @{$cats} ) {
            if ( $$cat =~ /(.*)\[([A-Z]+)\]/ ) {
                my $cat_code = $2 ;
                $cat_rank++ ;
                push( @cat_ids, $cat_code ) ;
                $writer->startTag("option", "value" => $cat_rank ) ;
                $writer->characters($$cat);
                $writer->endTag("option");
            }
        } ## end foreach cat
    }
    else {
        carp "The ref cat list is empty\n" ;
    }
    $writer->endTag("param");
    ## END CAT PART ----------------------------------------

    ## START CLASSES PART ----------------------------------
    if ( scalar @{$cls} > 0 ) {
        my $nb_cat = 0 ;
        foreach my $cat_id ( @cat_ids ) {
            $nb_cat++ ;
            @cl_ids = () ;    # class codes of the current category only
            $writer->startTag("when", "value" => $nb_cat ) ;
            ## NOTE(review): this nested conditional reuses the name
            ## "select_cat" of the outer one - confirm this is intended.
            $writer->startTag("conditional", "name" => "select_cat" );
            $writer->startTag("param", "name" => "filter_class", "label" => "Select a Lipid main class for your query ", "type" => "select" );
            ## FOREACH CLASS of the current category
            foreach my $cl ( @{$cls} ) {
                if ( $$cl =~ /(.*)\[\Q$cat_id\E(\d{2})\]/ ) {
                    my $cl_code = $2 ;
                    push (@cl_ids, $cl_code) ;
                    $writer->startTag("option", "value" => $nb_cat.$cl_code ) ;
                    $writer->characters($$cl);
                    $writer->endTag("option");
                }
            }
            ## add the possibility of No used class
            $writer->startTag("option", "value" => "NA_".$nb_cat, "selected" => "True" ) ;
            $writer->characters("No main class selected");
            $writer->endTag("option");
            $writer->endTag("param");

            ## START SUBCLASSES
            if ( scalar @{$subcls} > 0 ) {
                foreach my $cl_id ( @cl_ids ) {
                    $writer->startTag("when", "value" => $nb_cat.$cl_id ) ;
                    $writer->startTag("conditional", "name" => "select_subclass" ) ;
                    $writer->startTag("param", "name" => "filter_subclass", "label" => "Select a Lipid subclass for your query ", "type" => "select" );
                    ## FOREACH SUBCLASS of the current class
                    my $sub_cl_test = 0 ;    # set to 1 once a subclass is found
                    foreach my $subcl ( @{$subcls} ) {
                        if ( $$subcl =~ /(.*)\[\Q$cat_id$cl_id\E(\d{2})\]/ ) {
                            my $subcl_code = $2 ;
                            $sub_cl_test = 1 ;
                            $writer->startTag("option", "value" => $nb_cat.$cl_id.$subcl_code ) ;
                            $writer->characters($$subcl);
                            $writer->endTag("option");
                        }
                    }
                    if ( $sub_cl_test == 0 ) {
                        ## the class has no subclass at all
                        $writer->startTag("option", "value" => "NA_".$nb_cat.$cl_id ) ;
                        $writer->characters("No subclass available");
                        $writer->endTag("option");
                    }
                    else {
                        ## add the possibility of No used subclass
                        $writer->startTag("option", "value" => "NA_".$nb_cat.$cl_id, "selected" => "True" ) ;
                        $writer->characters("No subclass selected");
                        $writer->endTag("option");
                    }
                    $writer->endTag("param");
                    $writer->endTag("conditional") ;
                    $writer->endTag("when") ;
                } ## end foreach class id
            }
            ## END SUBCLASSES
            $writer->endTag("conditional") ;
            $writer->endTag("when") ;
        } ## end foreach category id
    }
    ## END CLASSES PART ----------------------------------------
    $writer->endTag("conditional") ;

    ## Finish the document (checks that every start tag was closed) and make
    ## sure buffered data really reached the disk.
    $writer->end() ;
    $xml->close() or croak "Can't close $output: $!\n" ;

    return ;
}
### END of SUB
#====================================================================================
# Help subroutine : prints the usage text on STDERR, then stops the script.
# Called with the -h / -help option.
# number of arguments : 0
# Argument(s)         : none
# Return              : never returns, exits with status 1
#====================================================================================
sub Help {
    ## Usage text (starts with an empty line)
    my $usage = <<"END_OF_USAGE";

createLipidmapsTax
# createLipidmapsTax is a script to process lipidmpas taxonomy file and return a list of xml tags for Galaxy web interface.
# Input : a file (TXT format)
# Author : Franck Giacomoni
# Email : fgiacomoni\@clermont.inra.fr
# Version : 1.0
# Created : 29/10/2012
USAGE :
createLipidmapsTax.pl -tax [path to input txt file] -levels [1|2|3] (1 for categories only, 2 for categories+classes and 3 for categories+classes+subclasses)
or createLipidmapsTax.pl -help
END_OF_USAGE

    print STDERR $usage ;
    exit(1) ;
}
## END of script - F Giacomoni