Skip to content

Commit 477e1d9

Browse files
committed
Add some downloaded files
1 parent 2702641 commit 477e1d9

19 files changed

+179705
-2
lines changed

atis.fold3.pkl

+178,722
Large diffs are not rendered by default.

conlleval.pl

+327
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
#!/usr/bin/perl -w
2+
# conlleval: evaluate result of processing CoNLL-2000 shared task
3+
# usage: conlleval [-l] [-r] [-d delimiterTag] [-o oTag] < file
4+
# README: http://cnts.uia.ac.be/conll2000/chunking/output.html
5+
# options: l: generate LaTeX output for tables like in
6+
# http://cnts.uia.ac.be/conll2003/ner/example.tex
7+
# r: accept raw result tags (without B- and I- prefix;
8+
# assumes one word per chunk)
9+
# d: alternative delimiter tag (default is single space)
10+
# o: alternative outside tag (default is O)
11+
# note: the file should contain lines with items separated
12+
# by $delimiter characters (default space). The final
13+
# two items should contain the correct tag and the
14+
# guessed tag in that order. Sentences should be
15+
# separated from each other by empty lines or lines
16+
# with $boundary fields (default -X-).
17+
# url: http://lcg-www.uia.ac.be/conll2000/chunking/
18+
# started: 1998-09-25
19+
# version: 2004-01-26
20+
# author: Erik Tjong Kim Sang <[email protected]>
21+
22+
use strict;
23+
24+
# truth values shared by the main script and the chunk helper subs
my ($false, $true) = (0, 42);

# settings; the last four can be changed from the command line
my $boundary  = "-X-";   # first field of a sentence boundary line
my $delimiter = " ";     # field delimiter (option -d)
my $oTag      = "O";     # outside tag (option -o)
my $latex     = 0;       # generate LaTeX formatted output (option -l)
my $raw       = 0;       # raw input: add B to every token (option -r)

# data of the line that is currently being processed
my $line;                       # the line itself
my @features;                   # fields found on the line
my $nbrOfFeatures = -1;         # highest field index per line (-1: not known yet)
my $firstItem;                  # first field (for sentence boundary checks)
my ($correct, $correctType);    # corpus chunk tag (I,O,B) and its type (NP,VP,etc.)
my ($guessed, $guessedType);    # guessed chunk tag and its type

# data remembered from the previous line
my $inCorrect = $false;                          # chunk in progress is correct so far
my ($lastCorrect, $lastCorrectType) = ("O", ""); # previous corpus chunk tag and type
my ($lastGuessed, $lastGuessedType) = ("O", ""); # previous guessed chunk tag and type

# overall counts
my $correctChunk = 0;    # number of correctly identified chunks
my $foundCorrect = 0;    # number of chunks in corpus
my $foundGuessed = 0;    # number of identified chunks
my $correctTags  = 0;    # number of correct chunk tags
my $tokenCounter = 0;    # token counter (ignores sentence breaks)

# the same chunk counts, kept per chunk type
my (%correctChunk, %foundCorrect, %foundGuessed);

# scores and helpers for the report
my ($precision, $recall, $FB1) = (0.0, 0.0, 0.0);   # FB1: Van Rijsbergen 1979
my $i;                   # miscellaneous counter
my $lastType;            # temporary storage for detecting duplicates
my @sortedTypes;         # sorted list of chunk type names
61+
62+
# parse command line options: -l and -r are switches, -d and -o take a value.
# Parsing stops at the first argument that does not start with a hyphen.
while (@ARGV and $ARGV[0] =~ /^-/) {
    my $option = shift(@ARGV);
    if    ($option eq "-l") { $latex = 1; }
    elsif ($option eq "-r") { $raw = 1; }
    elsif ($option eq "-d") {
        if (not defined $ARGV[0]) {
            die "conlleval: -d requires delimiter character";
        }
        $delimiter = shift(@ARGV);
    }
    elsif ($option eq "-o") {
        if (not defined $ARGV[0]) {
            # this message used to be a copy of the -d one
            die "conlleval: -o requires outside tag";
        }
        $oTag = shift(@ARGV);
    }
    else { die "conlleval: unknown argument $option\n"; }
}
# the data is read from STDIN, so no argument may be left over
if (@ARGV) { die "conlleval: unexpected command line argument\n"; }
81+
# process input: every line holds one token; its last two fields are the
# correct tag and the guessed tag, in that order
while (<STDIN>) {
    chomp($line = $_);
    @features = split(/$delimiter/,$line);

    # the first line with fields fixes the field count for all later lines;
    # lines without any field (sentence breaks) are exempt from the check
    if ($nbrOfFeatures < 0) { $nbrOfFeatures = $#features; }
    elsif ($nbrOfFeatures != $#features and @features != 0) {
        printf STDERR "unexpected number of features: %d (%d)\n",
            $#features+1,$nbrOfFeatures+1;
        exit(1);
    }
    # empty lines and boundary lines become an out-of-chunk boundary token
    if (@features == 0 or
        $features[0] eq $boundary) { @features = ($boundary,"O","O"); }
    if (@features < 2) {
        die "conlleval: unexpected number of features in line $line\n";
    }
    if ($raw) {
        # raw tags have no B-/I- prefix: map the outside tag to O and turn
        # every other tag into the start of a chunk of its own
        for my $index ($#features, $#features-1) {
            if ($features[$index] eq $oTag) { $features[$index] = "O"; }
            if ($features[$index] ne "O") {
                $features[$index] = "B-$features[$index]";
            }
        }
    }
    # 20040126 ET code which allows hyphens in the types: only the first
    # hyphen separates the chunk tag from the chunk type
    my $guessedField = pop(@features);
    ($guessed,$guessedType) = $guessedField =~ /^([^-]*)-(.*)$/ ?
        ($1,$2) : ($guessedField,"");
    my $correctField = pop(@features);
    ($correct,$correctType) = $correctField =~ /^([^-]*)-(.*)$/ ?
        ($1,$2) : ($correctField,"");
    # any false type (including "0") is stored as the empty string
    $guessedType = "" if not $guessedType;
    $correctType = "" if not $correctType;

    # lines that consist of the two tags only have no first item; use an
    # empty string so that the boundary tests below do not hit undef
    $firstItem = shift(@features);
    $firstItem = "" if not defined $firstItem;

    # 1999-06-26 sentence breaks should always be counted as out of chunk
    if ( $firstItem eq $boundary ) { $guessed = "O"; }

    # chunk transitions between the previous and the current token; the
    # helpers only inspect their arguments, so one call each is enough
    my $correctEnds =
        endOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType);
    my $guessedEnds =
        endOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType);
    my $correctStarts =
        startOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType);
    my $guessedStarts =
        startOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType);

    if ($inCorrect) {
        if ($correctEnds and $guessedEnds and
            $lastGuessedType eq $lastCorrectType) {
            # both chunks ended here with the same type: count a correct chunk
            $inCorrect = $false;
            $correctChunk++;
            $correctChunk{$lastCorrectType}++;
        }
        elsif ($correctEnds != $guessedEnds or
               $guessedType ne $correctType) {
            # only one chunk ended or the types differ: the chunk is wrong
            $inCorrect = $false;
        }
    }

    # a chunk can only be correct if both chunks start at the same token
    if ($correctStarts and $guessedStarts and
        $guessedType eq $correctType) { $inCorrect = $true; }

    if ($correctStarts) {
        $foundCorrect++;
        $foundCorrect{$correctType}++;
    }
    if ($guessedStarts) {
        $foundGuessed++;
        $foundGuessed{$guessedType}++;
    }
    # boundary tokens do not count for the tag accuracy
    if ( $firstItem ne $boundary ) {
        if ( $correct eq $guessed and $guessedType eq $correctType ) {
            $correctTags++;
        }
        $tokenCounter++;
    }

    $lastGuessed = $guessed;
    $lastCorrect = $correct;
    $lastGuessedType = $guessedType;
    $lastCorrectType = $correctType;
}
184+
# a chunk that is still correct when the input ends was never closed by a
# following line, so count it here
if ($inCorrect) {
    $correctChunk += 1;
    if ($correctChunk{$lastCorrectType}) { $correctChunk{$lastCorrectType} += 1; }
    else { $correctChunk{$lastCorrectType} = 1; }
}
189+
190+
# report the results. Plain text mode prints the overall figures first and
# then one line per chunk type; LaTeX mode prints a table with one row per
# chunk type followed by an overall row.

# precision, recall and FB1 (in percent) for three chunk counts: correctly
# identified chunks, identified chunks and chunks in the corpus.
# A score is 0.0 when its denominator is zero.
my $computeScores = sub {
    my ($nbrCorrect,$nbrGuessed,$nbrInCorpus) = @_;
    my $p = $nbrGuessed ? 100*$nbrCorrect/$nbrGuessed : 0.0;
    my $r = $nbrInCorpus ? 100*$nbrCorrect/$nbrInCorpus : 0.0;
    my $f = $p+$r > 0 ? 2*$p*$r/($p+$r) : 0.0;
    return($p,$r,$f);
};

# sort chunk type names; the hash keeps every name once, also the empty
# type name that is used for tags without a type
my %seenType;
@seenType{keys %foundCorrect,keys %foundGuessed} = ();
@sortedTypes = sort keys %seenType;

# a type that was seen on one side only has no count in the other tables
for my $type (@sortedTypes) {
    for my $counts (\%correctChunk,\%foundCorrect,\%foundGuessed) {
        $counts->{$type} = 0 if not $counts->{$type};
    }
}

if (not $latex) {
    # print overall performance
    ($precision,$recall,$FB1) =
        $computeScores->($correctChunk,$foundGuessed,$foundCorrect);
    print "processed $tokenCounter tokens with $foundCorrect phrases; ";
    print "found: $foundGuessed phrases; correct: $correctChunk.\n";
    if ($tokenCounter>0) {
        printf "accuracy: %6.2f%%; ",100*$correctTags/$tokenCounter;
        print "$correctChunk $foundCorrect $foundGuessed ";
        printf "precision: %6.2f%%; ",$precision;
        printf "recall: %6.2f%%; ",$recall;
        printf "FB1: %6.2f\n",$FB1;
    }

    # print performance per chunk type
    for my $type (@sortedTypes) {
        ($precision,$recall,$FB1) = $computeScores->(
            $correctChunk{$type},$foundGuessed{$type},$foundCorrect{$type});
        printf "%17s: ",$type;
        printf "% 4d % 4d % 4d ", $correctChunk{$type}, $foundCorrect{$type}, $foundGuessed{$type};
        printf "precision: %6.2f%%; ",$precision;
        printf "recall: %6.2f%%; ",$recall;
        printf "FB1: %6.2f %d\n",$FB1,$foundGuessed{$type};
    }
} else {
    print " & Precision & Recall & F\$_{\\beta=1} \\\\\\hline";
    for my $type (@sortedTypes) {
        ($precision,$recall,$FB1) = $computeScores->(
            $correctChunk{$type},$foundGuessed{$type},$foundCorrect{$type});
        printf "\n%-7s & %6.2f\\%% & %6.2f\\%% & %6.2f \\\\",
            $type,$precision,$recall,$FB1;
    }
    print "\\hline\n";
    ($precision,$recall,$FB1) =
        $computeScores->($correctChunk,$foundGuessed,$foundCorrect);
    printf "Overall & %6.2f\\%% & %6.2f\\%% & %6.2f \\\\\\hline\n",
        $precision,$recall,$FB1;
}

exit 0;
260+
261+
# endOfChunk: tells whether a chunk ended between the previous and the
#             current word
# arguments:  previous chunk tag, current chunk tag, previous chunk type,
#             current chunk type
# returns:    $true if a chunk ended, $false otherwise
# note:       this code is capable of handling other chunk representations
#             than the default CoNLL-2000 ones, see EACL'99 paper of Tjong
#             Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006

sub endOfChunk {
    my ($prevTag,$tag,$prevType,$type) = @_;

    # corrected 1998-12-22: these chunks are assumed to have length 1
    return($true) if ($prevTag eq "[" or $prevTag eq "]");

    # a change of type closes whatever chunk the previous word was part of
    return($true)
        if ($prevTag ne "O" and $prevTag ne "." and $prevType ne $type);

    # a B or I chunk is closed by a following B or O
    return($true)
        if (($prevTag eq "B" or $prevTag eq "I") and
            ($tag eq "B" or $tag eq "O"));

    # E is closed by a following E, I or O
    return($true)
        if ($prevTag eq "E" and
            ($tag eq "E" or $tag eq "I" or $tag eq "O"));

    return($false);
}
294+
295+
# startOfChunk: tells whether a chunk started between the previous and the
#               current word
# arguments:    previous chunk tag, current chunk tag, previous chunk type,
#               current chunk type
# returns:      $true if a chunk started, $false otherwise
# note:         this code is capable of handling other chunk representations
#               than the default CoNLL-2000 ones, see EACL'99 paper of Tjong
#               Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006

sub startOfChunk {
    my ($prevTag,$tag,$prevType,$type) = @_;

    # corrected 1998-12-22: these chunks are assumed to have length 1
    return($true) if ($tag eq "[" or $tag eq "]");

    # a change of type opens a chunk unless the current word is outside
    return($true)
        if ($tag ne "O" and $tag ne "." and $prevType ne $type);

    # B opens a chunk after B, I or O
    return($true)
        if ($tag eq "B" and
            ($prevTag eq "B" or $prevTag eq "I" or $prevTag eq "O"));

    # I and E open a chunk after O
    return($true)
        if ($prevTag eq "O" and ($tag eq "I" or $tag eq "E"));

    # E and I open a chunk after E
    return($true)
        if ($prevTag eq "E" and ($tag eq "E" or $tag eq "I"));

    return($false);
}

elman_forward/W.npy

2.42 KB
Binary file not shown.

elman_forward/Wh.npy

78.2 KB
Binary file not shown.

elman_forward/Wx.npy

547 KB
Binary file not shown.

elman_forward/b.npy

104 Bytes
Binary file not shown.

elman_forward/bh.npy

880 Bytes
Binary file not shown.

elman_forward/embeddings.npy

18.3 MB
Binary file not shown.

elman_forward/h0.npy

880 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)