|
| 1 | +#!/usr/bin/perl -w |
# conlleval: evaluate result of processing CoNLL-2000 shared task
# usage:     conlleval [-l] [-r] [-d delimiterTag] [-o oTag] < file
# README:    http://cnts.uia.ac.be/conll2000/chunking/output.html
# options:   l: generate LaTeX output for tables like in
#               http://cnts.uia.ac.be/conll2003/ner/example.tex
#            r: accept raw result tags (without B- and I- prefix;
#               assumes one word per chunk)
#            d: alternative delimiter tag (default is single space)
#            o: alternative outside tag (default is O)
# note:      the file should contain lines with items separated
#            by $delimiter characters (default space). The final
#            two items should contain the correct tag and the
#            guessed tag in that order. Sentences should be
#            separated from each other by empty lines or lines
#            with $boundary fields (default -X-).
# url:       http://lcg-www.uia.ac.be/conll2000/chunking/
# started:   1998-09-25
# version:   2004-01-26
# author:    Erik Tjong Kim Sang <[email protected]>
| 21 | + |
| 22 | +use strict; |
| 23 | + |
# Boolean values used throughout the script. Results of endOfChunk are
# compared numerically with each other, so the two values must differ.
my ($false, $true) = (0, 42);

# --- settings -------------------------------------------------------------
my $boundary  = "-X-";      # first field of a sentence boundary line
my $delimiter = " ";        # field delimiter (option -d)
my $oTag      = "O";        # outside tag, default O (option -o)
my $latex     = 0;          # generate LaTeX formatted output (option -l)
my $raw       = 0;          # raw input: add B to every token (option -r)

# --- current input line ---------------------------------------------------
my $line;                   # the line without its trailing newline
my @features;               # fields found on the line
my $firstItem;              # first field (for sentence boundary checks)
my $nbrOfFeatures = -1;     # last field index per line; -1 until known

# --- tags of the current and the previous token ---------------------------
my $correct;                # current corpus chunk tag (I,O,B)
my $correctType;            # type of current corpus chunk tag (NP,VP,etc.)
my $guessed;                # current guessed chunk tag
my $guessedType;            # type of current guessed chunk tag
my $lastCorrect     = "O";  # previous chunk tag in corpus
my $lastCorrectType = "";   # type of previous chunk tag in corpus
my $lastGuessed     = "O";  # previous guessed chunk tag
my $lastGuessedType = "";   # type of previous guessed chunk tag
my $inCorrect = $false;     # the chunk being processed is correct until now

# --- counters --------------------------------------------------------------
my $tokenCounter = 0;       # token counter (ignores sentence breaks)
my $correctTags  = 0;       # number of correct chunk tags
my $correctChunk = 0;       # number of correctly identified chunks
my $foundCorrect = 0;       # number of chunks in corpus
my $foundGuessed = 0;       # number of identified chunks
my %correctChunk = ();      # number of correctly identified chunks per type
my %foundCorrect = ();      # number of chunks in corpus per type
my %foundGuessed = ();      # number of identified chunks per type

# --- scores and report helpers --------------------------------------------
my $precision = 0.0;        # precision score
my $recall    = 0.0;        # recall score
my $FB1       = 0.0;        # FB1 score (Van Rijsbergen 1979)
my $i;                      # miscellaneous counter
my $lastType;               # temporary storage for detecting duplicates
my @sortedTypes;            # sorted list of chunk type names
| 61 | + |
# Parse the command line options.
#   -l        switch on LaTeX output
#   -r        switch on raw tag mode
#   -d VALUE  use VALUE as the field delimiter
#   -o VALUE  use VALUE as the outside tag
# Option parsing stops at the first argument that does not start with "-".
# Any argument left over after that is an error: the data is read from
# standard input, not from a named file.
while (@ARGV and $ARGV[0] =~ /^-/) {
    my $option = shift(@ARGV);
    if ($option eq "-l") {
        $latex = 1;
    } elsif ($option eq "-r") {
        $raw = 1;
    } elsif ($option eq "-d") {
        if (not defined $ARGV[0]) {
            die "conlleval: -d requires delimiter character";
        }
        $delimiter = shift(@ARGV);
    } elsif ($option eq "-o") {
        if (not defined $ARGV[0]) {
            # -o takes the outside tag, not a delimiter
            die "conlleval: -o requires outside tag";
        }
        $oTag = shift(@ARGV);
    } else {
        die "conlleval: unknown argument $option\n";
    }
}
if (@ARGV) { die "conlleval: unexpected command line argument\n"; }
# process input
#
# Every line read from STDIN is split on $delimiter. The last field is the
# guessed tag and the field before it the corpus (correct) tag. The loop
# keeps the tags of the previous token so that chunk starts and ends can be
# detected between two consecutive tokens, and it updates the overall and
# per-type chunk counters as it goes.
while (<STDIN>) {
    chomp($line = $_);
    @features = split(/$delimiter/,$line);

    # the first line fixes the expected number of fields; every later
    # non-empty line must have the same number
    if ($nbrOfFeatures < 0) { $nbrOfFeatures = $#features; }
    elsif ($nbrOfFeatures != $#features and @features != 0) {
        printf STDERR "unexpected number of features: %d (%d)\n",
            $#features+1,$nbrOfFeatures+1;
        exit(1);
    }
    # empty lines and boundary lines are handled as an out-of-chunk token
    if (@features == 0 or
        $features[0] eq $boundary) { @features = ($boundary,"O","O"); }
    if (@features < 2) {
        die "conlleval: unexpected number of features in line $line\n";
    }
    if ($raw) {
        # raw tags: the outside tag becomes "O", every other tag gets a
        # "B-" prefix ($tag aliases the array element, so the assignment
        # changes @features in place)
        for my $tag ($features[$#features-1], $features[$#features]) {
            if ($tag eq $oTag) { $tag = "O"; }
            if ($tag ne "O") { $tag = "B-$tag"; }
        }
    }

    # 20040126 ET code which allows hyphens in the types: only the first
    # hyphen separates the chunk tag from the chunk type
    my $guessedField = pop(@features);
    if ($guessedField =~ /^([^-]*)-(.*)$/) {
        ($guessed,$guessedType) = ($1,$2);
    } else {
        ($guessed,$guessedType) = ($guessedField,"");
    }
    my $correctField = pop(@features);
    if ($correctField =~ /^([^-]*)-(.*)$/) {
        ($correct,$correctType) = ($1,$2);
    } else {
        ($correct,$correctType) = ($correctField,"");
    }
    # any false type value (this includes the string "0") counts as no type
    $guessedType = "" unless ($guessedType);
    $correctType = "" unless ($correctType);

    # a line that holds only the two tag fields has no first item left;
    # use an empty string so the comparisons below do not warn under -w
    $firstItem = shift(@features);
    $firstItem = "" if (not defined $firstItem);

    # 1999-06-26 sentence breaks should always be counted as out of chunk
    if ( $firstItem eq $boundary ) { $guessed = "O"; }

    if ($inCorrect) {
        my $correctEnded =
            endOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType);
        my $guessedEnded =
            endOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType);
        if ($correctEnded and $guessedEnded and
            $lastGuessedType eq $lastCorrectType) {
            # both chunks end here with the same type: a correct chunk
            $inCorrect = $false;
            $correctChunk++;
            $correctChunk{$lastCorrectType}++;
        } elsif ($correctEnded != $guessedEnded or
                 $guessedType ne $correctType) {
            # only one of the chunks ends here, or the types differ
            $inCorrect = $false;
        }
    }

    my $correctStarted =
        startOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType);
    my $guessedStarted =
        startOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType);

    # chunks of the same type starting at the same token may be correct
    if ($correctStarted and $guessedStarted and
        $guessedType eq $correctType) { $inCorrect = $true; }

    if ($correctStarted) {
        $foundCorrect++;
        $foundCorrect{$correctType}++;
    }
    if ($guessedStarted) {
        $foundGuessed++;
        $foundGuessed{$guessedType}++;
    }
    # tag accuracy is computed over real tokens only
    if ( $firstItem ne $boundary ) {
        if ( $correct eq $guessed and $guessedType eq $correctType ) {
            $correctTags++;
        }
        $tokenCounter++;
    }

    $lastGuessed = $guessed;
    $lastCorrect = $correct;
    $lastGuessedType = $guessedType;
    $lastCorrectType = $correctType;
}
# a chunk that is still correct when the input ends counts as correct
if ($inCorrect) {
    $correctChunk++;
    $correctChunk{$lastCorrectType}++;
}
| 189 | + |
# Report section: prints the overall scores followed by one row per chunk
# type, either as plain text or (with -l) as LaTeX table rows, then exits.
if (not $latex) {
    # compute overall precision, recall and FB1 (default values are 0.0)
    $precision = 100*$correctChunk/$foundGuessed if ($foundGuessed > 0);
    $recall = 100*$correctChunk/$foundCorrect if ($foundCorrect > 0);
    $FB1 = 2*$precision*$recall/($precision+$recall)
        if ($precision+$recall > 0);

    # print overall performance
    print "processed $tokenCounter tokens with $foundCorrect phrases; ";
    print "found: $foundGuessed phrases; correct: $correctChunk.\n";
    if ($tokenCounter>0) {
        printf "accuracy: %6.2f%%; ",100*$correctTags/$tokenCounter;
        print "$correctChunk $foundCorrect $foundGuessed ";
        printf "precision: %6.2f%%; ",$precision;
        printf "recall: %6.2f%%; ",$recall;
        printf "FB1: %6.2f\n",$FB1;
    }
}

# sort chunk type names; a type found both in the corpus and in the guesses
# is listed once. A seen-table is used because the empty type name is a
# valid key that a plain truth test cannot tell apart from "nothing yet".
my %seenType = ();
@sortedTypes = ();
for my $type (sort (keys %foundCorrect,keys %foundGuessed)) {
    push(@sortedTypes,$type) if (not $seenType{$type}++);
}
# print performance per chunk type
if (not $latex) {
    for my $type (@sortedTypes) {
        # a type seen on one side only has no entry in the other tables;
        # store 0 so the counts below are always defined
        $correctChunk{$type} = 0 if (not $correctChunk{$type});
        $foundGuessed{$type} = 0 if (not $foundGuessed{$type});
        $foundCorrect{$type} = 0 if (not $foundCorrect{$type});
        if (not($foundGuessed{$type})) { $precision = 0.0; }
        else { $precision = 100*$correctChunk{$type}/$foundGuessed{$type}; }
        if (not($foundCorrect{$type})) { $recall = 0.0; }
        else { $recall = 100*$correctChunk{$type}/$foundCorrect{$type}; }
        if ($precision+$recall == 0.0) { $FB1 = 0.0; }
        else { $FB1 = 2*$precision*$recall/($precision+$recall); }
        printf "%17s: ",$type;
        printf "% 4d % 4d % 4d ", $correctChunk{$type}, $foundCorrect{$type}, $foundGuessed{$type};
        printf "precision: %6.2f%%; ",$precision;
        printf "recall: %6.2f%%; ",$recall;
        printf "FB1: %6.2f %d\n",$FB1,$foundGuessed{$type};
    }
} else {
    print " & Precision & Recall & F\$_{\\beta=1} \\\\\\hline";
    for my $type (@sortedTypes) {
        $correctChunk{$type} = 0 if (not $correctChunk{$type});
        if (not($foundGuessed{$type})) { $precision = 0.0; }
        else { $precision = 100*$correctChunk{$type}/$foundGuessed{$type}; }
        if (not($foundCorrect{$type})) { $recall = 0.0; }
        else { $recall = 100*$correctChunk{$type}/$foundCorrect{$type}; }
        if ($precision+$recall == 0.0) { $FB1 = 0.0; }
        else { $FB1 = 2*$precision*$recall/($precision+$recall); }
        printf "\n%-7s & %6.2f\\%% & %6.2f\\%% & %6.2f \\\\",
            $type,$precision,$recall,$FB1;
    }
    print "\\hline\n";
    # the overall scores form the last row of the LaTeX table
    $precision = 0.0;
    $recall = 0;
    $FB1 = 0.0;
    $precision = 100*$correctChunk/$foundGuessed if ($foundGuessed > 0);
    $recall = 100*$correctChunk/$foundCorrect if ($foundCorrect > 0);
    $FB1 = 2*$precision*$recall/($precision+$recall)
        if ($precision+$recall > 0);
    printf "Overall & %6.2f\\%% & %6.2f\\%% & %6.2f \\\\\\hline\n",
        $precision,$recall,$FB1;
}

exit 0;
| 260 | + |
# endOfChunk: tells whether a chunk ended between the previous and the
#             current word
# arguments:  previous chunk tag, current chunk tag, previous chunk type,
#             current chunk type
# returns:    $true when a chunk ended, $false otherwise
# note:       this code is capable of handling other chunk representations
#             than the default CoNLL-2000 ones, see EACL'99 paper of Tjong
#             Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006

sub endOfChunk {
    my ($prevTag, $tag, $prevType, $type) = @_;

    # corrected 1998-12-22: bracket chunks are assumed to have length 1
    return($true) if ($prevTag eq "[" or $prevTag eq "]");

    # a change of type closes whatever chunk the previous word was in
    return($true)
        if ($prevTag ne "O" and $prevTag ne "." and $prevType ne $type);

    # a chunk tagged B or I ends before a new B and before an O
    return($true)
        if (($prevTag eq "B" or $prevTag eq "I") and
            ($tag eq "B" or $tag eq "O"));

    # an E tag closes its chunk whenever E, I or O follows
    return($true)
        if ($prevTag eq "E" and
            ($tag eq "E" or $tag eq "I" or $tag eq "O"));

    return($false);
}
| 294 | + |
# startOfChunk: tells whether a chunk started between the previous and the
#               current word
# arguments:    previous chunk tag, current chunk tag, previous chunk type,
#               current chunk type
# returns:      $true when a chunk started, $false otherwise
# note:         this code is capable of handling other chunk representations
#               than the default CoNLL-2000 ones, see EACL'99 paper of Tjong
#               Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006

sub startOfChunk {
    my ($prevTag, $tag, $prevType, $type) = @_;

    # corrected 1998-12-22: bracket chunks are assumed to have length 1
    return($true) if ($tag eq "[" or $tag eq "]");

    # a change of type opens a new chunk unless the current word is outside
    return($true)
        if ($tag ne "O" and $tag ne "." and $prevType ne $type);

    # a B tag opens a chunk after B, I and O
    return($true)
        if ($tag eq "B" and
            ($prevTag eq "B" or $prevTag eq "I" or $prevTag eq "O"));

    # after an O, an I or E tag opens a chunk as well
    return($true)
        if ($prevTag eq "O" and ($tag eq "I" or $tag eq "E"));

    # after an E, an E or I tag belongs to the next chunk
    return($true)
        if ($prevTag eq "E" and ($tag eq "E" or $tag eq "I"));

    return($false);
}
0 commit comments