-
Notifications
You must be signed in to change notification settings - Fork 4
/
prior2.pl
executable file
·82 lines (70 loc) · 2 KB
/
prior2.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/perl
#
# Prior2: Decompose query likelihood into lexical and repository information
# Estimate p(D | S)
#
# Usage: prior2.pl <results-file> [additive-flag]
#
# Reads a TREC-style run (one "query Q0 docno rank score tag" record per
# line), counts for every query how often each repository occurs among its
# top-ranked documents, turns those counts into an add-one smoothed
# probability, combines that probability with each document's original score
# and prints the re-ranked run on STDOUT.
#
# When the optional second argument is true the new score is
# score + exp(p); otherwise it is score * p.
#
use strict;
use warnings;

# Number of top-ranked documents per query used to count repositories; it is
# also the document-count term of the smoothing denominator.
my $TOP_K = 1000;

# Document-to-repository map, one CSV record per line, e.g.
#   1,arrayexpress,18,108
my $REPO_FILE = "data/biocaddie-doc-repo.out";

# Relevance judgements, one CSV record per line, e.g.
#   EA1,0,1330,0
my $QRELS_FILE = "qrels/biocaddie.qrels.csv";

# Run the script only when executed directly, so the helpers below can be
# loaded and exercised on their own.
main(@ARGV) unless caller;

# Entry point.
#   $run_file - path of the run to re-rank ('-' reads STDIN)
#   $additive - true selects score + exp(p), false selects score * p
sub main {
    my ($run_file, $additive) = @_;
    die "Usage: $0 <results-file> [additive-flag]\n" unless defined $run_file;

    my $repo_of = load_repos($REPO_FILE);
    my $rel_of  = load_qrels($QRELS_FILE);
    my $results = load_results($run_file);

    for my $query (sort keys %$results) {
        my $score_of = $results->{$query};
        my $count    = repo_counts($score_of, $repo_of, $TOP_K);
        my $rescored = rescore($score_of, $repo_of, $count, $TOP_K, $additive);

        my $rank = 1;
        # Ties are broken on docno so the output order is reproducible.
        for my $docno (sort { $rescored->{$b} <=> $rescored->{$a} or $a cmp $b }
                       keys %$rescored) {
            my $repo = repo_label($repo_of, $docno);
            my $rel  = $rel_of->{$query}{$docno};
            $rel = '' unless defined $rel;      # unjudged documents print an empty label
            my $seen     = exists $count->{$repo} ? $count->{$repo} : 0;
            my $newscore = $rescored->{$docno};
            print "$query Q0 $docno $rank $newscore exp4-$repo-$rel-$seen\n";
            $rank++;
        }
    }
    return;
}

# Return the repository name recorded for $docno, or '' when none is known.
sub repo_label {
    my ($repo_of, $docno) = @_;
    my $repo = $repo_of->{$docno};
    return defined $repo ? $repo : '';
}

# Load the docno -> repository map from the CSV file at $path.
# Dies when the file cannot be opened: without it every prior is meaningless.
sub load_repos {
    my ($path) = @_;
    my %repo_of;
    open(my $fh, '<', $path) or die "Cannot open $path: $!\n";
    while (my $line = <$fh>) {
        chomp $line;
        my ($docno, $repo) = split /,/, $line;
        next unless defined $docno && length $docno;
        $repo_of{$docno} = $repo;
    }
    close($fh);
    return \%repo_of;
}

# Load relevance labels as $rel_of->{query}{docno} from the CSV file at $path.
# The labels only decorate the run tag, so a missing file is reported with a
# warning and an empty map is returned instead of aborting.
sub load_qrels {
    my ($path) = @_;
    my %rel_of;
    my $fh;
    unless (open($fh, '<', $path)) {
        warn "Cannot open $path: $!; relevance labels will be empty\n";
        return \%rel_of;
    }
    while (my $line = <$fh>) {
        chomp $line;
        my ($query, undef, $docno, $rel) = split /,/, $line;
        next unless defined $docno;
        $rel_of{$query}{$docno} = $rel;
    }
    close($fh);
    return \%rel_of;
}

# Load a run as $results->{query}{docno} = score from the whitespace-separated
# file at $path.  A path of '-' reads STDIN.
sub load_results {
    my ($path) = @_;
    my %results;
    my $fh;
    if ($path eq '-') {
        $fh = \*STDIN;
    }
    else {
        open($fh, '<', $path) or die "Cannot open $path: $!\n";
    }
    while (my $line = <$fh>) {
        chomp $line;
        my ($query, undef, $docno, undef, $score) = split ' ', $line;
        next unless defined $score;             # skip blank or truncated lines
        $results{$query}{$docno} = $score;
    }
    close($fh) unless $path eq '-';
    return \%results;
}

# Count how many of the $top_k highest-scoring documents of one query belong
# to each repository.
#   $score_of - hash ref docno -> score for a single query
#   $repo_of  - hash ref docno -> repository
#   $top_k    - how many top documents to inspect
# Returns a hash ref repository -> count holding only repositories that occur.
sub repo_counts {
    my ($score_of, $repo_of, $top_k) = @_;
    my @ranked = sort { $score_of->{$b} <=> $score_of->{$a} or $a cmp $b }
                 keys %$score_of;
    splice(@ranked, $top_k) if @ranked > $top_k;

    my %count;
    for my $docno (@ranked) {
        # Look the repository up BEFORE counting, so the count is credited to
        # this document's repository and not to the previous document's.
        $count{ repo_label($repo_of, $docno) }++;
    }
    return \%count;
}

# Combine every document's score with the smoothed repository probability
#   p = (count(repo) + 1) / ($top_k + number of repositories counted).
#   $score_of - hash ref docno -> original score for a single query
#   $repo_of  - hash ref docno -> repository
#   $count    - hash ref repository -> count (from repo_counts); not modified
#   $top_k    - document-count term of the denominator
#   $additive - true: score + exp(p); false: score * p
# Returns a hash ref docno -> new score.
sub rescore {
    my ($score_of, $repo_of, $count, $top_k, $additive) = @_;

    # Fixed once per query: the denominator must not depend on the order in
    # which unseen repositories happen to be encountered below.
    my $n_repos = scalar keys %$count;

    my %rescored;
    for my $docno (keys %$score_of) {
        my $repo  = repo_label($repo_of, $docno);
        my $seen  = exists $count->{$repo} ? $count->{$repo} : 0;
        my $p     = ($seen + 1) / ($top_k + $n_repos);
        my $score = $score_of->{$docno};
        # NOTE(review): if the input scores are log-likelihoods one would
        # expect log($p) here rather than exp($p); the original formula is
        # kept unchanged -- confirm the intent.
        $rescored{$docno} = $additive ? $score + exp($p) : $score * $p;
    }
    return \%rescored;
}

1;