#!/usr/bin/perl use strict; my $prefFile = ""; my $resultsFile = ""; my $trec = 0; if (@ARGV < 2) { die "Usage: perl prefeval.pl -p -r [-trec]\n\t-trec: results in 6-column TREC format (default: off, 3-column format)\n"; } while (@ARGV) { my $arg = shift @ARGV; if ($arg eq "-p") { $prefFile = shift @ARGV; } if ($arg eq "-r") { $resultsFile = shift @ARGV; } if ($arg eq "-trec") { $trec = 1; } } my %prefs; readprefs($prefFile, \%prefs); my %results; readresults($resultsFile, \%results, $trec); my (@ppref, @rpref, @APpref); print "query\tppref10\tppref25\tppref50\tpprefMax\trpref10\trpref25\trpref50\trprefMax\tAPpref\n"; # evaluate results my @queries = sort {$a <=> $b} keys %results; for (my $q=0; $q<@queries; $q++) { my $query = $queries[$q]; next if (!defined $prefs{$query}); my %qres = %{$results{$query}}; my %qpref = %{$prefs{$query}}; my $qlen = scalar(keys %qres)-1; # get ppref and rpref at every rank @{$ppref[$q]} = ppref(\%qres, \%qpref); @{$rpref[$q]} = rpref(\%qres, \%qpref); # APpref $APpref[$q] = APpref(\%qres, \%qpref, \@{$ppref[$q]}, \@{$rpref[$q]}); format = @>>>>> @#.#### @#.#### @#.#### @#.#### @#.#### @#.#### @#.#### @#.#### @#.#### $query, $ppref[$q][9], $ppref[$q][24], $ppref[$q][49], $ppref[$q][$qlen], $rpref[$q][9], $rpref[$q][24], $rpref[$q][49], $rpref[$q][$qlen], $APpref[$q] . write; } sub readprefs { my ($file, $prefhash) = @_; my %bad; my (%n, %np); open F, "$file" or die "could not read preferences file $file\n"; while () { chop; my ($q, $d1, $d2, $j) = split(/\s+/); $q =~ s/TD//; if ($j == 2) { $bad{$q}{$d2} = 1; } elsif ($j == -2) { $bad{$q}{$d1} = 1; } else { $$prefhash{$q}{$d1}{$d2} = $j; $$prefhash{$q}{$d2}{$d1} = -$j; $n{$d1}++; $n{$d2}++; $np{$d1}++ if ($j < 0); $np{$d2}++ if ($j > 0); # transitivities foreach my $d (keys %{$$prefhash{$q}{$d1}}) { next if (defined $$prefhash{$q}{$d}{$d2} || $d eq $d2); if ($$prefhash{$q}{$d1}{$d} > 0 && $j <= 0) { $$prefhash{$q}{$d2}{$d} = 1; $$prefhash{$q}{$d}{$d2} = -1; $n{$d}++; $n{$d2}++; $np{$d}++; } if ($$prefhash{$q}{$d}{$d1} > 0 && $j >= 0) { $$prefhash{$q}{$d2}{$d} = -1; $$prefhash{$q}{$d}{$d2} = 1; $n{$d}++; $n{$d2}++; $np{$d2}++; } if (defined $$prefhash{$q}{$d}{$d1} && $$prefhash{$q}{$d}{$d1} == 0) { $$prefhash{$q}{$d}{$d2} = $j; $$prefhash{$q}{$d2}{$d} = -$j; $n{$d}++; $n{$d2}++; $np{$d}++ if ($j == -1); $np{$d2}++ if ($j == 1); } } foreach my $d (keys %{$$prefhash{$q}{$d2}}) { next if (defined $$prefhash{$q}{$d}{$d1} || $d eq $d1); if ($$prefhash{$q}{$d2}{$d} > 0 && $j >= 0) { $$prefhash{$q}{$d1}{$d} = 1; $$prefhash{$q}{$d}{$d1} = -1; $n{$d}++; $n{$d1}++; $np{$d}++; } if ($$prefhash{$q}{$d}{$d2} > 0 && $j <= 0) { $$prefhash{$q}{$d1}{$d} = -1; $$prefhash{$q}{$d}{$d1} = 1; $n{$d}++; $n{$d1}++; $np{$d1}++; } if (defined $$prefhash{$q}{$d}{$d2} && $$prefhash{$q}{$d}{$d2} == 0) { $$prefhash{$q}{$d}{$d1} = -$j; $$prefhash{$q}{$d1}{$d} = $j; $n{$d}++; $n{$d1}++; $np{$d}++ if ($j == 1); $np{$d1}++ if ($j == -1); } } } } close F; # put bads in with preferences foreach my $q (keys %bad) { foreach my $d (keys %{$bad{$q}}) { foreach my $d2 (keys %{$$prefhash{$q}}) { if (!$bad{$q}{$d2}) { $$prefhash{$q}{$d2}{$d} = -1; $$prefhash{$q}{$d}{$d2} = 1; } else { $$prefhash{$q}{$d2}{$d} = 0; $$prefhash{$q}{$d}{$d2} = 0; } } } } } sub readresults { my ($file, $hash, $trec) = @_; # ranked results stored as a hash mapping query -> doc -> rank my %rank; open F, $file or die "could not open results file $file\n"; while () { chop; my @i = split(/\s+/); my ($q, $d); if ($trec) { # trec 6-column format ($q, $d) = ($i[0], $i[2]); } else { ($q, $d) = ($i[0], $i[1]); } $rank{$q}++; $$hash{$q}{$d} = $rank{$q}; } close F; } # is d1 preferred to d2? sub preferred { my ($d1, $d2, $pref) = @_; if (defined $$pref{$d1}) { if (defined $$pref{$d1}{$d2}) { return 1 if ($$pref{$d1}{$d2} == -1); return 0; } } else { return 0; } } sub prefexists { my ($d1, $d2, $pref) = @_; if (defined $$pref{$d1}) { return 1 if (defined $$pref{$d1}{$d2}); return 0; } else { return 0; } } sub ppref { my ($res, $pref) = @_; my (@correct, @total, @ppref); # sort docids by rank my @docs = sort {$$res{$a} <=> $$res{$b}} keys %$res; # iterate over ranks for (my $k=0; $k<@docs; $k++) { # ppref@k starts as #correct@k-1 / total@k-1 if ($k > 0) { $correct[$k] = $correct[$k-1]; $total[$k] = $total[$k-1]; } my $d1 = $docs[$k]; # foreach doc involved in a pref with d1 foreach my $d2 (keys %{$$pref{$d1}}) { next unless ($$res{$d1} < $$res{$d2}); next if ($$pref{$d1}{$d2} == 0); $total[$k]++; $correct[$k]++ if ($$pref{$d1}{$d2} == -1); } if ($total[$k] == 0) { $ppref[$k] = 0; } else { $ppref[$k] = $correct[$k]/$total[$k]; } } # pad out to 50 for (my $i=@ppref; $i<50; $i++) { $ppref[$i] = $ppref[$i-1]; } return @ppref; } sub rpref { my ($res, $pref) = @_; my (@correct, @rpref); my $total = 0; # sort docids by rank my @docs = sort {$$res{$a} <=> $$res{$b}} keys %$res; # iterate over ranks for (my $k=0; $k<@docs; $k++) { if ($k > 0) { $correct[$k] = $correct[$k-1]; } my $d1 = $docs[$k]; # foreach doc involved in a pref with d1 foreach my $d2 (keys %{$$pref{$d1}}) { next unless ($$res{$d1} < $$res{$d2}); next if ($$pref{$d1}{$d2} == 0); $correct[$k]++ if ($$pref{$d1}{$d2} == -1); } } # count all prefs foreach my $d1 (keys %$pref) { foreach my $d2 (keys %{$$pref{$d1}}) { next if ($$pref{$d1}{$d2} == 1); next if ($$pref{$d1}{$d2} == 0); $total++; } } for (my $k=0; $k<@docs; $k++) { $rpref[$k] = $correct[$k]/$total; } # pad out to 50 for (my $i=@rpref; $i<50; $i++) { $rpref[$i] = $rpref[$i-1]; } return @rpref; } sub APpref { my ($res, $pref, $ppref, $rpref) = @_; my $APpref = 0; my $total = 0; # APpref = sum of pprefs at ranks at which rpref increases $APpref = $$ppref[0] if ($$rpref[0] > 0); for (my $i=1; $i<@$rpref; $i++) { if ($$rpref[$i] > $$rpref[$i-1]) { $APpref += $$ppref[$i] if ($$rpref[$i] > $$rpref[$i-1]); $total++; } } return $APpref/($total==0?1:$total); } sub round { my $x = shift; $x += 0.00005; $x =~ s/(\d\.\d\d\d\d).+/$1/; return $x; }