-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcombinedcount.IITB.pl
100 lines (83 loc) · 2.32 KB
/
combinedcount.IITB.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/perl
use warnings;
use strict;
# Summarise a chat transcript into per-participant post and word counts and
# compare them against the statistics of an earlier run.
#
# Command-line arguments (all file paths):
#   transcript_file           chat log; message lines look like
#                             "...:MM AM - Name: text", any other non-blank
#                             line continues the previous message
#   old_post_stats_file       earlier post ranking, "name | count | x | y" rows
#   old_wordcount_stats_file  earlier word ranking, same row format
#   directory_file            "raw name<TAB>display name" alias table
#
# Writes combinedpost.sort.tsv and words.sort.tsv to the current directory.
# Each row is "name | count | rank change | count change", where a change is
# "NA" when the name is absent from the corresponding old stats file.
if (@ARGV != 4) {
    print STDERR "\nUsage: combinedcount.IITB.pl transcript_file old_post_stats_file old_wordcount_stats_file directory_file\n\n";
    exit 1;
}

my ($transcript_file, $old_post_file, $old_word_file, $directory_file) = @ARGV;

my ($post_count, $word_count) = count_transcript($transcript_file);
my $alias_for = load_directory($directory_file);
my ($old_posts, $old_post_rank) = load_old_stats($old_post_file, $alias_for);
my ($old_words, $old_word_rank) = load_old_stats($old_word_file, $alias_for);

write_ranking('combinedpost.sort.tsv', $post_count, $alias_for,
    $old_posts, $old_post_rank);
write_ranking('words.sort.tsv', $word_count, $alias_for,
    $old_words, $old_word_rank);
exit;

# Read the transcript and return (\%posts, \%words), both keyed by the author
# name exactly as it appears in the transcript.  A "post" is a run of
# consecutive counted messages by the same author; words are the
# whitespace-separated tokens of the message text and its continuation lines.
sub count_transcript {
    my ($path) = @_;

    # Authors matching this pattern are treated as system notices.
    # NOTE(review): this is a plain substring match, so a real participant
    # whose name contains e.g. "You" or "left" is dropped too - confirm that
    # this is acceptable for the transcripts being processed.
    my $notice = qr/joined using this group|added|left|changed|created|You|group|disappearing messages/;

    my (%posts, %words);
    my $speaker;             # author of the message currently being read
    my $last_poster = '';    # author of the most recent counted message

    open my $in, '<', $path
        or die "Cannot open transcript '$path': $!\n";
    while (my $line = <$in>) {
        next if $line !~ /\S/;
        chomp $line;

        my $text;
        if ($line =~ /:\d\d [AP]M - /) {
            my ($author, $body) = $line =~ /:\d\d [AP]M - ([^:]+):\s*(.*)$/;

            # No "name:" part, or a filtered pseudo-name: this is a notice.
            # Forget the speaker so that continuation lines of the notice
            # are not credited to anybody.
            if (!defined $author || $author =~ $notice) {
                undef $speaker;
                next;
            }
            $posts{$author}++ if $author ne $last_poster;
            $last_poster = $author;
            $speaker     = $author;
            $text        = $body;
        }
        else {
            # Continuation line: only meaningful while a message is open.
            next if !defined $speaker;
            $text = $line;
        }

        # split ' ' ignores leading whitespace, so an indented continuation
        # line does not contribute a phantom empty word.
        my @tokens = split ' ', $text;
        $words{$speaker} += scalar @tokens;
    }
    close $in;

    return (\%posts, \%words);
}

# Load the alias table and return a hash ref mapping raw name -> display name.
# Lines without a tab are ignored; trailing whitespace (including a CR from
# CRLF files) is stripped from the display name.
sub load_directory {
    my ($path) = @_;
    my %alias;

    open my $in, '<', $path
        or die "Cannot open directory file '$path': $!\n";
    while (my $line = <$in>) {
        my ($raw, $display) = $line =~ /^(.*)\t(.*?)\s*$/
            or next;
        $alias{$raw} = $display;
    }
    close $in;

    return \%alias;
}

# Load an old stats file and return (\%count, \%rank), keyed by display name
# (names found in the alias table are translated).  The rank of a row is its
# 1-based line number in the file; lines that do not look like a stats row
# are skipped but still occupy a line number.
sub load_old_stats {
    my ($path, $alias) = @_;
    my (%count, %rank);

    open my $in, '<', $path
        or die "Cannot open old stats file '$path': $!\n";
    my $line_no = 0;
    while (my $line = <$in>) {
        $line_no++;
        my ($name, $count)
            = $line =~ /^\s*([^|]+)\|\s*(\d+)\s*\|\s*\S+\s*\|\s*\S+\s*$/
            or next;
        $name =~ s/\s+$//;
        $name = $alias->{$name} if exists $alias->{$name};
        $count{$name} = $count;
        $rank{$name}  = $line_no;
    }
    close $in;

    return (\%count, \%rank);
}

# Write one ranking file, highest count first.  Ties are broken by name so
# that ranks are reproducible from run to run instead of depending on hash
# ordering.
sub write_ranking {
    my ($path, $count, $alias, $old_count, $old_rank) = @_;

    open my $out, '>', $path
        or die "Cannot write '$path': $!\n";

    my @ordered = sort { $count->{$b} <=> $count->{$a} || $a cmp $b }
        keys %{$count};

    my $rank = 0;
    for my $key (@ordered) {
        $rank++;
        my $name = exists $alias->{$key} ? $alias->{$key} : $key;

        my $rank_change = 'NA';
        if (exists $old_rank->{$name}) {
            # A smaller rank number is better, so old - new > 0 is a climb.
            my $moved = $old_rank->{$name} - $rank;
            $rank_change = $moved == 0 ? '=' : signed($moved);
        }

        my $count_change = 'NA';
        if (exists $old_count->{$name}) {
            $count_change = signed($count->{$key} - $old_count->{$name});
        }

        printf {$out} "%s | %4d | %s | %s\n",
            $name, $count->{$key}, $rank_change, $count_change;
    }

    close $out
        or die "Error while writing '$path': $!\n";
    return;
}

# Format a number with an explicit "+" when positive ("+3", "0", "-2").
sub signed {
    my ($n) = @_;
    return $n > 0 ? "+$n" : "$n";
}