-
Notifications
You must be signed in to change notification settings - Fork 2
/
pgsi.pl
executable file
·1939 lines (1556 loc) · 54.6 KB
/
pgsi.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env perl
# -*-mode:cperl; indent-tabs-mode: nil-*-
## Parse Postgres logs and determine the system impact
##
## Usage: pgsi.pl [options] < pglog_slice.log
##
## See the POD inside this file for full documentation:
## perldoc pgsi.pl
##
## Mark Johnson <[email protected]>
package PGSI;
use strict;
use warnings;
use Data::Dumper qw( Dumper );
use Time::Local qw();
use Getopt::Long;
use IO::Handle;
use 5.008003;
our $VERSION = '1.7.2';
*STDOUT->autoflush(1);
*STDERR->autoflush(1);
my $resolve_called = 0;
my (
%query,
%canonical_q,
$first_line,
$last_line,
%seen,
);
my %opt = (
'top-10' => '',
'all' => '',
'query-types' => '',
'pg-version' => '',
'offenders' => 0,
'verbose' => 0,
'format' => 'html',
'mode' => 'pid',
'color' => 1,
'quiet' => 0,
'interval' => undef,
);
my $USAGE = qq{Usage: $0 -f filename [options]\n};
GetOptions ( ## no critic
\%opt,
(
'top-10=s',
'all=s',
'query-types=s',
'pg-version=s',
'offenders=i',
'version',
'help',
'verbose+',
'file|f=s@',
'format=s',
'mode=s',
'color!',
'quiet',
'interval=s',
)
) or die $USAGE;
if ($opt{version}) {
print "$0 version $VERSION\n";
exit 0;
}
if ($opt{help}) {
print $USAGE;
print "Full documentation at: http://bucardo.org/wiki/pgsi\n";
exit 0;
}
## Prepare formatting vars based on opt{format}
## The default is 'html':
my $fmstartbold = q{<b>};
my $fmendbold = q{</b>};
my $fmstartheader1 = q{<h1>};
my $fmendheader1 = q{</h1>};
my $fmstartheader2 = q{<h2>};
my $fmendheader2 = q{</h2>};
my $fmstartheader3 = q{<h3>};
my $fmendheader3 = q{</h3>};
my $fmsep = q{<hr />};
my $fmstartquery = '<pre>';
my $fmendquery = '</pre>';
if ($opt{format} eq 'mediawiki') {
$fmstartbold = q{'''};
$fmendbold = q{'''};
$fmstartheader1 = q{==};
$fmendheader1 = q{==};
$fmstartheader2 = q{====};
$fmendheader2 = q{====};
$fmstartheader3 = q{=====};
$fmendheader3 = q{=====};
$fmsep = q{----};
$fmstartquery = ' ';
$fmendquery = '';
}
if ($opt{format} eq 'tsv') {
$fmstartbold = q{};
$fmendbold = q{};
$fmstartheader1 = q{};
$fmendheader1 = q{};
$fmstartheader2 = q{};
$fmendheader2 = q{};
$fmstartheader3 = q{};
$fmendheader3 = q{};
$fmsep = q{};
$fmstartquery = '';
$fmendquery = '';
}
my $minwrap1 = 80;
my $minwrap2 = 40; ## WHEN .. THEN
## Any keywords after the first
my $indent1 = ' ' x 1;
## Wrapping of long lines of a single type
my $indent2 = ' ' x 3;
## Secondary wrapping of a single type
my $indent3 = ' ' x 5;
## Even more indenting
my $indent4 = ' ' x 7;
## Special strings for internal comments
my $STARTCOMMENT = "startpgsicomment";
my $ENDCOMMENT = "endpgsicomment";
## We either read from a file or from stdin
my (@fh, $fh);
if ($opt{file}) {
my $x = 0;
my %dupe;
for my $file (@{$opt{file}}) {
if ($dupe{$file}++) {
die "File specified more than once: $file\n";
}
open $fh[$x], '<', $file or die qq{Could not open "$file": $!\n};
$x++;
}
}
else {
push @fh => \*STDIN;
}
for (@fh) {
$fh = $_;
## Do the actual parsing. Depends on what kind of log file we have
if ('pid' eq $opt{mode}) {
parse_pid_log();
}
elsif ('syslog' eq $opt{mode}) {
parse_syslog_log();
}
elsif ('csv' eq $opt{mode}) {
parse_csv_log();
}
elsif ('bare' eq $opt{mode}) {
parse_bare_log();
}
else {
die qq{Unknown mode: $opt{mode}\n};
}
}
sub parse_csv_log {
## Each line of interest, with PIDs as the keys
my %logline;
## The last PID we saw. Used to populate multi-line statements correctly.
my $lastpid = 0;
require Text::CSV_XS;
my $csv = Text::CSV_XS->new({ binary => 1 }) or die;
$csv->column_names(qw(log_time user_name database_name process_id connection_from session_id session_line_num command_tag session_start_time virtual_transaction_id transaction_id error_severity sql_state_code message detail hint internal_query internal_query_pos context query query_pos location application_name));
while (my $line = $csv->getline_hr($fh)) {
if ($opt{verbose} >= 2) {
warn "Checking line (" . Dumper($line) . ")\n";
}
my $date = $line->{log_time};
my $pid = $line->{process_id};
my $more = $line->{message};
## All we care about is statements and durations
## Got a duration? Store it for this PID and move on
if ($more =~ /duration: (\d+\.\d+) ms$/o) {
my $duration = $1;
## Store this duration, overwriting what is (presumably) -1
$logline{$pid}{duration} = $duration;
next;
}
## Got a statement with optional duration
## Handle the old statement and store the new
if ($more =~ /(?:duration: (\d+\.\d+) ms )?statement:\s+(.+)/o) {
my ($duration,$statement) = ($1,$2);
## Make sure any subsequent multi-line statements know where to go
$lastpid = $pid;
## If this PID has something stored, process it first
if (exists $logline{$pid}) {
resolve_pid_statement($logline{$pid});
}
## Store and blow away any old value
$logline{$pid} = {
line => $line,
statement => $statement,
duration => -1,
date => $date,
};
if (defined $duration) {
$logline{$pid}{duration} = $duration;
}
## Make sure we have a first and a last line
if (not defined $first_line) {
$first_line = $last_line = $line;
}
} ## end duration + statement
} ## end each line
defined $first_line or die qq{Could not find any matching lines: incorrect format??\n};
## Process any PIDS that are left
for my $pid (keys %logline) {
resolve_pid_statement($logline{$pid});
}
## Store the last PID seen as the last line
$last_line = $logline{$lastpid}{line};
return;
} ## end of parse_csv_log
sub parse_pid_log {
## Parse a log file in which the pid appears in the log_line_prefix
## and multi-line statements start with tabs
## Each line of interest, with PIDs as the keys
my %logline;
## The last PID we saw. Used to populate multi-line statements correctly.
my $lastpid = 0;
## We only store multi-line if the previous real line was a log: statement
my $lastwaslog=0;
while (my $line = <$fh>) {
if ($opt{verbose} >= 2) {
chomp $line;
warn "Checking line ($line)\n";
}
## There are only two possiblities we care about:
## 1. tab-prefixed line (continuation of a literal)
## 2. new date-and-pid-prefixed line
if ($line =~ /^\t(.*)/) {
## If the last real line was a statement, append this to last PID seen
if ($lastwaslog) {
(my $extra = $1) =~ s/^\s+//;
## If a comment, treat carefully
$extra =~ s/^(\s*\-\-.+)/$STARTCOMMENT $1 $ENDCOMMENT /;
$logline{$lastpid}{statement} .= " $extra";
}
next;
}
## Got a valid PID line?
if ($line =~ /^(\d\d\d\d\-\d\d\-\d\d \d\d:\d\d:\d\d)\D+(\d+)\s*(.+)/o) {
my ($date,$pid,$more) = ($1,$2,$3);
## Example:
## 2009-12-03 08:12:05 PST 11717 4b17e355.2dc5 127.0.0.1 dbuser dbname LOG: statement: SELECT ...
## Reset the last log indicator
$lastwaslog = 0;
## All we care about is statements and durations
next if ($more =~ /LOG: (?:duration: (\d+\.\d+) ms )?(?:bind|parse) [^:]+:.*/o);
## Got a duration? Store it for this PID and move on
if ($more =~ /LOG: duration: (\d+\.\d+) ms$/o) {
my $duration = $1;
## Store this duration, overwriting what is (presumably) -1
$logline{$pid}{duration} = $duration;
next;
}
## Got a statement with optional duration
## Handle the old statement and store the new
if ($more =~ /
LOG:\s\s
(?:duration:\s(\d+\.\d+)\sms\s\s)?
((?:(?:statement)|(?:execute\s[^:]+)))
:(.*)$
/x) {
my ($duration,$isexecute,$statement) = ($1,$2,$3);
## Slurp in any multi-line continuatiouns after this
$lastwaslog = 1;
## Make sure any subsequent multi-line statements know where to go
$lastpid = $pid;
## If this PID has something stored, process it first
if (exists $logline{$pid}) {
resolve_pid_statement($logline{$pid});
}
## Store and blow away any old value
$logline{$pid} = {
line => $line,
statement => $statement,
duration => -1,
date => $date,
};
if (defined $duration) {
$logline{$pid}{duration} = $duration;
}
## Make sure we have a first and a last line
if (not defined $first_line) {
$first_line = $last_line = $line;
}
} ## end LOG: statement
next;
} ## end if valid PID line
if ($opt{verbose}) {
chomp $line;
warn "Invalid line $.: $line\n";
}
} ## End while
defined $first_line or die qq{Could not find any matching lines: incorrect format??\n};
## Process any PIDS that are left
for my $pid (keys %logline) {
resolve_pid_statement($logline{$pid});
}
## Store the last PID seen as the last line
$last_line = $logline{$lastpid}{line};
return;
} ## end of parse_pid_log
sub parse_bare_log {
## Parse a log file in which multi-line statements start with tabs
## Each line of interest
my @logline;
my $more;
## We only store multi-line if the previous real line was a log: statement
my $lastwaslog=0;
while (my $line = <$fh>) {
if ($opt{verbose} >= 2) {
chomp $line;
warn "Checking line ($line)\n";
}
## There are only two possiblities we care about:
## 1. tab-prefixed line (continuation of a literal)
## 2. new line
if ($line =~ /^\t(.*)/) {
## If the last real line was a statement, append this to last PID seen
if ($lastwaslog) {
(my $extra = $1) =~ s/^\s+//;
## If a comment, treat carefully
$extra =~ s/^(\s*\-\-.+)/$STARTCOMMENT $1 $ENDCOMMENT /;
$logline[$#logline]->{statement} .= " $extra";
}
next;
}
## Lines will contain "parse <.*>", "bind <.*>", "execute <.*>" or "statement", with a duration
if ($line =~ /^LOG: /) {
$more = $line;
## Example:
## 2009-12-03 08:12:05 PST 11717 4b17e355.2dc5 127.0.0.1 dbuser dbname LOG: statement: SELECT ...
## Reset the last log indicator
$lastwaslog = 0;
## All we care about is statements/executes and durations
next if ($more =~ /LOG: (?:duration: (\d+\.\d+) ms )?(?:bind|parse) [^:]+:.*/o);
## Got a duration? Store it for this PID and move on
if ($more =~ /LOG: duration: (\d+\.\d+) ms$/o) {
my $duration = $1;
## Store this duration, overwriting what is (presumably) -1
$logline[$#logline]->{duration} = $duration;
next;
}
## Got a statement with optional duration
## Handle the old statement and store the new
if ($more =~ /
LOG:\s\s
(?:duration:\s(\d+\.\d+)\sms\s\s)?
((?:(?:statement)|(?:execute\s[^:]+)))
:(.*)$
/x) {
my ($duration,$statement, $other) = ($1, $3, $2);
## Slurp in any multi-line continuatiouns after this
$lastwaslog = 1;
## resolve whatever's already in this line
resolve_pid_statement($logline[$#logline]) if $#logline > -1;
shift @logline;
## Store and blow away any old value
push @logline, {
statement => $statement,
duration => $duration,
};
if (defined $duration) {
$logline[$#logline]->{duration} = $duration;
}
## Make sure we have a first and a last line
if (not defined $first_line) {
$first_line = $last_line = $line;
}
} ## end LOG: statement
next;
} ## end if valid def line
if ($opt{verbose}) {
chomp $line;
warn "Invalid line $.: $line\n";
}
} ## End while
defined $first_line or die qq{Could not find any matching lines: incorrect format??\n};
## Process any PIDS that are left
for my $line (@logline) {
resolve_pid_statement($line);
}
## Store the last PID seen as the last line
$last_line = $logline[$#logline]->{statement};
return;
} ## end of parse_def_log
## Globals used by syslog: move or refactor
my ($extract_query_re, $extract_duration_re);
sub parse_syslog_log {
# Todo: Move this out
# Build an or-list for regex extraction of
# the query types of interest.
my $query_types = $opt{'query-types'}
? join (
'|',
grep { /\w/ && !$seen{$_}++ }
split (
/[,\s]+/,
"\L$opt{'query-types'}"
)
)
: 'select'
;
# Partition the top-10 or all file-patterns
# into their path and file-name components
for my $k ( qw/top-10 all/ ) {
local ($_) = delete $opt{$k};
my @v =
map { defined ($_) ? $_ : '' }
m{(.*/)?(.*)};
my @k =
map {"$k-$_"} qw/path file/;
@opt{@k} = @v;
}
# Base regex to capture the main pieces of
# each log line entry
my $statement_re =
qr{
^(.+?
postgres\[
(\d+)
\]
):
\s+
\[
\d+
-
(\d+)
\]
\s
(.*)
}xms;
# Create the appropriate regex to pull out the actual query depending on the
# format implied by --pg-version. Blesses regex into the appropriate namespace
# to simplify access to routines that deviate between log formats.
$extract_query_re =
make_extractor(
$opt{'pg-version'},
$query_types
);
# Regex specifically for the concluding duration
# entry after a query finishes.
$extract_duration_re =
qr{
log:
\s+
duration:
\s
(
\d+
[.]
\d{3}
)
}ixms;
while (my $line = <$fh>) {
if ($opt{verbose} >= 2) {
chomp $line;
warn "Checking line ($line)\n";
}
# Lines that don't match the basic format are ignored.
if ($line !~ $statement_re) {
chomp $line;
$opt{verbose} and warn "Line $. did not match: $line\n";
next;
}
my ($st_id, $pid, $st_seq, $st_frag) = ($1, $2, $3, $4);
$first_line = $last_line = $line
unless defined $first_line;
# Allows for blocks of log to be unordered. Assumes earliest found timestamp
# is start time, and latest end time, regardless of the order in which
# they're encountered.
if (defined $line) {
$first_line = $line
if get_timelocal_from_line($line) < get_timelocal_from_line($first_line);
$last_line = $line
if get_timelocal_from_line($line) > get_timelocal_from_line($first_line);
}
my $arr;
# Starting a new statement. Close off possible previous one and begin new one.
resolve_syslog_stmt($pid)
if $st_seq == 1;
# Skip any entries that start the log
# after their first entry.
next unless $arr = $query{$pid}{fragments};
# "statement_id" is the earliest timestamp/pid
# entry found for the statement in question.
# It should suffice for a human to identify
# the query within the logfiles.
$query{$pid}{statement_id} = $st_id
if $st_seq == 1;
push (
@$arr,
$st_frag
);
} # end while
defined $first_line or die qq{Could not find any matching lines: incorrect format?\n};
# Go through all entries that haven't been resolved
# (indicated by the presence of elements in the
# fragments array) and add them to the canonical list.
while (my ($pid, $hsh) = each %query) {
next unless exists $hsh->{fragments} and @{ $hsh->{fragments} };
resolve_syslog_stmt($pid);
}
return;
} ## end of parse_syslog_log
# Determine the server and start/end times
# from the oldest and newest log lines.
my ($host, $start_time, $end_time) =
log_meta($first_line, $last_line);
# Calculate the ms interval of all log activity.
my $log_int_ms = log_interval($start_time, $end_time);
for my $hsh (values %canonical_q) {
# Mean runtime.
my $mean = $hsh->{duration} /= $hsh->{count};
# Average (mean) time between successive calls.
$hsh->{interval} = $opt{interval} || $log_int_ms / $hsh->{count};
# SI, expressed as a percent. 100 implies the query
# was, on average, constantly running on a single
# instance. If interval is invalid (after all,
# syslog precision is only 1 second), set to -1
$hsh->{sys_impact} =
$hsh->{interval} != 0
? 100 * $hsh->{duration} / $hsh->{interval}
: -1
;
## No sense in showing negative numbers unless exactly -1
if ($hsh->{sys_impact} < 0 and $hsh->{sys_impact} != -1) {
$hsh->{sys_impact} = 0;
}
# Determine standard deviation and median. If count <= 1,
# set to -1 to indicate not applicable.
if ($hsh->{count} > 1) {
my $sum = 0;
for my $duration ( @{$hsh->{durations}} ) {
$sum += ($duration - $mean)**2;
}
$hsh->{deviation} = sqrt($sum / ($hsh->{count} - 1));
my @sorted = sort { $a <=> $b } @{$hsh->{durations}};
if (($#sorted + 1) % 2 != 0) {
$hsh->{median} = $sorted[int($#sorted / 2)];
}
else {
$hsh->{median} = ($sorted[int($#sorted / 2)] + $sorted[int($#sorted / 2) + 1]) / 2;
}
}
else {
$hsh->{deviation} = -1;
$hsh->{median} = -1;
}
}
## Format each query and return a hash based on query_type
my $out = process_all_queries();
## If using HTML format, print a simple head and table of contents
if ($opt{format} eq 'html') {
print qq{<!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US" lang="en-US">
<head>
<title>Postgres System Impact Report</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
\n};
if ($opt{color}) {
print qq!<style type="text/css">
span.prealias { color: red; font-weight: bolder; }
span.pretable { color: purple; font-weight: bolder; }
span.prekeyword { color: blue; font-weight: bolder; }
span.presemi { color: white; font-weight: normal; background-color: purple; }
span.prequestion { color: red; font-weight: normal }
</style>
!;
}
print qq{</head>\n<body>\n};
if ($opt{file}) {
if (! defined $opt{file}[1]) {
print qq{<p>Log file: $opt{file}[0]</p>\n};
}
else {
print qq{<p>Log files:<ul>\n};
for my $file (@{$opt{file}}) {
print qq{<li>$file</li>\n};
}
print qq{</ul></p>\n};
}
}
print "<ul>\n";
for my $qtype (
map { $_->[0] }
sort { $a->[1] <=> $b->[1] or $b->[2] <=> $a->[2] }
map { [$_, ($_ eq 'COPY' ? 1 : 0), scalar @{$out->{$_}} ] }
keys %$out) {
my $count = @{$out->{$qtype}};
my $safename = "${host}_$qtype";
print qq{<li><a href="#$safename">$qtype</a> ($count)</li>\n};
}
print "</ul>\n";
}
elsif ($opt{format} eq 'tsv') {
# print out the field headers
print join "\t" => qw( qtype query count duration interval deviation sysimpact minimum_threshold
maximum_threshold durations );
print "\n";
no warnings 'uninitialized';
while (my ($q,$v) = each %canonical_q) {
print join "\t" => $v->{qtype}, $q,
@{$v}{qw/count duration interval deviation sysimpact minimum_threshold maximum_threshold/},
(join q{,} => @{$v->{durations}}),
;
print "\n";
}
exit;
}
for my $qtype (
map { $_->[0] }
sort { $a->[1] <=> $b->[1] or $b->[2] <=> $a->[2] }
map { [$_, ($_ eq 'COPY' ? 1 : 0), scalar @{$out->{$_}} ] }
keys %$out) {
my $arr = $out->{$qtype};
my $type_top_ten = $opt{'top-10-file'}
? "$opt{'top-10-path'}$host-$qtype-$opt{'top-10-file'}"
: '/dev/null'
;
my ($all_fh, $type_all);
if ($opt{'all-file'}) {
$type_all = "$opt{'all-path'}$host-$qtype-$opt{'all-file'}";
open ($all_fh, '>', $type_all)
or die "Can't open '$type_all' to write: $!";
}
else {
$all_fh = \*STDOUT;
}
open (my $top_ten_fh, '>', $type_top_ten)
or die "Can't open '$type_top_ten' to write: $!";
# Start off reports with appropriate Wiki naming
# convention. Can automate the posting of reports
# with code that strips first line and uses it
# for Wiki page.
my $a1 = '';
my $a2 = '';
if ($opt{format} eq 'html') {
my $safename = "${host}_$qtype";
$a1 = qq{<a name="$safename">};
$a2 = qq{</a>};
my $phost = length $host ? qq{ : $host :} : ':';
print $all_fh qq{${fmstartheader2}${a1}Query System Impact $phost $qtype${a2}${fmendheader2}\n};
}
else {
print $all_fh qq{Query_System_Impact:$host:$qtype\n};
}
# Top 10 lists are put into templates, assuming
# they will be pulled in to a collection with
# other top 10s, typically for the same host.
print $top_ten_fh <<"EOP";
Template:${host}_SI_Top_10:$qtype
EOP
for my $fh ($all_fh, $top_ten_fh) {
print $fh "${fmstartheader3}Log activity from $start_time to $end_time${fmendheader3}\n";
}
print $all_fh join ("$fmsep\n", @$arr);
print $top_ten_fh join ("$fmsep", grep { defined $_ } @$arr[0..9]);
close ($top_ten_fh) or warn "Error closing '$type_top_ten': $!";
if ($type_all) {
close ($all_fh) or warn "Error closing '$type_all': $!";
}
}
if ($opt{format} eq 'html') {
print "</body></html>\n";
}
if (! $opt{quiet}) {
warn "Items processed: $resolve_called\n";
}
exit;
sub resolve_syslog_stmt {
my $pid = shift;
$query{$pid} ||= {};
my $prev = $query{$pid}{fragments};
# First time to see this pid. Initialize
# fragments array, and no previous
# statement to resolve.
unless (ref ($prev) eq 'ARRAY') {
$query{$pid}{fragments} = [];
return;
}
# Now have collected a full query and need to
# canonize and store.
my $full_statement = lc (join (' ', @$prev));
# Handle SQL comments, carefully
$full_statement =~ s{/[*].*?[*]/}{}msg;
# Tidy up spaces
$full_statement =~ s/#011/ /g;
$full_statement =~ s/^\s+|\s+$//g;
$full_statement =~ s/\s+/ /g;
# Special transform for crowded queries
$full_statement =~ s/=\s*(\d+)(and|or) /= $1 $2 /gio;
# If closing a query, store until we get
# subsequent duration statement
if (my @match_args = $full_statement =~ $extract_query_re) {
my ($main_query, $query_type) =
$extract_query_re->query_info(@match_args);
# Clean out arguments
# Quoted string
$main_query =~ s/'(?:''|\\+'?|[^'\\]+)*'/?/g;
# Numeric no quote, and bind params
$main_query =~ s/(?<=[^\w.])[\$-]?(?:\d*[.])?\d+\b/?/g;
# Collapsing IN () lists, so queries deviating
# only by 'IN (?,?)' and 'IN (?,?,?)' are logged
# as "the same"
$main_query =~
s{
# Starts IN ...
\s in \s?
# Outermost paren for IN list
[(]
(?:
# Could be list of rows
[(] [?,\s\$]* [)]
|
# or the standard stuff of a scalar list
[?,\s\$]+
)+
# Until we close the full IN list
[)]
}
{ in (?+)}xmsg;
# Remove remaining comments (instances of '--' that didn't indicate
# comments should have been removed already)
$main_query =~ s/--[^\n]+$//gm;
# Remove blank lines
$main_query =~ s/^\s*\n//gm;
# Store in temporary statement hashkey,
# along with UPPER type
$query{$pid}{statement} = $main_query;
$query{$pid}{qtype} = "\U$query_type";
}
if (
exists $query{$pid}{statement}
&&
$full_statement =~ $extract_duration_re
)
{
my $duration = $1 || 0;
my $stored = $query{$pid};
# Add canonical query to count hash
my $hsh =
$canonical_q{ delete $stored->{statement} }
||= {
count => 0,
duration => 0,
deviation => 0,
qtype => delete $stored->{qtype},
minimum_offenders => [ [ $stored->{statement_id} => $duration ] ],
minimum_threshold => $duration,
maximum_offenders => [ [ $stored->{statement_id} => $duration ] ],
maximum_threshold => $duration,
durations => [],
};
++$hsh->{count};
$hsh->{duration} += $duration;
push @{$hsh->{durations}}, $duration;
# If we're tracking offenders (best/worst queries)
# add them in if the newest one measures as one of the
# best or worst.
if ($opt{offenders}) {
if ($duration > $hsh->{maximum_threshold}) {
my $array = $hsh->{maximum_offenders};
@$array = (
sort { $b->[1] <=> $a->[1] }
@$array,
[ $stored->{statement_id}, $duration ],
);
splice (@$array, $opt{offenders}) if @$array > $opt{offenders};
$hsh->{maximum_threshold} = $array->[-1]->[1];
}
if ($duration < $hsh->{minimum_threshold}) {
my $array = $hsh->{minimum_offenders};
@$array = (
sort { $a->[1] <=> $b->[1] }
@$array,
[ $stored->{statement_id}, $duration ],
);
splice (@$array, $opt{offenders}) if @$array > $opt{offenders};
$hsh->{maximum_threshold} = $array->[-1]->[1];
}
}
}
@$prev = ();
$resolve_called++;
return 1;
} ## end of resolve_syslog_statement
sub resolve_pid_statement {
my $info = shift or die;
$resolve_called++;
my $string = $info->{statement} ? lc $info->{statement} : '';
my $duration = $info->{duration} || 0;