@@ -37,11 +37,11 @@ =head1 REQUIREMENTS
37
37
38
38
1.) password-less ssh access to all the hosts in the machine list
39
39
2.) ssh key in ~/.ssh/id_rsa or ~/.ssh/monitor-rsa
40
- 3.) ability to /bin/cat /proc/net/dev /proc/loadavg /proc/ diskstats
40
+ 3.) ability to /bin/cat /proc/net/dev /proc/diskstats
41
41
42
42
If you want to have a special key that is restricted to the cat command, here's an example:
43
43
44
- no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty,command="/bin/cat /proc/net/dev /proc/loadavg /proc/ diskstats" ssh-rsa AAAA...== [email protected]
44
+ no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty,command="/bin/cat /proc/net/dev /proc/diskstats" ssh-rsa AAAA...== [email protected]
45
45
46
46
=cut
47
47
@@ -84,7 +84,7 @@ =head1 REQUIREMENTS
84
84
port => 22
85
85
}) ; # ssh connection + metadata
86
86
87
- print BLUE , " Connecting to $host via SSH ... " , RESET;
87
+ print CYAN , " Connecting to $host via SSH ... " , RESET;
88
88
eval {
89
89
$bundle = libssh2_connect($host , 22);
90
90
};
@@ -108,7 +108,7 @@ =head1 REQUIREMENTS
108
108
$bundle -> comment($hosts {$host });
109
109
110
110
# set up the polling command and add to the poll list
111
- push @ssh , [ $host , $bundle , ' /bin/cat /proc/net/dev /proc/loadavg /proc/ diskstats' ];
111
+ push @ssh , [ $host , $bundle , ' /bin/cat /proc/net/dev /proc/diskstats' ];
112
112
push @sorted_host_list , $host ;
113
113
$host_bundles {$host } = $bundle ;
114
114
}
@@ -140,8 +140,10 @@ sub cl_netstat {
140
140
$struct -> {$hostname }{comment } = pop @{$stats {$hostname }};
141
141
142
142
my @legend ;
143
- $struct -> {$hostname }{dsk_rdi } = 0;
144
- $struct -> {$hostname }{dsk_wdi } = 0;
143
+ $struct -> {$hostname }{dsk_rds } = 0; # read sectors counter
144
+ $struct -> {$hostname }{dsk_rwt } = 0; # read wait ms counter
145
+ $struct -> {$hostname }{dsk_wds } = 0; # write sectors counter
146
+ $struct -> {$hostname }{dsk_wwt } = 0; # write wait ms counter
145
147
$struct -> {$hostname }{net } = {};
146
148
147
149
foreach my $line ( @{$stats {$hostname }} ) {
@@ -161,23 +163,17 @@ sub cl_netstat {
161
163
$struct -> {$hostname }{net }{$legend [$idx ]} += $sdata [$idx ] || 0;
162
164
}
163
165
}
164
- # load average
165
- # # 0.00 0.00 0.00 1/307 155781
166
- elsif ($line =~ / (\d +\.\d +) (\d +\.\d +) (\d +\.\d +) \d +\/\d + \d +/ ) {
167
- $struct -> {$hostname }{la_short } = $1 ;
168
- $struct -> {$hostname }{la_medium } = $2 ;
169
- $struct -> {$hostname }{la_long } = $3 ;
170
- }
171
166
# 8 0 sda 298890 2980 5498843 92328 10123211 2314394 134218078 10756944 0 419132 10866136
172
167
# 8 5 sda5 5540 826 44511 1528 15558 55975 572334 68312 0 2932 69848
173
168
# 8 32 sdc 913492 273 183151490 8217340 2047310 0 37711114 1259728 0 1267508 9476068
174
169
# 8 16 sdb 2640 380 18329 2860 1751748 13461886 121702720 249041290 78 2654720 249048720
175
170
# 8 1 sda1 35383589 4096190 515794290 173085956 58990656 100542811 1276270912 205189188 0 135658516 378268412
176
- # ignore whole devices, add up paritions, because EC2 machines get disks with partitions but not whole
177
- # disks (fucking xen)
171
+ # EC2 machines get disks with partitions but not whole disks
172
+ # TODO: sort out devices to make sure partitions are not double-counted with whole devices
173
+ #
178
174
# from Documentation/iostats.txt:
179
175
# Field 1 -- # of reads completed
180
- # Field 2 -- # of reads merged, field 6 -- # of writes merged
176
+ # Field 2 -- # of reads merged
181
177
# Field 3 -- # of sectors read
182
178
# Field 4 -- # of milliseconds spent reading
183
179
# Field 5 -- # of writes completed
@@ -188,14 +184,13 @@ sub cl_netstat {
188
184
# Field 10 -- # of milliseconds spent doing I/Os
189
185
# Field 11 -- weighted # of milliseconds spent doing I/Os
190
186
#
191
- # example: 8 1 sda1 35383589 4096190 515794290 173085956 58990656 100542811
192
- # capture: $1 $2 $3
193
- # field: major minor device 1 2 3 4 5 ... 6-11
194
- elsif ($line =~ / ^\s *\d +\s +\d +\s +(\w +)\s +(\d +)\s +\d +\s +\d +\s +\d +\s +(\d +)\s +/ ) {
187
+ # capture: major minor $1 $2 $3 $4 $5 $6 $7 $8 $9 $10 ...
188
+ elsif ($line =~ / ^\s *\d +\s +\d +\s +(\w +)\s +(\d +)\s +(\d +)\s +(\d +)\s +(\d +)\s +(\d +)\s +(\d +)\s +(\d +)\s +(\d +)\s +(\d +)\s +(\d +)\s +/ ) {
195
189
if (not $opt_device or $opt_device eq $1 ) {
196
- $struct -> {$hostname }{dsk_rdi } += $2 ;
197
- $struct -> {$hostname }{dsk_wdi } += $3 ;
198
- # $struct->{$hostname}{dsk_wms} += $4;
190
+ $struct -> {$hostname }{dsk_rds } += $2 ;
191
+ $struct -> {$hostname }{dsk_rwt } += $5 ;
192
+ $struct -> {$hostname }{dsk_wds } += $6 ;
193
+ $struct -> {$hostname }{dsk_wwt } += $9 ;
199
194
}
200
195
}
201
196
}
@@ -223,30 +218,33 @@ sub diff_cl_netstat {
223
218
if ( $iface eq ' net' ) {
224
219
my $rdiff = $s1 -> {$host }{$iface }{rbytes } - $s2 -> {$host }{$iface }{rbytes };
225
220
my $tdiff = $s1 -> {$host }{$iface }{tbytes } - $s2 -> {$host }{$iface }{tbytes };
221
+ my $tput = ($s1 -> {$host }{$iface }{rpackets } - $s2 -> {$host }{$iface }{rpackets })
222
+ + ($s1 -> {$host }{$iface }{tpackets } - $s2 -> {$host }{$iface }{tpackets });
226
223
227
- # watch for counter rollover
224
+ # counter rollover
228
225
if ($s1 -> {$host }{$iface }{rbytes } < $s2 -> {$host }{$iface }{rbytes }) {
229
- # wrong, but better than the huge negative numbers
230
- # fixing correctly will require keeping diffs across iterations
226
+ # this trades off the accuracy of one iteration to avoid having
227
+ # to track deltas across iterations
231
228
$rdiff = $s2 -> {$host }{$iface }{rbytes };
232
229
}
233
230
if ($s1 -> {$host }{$iface }{tbytes } < $s2 -> {$host }{$iface }{tbytes }) {
234
231
$tdiff = $s2 -> {$host }{$iface }{tbytes };
235
232
}
233
+
234
+ # 0: rx bytes per second, 1: tx bytes per second
236
235
push @host_traffic , int ($rdiff / $seconds ), int ($tdiff / $seconds );
236
+ # 2: total packets per second (rx + tx), 3: 0 placeholder (using an array here was silly, should be hash)
237
+ push @host_traffic , int ($tput / $seconds ), 0;
237
238
}
238
- # elsif ($iface =~ /^dsk_[rw]d[is]/) {
239
- # print "$iface: " . Dumper($s1->{$host}{$iface});
240
- # }
241
239
}
242
240
243
- # for now, just set second interface to 0 if it doesn't exist
244
- if ( @host_traffic == 2 ) {
245
- push @host_traffic , 0, 0;
246
- }
241
+ # iops
242
+ $host_traffic [4] = ($s1 -> {$host }{dsk_rds } - $s2 -> {$host }{dsk_rds }) / $seconds ;
243
+ $host_traffic [5] = ($s1 -> {$host }{dsk_wds } - $s2 -> {$host }{dsk_wds }) / $seconds ;
247
244
248
- $host_traffic [4] = ($s1 -> {$host }{dsk_rdi } - $s2 -> {$host }{dsk_rdi }) / $seconds ;
249
- $host_traffic [5] = ($s1 -> {$host }{dsk_wdi } - $s2 -> {$host }{dsk_wdi }) / $seconds ;;
245
+ # iowait
246
+ $host_traffic [6] = ($s1 -> {$host }{dsk_rwt } - $s2 -> {$host }{dsk_rwt });
247
+ $host_traffic [7] = ($s1 -> {$host }{dsk_wwt } - $s2 -> {$host }{dsk_wwt });
250
248
251
249
$out {$host } = \@host_traffic ;
252
250
}
@@ -255,7 +253,12 @@ sub diff_cl_netstat {
255
253
256
254
# ## MAIN
257
255
258
- my ( $iterations , $total_send , $total_recv , $total_disk_read , $total_disk_write , %averages ) = ( 0, 0, 0, 0, 0, () );
256
+ my ($iterations , %averages ) = (0, ());
257
+
258
+ # these totals are for the lifetime of this process
259
+ my ($total_net_tx , $total_net_rx ) = (0, 0);
260
+ my ($total_disk_riops , $total_disk_wiops ) = (0, 0);
261
+ my ($total_disk_rwait , $total_disk_wwait ) = (0, 0);
259
262
260
263
my $previous = cl_netstat();
261
264
print GREEN, " Acquired first round. Output begins in $opt_interval seconds.\n " , WHITE;
@@ -267,15 +270,15 @@ sub diff_cl_netstat {
267
270
my %diff = diff_cl_netstat( $current , $previous );
268
271
$previous = $current ;
269
272
270
- my $header = sprintf " % ${hostname_pad} s: % 13s % 13s % 13s %12s %12s %5s %5s %5s " ,
271
- qw( hostname net_total net_recv net_send read_iops write_iops 1min 5min 15min ) ;
272
- print BLUE , $header , $/ , ' -' x length ($header ), $/ , RESET;
273
+ my $header = sprintf " % ${hostname_pad} s: % 13s % 13s % 14s %8s %8s %8s %8s " ,
274
+ qw( hostname net_packets net_rx_bytes net_tx_bytes dsk_riops dsk_wiops rwait_ms wwait_ms ) ;
275
+ print CYAN , $header , $/ , ' -' x length ($header ), $/ , RESET;
273
276
274
- my $host_count = 0;
275
- my $host_r_total = 0;
276
- my $host_s_total = 0 ;
277
- my $host_dr_total = 0 ;
278
- my $host_dw_total = 0 ;
277
+ # iteration totals
278
+ my $host_count = 0;
279
+ my ( $ivl_net_rx_total , $ivl_net_tx_total ) = (0, 0) ;
280
+ my ( $ivl_riops_total , $ivl_wiops_total ) = (0, 0) ;
281
+ my ( $ivl_rwait_total , $ivl_wwait_total ) = (0, 0) ;
279
282
280
283
HOST: foreach my $host ( @sorted_host_list ) {
281
284
my $hostname = $host ;
@@ -290,56 +293,71 @@ sub diff_cl_netstat {
290
293
}
291
294
292
295
# network
293
- printf " %s % ${hostname_pad} s: %s % 13s %s % 13s %s % 13s%s " ,
296
+ printf " %s % ${hostname_pad} s: %s % 13s %s % 13s %s % 13s%s " ,
294
297
WHITE, $hostname ,
295
- net_c ($diff {$host }-> [0] + $diff { $host } -> [1] , 2),
296
- net_c($diff {$host }-> [0]),
297
- net_c($diff {$host }-> [1]),
298
+ io_c ($diff {$host }-> [2] , 2), # total pps
299
+ net_c($diff {$host }-> [0]), # read bytes per second
300
+ net_c($diff {$host }-> [1]), # write bytes per second
298
301
RESET;
299
302
300
303
# disk iops
301
- printf " %s %12s/s %s %12s/s " ,
304
+ printf " %s %8s %s %8s " ,
302
305
io_c($diff {$host }-> [4]),
303
306
io_c($diff {$host }-> [5]);
304
307
305
- # load average
306
- printf " %s %5s %s %5s %s %5s %s%s%s \n " ,
307
- la_c($current -> {$host }{la_short }),
308
- la_c($current -> {$host }{la_medium }),
309
- la_c($current -> {$host }{la_long }),
310
- DKGRAY, $current -> {$host }{comment } || ' ' , RESET;
308
+ # iowait
309
+ my $avg_rwait = $diff {$host }-> [6] / ($diff {$host }-> [4] || 1);
310
+ my $avg_wwait = $diff {$host }-> [7] / ($diff {$host }-> [5] || 1);
311
+ printf " %s %8s %s %8s %s%s%s \n " ,
312
+ io_c($avg_rwait ),
313
+ io_c($avg_wwait ),
314
+ DKGRAY, $current -> {$host }{comment } || ' ' , RESET;
311
315
316
+ # increment totals
312
317
$host_count ++;
313
- $host_r_total += $diff {$host }-> [0] + $diff {$host }-> [2];
314
- $host_s_total += $diff {$host }-> [1] + $diff {$host }-> [3];
315
-
316
- $host_dr_total += $diff {$host }-> [4];
317
- $host_dw_total += $diff {$host }-> [5];
318
+ $ivl_net_rx_total += $diff {$host }-> [0] + $diff {$host }-> [2];
319
+ $ivl_net_tx_total += $diff {$host }-> [1] + $diff {$host }-> [3];
320
+ $ivl_riops_total += $diff {$host }-> [4];
321
+ $ivl_wiops_total += $diff {$host }-> [5];
322
+ $ivl_rwait_total += $avg_rwait ;
323
+ $ivl_wwait_total += $avg_wwait ;
318
324
}
319
325
320
- # W C V W C V W C V W V C V W C V W R
321
- printf " %sTotal : %s % 13s %sRecv : %s % 12s %sSend : %s % 12s %s (%s mbit/s) | %s % 6s %sread /s %s % 6s %swrite /s%s \n " ,
322
- WHITE, net_c($host_r_total + $host_s_total , 2 * $host_count ), WHITE, # wcvw
323
- net_c($host_r_total , $host_count ), WHITE, # cvw
324
- net_c($host_s_total , $host_count ), WHITE, # cvw
325
- c((($host_r_total + $host_s_total )*8)/(2**20)), # v
326
- io_c($host_dr_total , $host_count ), WHITE, # cvw
327
- io_c($host_dw_total , $host_count ), WHITE, # cvw
326
+ printf " %sNetwork total: %s % 13s %sRecv : %s % 12s %sSend : %s % 12s %s (%s MiB/s)%s \n " ,
327
+ WHITE, net_c($ivl_net_rx_total + $ivl_net_tx_total , 2 * $host_count ), WHITE,
328
+ net_c($ivl_net_rx_total , $host_count ), WHITE,
329
+ net_c($ivl_net_tx_total , $host_count ), WHITE,
330
+ c(($ivl_net_rx_total + $ivl_net_tx_total )/(2**20)),
331
+ RESET;
332
+
333
+ $total_net_tx += $ivl_net_tx_total ;
334
+ $total_net_rx += $ivl_net_rx_total ;
335
+
336
+ printf " %sNetwork average: %s % 13s %sRecv : %s % 12s %sSend : %s % 12s %s (%s MiB/s)%s \n " ,
337
+ WHITE, net_c(($total_net_rx + $total_net_tx ) / $iterations , 2), WHITE,
338
+ net_c(($total_net_rx / $iterations ) / $host_count ), WHITE,
339
+ net_c(($total_net_tx / $iterations ) / $host_count ), WHITE,
340
+ c((($total_net_rx + $total_net_tx ) / $iterations )/(2**20)),
341
+ RESET;
342
+
343
+ $total_disk_riops += $ivl_riops_total ;
344
+ $total_disk_wiops += $ivl_wiops_total ;
345
+
346
+ printf " %sIOPS : %s % 10s %stotal riops %s % 10s %stotal wiops %s % 6s %savg riops %s % 6s %savg wiops%s \n " ,
347
+ WHITE, io_c($ivl_riops_total , $host_count ), WHITE,
348
+ io_c($ivl_wiops_total , $host_count ), WHITE,
349
+ io_c(($total_disk_riops / $iterations ) / $host_count ), WHITE,
350
+ io_c(($total_disk_wiops / $iterations ) / $host_count ), WHITE,
328
351
RESET;
329
-
330
- $total_send += $host_s_total ;
331
- $total_recv += $host_r_total ;
332
- $total_disk_read += $host_dr_total ;
333
- $total_disk_write += $host_dw_total ;
334
-
335
- # W C V W C V W C V W V C V W C V W R
336
- printf " %sAverage : %s % 13s %sRecv : %s % 12s %sSend : %s % 12s %s (%s mbit/s) | %s %6s %sread /s %s %6s %swrite /s%s \n\n " ,
337
- WHITE, net_c(($total_recv + $total_send ) / $iterations , 2), WHITE, # wcvw
338
- net_c(($total_recv / $iterations ) / $host_count ), WHITE, # cvw
339
- net_c(($total_send / $iterations ) / $host_count ), WHITE, # cvw
340
- c(((($total_recv + $total_send ) / $iterations )*8)/(2**20)), # v
341
- io_c(($total_disk_read / $iterations ) / $host_count ), WHITE, # cvw
342
- io_c(($total_disk_write / $iterations ) / $host_count ), WHITE, # cvw
352
+
353
+ $total_disk_rwait += $ivl_rwait_total ;
354
+ $total_disk_wwait += $ivl_wwait_total ;
355
+
356
+ printf " %siowait ms: %s % 10s %stotal rwait %s % 10s %stotal wwait %s % 6s %savg rwait %s % 6s %savg wwait%s \n\n " ,
357
+ WHITE, io_c($ivl_rwait_total , $host_count ), WHITE,
358
+ io_c($ivl_wwait_total , $host_count ), WHITE,
359
+ io_c(($total_disk_rwait / $iterations ) / $host_count ), WHITE,
360
+ io_c(($total_disk_wwait / $iterations ) / $host_count ), WHITE,
343
361
RESET;
344
362
345
363
sleep $opt_interval ;
0 commit comments