Skip to content

Commit 5d98fab

Browse files
committed
integrate disk io thoroughly, drop LA
dropped load average for now, added disk wait, added disk iops/wait summary lines
1 parent 5f297b8 commit 5d98fab

File tree

1 file changed

+100
-82
lines changed

1 file changed

+100
-82
lines changed

cl-netstat.pl

+100-82
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ =head1 REQUIREMENTS
3737
3838
1.) password-less ssh access to all the hosts in the machine list
3939
2.) ssh key in ~/.ssh/id_rsa or ~/.ssh/monitor-rsa
40-
3.) ability to /bin/cat /proc/net/dev /proc/loadavg /proc/diskstats
40+
3.) ability to /bin/cat /proc/net/dev /proc/diskstats
4141
4242
If you want to have a special key that is restricted to the cat command, here's an example:
4343
44-
no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty,command="/bin/cat /proc/net/dev /proc/loadavg /proc/diskstats" ssh-rsa AAAA...== [email protected]
44+
no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty,command="/bin/cat /proc/net/dev /proc/diskstats" ssh-rsa AAAA...== [email protected]
4545
4646
=cut
4747

@@ -84,7 +84,7 @@ =head1 REQUIREMENTS
8484
port => 22
8585
}) ; # ssh connection + metadata
8686

87-
print BLUE, "Connecting to $host via SSH ... ", RESET;
87+
print CYAN, "Connecting to $host via SSH ... ", RESET;
8888
eval {
8989
$bundle = libssh2_connect($host, 22);
9090
};
@@ -108,7 +108,7 @@ =head1 REQUIREMENTS
108108
$bundle->comment($hosts{$host});
109109

110110
# set up the polling command and add to the poll list
111-
push @ssh, [ $host, $bundle, '/bin/cat /proc/net/dev /proc/loadavg /proc/diskstats' ];
111+
push @ssh, [ $host, $bundle, '/bin/cat /proc/net/dev /proc/diskstats' ];
112112
push @sorted_host_list, $host;
113113
$host_bundles{$host} = $bundle;
114114
}
@@ -140,8 +140,10 @@ sub cl_netstat {
140140
$struct->{$hostname}{comment} = pop @{$stats{$hostname}};
141141

142142
my @legend;
143-
$struct->{$hostname}{dsk_rdi} = 0;
144-
$struct->{$hostname}{dsk_wdi} = 0;
143+
$struct->{$hostname}{dsk_rds} = 0; # reads completed counter (diskstats field 1)
144+
$struct->{$hostname}{dsk_rwt} = 0; # read wait ms counter
145+
$struct->{$hostname}{dsk_wds} = 0; # writes completed counter (diskstats field 5)
146+
$struct->{$hostname}{dsk_wwt} = 0; # write wait ms counter
145147
$struct->{$hostname}{net} = {};
146148

147149
foreach my $line ( @{$stats{$hostname}} ) {
@@ -161,23 +163,17 @@ sub cl_netstat {
161163
$struct->{$hostname}{net}{$legend[$idx]} += $sdata[$idx] || 0;
162164
}
163165
}
164-
# load average
165-
# # 0.00 0.00 0.00 1/307 155781
166-
elsif ($line =~ /(\d+\.\d+) (\d+\.\d+) (\d+\.\d+) \d+\/\d+ \d+/) {
167-
$struct->{$hostname}{la_short} = $1;
168-
$struct->{$hostname}{la_medium} = $2;
169-
$struct->{$hostname}{la_long} = $3;
170-
}
171166
# 8 0 sda 298890 2980 5498843 92328 10123211 2314394 134218078 10756944 0 419132 10866136
172167
# 8 5 sda5 5540 826 44511 1528 15558 55975 572334 68312 0 2932 69848
173168
# 8 32 sdc 913492 273 183151490 8217340 2047310 0 37711114 1259728 0 1267508 9476068
174169
# 8 16 sdb 2640 380 18329 2860 1751748 13461886 121702720 249041290 78 2654720 249048720
175170
# 8 1 sda1 35383589 4096190 515794290 173085956 58990656 100542811 1276270912 205189188 0 135658516 378268412
176-
# ignore whole devices, add up paritions, because EC2 machines get disks with partitions but not whole
177-
# disks (fucking xen)
171+
# EC2 machines get disks with partitions but not whole disks
172+
# TODO: sort out devices to make sure partitions are not double-counted with whole devices
173+
#
178174
# from Documentation/iostats.txt:
179175
# Field 1 -- # of reads completed
180-
# Field 2 -- # of reads merged, field 6 -- # of writes merged
176+
# Field 2 -- # of reads merged
181177
# Field 3 -- # of sectors read
182178
# Field 4 -- # of milliseconds spent reading
183179
# Field 5 -- # of writes completed
@@ -188,14 +184,13 @@ sub cl_netstat {
188184
# Field 10 -- # of milliseconds spent doing I/Os
189185
# Field 11 -- weighted # of milliseconds spent doing I/Os
190186
#
191-
# example: 8 1 sda1 35383589 4096190 515794290 173085956 58990656 100542811
192-
# capture: $1 $2 $3
193-
# field: major minor device 1 2 3 4 5 ... 6-11
194-
elsif ($line =~ /^\s*\d+\s+\d+\s+(\w+)\s+(\d+)\s+\d+\s+\d+\s+\d+\s+(\d+)\s+/) {
187+
# capture: major minor $1 $2 $3 $4 $5 $6 $7 $8 $9 $10 ...
188+
elsif ($line =~ /^\s*\d+\s+\d+\s+(\w+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+/) {
195189
if (not $opt_device or $opt_device eq $1) {
196-
$struct->{$hostname}{dsk_rdi} += $2;
197-
$struct->{$hostname}{dsk_wdi} += $3;
198-
#$struct->{$hostname}{dsk_wms} += $4;
190+
$struct->{$hostname}{dsk_rds} += $2;
191+
$struct->{$hostname}{dsk_rwt} += $5;
192+
$struct->{$hostname}{dsk_wds} += $6;
193+
$struct->{$hostname}{dsk_wwt} += $9;
199194
}
200195
}
201196
}
@@ -223,30 +218,33 @@ sub diff_cl_netstat {
223218
if ( $iface eq 'net' ) {
224219
my $rdiff = $s1->{$host}{$iface}{rbytes} - $s2->{$host}{$iface}{rbytes};
225220
my $tdiff = $s1->{$host}{$iface}{tbytes} - $s2->{$host}{$iface}{tbytes};
221+
my $tput = ($s1->{$host}{$iface}{rpackets} - $s2->{$host}{$iface}{rpackets})
222+
+ ($s1->{$host}{$iface}{tpackets} - $s2->{$host}{$iface}{tpackets});
226223

227-
# watch for counter rollover
224+
# counter rollover
228225
if ($s1->{$host}{$iface}{rbytes} < $s2->{$host}{$iface}{rbytes}) {
229-
# wrong, but better than the huge negative numbers
230-
# fixing correctly will require keeping diffs across iterations
226+
# this trades off the accuracy of one iteration to avoid having
227+
# to track deltas across iterations
231228
$rdiff = $s2->{$host}{$iface}{rbytes};
232229
}
233230
if ($s1->{$host}{$iface}{tbytes} < $s2->{$host}{$iface}{tbytes}) {
234231
$tdiff = $s2->{$host}{$iface}{tbytes};
235232
}
233+
234+
# 0: read_bytesps, 1: write_bytesps
236235
push @host_traffic, int($rdiff / $seconds), int($tdiff / $seconds);
236+
# 2: total packets per second (rx + tx), 3: 0 (using an array here was silly, should be hash)
237+
push @host_traffic, int($tput / $seconds), 0;
237238
}
238-
#elsif ($iface =~ /^dsk_[rw]d[is]/) {
239-
# print "$iface: " . Dumper($s1->{$host}{$iface});
240-
#}
241239
}
242240

243-
# for now, just set second interface to 0 if it doesn't exist
244-
if ( @host_traffic == 2 ) {
245-
push @host_traffic, 0, 0;
246-
}
241+
# iops
242+
$host_traffic[4] = ($s1->{$host}{dsk_rds} - $s2->{$host}{dsk_rds}) / $seconds;
243+
$host_traffic[5] = ($s1->{$host}{dsk_wds} - $s2->{$host}{dsk_wds}) / $seconds;
247244

248-
$host_traffic[4] = ($s1->{$host}{dsk_rdi} - $s2->{$host}{dsk_rdi}) / $seconds;
249-
$host_traffic[5] = ($s1->{$host}{dsk_wdi} - $s2->{$host}{dsk_wdi}) / $seconds;;
245+
# iowait
246+
$host_traffic[6] = ($s1->{$host}{dsk_rwt} - $s2->{$host}{dsk_rwt});
247+
$host_traffic[7] = ($s1->{$host}{dsk_wwt} - $s2->{$host}{dsk_wwt});
250248

251249
$out{$host} = \@host_traffic;
252250
}
@@ -255,7 +253,12 @@ sub diff_cl_netstat {
255253

256254
### MAIN
257255

258-
my( $iterations, $total_send, $total_recv, $total_disk_read, $total_disk_write, %averages ) = ( 0, 0, 0, 0, 0, () );
256+
my($iterations, %averages) = (0, ());
257+
258+
# these totals are for the lifetime of this process
259+
my($total_net_tx, $total_net_rx) = (0, 0);
260+
my($total_disk_riops, $total_disk_wiops) = (0, 0);
261+
my($total_disk_rwait, $total_disk_wwait) = (0, 0);
259262

260263
my $previous = cl_netstat();
261264
print GREEN, "Acquired first round. Output begins in $opt_interval seconds.\n", WHITE;
@@ -267,15 +270,15 @@ sub diff_cl_netstat {
267270
my %diff = diff_cl_netstat( $current, $previous );
268271
$previous = $current;
269272

270-
my $header = sprintf "% ${hostname_pad}s: % 13s % 13s % 13s %12s %12s %5s %5s %5s",
271-
qw( hostname net_total net_recv net_send read_iops write_iops 1min 5min 15min );
272-
print BLUE, $header, $/, '-' x length($header), $/, RESET;
273+
my $header = sprintf "% ${hostname_pad}s: % 13s % 13s % 14s %8s %8s %8s %8s",
274+
qw( hostname net_packets net_rx_bytes net_tx_bytes dsk_riops dsk_wiops rwait_ms wwait_ms );
275+
print CYAN, $header, $/, '-' x length($header), $/, RESET;
273276

274-
my $host_count = 0;
275-
my $host_r_total = 0;
276-
my $host_s_total = 0;
277-
my $host_dr_total = 0;
278-
my $host_dw_total = 0;
277+
# iteration totals
278+
my $host_count = 0;
279+
my($ivl_net_rx_total, $ivl_net_tx_total) = (0, 0);
280+
my($ivl_riops_total, $ivl_wiops_total) = (0, 0);
281+
my($ivl_rwait_total, $ivl_wwait_total) = (0, 0);
279282

280283
HOST: foreach my $host ( @sorted_host_list ) {
281284
my $hostname = $host;
@@ -290,56 +293,71 @@ sub diff_cl_netstat {
290293
}
291294

292295
# network
293-
printf "%s% ${hostname_pad}s: %s% 13s %s% 13s %s% 13s%s",
296+
printf "%s% ${hostname_pad}s: %s% 13s %s% 13s %s% 13s%s ",
294297
WHITE, $hostname,
295-
net_c($diff{$host}->[0] + $diff{$host}->[1], 2),
296-
net_c($diff{$host}->[0]),
297-
net_c($diff{$host}->[1]),
298+
io_c($diff{$host}->[2], 2), # total pps
299+
net_c($diff{$host}->[0]), # read bytes per second
300+
net_c($diff{$host}->[1]), # write bytes per second
298301
RESET;
299302

300303
# disk iops
301-
printf "%s%12s/s %s%12s/s ",
304+
printf "%s%8s %s%8s ",
302305
io_c($diff{$host}->[4]),
303306
io_c($diff{$host}->[5]);
304307

305-
# load average
306-
printf "%s%5s %s%5s %s%5s %s%s%s\n",
307-
la_c($current->{$host}{la_short}),
308-
la_c($current->{$host}{la_medium}),
309-
la_c($current->{$host}{la_long}),
310-
DKGRAY, $current->{$host}{comment} || '', RESET;
308+
# iowait
309+
my $avg_rwait = $diff{$host}->[6] / ($diff{$host}->[4] || 1);
310+
my $avg_wwait = $diff{$host}->[7] / ($diff{$host}->[5] || 1);
311+
printf "%s%8s %s%8s %s%s%s\n",
312+
io_c($avg_rwait),
313+
io_c($avg_wwait),
314+
DKGRAY, $current->{$host}{comment} || '', RESET;
311315

316+
# increment totals
312317
$host_count++;
313-
$host_r_total += $diff{$host}->[0] + $diff{$host}->[2];
314-
$host_s_total += $diff{$host}->[1] + $diff{$host}->[3];
315-
316-
$host_dr_total += $diff{$host}->[4];
317-
$host_dw_total += $diff{$host}->[5];
318+
$ivl_net_rx_total += $diff{$host}->[0] + $diff{$host}->[2];
319+
$ivl_net_tx_total += $diff{$host}->[1] + $diff{$host}->[3];
320+
$ivl_riops_total += $diff{$host}->[4];
321+
$ivl_wiops_total += $diff{$host}->[5];
322+
$ivl_rwait_total += $avg_rwait;
323+
$ivl_wwait_total += $avg_wwait;
318324
}
319325

320-
# W C V W C V W C V W V C V W C V W R
321-
printf "%sTotal: %s% 13s %sRecv: %s% 12s %sSend: %s% 12s %s(%s mbit/s) | %s% 6s %sread/s %s% 6s %swrite/s%s\n",
322-
WHITE, net_c($host_r_total + $host_s_total, 2 * $host_count), WHITE, # wcvw
323-
net_c($host_r_total, $host_count), WHITE, # cvw
324-
net_c($host_s_total, $host_count), WHITE, # cvw
325-
c((($host_r_total + $host_s_total)*8)/(2**20)), # v
326-
io_c($host_dr_total, $host_count), WHITE, # cvw
327-
io_c($host_dw_total, $host_count), WHITE, # cvw
326+
printf "%sNetwork total: %s% 13s %sRecv: %s% 12s %sSend: %s% 12s %s(%s MiB/s)%s\n",
327+
WHITE, net_c($ivl_net_rx_total + $ivl_net_tx_total, 2 * $host_count), WHITE,
328+
net_c($ivl_net_rx_total, $host_count), WHITE,
329+
net_c($ivl_net_tx_total, $host_count), WHITE,
330+
c(($ivl_net_rx_total + $ivl_net_tx_total)/(2**20)),
331+
RESET;
332+
333+
$total_net_tx += $ivl_net_tx_total;
334+
$total_net_rx += $ivl_net_rx_total;
335+
336+
printf "%sNetwork average: %s% 13s %sRecv: %s% 12s %sSend: %s% 12s %s(%s MiB/s)%s\n",
337+
WHITE, net_c(($total_net_rx + $total_net_tx) / $iterations, 2), WHITE,
338+
net_c(($total_net_rx / $iterations) / $host_count), WHITE,
339+
net_c(($total_net_tx / $iterations) / $host_count), WHITE,
340+
c((($total_net_rx + $total_net_tx) / $iterations)/(2**20)),
341+
RESET;
342+
343+
$total_disk_riops += $ivl_riops_total;
344+
$total_disk_wiops += $ivl_wiops_total;
345+
346+
printf "%sIOPS: %s% 10s %stotal riops %s% 10s %stotal wiops %s% 6s %savg riops %s% 6s %savg wiops%s\n",
347+
WHITE, io_c($ivl_riops_total, $host_count), WHITE,
348+
io_c($ivl_wiops_total, $host_count), WHITE,
349+
io_c(($total_disk_riops / $iterations) / $host_count), WHITE,
350+
io_c(($total_disk_wiops / $iterations) / $host_count), WHITE,
328351
RESET;
329-
330-
$total_send += $host_s_total;
331-
$total_recv += $host_r_total;
332-
$total_disk_read += $host_dr_total;
333-
$total_disk_write += $host_dw_total;
334-
335-
# W C V W C V W C V W V C V W C V W R
336-
printf "%sAverage: %s% 13s %sRecv: %s% 12s %sSend: %s% 12s %s(%s mbit/s) | %s%6s %sread/s %s%6s %swrite/s%s\n\n",
337-
WHITE, net_c(($total_recv + $total_send) / $iterations, 2), WHITE, # wcvw
338-
net_c(($total_recv / $iterations) / $host_count), WHITE, # cvw
339-
net_c(($total_send / $iterations) / $host_count), WHITE, # cvw
340-
c(((($total_recv + $total_send) / $iterations)*8)/(2**20)), # v
341-
io_c(($total_disk_read / $iterations) / $host_count), WHITE, # cvw
342-
io_c(($total_disk_write / $iterations) / $host_count), WHITE, # cvw
352+
353+
$total_disk_rwait += $ivl_rwait_total;
354+
$total_disk_wwait += $ivl_wwait_total;
355+
356+
printf "%siowait ms: %s% 10s %stotal rwait %s% 10s %stotal wwait %s% 6s %savg rwait %s% 6s %savg wwait%s\n\n",
357+
WHITE, io_c($ivl_rwait_total, $host_count), WHITE,
358+
io_c($ivl_wwait_total, $host_count), WHITE,
359+
io_c(($total_disk_rwait / $iterations) / $host_count), WHITE,
360+
io_c(($total_disk_wwait / $iterations) / $host_count), WHITE,
343361
RESET;
344362

345363
sleep $opt_interval;

0 commit comments

Comments
 (0)