3 # Copyright (c) 2002-2003
\r
4 # The President and Fellows of Harvard College.
\r
6 # Redistribution and use in source and binary forms, with or without
\r
7 # modification, are permitted provided that the following conditions
\r
9 # 1. Redistributions of source code must retain the above copyright
\r
10 # notice, this list of conditions and the following disclaimer.
\r
11 # 2. Redistributions in binary form must reproduce the above copyright
\r
12 # notice, this list of conditions and the following disclaimer in the
\r
13 # documentation and/or other materials provided with the distribution.
\r
14 # 3. Neither the name of the University nor the names of its contributors
\r
15 # may be used to endorse or promote products derived from this software
\r
16 # without specific prior written permission.
\r
18 # THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND
\r
19 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
\r
20 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
\r
21 # ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE
\r
22 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
\r
23 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
\r
24 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
\r
25 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
\r
26 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
\r
27 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
\r
30 # $Id: nfsscan,v 1.18 2003/07/28 14:27:16 ellard Exp $
\r
33 $ProgDir =~ /(^.*)\//;
\r
39 require "$ProgDir/nfsdump.pl";
\r
40 require "$ProgDir/userUtils.pl";
\r
41 require "$ProgDir/hier.pl";
\r
42 require "$ProgDir/counts.pl";
\r
43 require "$ProgDir/latency.pl";
\r
44 require "$ProgDir/key.pl";
\r
45 require "$ProgDir/common.pl";
\r
49 $INTERVAL = 5 * 60; # in seconds (5 minutes)
\r
66 $FH_TYPE = 'unknown';
\r
77 $OutFileBaseName = undef;
\r
79 $nextPruneTime = -1;
\r
80 $PRUNE_INTERVAL = 1 * 60; # One minute.
\r
83 # Is this really the right default set of operations?
\r
85 $DEF_OPLIST = 'read,write,lookup,getattr,access,create,remove';
\r
86 @OPLIST = ('TOTAL', 'INTERESTING',
\r
87 split (/,/, $DEF_OPLIST));
\r
92 Usage: $0 [options] [trace1 [trace2 ...]]
\r
94 If no trace files are specified, then the trace is read from stdin.
\r
96 Command line options:
\r
98 -h Print usage message and exit.
\r
100 -B [CFUG] Compute per-Client, per-File, per-User, or per-Group info.
\r
102 -c c1[,c2]* Include only activity performed by the specified clients.
\r
104 -C c1[,c2]* Exclude activity performed by the specified clients.
\r
106 -d Compute per-directory statistics. This implicitly
\r
107 enables -BF so that per-file info is computed.
\r
109 -f Do file info tracking. This implicitly enables -BF so
\r
110 that per-File info is computed.
\r
112 -F fhtype Specify the file handle type used by the server.
\r
115 -g g1[,g2]* Include only activity performed by the specified groups.
\r
117 -G g1[,g2]* Exclude activity performed by the specified groups.
\r
119 -l Record average operation latency.
\r
121 -o basename Write output to files starting with the specified
\r
122 basename. The "Count" table goes to basename.cnt,
\r
123 "Latency" to basename.lat, and "File" to basename.fil.
\r
124 The default is to write all output to stdout.
\r
126 -O op[,op]* Specify the list of "interesting" operations.
\r
127 The default list is:
\r
129 read,write,lookup,getattr,access,create,remove
\r
131 If the first op starts with +, then the specified list
\r
132 of ops is appended to the default list. The special
\r
133 pseudo-ops readM and writeM represent the number of
\r
134 bytes read and written, expressed in MB.
\r
136 -t interval Time interval for cummulative statistics (such as
\r
137 operation count). The default is $INTERVAL seconds.
\r
138 If set to 0, then the entire trace is processed. By
\r
139 default, time is specified in seconds, but if the last
\r
140 character of the interval is any of s, m, h, or d,
\r
141 then the interval is interpreted as seconds, minutes,
\r
144 -u u1[,u2]* Include only activity performed by the specified users.
\r
146 -U u1[,u2]* Exclude activity performed by the specified users.
\r
148 -Z Omit count and latency lines that have a zero total
\r
160 counts::printTitle (*OUT_COUNTS);
\r
164 latency::printTitle (*OUT_LATENCY);
\r
167 counts::resetOpCounts ();
\r
169 my $cmdbuf = 'rm -f noattrdirdiscard noattrdir-root';
\r
177 my $cmdline = "$0 " . join (' ', @ARGV);
\r
179 my $Options = "B:c:C:dfF:g:G:hlO:o:t:u:U:SR:Z"; # c: and C: are needed for the documented -c/-C client filters
\r
180 if (! getopts ($Options)) {
\r
181 print STDERR "$0: Incorrect usage.\n";
\r
182 print STDERR $Usage;
\r
185 if (defined $opt_h) {
\r
190 #RFS: no need to input arguments
\r
194 #$opt_F = 'RFSNN'; # advfs or netapp
\r
196 if (defined $opt_B) {
\r
197 $UseClient = ($opt_B =~ /C/);
\r
198 $UseFH = ($opt_B =~ /F/);
\r
199 $UseUID = ($opt_B =~ /U/);
\r
200 $UseGID = ($opt_B =~ /G/);
\r
203 if (defined $opt_o) {
\r
204 $OutFileBaseName = $opt_o;
\r
207 if (defined $opt_O) {
\r
208 if ($opt_O =~ /^\+(.*)/) {
\r
209 @OPLIST = (@OPLIST, split (/,/, $1));
\r
212 @OPLIST = ('TOTAL', 'INTERESTING', split (/,/, $opt_O));
\r
217 if (defined $opt_l) {
\r
221 if (defined $opt_t) {
\r
222 if ($opt_t =~ /([0-9]*)([smhd])/) { # inspect the user-supplied -t value, not the current default interval
\r
226 if ($unit eq 's') {
\r
227 $INTERVAL = $opt_t;
\r
229 elsif ($unit eq 'm') {
\r
230 $INTERVAL = $opt_t * 60;
\r
232 elsif ($unit eq 'h') {
\r
233 $INTERVAL = $opt_t * 60 * 60;
\r
235 elsif ($unit eq 'd') {
\r
236 $INTERVAL = $opt_t * 24 * 60 * 60;
\r
240 $INTERVAL = $opt_t;
\r
244 $DO_PATHS = (defined $opt_d);
\r
245 $DO_FILES = (defined $opt_f);
\r
246 $DO_SQUEEZE = (defined $opt_S);
\r
247 $OMIT_ZEROS = (defined $opt_Z);
\r
249 $TIME_ROUNDING = (defined $opt_R) ? $opt_R : 0;
\r
251 if (defined $opt_F) {
\r
255 if (defined $opt_c) {
\r
256 @ADD_CLIENTS = split (/,/, $opt_c);
\r
258 if (defined $opt_C) {
\r
259 @DEL_CLIENTS = split (/,/, $opt_C); # the exclude list comes from -C, not -c
\r
262 if (defined $opt_g) {
\r
263 @ADD_GROUPS = groups2gids (split (/,/, $opt_g));
\r
265 if (defined $opt_G) {
\r
266 @DEL_GROUPS = groups2gids (split (/,/, $opt_G));
\r
269 if (defined $opt_u) {
\r
270 @ADD_USERS = logins2uids (split (/,/, $opt_u));
\r
272 if (defined $opt_U) {
\r
273 @DEL_USERS = logins2uids (split (/,/, $opt_U));
\r
277 # Now that we know what options the user asked for, initialize
\r
278 # things accordingly.
\r
280 if ($DO_PATHS || $DO_FILES) {
\r
285 latency::init (@OPLIST);
\r
289 counts::init (@OPLIST);
\r
292 if (defined $OutFileBaseName) {
\r
294 open (OUT_COUNTS, '>', "$OutFileBaseName.cnt") || # 3-arg open: the basename is never parsed as part of the mode
\r
295 die "Can't create $OutFileBaseName.cnt.";
\r
296 print OUT_COUNTS "#cmdline $cmdline\n";
\r
299 open (OUT_LATENCY, '>', "$OutFileBaseName.lat") || # 3-arg open: the basename is never parsed as part of the mode
\r
300 die "Can't create $OutFileBaseName.lat.";
\r
301 print OUT_LATENCY "#cmdline $cmdline\n";
\r
304 open (OUT_FILES, '>', "$OutFileBaseName.fil") || # 3-arg open: the basename is never parsed as part of the mode
\r
305 die "Can't create $OutFileBaseName.fil.";
\r
306 print OUT_FILES "#cmdline $cmdline\n";
\r
309 open (OUT_PATHS, '>', "$OutFileBaseName.pat") || # 3-arg open: the basename is never parsed as part of the mode
\r
310 die "Can't create $OutFileBaseName.pat.";
\r
311 print OUT_PATHS "#cmdline $cmdline\n";
\r
315 *OUT_COUNTS = *STDOUT; # alias the glob explicitly instead of relying on the bareword string 'STDOUT'
\r
316 *OUT_LATENCY = *STDOUT;
\r
317 *OUT_FILES = *STDOUT;
\r
318 *OUT_PATHS = *STDOUT;
\r
320 print STDOUT "#cmdline $cmdline\n";
\r
323 foreach my $op ( @OPLIST ) {
\r
333 while (my $line = <>) {
\r
335 $hier::rfsLineNum++;
\r
336 if ( ($hier::rfsLineNum % 1000) == 0) { # numeric comparison for a numeric remainder
\r
337 print STDERR "$hier::rfsLineNum\n";
\r
341 if ($line =~ /SHORT\ PACKET/) {
\r
345 my ($proto, $op, $xid, $client, $now, $response) =
\r
346 nfsd::nfsDumpParseLineHeader ($line);
\r
349 # NOTE: This next bit of logic requires a little
\r
350 # extra attention. We want to discard lines as
\r
351 # quickly as we can if they're not "interesting".
\r
352 # However, different lines are interesting in
\r
353 # different contexts, so the order of the tests and
\r
354 # the manner in which they are interspersed with
\r
355 # subroutine calls to pluck info from the lines is
\r
358 # Check whether it is a line that we should prune and
\r
359 # ignore, because of the filters.
\r
361 next if (($proto eq 'C3' || $proto eq 'C2') && # call records are identified by $proto (as in the tests below), not $op
\r
362 ! pruneCall ($line, $client));
\r
364 if ($DO_PATHS || $DO_FILES) {
\r
365 hier::processLine ($line,
\r
366 $proto, $op, $xid, $client,
\r
367 $now, $response, $FH_TYPE);
\r
370 my $key = key::makeKey ($line, $proto, $op,
\r
371 $xid, $client, $now,
\r
372 $UseClient, $UseFH, $UseUID, $UseGID,
\r
374 if (! defined $key) {
\r
377 $KeysSeen{$key} = 1;
\r
379 # Count everything towards the total, but only
\r
380 # do the rest of the processing for things
\r
381 # that are "interesting".
\r
383 if ($proto eq 'C3' || $proto eq 'C2') {
\r
384 $counts::OpCounts{"$key,TOTAL"}++;
\r
385 $counts::KeysSeen{$key} = 1;
\r
387 next if (! exists $OPARRAY{$op});
\r
389 $counts::OpCounts{"$key,$op"}++;
\r
390 $counts::OpCounts{"$key,INTERESTING"}++;
\r
393 if ($op eq 'read' && exists $OPARRAY{'readM'}) {
\r
394 doReadSize ($line, $proto, $op, $key, $client, $xid, $response, $now);
\r
397 if ($op eq 'write' && exists $OPARRAY{'writeM'}) {
\r
398 doWriteSize ($line, $proto, $op, $key, $client, $xid, $response, $now);
\r
402 latency::update ($key, $proto, $op,
\r
403 $xid, $client, $now);
\r
406 if ($END_TIME < 0) {
\r
407 $START_TIME = findStartTime ($NOW, $TIME_ROUNDING);
\r
408 $END_TIME = $START_TIME + $INTERVAL;
\r
411 # Note that this is a loop, because if the interval is
\r
412 # short enough, or the system is very idle (or there's
\r
413 # a filter in place that makes it look idle), entire
\r
414 # intervals can go by without anything happening at
\r
415 # all. Some tools can get confused if intervals are
\r
416 # missing from the table, so we emit them anyway.
\r
418 while (($INTERVAL > 0) && ($NOW >= $END_TIME)) {
\r
419 printAll ($START_TIME);
\r
421 counts::resetOpCounts ();
\r
422 latency::resetOpCounts ();
\r
424 $START_TIME += $INTERVAL;
\r
425 $END_TIME = $START_TIME + $INTERVAL;
\r
428 if ($now > $nextPruneTime) {
\r
429 key::prunePending ($now - $PRUNE_INTERVAL);
\r
430 latency::prunePending ($now - $PRUNE_INTERVAL);
\r
432 prunePending ($now - $PRUNE_INTERVAL);
\r
434 $nextPruneTime = $now + $PRUNE_INTERVAL;
\r
438 # Squeeze out the last little bit, if there's anything that we
\r
439 # counted but did not emit. If DO_SQUEEZE is true, then
\r
440 # always do this. Otherwise, only squeeze out the results of
\r
441 # the last interval if the interval is "almost" complete (ie
\r
442 # within 10 seconds of the end).
\r
444 if (($NOW > $START_TIME) && ($DO_SQUEEZE || (($END_TIME - $NOW) < 10))) {
\r
445 printAll ($START_TIME);
\r
446 counts::resetOpCounts ();
\r
449 print "#T endtime = $NOW\n";
\r
454 my ($start_time) = @_;
\r
457 counts::printOps ($start_time, *OUT_COUNTS);
\r
461 latency::printOps ($start_time, *OUT_LATENCY);
\r
465 hier::printAll ($start_time, *OUT_FILES);
\r
469 printPaths ($start_time, *OUT_PATHS);
\r
474 my ($line, $client) = @_;
\r
476 if (@ADD_USERS > 0 || @DEL_USERS > 0) {
\r
477 my $c_uid = nfsd::nfsDumpParseLineField ($line, 'euid');
\r
478 if (! defined ($c_uid)) {
\r
481 $c_uid = hex ($c_uid);
\r
483 if (@ADD_USERS && !grep (/^$c_uid$/, @ADD_USERS)) {
\r
486 if (@DEL_USERS && grep (/^$c_uid$/, @DEL_USERS)) {
\r
491 if (@ADD_GROUPS > 0 || @DEL_GROUPS > 0) {
\r
492 my $c_gid = nfsd::nfsDumpParseLineField ($line, 'egid');
\r
493 if (! defined ($c_gid)) {
\r
496 $c_gid = hex ($c_gid);
\r
498 if (@ADD_GROUPS && !grep (/^$c_gid$/, @ADD_GROUPS)) {
\r
501 if (@DEL_GROUPS && grep (/^$c_gid$/, @DEL_GROUPS)) {
\r
506 if (@ADD_CLIENTS > 0 || @DEL_CLIENTS > 0) {
\r
507 if (@ADD_CLIENTS && !grep (/^\Q$client\E$/, @ADD_CLIENTS)) { # \Q..\E: match the client id literally
\r
510 if (@DEL_CLIENTS && grep (/^\Q$client\E$/, @DEL_CLIENTS)) { # \Q..\E: match the client id literally
\r
518 %PathOpCounts = ();
\r
522 my ($fh, $key) = @_;
\r
526 foreach my $op ( @OPLIST ) {
\r
527 if (exists $counts::OpCounts{"$key,$op"}) {
\r
528 $cnt = $counts::OpCounts{"$key,$op"};
\r
533 $PathOpCounts{"$fh,$op"} = $cnt;
\r
535 $PathsSeen{$fh} = 1;
\r
539 while (defined ($pfh = $hier::fh2Parent{$pfh})) {
\r
542 print "Really long path ($fh)\n";
\r
546 if (exists $PathOpCounts{"$pfh,$op"}) {
\r
547 $PathOpCounts{"$pfh,$op"} += $cnt;
\r
550 $PathOpCounts{"$pfh,$op"} = $cnt;
\r
552 $PathsSeen{$pfh} = 1;
\r
560 my ($start_time, $out) = @_;
\r
562 my $str = "#D time Dir/File dircnt path fh";
\r
563 foreach my $op ( @OPLIST ) {
\r
572 foreach my $key ( keys %KeysSeen ) {
\r
573 my ($client_id, $fh, $euid, $egid) = split (/,/, $key);
\r
575 buildDirPath ($fh, $key);
\r
578 foreach my $fh ( keys %PathsSeen ) {
\r
579 my ($path, $cnt) = hier::findPath ($fh);
\r
585 my $type = (exists $hier::fhIsDir{$fh} && $hier::fhIsDir{$fh}==2) ? 'D' : 'F';
\r
587 my $str = "$cnt $type $path $fh ";
\r
589 foreach my $op ( @OPLIST ) {
\r
592 if (exists $PathOpCounts{"$fh,$op"}) {
\r
593 $cnt = $PathOpCounts{"$fh,$op"};
\r
596 print "Missing $fh $op\n";
\r
602 $PathOpCounts{"$fh,$op"} = 0; # &&& reset
\r
605 print $out "D $start_time $str\n";
\r
613 my ($line, $proto, $op, $key, $client, $xid, $response, $time) = @_;
\r
615 my $uxid = "$client-$xid";
\r
617 if ($proto eq 'C3' || $proto eq 'C2') {
\r
618 $uxid2time{$uxid} = $time;
\r
619 $uxid2key{$uxid} = $key;
\r
622 if (! exists $uxid2key{$uxid}) {
\r
625 if ($response ne 'OK') {
\r
629 $key = $uxid2key{$uxid};
\r
630 my $count = nfsd::nfsDumpParseLineField ($line, 'count');
\r
631 $count = hex ($count);
\r
633 delete $uxid2key{$uxid};
\r
634 delete $uxid2time{$uxid};
\r
636 $counts::OpCounts{"$key,readM"} += $count;
\r
640 # Note that we always just assume that writes succeed, because on most
\r
641 # systems they virtually always do. If you're tracing a system where
\r
642 # your users are constantly filling up the disk or exceeding their
\r
643 # quotas, then you will need to fix this.
\r
646 my ($line, $proto, $op, $key, $client, $xid, $response, $time) = @_;
\r
648 if ($proto eq 'C3' || $proto eq 'C2') {
\r
650 my $tag = ($proto eq 'C3') ? 'count' : 'tcount';
\r
652 my $count = nfsd::nfsDumpParseLineField ($line, $tag);
\r
655 print "WEIRD count $line\n"; # print, not printf: $line is data and may contain '%'
\r
658 $count = hex ($count);
\r
660 $counts::OpCounts{"$key,writeM"} += $count;
\r
665 # Purge all the pending XID records dated earlier than $when (which is
\r
666 # typically at least $PRUNE_INTERVAL seconds ago). This is important
\r
667 # because otherwise missing XID records can pile up, eating a lot of
\r
673 foreach my $uxid ( keys %uxid2time ) {
\r
674 if ($uxid2time{$uxid} < $when) {
\r
675 delete $uxid2key{$uxid};
\r
676 delete $uxid2time{$uxid};
\r