Add proper per-file copyright notices/licenses and top-level license.
[bluesky.git] / TBBT / trace_play / sfs_c_chd.2thread.c
1 #ifndef lint
2 static char sfs_c_chdSid[] = "@(#)sfs_c_chd.c   2.1     97/10/23";
3 #endif
4
5 /*
6  *   Copyright (c) 1992-1997,2001 by Standard Performance Evaluation Corporation
7  *      All rights reserved.
8  *              Standard Performance Evaluation Corporation (SPEC)
9  *              6585 Merchant Place, Suite 100
10  *              Warrenton, VA 20187
11  *
12  *      This product contains benchmarks acquired from several sources who
13  *      understand and agree with SPEC's goal of creating fair and objective
14  *      benchmarks to measure computer performance.
15  *
16  *      This copyright notice is placed here only to protect SPEC in the
17  *      event the source is misused in any manner that is contrary to the
18  *      spirit, the goals and the intent of SPEC.
19  *
20  *      The source code is provided to the user or company under the license
21  *      agreement for the SPEC Benchmark Suite for this product.
22  */
23
24 /*****************************************************************
25  *                                                               *
26  *      Copyright 1991,1992  Legato Systems, Inc.                *
27  *      Copyright 1991,1992  Auspex Systems, Inc.                *
28  *      Copyright 1991,1992  Data General Corporation            *
29  *      Copyright 1991,1992  Digital Equipment Corporation       *
30  *      Copyright 1991,1992  Interphase Corporation              *
31  *      Copyright 1991,1992  Sun Microsystems, Inc.              *
32  *                                                               *
33  *****************************************************************/
34
35 /*
36  * -------------------------- sfs_c_chd.c -------------------------
37  *
38  *      The sfs child.  Routines to initialize child parameters,
39  *      initialize test directories, and generate load.
40  *
41  *.Exported_Routines
42  *      void child(int, float, int, char *);
43  *      void init_fileinfo(void);
44  *      void init_counters(void);
45  *      sfs_fh_type * randfh(int, int, uint_t, sfs_state_type,
46  *                              sfs_file_type);
47  *      int check_access(struct *stat)
48  *      int check_fh_access();
49  *
50  *.Local_Routines
51  *      void check_call_rate(void);
52  *      void init_targets(void);
53  *      void init_dirlayout(void);
54  *      void init_rpc(void);
55  *      void init_testdir(void);
56  *      int do_op(void);
57  *      int op(int);
58  *
59  *.Revision_History
60  *      21-Aug-92       Wittle          randfh() uses working set files array.
61  *                                      init_fileinfo() sets up working set.
62  *      02-Jul-92       Teelucksingh    Target file size now based on peak load
63  *                                      instead of BTDT.
64  *      04-Jan-92       Pawlowski       Added raw data dump hooks.
65  *      16-Dec-91       Wittle          Created.
66  */
67
68
69 /*
70  * -------------------------  Include Files  -------------------------
71  */
72
73 /*
74  * ANSI C headers
75  */
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <errno.h>
80 #include <math.h>
81 #include <signal.h>
82
83 #include <sys/types.h>
84 #include <sys/stat.h> 
85  
86 #include <unistd.h>
87
88 #include "sfs_c_def.h"
89 #include "sfs_m_def.h"
90 #include "rfs_c_def.h"
91 #include "generic_hash.h"
92 #include "nfsd_nfsfh_cust.h"
93
94 extern struct hostent   *Server_hostent;
95
96 #define PROB_SCALE 1000L
97 #define _M_MODULUS 2147483647L /* (2**31)-1 */
98
99 #define _GROUP_DIVISOR 500
100 #define _FILES_PER_GROUP 4
101 #define _MIN_GROUPS 12
102 #define _WORKING_SET_AT_25_OPS_PER_SEC 975
103
104
105 /*
106  * -----------------------  External Definitions  -----------------------
107  */
108 extern uint32_t    biod_clnt_call(CLIENT *, uint32_t, xdrproc_t, void *);
109 extern enum clnt_stat proc_header(CLIENT *cl, xdrproc_t xdr_results, void *results_ptr);
110 extern int  biod_poll_wait(CLIENT *, uint32_t);
111 extern enum clnt_stat get_areply_udp (CLIENT * cl, uint32_t *xid, struct timeval *timeout);
112 extern char * parse_name (char * t, char * buf);
113
114 /* forward definitions for local functions */
115 static int init_rpc(void);
116
117 /* RFS: forward definitions for local functions */
118 void init_ops(void);
119 static void init_signal();
120 extern void init_file_system (void);
121 extern void init_dep_tab (void);
122 static int read_trace(void);
123 static void read_fh_map();
124 static void init_play (char * mount_point);
125 static void trace_play(void);
126 void print_result(void);
127 static int get_nextop(void);
128 static int check_timeout(void);
129 static struct biod_req * get_biod_req(int dep_tab_index);
130 static int lookup_biod_req (int xid);
131 static void init_time_offset(void);
132 void adjust_play_window (int flag, int * poll_timeout);
133 static int poll_and_get_reply (int usecs);
134 static char * nfs3_strerror(int status);
135 static void check_clock(void);
136 static double time_so_far1(void);
137 static double get_resolution(void);
138 static void usage(void);
139 void init_dep_tab_entry (int dep_index);
140 extern inline fh_map_t * lookup_fh (char * trace_fh );
141 static inline void finish_request (int biod_index, int dep_index, int status);
142 static inline void add_new_file_system_object (int proc, int dep_index, char * line, char * reply_line);
143 static inline char * find_lead_trace_fh(int proc, char * line);
144 static inline char * find_reply_trace_fh (char * line);
145
146 /*
147  * -------------  Per Child Load Generation Rate Variables  -----------
148  */
149 static uint_t Calls_this_period; /* calls made during the current run period */
150 static uint_t Calls_this_test;  /* calls made during the test so far */
151 static uint_t Reqs_this_period; /* reqs made during the current run period */
152 static uint_t Reqs_this_test;   /* reqs made during the test so far */
153 static uint_t Sleep_msec_this_test; /* msec slept during the test so far */
154 static uint_t Sleep_msec_this_period;
155 static uint_t Previous_chkpnt_msec; /* beginning time of current run period */
156 static int Target_sleep_mspc;   /* targeted sleep time per call */
157
158 static char io_buf[BUFSIZ];     /* io buffer for print out messages */
159
160 char * sfs_Myname;
161 int     Log_fd;                         /* log fd */
162 char    Logname[NFS_MAXNAMLEN];         /* child processes sync logfile */
163 int     Validate = 0;                                   /* fake variable */
164 int Child_num = 0;                                              /* fake: child index */
165 int Tcp = 0;                                                    /* We implement UDP first */
166 int Client_num = 1;                                             /* fake: number of client */
167 uid_t Real_uid;
168 gid_t Cur_gid;
169 uid_t Cur_uid;
170 /*
171  * -------------------------  SFS Child  -------------------------
172  */
173
174 static int nfs2proc_to_rfsproc[18] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
175                                                                         10, 11, 12, 13, 14, 15, 16, 17};
176 static int nfs3proc_to_rfsproc[NFS3_PROCEDURE_COUNT] = {0, 1, 2, 4, 18, 5, 6, 8, 9, 14, 
177                                                                                                         13, 21, 10, 15, 11, 12, 16, 23, 17, 20, 
178                                                                                                         22, 19};
179 void print_usage(int pos, int argc, char ** argv)
180 {
181         int i;
182         printf("sfs3 hostname:mount_dir trace_file|stdin fh_map_file play_scale warmup_time(in seconds) \n");
183         printf("sfs3 -pair_trace trace_file\n");
184         printf("sfs3 -pair_write trace_file\n");
185         printf("sfs3 -help\n");
186         printf ("pos %d argc %d", pos, argc);
187         for (i=0; i<argc; i++) {
188                 printf(" %s", argv[i]);
189         } 
190         printf ("\n");
191         exit;
192 }
193
194 void print_cyclic_buffers ()
195 {
196         CYCLIC_PRINT(memory_trace_index);
197         CYCLIC_PRINT(dep_tab_index);
198         CYCLIC_PRINT(dep_window_index);
199 }
200
201 void add_to_dep_tab(int i)
202 {
203         char * trace_fh;
204         fh_map_t * fh_map_entry;
205         dep_tab_t * ent;
206
207     trace_fh = strstr (memory_trace[i].line, "fh");
208     if (!trace_fh)
209         printf ("memory_trace[%d].line %s\n", i, memory_trace[i].line);
210     RFS_ASSERT (trace_fh);
211     trace_fh += 3;
212     fh_map_entry = lookup_fh (trace_fh);
213     if (fh_map_entry && (fh_map_entry->flag==FH_MAP_FLAG_DISCARD) )  {
214         req_num_with_discard_fh ++;
215                 return;
216         }
217     if (fh_map_entry)
218         req_num_with_init_fh ++;
219     else
220         req_num_with_new_fh ++;
221                                                                                                                               
222         RFS_ASSERT (!CYCLIC_FULL(dep_tab_index));
223         ent = &(dep_tab[dep_tab_index.head]);
224
225     ent->disk_index = memory_trace[i].disk_index;
226         ent->memory_index = i;
227 #ifdef REDUCE_MEMORY_TRACE_SIZE
228     ent->trace_status = memory_trace[i].trace_status;
229     ent->reply_trace_fh = memory_trace[i].reply_trace_fh;
230 #endif
231     ent->line = memory_trace[i].line;
232     init_dep_tab_entry(dep_tab_index.head);
233
234     if (rfs_debug && (i%100000)==0)
235         printf ("dep_tab[%d].disk_index %d = memory_trace[%d].disk_index %d\n", dep_tab_index.head, ent->disk_index, i, memory_trace[i].disk_index);
236     CYCLIC_MOVE_HEAD(memory_trace_index);
237     CYCLIC_MOVE_HEAD(dep_tab_index);
238 }
239
240 void init_profile_variables ()
241 {
242         init_profile ("total_profile", &total_profile);
243         init_profile ("execute_next_request_profile", &execute_next_request_profile);
244         init_profile ("valid_get_nextop_profile", &valid_get_nextop_profile);
245         init_profile ("invalid_get_nextop_profile",&invalid_get_nextop_profile);
246         init_profile ("prepare_argument_profile", &prepare_argument_profile);
247         init_profile ("biod_clnt_call_profile", &biod_clnt_call_profile);
248         init_profile ("receive_next_reply_profile", &receive_next_reply_profile);
249         init_profile ("valid_poll_and_get_reply_profile", &valid_poll_and_get_reply_profile);
250         init_profile ("invalid_poll_and_get_reply_profile", &invalid_poll_and_get_reply_profile);
251         init_profile ("decode_reply_profile", &decode_reply_profile);
252         init_profile ("check_reply_profile", &check_reply_profile);
253         init_profile ("add_create_object_profile", &add_create_object_profile);
254         init_profile ("check_timeout_profile", &check_timeout_profile);
255         init_profile ("adjust_play_window_profile",&adjust_play_window_profile);
256         init_profile ("fgets_profile",&fgets_profile);
257         init_profile ("read_line_profile",&read_line_profile);
258         init_profile ("read_trace_profile",&read_trace_profile);
259 }
260
261 static char trace_file[256]="anon-lair62-011130-1200.txt";
262 int print_memory_usage()
263 {
264         printf("size of fh_map_t %d size of fh_map %d\n", sizeof(fh_map_t), sizeof(fh_map));
265         printf("sizeof dep_tab_t %d sizeof dep_tab %d\n", sizeof(dep_tab_t), sizeof(dep_tab));
266         printf("size of memory_trace_ent_t %d sizeof memory_trace %d\n", sizeof(memory_trace_ent_t), sizeof(memory_trace));
267         printf("size of CREATE3args %d\n", sizeof( CREATE3args));
268         printf("size of MKDIR3args %d\n", sizeof( MKDIR3args));
269         printf("size of READ3args %d\n", sizeof( READ3args));
270         printf("size of WRITE3args %d\n", sizeof( WRITE3args));
271         printf("size of RENAME3args %d\n", sizeof( RENAME3args));
272         printf("size of GETATTR3args %d\n", sizeof( GETATTR3args));
273         printf("size of SETATTR3args %d\n", sizeof( SETATTR3args));
274         printf("size of LINK3args %d\n", sizeof( LINK3args));
275         printf("size of SYMLINK3args %d\n", sizeof( SYMLINK3args));
276         printf("size of MKNOD3args %d\n", sizeof( MKNOD3args));
277         printf("size of RMDIR3args %d\n", sizeof( RMDIR3args));
278         printf("size of REMOVE3args %d\n", sizeof( REMOVE3args));
279         printf("size of LOOKUP3args %d\n", sizeof( LOOKUP3args));
280         printf("size of READDIR3args %d\n", sizeof( READDIR3args));
281         printf("size of READDIRPLUS3args %d\n", sizeof( READDIRPLUS3args));
282         printf("size of FSSTAT3args %d\n", sizeof( FSSTAT3args));
283         printf("size of FSINFO3args %d\n", sizeof( FSINFO3args));
284         printf("size of COMMIT3args %d\n", sizeof( COMMIT3args));
285         printf("size of ACCESS3args %d\n", sizeof( ACCESS3args));
286         printf("size of READLINK3args %d\n", sizeof( READLINK3args));
287
288
289 }
290
291 int io_thread ()
292 {
293 /* number of seconds the I/O thread pauses after each time trying to read the requests */
294 #define IO_THREAD_PAUSE_TIME 1
295
296         int i;
297         int j = 0;
298
299         disk_io_status = read_trace ();
300         while (disk_io_status == TRACE_BUF_FULL) {
301
302                 usleep (10000);
303         if ((j++%200)==0) {
304                 printf("&&&&&&&&&& io thread, sleep %d seconds\n", j/10);
305         }
306
307                 disk_io_status = read_trace ();
308                 //printf ("io_thread, after read_trace, disk_index %d\n", disk_index);
309
310 #ifdef SEQUEN_READ
311                 for (i=0; i<SEQUEN_READ_NUM; i++) {
312                         add_to_dep_tab(CYCLIC_MINUS(memory_trace_index.head,1,memory_trace_index.size));
313                 }
314 #endif
315                 //printf ("***************** IO THREAD SLEEP 1 s\n");
316                 //print_cyclic_buffers();
317                 //pthread_yield();
318         }
319         RFS_ASSERT (disk_io_status == TRACE_FILE_END);
320 }
321
322 int execute_thread()
323 {
324         int i;
325         struct timeval playstart_time, playend_time;
326
327         sleep (10);             /* first let io_thread to run for a while */
328         init_time_offset();
329         printf ("trace_play\n");
330
331         gettimeofday(&playstart_time, NULL);
332
333         trace_play ();
334
335         gettimeofday(&playend_time, NULL);
336
337         {
338                 int usec, sec;
339                 sec = playend_time.tv_sec - playstart_time.tv_sec;
340                 usec = sec * 1000000 + (playend_time.tv_usec - playstart_time.tv_usec);
341                 sec = usec / 1000000;
342                 usec = usec % 1000000;
343                 printf("Total play time: %d sec %d usec\n", sec, usec);
344         }
345
346         print_result();
347 }
348
349 int init_thread ()
350 {
351         pthread_attr_t  attr;
352         int arg;
353         int ret = 0;
354         pthread_t io_thread_var;
355         pthread_t execute_thread_var;
356
357         ret = pthread_attr_init (&attr);
358         if (ret!=0) {
359                 perror("pthread_attr_init attr");
360                 return ret;
361         }
362         ret = pthread_create (&(io_thread_var), &attr, &io_thread, (void *)&arg );
363         if (ret!=0) {
364                 perror("io_pthread_attr_init");
365                 return ret;
366         }
367
368 /*
369         ret = pthread_create (&(execute_thread_var), &attr, &execute_thread, (void *)&arg );
370         if (ret!=0) {
371                 perror("io_pthread_attr_init");
372                 return ret;
373         }
374 */
375 }
376
377 void init_buffers()
378 {
379         CYCLIC_INIT("memory_trace_index",memory_trace_index,MAX_MEMORY_TRACE_LINES);
380         CYCLIC_INIT("dep_tab_index     ",dep_tab_index,DEP_TAB_SIZE);
381         CYCLIC_INIT("dep_window_index  ",dep_window_index,DEP_TAB_SIZE);
382 }
383
384 int main(int argc, char ** argv)
385 {
386         extern char * optarg;
387         int i;
388         struct timeval in={1000000, 100};
389         
390         init_profile_variables();
391         if ((argc==1) || (argc==2 && !strcmp(argv[1],"-help"))) {
392                 print_usage(0, argc, argv);
393                 exit(0);
394         }
395         if (!strcmp(argv[1], "-pair_write")) {
396                 if (argc==3) 
397                         strcpy(trace_file, argv[2]);
398                 else
399                         strcpy(trace_file, "stdin");
400                 pair_write(trace_file);
401                 exit(0);
402         }
403         if (!strcmp(argv[1], "-pair_trace")) {
404                 if (argc==3) 
405                         strcpy(trace_file, argv[2]);
406                 else
407                         strcpy(trace_file, "stdin");
408                 pair_trace(trace_file);
409                 exit(0);
410         }
411         if (!strcmp(argv[1], "-check_aging")) {
412                 if (argc!=3) {
413                         print_usage(3, argc, argv);
414                         exit(0);
415                 }
416                 strcpy(trace_file, argv[2]);
417                 check_aging (trace_file);
418                 exit(0);
419         }
420         if (!strcmp(argv[1], "-check_statistics")) {
421                 if (argc!=3) {
422                         print_usage(1, argc, argv);
423                         exit(0);
424                 }
425                 strcpy(trace_file, argv[2]);
426                 memset(fh_htable, 0, sizeof (fh_htable));
427                 check_statistics (trace_file);
428                 exit(0);
429         }
430
431         if (argc!=6) {
432                 print_usage(2, argc, argv);
433                 exit(0);
434         }
435         PLAY_SCALE = atoi (argv[4]);
436         RFS_ASSERT (PLAY_SCALE >=1 && PLAY_SCALE <=10000);
437
438         WARMUP_TIME = atoi (argv[5]);
439         RFS_ASSERT (WARMUP_TIME >=0 && WARMUP_TIME <=1000);
440         
441         print_memory_usage();
442         check_clock();
443     getmyhostname(lad_hostname, HOSTNAME_LEN);
444
445     init_ops();
446         /*
447          * Get the uid and gid information.
448          */
449         Real_uid = getuid();
450         Cur_gid = getgid();
451         //Real_uid = 513;
452         //Cur_gid = 513;
453
454         Nfs_timers = Nfs_udp_timers;
455
456         init_file_system ();
457         init_signal();
458         init_play (argv[1]);
459         //init_play ("capella:/p5/RFSFS");
460         init_fh_map();
461         read_fh_map (argv[3]);
462         //read_fh_map ("fh-path-map-play");
463         strcpy(trace_file, argv[2]);
464
465 /* If ordered by TIMESTAMP,
466  * memory_trace_index.tail <= dep_tab_index.tail < dep_window_max <=
467  * dep_tab_index.head <= memory_trace_index.head
468  */
469
470         init_buffers();
471         //init_thread();
472         pthread_yield();
473         execute_thread();
474 }
475
476 void init_ops (void)
477 {
478         Ops = nfsv3_Ops;
479         nfs_version = NFS_V3;
480 }
481
482 /* Set up the signal handlers for all signals */
483 void init_signal()
484 {
485 #if (defined(_XOPEN_SOURCE) || defined(USE_POSIX_SIGNALS))
486         struct sigaction sig_act, old_sig_act;
487
488         /* use XOPEN signal handling */
489
490         sig_act.sa_handler = generic_catcher;
491         (void)sigemptyset(&sig_act.sa_mask);
492         sig_act.sa_flags = 0;
493
494         /* signals handlers for signals used by sfs */
495         sig_act.sa_handler = sfs_cleanup;
496         if (sigaction(SIGINT,&sig_act,&old_sig_act) == -1) {
497             perror("sigaction failed: SIGINT");
498             exit(135);
499         }
500
501         sig_act.sa_handler = sfs_cleanup;
502         if (sigaction(SIGTERM,&sig_act,&old_sig_act) != 0)  {
503             perror("sigaction failed: SIGTERM");
504             exit(137);
505         }
506 #else
507     /* signals handlers for signals used by sfs */
508     (void) signal(SIGINT, sfs_cleanup);
509     // RFS (void) signal(SIGALRM, sfs_alarm);
510         (void) signal(SIGTERM, sfs_cleanup);
511 #endif
512 }
513
514 void
515 init_play (
516     char        * mount_point)          /* Mount point for remote FS */
517 {
518     char        namebuf[NFS_MAXNAMLEN] = "trace_play";  /* unique name for this program */
519     CLIENT *    mount_client_ptr;       /* Mount client handle */
520
521         if (!rfs_debug);
522         (void) setvbuf(stderr, io_buf, _IOLBF, BUFSIZ);
523
524     sfs_Myname = namebuf;
525
526     /*
527      * May require root priv to perform bindresvport operation
528      */
529     mount_client_ptr = lad_getmnt_hand(mount_point);
530     if (mount_client_ptr == NULL) {
531                 exit(145);
532     }
533
534     /*
535      * should be all done doing priv port stuff
536      */
537
538     if (init_rpc() == -1) {
539                 (void) fprintf(stderr, "%s: rpc initialization failed\n", sfs_Myname);
540                 (void) generic_kill(0, SIGINT);
541                 exit(146);
542     }
543
544
545     /*
546      * finish all priv bindresvport calls
547      * reset uid
548      */
549     if (setuid(Real_uid) != (uid_t)0) {
550         (void) fprintf(stderr,"%s: %s%s", sfs_Myname,
551             "cannot perform setuid operation.\n",
552             "Do `make install` as root.\n");
553     }
554
555     init_mount_point(0, mount_point, mount_client_ptr);
556
557
558     /*
559      * Cleanup client handle for mount point
560      */
561     clnt_destroy(mount_client_ptr);
562
563         init_counters();
564 }
565
566 #ifdef REDUCE_MEMORY_TRACE_SIZE
567 inline char * read_line (int disk_index)
568 {
569         static FILE * fp=NULL;
570         static int start=0;
571         static int start_disk_index=0;
572         int i;
573         static int finish_flag = 0;
574
575 #define READ_LINE_BUF_SIZE (MAX_COMMAND_REPLY_DISTANCE_FOR_PAIR+2)
576 #define SAFE_BYTES 1000
577 #define READ_LINE_LENGTH (MAX_TRACE_LINE_LENGTH+SAFE_BYTES)
578
579         static char line_buf[READ_LINE_BUF_SIZE][READ_LINE_LENGTH];
580         start_profile (&read_line_profile);
581
582         if (fp==NULL) {
583                 if (strcmp(trace_file, "stdin")) {
584                         fp = fopen(trace_file, "r");
585                         if (!fp) {
586                                 printf("can not open files %s\n", fp);
587                                 perror("open");
588                         }
589                 } else {
590                         fp = stdin;
591                 }
592                 RFS_ASSERT (fp!=NULL);
593                 for (i=0; i<READ_LINE_BUF_SIZE; i++) {
594                         start_profile(&fgets_profile);
595                         if (!fgets(line_buf[i], READ_LINE_LENGTH, fp)) {
596                                 RFS_ASSERT (0);
597                         }
598                         end_profile(&fgets_profile);
599                         //printf ("read_line, line_buf[%d]:%s", i, line_buf[i]);
600                 }
601         }
602         
603         RFS_ASSERT (disk_index <= start_disk_index+READ_LINE_BUF_SIZE)
604         if (disk_index==(start_disk_index+READ_LINE_BUF_SIZE)) {
605                 if (finish_flag) {
606                         return NULL;
607                 }
608                 start_profile(&fgets_profile);
609                 if (!fgets(line_buf[start], READ_LINE_LENGTH, fp)) {
610                         end_profile(&fgets_profile);
611                         finish_flag = 1;
612                         return NULL;
613                 }
614                 end_profile(&fgets_profile);
615                 //printf ("read_line, line_buf[%d]:%s", start, line_buf[start]);
616                 start = (start+1) % READ_LINE_BUF_SIZE;
617                 start_disk_index ++;
618         }
619         RFS_ASSERT (disk_index < start_disk_index+READ_LINE_BUF_SIZE)
620         i = (start+disk_index-start_disk_index)%READ_LINE_BUF_SIZE;
621
622 /*
623         if (!(strlen(line_buf[i])>80)) {
624                 printf ("start %d start_disk_index %d disk_index %d strlen %d line_buf[%d] %s\n", start, start_disk_index, disk_index, strlen(line_buf[i]), i, line_buf[i]);
625                 RFS_ASSERT (strlen(line_buf[i])>80);
626         }
627         if (!((strlen(line_buf[i])>80) && (strlen(line_buf[i])<MAX_TRACE_LINE_LENGTH)))
628                 printf ("disk_index %d strlen %d line_buf[%d] %s\n", disk_index, strlen(line_buf[i]), i, line_buf[i]);
629         RFS_ASSERT ((strlen(line_buf[i])>80) && (strlen(line_buf[i])<MAX_TRACE_LINE_LENGTH));
630 */
631
632         end_profile (&read_line_profile);
633         return (line_buf[i]);
634 }
635
636 #define OPS_FLAG_INC 0
637 #define OPS_FLAG_PRINT 1
638 int read_write_fh_statistics (int flag, char * buf, int timestamp)
639 {
640         static FILE * fp = NULL;
641         char * p;
642         char c;
643         if (!fp) {
644                 fp = fopen ("read_write_fh.output", "w");
645                 RFS_ASSERT (fp);
646         }
647         if (flag == OPS_FLAG_INC) {
648                 p = strstr (buf, "fh");
649                 RFS_ASSERT (p);
650                 c = p[40+3];
651                 p[40+3]=0;
652                 fprintf(fp, "%s\n", p+3+24);
653                 p[40+3]=c;
654         };
655         if (flag == OPS_FLAG_PRINT) { 
656                 int disk_index = (int) buf;
657                 fprintf(fp, "###### disk_index %d timestamp %d\n", disk_index, timestamp);
658         }
659 }
660
661 int write_statistics(int flag, char * buf, char * reply_buf, int trace_status)
662 {
663         static FILE * fp = NULL;
664         int pre_size, size, count;
665         char * p = reply_buf;
666         if (!fp) {
667                 fp = fopen ("write.output", "w");
668                 RFS_ASSERT (fp);
669         }
670         if (flag == OPS_FLAG_INC) {
671                 p = strstr (p, "pre-size");
672                 RFS_ASSERT (p);
673                 sscanf (p, "pre-size %x", &pre_size); 
674                 p += strlen("pre-size");
675                 p = strstr (p, "size");
676                 RFS_ASSERT (p);
677                 sscanf (p, "size %x", &size); 
678                 p = strstr (p, "count");
679                 if (!p) 
680                         fprintf (fp, "%s status %x pre-size %x size %x count %x\n", buf, trace_status, pre_size, size, count);
681                 RFS_ASSERT (p);
682                 sscanf (p, "count %x", &count); 
683                 fprintf (fp, "%s status %x pre-size %x size %x count %x\n", buf, trace_status, pre_size, size, count);
684         }
685         if (flag == OPS_FLAG_PRINT) {
686                 int disk_index = (int) buf;
687                 int timestamp = (int) reply_buf;
688                 fprintf(fp, "###### disk_index %d timestamp %d\n", disk_index, timestamp);
689         }
690 }
691
692 void ops_statistics (int ops_flag, int proc, int timestamp)
693 {
694         static FILE * fp = NULL;
695         static struct {
696                 int count;
697                 int count_K;
698                 int update_flag;
699                 char name[32];
700         } ops[NOPS]={{0, 0, 0, "NULL"},{0, 0, 0, "GETATTR"},{0, 0, 1, "SETATTR"},{0, 0, 0, "ROOT"},
701                                   {0, 0, 0, "LOOKUP"},{0, 0, 0, "READLINK"},{0, 0, 0, "READ"},{0, 0, 1, "WRCACHE"},
702                                   {0, 0, 1, "WRITE"}, {0, 0, 1, "CREATE"},{0, 0, 1, "REMOVE"},{0, 0, 1, "RENAME"},
703                                   {0, 0, 1, "LINK"}, {0, 0, 1, "SYMLINK"},{0, 0, 1, "MKDIR"},{0, 0, 1, "RMDIR"},
704                                   {0, 0, 0, "READDIR"},{0, 0, 0, "FSSTAT"},{0, 0, 0, "ACCESS"},{0, 0, 0, "COMMIT"},
705                                   {0, 0, 0, "FSINFO"},{0, 0, 1, "MKNOD"}, {0, 0, 0, "PATHCONF"}, {0, 0, 0, "READDIRPLUS"}};
706
707         if (ops_flag == OPS_FLAG_INC) {
708                 RFS_ASSERT (proc>=0 && proc<NOPS);
709                 ops[proc].count ++;
710                 if (ops[proc].count == 1000) {
711                         ops[proc].count_K ++;
712                         ops[proc].count = 0;
713                 }
714         }
715         if (ops_flag == OPS_FLAG_PRINT) {
716                 int i;
717                 int update_K=0, update=0, total_K=0, total=0;
718                 float f1, f2;
719                 int disk_index = proc;
720
721                 if (!fp) {
722                         fp = fopen ("mix.output", "w");
723                         RFS_ASSERT (fp);
724                 }
725                 for (i=0; i<NOPS; i++) {
726                         total_K += ops[i].count_K;
727                         total += ops[i].count;
728                         if (ops[i].update_flag) {
729                                 update_K += ops[i].count_K;
730                                 update += ops[i].count;
731                         }
732                 }
733                 update_K += update/1000;
734                 update = update%1000;
735                 total_K += total/1000;
736                 total = total%1000;
737
738                 f1 = total_K;
739                 f1 = f1*1000+total;
740                 for (i=0; i<NOPS; i++) {
741                         f2 = ops[i].count_K;
742                         f2 = f2*1000+ops[i].count;
743                         fprintf (fp, "%12s %8d,%03d %3.2f\%\n", ops[i].name, ops[i].count_K, ops[i].count, f2*100/f1);  
744                         fprintf (stderr, "%12s %8d,%03d %3.2f\%\n", ops[i].name, ops[i].count_K, ops[i].count, f2*100/f1);      
745                 }
746                 f2 = update_K;
747                 f2 = f2*1000+update;
748                 fprintf (fp, "       total %8d,%03d\n", total_K, total);
749                 fprintf (stderr, "       total %8d,%03d\n", total_K, total);
750                 fprintf (fp, "      update %8d,%03d %2.2f\%\n", update_K, update, f2*100/f1);   
751                 fprintf (stderr, "      update %8d,%03d %2.2f\%\n", update_K, update, f2*100/f1);       
752                 fprintf(fp, "###### disk_index %d timestamp %d\n", disk_index, timestamp);
753                 fprintf(stderr, "###### disk_index %d timestamp %d\n", disk_index, timestamp);
754         }
755 }
756
757
758 void truncate_statistics (int flag, int proc, char * buf, char * reply_buf) 
759 {
760 #define TRACE_FH_SIZE2 16
761 #define BLOCK_SIZE 4096
762         static int no_size_num = 0;
763         static int have_size_num = 0;
764         static int equal_size_num = 0;
765         static int first_size_num = 0;
766         static int truncate_num = 0;
767         static int truncate_size = 0;
768         static int truncate_KB = 0;
769         static int truncate_block_num = 0;
770         static int padding_num = 0;
771         static int padding_KB = 0;
772         static int padding_size = 0;
773         static FILE * fp = NULL;
774         char * p;
775         char * trace_fh;
776         int pre_size, size, count;
777         struct generic_entry * ent;
778
779         if (flag == OPS_FLAG_PRINT) {
780                 int disk_index = proc;
781                 int timestamp = (int)buf;
782                 if (!fp) {
783                         fp = fopen ("truncate.output", "w");
784                         RFS_ASSERT (fp);
785                 }
786                 truncate_KB += truncate_size/1000;
787                 truncate_size %= 1000;
788                 padding_KB += padding_size/1000;
789                 padding_size %= 1000;
790                 fprintf(fp, "###### disk_index %d timestamp %d no_size_req %d have_size_req %d equal_size_req %d trunc_req %d trunc_KB %d trunc_block_num %d padding_num %d padding_KB %d\n", disk_index, timestamp, no_size_num, have_size_num, equal_size_num, truncate_num, truncate_KB, truncate_block_num, padding_num, padding_KB);
791                 fprintf(stderr, "###### disk_index %d timestamp %d no_size_req %d have_size_req %d equal_size_req %d trunc_req %d trunc_KB %d trunc_block_num %d padding_num %d padding_KB %d\n", disk_index, timestamp, no_size_num, have_size_num, equal_size_num, truncate_num, truncate_KB, truncate_block_num, padding_num, padding_KB);
792                 return;
793         }
794
795         p = strstr (&buf[TRACE_MSGID_POS], "fh");
796         RFS_ASSERT (p);
797         p+=3; 
798         trace_fh = p;
799                 
800         if (proc==SETATTR) {
801                 p = strstr (buf, " size ");
802         } else {
803                 RFS_ASSERT (proc==WRITE);
804                 p = strstr (reply_buf, " ftype 1 ");
805                 RFS_ASSERT (p);
806                 p = strstr (p, " size ");
807                 RFS_ASSERT (p);
808                 if (strstr(p, " ftype 1 ")) {
809                         fprintf (fp, "#### %s%s\n", buf, reply_buf);
810                         fprintf (stderr, "#### %s%s\n", buf, reply_buf);
811                 }
812                 RFS_ASSERT (!strstr(p, " ftype 1 "));
813         }
814         if (!p) {
815                 no_size_num ++;
816                 return;
817         }
818         have_size_num ++;
819
820         sscanf (p, " size %x", &size); 
821         if (size <0 || size > 2000000000) {
822                 fprintf (fp, "#### size too big %x %s %s\n", size, buf, reply_buf);
823                 fprintf (stderr, "#### size too big %x %s %s\n", size, buf, reply_buf);
824         }
825                                                         
826         RFS_ASSERT (size >=0 && size <2000000000);              
827         ent = generic_lookup (trace_fh+24, TRACE_FH_SIZE2, 0, fh_htable, FH_HTABLE_SIZE);
828         if (ent) {
829                 if (ent->key3 != size) {
830                         if (proc==SETATTR) {
831                                 //printf ("%s\n", buf);
832                                 //printf ("size change fh %s pre-size %x size %x\n", trace_fh, ent->key3, size);
833                                 if (ent->key3 > size) {
834                                         truncate_num ++;
835                                         truncate_size += ent->key3 - size;
836                                         truncate_block_num += (ent->key3+BLOCK_SIZE-1)/BLOCK_SIZE;
837                                         if (size!=0) {
838                                                 //fprintf (stderr, "truncate: pre_size %x size %x %s\n", ent->key3, size, buf);
839                                                 //fprintf (fp, "truncate: pre_size %x size %x %s\n", ent->key3, size, buf);
840                                                 truncate_block_num -= (size + BLOCK_SIZE-1)/BLOCK_SIZE;
841                                         }
842                                         if (truncate_size > 1000000000) {
843                                                 truncate_KB += truncate_size/1000;
844                                                 truncate_size %= 1000;
845                                         }
846                                 } else {
847                                         padding_num ++; 
848                                         //printf ("%s\n", buf);
849                                         //printf ("padding fh %s pre-size %x size %x\n", trace_fh, ent->key3, size);
850                                         padding_size += size - ent->key3;
851                                         if (padding_size > 1000000000) {
852                                                 padding_KB += padding_size/1000;
853                                                 padding_size %= 1000;
854                                         }
855                                 }
856                         }
857                         ent->key3 = size; 
858                 }else 
859                         equal_size_num++;
860         } else {
861                 generic_insert(trace_fh+24, TRACE_FH_SIZE2, size, fh_htable, FH_HTABLE_SIZE);
862                 first_size_num ++;
863         }
864 };
865
866 int get_timestamp (char * buf)
867 {
868         char str[128];
869         int ret;
870         strncpy(str, buf, 100);
871         RFS_ASSERT (str[10]=='.');
872         str[10]=0;
873         ret = atoi(str);
874         RFS_ASSERT (ret >1000000000 && ret <2000000000);
875         return ret;
876 }
877
878 int check_aging (char * tracefile)
879 {
880         int disk_index=-1;
881         char *buf; 
882         char *reply_buf;
883         int i;
884         int trace_status;
885         int debug = 0;
886         int nfs3proc, msgid, proc;
887
888         while ((buf=read_line(++disk_index))!=NULL) {
889                 if (buf[TRACE_COMMAND_REPLY_FLAG_POS]!='C') 
890                         continue;
891                 if (buf[TRACE_VERSION_POS]!='3') 
892                         continue;
893                 sscanf (&buf[TRACE_MSGID_POS], "%x %x", &msgid, &nfs3proc);
894                 
895                 RFS_ASSERT (nfs3proc>=0 && nfs3proc<NFS3_PROCEDURE_COUNT);
896                 
897                 proc = nfs3proc_to_rfsproc[nfs3proc];
898                 ops_statistics (OPS_FLAG_INC, proc, -1);
899                 
900                 switch (proc) {
901                 int off, count, size;
902                 char * t;
903                 case CREATE: printf("%s\n", "create"); break;
904                 case MKDIR: printf("%s\n", "mkdir"); break;
905                 case REMOVE: printf("%s\n", "remove"); break;
906                 case RMDIR: printf("%s\n", "rmdir"); break;
907                 case WRITE: 
908                         t = buf;
909                         t = strstr (t, "off");
910                 RFS_ASSERT (t);
911                     t+=4;
912                         sscanf (t, "%x", &off);
913                         RFS_ASSERT (off>=0 && off<0x7FFFFFFF)
914                         t = strstr (t, "count");
915                     RFS_ASSERT (t);
916                         t+=6;
917                         sscanf (t, "%x", &count);
918                         RFS_ASSERT (count <= 32768);
919                         printf("%s off %x count %x\n", "write", off, count); 
920                         //printf("%s count %x\n", "write", count); 
921                         break;
922                 case SETATTR: 
923                         t = strstr (buf, " size ");
924                         if (t) {
925                                 sscanf (t, " size %x", &size);
926                                 printf ("%s size %x\n", "setattr", size);
927                         }
928                 }
929                 if ((disk_index%10000)==0) {
930                         fprintf(stderr, "%d disk trace passed\n", disk_index);
931                 }
932         };
933
934         fprintf(stderr, "%d disk trace parsed\n", disk_index);
935         ops_statistics (OPS_FLAG_PRINT, disk_index, -1);
936 }
937
938
939 int check_statistics (char * tracefile)
940 {
941         int disk_index=-1;
942         char *buf; 
943         char *reply_buf;
944         int i;
945         char * p;
946         int trace_status;
947         int debug = 0;
948         int nfs3proc, msgid, proc;
949         static int last_timestamp_sec = -1;
950         int timestamp_sec;
951         int memory_trace_size = 0;
952
953         while ((buf=read_line(++disk_index))!=NULL) {
954                 if (buf[TRACE_COMMAND_REPLY_FLAG_POS]!='C') 
955                         continue;
956                 if (buf[TRACE_VERSION_POS]!='3') 
957                         continue;
958                 sscanf (&buf[TRACE_MSGID_POS], "%x %x", &msgid, &nfs3proc);
959                 
960                 RFS_ASSERT (nfs3proc>=0 && nfs3proc<NFS3_PROCEDURE_COUNT);
961                 timestamp_sec = get_timestamp(buf);
962                 
963                 proc = nfs3proc_to_rfsproc[nfs3proc];
964                 ops_statistics (OPS_FLAG_INC, proc, -1);
965                 
966                 if (proc!= WRITE && proc!=SETATTR && proc!=READ) {
967                         continue;
968                 }
969                 RFS_ASSERT (buf[strlen(buf)-1]=='\n');
970                 buf [strlen(buf)-1] = 0;
971                 if (!((strlen(buf)>80) && (strlen(buf)<MAX_TRACE_LINE_LENGTH)))
972                         printf("disk_index %d strlen(buf) %d buf %s \n", disk_index, strlen(buf), buf);
973                 RFS_ASSERT ((strlen(buf)>80) && (strlen(buf)<MAX_TRACE_LINE_LENGTH));
974                 if (debug)
975                         printf("read line disk_index %d %s\n", disk_index, buf);
976
977                 trace_status = NFS3ERR_RFS_MISS;
978                 for (i=disk_index+1; i<disk_index+MAX_COMMAND_REPLY_DISTANCE; i++) {
979                         reply_buf = read_line(i);
980                         if (debug)
981                                 printf("searching for reply: read line %s\n", reply_buf);
982                         if (!reply_buf)
983                                 break;
984                 if (reply_buf[TRACE_COMMAND_REPLY_FLAG_POS]=='R') {
985                         p = strchr (&reply_buf[TRACE_MSGID_POS], ' ');
986                         RFS_ASSERT (p);
987                         if (!strncmp(&reply_buf[TRACE_MSGID_POS], &buf[TRACE_MSGID_POS], p-&(reply_buf[TRACE_MSGID_POS]))) {
988                         trace_status = find_reply_status(reply_buf);
989                                         if (trace_status == NFS3_OK) {
990                                                 if (proc==READ || proc==WRITE) 
991                                                         read_write_fh_statistics (OPS_FLAG_INC, buf, 0);
992                                                 if (proc == WRITE)
993                                                         write_statistics (OPS_FLAG_INC, buf, reply_buf, trace_status);
994                                                 if (proc==WRITE || proc==SETATTR) 
995                                                         truncate_statistics (OPS_FLAG_INC, proc, buf, reply_buf);
996                                         }
997                                 };
998                 }
999                 }
1000                 //if (memory_trace[memory_trace_size].trace_status == NFS3ERR_RFS_MISS)
1001                 if (trace_status == NFS3ERR_RFS_MISS) {
1002                         //printf ("%s no reply\n", buf);
1003                         missing_reply_num ++;
1004                 }
1005
1006 #if     0       /* commented out by G. Jason Peng */
1007                 if *
1008                 if ((missing_reply_num > memory_trace_size/10) && (missing_reply_num > 100)) {
1009                         printf ("missing_reply_num %d too high for memory_trace_size %d\n", missing_reply_num, memory_trace_size);
1010                         exit (0);
1011                 }
1012 #endif
1013
1014                 memory_trace_size ++;
1015
1016                 if (last_timestamp_sec == -1) {
1017                         last_timestamp_sec = timestamp_sec;
1018                 } else if (timestamp_sec - last_timestamp_sec >=3600) {
1019                         ops_statistics (OPS_FLAG_PRINT, disk_index, timestamp_sec);
1020                         truncate_statistics (OPS_FLAG_PRINT, disk_index, (char *)timestamp_sec, NULL);
1021                         read_write_fh_statistics(OPS_FLAG_PRINT, (char *)disk_index, timestamp_sec);
1022                         write_statistics(OPS_FLAG_PRINT, (char *)disk_index, (char *)timestamp_sec, -1);
1023                         last_timestamp_sec = timestamp_sec;
1024                 }
1025 /*
1026                 if ((memory_trace_size%10000)==0) {
1027                         fprintf(stderr, "%d disk trace parsed, missing_reply %d\n", disk_index, missing_reply_num);
1028                         ops_statistics (OPS_FLAG_PRINT, -1);
1029                         truncate_statistics (OPS_FLAG_PRINT, -1, NULL, NULL);
1030                 }
1031 */
1032         };
1033
1034         fprintf(stderr, "%d disk trace parsed, missing_reply %d\n", disk_index, missing_reply_num);
1035         ops_statistics (OPS_FLAG_PRINT, disk_index, timestamp_sec);
1036         truncate_statistics (OPS_FLAG_PRINT, disk_index, (char *)timestamp_sec, NULL);
1037         read_write_fh_statistics(OPS_FLAG_PRINT, (char *)disk_index, timestamp_sec);
1038         write_statistics(OPS_FLAG_PRINT, (char *)disk_index, (char *)timestamp_sec, -1);
1039 }
1040
1041
1042 /* This routine output all the requests, together with their replies */
1043 int pair_trace (char * tracefile)
1044 {
1045         int disk_index=-1;
1046         char *buf; 
1047         char *reply_buf;
1048         int i;
1049         char * p;
1050         int trace_status;
1051         int debug = 0;
1052         int nfs3proc, msgid;
1053         int ops[NFS3_PROCEDURE_COUNT]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
1054         int memory_trace_size = 0;
1055         FILE * outputfp;
1056
1057         outputfp = fopen ("pair.output", "w");
1058         RFS_ASSERT (outputfp);
1059
1060         while ((buf=read_line(++disk_index))!=NULL) {
1061                 if (buf[TRACE_COMMAND_REPLY_FLAG_POS]!='C') 
1062                         continue;
1063                 if (buf[TRACE_VERSION_POS]!='3') 
1064                         continue;
1065                 sscanf (&buf[TRACE_MSGID_POS], "%x %x", &msgid, &nfs3proc);
1066                 
1067                 RFS_ASSERT (nfs3proc>=0 && nfs3proc<NFS3_PROCEDURE_COUNT);
1068                 ops[nfs3proc]++;
1069
1070                 buf [strlen(buf)-1] = 0;
1071                 if (!((strlen(buf)>80) && (strlen(buf)<MAX_TRACE_LINE_LENGTH)))
1072                         printf("disk_index %d strlen(buf) %d buf %s \n", disk_index, strlen(buf), buf);
1073                 RFS_ASSERT ((strlen(buf)>80) && (strlen(buf)<MAX_TRACE_LINE_LENGTH));
1074                 if (debug)
1075                         printf("read line disk_index %d %s\n", disk_index, buf);
1076                 fprintf (outputfp, "%s\n", buf);
1077
1078                 trace_status = NFS3ERR_RFS_MISS;
1079                 for (i=disk_index+1; i<disk_index+MAX_COMMAND_REPLY_DISTANCE_FOR_PAIR; i++) {
1080                         reply_buf = read_line(i);
1081                         if (debug)
1082                                 printf("searching for reply: read line %s\n", reply_buf);
1083                         if (!reply_buf)
1084                                 break;
1085                 if (reply_buf[TRACE_COMMAND_REPLY_FLAG_POS]=='R') {
1086                         p = strchr (&reply_buf[TRACE_MSGID_POS], ' ');
1087                         RFS_ASSERT (p);
1088                         if (!strncmp(&reply_buf[TRACE_MSGID_POS], &buf[TRACE_MSGID_POS], p-&(reply_buf[TRACE_MSGID_POS]))) {
1089                                         fprintf(outputfp, "%s", reply_buf);
1090                                         trace_status = find_reply_status(reply_buf);
1091                                         if (debug)
1092                                                 fprintf(stderr, "reply found trace_status %d\n", find_reply_status(reply_buf));
1093                                 };
1094                 }
1095                 }
1096
1097                 if (trace_status == NFS3ERR_RFS_MISS) {
1098                         fprintf (stderr, "%s no reply\n", buf);
1099                         fprintf(outputfp, "missing_reply\n");
1100                         missing_reply_num ++;
1101                 }
1102
1103                 if (missing_reply_num > memory_trace_size/10 && missing_reply_num >100) {
1104                         fprintf (stderr, "missing_reply_num %d too high for memory_trace_size %d\n", missing_reply_num, memory_trace_size);
1105                         exit (0);
1106                 }
1107
1108                 memory_trace_size ++;
1109
1110                 if ((memory_trace_size%10000)==0)
1111                         fprintf(stderr, "total %d disk lines %d memory lines missing_reply_num %d\n", disk_index, memory_trace_size, missing_reply_num );
1112         };
1113
1114         fprintf(stderr, "total %d disk lines %d memory lines missing_reply_num %d\n", disk_index, memory_trace_size, missing_reply_num );
1115     //fprintf (stderr, "init_dep_tab, req_num_with_init_fh %d req_num_with_new_fh %d discard %d\n", req_num_with_init_fh, req_num_with_new_fh, req_num_with_discard_fh);
1116
1117 }
1118 /* This routine output all the write requests, together with their replies. It is used for
1119  * analysis of write requests: appended bytes, overwrite bytes etc
1120  */
1121 int pair_write (char * tracefile)
1122 {
1123         int disk_index=-1;
1124         char *buf; 
1125         char *reply_buf;
1126         int i;
1127         char * p;
1128         int trace_status;
1129         int pair_write_debug = 0;
1130         int nfs3proc, msgid;
1131         int ops[NFS3_PROCEDURE_COUNT]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
1132         int memory_trace_size = 0;
1133
1134         while ((buf=read_line(++disk_index))!=NULL) {
1135                 if (buf[TRACE_COMMAND_REPLY_FLAG_POS]!='C') 
1136                         continue;
1137                 if (buf[TRACE_VERSION_POS]!='3') 
1138                         continue;
1139                 sscanf (&buf[TRACE_MSGID_POS], "%x %x", &msgid, &nfs3proc);
1140                 
1141                 RFS_ASSERT (nfs3proc>=0 && nfs3proc<NFS3_PROCEDURE_COUNT);
1142                 ops[nfs3proc]++;
1143
1144                 if (!strstr(buf, "write")) 
1145                         continue;
1146
1147                 buf [strlen(buf)-1] = 0;
1148                 if (!((strlen(buf)>80) && (strlen(buf)<MAX_TRACE_LINE_LENGTH)))
1149                         printf("disk_index %d strlen(buf) %d buf %s \n", disk_index, strlen(buf), buf);
1150                 RFS_ASSERT ((strlen(buf)>80) && (strlen(buf)<MAX_TRACE_LINE_LENGTH));
1151                 if (pair_write_debug)
1152                         printf("read line disk_index %d %s\n", disk_index, buf);
1153
1154                 /* store the request to memory */
1155                 //strcpy (memory_trace[memory_trace_size].line, buf);
1156                 //memory_trace[memory_trace_size].disk_index = disk_index;
1157
1158                 /* find and store the reply status and reply fhandle to memory */
1159                 //memory_trace[memory_trace_size].trace_status = NFS3ERR_RFS_MISS;
1160                 trace_status = NFS3ERR_RFS_MISS;
1161                 for (i=disk_index+1; i<disk_index+MAX_COMMAND_REPLY_DISTANCE_FOR_PAIR; i++) {
1162                         reply_buf = read_line(i);
1163                         if (pair_write_debug)
1164                                 printf("searching for reply: read line %s\n", reply_buf);
1165                         if (!reply_buf)
1166                                 break;
1167                 if (reply_buf[TRACE_COMMAND_REPLY_FLAG_POS]=='R') {
1168                         p = strchr (&reply_buf[TRACE_MSGID_POS], ' ');
1169                         RFS_ASSERT (p);
1170                         if (!strncmp(&reply_buf[TRACE_MSGID_POS], &buf[TRACE_MSGID_POS], p-&(reply_buf[TRACE_MSGID_POS]))) {
1171                                         int pre_size, size, count;
1172                         //memory_trace[memory_trace_size].trace_status = find_reply_status(reply_buf);
1173                                         if (pair_write_debug)
1174                                                 printf("reply found trace_status %d\n", find_reply_status(reply_buf));
1175                                                 //break;
1176                         trace_status = find_reply_status(reply_buf);
1177                                         if (trace_status == NFS3_OK) {
1178                                                 p = strstr (p, "pre-size");
1179                                                 RFS_ASSERT (p);
1180                                                 sscanf (p, "pre-size %x", &pre_size); 
1181                                                 p += strlen("pre-size");
1182                                                 p = strstr (p, "size");
1183                                                 RFS_ASSERT (p);
1184                                                 sscanf (p, "size %x", &size); 
1185                                                 p = strstr (p, "count");
1186                                                 if (!p) 
1187                                                         printf ("%s status %x pre-size %x size %x count %x\n", buf, trace_status, pre_size, size, count);
1188                                                 RFS_ASSERT (p);
1189                                                 sscanf (p, "count %x", &count); 
1190                                                 printf ("%s status %x pre-size %x size %x count %x\n", buf, trace_status, pre_size, size, count);
1191                                                 break;
1192                                         }
1193                                 };
1194                 }
1195                 }
1196                 //if (memory_trace[memory_trace_size].trace_status == NFS3ERR_RFS_MISS)
1197                 if (trace_status == NFS3ERR_RFS_MISS) {
1198                         printf ("%s no reply\n", buf);
1199                         missing_reply_num ++;
1200                 }
1201
1202 #if     0       /* commented out by G. Jason Peng */
1203                 if (missing_reply_num > memory_trace_size/10) {
1204                         printf ("missing_reply_num %d too high for memory_trace_size %d\n", missing_reply_num, memory_trace_size);
1205                         exit (0);
1206                 }
1207 #endif
1208
1209                 memory_trace_size ++;
1210
1211                 /*
1212                 if (memory_trace_size >= MAX_MEMORY_TRACE_LINES) {
1213                         fprintf (stderr, "memory trace size %d is not enough\n", MAX_MEMORY_TRACE_LINES);
1214                         break;
1215                 }
1216                 */
1217                 if ((memory_trace_size%10000)==0)
1218                         fprintf(stderr, "total %d disk lines %d memory lines missing_reply_num %d\n", disk_index, memory_trace_size, missing_reply_num );
1219         };
1220
1221         fprintf(stderr, "total %d disk lines %d memory lines missing_reply_num %d\n", disk_index, memory_trace_size, missing_reply_num );
1222     //fprintf (stderr, "init_dep_tab, req_num_with_init_fh %d req_num_with_new_fh %d discard %d\n", req_num_with_init_fh, req_num_with_new_fh, req_num_with_discard_fh);
1223
1224 }
1225
1226 int read_trace ()
1227 {
1228         char *buf; 
1229         char *reply_buf;
1230         int i;
1231         char * p;
1232         int debug = 0;
1233         memory_trace_ent_t * ent=NULL;
1234
1235         start_profile (&read_trace_profile);
1236
1237         while (!CYCLIC_FULL(memory_trace_index)) {
1238                 if (ent!=NULL && (ent->trace_status == NFS3ERR_RFS_MISS))
1239                         buf = reply_buf;
1240                 if ((buf=read_line(++disk_index))==NULL) {
1241 END:            fprintf(stderr, "total %d disk lines %d memory lines missing_reply_num %d\n", disk_index, CYCLIC_NUM(memory_trace_index), missing_reply_num );
1242                 fprintf (stderr, "init_dep_tab, req_num_with_init_fh %d req_num_with_new_fh %d discard %d\n", req_num_with_init_fh, req_num_with_new_fh, req_num_with_discard_fh);
1243                         end_profile (&read_trace_profile);
1244                         return TRACE_FILE_END;
1245                 }
1246         
1247 #ifdef notdef
1248                 if (buf[TRACE_COMMAND_REPLY_FLAG_POS]!='C') 
1249                         continue;
1250                 if (buf[TRACE_VERSION_POS]!='3') 
1251                         continue;
1252                 if (!((strlen(buf)>80) && (strlen(buf)<MAX_TRACE_LINE_LENGTH)))
1253                         printf("disk_index %d strlen(buf) %d buf %s \n", disk_index, strlen(buf), buf);
1254                 RFS_ASSERT ((strlen(buf)>80) && (strlen(buf)<MAX_TRACE_LINE_LENGTH));
1255                 if (debug)
1256                         printf("read line disk_index %d %s\n", disk_index, buf);
1257
1258                 if (disk_index==1) {
1259                         trace_timestamp1 = get_timestamp (buf);
1260                 } else
1261                         trace_timestamp2 = get_timestamp (buf);
1262 #endif
1263                 /* store the request to memory */
1264                 ent = &(memory_trace[memory_trace_index.head]);
1265                 strcpy (ent->line, buf);
1266                 ent->disk_index = disk_index;
1267
1268                 if (MAX_COMMAND_REPLY_DISTANCE ==1) {
1269                         ent->trace_status == NFS3ERR_RFS_MISS;
1270                 } else {
1271                         reply_buf=read_line(++disk_index);
1272                         RFS_ASSERT (reply_buf);
1273                         if (!strcmp(reply_buf, "missing_reply\n")) {
1274                                 ent->trace_status == NFS3ERR_RFS_MISS;
1275                         } else {
1276                                 ent->trace_status = find_reply_status(reply_buf);
1277                         }
1278                 };
1279
1280                 if (ent->trace_status == NFS3ERR_RFS_MISS)
1281                         missing_reply_num ++;
1282
1283                 if (MAX_COMMAND_REPLY_DISTANCE > 1) {
1284                         if ((missing_reply_num > disk_index/5) && (missing_reply_num > 100)) {
1285                                 printf ("missing_reply_num %d too high for disk_index %d\n", missing_reply_num, disk_index);
1286                                 exit (0);
1287                         }
1288                 }
1289
1290                 /* find and store the reply trace fhandle for create-class requests */
1291                 if (ent->trace_status==NFS3_OK) {
1292                         if (strstr(buf, "create") || strstr(buf, "mkdir") 
1293                                 || (strstr(buf, "symlink") && (buf[TRACE_VERSION_POS]!='2')) 
1294                                 || strstr(buf, "mknod") ) {
1295                                 p = find_reply_trace_fh(reply_buf);
1296                                 memcpy(ent->reply_trace_fh, p, TRACE_FH_SIZE);
1297                         } else
1298                                 memset(ent->reply_trace_fh, 0, TRACE_FH_SIZE);
1299                 }
1300
1301                 add_to_dep_tab(memory_trace_index.head);
1302
1303                 if (((disk_index+1)%20000)==0) {
1304                         fprintf(stderr, "%d disk trace parsed \n", disk_index+1);
1305                 };
1306         };
1307
1308         end_profile (&read_trace_profile);
1309         return TRACE_BUF_FULL;
1310 }
1311 #else   /* not defined REDUCE_MEMORY_TRACE_SIZE */
1312 int read_trace ()
1313 {
1314         FILE * fp;
1315         char buf[1024];
1316         // char * t=buf;        
1317         int disk_index=0;
1318
1319         fp = fopen(trace_file, "r");
1320         RFS_ASSERT (fp!=NULL);
1321         while (fgets(buf, MAX_TRACE_LINE_LENGTH, fp)) {
1322                 if (!((strlen(buf)>80) && (strlen(buf)<MAX_TRACE_LINE_LENGTH)))
1323                         printf("strlen(buf) %d buf %s \n", strlen(buf), buf);
1324                 RFS_ASSERT ((strlen(buf)>80) && (strlen(buf)<MAX_TRACE_LINE_LENGTH));
1325
1326                 /* store the request to memory */
1327                 strcpy (memory_trace[memory_trace_size].line, buf);
1328                 memory_trace[memory_trace_size].disk_index = disk_index;
1329                 memory_trace_size ++;
1330
1331                 if (memory_trace_size >= MAX_MEMORY_TRACE_LINES) {
1332                         fprintf (stderr, "memory trace size %d is not enough\n", MAX_MEMORY_TRACE_LINES);
1333                         break;
1334                 }
1335                 if ((disk_index%100000)==0)
1336                         fprintf(stderr, "%d disk trace parsed, store %d trace lines to memory\n", disk_index, memory_trace_size);
1337                 disk_index ++;
1338         }
1339
1340         fprintf(stderr, "total %d disk lines %d memory lines \n", disk_index, memory_trace_size );
1341 }
1342 #endif
1343
1344
1345 #ifdef REDUCE_MEMORY_TRACE_SIZE
1346 inline int disk_index_to_memory_index (int disk_index)
1347 {
1348         static int memory_index = 0;
1349
1350         RFS_ASSERT (!CYCLIC_EMPTY(memory_trace_index));
1351         RFS_ASSERT (memory_trace[memory_trace_index.tail].disk_index <= disk_index);
1352         RFS_ASSERT (memory_trace[CYCLIC_MINUS(memory_trace_index.head,1,memory_trace_index.size)].disk_index >=disk_index);
1353         if (disk_index > memory_trace[memory_index].disk_index) {
1354                 while (memory_trace[memory_index].disk_index < disk_index) {
1355                         memory_index = CYCLIC_ADD(memory_index,1,memory_trace_index.size);
1356                 }
1357         };
1358         if (disk_index < memory_trace[memory_index].disk_index) {
1359                 while (memory_trace[memory_index].disk_index > disk_index) {
1360                         memory_index = CYCLIC_MINUS(memory_index,1,memory_trace_index.size);
1361                 }
1362         };
1363
1364         RFS_ASSERT (disk_index == memory_trace[memory_index].disk_index);
1365         return memory_index;
1366 }
1367 #else
1368 #define disk_index_to_memory_index(disk_index)  disk_index
1369 #endif
1370
1371 #define get_line_by_disk_index(disk_index) \
1372         memory_trace[disk_index_to_memory_index(disk_index)].line
1373
1374 inline char * find_reply_line (char * command_line, int cur_disk_index)
1375 {
1376         int i;
1377         char * line;
1378         char * p;
1379         int request_memory_index = disk_index_to_memory_index (cur_disk_index);
1380         for (i=request_memory_index+1; i<request_memory_index+MAX_COMMAND_REPLY_DISTANCE && i<MAX_MEMORY_TRACE_LINES; i++) {
1381                 line = memory_trace[i].line;
1382                 if (line[TRACE_COMMAND_REPLY_FLAG_POS]=='R') {
1383                 p = strchr (&line[TRACE_MSGID_POS], ' ');
1384             RFS_ASSERT (p);
1385                         if (!strncmp(&line[TRACE_MSGID_POS], &command_line[TRACE_MSGID_POS], p-&(line[TRACE_MSGID_POS]))) 
1386                                 return line;
1387                 }
1388         }
1389         return NULL;
1390 }
1391
1392 inline int find_reply_status (char * line)
1393 {
1394         char * p;
1395         int i=0;
1396
1397         //printf ("line %s  flag %c\n", line, line[TRACE_COMMAND_REPLY_FLAG_POS]);
1398         RFS_ASSERT (line[TRACE_COMMAND_REPLY_FLAG_POS]=='R');
1399         p = line+TRACE_MSGID_POS+2;     /* at least one letter for msgid and one letter for space */
1400         if (strstr(p, "OK"))
1401                 return NFS3_OK;
1402         if (strstr(p, "lookup 2"))
1403                 return 0x2;
1404         if (strstr(p, "create d"))
1405                 return 0xd;
1406         if (strstr(p, "setattr 1"))
1407                 return 0x1;
1408         if (strstr(p, "setattr 2712")) /* 10002 NFS3ERR_NOT_SYNC */
1409                 return 0x2712;
1410         if (strstr(p, "lookup d"))
1411                 return 0xd;
1412         if (strstr(p, "read d"))
1413                 return 0xd;
1414         if (strstr(p, "write d"))
1415                 return 0xd;
1416         if (strstr(p, "write 46"))
1417                 return 0x46;
1418         if (strstr(p, "getattr 46"))
1419                 return 0x46;
1420         if (strstr(p, "mkdir d"))
1421                 return 0xd;
1422         printf ("line %s  flag %c return value weird\n", line, line[TRACE_COMMAND_REPLY_FLAG_POS]);
1423         printf ("!!!!!!!!!!!!!!!!!!!!!!!!\n");
1424         fprintf (stderr, "line %s  flag %c return value weird\n", line, line[TRACE_COMMAND_REPLY_FLAG_POS]);
1425         fprintf (stderr, "!!!!!!!!!!!!!!!!!!!!!!!!\n");
1426 }
1427
1428 inline int find_reply_status_old (char * line)
1429 {
1430         char * p;
1431         int i=0;
1432
1433         //printf ("line %s  flag %c\n", line, line[TRACE_COMMAND_REPLY_FLAG_POS]);
1434         RFS_ASSERT (line[TRACE_COMMAND_REPLY_FLAG_POS]=='R');
1435         if (!strstr(line, "OK")) {
1436                 p=strstr(line, " 6 read ");
1437                 if (p) {
1438                         p+=strlen(" 6 read ");
1439                 } else {
1440                         p = strstr (line, "status=XXX");
1441                         RFS_ASSERT (p);
1442                         p--;
1443                         RFS_ASSERT (*p==' ');
1444                         while (*p==' ')
1445                                 p--;
1446                         while (*p!=' ') {
1447                                 p--;
1448                         }
1449                         p++;
1450                 }
1451                 sscanf (p, "%x", &i);
1452                 if ((i<=0) || (i>10000))
1453                         printf("line %s\n", line);
1454                 RFS_ASSERT (i>0 && i<10009);
1455         }
1456         return i;
1457 }
1458
1459 inline char * find_reply_trace_fh (char * line)
1460 {
1461         char * p;       
1462         p = strstr (line, "OK fh");
1463         if (!p)
1464                 printf ("find_reply_trace_fh line %s\n", line);
1465         RFS_ASSERT (p);
1466         return p+6;
1467 }
1468
1469 #ifndef NO_DEPENDENCY_TABLE
1470 inline int disk_index_to_dep_index(int cur_dep_index, int disk_index)
1471 {
1472         int i;
1473         for (i=cur_dep_index; i>min_dep_index; i--) {
1474                 if (dep_tab[i].disk_index == disk_index)
1475                         return i;
1476         } 
1477         RFS_ASSERT (0);
1478 }
1479 #endif
1480
1481 inline int is_play_candidate (int dep_index)
1482 {
1483         int proc = dep_tab[dep_index].proc;
1484         int status = dep_tab[dep_index].status;
1485         int trace_status = dep_tab[dep_index].trace_status;
1486
1487 #ifndef TAKE_CARE_CREATE_MODE_BY_DAN
1488         /* for a failed create in trace, trace_replay just ignore many time the trace create fail
1489          * due to access control, but trace_play will success because our access control
1490          * may be loose (all uid/gid is mapped to single one 513:513, so we just skip these requests 
1491          */
1492         if ((proc==CREATE || proc==MKDIR) && (trace_status!=NFS3_OK) && (status!=NFS3ERR_RFS_MISS)) {
1493                 if (dependency_debug)
1494                         printf ("disk[%d] ignore failed create/mkdir in trace, trace_status %d line %s", 
1495                                 dep_tab[dep_index].disk_index, trace_status, dep_tab[dep_index].line);
1496                 failed_create_command_num ++;
1497                 return FALSE;
1498         }
1499 #endif
1500 #ifndef TAKE_CARE_OTHER_FAILED_COMMAND
1501         if (((trace_status == NFS3ERR_ACCES) && (proc==READ || proc==WRITE || proc==LOOKUP)) || 
1502             ((trace_status == NFS3ERR_PERM) && (proc==SETATTR))                                                                         ){
1503                 if (dependency_debug)
1504                         printf ("disk[%d] ignore other failed command in trace, trace_status %d line %s", 
1505                                 dep_tab[dep_index].disk_index, trace_status, dep_tab[dep_index].line);
1506                 
1507                 failed_other_command_num ++;
1508                 return FALSE;
1509         }
1510 #endif
1511 #ifndef TAKE_CARE_SYMBOLIC_LINK
1512         if ((dep_tab[dep_index].proc==READLINK) ) { /* send request */
1513                 skipped_readlink_command_num ++;
1514                 return FALSE;
1515         }
1516 #endif
1517 /* This is actually take care in get_nextop by checking fh_map error when dep_index==min_dep_index */
1518 #ifndef TAKE_CARE_CUSTOM_COMMAND
1519         /* this line has a file handle which should belong to discard but it is not
1520          * the file handle directly appears as parent directory in a lookup request
1521          * the return value is NOENT, the parent directory should have been initialized
1522          * but the initialization code just ignored all lookup request which didn't success
1523          * including NOENT even though the parent directory is still valid.
1524          */
1525 /*
1526         if ((    ((dep_tab[dep_index].disk_index==262213) || (dep_tab[dep_index].disk_index==214402))
1527                   && !(strcmp(trace_file, "anon-lair62-011130-1100.txt")) 
1528                 ) || 
1529                 (        ((dep_tab[dep_index].disk_index==238460) || (dep_tab[dep_index].disk_index ==238470))
1530                   && !(strcmp(trace_file, "anon-lair62-011130-1000.txt"))
1531                 )) {
1532                 skipped_custom_command_num++;
1533                 return FALSE;
1534         }
1535 */
1536         if ((    ((dep_tab[dep_index].disk_index==423727) || (0))
1537                   && !(strncmp(trace_file, "anon-lair62-011130-1500.txt", strlen("anon-lair62-011130-1500.txt"))) 
1538                 ) || 
1539                 (        ((dep_tab[dep_index].disk_index==238460) || (dep_tab[dep_index].disk_index ==238470))
1540                   && !(strcmp(trace_file, "anon-lair62-011130-1000.txt"))
1541                 )) {
1542                 skipped_custom_command_num++;
1543                 return FALSE;
1544         }
1545         /* this line is about the mkdir 116d9d originally in anon-lair62-011130-1400.txt */
1546         if (!strncmp(dep_tab[dep_index].line, "1007147245.194201", strlen("1007147245.194201"))) {
1547                 skipped_custom_command_num++;
1548                 return FALSE;
1549         }
1550 #endif
1551 #ifndef TAKE_CARE_FSSTAT_COMMAND
1552         /* the file handle used in this command is not processed properly by pre-processing */
1553         if (proc==FSSTAT) {
1554                 char * trace_fh = find_lead_trace_fh(proc, dep_tab[dep_index].line);
1555                 fh_map_t * fh = lookup_fh (trace_fh);
1556                 if (!fh) {
1557                         skipped_fsstat_command_num++;
1558                         return FALSE;
1559                 }
1560         }
1561 #endif
1562         return TRUE;
1563 }
1564
1565 inline int is_dir_op (int proc)
1566 {
1567         switch (proc) {
1568         case MKDIR:
1569         case CREATE:
1570         case LINK:
1571         case SYMLINK:
1572         case MKNOD:
1573         case REMOVE:
1574         case RMDIR:
1575         case RENAME:
1576                 return 1;
1577         default:
1578                 return 0;
1579         }
1580 }       
1581
1582 inline int is_create_op (int proc)
1583 {
1584         if (proc==CREATE || proc==MKDIR || proc==LINK || proc==SYMLINK || proc==MKNOD || proc==RENAME)
1585                 return 1;
1586         return 0;
1587 }
1588
1589 inline int is_delete_op (int proc)
1590 {
1591         if (proc==REMOVE || proc==RMDIR || proc==RENAME)
1592                 return 1;
1593         return 0;
1594 }       
1595
1596 static inline char * find_lead_trace_fh(int proc, char * line)
1597 {
1598         char * p;
1599         /* check the file handle availability */ 
1600         p = strstr (line, "fh");
1601         RFS_ASSERT (p);
1602         p+=3; //printf ("check dependency dep_tab[%d] trace_fh %s line %s \n", dep_index, trace_fh, line);
1603         return p;
1604 }
1605
1606 inline char * find_another_trace_fh(int proc, char * line)
1607 {
1608         char * p;
1609         /* check the file handle availability */ 
1610         p = strstr (line, "fh2");
1611         RFS_ASSERT (p);
1612         p+=4; //printf ("check dependency dep_tab[%d] trace_fh %s line %s \n", dep_index, trace_fh, line);
1613         return p;
1614 }
1615
1616 /* return the index of next request in dep_tab.
1617  * Return -1 if there is no suitable request to send
1618  */
1619 inline int get_nextop(void)
1620 {
1621         int i,j, k;
1622         int * t;
1623         static int dep_index = -2;
1624         char * line;
1625         char * p;
1626 #define INIT_MIN_WAIT_VALUE -999
1627         static int min_wait_fhandle_dep_index = INIT_MIN_WAIT_VALUE;
1628         int proc;
1629         int flag;
1630
1631         if (min_wait_fhandle_dep_index == -999)
1632                 min_wait_fhandle_dep_index = dep_window_index.head;
1633
1634         for (i=0; i<CYCLIC_NUM(dep_window_index); i++) {
1635                 dep_index = (dep_window_index.tail+i) % dep_window_index.size;
1636         
1637                 proc = dep_tab[dep_index].proc;
1638                 flag = dep_tab[dep_index].flag;
1639
1640                 if (dependency_debug)
1641                         printf ("get_nextop check dep_tab[%d].disk_index %d\n", dep_index, dep_tab[dep_index].disk_index);
1642 #ifdef NO_DEPENDENCY_TABLE
1643                 if (dep_tab[dep_index].flag == DEP_FLAG_INIT) {
1644                         if (is_play_candidate(dep_index)==TRUE) {
1645                                 /* the trace_fh is the file handle for the operation directory, trace_fh_2 is other file handle
1646                                  * used in the request */
1647                                 if (proc==LINK || proc==RENAME) {
1648                                         dep_tab[dep_index].trace_fh = find_another_trace_fh (proc, dep_tab[dep_index].line);
1649                                         dep_tab[dep_index].trace_fh_2 = find_lead_trace_fh(proc, dep_tab[dep_index].line);
1650                                         dep_tab[dep_index].fh = 0;
1651                                         dep_tab[dep_index].fh_2 = 0;
1652                                 } else {
1653                                         dep_tab[dep_index].trace_fh = find_lead_trace_fh(proc, dep_tab[dep_index].line);
1654                                         dep_tab[dep_index].fh = 0;
1655                                         dep_tab[dep_index].fh_2 = (fh_map_t *)1;
1656                                 };
1657                                 dep_tab[dep_index].flag = DEP_FLAG_CANDIDATE;
1658 #ifdef TIME_PLAY
1659                                 dep_tab[dep_index].skip_sec = skip_sec;
1660 #endif
1661                                 if (dependency_debug)
1662                                         printf ("disk[%d] state DEP_FLAG_INIT to DEP_FLAG_CANDIDATE\n", dep_tab[dep_index].disk_index);
1663                         } else {
1664                                 if (dependency_debug)
1665                                         printf ("disk[%d] state DEP_FLAG_INIT to DEP_FLAG_DONE\n", dep_tab[dep_index].disk_index);
1666                                 dep_tab[dep_index].flag = DEP_FLAG_DONE;
1667                                 continue;
1668                         }
1669                 }
1670
1671                 if ((dep_tab[dep_index].flag == DEP_FLAG_CANDIDATE) || (dep_tab[dep_index].flag == DEP_FLAG_WAIT_FHANDLE) ) {
1672
1673                         if (!dep_tab[dep_index].fh)
1674                                 dep_tab[dep_index].fh = lookup_fh (dep_tab[dep_index].trace_fh);
1675                         if (!dep_tab[dep_index].fh_2)
1676                                 dep_tab[dep_index].fh_2 = lookup_fh (dep_tab[dep_index].trace_fh_2);
1677
1678                         /* need to wait for file handle */
1679                         if ((!dep_tab[dep_index].fh) || (!dep_tab[dep_index].fh_2)) {
1680                                 if (dependency_debug)
1681                                         printf("disk[%d] can not lookup file handle\n", dep_tab[dep_index].disk_index);
1682                                 if (dep_tab[dep_index].flag == DEP_FLAG_CANDIDATE) {
1683                                         if (dependency_debug)
1684                                                 printf ("disk[%d] state DEP_FLAG_CANDIDATE to DEP_FLAG_WAIT_FHANDLE\n", dep_tab[dep_index].disk_index);
1685                                         dep_tab[dep_index].flag = DEP_FLAG_WAIT_FHANDLE;
1686                                         sfs_gettime (&dep_tab[dep_index].start);
1687                                         if (CYCLIC_LESS(dep_tab_index,dep_index,min_wait_fhandle_dep_index)) 
1688                                                 min_wait_fhandle_dep_index = dep_index;
1689                                 } else {
1690                                         struct ladtime tmp;
1691                                         if (dep_index==dep_window_index.tail) {
1692                                                 if (!profile_debug) 
1693                                                         printf ("fh_path_map error disk[%d] state DEP_FLAG_WAIT_FHANDLE to DEP_FLAG_DONE\n", dep_tab[dep_index].disk_index);
1694                                                 fh_path_map_err_num ++;
1695                                                 dep_tab[dep_index].flag = DEP_FLAG_DONE;
1696                                                 continue;
1697                                         }
1698                                         sfs_gettime (&tmp);
1699                                         SUBTIME (tmp, dep_tab[dep_index].start);
1700 #define DEPENDENCY_TIMEOUT 5
1701 #ifdef TIME_PLAY
1702                                         RFS_ASSERT (tmp.sec < DEPENDENCY_TIMEOUT + (skip_sec - dep_tab[dep_index].skip_sec));   
1703 #else
1704                                         if (tmp.sec >= DEPENDENCY_TIMEOUT) {
1705                                                 printf("dep_tab[%d].flag %d disk_index %d line %s\n", dep_index,
1706                                                         dep_tab[dep_index].flag, dep_tab[dep_index].disk_index,
1707                                                         dep_tab[dep_index].line);
1708                                         }
1709                                         RFS_ASSERT (tmp.sec < DEPENDENCY_TIMEOUT );     
1710 #endif
1711                                 }
1712                                 continue;
1713                         }
1714
1715                         /* file handle ready, adjust_min_wait_fhandle_dep_index */
1716                         if ((dep_tab[dep_index].flag == DEP_FLAG_WAIT_FHANDLE)) {
1717                                 if (dep_index == min_wait_fhandle_dep_index) {
1718                                         min_wait_fhandle_dep_index = dep_window_index.head;
1719                                         for (j=CYCLIC_ADD(dep_index,1,dep_window_index.size); CYCLIC_LESS(dep_window_index,j,dep_window_index.head); j++) {
1720                                                 if (dep_tab[j].flag ==DEP_FLAG_WAIT_FHANDLE) {
1721                                                         min_wait_fhandle_dep_index = j;
1722                                                         break;
1723                                                 }
1724                                         }
1725                                 }
1726                         }
1727                         if (dependency_debug)
1728                                 printf("disk[%d] found file handle\n", dep_tab[dep_index].disk_index);
1729                         dep_tab[dep_index].flag = DEP_FLAG_FHANDLE_READY;
1730
1731                         /* the normal file operation can be executed now */
1732                         if (!is_dir_op (dep_tab[dep_index].proc)) {
1733                                 if (dependency_debug)
1734                                         printf ("return disk[%d]\n", dep_tab[dep_index].disk_index);
1735                                 return dep_index;
1736                         }
1737
1738                         if (dependency_debug)
1739                                 printf("disk[%d] directory operation \n", dep_tab[dep_index].disk_index);
1740                         /* the directory operation need to lock the directory first */
1741                         if (dep_tab[dep_index].fh->lock) {
1742                                 if (dependency_debug)
1743                                         printf ("disk[%d] state %d to DEP_FLAG_WAIT_DIRECTORY\n", dep_tab[dep_index].disk_index, dep_tab[dep_index].flag);
1744                                 dep_tab[dep_index].flag = DEP_FLAG_WAIT_DIRECTORY;
1745                                 continue;
1746                         }
1747                 }
1748                                 
1749                 if ((dep_tab[dep_index].flag == DEP_FLAG_FHANDLE_READY) || (dep_tab[dep_index].flag == DEP_FLAG_WAIT_DIRECTORY)) {
1750                         int j = dep_tab[dep_index].fh - fh_map;
1751                         if (dependency_debug) {
1752                                 printf ("dep_tab[%d].disk_index %d, fh_map[%d] lock=%d\n",dep_index, dep_tab[dep_index].disk_index, j, dep_tab[dep_index].fh->lock);
1753                                 printf ("trace_fh %s path %s\n", dep_tab[dep_index].fh->trace_fh, dep_tab[dep_index].fh->path);
1754                                 printf ("trace_fh %s path %s\n", fh_map[j].trace_fh, fh_map[j].path);
1755                         }
1756                         if ((dep_tab[dep_index].fh->lock) || ((proc==RENAME) && (dep_tab[dep_index].fh_2->lock)) ) {
1757                                 if (dependency_debug) 
1758                                         printf ("continue to wait for directory lock\n");
1759                                 continue;
1760                         }
1761                         if (dependency_debug) 
1762                                 printf ("dep_tab[%d] disk index %d LOCK fh_map[%d] \n", dep_index, dep_tab[dep_index].disk_index, j);
1763                         dep_tab[dep_index].fh->lock = 1;
1764                         if (proc==RENAME)
1765                                 dep_tab[dep_index].fh_2->lock = 1;
1766
1767                         /* the non-delete directory operation can proceed now */
1768                         if (!is_delete_op (dep_tab[dep_index].proc)) {
1769                                 if (dependency_debug) 
1770                                         printf ("return disk[%d]\n", dep_tab[dep_index].disk_index);
1771                                 return dep_index;
1772                         }
1773
1774                         /* the delete operation can proceed if nobody ahead is waiting for fhandle */
1775                         /* probably this condition is not strong enough */
1776 //                      if ((min_wait_fhandle_dep_index<dep_index) ) {
1777                         if (dep_index!=dep_window_index.tail) {
1778                                 if (dependency_debug) 
1779                                         printf ("disk[%d] state %d to DEP_FLAG_WAIT_DELETE\n", dep_tab[dep_index].disk_index, dep_tab[dep_index].flag);
1780                                 dep_tab[dep_index].flag = DEP_FLAG_WAIT_DELETE;
1781                                 continue;
1782                         } 
1783                         dep_tab[dep_index].flag = DEP_FLAG_DIRECTORY_READY;
1784                 }
1785
1786                 if ((dep_tab[dep_index].flag == DEP_FLAG_DIRECTORY_READY) || (dep_tab[dep_index].flag == DEP_FLAG_WAIT_DELETE)) {
1787 //                      if (min_wait_fhandle_dep_index > dep_index) {
1788                         if (dep_index==dep_window_index.tail) {
1789                                 if (dependency_debug) 
1790                                         printf ("return disk[%d]\n", dep_tab[dep_index].disk_index);
1791                                 return dep_index;
1792                         }
1793                 }
1794 #else /*NO_DEPENDENCY_TABLE undefined */
1795         /* this part of code will be invalid after CYCLIC buffer design */
1796                 if (dep_tab[dep_index].flag == DEP_FLAG_INIT){
1797                         for (j=0, t=&(dep_tab[dep_index].dep_ops[0]);
1798                                 (j<dep_tab[dep_index].init_dep_num) && (dep_tab[dep_index].cur_dep_num>0); 
1799                                 j++, t++) {
1800                                 if (*t !=-1) {
1801                                         if (dep_tab[disk_index_to_dep_index(dep_index, *t)].flag == DEP_FLAG_DONE) { 
1802                                                 /* The depended request has been finished */ 
1803                                                 *t = -1;
1804                                                 dep_tab[dep_index].cur_dep_num --;
1805                                         }
1806                                 } 
1807                         }
1808
1809                         if (dep_tab[dep_index].cur_dep_num == 0) {
1810                                 return dep_index;
1811                         }
1812                 }
1813 #endif
1814         }
1815
1816         if (dependency_debug) 
1817                 printf ("get_nexop return -1\n");
1818         return -1;
1819 }
1820
1821 int check_timeout(void)
1822 {
1823         static int biod_index = 0;
1824         int i;
1825         int dep_index;  /* index into dep_tab */
1826         int proc;
1827         sfs_op_type *op_ptr;            /* per operation info */
1828         struct ladtime timeout;
1829
1830         sfs_gettime (&current); 
1831
1832         for (i=0; i<max_biod_reqs; i++, biod_index = (biod_index+1)%max_biod_reqs) {
1833                 if (biod_reqp[biod_index].in_use==TRUE) {
1834                         timeout = biod_reqp[biod_index].timeout;
1835                         if ((current.sec>timeout.sec) ||
1836                                 ((current.sec==timeout.sec) && (current.usec>timeout.usec))) {
1837
1838                                 dep_index = biod_reqp[biod_index].dep_tab_index;
1839                                 proc = dep_tab[dep_index].proc;
1840                                 op_ptr = &Ops[proc];
1841                                 op_ptr->results.timeout_calls++;
1842                                 Ops[TOTAL].results.timeout_calls++;
1843
1844                                 finish_request (biod_index, dep_index, NFS3ERR_RFS_TIMEOUT);
1845
1846                                 if (is_create_op(proc)) {
1847                                         dep_tab[dep_index].flag = DEP_FLAG_CANDIDATE;
1848                                         printf ("resend dep_tab[%d], disk_index %d\n", dep_index, dep_tab[dep_index].disk_index);
1849                                 }
1850                                 //RFS_ASSERT (!is_create_op(proc));
1851
1852                                 //printf ("timeout request: biod_reqp[%d].start %d:%d timeout %d:%d current %d:%d\n", biod_index, biod_reqp[biod_index].start.sec, biod_reqp[biod_index].start.usec, timeout.sec, timeout.usec, current.sec, current.usec);
1853                         }
1854                 }
1855         }
1856 }
1857
1858 /* Allocate a biod_req entry to send and receive request dep_tab[dep_index]
1859  * build the cross reference between dep_tab entry and biod_req entry
1860  */
1861 struct biod_req * get_biod_req(int dep_index) /* index into dep_tab */
1862 {
1863         static int biod_index = 0;
1864         int i;
1865         for (i=0; i<max_biod_reqs; i++, biod_index = (biod_index+1)%max_biod_reqs) {
1866                 if (!biod_reqp[biod_index].in_use) {
1867                         biod_reqp[biod_index].in_use = 1;
1868                         biod_reqp[biod_index].dep_tab_index = dep_index;
1869                         dep_tab[dep_index].biod_req_index = biod_index;
1870                 num_out_reqs++;
1871                         return &(biod_reqp[biod_index]);
1872                 }
1873         }
1874         return NULL;
1875 }
1876
1877 /* Return index into biod_reqp
1878  * return -1 upon failure 
1879  */
1880 int lookup_biod_req (int xid)
1881 {
1882         static int biod_index = 0;
1883         int i;
1884         for (i=0; i<max_biod_reqs; i++, biod_index = (biod_index+1)%max_biod_reqs) {
1885                 /* give a NULL as timeout pointer may cause indefinitely block */
1886                 if (biod_reqp[biod_index].xid == xid) {
1887                         return biod_index;
1888                 }
1889         }
1890         return -1;
1891 }
1892
1893 extern struct ladtime test_start;
1894 void init_time_offset(void)
1895 {
1896         struct ladtime tmp1;
1897         struct ladtime tmp2;
1898
1899         test_start.sec = 0;
1900         test_start.usec = 0;
1901         sfs_gettime (&tmp1);            /* called at initial time: tmp1 = play_starttime */
1902 #ifdef SPEED_UP
1903         DIVTIME (tmp1, PLAY_SCALE) /* tmp1 = play_starttime / SCALE */
1904 #endif
1905 #ifdef SLOW_DOWN
1906         MULTIME (tmp1, PLAY_SCALE) /* tmp1 = play_starttime * SCALE */
1907 #endif
1908
1909         tmp2 = trace_starttime; /* tmp2 = trace_starttime */
1910         SUBTIME (tmp2, tmp1);   /* tmp2 = trace_starttime - play_starttime *|/ SCALE */
1911         time_offset = tmp2;             /* time_offset = trace_starttime - play_starttime *|/ SCALE */ 
1912 }
1913
1914 /* initialize timestamp and proc field of dep_tab entry */
1915 void init_dep_tab_entry (int dep_index)
1916 {
1917         char * line;
1918         int version;
1919         int nfsproc;
1920         int msgid;
1921
1922         //line = get_line_by_disk_index (dep_tab[dep_index].disk_index);
1923         line = dep_tab[dep_index].line;
1924         sscanf (line, "%d.%d", &(dep_tab[dep_index].timestamp.tv_sec), &(dep_tab[dep_index].timestamp.tv_usec));
1925         sscanf (&line[39], "%x %x", &msgid, &nfsproc);
1926         if (line[TRACE_VERSION_POS]=='2') {
1927                 dep_tab[dep_index].proc = nfs2proc_to_rfsproc[nfsproc];
1928                 RFS_ASSERT (nfsproc <18);
1929         } else {
1930                 /* This is for debug purpose */
1931                 if (line[TRACE_VERSION_POS] !='3') {
1932                         fprintf(stderr, "line[TRACE_VERSION_POS] %c line %s\n", line[TRACE_VERSION_POS], line);
1933                         line = get_line_by_disk_index (dep_tab[dep_index].disk_index-1);
1934                         if (!line)
1935                                 line = get_line_by_disk_index (dep_tab[dep_index].disk_index-2);
1936                         RFS_ASSERT (line);
1937                         fprintf(stderr, "previousline %s\n", line);
1938                 }
1939                 RFS_ASSERT (line[TRACE_VERSION_POS] =='3');
1940                 if (nfsproc >= NFS3_PROCEDURE_COUNT) {
1941                         fprintf(stderr, "proc %d line %s\n", nfsproc, line);
1942                         
1943                 }
1944                 RFS_ASSERT (nfsproc <NFS3_PROCEDURE_COUNT);
1945                 dep_tab[dep_index].proc = nfs3proc_to_rfsproc[nfsproc];
1946         }
1947         RFS_ASSERT (dep_tab[dep_index].proc >= 0 && dep_tab[dep_index].proc < NOPS);
1948         dep_tab[dep_index].flag = DEP_FLAG_INIT;
1949 #ifndef REDUCE_MEMORY_TRACE_SIZE
1950         dep_tab[dep_index].reply_line = find_reply_line (line, dep_tab[dep_index].disk_index);
1951         if (dep_tab[dep_index].reply_line == NULL) {
1952                 //printf ("disk[%d] can not find the reply line, assume trace_status OK\n", dep_tab[dep_index].disk_index);
1953                 dep_tab[dep_index].trace_status = NFS3ERR_RFS_MISS;
1954                 missing_reply_num ++;
1955         } else 
1956                 dep_tab[dep_index].trace_status = find_reply_status (dep_tab[dep_index].reply_line);
1957 #endif
1958 }
1959
1960 void adjust_play_window (int flag, int * poll_timeout)
1961 {
1962         struct ladtime max_window_time;
1963         static struct ladtime max_poll_time = {0, 2000, 0};
1964         struct ladtime t;
1965         int i;
1966         char * line;
1967         cyclic_index_t old_dep_window_index = dep_window_index;
1968
1969 #ifdef notdef
1970         printf ("^^^^^^^^^^^^^^^ adjust_play_window, begin\n");
1971         CYCLIC_PRINT (dep_tab_index);
1972         printf ("dep_tab[%d].memory_index %d\n", dep_tab_index.tail, dep_tab[dep_tab_index.tail].memory_index);
1973         CYCLIC_PRINT (dep_window_index);
1974         CYCLIC_PRINT (memory_trace_index);
1975         printf ("                adjust_play_window, begin\n");
1976 #endif
1977
1978         while ((!CYCLIC_EMPTY(dep_window_index)) && (dep_tab[dep_window_index.tail].flag == DEP_FLAG_DONE)) {
1979 #ifdef notdef
1980                 //CYCLIC_PRINT (memory_trace_index);
1981                 //printf("MOVE_TAIL_TO memory_index %d\n", dep_tab[dep_tab_index.tail].memory_index);
1982                 RFS_ASSERT (!CYCLIC_EMPTY(memory_trace_index)); 
1983                 RFS_ASSERT (CYCLIC_LESS (memory_trace_index, dep_tab[dep_tab_index.tail].memory_index, memory_trace_index.head));
1984                 printf("%d is done\n", dep_window_index.tail);
1985 #endif
1986                 CYCLIC_MOVE_TAIL(dep_tab_index);
1987                 CYCLIC_MOVE_TAIL(dep_window_index);
1988
1989 #ifdef notdef
1990                 CYCLIC_PRINT (dep_tab_index);
1991                 CYCLIC_PRINT (dep_window_index);
1992
1993                 if (! (dep_tab_index.tail == dep_window_index.tail)) {
1994                         CYCLIC_PRINT(dep_tab_index);
1995                         CYCLIC_PRINT(dep_window_index);
1996                 };
1997                 RFS_ASSERT ( dep_tab_index.tail == dep_window_index.tail);
1998 #endif
1999
2000                 if (!CYCLIC_EMPTY(dep_tab_index)) {
2001 #ifdef notdef
2002                         RFS_ASSERT (!CYCLIC_EMPTY(memory_trace_index)); 
2003                         if (!(CYCLIC_LESS (memory_trace_index, dep_tab[dep_tab_index.tail].memory_index, memory_trace_index.head))) {
2004                                 CYCLIC_PRINT(memory_trace_index);
2005                                 CYCLIC_PRINT(dep_tab_index);
2006                                 printf("dep_tab[head-1].memory_index, %d [tail].memory_index %d\n", 
2007                                         dep_tab[CYCLIC_MINUS(dep_tab_index.head,1,dep_tab_index.size)].memory_index,
2008                                         dep_tab[dep_tab_index.tail].memory_index);
2009                         }
2010                         RFS_ASSERT (CYCLIC_LESS (memory_trace_index, dep_tab[dep_tab_index.tail].memory_index, memory_trace_index.head));
2011 #endif
2012                         CYCLIC_SET_TAIL_TO(&memory_trace_index, dep_tab[dep_tab_index.tail].memory_index);
2013                         //printf ("set memory_trace_index to %d=%d, dep_tab_index.tail %d\n", memory_trace_index.tail, dep_tab[dep_tab_index.tail].memory_index, dep_tab_index.tail);
2014                 } else {
2015                 //      CYCLIC_MOVE_TAIL (memory_trace_index);
2016                 }
2017         }
2018
2019         while (CYCLIC_EMPTY(dep_tab_index)) {
2020                 
2021                 if (disk_io_status == TRACE_FILE_END) 
2022                         return;
2023                 else {
2024                         //printf ("************** ADJUST_PLAY_WINDOW sleep 1 s\n"); 
2025                         //print_cyclic_buffers();
2026                         pthread_yield();
2027                         //usleep (1000);
2028                 }
2029         }
2030
2031         /* max_trace_window_time = current *|/ SCALE + trace_starttime */
2032         sfs_gettime (&current);
2033
2034 #ifdef TIME_PLAY
2035 #ifdef SPEED_UP
2036         MULTIME (current, PLAY_SCALE);
2037 #endif
2038 #ifdef SLOW_DOWN
2039         DIVTIME (current, PLAY_SCALE);
2040 #endif
2041         ADDTIME (current, trace_starttime);
2042         max_window_time = current;
2043
2044         /* Right now it is not clear how to deal with the situation where MAX_PLAY_WINDOW is reached */
2045         if (CYCLIC_NUM(dep_window_index) == MAX_PLAY_WINDOW) {
2046                 //printf ("can not catch up the speed, dep_tab_size %d dep_window_max %d reach min_dep_index %d+MAX_PLAY_WINDOW\n", dep_tab_size, dep_window_max, min_dep_index);
2047                 //printf (".");
2048                 can_not_catch_speed_num ++;
2049         }
2050         //RFS_ASSERT (dep_window_max < min_dep_index+MAX_PLAY_WINDOW);
2051 #else
2052         ADDTIME (current, trace_starttime);
2053         max_window_time = current;
2054         while ((CYCLIC_NUM(dep_window_index) < MAX_PLAY_WINDOW) &&
2055                    (CYCLIC_NUM(dep_window_index) < CYCLIC_NUM(dep_tab_index)) ) {
2056                 CYCLIC_MOVE_HEAD(dep_window_index);
2057         }
2058 #endif
2059
2060         if (flag == BUSY)
2061                 *poll_timeout = 0;
2062         else if (CYCLIC_NUM(dep_window_index)==CYCLIC_NUM(dep_tab_index)) {
2063                 *poll_timeout = 1000000;        /* poll_timeout set to 1 second for the last request */
2064         } else {
2065 #ifdef TIME_PLAY
2066                 struct ladtime tmp;
2067                 struct ladtime tmp1;
2068                 tmp.sec = dep_tab[dep_window_index.head].timestamp.tv_sec;
2069                 tmp.usec = dep_tab[dep_window_index.head].timestamp.tv_usec;
2070                 if (adjust_play_window_debug>=2)
2071                         printf ("dep_tab[dep_window_index.head %d].timestamp %d:%d, max_window_time %d:%d\n",
2072                                 dep_window_index.head, tmp.sec, tmp.usec, max_window_time.sec, max_window_time.usec);
2073
2074                 SUBTIME (tmp, max_window_time);
2075 #ifdef SPEED_UP
2076                 DIVTIME (tmp, PLAY_SCALE);
2077 #endif
2078 #ifdef SLOW_DOWN
2079                 MULTIME (tmp, PLAY_SCALE);
2080 #endif
2081 /*
2082                 tmp1 = tmp;
2083
2084                 if (tmp.sec > max_poll_time.sec) {
2085
2086                         if (rfs_debug) 
2087                                 printf ("dep_tab[%d].timestamp %d:%d, max_window_time %d:%d\n",
2088                                 dep_window_max, dep_tab[dep_window_max].timestamp.tv_sec, dep_tab[dep_window_max].timestamp.tv_usec, max_window_time.sec, max_window_time.usec);
2089                         printf ("skip %d seconds\n", tmp.sec-max_poll_time.sec);
2090                         SUBTIME (tmp, max_poll_time);
2091                         tmp.usec = 0;
2092                         skip_sec += tmp.sec;
2093                         SUBTIME (test_start, tmp);
2094                         tmp = max_poll_time;
2095                 }
2096 */
2097
2098                 //RFS_ASSERT ((tmp.sec < 1000));
2099                 if (tmp.sec > 1000)
2100                         tmp.sec = 1000;
2101                 if ((tmp.sec ==0) && (tmp.usec==0)) {
2102                         *poll_timeout = 0;
2103                 } else
2104                         *poll_timeout = tmp.sec*1000000+tmp.usec;
2105 #else 
2106                 /*
2107                 struct ladtime tmp;
2108                 struct ladtime tmp1;
2109                 tmp.sec = dep_tab[dep_window_max].timestamp.tv_sec;
2110                 tmp.usec = dep_tab[dep_window_max].timestamp.tv_usec;
2111                 tmp1.sec = dep_tab[dep_window_max-1].timestamp.tv_sec;
2112                 tmp1.usec = dep_tab[dep_window_max-1].timestamp.tv_usec;
2113                 SUBTIME (tmp, tmp1);
2114                 RFS_ASSERT ((tmp.sec < 1000));
2115                 RFS_ASSERT ((tmp.sec>0) || ((tmp.sec==0) && (tmp.usec>0)));
2116                 *poll_timeout = tmp.sec*1000000+tmp.usec;
2117                 */
2118
2119                 *poll_timeout = 100000;
2120 #endif
2121         }       
2122         if (rfs_debug)
2123                 printf ("adjust_play_window: flag %d min %d -> %d, max %d -> %d poll_timeout %d \n", 
2124                 flag, old_dep_window_index.tail, dep_window_index.tail, old_dep_window_index.head,
2125                 dep_window_index.head, *poll_timeout);
2126
2127 #ifdef notdef
2128         printf ("^^^^^^^^^^^^^^^ adjust_play_window, end\n");
2129         CYCLIC_PRINT (dep_tab_index);
2130         printf ("dep_tab[%d].memory_index %d\n", dep_tab_index.tail, dep_tab[dep_tab_index.tail].memory_index);
2131         CYCLIC_PRINT (dep_window_index);
2132         CYCLIC_PRINT (memory_trace_index);
2133         printf ("        adjust_play_window, end\n\n");
2134 #endif
2135         //CYCLIC_ASSERT(4);
2136 }
2137
2138 /* poll for usecs and receive, after receive one reply,
2139  * return index in biod_reqp of the corresponding request
2140  */
2141 int poll_and_get_reply (int usecs)
2142 {
2143         int biod_index = -1;
2144         int xid;
2145         int error;
2146         struct timeval zero_time = {0, 0}; /* Immediately return */
2147
2148         do {
2149                 error = biod_poll_wait (NFS_client, usecs);
2150                 switch (error) {
2151                 case -1:
2152                         if (errno == EINTR) {
2153                                 error = 1;
2154                                 continue;
2155                         }
2156                         if (rfs_debug) {
2157                                 (void) fprintf(stderr, "biod_poll_wait error\n");
2158                                 perror ("");
2159                             (void) fflush(stderr);
2160                         }
2161                         break;
2162                 case 0:
2163                         break;
2164                 default:
2165 #ifdef UDP
2166                         error = get_areply_udp (NFS_client, &xid, &zero_time);
2167                         // RFS_ASSERT (error!= RPC_TIMEOUT);    /* we have polled and know there is data */
2168                         // RFS_ASSERT (error!= RPC_CANTRECV);
2169                         RFS_ASSERT (error == RPC_SUCCESS);
2170
2171                         biod_index = lookup_biod_req (xid);
2172                         sfs_gettime (&(biod_reqp[biod_index].stop));
2173 #else
2174                         RFS_ASSERT (0);
2175 #endif
2176                 }
2177         } while (0);
2178         return biod_index;
2179 }
2180
2181 void print_result(void)
2182 {
2183         int i, j;
2184         struct ladtime t;
2185         int dep_index;
2186         int avg_msecs;
2187         unsigned long long tmp;
2188         int avg_usecs;
2189
2190     if (DEBUG_CHILD_GENERAL) {
2191                 (void) fprintf(stdout, "trace play result:\n");
2192                 (void) fprintf(stdout, "\t    percentage good_cnt bad_cnt timeout_cnt\telapsed time\t\t\taverage time\n");
2193                 for (i=0; i<NOPS+1; i++) {
2194                         if (Ops[i].results.good_calls==0) {
2195                                 avg_msecs = 0;
2196                                 avg_usecs = 0;
2197                         } else {
2198                                 tmp = Ops[i].results.time.sec*1000000 + Ops[i].results.time.usec;
2199                                 avg_msecs = 0;
2200                                 avg_usecs = tmp/Ops[i].results.good_calls;
2201 /*
2202                                 avg_msecs = (Ops[i].results.time.sec*1000 + Ops[i].results.time.usec/1000)/Ops[i].results.good_calls;
2203                                 avg_usecs = (Ops[i].results.time.usec%1000)/Ops[i].results.good_calls;
2204 */
2205                         }
2206                         (void) fprintf(stdout,  "%11s\t%4.1f\t%4d\t%4d\t%4d\t\tsec %8d usec %8d \tusec %8d\n", 
2207                                 Ops[i].name, 
2208                                 (float)(100*Ops[i].results.good_calls)/(float)Ops[TOTAL].results.good_calls, 
2209                                 Ops[i].results.good_calls, Ops[i].results.bad_calls, Ops[i].results.timeout_calls,
2210                                 Ops[i].results.time.sec, Ops[i].results.time.usec, avg_msecs*1000+avg_usecs);
2211                 }
2212                 (void) fflush (stdout);
2213     }
2214
2215 #if     0       /* commented out by G. Jason Peng */
2216         RFS_ASSERT (read_data_owe_GB==0);
2217         printf("read_data_total %d GB and %d bytes, owe %d GB and %d bytes, %d percent, adjusted %d times \n",read_data_total_GB, read_data_total, read_data_owe_GB, read_data_owe, (read_data_owe)/(read_data_total/100), read_data_adjust_times);
2218         printf("write_data_total %d GB and %d bytes, owe %d GB and %d bytes, %d percent, adjusted %d times \n",write_data_total_GB, write_data_total, write_data_owe_GB, write_data_owe, (write_data_owe)/(write_data_total/100), write_data_adjust_times);
2219         printf("poll_timeout_0_num %d poll_timeout_pos_num %d\n", poll_timeout_0_num, poll_timeout_pos_num);
2220         printf("failed_create_command_num_in_original_trace %d\nfailed_other_command_num_in_original_trace %d\nskipped_readlink_command_num %d\nskipped_custom_command_num %d\nfh_path_map_err_num %d\nskipped_fsstat_command_num %d\nmissing_reply_num %d\nrename_rmdir_noent_reply_num %d\nrmdir_not_empty_reply_num %d\nloose_access_control_reply_num %d\nlookup_err_due_to_rename %d\nlookup_err_due_to_parallel_remove %d\nlookup_eaccess_enoent_mismatch %d\nread_io_err_num %d\nstale_fhandle_err_num %d abnormal_EEXIST_num %d abnormal_ENOENT_num %d proper_reply_num %d run_stage_proper_reply_num %d\n", 
2221                         failed_create_command_num,
2222                         failed_other_command_num,
2223                         skipped_readlink_command_num, 
2224                         skipped_custom_command_num,
2225                         fh_path_map_err_num, 
2226                         skipped_fsstat_command_num, 
2227                         missing_reply_num, 
2228                         rename_rmdir_noent_reply_num, 
2229                         rmdir_not_empty_reply_num, 
2230                         loose_access_control_reply_num, 
2231                         lookup_err_due_to_rename_num, 
2232                         lookup_err_due_to_parallel_remove_num,
2233                         lookup_eaccess_enoent_mismatch_num, 
2234                         read_io_err_num, 
2235                         stale_fhandle_err_num,
2236                         abnormal_EEXIST_num,
2237                         abnormal_ENOENT_num,
2238                         proper_reply_num, run_stage_proper_reply_num);
2239 #endif
2240
2241     clnt_destroy(NFS_client);
2242     biod_term();
2243
2244 //  print_dump(Client_num, Child_num);
2245
2246
2247 /*
2248  * allocate and initialize client handles
2249  */
2250 static int
2251 init_rpc(void)
2252 {
2253         int dummy = 0;
2254
2255     /*
2256      * Set up the client handles.  We get them all before trying one
2257      * out to insure that the client handle for LOOKUP class is allocated
2258      * before calling op_getattr().
2259      */
2260     if (DEBUG_CHILD_GENERAL) {
2261         (void) fprintf(stderr, "%s: set up client handle\n", sfs_Myname);
2262     }
2263
2264     NFS_client = lad_clnt_create(Tcp? 1: 0, Server_hostent,
2265                                         (uint32_t) NFS_PROGRAM,
2266                                         (uint32_t) nfs_version,
2267                                         RPC_ANYSOCK, &Nfs_timers[0]);
2268                 
2269     if (NFS_client  == ((CLIENT *) NULL)) {
2270         return(-1);
2271     }
2272
2273     /*
2274      * create credentials using the REAL uid
2275      */
2276     NFS_client->cl_auth = authunix_create(lad_hostname, (int)Real_uid,
2277                                       (int)Cur_gid, 0, NULL);
2278
2279
2280         if (biod_init(dummy, dummy) == -1) {
2281                     return(-1);
2282         }
2283
2284     return(0);
2285 } /* init_rpc */
2286
2287 void
2288 init_counters(void)
2289 {
2290     uint_t i;
2291     uint_t start_msec;
2292
2293     /* Ready to go - initialize operation counters */
2294     for (i = 0; i < NOPS + 1; i++) {
2295         Ops[i].req_cnt = 0;
2296         Ops[i].results.good_calls = 0;
2297         Ops[i].results.bad_calls = 0;
2298         Ops[i].results.timeout_calls = 0;       // RFS
2299         Ops[i].results.fast_calls = 0;
2300         Ops[i].results.time.sec = 0;
2301         Ops[i].results.time.usec = 0;
2302         Ops[i].results.msec2 = 0;
2303     }
2304
2305     /* initialize timers and period variables */
2306     sfs_gettime(&Starttime);
2307     Cur_time = Starttime;
2308     start_msec = (Starttime.sec * 1000) + (Starttime.usec / 1000);
2309     Previous_chkpnt_msec = start_msec;
2310     Calls_this_period = 0;
2311     Reqs_this_period = 0;
2312     Sleep_msec_this_period = 0;
2313     Calls_this_test = 0;
2314     Reqs_this_test = 0;
2315     Sleep_msec_this_test = 0;
2316 }
2317
2318 static char *
2319 nfs3_strerror(int status)
2320 {
2321     static char str[40];
2322     switch (status) {
2323         case NFS3_OK:
2324             (void) strcpy(str, "no error");
2325             break;
2326         case NFS3ERR_PERM:
2327             (void) strcpy(str, "Not owner");
2328             break;
2329         case NFS3ERR_NOENT:
2330             (void) strcpy(str, "No such file or directory");
2331             break;
2332         case NFS3ERR_IO:
2333             (void) strcpy(str, "I/O error");
2334             break;
2335         case NFS3ERR_NXIO:
2336             (void) strcpy(str, "No such device or address");
2337             break;
2338         case NFS3ERR_ACCES:
2339             (void) strcpy(str, "Permission denied");
2340             break;
2341         case NFS3ERR_EXIST:
2342             (void) strcpy(str, "File exists");
2343             break;
2344         case NFS3ERR_XDEV:
2345             (void) strcpy(str, "Cross-device link");
2346             break;
2347         case NFS3ERR_NODEV:
2348             (void) strcpy(str, "No such device");
2349             break;
2350         case NFS3ERR_NOTDIR:
2351             (void) strcpy(str, "Not a directory");
2352             break;
2353         case NFS3ERR_ISDIR:
2354             (void) strcpy(str, "Is a directory");
2355             break;
2356         case NFS3ERR_INVAL:
2357             (void) strcpy(str, "Invalid argument");
2358             break;
2359         case NFS3ERR_FBIG:
2360             (void) strcpy(str, "File too large");
2361             break;
2362         case NFS3ERR_NOSPC:
2363             (void) strcpy(str, "No space left on device");
2364             break;
2365         case NFS3ERR_ROFS:
2366             (void) strcpy(str, "Read-only file system");
2367             break;
2368         case NFS3ERR_MLINK:
2369             (void) strcpy(str, "Too many links");
2370             break;
2371         case NFS3ERR_NAMETOOLONG:
2372             (void) strcpy(str, "File name too long");
2373             break;
2374         case NFS3ERR_NOTEMPTY:
2375             (void) strcpy(str, "Directory not empty");
2376             break;
2377         case NFS3ERR_DQUOT:
2378             (void) strcpy(str, "Disc quota exceeded");
2379             break;
2380         case NFS3ERR_STALE:
2381             (void) strcpy(str, "Stale NFS file handle");
2382             break;
2383         case NFS3ERR_REMOTE:
2384             (void) strcpy(str, "Object is remote");
2385             break;
2386         case NFS3ERR_BADHANDLE:
2387             (void) strcpy(str, "Bad file handle");
2388             break;
2389         case NFS3ERR_NOT_SYNC:
2390             (void) strcpy(str, "Not sync write");
2391             break;
2392         case NFS3ERR_BAD_COOKIE:
2393             (void) strcpy(str, "Bad cookie");
2394             break;
2395         case NFS3ERR_NOTSUPP:
2396             (void) strcpy(str, "Operation not supported");
2397             break;
2398         case NFS3ERR_TOOSMALL:
2399             (void) strcpy(str, "Value too small");
2400             break;
2401         case NFS3ERR_SERVERFAULT:
2402             (void) strcpy(str, "Server fault");
2403             break;
2404         case NFS3ERR_BADTYPE:
2405             (void) strcpy(str, "Bad type");
2406             break;
2407         case NFS3ERR_JUKEBOX:
2408             (void) strcpy(str, "Jukebox");
2409             break;
2410         case NFS3ERR_RFS_TIMEOUT:
2411                 (void) strcpy(str, "Timeout");
2412                 break;
2413         default:
2414             (void) sprintf(str, "Unknown status %d", status);
2415             break;
2416     }
2417     return (str);
2418 }
2419
2420 /*
2421  * Check the gettimeofday() resolution. If the resolution
2422  * is in chunks bigger than SFS_MIN_RES then the client
2423  * does not have a usable resolution for running the 
2424  * benchmark.
2425  */
2426 static void
2427 check_clock(void)
2428 {
2429         double time_res;
2430         char tmp_hostname[HOSTNAME_LEN];
2431
2432         time_res = get_resolution();
2433         getmyhostname(tmp_hostname, HOSTNAME_LEN);
2434         if( time_res > (double)SFS_MIN_RES )
2435         {
2436                 (void) fprintf(stderr,
2437                 "\n%s: Clock resolution too poor to obtain valid results.\n",
2438                         tmp_hostname);
2439                 (void) fprintf(stderr,
2440                 "%s: Clock resolution %f Micro seconds.\n", tmp_hostname,
2441                         time_res);
2442                 exit(175);
2443         }
2444         else
2445         {
2446                 (void) fprintf(stderr,
2447                 "\n%s: Good clock resolution [ %f ] Micro seconds.\n", 
2448                         tmp_hostname, time_res);
2449         }
2450 }
2451
2452 /*
2453  * Lifted code from Iozone with permission from author. (Don Capps)
2454  * Returns the resolution of the gettimeofday() function 
2455  * in microseconds.
2456  */
2457 static double
2458 get_resolution(void)
2459 {
2460         double starttime, finishtime, besttime;
2461         long  j,delay;
2462         int k;
2463
2464         finishtime=time_so_far1(); /* Warm up the instruction cache */
2465         starttime=time_so_far1();  /* Warm up the instruction cache */
2466         delay=j=0;                 /* Warm up the data cache */
2467         for(k=0;k<10;k++)
2468         {
2469                 while(1)
2470                 {
2471                         starttime=time_so_far1();
2472                         for(j=0;j< delay;j++)
2473                         ;
2474                         finishtime=time_so_far1();
2475                         if(starttime==finishtime)
2476                                 delay++;
2477                         else
2478                         {
2479                                 if(k==0)
2480                                         besttime=(finishtime-starttime);
2481                                 if((finishtime-starttime) < besttime)
2482                                         besttime=(finishtime-starttime);
2483                                 break;
2484                         }
2485                 }
2486         }
2487          return(besttime);
2488 }
2489
2490 /*
2491  * Lifted code from Iozone with permission from author. (Don Capps)
2492  * Returns current result of gettimeofday() in microseconds.
2493  */
2494 /************************************************************************/
2495 /* Time measurement routines.                                           */
2496 /* Return time in microseconds                                          */
2497 /************************************************************************/
2498
2499 static double
2500 time_so_far1(void)
2501 {
2502         /* For Windows the time_of_day() is useless. It increments in 55 */
2503         /* milli second increments. By using the Win32api one can get */
2504         /* access to the high performance measurement interfaces. */
2505         /* With this one can get back into the 8 to 9 microsecond */
2506         /* resolution.  */
2507 #ifdef Windows
2508         LARGE_INTEGER freq,counter;
2509         double wintime;
2510         double bigcounter;
2511
2512         QueryPerformanceFrequency(&freq);
2513         QueryPerformanceCounter(&counter);
2514         bigcounter=(double)counter.HighPart *(double)0xffffffff +
2515                 (double)counter.LowPart;
2516         wintime = (double)(bigcounter/(double)freq.LowPart);
2517         return((double)wintime*1000000.0);
2518 #else
2519 #if defined (OSFV4) || defined(OSFV3) || defined(OSFV5)
2520   struct timespec gp;
2521
2522   if (getclock(TIMEOFDAY, (struct timespec *) &gp) == -1)
2523     perror("getclock");
2524   return (( (double) (gp.tv_sec)*1000000.0) +
2525     ( ((float)(gp.tv_nsec)) * 0.001 ));
2526 #else
2527   struct timeval tp;
2528
2529   if (gettimeofday(&tp, (struct timezone *) NULL) == -1)
2530     perror("gettimeofday");
2531   return ((double) (tp.tv_sec)*1000000.0) +
2532     (((double) tp.tv_usec) );
2533 #endif
2534 #endif
2535 }
2536
2537 static void
2538 usage(void)
2539 {
2540         fprintf(stderr, "trace play usage");
2541 }
2542 extern void init_file_system (void)
2543 {
2544         return;
2545 }
2546
2547 void show_fhandle (nfs_fh3 * fhp)
2548 {
2549         struct knfs_fh * kfhp = (struct knfs_fh *)fhp;
2550
2551         int dev;
2552
2553         if (quiet_flag)
2554                 return;
2555                 
2556         RFS_ASSERT (kfhp->fh_version == 1);
2557         RFS_ASSERT (kfhp->fh_fsid_type == 0);
2558         RFS_ASSERT (kfhp->fh_auth_type == 0);
2559
2560         dev = ntohs(kfhp->fh_dev_major);
2561         dev = dev<<8;
2562         dev = dev + ntohs(kfhp->fh_dev_minor);
2563
2564         /* kfhp->fh_dev_ino hold the inode number of export point of the mounted
2565          * file system. For example, if /tmp/t1 is exported, /tmp/t1/t2 is mounted,
2566          * then fh_dev_ino hold the inode number of t1, not t2
2567          */
2568
2569         switch (kfhp->fh_fileid_type) {
2570                 case 0:
2571                         printf("fh:type 0 root dev 0x%x dev_ino %d\n", dev, kfhp->fh_dev_ino); 
2572                         break;
2573                 case 1:
2574                         printf("fh:type 1 %d %x dev %x dev_ino %x\n", 
2575                                 kfhp->fh_ino, kfhp->fh_generation, dev, kfhp->fh_dev_ino);
2576                         break;
2577                 case 2:
2578                         printf("fh:type2 %d %x dirino %d dev 0x%x dev_ino %x\n", 
2579                                 kfhp->fh_ino, kfhp->fh_generation, kfhp->fh_dirino, dev, kfhp->fh_dev_ino);
2580                         break;
2581                 default:
2582                         RFS_ASSERT (0);
2583         }
2584 }
2585
2586 nfs_fh3 zero_fhandle;
2587 int init_fh_map ()
2588 {
2589         memset (fh_map, 0, sizeof (fh_map));
2590         memset(fh_htable, 0, sizeof (fh_htable));
2591         memset (&zero_fhandle, 0, sizeof(nfs_fh3));
2592         printf ("SIZE of fh map %d KB\n", sizeof (fh_map)/1000);
2593         fh_i = 0;
2594 }
2595
2596 int add_fh (int map_flag, char * trace_fh, char * path, nfs_fh3 * play_fh)
2597 {
2598         char * old_trace_fh;
2599
2600         /* first lookup if the entry for fh is already in the table */
2601     struct generic_entry * p;
2602
2603     p = generic_lookup (trace_fh, TRACE_FH_SIZE, 0, fh_htable, FH_HTABLE_SIZE);
2604         if (p) {
2605                 RFS_ASSERT (fh_map[p->key3].flag = FH_MAP_FLAG_PARTIAL);
2606                 RFS_ASSERT (map_flag ==FH_MAP_FLAG_COMPLETE);
2607                 fh_map[p->key3].flag = map_flag;
2608                 //RFS_ASSERT (!memcmp(fh_map[p->key3].trace_fh, trace_fh, TRACE_FH_SIZE));
2609                 if (memcmp(fh_map[p->key3].trace_fh, trace_fh, TRACE_FH_SIZE)) {
2610                         int i;
2611                         printf ("fh_map[%d].trace_fh %s trace_fh %s", p->key3, fh_map[p->key3].trace_fh, trace_fh);
2612                         for (i=0; i<fh_i; i++) {
2613                                 int * p1 = (int *)&(fh_map[i].play_fh);
2614 #ifdef COMPRESS_TRACE_FH
2615                                 int * p = (int *)fh_map[i].trace_fh;
2616                                 printf("fh_map[%d].trace_fh %8x%8x%8x%8x%8x%8x%8x%8x path %s \nnew_filehandle %8x%8x%8x%8x%8x%8x%8x%8x\n",
2617                                  i, *p, *(p+1), *(p+2), *(p+3), *(p+4), *(p+5), *(p+6), *(p+7), fh_map[i].path,
2618                                  *p1, *(p1+1), *(p1+2), *(p1+3), *(p1+4), *(p1+5), *(p1+6), *(p1+7));
2619 #else
2620                                 printf("fh_map[%d].trace_fh %s path %s \nnew_filehandle %8x%8x%8x%8x%8x%8x%8x%8x\n",
2621                                  i, fh_map[i].trace_fh, fh_map[i].path,
2622                                  *p1, *(p1+1), *(p1+2), *(p1+3), *(p1+4), *(p1+5), *(p1+6), *(p1+7));
2623                         }
2624 #endif
2625                         RFS_ASSERT (0);
2626                 }
2627                 RFS_ASSERT (!strcmp(fh_map[p->key3].path, path));
2628                 /* It's possible that in fh-path-map, many trace_fh are corresponding to one path
2629                  * some of it may be the result of lookup after symlink, which is not handled
2630                  * properly as new created objects 
2631                  */
2632 #ifdef TAKE_CARE_SYMBOLIC_LINK
2633                 RFS_ASSERT (!memcmp(&fh_map[p->key3].play_fh, &zero_fhandle, sizeof(nfs_fh3)));
2634 #endif
2635                 memcpy (&fh_map[p->key3].play_fh, play_fh, sizeof (nfs_fh3));
2636                 if ((fh_map_debug==1)) // || (stage ==TRACE_PLAY_STAGE)) 
2637                         printf ("update the play_fh for trace_fh %s path %s \n", trace_fh, path);
2638                 return 0;
2639         }
2640
2641         fh_map[fh_i].flag = map_flag;
2642         fh_map[fh_i].lock = 0;
2643         strncpy(fh_map[fh_i].trace_fh, trace_fh, TRACE_FH_SIZE);
2644
2645         RFS_ASSERT (strlen(path) < MAX_PLAY_PATH_SIZE);
2646         strcpy (fh_map [fh_i].path, path);
2647         if (map_flag==FH_MAP_FLAG_COMPLETE)
2648                 memcpy (&fh_map[fh_i].play_fh, play_fh, sizeof(nfs_fh3));
2649         else 
2650                 memset (&fh_map[fh_i].play_fh, 0, sizeof(nfs_fh3));
2651
2652         if ((fh_map_debug==1)) { // || (stage ==TRACE_PLAY_STAGE)) {
2653                 printf ("insert trace_fh %s path %s play_fh:\n", trace_fh, path);
2654                 if (map_flag == FH_MAP_FLAG_COMPLETE) {
2655                         //show_fhandle(play_fh);
2656                 } else 
2657                         printf("null\n");
2658         }
2659
2660 /*
2661         if (map_flag == FH_MAP_FLAG_DISCARD)
2662                 printf ("insert flag %d trace_fh %s path %s play_fh:\n", map_flag, trace_fh, path);
2663 */
2664
2665     generic_insert(trace_fh, TRACE_FH_SIZE, fh_i, fh_htable, FH_HTABLE_SIZE);
2666         
2667         fh_i = (fh_i+1);
2668         RFS_ASSERT (fh_i < FH_MAP_SIZE);
2669
2670     return 0;
2671 };
2672
2673 inline fh_map_t * lookup_fh (char * trace_fh )
2674 {
2675     struct generic_entry * p;
2676     p = generic_lookup (trace_fh, TRACE_FH_SIZE, 0, fh_htable, FH_HTABLE_SIZE);
2677         if (fh_map_debug==1)
2678                 printf ("lookup trace_fh %s\n", trace_fh);
2679
2680     if (p) {
2681                 if (fh_map_debug==1) {
2682                         printf ("found: fh_i[%d] trace_fh %s path %s play_fh:\n", p->key3, fh_map[p->key3].trace_fh, fh_map[p->key3].path);
2683                         //show_fhandle(&fh_map[p->key3].play_fh);
2684                 }
2685                 RFS_ASSERT (!memcmp(fh_map[p->key3].trace_fh, trace_fh, TRACE_FH_SIZE));
2686         return (&(fh_map[p->key3]));
2687     } else {
2688                 //printf ("lookup_fh %s not found\n", trace_fh);
2689                 if (stage != READ_DEP_TAB_STAGE && (fh_map_debug==1)) {
2690                         printf ("lookup not found trace_fh %s\n", trace_fh);
2691                 }
2692         return NULL;
2693         }
2694         RFS_ASSERT (0);
2695 }
2696
2697 int delete_fh (char * trace_fh, int fh_map_index)
2698 {
2699     generic_delete (trace_fh, TRACE_FH_SIZE, fh_map_index, fh_htable, FH_HTABLE_SIZE);
2700     return 0;
2701 };
2702
2703 int lookup_init_filesystem (nfs_fh3 * parent, char * name, nfs_fh3 * result)
2704 {
2705     LOOKUP3args         args;
2706     LOOKUP3res          reply;          /* the reply */
2707     enum clnt_stat      rpc_stat;       /* result from RPC call */
2708     struct ladtime      start;
2709     struct ladtime      stop;
2710         static int i=0;
2711
2712     /* set up the arguments */
2713     (void) memcpy((char *) &args.what.dir, (char *) parent,
2714                                                         sizeof (nfs_fh3));
2715     args.what.name = name;
2716     (void) memset((char *) &reply.resok.object, '\0', sizeof (nfs_fh3));
2717
2718     /* make the call */
2719     sfs_gettime(&start);
2720     rpc_stat = clnt_call(NFS_client, NFSPROC3_LOOKUP,
2721                         xdr_LOOKUP3args, (char *) &args,
2722                         xdr_LOOKUP3res, (char *) &reply,
2723                         Nfs_timers[Init]);
2724     sfs_gettime(&stop);
2725
2726         if (rpc_stat !=RPC_SUCCESS) {
2727                 printf("rpc_stat %d\n", rpc_stat);
2728                 perror("");
2729         }
2730     RFS_ASSERT (rpc_stat == RPC_SUCCESS);
2731         (void) memcpy((char *) result, (char *) &reply.resok.object, sizeof (nfs_fh3));
2732         return (reply.status);
2733 }
2734
2735 void read_fh_map(char * fh_map_file)
2736 {
2737         FILE * fp;
2738         int i = 0;
2739         char buf[1024];
2740         char trace_fh[TRACE_FH_SIZE];
2741         char intbuf[9];
2742         char * trace_path;
2743         char * p;
2744         int map_flag;
2745 #define MAX_PATH_DEPTH 20
2746         nfs_fh3 parents[MAX_PATH_DEPTH];
2747         char * lookup_path_ptr[MAX_PATH_DEPTH];
2748         char lookup_path [MAX_PLAY_PATH_SIZE];
2749         int depth;
2750         int new_dir_flag = 0;
2751         int lineno = 0;
2752
2753         depth = 0;
2754         memset(lookup_path_ptr, 0, sizeof(lookup_path_ptr));
2755         memcpy(&parents[depth], &(Export_dir.fh_data->sfs_fh_un.f_fh3), sizeof(nfs_fh3));
2756         strcpy(lookup_path, "/");
2757         lookup_path_ptr[depth]=&lookup_path[0];
2758
2759         fp = fopen(fh_map_file, "r");
2760         if (!fp) {
2761                 printf ("can not opern %s\n", fh_map_file);
2762                 perror("open");
2763                 exit (0);
2764         }
2765         RFS_ASSERT (fp!=NULL);
2766         
2767         intbuf[8]=0;
2768
2769         memset(buf, 0, sizeof(buf));
2770         while (fgets(buf, 1024, fp)) {
2771                 lineno ++;
2772                 if (fh_i % 10000==0)
2773                         printf("%d fh_map entry read\n", fh_i);
2774
2775                 RFS_ASSERT (buf[strlen(buf)-1]=='\n');
2776                 buf[strlen(buf)-1]=0;
2777                 if (fh_map_debug) {
2778                         printf("%d fgets return %s\n", fh_i, buf);
2779                         printf("depth %d lookup_path %s\n", depth, lookup_path);
2780                 }
2781                 //for (i=0; i<=depth; i++) 
2782                         //printf("lookup_path_ptr[%d] %s ", i, lookup_path_ptr[i]);
2783                 //printf("\n");
2784 #ifdef COMPRESS_TRACE_FH 
2785                 for (i=0; i<TRACE_FH_SIZE/8; i++) {
2786                         strncpy(intbuf, buf+i*8, 8);
2787                         sscanf(intbuf, "%x", trace_fh+i*8); // maybe it should be 4, anyway we don't compress for now 
2788                 }
2789                 trace_path = buf+TRACE_FH_SIZE*2+1;             /* +1 the trace contains only initial file handle */
2790 #else
2791                 memcpy(trace_fh, buf, TRACE_FH_SIZE);
2792                 trace_path = buf + TRACE_FH_SIZE +1;
2793 #endif
2794 #ifdef TRACE_CONTAIN_LATER_FHANDLE
2795                 trace_path = +=2;       /* +3 if the trace contains both initial and later created file handle */
2796 #endif
2797
2798 #ifdef NO_DEPENDENCY_TABLE
2799                 if (!strncmp (trace_path, "DISCARD", 7) ||
2800                         !strncmp (trace_path, "LN", 2)                  ) {
2801                         map_flag = FH_MAP_FLAG_DISCARD;
2802                         add_fh (map_flag, buf, trace_path, 0);
2803                         continue;
2804                 }
2805 #endif
2806                 
2807                 p = trace_path+strlen(trace_path)-2;
2808                 while (*p!='/')
2809                         p--;
2810                 p++;
2811                 //RFS_ASSERT (p-trace_path<=strlen(lookup_path)+1);
2812                 //RFS_ASSERT (p>trace_path);
2813
2814                 if (strncmp(lookup_path, trace_path, p-trace_path)) {
2815                         printf("strncmp lookup_path %s trace_path %s for length %d\n", lookup_path, trace_path, p-trace_path);
2816                 }
2817                 RFS_ASSERT (!strncmp(lookup_path, trace_path, p-trace_path));
2818                 //while (strncmp(lookup_path, trace_path, p-trace_path)) {      /* one step deeper */
2819                 while (strlen(lookup_path)>p-trace_path && depth>0) {
2820                         //printf("depth--\n");
2821                         if (depth<=0) 
2822                                 printf ("lookup_path %s trace_path %s p-trace_path %d depth %d\n", lookup_path, trace_path, p-trace_path, depth);
2823                         RFS_ASSERT (depth>0);
2824                         *lookup_path_ptr[depth]=0;
2825                         lookup_path_ptr[depth]=0;
2826                         depth--;
2827                 }
2828                 RFS_ASSERT (strlen(lookup_path)==(p-trace_path) || (depth==0));
2829
2830
2831 #ifdef TRACE_CONTAIN_LATER_FHANDLE
2832                 if (buf[TRACE_FH_SIZE*2+1]=='Y') {
2833                         map_flag = FH_MAP_FLAG_COMPLETE;
2834                 } else {
2835                         map_flag = FH_MAP_FLAG_PARTIAL;
2836                         RFS_ASSERT (buf[TRACE_FH_SIZE*2+1]=='N');
2837                 }
2838 #else
2839                 map_flag = FH_MAP_FLAG_COMPLETE;
2840 #endif
2841                 if ((*(p+strlen(p)-1))=='/') {
2842                         *(p+strlen(p)-1)=0;
2843                         new_dir_flag = 1;
2844                 } else 
2845                         new_dir_flag = 0;
2846
2847                 if (map_flag == FH_MAP_FLAG_COMPLETE) {
2848                         int ret = lookup_init_filesystem (&parents[depth], p, &parents[depth+1]);               
2849                         if (ret!=NFS3_OK) {
2850                                 printf ("lineno %d %s\n", lineno, buf);
2851                         }
2852                         RFS_ASSERT (ret == NFS3_OK);
2853                         add_fh (map_flag, buf, trace_path, &parents[depth+1]);  
2854                 } else 
2855                         add_fh (map_flag, buf, trace_path, 0);
2856
2857                 if (new_dir_flag) {
2858                         /* the new fhandle is of a directory */
2859                         lookup_path_ptr[depth+1] = lookup_path+strlen(lookup_path);
2860                         strcat (lookup_path, p);
2861                         strcat (lookup_path, "/");
2862
2863                         //printf("depth++\n");
2864                         depth++;
2865                 }
2866
2867                 memset(buf, 0, sizeof(buf));
2868         }
2869                         
2870         if (fh_map_debug) {
2871                 for (i=0; i<fh_i; i++) {
2872                         int * p1 = (int *)&(fh_map[i].play_fh);
2873 #ifdef COMPRESS_TRACE_FH
2874                         int * p = (int *)fh_map[i].trace_fh;
2875                         printf("fh_map[%d].trace_fh %8x%8x%8x%8x%8x%8x%8x%8x path %s \nnew_filehandle %8x%8x%8x%8x%8x%8x%8x%8x\n",
2876                          i, *p, *(p+1), *(p+2), *(p+3), *(p+4), *(p+5), *(p+6), *(p+7), fh_map[i].path,
2877                          *p1, *(p1+1), *(p1+2), *(p1+3), *(p1+4), *(p1+5), *(p1+6), *(p1+7));
2878 #else
2879                         printf("fh_map[%d].trace_fh %s path %s \nnew_filehandle %8x%8x%8x%8x%8x%8x%8x%8x\n",
2880                          i, fh_map[i].trace_fh, fh_map[i].path,
2881                          *p1, *(p1+1), *(p1+2), *(p1+3), *(p1+4), *(p1+5), *(p1+6), *(p1+7));
2882                 }
2883 #endif
2884
2885                 fprintf(stderr, "total %d requests \n", i);
2886         }
2887 }
2888
2889 int f()
2890 {
2891         return 1;
2892 }
2893
2894 inline free_biod_req (int biod_index)
2895 {
2896         RFS_ASSERT (biod_reqp[biod_index].in_use == TRUE);
2897         biod_reqp[biod_index].in_use = FALSE;
2898         num_out_reqs --;
2899 }
2900
2901 inline void finish_request (int biod_index, int dep_index, int status)
2902 {
2903         /* the ending operation, same as when a request time out */
2904
2905         dep_tab[dep_index].stop = biod_reqp[biod_index].stop;   /* RFS: to dump data */
2906         free_biod_req (biod_index);
2907
2908         dep_tab[dep_index].status = status;
2909         if (event_order_index < EVENT_ORDER_SIZE)
2910                 event_order[event_order_index++] = -dep_tab[dep_index].disk_index;
2911
2912         dep_tab[dep_index].flag = DEP_FLAG_DONE;
2913         if (is_dir_op(dep_tab[dep_index].proc)) {
2914                 int j;
2915                 RFS_ASSERT (dep_tab[dep_index].fh->lock = 1);
2916                 dep_tab[dep_index].fh->lock = 0;
2917                 if (dep_tab[dep_index].proc==RENAME)
2918                         dep_tab[dep_index].fh_2->lock = 0;
2919                 j = dep_tab[dep_index].fh-fh_map;
2920                 if (dependency_debug) {
2921                         printf ("fh_map[%d] is UNlocked\n",j);
2922                         printf ("trace_fh %d path %s\n", dep_tab[dep_index].fh->trace_fh, dep_tab[dep_index].fh->path);
2923                         printf ("trace_fh %d path %s\n", fh_map[j].trace_fh, fh_map[j].path);
2924                 }
2925         }
2926 }
2927
2928 /* the request argument may have pointers pointing to buffers, e.g. the name in lookup, 
2929  * the target of symlink, the write data */
2930 char arg_res[MAX_ARG_RES_SIZE];
2931 int poll_timeout = 0;           /* timeout in usecs */
2932 char buf1 [MAX_BUF1_SIZE]; 
2933 char buf2 [MAX_BUF2_SIZE];
2934
2935 int execute_next_request ()
2936 {
2937         int dep_index;
2938         int proc;
2939         char * line;
2940         struct biod_req * reqp;
2941         sfs_op_type *op_ptr;            /* per operation info */
2942         struct ladtime call_timeout;
2943         static int last_print_time = -1;
2944
2945         if (num_out_reqs == max_biod_reqs) {
2946                 return -1;
2947         }
2948
2949         start_profile (&valid_get_nextop_profile);
2950         start_profile (&invalid_get_nextop_profile);
2951         dep_index = get_nextop();
2952         if (dep_index == -1) {
2953                 end_profile (&invalid_get_nextop_profile);
2954                 return dep_index;
2955         };
2956         end_profile (&valid_get_nextop_profile);
2957
2958         start_profile (&prepare_argument_profile);
2959         line = dep_tab[dep_index].line;
2960
2961         end_profile(&total_profile);
2962         if ((total_profile.in.tv_sec - last_print_time >= 10)) {
2963                 last_print_time = total_profile.in.tv_sec;
2964                 //fprintf (stderr, "time %d processing dep_tab[%d] disk_index %d num_out_reqs %d can_not_catch_speed_num %d PLAY_SCALE %d \n", total_profile.in.tv_sec, dep_index, dep_tab[dep_index].disk_index, num_out_reqs, can_not_catch_speed_num, PLAY_SCALE);
2965                 fprintf (stdout, "time %d processing dep_tab[%d] disk_index %d num_out_reqs %d can_not_catch_speed_num %d PLAY_SCALE %d \n", total_profile.in.tv_sec, dep_index, dep_tab[dep_index].disk_index, num_out_reqs, can_not_catch_speed_num, PLAY_SCALE);
2966 /*
2967                 CYCLIC_PRINT (dep_tab_index);
2968                 {
2969                         int tmp = CYCLIC_MINUS(dep_tab_index.head,1,dep_tab_index.size);
2970                         printf("dep_tab_index.head-1 %d disk_index %d tail %d disk_index %d\n", tmp, dep_tab[tmp].disk_index,
2971                         dep_tab_index.tail, dep_tab[dep_tab_index.tail].disk_index);
2972                 }
2973 */
2974 #ifdef TIME_PLAY
2975 #ifdef SPEED_UP
2976 /*
2977                 if (can_not_catch_speed_num < 2000) {
2978                         PLAY_SCALE ++;
2979                         printf ("set PLAY_SCALE to %d\n", PLAY_SCALE);
2980                 };
2981                 if (can_not_catch_speed_num > 50000) {
2982                         PLAY_SCALE /= 2;
2983                 } else {
2984                         if (can_not_catch_speed_num > 5000) {
2985                                 PLAY_SCALE -= 2;
2986                                 if (PLAY_SCALE < 1)
2987                                         PLAY_SCALE = 1;
2988                         }
2989                 }
2990 */
2991 #endif
2992                 if ((total_profile.in.tv_sec > 100)) {
2993                         can_not_catch_speed_num_total += can_not_catch_speed_num;
2994                 }
2995                 can_not_catch_speed_num = 0;
2996 #endif
2997         }
2998         if (rfs_debug)
2999                 printf ("processing dep_tab[%d] disk_index %d %s\n", dep_index, dep_tab[dep_index].disk_index, line);
3000
3001         proc = dep_tab[dep_index].proc;
3002         rfs_Ops[proc].setarg (dep_index, line, arg_res, buf1, buf2);
3003
3004         op_ptr = &Ops[proc];
3005         reqp = get_biod_req (dep_index);
3006         RFS_ASSERT (reqp);
3007
3008 #ifdef  notdef  /* place to set request timeout. G. Jason Peng */
3009         call_timeout.sec = 2; //Nfs_timers[op_ptr->call_class].tv_sec;
3010         call_timeout.usec = Nfs_timers[op_ptr->call_class].tv_usec;
3011 #else
3012         call_timeout.sec = 0;
3013         call_timeout.usec = 500000;
3014         //call_timeout.usec = 14000;
3015         //call_timeout.usec = 13000;
3016         //call_timeout.usec = 6000;
3017         //call_timeout.usec = 8000;
3018         //call_timeout.usec = 10000;
3019 #endif
3020
3021     /* make the call */
3022     sfs_gettime(&(reqp->start));
3023         end_profile (&prepare_argument_profile);
3024         start_profile (&biod_clnt_call_profile);
3025 #define REAL_PLAY
3026 #ifdef REAL_PLAY
3027     reqp->xid = biod_clnt_call(NFS_client, rfs_Ops[proc].nfsv3_proc, 
3028                                         rfs_Ops[proc].xdr_arg, arg_res);
3029 #else
3030
3031         reqp->xid = dep_index+1;        /* just fake a message id and let it expire */
3032 #endif
3033     RFS_ASSERT (reqp->xid != 0);
3034     reqp->timeout = reqp->start;
3035     ADDTIME (reqp->timeout, call_timeout);
3036     dep_tab[dep_index].flag = DEP_FLAG_SENT;
3037         if (event_order_index < EVENT_ORDER_SIZE)
3038                 event_order[event_order_index++] = dep_tab[dep_index].disk_index;
3039
3040         dep_tab[dep_index].start = reqp->start; /* RFS: to dump data */
3041         end_profile (&biod_clnt_call_profile);
3042 }
3043
3044 void check_reply (int proc, int biod_index, int dep_index, int status, char * errmsg, int trace_status)
3045 {
3046         if (((status!=trace_status)) && (status!=NFS3_OK) && (trace_status!=NFS3ERR_RFS_MISS)) {
3047                 if (!profile_debug)
3048                         printf ("receive problem reply, xid %x nfs_ret %d %s trace_status %d start %d:%d stop %d:%d command disk index %d\n", biod_reqp[biod_index].xid, status, errmsg, trace_status, biod_reqp[biod_index].start.sec, biod_reqp[biod_index].start.usec, biod_reqp[biod_index].stop.sec, biod_reqp[biod_index].stop.usec, dep_tab[dep_index].disk_index); 
3049 #ifndef TAKE_CARE_UNLOOKED_UP_NON_NEW_FILES
3050                 /* these files is not looked up and is not create/mkdir/symlink/link/mknod ed before they
3051                  * are refered by name through rename, remove
3052                  */
3053                 if ((proc==RENAME || proc==REMOVE) && (status==NFS3ERR_NOENT) && (trace_status ==0)) {
3054                         /* current initialization doesnot take care of rename source, if there is no
3055                          * create or lookup before that source, the source object will not exist when
3056                          * rename occurs
3057                          */
3058                         rename_rmdir_noent_reply_num++;
3059                 } else 
3060 #endif
3061 #ifndef TAKE_CARE_SYMBOLIC_LINK
3062                 if ((proc==LOOKUP) && (status==NFS3_OK) && (trace_status==NFS3ERR_NOENT)) {
3063                         /* in the original trace, first lookup return NOENT, then symlink is executed, then lookup return OK
3064                          * the initialization considers only the lookup return OK and created the file in the initialization
3065                          * so in trace play the first lookup return OK
3066                          */
3067                         RFS_ASSERT (1);
3068                 } else // if ((proc==SYMLINK) && (status == NFS3ERR_EXIST) && (trace_status == 0)) {
3069                                 /* trace_status could be EAGAIN */
3070                         if ((proc==SYMLINK) && (status == NFS3ERR_EXIST) ) {
3071                         /* due to similar reason as above, the initialization code initializes the symbolic link as a normal
3072                          * file already
3073                          */
3074                         RFS_ASSERT (1);
3075                 } else
3076 #endif
3077 #ifndef TAKE_CARE_NOEMPTY_RMDIR
3078                 /* the remove packet seems got lost in the trace capture, so replay can not finish */
3079                 if ((proc==RMDIR) && (status==NFS3ERR_NOTEMPTY)) {
3080                         RENAME3args             args;
3081                         RENAME3res              reply;          /* the reply */
3082                         RMDIR3args * rmdir_argp;
3083                         enum clnt_stat rpc_stat;        /* result from RPC call */
3084
3085                         rfs_Ops[proc].setarg (dep_index, dep_tab[dep_index].line, arg_res, buf1, buf2);
3086                         rmdir_argp = (RMDIR3args *)arg_res;
3087
3088                         memcpy(&args.from, &(rmdir_argp->object), sizeof (diropargs3));
3089                         memcpy(&args.to.dir, &(Export_dir.fh_data->sfs_fh_un.f_fh3), sizeof(nfs_fh3));
3090                         args.from.name = buf1;  /* the buf1 is already filled when parsing rmdir */
3091                         args.to.name = buf2;
3092                         sprintf(buf2, "rmdir_%d_%s", dep_tab[dep_index].disk_index, rmdir_argp->object.name);
3093
3094                         rpc_stat = clnt_call(NFS_client, NFSPROC3_RENAME,
3095                         xdr_RENAME3args, (char *) &args,
3096                         xdr_RENAME3res, (char *) &reply,
3097                                 Nfs_timers[Init]);
3098                         RFS_ASSERT (rpc_stat == RPC_SUCCESS);
3099                         if (reply.status!=NFS3_OK)
3100                                 printf ("change rmdir into rename, reply.status %d\n", reply.status);
3101                         RFS_ASSERT (reply.status==NFS3_OK);
3102                         rmdir_not_empty_reply_num ++;
3103 #endif
3104 #ifndef TAKE_CARE_ACCESS_ERROR
3105                 } else if ((status==0) && (trace_status==NFS3ERR_ACCES)) {
3106                         loose_access_control_reply_num ++;
3107 #endif
3108 #ifdef NO_DEPENDENCY_TABLE 
3109                 } else if ((proc==LOOKUP) && (status==NFS3ERR_NOENT) && (trace_status==NFS3_OK)) {
3110                         lookup_err_due_to_rename_num ++;
3111                 } else if ((proc==LOOKUP) && (status==NFS3_OK) && (trace_status == NFS3ERR_NOENT)) {
3112                         /* if there is a remove in front of the lookup, but it is
3113                          * actually executed later than the lookup
3114                          */
3115                         lookup_err_due_to_parallel_remove_num ++;
3116 #endif
3117 #ifndef TAKE_CARE_LOOKUP_EACCESS_ENOENT_MISMATCH
3118                 /* if the looked return EACCESS in the trace, means the object still exists
3119                  * should have initialized, right not don't initialize it, hence play status 
3120                  * could be ENOENT
3121                  */
3122                 } else if ((proc==LOOKUP) && (status==NFS3ERR_NOENT) && (trace_status==NFS3ERR_ACCES)) {
3123                         lookup_eaccess_enoent_mismatch_num ++;
3124 #endif
3125 #ifdef TOLERANT_READ_IO_ERR
3126                 } else if ((proc==READ) && (status==NFS3ERR_IO) && (trace_status==NFS3_OK)) {
3127                         read_io_err_num ++;
3128 #endif
3129 #ifdef TOLERANT_STALE_FHANDLE_ERR
3130                 } else if ((status==NFS3ERR_STALE) && (trace_status==NFS3_OK)) {
3131                         printf ("!!!!!!! STALE FILE HANDLE \n");
3132                         //sleep(1);
3133                         stale_fhandle_err_num ++;
3134 #endif
3135                 } else {
3136                         int i;
3137                         for (i=dep_window_index.tail; CYCLIC_LESS(dep_window_index,i,dep_window_index.head); i++) 
3138                                 printf ("dep_tab[%d].disk_index %d, flag %d line %s\n", i, dep_tab[i].disk_index, dep_tab[i].flag, dep_tab[i].line);
3139                         if (status==EEXIST) {
3140                                 abnormal_EEXIST_num ++;
3141                         } else if (status == ENOENT) {
3142                                 abnormal_ENOENT_num ++;
3143                         } else
3144                                 RFS_ASSERT (0);
3145                 }
3146         } else {
3147                 proper_reply_num ++;
3148                 if (total_profile.in.tv_sec >= WARMUP_TIME) 
3149                         run_stage_proper_reply_num ++;
3150         }
3151
3152 }
3153
3154 /* return -1 if there is no reply being received 
3155  * return the dep_index if the corresponding reply has been received
3156  */
3157 inline int receive_next_reply (int busy_flag)
3158 {
3159         int dep_index;
3160         int biod_index;
3161         int proc;
3162         char * line;
3163         char * reply_line;
3164         sfs_op_type *op_ptr;            /* per operation info */
3165         int ret;
3166         int status;
3167         int trace_status;
3168         char * errmsg;
3169
3170         /* wait for reply */
3171         start_profile (&valid_poll_and_get_reply_profile);
3172         start_profile (&invalid_poll_and_get_reply_profile);
3173
3174         if (busy_flag == BUSY) {
3175                 poll_timeout = 0;
3176                 poll_timeout_0_num ++;
3177         } else {
3178                 poll_timeout = 2000;    /* 10000 or 2000 is a better number in non-debugging state */
3179                 //poll_timeout = 0;     /* 10000 or 2000 is a better number in non-debugging state */
3180                 poll_timeout_pos_num ++;
3181         }
3182
3183         biod_index = poll_and_get_reply (poll_timeout);
3184         if (biod_index==-1) {
3185                 end_profile (&invalid_poll_and_get_reply_profile);
3186                 return -1;
3187         };
3188         end_profile (&valid_poll_and_get_reply_profile);
3189
3190         start_profile (&decode_reply_profile);
3191         /* check the corresponding request */
3192         dep_index = biod_reqp[biod_index].dep_tab_index;
3193         if (biod_reqp[biod_index].in_use==1) {
3194                 RFS_ASSERT (dep_tab[dep_index].biod_req_index == biod_index);
3195         } else {
3196                 printf ("biod_index %d reply received but the request has been time out\n", biod_index);
3197                 return -1;
3198         }
3199
3200         proc = dep_tab[dep_index].proc;
3201         op_ptr = &Ops[proc];
3202
3203         if (dep_tab[dep_index].flag != DEP_FLAG_SENT) {
3204                 printf("dep_tab[%d].flag %d proc %d status %d start %d:%d stop %d:%d\n",
3205                         dep_index, dep_tab[dep_index].flag, proc, dep_tab[dep_index].status, 
3206                         dep_tab[dep_index].start.sec, dep_tab[dep_index].start.usec,
3207                         dep_tab[dep_index].stop.sec, dep_tab[dep_index].stop.usec );
3208                 printf ("received reply for timeout requests dep_tab[%d].disk_index %d\n", dep_index, dep_tab[dep_index].disk_index);
3209                 return dep_index;
3210         }
3211         RFS_ASSERT (dep_tab[dep_index].flag == DEP_FLAG_SENT);
3212
3213         /* decode the reply */
3214         rfs_Ops[proc].setres (arg_res, buf1);
3215         ret = proc_header (NFS_client, rfs_Ops[proc].xdr_res, arg_res);
3216         RFS_ASSERT (ret == RPC_SUCCESS);
3217         status = *((int *)arg_res);
3218         errmsg = nfs3_strerror (status);
3219         end_profile (&decode_reply_profile);
3220
3221         start_profile (&check_reply_profile);
3222         /* compare with the reply in the trace */
3223         line = dep_tab[dep_index].line;
3224         reply_line = dep_tab[dep_index].reply_line;
3225         trace_status = dep_tab[dep_index].trace_status;
3226
3227         /* print the result, trace play progress indicator 
3228         if (rfs_debug || (      !profile_debug && ((dep_index %10000)==0)       ) )
3229                 fprintf (stdout, "dep_tab[%d], disk_index %d, receive reply, rpc_ret %d xid %x nfs_ret %d %s trace_status %d start %d:%d stop %d:%d \n", dep_index, dep_tab[dep_index].disk_index, ret, biod_reqp[biod_index].xid, status, errmsg, trace_status, biod_reqp[biod_index].start.sec, biod_reqp[biod_index].start.usec, biod_reqp[biod_index].stop.sec, biod_reqp[biod_index].stop.usec);
3230         */
3231
3232         /* error checking */
3233         check_reply (proc, biod_index, dep_index, status, errmsg, trace_status);
3234
3235         /* free resources */
3236         finish_request (biod_index, dep_index, status);
3237         
3238         /* we set 100 seconds warm up time */
3239         if ((total_profile.in.tv_sec >= WARMUP_TIME)) {
3240         /* get statistics */
3241         if (status == trace_status || (status==NFS3_OK && trace_status==NFS3ERR_RFS_MISS) ) {
3242                 op_ptr->results.good_calls++;
3243                 Ops[TOTAL].results.good_calls++;
3244         } else {
3245                 op_ptr->results.bad_calls++;
3246                 Ops[TOTAL].results.bad_calls++;
3247         }
3248         sfs_elapsedtime (op_ptr, &(biod_reqp[biod_index].start), &(biod_reqp[biod_index].stop));
3249         end_profile (&check_reply_profile);
3250         }
3251         
3252         //start_profile (&add_create_object_profile);
3253
3254         if (trace_status == NFS3_OK && (proc==CREATE || proc==MKDIR || proc==SYMLINK || proc==MKNOD)) {
3255 #ifndef REDUCE_MEMORY_TRACE_SIZE
3256                 RFS_ASSERT (reply_line);
3257 #endif
3258                 if (status!=NFS3_OK) {
3259                         /* commented out for 1022 */
3260                         printf ("!!!!!! Should have been an ASSERTION FAILURE \n");
3261                         RFS_ASSERT (proc==SYMLINK);
3262                         RFS_ASSERT (0);
3263                 } else {
3264                         if (proc!=SYMLINK || line[TRACE_VERSION_POS]!='2')
3265                                 add_new_file_system_object(proc, dep_index, line, reply_line);
3266                 }
3267         }
3268         //end_profile (&add_create_object_profile);
3269 }
3270
3271 inline void add_new_file_system_object (int proc, int dep_index, char * line, char * reply_line)
3272 {
3273         char * child_trace_fh;
3274         fh_map_t * parent_entryp;
3275         char component_name[MAX_PLAY_PATH_SIZE];
3276         char * parent_trace_fh;
3277         char child_path[MAX_PLAY_PATH_SIZE];
3278         post_op_fh3 * post_op_fh3_child;
3279         char * reply_trace_fh;
3280         nfs_fh3 * child_fh3;
3281
3282         parent_trace_fh = strstr (line, "fh");
3283         RFS_ASSERT (parent_trace_fh);
3284         parent_trace_fh +=3;
3285         parent_entryp = lookup_fh (parent_trace_fh);
3286         RFS_ASSERT (parent_entryp);
3287         parse_name (parent_trace_fh+65, component_name);
3288         strcpy (child_path, parent_entryp->path);
3289         strcat (child_path, "/");
3290         strcat (child_path, component_name);
3291                                 
3292         /* find the corresponding create request */
3293         //printf ("before find reply trace_fh reply_line %s\n", reply_line);
3294 #ifdef REDUCE_MEMORY_TRACE_SIZE
3295         reply_trace_fh = dep_tab[dep_index].reply_trace_fh;
3296 #else
3297         reply_trace_fh = find_reply_trace_fh (reply_line);
3298 #endif
3299         RFS_ASSERT (reply_trace_fh != NULL);
3300         switch (proc) {
3301         case CREATE:
3302                 RFS_ASSERT (((CREATE3res *)arg_res)->res_u.ok.obj.handle_follows==TRUE);
3303                 child_fh3 = &((CREATE3res *)arg_res)->res_u.ok.obj.handle;
3304                 break;
3305         case MKDIR:
3306                 RFS_ASSERT (((MKDIR3res *)arg_res)->res_u.ok.obj.handle_follows==TRUE);
3307                 child_fh3 = &((MKDIR3res *)arg_res)->res_u.ok.obj.handle;
3308                 break;
3309         case SYMLINK:
3310                 RFS_ASSERT (((SYMLINK3res *)arg_res)->res_u.ok.obj.handle_follows==TRUE);
3311                 child_fh3 = &((SYMLINK3res *)arg_res)->res_u.ok.obj.handle;
3312                 break;
3313         case MKNOD:
3314                 RFS_ASSERT (((MKNOD3res *)arg_res)->res_u.ok.obj.handle_follows==TRUE);
3315                 child_fh3 = &((MKNOD3res *)arg_res)->res_u.ok.obj.handle;
3316                 break;
3317         case LOOKUP:
3318                 RFS_ASSERT (proc==LOOKUP);
3319                 child_fh3 = &((LOOKUP3res *)arg_res)->res_u.ok.object;
3320                 break;
3321         default:
3322                 RFS_ASSERT (0);
3323         }
3324 #ifndef REDUCE_MEMORY_TRACE_SIZE
3325         RFS_ASSERT (reply_trace_fh[TRACE_FH_SIZE]==' ');
3326 #endif
3327         reply_trace_fh[TRACE_FH_SIZE] = 0;
3328         add_fh (FH_MAP_FLAG_COMPLETE, reply_trace_fh, child_path, child_fh3);   /* exist flag is not used now, set to 1 */
3329 #ifndef REDUCE_MEMORY_TRACE_SIZE
3330         /* just to preserve the original reply line not changed */
3331         reply_trace_fh[TRACE_FH_SIZE] = ' ';
3332 #endif
3333 }
3334
3335 /* initialize timestamp and proc field of dep_tab entry */
3336 void trace_play(void)
3337 {
3338         
3339         /* The flag to indicate whether trace_player is BUSY. Trace_player is BUSY
3340          * when either there is request to send or there is reply being
3341          * received. Otherwise it is IDLE. The timeout for polling replies 
3342          * is set to 0 when BUSY, it is set to the waiting time to the first
3343          * request outside of current <min_dep_index, dep_window_max> window when IDLE.
3344          */
3345         int busy_flag = BUSY;           
3346         //int dep_index;                /* index into dependency table: dep_tab */
3347         //int biod_index;       /* index into outstanding requests: biod_reqp */
3348
3349         disk_io_status = read_trace();
3350         RFS_ASSERT (!CYCLIC_EMPTY(dep_tab_index));
3351         CYCLIC_MOVE_HEAD(dep_window_index);
3352
3353         adjust_play_window(busy_flag, &poll_timeout);
3354
3355         start_profile (&total_profile);
3356         while (!CYCLIC_EMPTY(dep_tab_index)) {
3357                 if ((total_profile.in.tv_sec > 600)) {
3358                         printf ("the process has run for more than 600 seconds, exit\n");
3359                         goto END;
3360                 }
3361
3362                 if (busy_flag == IDLE) {
3363                         //start_profile (&check_timeout_profile);
3364                         check_timeout();
3365                         //end_profile (&check_timeout_profile);
3366                         if (disk_io_status!=TRACE_FILE_END) {
3367                                 disk_io_status = read_trace();
3368                         };
3369                 }
3370
3371                 //start_profile (&adjust_play_window_profile);
3372                 adjust_play_window (busy_flag, &poll_timeout);
3373                 if (rfs_debug)
3374                         printf("num_out_reqs %d\n", num_out_reqs);
3375                 busy_flag = IDLE;
3376                 //end_profile (&adjust_play_window_profile);
3377
3378                 start_profile (&execute_next_request_profile);
3379                 while (execute_next_request()!=-1) {
3380                         busy_flag = BUSY;
3381                 }
3382                 end_profile (&execute_next_request_profile);
3383
3384
3385                 start_profile (&receive_next_reply_profile);
3386                 /* actually the performance of two policy seems to be same */
3387 //#define SEND_HIGHER_PRIORITY_POLICY
3388 #define SEND_RECEIVE_EQUAL_PRIORITY_POLICY      
3389
3390 #ifdef SEND_HIGHER_PRIORITY_POLICY
3391                 receive_next_reply(IDLE);
3392 #endif
3393 #ifdef SEND_RECEIVE_EQUAL_PRIORITY_POLICY
3394                 busy_flag = IDLE;
3395                 while (receive_next_reply(busy_flag)!=-1)
3396                         busy_flag = BUSY;
3397 #endif
3398                 end_profile (&receive_next_reply_profile);
3399
3400                 CYCLIC_ASSERT (0);
3401         }
3402         end_profile (&total_profile);
3403
3404         RFS_ASSERT (disk_io_status == TRACE_FILE_END);
3405         if (num_out_reqs !=0 ) {
3406                 printf ("num_out_reqs %d\n", num_out_reqs);
3407                 CYCLIC_PRINT(dep_tab_index);
3408         }
3409         RFS_ASSERT(num_out_reqs==0);
3410
3411 END:
3412         printf ("trace starttime %d, trace_end_time %d trace_duration %d\n", trace_timestamp1, trace_timestamp2,
3413                 trace_timestamp2 - trace_timestamp1);
3414         printf ("can_not_catch_speed_num_total %d can_not_catch_speed_num_last_10_seconds %d", 
3415                 can_not_catch_speed_num_total, can_not_catch_speed_num);
3416         printf ("total_profile.about: %s\n", total_profile.about);
3417         print_profile ("total_profile", &total_profile);
3418         printf("\n");
3419         //print_profile ("check_timeout", &check_timeout_profile);
3420         //printf("\n");
3421         //print_profile ("adjust_play_window", &adjust_play_window_profile);
3422         //printf("\n");
3423         print_profile ("execute_next_request_profile", &execute_next_request_profile);
3424         print_profile ("valid_get_nextop_profile", &valid_get_nextop_profile);
3425         print_profile ("invalid_get_nextop_profile", &invalid_get_nextop_profile);
3426         print_profile ("prepare_argument_profile", &prepare_argument_profile);
3427         print_profile ("biod_clnt_call_profile", &biod_clnt_call_profile);
3428         printf("\n");
3429         print_profile ("receive_next_reply_profile", &receive_next_reply_profile);
3430         print_profile ("valid_poll_and_get_reply_profile", &valid_poll_and_get_reply_profile);
3431         print_profile ("invalid_poll_and_get_reply_profile", &invalid_poll_and_get_reply_profile);
3432         print_profile ("decode_reply_profile", &decode_reply_profile);
3433         print_profile ("check_reply_profile", &check_reply_profile);
3434         print_profile ("fgets_profile", &fgets_profile);
3435         print_profile ("read_line_profile", &read_line_profile);
3436         print_profile ("read_trace_profile", &read_trace_profile);
3437         //print_profile ("add_create_object", &add_create_object_profile);
3438         printf("\n");
3439         
3440         printf ("dep_tab_index.tail %d dep_tab_index.head %d num_out_reqs %d\n", dep_tab_index.tail, dep_tab_index.head, num_out_reqs);
3441 }
3442
3443 int CYCLIC_SET_TAIL_TO(cyclic_index_t * index, int dest) 
3444
3445         cyclic_index_t indextmp, indextmp2;
3446         int oldnum, num;
3447         indextmp = *index;
3448         indextmp2 = indextmp;
3449         oldnum = CYCLIC_NUM(indextmp); 
3450
3451         if (! ((dest>=0) && (dest<indextmp.size))) {
3452                 CYCLIC_PRINT(indextmp);
3453                 printf("dest %d\n", dest);
3454         }
3455         RFS_ASSERT ((dest>=0) && (dest<indextmp.size));
3456         index->tail = dest; 
3457         indextmp2.tail = dest;
3458         num = CYCLIC_NUM(indextmp2); 
3459
3460         if (num > oldnum) { 
3461                 CYCLIC_PRINT(indextmp);
3462                 CYCLIC_PRINT(indextmp2);
3463                 printf("dest %d old_num %d num %d\n", dest, oldnum, num);
3464         }
3465         RFS_ASSERT (num <= oldnum);
3466 }
3467
3468 int flush_junk()
3469 {
3470         int i;
3471         for (i=0; i<500; i++) {
3472                 printf ("*************************************************************\n");
3473         }
3474         fflush(stdout);
3475 }
3476
3477 int CYCLIC_ASSERT (int i)
3478 {
3479         int j;
3480         if (!(dep_tab_index.tail == dep_window_index.tail)) {
3481                 printf("%s head %d tail %d, size %d\n", dep_tab_index.name, dep_tab_index.head, dep_tab_index.tail, dep_tab_index.size);
3482                 printf("%s head %d tail %d, size %d\n", dep_window_index.name, dep_window_index.head, dep_window_index.tail, dep_window_index.size);
3483                 printf("pos %d\n", i); 
3484                 flush_junk();
3485                 sleep (10);
3486                 RFS_ASSERT (0);
3487         };
3488  
3489         if (!((dep_window_index.head == dep_tab_index.head) || 
3490                    CYCLIC_LESS(dep_tab_index, dep_window_index.head, dep_tab_index.head ) )) {
3491                 printf("%s head %d tail %d, size %d\n", dep_tab_index.name, dep_tab_index.head, dep_tab_index.tail, dep_tab_index.size);
3492                 printf("%s head %d tail %d, size %d\n", dep_window_index.name, dep_window_index.head, dep_window_index.tail, dep_window_index.size);
3493                 printf("pos %d\n", i); 
3494                 flush_junk();
3495                 sleep (10);
3496                 RFS_ASSERT (0);
3497         };
3498         for (i=0, j=0; i<max_biod_reqs; i++) {
3499                 if (biod_reqp[i].in_use == 1)
3500                         j++;
3501         }
3502         RFS_ASSERT (num_out_reqs==j);
3503 /*
3504                 RFS_ASSERT ((dep_window_index.head == dep_tab_index.head) || 
3505                            CYCLIC_LESS(dep_tab_index, dep_window_index.head, dep_tab_index.head ));
3506 */
3507 }
3508
3509 /* sfs_c_chd.c */