Merge pull request #2303 from jwillemsen/jwi-803
[ACE_TAO.git] / ACE / apps / JAWS / clients / WebSTONE / bin / mine-logs.pl
blob6e14d731bfce0ed4c2e0e689c98ace4de8a49a30
1 #!/pkg/gnu/bin//perl
3 # mine-logs.pl:
4 # script to transform access logs into WebStone workload
6 # created 18 December 1995 mblakele@engr.sgi.com
8 # functional map:
9 # usage: mine-logs.pl access.log
11 # 1. For each line in the input
12 # a. parse the URL and the time, the outcome code, and the size
13 # b. if the code is 200, and it's a GET,
14 # do we already know about this URL?
15 # i. yes - increment its counter
16 # ii. no - create a slot for it, record size,
17 # and set counter=1
use strict;
use warnings;

# Set to a true value to trace every parsed record and decision on STDERR.
my $debug       = 0;
my $line_number = 0;

# Per-URL hit count and response size in bytes, both keyed by URL.
# These are package variables (not "my") because the by_counter sort
# routine further down the file reads %counter.
our %counter;
our %size;

# Pass 1: tally every successful GET found in the access log(s) named on
# the command line (or on STDIN when no file is given).
while ( my $line = <> ) {
    chomp $line;

    $line_number++;

    # progress indicator: one dot per 1000 input lines
    print STDERR '.' unless $line_number % 1000;

    # parse line: whitespace-separated fields; the positions we do not
    # use (two after the client, the timezone, the protocol version) are
    # discarded.
    my ( $client, undef, undef, $date, undef,
         $command, $url, undef, $result_code, $size ) = split ' ', $line;

    # A record without all ten fields cannot be interpreted reliably.
    next unless defined $size;

    # strip some junk: the quote that opens the request field and the
    # bracket that opens the timestamp field
    $command =~ s/"//;
    $date    =~ s/\[//;

    # Common Log Format writes "-" instead of 0 when no bytes were sent;
    # normalise it so the numeric comparisons below are well defined.
    $size = 0 if $size eq '-';

    # NOTE: print, not printf -- the interpolated log data may contain
    # "%" (e.g. URL escapes like %20), which printf would treat as
    # format directives.
    print STDERR "$client, $date, $command, $url, $result_code, $size\n"
        if $debug;

    # is it a GET? Did it succeed? (i.e., is the result code 200?)
    next unless $command eq 'GET' && $result_code eq '200';

    # is this URL already in the key set?
    if ( exists $counter{$url} ) {
        # URL is in key set
        print STDERR "URL $url already in key set: incrementing\n" if $debug;
        $counter{$url}++;

        # The same URL was logged with a different size.  Keep the
        # largest value seen (presumably smaller values are interrupted
        # transfers -- confirm if exact sizes matter).
        if ( $size != $size{$url} ) {
            print STDERR "size mismatch on $url: $size != $size{$url}\n"
                if $debug;
            $size{$url} = $size if $size > $size{$url};
        }
    }
    else {
        # URL isn't in key set
        print STDERR "URL $url isn't in key set: adding size $size\n"
            if $debug;
        $counter{$url} = 1;
        $size{$url}    = $size;
    }
    # end if key set
}
# end of input file: terminate the row of progress dots
print STDERR "\n";

# Pass 2: now we print out a workload file on STDOUT

# first, the headline
my $generated_at = `date`;
$generated_at = '' unless defined $generated_at;
chomp $generated_at;
print "# WebStone workload file\n# \tgenerated by $0 $generated_at\n#\n";

# next, sort the keys by descending hit count
my @sorted_keys = sort by_counter keys(%counter);

# iterate through sorted keys
foreach my $key (@sorted_keys) {
    # print url, weighting, and (commented) the size in bytes
    print STDERR "printing data for $key\n" if $debug;
    print "$key\t$counter{$key}\t#$size{$key}\n";
}
# end foreach

# end main
# by_counter: sort comparator for URLs (used as "sort by_counter LIST").
# Orders by descending hit count as recorded in the package hash
# %counter.  URLs with equal counts are ordered alphabetically so the
# result does not depend on hash iteration order and is therefore
# reproducible from run to run.
# Reads the sort globals $a and $b; returns a negative, zero, or
# positive number as sort expects.
sub by_counter {
    return $counter{$b} <=> $counter{$a}
        || $a cmp $b;
}
# end by_counter
88 # end mine-logs.pl