4 # script to transform access logs into WebStone workload
6 # created 18 December 1995 mblakele@engr.sgi.com
9 # usage: mine-logs.pl access.log
11 # 1. For each line in the input
12 # a. parse the URL and the time, the outcome code, and the size
13 # b. if the code is 200, and it's a GET,
14 # do we already know about this URL?
15 # i. yes - increment its counter
16 # ii. no - create a slot for it, record size,
# Progress indicator: write a single dot to STDERR whenever the running
# line count is an exact multiple of 1000.
printf STDERR "." unless $line_number % 1000;
29 ( $client, $junk1, $junk2, $date, $timezone,
30 $command, $url, $version, $result_code, $size ) =
# Debug trace of the fields parsed from the current log line.
# This is print, not printf: the text comes straight from the log, and a
# '%' in it (for example a %20 escape in $url) would otherwise be taken
# as a format directive and garble the message.
print STDERR "$client, $date, $command, $url, $result_code, $size\n" if $debug;
38 # is it a GET? Did it succeed? (i.e., is the result code 200?)
39 if (($command eq 'GET') && ($result_code == 200)) {
40 # is this URL already in the key set?
41 if (exists $counter{$url}) {
# Debug trace: this URL has been seen before.
# print, not printf, so that '%' escapes inside $url are emitted verbatim
# instead of being parsed as format directives.
print STDERR "URL $url already in key set: incrementing\n" if $debug;
# The size on this log line disagrees with the size recorded for the URL.
# (The guard used to be '==', which reported a "mismatch" only when the two
# sizes were equal and therefore never refreshed the stored size.)
if ($size != $size{$url}) {
    # print, not printf: '%' escapes in $url must not be read as format
    # directives.
    print STDERR "size mismatch on $url: $size != $size{$url}\n" if $debug;
    # Record the size just seen.  The former inner test used '<=>' as a
    # boolean, which is only another way of writing '!=' and is already
    # guaranteed by the enclosing condition.
    $size{$url} = $size;
# URL isn't in key set
# Debug trace uses print, not printf, so that '%' escapes inside $url are
# emitted verbatim instead of being parsed as format directives.
print STDERR "URL $url isn't in key set: adding size $size\n" if $debug;
# now we print out a workload file
# The header is written with print rather than printf: $0 and $date are
# interpolated into the string, and a '%' in either one would otherwise be
# interpreted as a format directive.
print "# WebStone workload file\n# \tgenerated by $0 $date\n#\n";
# Build the list of known URLs, ordered by the by_counter comparison routine.
@sorted_keys = sort by_counter keys %counter;
72 # iterate through sorted keys
73 foreach $key (@sorted_keys) {
# print url, weighting, and (commented) the size in bytes
print STDERR "printing data for $key\n" if $debug;
# $key is a URL taken from the access log and frequently contains '%xx'
# escapes.  printf would parse those as format directives and corrupt the
# workload line, so the record is written with print.
print "$key\t$counter{$key}\t#$size{$key}\n";
# Sort comparison: larger hit counts come first.  URLs with equal counts
# fall back to string order on the URL, so the result does not depend on
# the order in which the keys were handed to sort.
# NOTE(review): assumes this expression is the entire body (and hence the
# return value) of the sort comparator -- confirm against the enclosing sub.
($counter{$b} <=> $counter{$a}) || ($a cmp $b);