Tidy ups of mapping code
[nativeclient.git] / tools / syscall_stats.py
blob a4435a1720ae02715469b7ad697432201d466c45
1 #!/usr/bin/python
3 # Copyright 2008, Google Inc.
4 # All rights reserved.
5 #
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are
8 # met:
9 #
10 # * Redistributions of source code must retain the above copyright
11 # notice, this list of conditions and the following disclaimer.
12 # * Redistributions in binary form must reproduce the above
13 # copyright notice, this list of conditions and the following disclaimer
14 # in the documentation and/or other materials provided with the
15 # distribution.
16 # * Neither the name of Google Inc. nor the names of its
17 # contributors may be used to endorse or promote products derived from
18 # this software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 """
35 Filter service runtime logging output and compute system call statistics.
37 To use this script, define the BENCHMARK symbol to be zero (default)
38 in nacl_syscall_hook.c. Next, run the service runtime with NACLLOG
39 set to an output file name. When the run is complete, run this script
40 with that file as input.
42 """
44 import math
45 import re
46 import sys
class Stats(object):
    """Compute basic statistics over a stream of values.

    Only the number of values, their sum and the sum of their squares
    are kept, so values can be entered one at a time without being
    stored.
    """

    def __init__(self):
        self._sum_x = 0.0
        self._sum_x_squared = 0.0
        self._n = 0

    def Enter(self, val):
        """Enter a new value.

        Args:
          val: the new (floating point) value
        """
        self._sum_x += val
        self._sum_x_squared += val * val
        self._n += 1

    def Mean(self):
        """Returns the mean of entered values.

        Raises:
          ZeroDivisionError: if no value has been entered yet.
        """
        return self._sum_x / self._n

    def Variance(self):
        """Returns the variance of entered values.

        The value is computed as mean(x^2) - mean(x)^2 and is never
        negative.

        Raises:
          ZeroDivisionError: if no value has been entered yet.
        """
        mean = self.Mean()
        variance = self._sum_x_squared / self._n - mean * mean
        # Floating point rounding can push the difference a hair below
        # zero (e.g. when all values are equal); clamp so that Stddev()
        # does not hand math.sqrt a negative argument.
        return max(0.0, variance)

    def Stddev(self):
        """Returns the standard deviation of entered values.

        Raises:
          ZeroDivisionError: if no value has been entered yet.
        """
        return math.sqrt(self.Variance())

    def NumEntries(self):
        """Returns the number of data points entered."""
        return self._n
class PeakStats(object):
    """Compute min and max for a data set.

    While far less efficient than using a reduce, this class makes
    streaming data handling easier.

    Before any datum has been entered, Min() returns 1 << 64 and Max()
    returns -1.
    """

    def __init__(self):
        # Placeholder results reported while the data set is empty.
        self._min = 1 << 64
        self._max = -1
        # False until the first datum arrives.
        self._seen = False

    def Enter(self, val):
        """Enter a new datum.

        Args:
          val: the new datum to be entered.
        """
        if not self._seen:
            # Seed both extremes from the first datum rather than
            # comparing against the placeholders, so that data below -1
            # or above 1 << 64 is tracked correctly.
            self._min = val
            self._max = val
            self._seen = True
            return
        if val > self._max:
            self._max = val
        if val < self._min:
            self._min = val

    def Max(self):
        """Returns the maximum value found so far."""
        return self._max

    def Min(self):
        """Returns the minimum value found so far."""
        return self._min
class WindowedRate(object):
    """Compute statistics on events by counting occurrences per interval.

    Events are bucketed into consecutive time windows of a fixed
    duration, the first of which starts at the first event. Statistics
    on these bucketed counts are then available. Windows in which no
    event occurred are skipped; they are not recorded as zero counts.
    """

    def __init__(self, duration):
        """Create a rate counter.

        Args:
          duration: the length of each counting window, in the same
            units as the times passed to Enter. Must be positive.

        Raises:
          ValueError: if duration is not positive (the window could
            never advance past a later event).
        """
        if duration <= 0:
            raise ValueError('window duration must be positive')
        self._t_start = None
        self._t_duration = duration
        self._t_end = None
        self._event_count = 0
        self._rate_stats = Stats()
        self._peak_stats = PeakStats()

    def Enter(self, t):
        """Enter in a new event that occurred at time t.

        Times are expected to be non-decreasing from call to call.

        Args:
          t: the time at which an event occurred.
        """
        if self._t_start is None:
            # The first event opens the first window and belongs to it.
            self._t_start = t
            self._t_end = t + self._t_duration
            self._event_count = 1
            return
        # The current window is [ t_start, t_start + duration ).
        if t < self._t_end:
            self._event_count += 1
            return
        # The event lies beyond the current window: record the finished
        # bucket and start a new one holding this event.
        self.Compute()
        self._event_count = 1
        # Advance in whole windows until t lies inside the half-open
        # interval; "<=" because an event exactly at the old end belongs
        # to the following window.
        next_end = self._t_end
        while next_end <= t:
            next_end += self._t_duration
        self._t_end = next_end
        self._t_start = next_end - self._t_duration

    def Compute(self):
        """Finalize the last bucket.

        Records the current bucket's event count in the statistics
        objects and resets the count. Call once after the final event.
        """
        self._rate_stats.Enter(self._event_count)
        self._peak_stats.Enter(self._event_count)
        self._event_count = 0

    def RateStats(self):
        """Returns the event rate statistics object."""
        return self._rate_stats

    def PeakStats(self):
        """Returns the peak event rate statistics object."""
        return self._peak_stats
class TimestampParser:
    """Stateful converter from log timestamp strings to numbers.

    Log timestamps have the form HH:MM:SS.fract and carry no date (it
    would bloat the log and is usually implicit in the file's
    modification time), so a run may wrap past midnight. The parser
    remembers the last value it returned and shifts later readings
    forward by whole days so the numeric result, measured from an
    arbitrary epoch, does not go backwards.

    This code assumes that the timestamps are monotonically
    non-decreasing.
    """

    def __init__(self):
        # -1 means "no timestamp converted yet".
        self._min_time = -1

    def Convert(self, timestamp):
        """Converts a timestamp string into a numeric timestamp value.

        Args:
          timestamp: A timestamp string in HH:MM:SS.fraction format.

        Returns:
          a numeric timestamp in seconds (arbitrary epoch)
        """
        hours, minutes, seconds = [
            float(field) for field in timestamp.split(':')]
        result = ((hours * 60) + minutes) * 60 + seconds
        # The very first reading is its own lower bound.
        if self._min_time == -1:
            floor = result
        else:
            floor = self._min_time
        # A reading below the previous one means midnight was crossed;
        # add days until the sequence is non-decreasing again.
        while result < floor:
            result += 24 * 60 * 60
        self._min_time = result
        return result
def ReadFileHandle(fh, duration):
    """Read log data from fh and print system call rate statistics.

    Lines that do not look like system call log records are ignored.
    The report is written to standard output. If the log does not span
    a positive amount of time (no records, a single record, or all
    records at the same timestamp), a note is written to standard error
    and no statistics are printed.

    Args:
      fh: an iterable of log lines, e.g. an open file.
      duration: the window length, in seconds, used for the bucketed
        "syscalls per interval" statistics.
    """
    # Log format is "[pid:timestamp] msg", where the timestamp is
    # HH:MM:SS.fraction.
    log_re = re.compile(r'\[[0-9,]+:([:.0-9]+)\] system call [0-9]+')
    parser = TimestampParser()
    inter_stats = Stats()
    rate_stats = Stats()
    windowed = WindowedRate(duration)
    prev_time = None
    start_time = None
    for line in fh:
        m = log_re.search(line)
        if m is None:
            continue
        t = parser.Convert(m.group(1))
        windowed.Enter(t)
        if prev_time is None:
            start_time = t
        else:
            elapsed = t - prev_time
            inter_stats.Enter(elapsed)
            # Two records can carry the same timestamp; an instantaneous
            # rate is undefined for them, so leave them out rather than
            # divide by zero.
            if elapsed > 0:
                rate_stats.Enter(1.0 / elapsed)
        prev_time = t

    if prev_time is None or prev_time <= start_time:
        sys.stderr.write(
            'not enough system call records to compute statistics\n')
        return

    # Account for the final, still-open window; otherwise its events
    # never reach the windowed statistics.
    windowed.Compute()

    out = sys.stdout.write
    out('\nInter-syscall time\n')
    out('Mean: %g\n' % inter_stats.Mean())
    out('Stddev: %g\n' % inter_stats.Stddev())
    out('\nInstantaneous Syscall Rate (unweighted!)\n')
    out('Mean : %g\n' % rate_stats.Mean())
    out('Stddev: %g\n' % rate_stats.Stddev())
    # Number of inter-record intervals over the total time spanned.
    out('\nAvg Syscall Rate: %g\n' % (inter_stats.NumEntries()
                                      / (prev_time - start_time)))

    out('\nSyscalls in %f interval\n' % duration)
    out('Mean: %g\n' % windowed.RateStats().Mean())
    out('Stddev: %g\n' % windowed.RateStats().Stddev())
    out('Min: %g\n' % windowed.PeakStats().Min())
    out('Max: %g\n' % windowed.PeakStats().Max())
def main(argv):
    """Run the statistics report over standard input.

    Args:
      argv: the command line, program name first; no further arguments
        are accepted.

    Returns:
      0 on success, 1 if any argument was supplied.
    """
    if len(argv) <= 1:
        # Log data comes from stdin; bucket the rate in 10 ms windows.
        ReadFileHandle(sys.stdin, 0.010)
        return 0
    sys.stderr.write('no arguments expected\n')
    # Second newline: the one a print statement appends after the message.
    sys.stderr.write('\n')
    return 1
if __name__ == '__main__':
    # Script entry point: main()'s return value becomes the exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)