Port it to use the new Linux perf_event API rather than the old perfmon patch.
[beedb.git] / perf / perftest.cc
blob9638e339bcac7faa7793bef50fb5c126670a97ba
1 /*
2 Copyright 2009 Kristian Nielsen
4 This file is part of BeeDB.
6 BeeDB is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 2 of the License, or
9 (at your option) any later version.
11 BeeDB is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with BeeDB. If not, see <http://www.gnu.org/licenses/>.
22 Helper code for BeeDB performance regression testing.
26 Need to put this early, as it does not work after some other system header
27 might include inttypes.h
29 #include "port/format_macros.h"
31 #include <errno.h>
32 #include <sys/types.h>
33 #include <unistd.h>
34 #include <sys/time.h>
35 #include <time.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <stdio.h>
40 #include "beedb.h"
41 #include "perftest.h"
44 For now we are based on libpfm4/perf_event to do our stuff.
46 We might want later to support other methods or at least fallback to a
47 simple portable time measurement. But for now it's important to get some
48 results.
50 Initially there are even Core 2 specific parts in the list of performance
51 counters to use, though that should be easy to extend to other CPUs later.
55 perftest::perftest()
57 static bool perfmon_inited= false;
58 if (!perfmon_inited)
60 int ret= pfm_initialize();
61 if (ret != PFM_SUCCESS)
62 fatal_error("Cannot initialize libperfmon: %s\n", pfm_strerror(ret));
63 perfmon_inited= true;
66 memset(perf_counter_values, 0, sizeof(perf_counter_values));
67 setup_perfmon();
70 perftest::~perftest()
72 pfm_terminate();
75 void
76 perftest::run_test(test *t, uint64_t loops)
78 printf("T: %s", t->text);
79 if (t->variant != NULL)
80 printf(" {%s}", t->variant);
81 if (t->param1 != NULL)
83 printf(" [%s", t->param1);
84 if (t->param2 != NULL)
85 printf(";%s", t->param2);
86 printf("]");
88 printf(" I=%" PRIu64, loops);
89 if (t->workunits != 0)
90 printf(" U=%" PRIu64, t->workunits);
91 printf("\n");
93 /* First run it without timing, to warm up caches etc. */
94 t->prepare(-1);
95 t->run(loops);
97 /* Now run the test over all the performance counter runs. */
98 for (int run= 0; run < num_runs; run++)
100 t->prepare(run);
101 t->run(loops);
104 printf(" Seconds: %.4f", t->elapsed_time[0]);
105 for (int run= 1; run < num_runs; run++)
106 printf(" %+.4f", t->elapsed_time[run] - t->elapsed_time[0]);
107 printf("\n");
108 report_perfmon();
111 double
112 perftest::gettime(void)
114 struct timeval tv;
115 if (gettimeofday(&tv, NULL))
117 perror("gettimeofday()");
118 exit(1);
120 return (double)tv.tv_sec + (double)tv.tv_usec*1e-6;
123 void
124 perftest::fatal_error(const char *format, ...)
126 va_list ap;
127 va_start(ap, format);
128 fatal_error(format, ap);
129 va_end(ap);
132 void
133 perftest::fatal_error(const char *format, va_list ap)
135 vfprintf(stderr, format, ap);
136 exit(1);
139 void
140 perftest::prepare_perfmon_event(int idx, int run, const char *event_name)
143 ToDo: This sets us to count only while in user mode (PFM_PLM3).
144 We may want to make this configurable per-test or something.
146 int ret= pfm_get_perf_event_encoding(event_name, PFM_PLM3,
147 &(perf_attr[run][idx]), NULL, NULL);
148 if (ret != PFM_SUCCESS)
149 fatal_error("Failed to prepare event '%s': %s\n",
150 event_name, pfm_strerror(ret));
151 perf_event_names[run][idx]= event_name;
/*
  Core 2 specific event names. Each row supplies the two events that
  setup_perfmon() puts in counter slots 2 and 3 for the run with the same
  index as the row.
*/
static struct {
  const char *event0;
  const char *event1;
} core2_events[]= {
  {
    "RS_UOPS_DISPATCHED",
    "UOPS_RETIRED:ANY"
  },
  {
    "INST_RETIRED:LOADS",
    "INST_RETIRED:STORES"
  },
  {
    "BRANCH_INSTRUCTIONS_RETIRED",
    "MISPREDICTED_BRANCH_RETIRED"
  },
  {
    "MEM_LOAD_RETIRED:L1D_LINE_MISS",
    "CYCLES_L1I_MEM_STALLED"
  },
  {
    "MEM_LOAD_RETIRED:L2_LINE_MISS",
    "RS_UOPS_DISPATCHED_NONE"
  },
  {
    "MEM_LOAD_RETIRED:DTLB_MISS",
    "L1D_PREFETCH:REQUESTS"
  }
};
166 void
167 perftest::setup_perfmon()
169 memset(perf_attr, 0, sizeof(perf_attr));
170 /* Compute performance counter config for all runs. */
171 for (int run= 0; run < num_runs; run++)
173 prepare_perfmon_event(0, run, "UNHALTED_CORE_CYCLES");
174 prepare_perfmon_event(1, run, "INSTRUCTIONS_RETIRED");
175 prepare_perfmon_event(2, run, core2_events[run].event0);
176 prepare_perfmon_event(3, run, core2_events[run].event1);
177 for (unsigned i= 0; i < num_counters; i++)
179 /* Type, config, and exclude_* are set in prepare_perfmon_event(). */
180 perf_attr[run][i].size= sizeof(perf_attr[run][i]);
181 /* Could later add time enabled/running. */
182 perf_attr[run][i].read_format= PERF_FORMAT_GROUP;
183 perf_attr[run][i].disabled= (i == 0);
184 perf_attr[run][i].pinned= (i == 0);
189 perftest::test::test(perftest *tester, const char *text, const char *variant,
190 const char *param1, const char *param2, uint64_t workunits)
191 : text(text), variant(variant), param1(param1), param2(param2),
192 workunits(workunits), tester(tester)
196 void
197 perftest::test::fatal_error(const char *format, ...)
199 va_list ap;
200 va_start(ap, format);
201 tester->fatal_error(format, ap);
202 va_end(ap);
205 void
206 perftest::test::start()
208 if (current_run < 0)
209 return;
211 start_time= gettime();
212 tester->start_perfmon(current_run);
215 void
216 perftest::test::record_time(const char *text)
218 if (current_run < 0)
219 return;
221 tester->record_perfmon(current_run);
222 elapsed_time[current_run]= gettime() - start_time;
223 tester->record_perfmon_not_time_critical(current_run);
226 void
227 perftest::start_perfmon(int run)
229 if (skip_perfmon)
230 return;
232 for (unsigned i= 0; i < num_counters; i++)
234 perf_fds[i]= perf_event_open(&(perf_attr[run][i]), getpid(), -1,
235 (i ? perf_fds[0] : -1), 0);
236 if (perf_fds[i] < 0)
237 fatal_error("perf_event_open(%d) failed: ret=%d errno=%d (%s)\n",
238 i, perf_fds[i], errno, strerror(errno));
240 int ret= ioctl(perf_fds[i], PERF_EVENT_IOC_RESET, 0);
241 if (ret)
242 fatal_error("ioctl(%d, PERF_EVENT_IOC_RESET) error, ret=%d errno=%d\n",
243 i, ret, errno);
245 int ret= ioctl(perf_fds[0], PERF_EVENT_IOC_ENABLE, 0);
246 if (ret)
247 fatal_error("ioctl(0, PERF_EVENT_IOC_ENABLE) error, ret=%d errno=%d\n",
248 ret, errno);
251 void
252 perftest::record_perfmon(int run)
254 if (skip_perfmon)
255 return;
257 int ret= ioctl(perf_fds[0], PERF_EVENT_IOC_DISABLE, 0);
258 if (ret)
259 fatal_error("ioctl(0, PERF_EVENT_IOC_DISABLE, 0) failed: ret=%d errno=%d\n",
260 ret, errno);
264 We split this out from record_perfmon(), as it is not time critical (once
265 the counters are stopped, the read out can happen at any point after with no
266 difference in result), so other time critical tasks (ie. stopping wallclock)
267 can be done first.
269 void
270 perftest::record_perfmon_not_time_critical(int run)
272 uint64_t buf[num_counters+1];
274 if (skip_perfmon)
275 return;
277 int actual= read(perf_fds[0], &(buf[0]), sizeof(buf));
278 if (actual != sizeof(buf))
279 fatal_error("Perf_event read(%d) failed: ret=%d errno=%d (%s)\n",
280 perf_fds[0], actual, errno, strerror(errno));
281 if (buf[0] != num_counters)
282 fatal_error("Perf_event read() returned %"PRIu64", expected %u.\n",
283 buf[0], num_counters);
284 for (unsigned i= 0; i < num_counters; i++)
286 perf_counter_values[run][i]= buf[i+1];
287 close(perf_fds[i]);
/*
  Return the number of decimal digits needed to print value (1 for zero,
  up to 20 for the largest 64-bit value).
*/
static int
get_num_digits_64(uint64_t value)
{
  int digits= 1;

  /* Strip one decimal digit at a time until a single digit remains. */
  for (; value >= 10; value/= 10)
    digits++;

  return digits;
}
306 void
307 perftest::report_perfmon()
309 const char *name;
311 /* To get a nicely aligned output, we first get the lengths of everything. */
312 int counter_name_max= 1;
313 int counter_value_max= 1;
314 int counter_delta_max[num_runs - 1];
315 for (int run= 1; run < num_runs; run++)
316 counter_delta_max[run - 1]= 0;
318 for (int run= 0; run < num_runs; run++)
320 for (unsigned int i= 0; i < num_counters; i++)
322 name= perf_event_names[run][i];
323 int len= strlen(name);
324 if (len > counter_name_max)
325 counter_name_max= len;
327 For the fixed counters, which are the same in every run, we output
328 the value only in the first run, and deltas for the rest.
330 if (run == 0 || i >= num_fixed_counters)
332 len= get_num_digits_64(perf_counter_values[run][i]);
333 if (len > counter_value_max)
334 counter_value_max= len;
336 else
338 uint64_t first_value= perf_counter_values[0][i];
339 uint64_t this_value= perf_counter_values[run][i];
340 uint64_t delta;
341 if (first_value >= this_value)
342 delta= first_value - this_value;
343 else
344 delta= this_value - first_value;
345 len= get_num_digits_64(delta);
346 if (len > counter_delta_max[run - 1])
347 counter_delta_max[run - 1]= len;
352 for (unsigned int i= 0; i < num_fixed_counters; i++)
354 name= perf_event_names[0][i];
355 uint64_t first_value= perf_counter_values[0][i];
356 printf(" %s %-*s %*" PRIu64, (i == 0 ? "F: " : " "),
357 counter_name_max, name,
358 counter_value_max, first_value);
359 for (int run= 1; run < num_runs; run++)
361 int64_t delta;
362 uint64_t value= perf_counter_values[run][i];
363 if (value > first_value)
364 delta= (int64_t)(value - first_value);
365 else
366 delta= -(int64_t)(first_value - value);
367 printf(" %+*" PRIi64, 1 + counter_delta_max[run - 1], delta);
369 printf("\n");
372 for (int run= 0; run < num_runs; run++)
374 for (unsigned int i= num_fixed_counters; i < num_counters; i++)
376 name= perf_event_names[run][i];
377 uint64_t value= perf_counter_values[run][i];
378 if (i == num_fixed_counters)
379 printf(" V%d: ", run + 1);
380 else
381 printf(" ");
382 printf("%-*s %*" PRIu64 "\n",
383 counter_name_max, name, counter_value_max, value);
388 perftest::test::~test()