1 // This artificial program runs a lot of code. The exact amount depends on
2 // the command line -- if an arg "0" is given, it does exactly the same
3 // amount of work as mode 1 (the small-code mode), but using four times as much code.
4 // If an arg >= 1 is given, the number of copies of f() is N_LOOPS times this arg.
6 // It's a stress test for Valgrind's translation speed; natively the two
7 // modes run in about the same time (the I-cache effects aren't big enough
8 // to make a difference), but under Valgrind the one running more code is
9 // significantly slower due to the extra translation time.
16 #include <asm/cachectl.h>
17 #include <sys/syscall.h>
18 #elif defined(__tilegx__)
19 #include <asm/cachectl.h>
21 #include "tests/sys_mman.h"
23 #define FN_SIZE 1280 // Must be big enough to hold the compiled f()
24 // and any literal pool that might be used
25 #define N_LOOPS 20000 // Should be divisible by four
26 #define RATIO 4 // Ratio of code sizes between the two modes
31 for (i
= 0; i
< 5000; i
++) {
// main: fills an anonymous mmap'd region with n_fns copies of f()'s machine
// code, calls every copy n_reps times, and prints the summed return values.
// NOTE(review): this excerpt omits some lines of the function (the test that
// selects the mode, the n_reps assignments, the mmap protection flags, the
// braces and the return); the comments below describe only what is visible.
42 int main(int argc
, char* argv
[])
// h and i are the outer/inner loop counters; sum1..sum4 accumulate the
// return values of the four copies called in each inner-loop iteration.
44 int h
, i
, sum1
= 0, sum2
= 0, sum3
= 0, sum4
= 0;
48 // Mode 1: not so much code
// Mode 1 makes N_LOOPS / RATIO copies of f().
49 n_fns
= N_LOOPS
/ RATIO
;
53 // Mode 2: lots of code
// The first command-line argument scales the number of copies. atoi()
// yields 0 for text that does not start with a number.
// NOTE(review): the file header describes special handling for an arg of
// "0"; the code for that case is not visible in this excerpt.
54 const int mul
= atoi(argv
[1]);
58 n_fns
= N_LOOPS
* mul
;
// Report the chosen configuration before doing the work.
62 printf("%d copies of f(), %d reps\n", n_fns
, n_reps
);
// Reserve one FN_SIZE-byte slot per copy in a private anonymous mapping.
// NOTE(review): the protection-flags argument sits on a line missing from
// this excerpt; the region is later written and executed, so confirm it
// requests both.
64 char* a
= mmap(0, FN_SIZE
* n_fns
,
66 MAP_PRIVATE
|MAP_ANONYMOUS
, -1,0);
// Stop here if the mapping could not be created (assert is a no-op when
// compiled with NDEBUG).
67 assert(a
!= (char*)MAP_FAILED
);
69 // Make a whole lot of copies of f(). FN_SIZE is much bigger than f()
70 // will ever be (we hope).
// Each copy reads FN_SIZE bytes starting at f's address, so whatever bytes
// follow f() in memory are copied into the slot as well.
71 for (i
= 0; i
< n_fns
; i
++) {
72 memcpy(&a
[FN_SIZE
*i
], f
, FN_SIZE
);
// Flush the instruction cache over the whole copied region, presumably so
// the freshly written code is what gets executed. The first variant goes
// through syscall(__NR_cacheflush, ...), the second (tilegx) calls
// cacheflush() directly.
// NOTE(review): the opening #if that guards the first variant is not visible.
76 syscall(__NR_cacheflush
, a
, FN_SIZE
* n_fns
, ICACHE
);
77 #elif defined(__tilegx__)
78 cacheflush(a
, FN_SIZE
* n_fns
, ICACHE
);
// Outer loop: repeat the full sweep over all copies n_reps times.
81 for (h
= 0; h
< n_reps
; h
+= 1) {
// Inner loop: visit the copies four at a time. Slots i..i+3 are used in each
// iteration, so this relies on n_fns being a multiple of four.
82 for (i
= 0; i
< n_fns
; i
+= 4) {
// Function pointers to four consecutive copies inside the mapping.
83 int(*f1
)(int,int) = (void*)&a
[FN_SIZE
*(i
+0)];
84 int(*f2
)(int,int) = (void*)&a
[FN_SIZE
*(i
+1)];
85 int(*f3
)(int,int) = (void*)&a
[FN_SIZE
*(i
+2)];
86 int(*f4
)(int,int) = (void*)&a
[FN_SIZE
*(i
+3)];
// Call copy k with (i + k, n_fns - i + k) and add its return value to that
// copy's own running sum.
87 sum1
+= f1(i
+0, n_fns
-i
+0);
88 sum2
+= f2(i
+1, n_fns
-i
+1);
89 sum3
+= f3(i
+2, n_fns
-i
+2);
90 sum4
+= f4(i
+3, n_fns
-i
+3);
// Print the total of the four running sums.
95 printf("result = %d\n", sum1
+ sum2
+ sum3
+ sum4
);