1 // Mini-benchmark for tsan VTS worst case performance
3 // 1) Spawn M + N threads (M >> N)
4 // We'll call the 'M' threads 'garbage threads'.
5 // 2) Make sure all threads have been created, so no TIDs were reused
6 // 3) Join the garbage threads
7 // 4) Do many sync operations on the remaining N threads
9 // It turns out that due to O(M+N) VTS complexity the (4) is much slower with
13 // a) clang++ native O1 with n_iterations=200kk takes
15 // clang++ tsanv2 O1 with n_iterations=20kk takes
18 // i.e. tsanv2 is ~23x to ~47x slower than native, depending on M.
19 // b) g++ native O1 with n_iterations=200kk takes
20 // 5.5s regardless of M
21 // g++ tsanv1 O1 with n_iterations=2kk takes
24 // i.e. tsanv1 is ~370x to ~720x slower than native, depending on M.
// Minimal wrapper around a pthread mutex. Instances are stored in the global
// `mutexes` array and locked/unlocked by the worker threads.
// The aligned(64) attribute places every instance on a 64-byte boundary;
// presumably this keeps adjacent array elements on separate cache lines
// (NOTE(review): the intent is not stated in the visible source — confirm).
// NOTE(review): the declaration of m_ and the end of the class body are not
// part of this view.
31 class __attribute__((aligned(64))) Mutex
{
// Initializes m_ with default attributes (NULL attribute argument).
// The return value of pthread_mutex_init is not checked.
33 Mutex() { pthread_mutex_init(&m_
, NULL
); }
// Destroys m_; the return value is not checked.
34 ~Mutex() { pthread_mutex_destroy(&m_
); }
// Acquires m_ via pthread_mutex_lock; the return value is not checked.
35 void Lock() { pthread_mutex_lock(&m_
); }
// Releases m_ via pthread_mutex_unlock; the return value is not checked.
36 void Unlock() { pthread_mutex_unlock(&m_
); }
// Number of mutexes in the global pool that the worker threads cycle through.
42 const int kNumMutexes
= 1024;
// The mutex pool itself; indexed modulo kNumMutexes by Thread().
43 Mutex mutexes
[kNumMutexes
];
// n_threads: number of worker threads running Thread().
// n_iterations: lock/unlock pairs performed by each worker.
// Both are assigned in main() before any thread is created.
45 int n_threads
, n_iterations
;
// all_threads_ready: initialized in main() for
//   n_garbage_threads + n_threads + 1 participants (every spawned thread
//   plus the main thread).
// main_threads_ready: initialized in main() for n_threads + 1 participants
//   (the workers plus the main thread).
47 pthread_barrier_t all_threads_ready
, main_threads_ready
;
// Entry point of a "garbage" thread. The argument is unused (main() passes
// NULL). The thread blocks on all_threads_ready until every spawned thread
// and the main thread have reached the barrier.
// NOTE(review): the rest of the body (return statement, closing brace) is
// not part of this view.
49 void* GarbageThread(void *unused
) {
50 pthread_barrier_wait(&all_threads_ready
);
// Entry point of a worker thread. main() passes the worker's index, cast to
// void*, as `arg`.
// NOTE(review): the declaration of `idx` is not part of this view; it is
// printed with %ld and presumably derived from `arg` — confirm.
// NOTE(review): the closing braces of the loop and of the function, and the
// return statement, are not part of this view.
54 void *Thread(void *arg
) {
// First rendezvous: wait until every spawned thread and the main thread have
// reached the barrier.
56 pthread_barrier_wait(&all_threads_ready
);
58 // Wait for the main thread to join the garbage threads.
59 pthread_barrier_wait(&main_threads_ready
);
61 printf("Thread %ld go!\n", idx
);
// Starting slot in the mutex pool: idx * kNumMutexes / n_threads, so that
// different worker indices start at different positions.
62 int offset
= idx
* kNumMutexes
/ n_threads
;
// Each iteration locks and immediately unlocks a single mutex, advancing one
// slot per iteration and wrapping around at kNumMutexes.
63 for (int i
= 0; i
< n_iterations
; i
++) {
64 mutexes
[(offset
+ i
) % kNumMutexes
].Lock();
65 mutexes
[(offset
+ i
) % kNumMutexes
].Unlock();
67 printf("Thread %ld done\n", idx
);
// Benchmark driver: reads the configuration, spawns the garbage and worker
// threads, joins the garbage threads once every thread has started, then
// releases the workers and joins them.
// Command line (from the usage string below):
//   <prog> n_threads n_garbage_threads n_iterations
// NOTE(review): several original lines are missing from this view — the
// condition that opens the first branch, any default for n_threads, the
// handling of `status`, the closing braces of the loops and the end of the
// function.
71 int main(int argc
, char **argv
) {
72 int n_garbage_threads
;
// Built-in defaults. NOTE(review): the branch condition guarding these
// assignments is not visible here.
75 n_garbage_threads
= 200;
76 n_iterations
= 20000000;
// Explicit configuration: exactly three arguments after the program name.
77 } else if (argc
== 4) {
// atoi reports no conversion errors; the asserts below are the only range
// validation visible in this view.
78 n_threads
= atoi(argv
[1]);
79 assert(n_threads
> 0 && n_threads
<= 32);
80 n_garbage_threads
= atoi(argv
[2]);
81 assert(n_garbage_threads
> 0 && n_garbage_threads
<= 16000);
// n_iterations is not range-checked in the visible lines.
82 n_iterations
= atoi(argv
[3]);
// Usage message. NOTE(review): presumably the fallback branch for any other
// argument count — the enclosing branch is not visible.
84 printf("Usage: %s n_threads n_garbage_threads n_iterations\n", argv
[0]);
// Echo the effective configuration.
87 printf("%s: n_threads=%d n_garbage_threads=%d n_iterations=%d\n",
88 __FILE__
, n_threads
, n_garbage_threads
, n_iterations
);
// all_threads_ready: every spawned thread plus the main thread (+1).
90 pthread_barrier_init(&all_threads_ready
, NULL
, n_garbage_threads
+ n_threads
+ 1);
// main_threads_ready: the worker threads plus the main thread (+1).
91 pthread_barrier_init(&main_threads_ready
, NULL
, n_threads
+ 1);
// Thread handle arrays, allocated with new[]; no matching delete[] appears
// in the visible lines.
93 pthread_t
*t
= new pthread_t
[n_threads
];
95 pthread_t
*g_t
= new pthread_t
[n_garbage_threads
];
// Spawn the garbage threads with default attributes and a NULL argument.
96 for (int i
= 0; i
< n_garbage_threads
; i
++) {
// NOTE(review): what is done with `status` is not visible in this view.
97 int status
= pthread_create(&g_t
[i
], 0, GarbageThread
, NULL
);
// Spawn the worker threads; the loop index is passed by value, cast to void*.
100 for (int i
= 0; i
< n_threads
; i
++) {
101 int status
= pthread_create(&t
[i
], 0, Thread
, (void*)i
);
// The main thread is the final participant of all_threads_ready; once this
// returns, every garbage and worker thread has reached the barrier.
104 pthread_barrier_wait(&all_threads_ready
);
105 printf("All threads started! Killing the garbage threads.\n");
// Join every garbage thread; return values of pthread_join are ignored.
106 for (int i
= 0; i
< n_garbage_threads
; i
++) {
107 pthread_join(g_t
[i
], 0);
111 printf("Resuming the main threads.\n");
// Release the workers, which are blocked on main_threads_ready.
112 pthread_barrier_wait(&main_threads_ready
);
// Wait for every worker to finish its lock/unlock loop.
115 for (int i
= 0; i
< n_threads
; i
++) {
116 pthread_join(t
[i
], 0);