Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / openmp / tools / archer / ompt-tsan.cpp
blob8b338f6b18b6e7732a3b39ae61c0a0c8a89a7e9b
1 /*
2 * ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
3 */
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for details.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 //===----------------------------------------------------------------------===//
13 #ifndef __STDC_FORMAT_MACROS
14 #define __STDC_FORMAT_MACROS
15 #endif
17 #include <algorithm>
18 #include <atomic>
19 #include <cassert>
20 #include <cstdlib>
21 #include <cstring>
22 #include <inttypes.h>
23 #include <iostream>
24 #include <list>
25 #include <mutex>
26 #include <sstream>
27 #include <string>
28 #include <sys/resource.h>
29 #include <unistd.h>
30 #include <unordered_map>
31 #include <vector>
32 #include <dlfcn.h>
34 #include "omp-tools.h"
// Define attribute that indicates that the fall through from the previous
// case label is intentional and should not be diagnosed by a compiler
// Code from libcxx/include/__config
// Use a function like macro to imply that it must be followed by a semicolon
//
// The feature-test builtins are not provided by every compiler. Using an
// undefined function-like macro inside #if is a hard preprocessing error, so
// fall back to "attribute not available" when they are missing.
#ifndef __has_cpp_attribute
#define __has_cpp_attribute(x) 0
#endif
#ifndef __has_attribute
#define __has_attribute(x) 0
#endif

#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
#define KMP_FALLTHROUGH() [[fallthrough]]
// icc cannot properly tell this attribute is absent so force off
#elif defined(__INTEL_COMPILER)
#define KMP_FALLTHROUGH() ((void)0)
#elif __has_cpp_attribute(clang::fallthrough)
#define KMP_FALLTHROUGH() [[clang::fallthrough]]
#elif __has_attribute(fallthrough) || __GNUC__ >= 7
#define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
#else
#define KMP_FALLTHROUGH() ((void)0)
#endif
53 static int hasReductionCallback;
55 namespace {
/// Archer runtime options, parsed from a space-separated option string of
/// `name=<int>` entries (the diagnostic below names ARCHER_OPTIONS as the
/// source of that string).
class ArcherFlags {
public:
#if (LLVM_VERSION) >= 40
  int flush_shadow{0};
#endif
  int print_max_rss{0};
  int verbose{0};
  int enabled{1};
  int report_data_leak{0};
  int ignore_serial{0};
  std::atomic<int> all_memory{0};

  /// Parse \p env; a null pointer leaves every option at its default.
  /// Entries that are not a known `name=<int>` pair are reported on std::cerr.
  ArcherFlags(const char *env) {
    if (!env)
      return;

    std::istringstream iss{std::string(env)};
    for (std::string option; std::getline(iss, option, ' ');) {
      // Consecutive blanks produce empty entries; they are not errors.
      if (option.empty())
        continue;
      const char *text = option.c_str();
      // sscanf returns EOF (which is non-zero) when the input ends before the
      // first conversion, e.g. for "all_memory" without a value. Requiring
      // exactly one converted item avoids treating that as a match and
      // storing an uninitialised value.
#if (LLVM_VERSION) >= 40
      if (sscanf(text, "flush_shadow=%d", &flush_shadow) == 1)
        continue;
#endif
      if (sscanf(text, "print_max_rss=%d", &print_max_rss) == 1)
        continue;
      if (sscanf(text, "verbose=%d", &verbose) == 1)
        continue;
      if (sscanf(text, "report_data_leak=%d", &report_data_leak) == 1)
        continue;
      if (sscanf(text, "enable=%d", &enabled) == 1)
        continue;
      if (sscanf(text, "ignore_serial=%d", &ignore_serial) == 1)
        continue;
      int parsed = 0;
      if (sscanf(text, "all_memory=%d", &parsed) == 1) {
        all_memory = parsed;
        continue;
      }
      // Report the entry that failed to parse, not whatever was read last.
      std::cerr << "Illegal values for ARCHER_OPTIONS variable: " << option
                << std::endl;
    }
  }
};
/// Subset of the ThreadSanitizer option string that Archer inspects.
class TsanFlags {
public:
  int ignore_noninstrumented_modules;

  /// Scan \p env for `ignore_noninstrumented_modules=<int>`. Entries are
  /// separated by blank, ',', ':', newline, tab or carriage return. A null
  /// \p env leaves the flag at 0.
  TsanFlags(const char *env) : ignore_noninstrumented_modules(0) {
    if (!env)
      return;

    static const char *const Separators = " ,:\n\t\r";
    const std::string Options(env);
    std::string::size_type Begin = 0;
    while (Begin < Options.size()) {
      std::string::size_type Stop = Options.find_first_of(Separators, Begin);
      if (Stop == std::string::npos)
        Stop = Options.size();
      const std::string Entry = Options.substr(Begin, Stop - Begin);
      // we are interested in ignore_noninstrumented_modules to print a
      // warning; every other entry simply fails to match.
      (void)sscanf(Entry.c_str(), "ignore_noninstrumented_modules=%d",
                   &ignore_noninstrumented_modules);
      // Step over the separator unless the end of the string was reached.
      Begin = (Stop < Options.size()) ? Stop + 1 : Stop;
    }
  }
};
138 } // namespace
140 #if (LLVM_VERSION) >= 40
141 extern "C" {
142 int __attribute__((weak)) __archer_get_omp_status();
143 void __attribute__((weak)) __tsan_flush_memory() {}
145 #endif
146 static ArcherFlags *archer_flags;
#ifndef TsanHappensBefore
// Thread Sanitizer is a tool that finds races in code.
// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
// tsan detects these exact functions by name.
//
// The annotation entry points are held as file-local function pointers; the
// macros below call through them and add the call site (__FILE__, __LINE__).
extern "C" {
static void (*AnnotateHappensAfter)(const char *, int, const volatile void *);
static void (*AnnotateHappensBefore)(const char *, int, const volatile void *);
static void (*AnnotateIgnoreWritesBegin)(const char *, int);
static void (*AnnotateIgnoreWritesEnd)(const char *, int);
static void (*AnnotateNewMemory)(const char *, int, const volatile void *,
                                 size_t);
static void (*__tsan_func_entry)(const void *);
static void (*__tsan_func_exit)(void);
static int (*RunningOnValgrind)(void);
}

// Source of a happens-before arc. The race detector infers an arc from this
// marker to a TsanHappensAfter marker that uses the same pointer argument.
#define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)

// Destination of a happens-before arc.
#define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)

// Stop reporting races on writes until the next TsanIgnoreWritesEnd.
#define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)

// Resume checking for racy writes.
#define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)

// We don't really delete the clock for now
#define TsanDeleteClock(cv)

// Announce a memory range; both macros forward to AnnotateNewMemory.
#define TsanNewMemory(addr, size)                                              \
  AnnotateNewMemory(__FILE__, __LINE__, addr, size)
#define TsanFreeMemory(addr, size)                                             \
  AnnotateNewMemory(__FILE__, __LINE__, addr, size)
#endif

// Function entry/exit
#define TsanFuncEntry(pc) __tsan_func_entry(pc)
#define TsanFuncExit() __tsan_func_exit()
192 /// Required OMPT inquiry functions.
193 static ompt_get_parallel_info_t ompt_get_parallel_info;
194 static ompt_get_thread_data_t ompt_get_thread_data;
196 typedef char ompt_tsan_clockid;
/// Hand out consecutive ids starting at 0; each call atomically returns the
/// current counter value and advances it by one.
static uint64_t my_next_id() {
  static std::atomic<uint64_t> NextId{0};
  return NextId.fetch_add(1);
}
204 static int pagesize{0};
206 // Data structure to provide a threadsafe pool of reusable objects.
207 // DataPool<Type of objects>
208 namespace {
209 template <typename T> struct DataPool final {
210 static __thread DataPool<T> *ThreadDataPool;
211 std::mutex DPMutex{};
213 // store unused objects
214 std::vector<T *> DataPointer{};
215 std::vector<T *> RemoteDataPointer{};
217 // store all allocated memory to finally release
218 std::list<void *> memory;
220 // count remotely returned data (RemoteDataPointer.size())
221 std::atomic<int> remote{0};
223 // totally allocated data objects in pool
224 int total{0};
225 #ifdef DEBUG_DATA
226 int remoteReturn{0};
227 int localReturn{0};
229 int getRemote() { return remoteReturn + remote; }
230 int getLocal() { return localReturn; }
231 #endif
232 int getTotal() { return total; }
233 int getMissing() {
234 return total - DataPointer.size() - RemoteDataPointer.size();
237 // fill the pool by allocating a page of memory
238 void newDatas() {
239 if (remote > 0) {
240 const std::lock_guard<std::mutex> lock(DPMutex);
241 // DataPointer is empty, so just swap the vectors
242 DataPointer.swap(RemoteDataPointer);
243 remote = 0;
244 return;
246 // calculate size of an object including padding to cacheline size
247 size_t elemSize = sizeof(T);
248 size_t paddedSize = (((elemSize - 1) / 64) + 1) * 64;
249 // number of padded elements to allocate
250 int ndatas = pagesize / paddedSize;
251 char *datas = (char *)malloc(ndatas * paddedSize);
252 memory.push_back(datas);
253 for (int i = 0; i < ndatas; i++) {
254 DataPointer.push_back(new (datas + i * paddedSize) T(this));
256 total += ndatas;
259 // get data from the pool
260 T *getData() {
261 T *ret;
262 if (DataPointer.empty())
263 newDatas();
264 ret = DataPointer.back();
265 DataPointer.pop_back();
266 return ret;
269 // accesses to the thread-local datapool don't need locks
270 void returnOwnData(T *data) {
271 DataPointer.emplace_back(data);
272 #ifdef DEBUG_DATA
273 localReturn++;
274 #endif
277 // returning to a remote datapool using lock
278 void returnData(T *data) {
279 const std::lock_guard<std::mutex> lock(DPMutex);
280 RemoteDataPointer.emplace_back(data);
281 remote++;
282 #ifdef DEBUG_DATA
283 remoteReturn++;
284 #endif
287 ~DataPool() {
288 // we assume all memory is returned when the thread finished / destructor is
289 // called
290 if (archer_flags->report_data_leak && getMissing() != 0) {
291 printf("ERROR: While freeing DataPool (%s) we are missing %i data "
292 "objects.\n",
293 __PRETTY_FUNCTION__, getMissing());
294 exit(-3);
296 for (auto i : DataPointer)
297 if (i)
298 i->~T();
299 for (auto i : RemoteDataPointer)
300 if (i)
301 i->~T();
302 for (auto i : memory)
303 if (i)
304 free(i);
308 template <typename T> struct DataPoolEntry {
309 DataPool<T> *owner;
311 static T *New() { return DataPool<T>::ThreadDataPool->getData(); }
313 void Delete() {
314 static_cast<T *>(this)->Reset();
315 if (owner == DataPool<T>::ThreadDataPool)
316 owner->returnOwnData(static_cast<T *>(this));
317 else
318 owner->returnData(static_cast<T *>(this));
321 DataPoolEntry(DataPool<T> *dp) : owner(dp) {}
324 struct DependencyData;
325 typedef DataPool<DependencyData> DependencyDataPool;
326 template <>
327 __thread DependencyDataPool *DependencyDataPool::ThreadDataPool = nullptr;
329 /// Data structure to store additional information for task dependency.
330 struct DependencyData final : DataPoolEntry<DependencyData> {
331 ompt_tsan_clockid in;
332 ompt_tsan_clockid out;
333 ompt_tsan_clockid inoutset;
334 void *GetInPtr() { return &in; }
335 void *GetOutPtr() { return &out; }
336 void *GetInoutsetPtr() { return &inoutset; }
338 void Reset() {}
340 static DependencyData *New() { return DataPoolEntry<DependencyData>::New(); }
342 DependencyData(DataPool<DependencyData> *dp)
343 : DataPoolEntry<DependencyData>(dp) {}
346 struct TaskDependency {
347 void *inPtr;
348 void *outPtr;
349 void *inoutsetPtr;
350 ompt_dependence_type_t type;
351 TaskDependency(DependencyData *depData, ompt_dependence_type_t type)
352 : inPtr(depData->GetInPtr()), outPtr(depData->GetOutPtr()),
353 inoutsetPtr(depData->GetInoutsetPtr()), type(type) {}
354 void AnnotateBegin() {
355 if (type == ompt_dependence_type_out ||
356 type == ompt_dependence_type_inout ||
357 type == ompt_dependence_type_mutexinoutset) {
358 TsanHappensAfter(inPtr);
359 TsanHappensAfter(outPtr);
360 TsanHappensAfter(inoutsetPtr);
361 } else if (type == ompt_dependence_type_in) {
362 TsanHappensAfter(outPtr);
363 TsanHappensAfter(inoutsetPtr);
364 } else if (type == ompt_dependence_type_inoutset) {
365 TsanHappensAfter(inPtr);
366 TsanHappensAfter(outPtr);
369 void AnnotateEnd() {
370 if (type == ompt_dependence_type_out ||
371 type == ompt_dependence_type_inout ||
372 type == ompt_dependence_type_mutexinoutset) {
373 TsanHappensBefore(outPtr);
374 } else if (type == ompt_dependence_type_in) {
375 TsanHappensBefore(inPtr);
376 } else if (type == ompt_dependence_type_inoutset) {
377 TsanHappensBefore(inoutsetPtr);
382 struct ParallelData;
383 typedef DataPool<ParallelData> ParallelDataPool;
384 template <>
385 __thread ParallelDataPool *ParallelDataPool::ThreadDataPool = nullptr;
387 /// Data structure to store additional information for parallel regions.
388 struct ParallelData final : DataPoolEntry<ParallelData> {
390 // Parallel fork is just another barrier, use Barrier[1]
392 /// Two addresses for relationships with barriers.
393 ompt_tsan_clockid Barrier[2];
395 const void *codePtr;
397 void *GetParallelPtr() { return &(Barrier[1]); }
399 void *GetBarrierPtr(unsigned Index) { return &(Barrier[Index]); }
401 ParallelData *Init(const void *codeptr) {
402 codePtr = codeptr;
403 return this;
406 void Reset() {}
408 static ParallelData *New(const void *codeptr) {
409 return DataPoolEntry<ParallelData>::New()->Init(codeptr);
412 ParallelData(DataPool<ParallelData> *dp) : DataPoolEntry<ParallelData>(dp) {}
415 static inline ParallelData *ToParallelData(ompt_data_t *parallel_data) {
416 return reinterpret_cast<ParallelData *>(parallel_data->ptr);
419 struct Taskgroup;
420 typedef DataPool<Taskgroup> TaskgroupPool;
421 template <> __thread TaskgroupPool *TaskgroupPool::ThreadDataPool = nullptr;
423 /// Data structure to support stacking of taskgroups and allow synchronization.
424 struct Taskgroup final : DataPoolEntry<Taskgroup> {
425 /// Its address is used for relationships of the taskgroup's task set.
426 ompt_tsan_clockid Ptr;
428 /// Reference to the parent taskgroup.
429 Taskgroup *Parent;
431 void *GetPtr() { return &Ptr; }
433 Taskgroup *Init(Taskgroup *parent) {
434 Parent = parent;
435 return this;
438 void Reset() {}
440 static Taskgroup *New(Taskgroup *Parent) {
441 return DataPoolEntry<Taskgroup>::New()->Init(Parent);
444 Taskgroup(DataPool<Taskgroup> *dp) : DataPoolEntry<Taskgroup>(dp) {}
447 enum ArcherTaskFlag { ArcherTaskFulfilled = 0x00010000 };
449 struct TaskData;
450 typedef DataPool<TaskData> TaskDataPool;
451 template <> __thread TaskDataPool *TaskDataPool::ThreadDataPool = nullptr;
453 /// Data structure to store additional information for tasks.
454 struct TaskData final : DataPoolEntry<TaskData> {
455 /// Its address is used for relationships of this task.
456 ompt_tsan_clockid Task{0};
458 /// Child tasks use its address to declare a relationship to a taskwait in
459 /// this task.
460 ompt_tsan_clockid Taskwait{0};
462 /// Child tasks use its address to model omp_all_memory dependencies
463 ompt_tsan_clockid AllMemory[2]{0};
465 /// Index of which barrier to use next.
466 char BarrierIndex{0};
468 /// Whether this task is currently executing a barrier.
469 bool InBarrier{false};
471 /// Whether this task is an included task.
472 int TaskType{0};
474 /// count execution phase
475 int execution{0};
477 /// Count how often this structure has been put into child tasks + 1.
478 std::atomic_int RefCount{1};
480 /// Reference to the parent that created this task.
481 TaskData *Parent{nullptr};
483 /// Reference to the team of this task.
484 ParallelData *Team{nullptr};
486 /// Reference to the current taskgroup that this task either belongs to or
487 /// that it just created.
488 Taskgroup *TaskGroup{nullptr};
490 /// Dependency information for this task.
491 TaskDependency *Dependencies{nullptr};
493 /// Number of dependency entries.
494 unsigned DependencyCount{0};
496 // The dependency-map stores DependencyData objects representing
497 // the dependency variables used on the sibling tasks created from
498 // this task
499 // We expect a rare need for the dependency-map, so alloc on demand
500 std::unordered_map<void *, DependencyData *> *DependencyMap{nullptr};
502 #ifdef DEBUG
503 int freed{0};
504 #endif
506 bool isIncluded() { return TaskType & ompt_task_undeferred; }
507 bool isUntied() { return TaskType & ompt_task_untied; }
508 bool isFinal() { return TaskType & ompt_task_final; }
509 bool isMergable() { return TaskType & ompt_task_mergeable; }
510 bool isMerged() { return TaskType & ompt_task_merged; }
512 bool isExplicit() { return TaskType & ompt_task_explicit; }
513 bool isImplicit() { return TaskType & ompt_task_implicit; }
514 bool isInitial() { return TaskType & ompt_task_initial; }
515 bool isTarget() { return TaskType & ompt_task_target; }
517 bool isFulfilled() { return TaskType & ArcherTaskFulfilled; }
518 void setFulfilled() { TaskType |= ArcherTaskFulfilled; }
520 void setAllMemoryDep() { AllMemory[0] = 1; }
521 bool hasAllMemoryDep() { return AllMemory[0]; }
523 void *GetTaskPtr() { return &Task; }
525 void *GetTaskwaitPtr() { return &Taskwait; }
527 void *GetLastAllMemoryPtr() { return AllMemory; }
528 void *GetNextAllMemoryPtr() { return AllMemory + 1; }
530 TaskData *Init(TaskData *parent, int taskType) {
531 TaskType = taskType;
532 Parent = parent;
533 Team = Parent->Team;
534 BarrierIndex = Parent->BarrierIndex;
535 if (Parent != nullptr) {
536 Parent->RefCount++;
537 // Copy over pointer to taskgroup. This task may set up its own stack
538 // but for now belongs to its parent's taskgroup.
539 TaskGroup = Parent->TaskGroup;
541 return this;
544 TaskData *Init(ParallelData *team, int taskType) {
545 TaskType = taskType;
546 execution = 1;
547 Team = team;
548 return this;
551 void Reset() {
552 InBarrier = false;
553 TaskType = 0;
554 execution = 0;
555 BarrierIndex = 0;
556 RefCount = 1;
557 Parent = nullptr;
558 Team = nullptr;
559 TaskGroup = nullptr;
560 if (DependencyMap) {
561 for (auto i : *DependencyMap)
562 i.second->Delete();
563 delete DependencyMap;
565 DependencyMap = nullptr;
566 if (Dependencies)
567 free(Dependencies);
568 Dependencies = nullptr;
569 DependencyCount = 0;
570 #ifdef DEBUG
571 freed = 0;
572 #endif
575 static TaskData *New(TaskData *parent, int taskType) {
576 return DataPoolEntry<TaskData>::New()->Init(parent, taskType);
579 static TaskData *New(ParallelData *team, int taskType) {
580 return DataPoolEntry<TaskData>::New()->Init(team, taskType);
583 TaskData(DataPool<TaskData> *dp) : DataPoolEntry<TaskData>(dp) {}
585 } // namespace
587 static inline TaskData *ToTaskData(ompt_data_t *task_data) {
588 if (task_data)
589 return reinterpret_cast<TaskData *>(task_data->ptr);
590 return nullptr;
593 /// Store a mutex for each wait_id to resolve race condition with callbacks.
594 static std::unordered_map<ompt_wait_id_t, std::mutex> Locks;
595 static std::mutex LocksMutex;
597 static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
598 ompt_data_t *thread_data) {
599 ParallelDataPool::ThreadDataPool = new ParallelDataPool;
600 TsanNewMemory(ParallelDataPool::ThreadDataPool,
601 sizeof(ParallelDataPool::ThreadDataPool));
602 TaskgroupPool::ThreadDataPool = new TaskgroupPool;
603 TsanNewMemory(TaskgroupPool::ThreadDataPool,
604 sizeof(TaskgroupPool::ThreadDataPool));
605 TaskDataPool::ThreadDataPool = new TaskDataPool;
606 TsanNewMemory(TaskDataPool::ThreadDataPool,
607 sizeof(TaskDataPool::ThreadDataPool));
608 DependencyDataPool::ThreadDataPool = new DependencyDataPool;
609 TsanNewMemory(DependencyDataPool::ThreadDataPool,
610 sizeof(DependencyDataPool::ThreadDataPool));
611 thread_data->value = my_next_id();
614 static void ompt_tsan_thread_end(ompt_data_t *thread_data) {
615 TsanIgnoreWritesBegin();
616 delete ParallelDataPool::ThreadDataPool;
617 delete TaskgroupPool::ThreadDataPool;
618 delete TaskDataPool::ThreadDataPool;
619 delete DependencyDataPool::ThreadDataPool;
620 TsanIgnoreWritesEnd();
623 /// OMPT event callbacks for handling parallel regions.
625 static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data,
626 const ompt_frame_t *parent_task_frame,
627 ompt_data_t *parallel_data,
628 uint32_t requested_team_size, int flag,
629 const void *codeptr_ra) {
630 ParallelData *Data = ParallelData::New(codeptr_ra);
631 parallel_data->ptr = Data;
633 TsanHappensBefore(Data->GetParallelPtr());
634 if (archer_flags->ignore_serial && ToTaskData(parent_task_data)->isInitial())
635 TsanIgnoreWritesEnd();
638 static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
639 ompt_data_t *task_data, int flag,
640 const void *codeptr_ra) {
641 if (archer_flags->ignore_serial && ToTaskData(task_data)->isInitial())
642 TsanIgnoreWritesBegin();
643 ParallelData *Data = ToParallelData(parallel_data);
644 TsanHappensAfter(Data->GetBarrierPtr(0));
645 TsanHappensAfter(Data->GetBarrierPtr(1));
647 Data->Delete();
649 #if (LLVM_VERSION >= 40)
650 if (&__archer_get_omp_status) {
651 if (__archer_get_omp_status() == 0 && archer_flags->flush_shadow)
652 __tsan_flush_memory();
654 #endif
657 static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
658 ompt_data_t *parallel_data,
659 ompt_data_t *task_data,
660 unsigned int team_size,
661 unsigned int thread_num, int type) {
662 switch (endpoint) {
663 case ompt_scope_begin:
664 if (type & ompt_task_initial) {
665 parallel_data->ptr = ParallelData::New(nullptr);
667 task_data->ptr = TaskData::New(ToParallelData(parallel_data), type);
668 TsanHappensAfter(ToParallelData(parallel_data)->GetParallelPtr());
669 TsanFuncEntry(ToParallelData(parallel_data)->codePtr);
670 break;
671 case ompt_scope_end: {
672 TaskData *Data = ToTaskData(task_data);
673 #ifdef DEBUG
674 assert(Data->freed == 0 && "Implicit task end should only be called once!");
675 Data->freed = 1;
676 #endif
677 assert(Data->RefCount == 1 &&
678 "All tasks should have finished at the implicit barrier!");
679 if (type & ompt_task_initial) {
680 Data->Team->Delete();
682 Data->Delete();
683 TsanFuncExit();
684 break;
686 case ompt_scope_beginend:
687 // Should not occur according to OpenMP 5.1
688 // Tested in OMPT tests
689 break;
693 static void ompt_tsan_sync_region(ompt_sync_region_t kind,
694 ompt_scope_endpoint_t endpoint,
695 ompt_data_t *parallel_data,
696 ompt_data_t *task_data,
697 const void *codeptr_ra) {
698 TaskData *Data = ToTaskData(task_data);
699 switch (endpoint) {
700 case ompt_scope_begin:
701 case ompt_scope_beginend:
702 TsanFuncEntry(codeptr_ra);
703 switch (kind) {
704 case ompt_sync_region_barrier_implementation:
705 case ompt_sync_region_barrier_implicit:
706 case ompt_sync_region_barrier_explicit:
707 case ompt_sync_region_barrier_implicit_parallel:
708 case ompt_sync_region_barrier_implicit_workshare:
709 case ompt_sync_region_barrier_teams:
710 case ompt_sync_region_barrier: {
711 char BarrierIndex = Data->BarrierIndex;
712 TsanHappensBefore(Data->Team->GetBarrierPtr(BarrierIndex));
714 if (hasReductionCallback < ompt_set_always) {
715 // We ignore writes inside the barrier. These would either occur during
716 // 1. reductions performed by the runtime which are guaranteed to be
717 // race-free.
718 // 2. execution of another task.
719 // For the latter case we will re-enable tracking in task_switch.
720 Data->InBarrier = true;
721 TsanIgnoreWritesBegin();
724 break;
727 case ompt_sync_region_taskwait:
728 break;
730 case ompt_sync_region_taskgroup:
731 Data->TaskGroup = Taskgroup::New(Data->TaskGroup);
732 break;
734 case ompt_sync_region_reduction:
735 // should never be reached
736 break;
738 if (endpoint == ompt_scope_begin)
739 break;
740 KMP_FALLTHROUGH();
741 case ompt_scope_end:
742 TsanFuncExit();
743 switch (kind) {
744 case ompt_sync_region_barrier_implementation:
745 case ompt_sync_region_barrier_implicit:
746 case ompt_sync_region_barrier_explicit:
747 case ompt_sync_region_barrier_implicit_parallel:
748 case ompt_sync_region_barrier_implicit_workshare:
749 case ompt_sync_region_barrier_teams:
750 case ompt_sync_region_barrier: {
751 if (hasReductionCallback < ompt_set_always) {
752 // We want to track writes after the barrier again.
753 Data->InBarrier = false;
754 TsanIgnoreWritesEnd();
757 char BarrierIndex = Data->BarrierIndex;
758 // Barrier will end after it has been entered by all threads.
759 if (parallel_data)
760 TsanHappensAfter(Data->Team->GetBarrierPtr(BarrierIndex));
762 // It is not guaranteed that all threads have exited this barrier before
763 // we enter the next one. So we will use a different address.
764 // We are however guaranteed that this current barrier is finished
765 // by the time we exit the next one. So we can then reuse the first
766 // address.
767 Data->BarrierIndex = (BarrierIndex + 1) % 2;
768 break;
771 case ompt_sync_region_taskwait: {
772 if (Data->execution > 1)
773 TsanHappensAfter(Data->GetTaskwaitPtr());
774 break;
777 case ompt_sync_region_taskgroup: {
778 assert(Data->TaskGroup != nullptr &&
779 "Should have at least one taskgroup!");
781 TsanHappensAfter(Data->TaskGroup->GetPtr());
783 // Delete this allocated taskgroup, all descendent task are finished by
784 // now.
785 Taskgroup *Parent = Data->TaskGroup->Parent;
786 Data->TaskGroup->Delete();
787 Data->TaskGroup = Parent;
788 break;
791 case ompt_sync_region_reduction:
792 // Should not occur according to OpenMP 5.1
793 // Tested in OMPT tests
794 break;
796 break;
800 static void ompt_tsan_reduction(ompt_sync_region_t kind,
801 ompt_scope_endpoint_t endpoint,
802 ompt_data_t *parallel_data,
803 ompt_data_t *task_data,
804 const void *codeptr_ra) {
805 switch (endpoint) {
806 case ompt_scope_begin:
807 switch (kind) {
808 case ompt_sync_region_reduction:
809 TsanIgnoreWritesBegin();
810 break;
811 default:
812 break;
814 break;
815 case ompt_scope_end:
816 switch (kind) {
817 case ompt_sync_region_reduction:
818 TsanIgnoreWritesEnd();
819 break;
820 default:
821 break;
823 break;
824 case ompt_scope_beginend:
825 // Should not occur according to OpenMP 5.1
826 // Tested in OMPT tests
827 // Would have no implications for DR detection
828 break;
832 /// OMPT event callbacks for handling tasks.
834 static void ompt_tsan_task_create(
835 ompt_data_t *parent_task_data, /* id of parent task */
836 const ompt_frame_t *parent_frame, /* frame data for parent task */
837 ompt_data_t *new_task_data, /* id of created task */
838 int type, int has_dependences,
839 const void *codeptr_ra) /* pointer to outlined function */
841 TaskData *Data;
842 assert(new_task_data->ptr == NULL &&
843 "Task data should be initialized to NULL");
844 if (type & ompt_task_initial) {
845 ompt_data_t *parallel_data;
846 int team_size = 1;
847 ompt_get_parallel_info(0, &parallel_data, &team_size);
848 ParallelData *PData = ParallelData::New(nullptr);
849 parallel_data->ptr = PData;
851 Data = TaskData::New(PData, type);
852 new_task_data->ptr = Data;
853 } else if (type & ompt_task_undeferred) {
854 Data = TaskData::New(ToTaskData(parent_task_data), type);
855 new_task_data->ptr = Data;
856 } else if (type & ompt_task_explicit || type & ompt_task_target) {
857 Data = TaskData::New(ToTaskData(parent_task_data), type);
858 new_task_data->ptr = Data;
860 // Use the newly created address. We cannot use a single address from the
861 // parent because that would declare wrong relationships with other
862 // sibling tasks that may be created before this task is started!
863 TsanHappensBefore(Data->GetTaskPtr());
864 ToTaskData(parent_task_data)->execution++;
868 static void freeTask(TaskData *task) {
869 while (task != nullptr && --task->RefCount == 0) {
870 TaskData *Parent = task->Parent;
871 task->Delete();
872 task = Parent;
876 // LastAllMemoryPtr marks the beginning of an all_memory epoch
877 // NextAllMemoryPtr marks the end of an all_memory epoch
878 // All tasks with depend begin execution after LastAllMemoryPtr
879 // and end before NextAllMemoryPtr
880 static void releaseDependencies(TaskData *task) {
881 if (archer_flags->all_memory) {
882 if (task->hasAllMemoryDep()) {
883 TsanHappensBefore(task->Parent->GetLastAllMemoryPtr());
884 TsanHappensBefore(task->Parent->GetNextAllMemoryPtr());
885 } else if (task->DependencyCount)
886 TsanHappensBefore(task->Parent->GetNextAllMemoryPtr());
888 for (unsigned i = 0; i < task->DependencyCount; i++) {
889 task->Dependencies[i].AnnotateEnd();
893 static void acquireDependencies(TaskData *task) {
894 if (archer_flags->all_memory) {
895 if (task->hasAllMemoryDep())
896 TsanHappensAfter(task->Parent->GetNextAllMemoryPtr());
897 else if (task->DependencyCount)
898 TsanHappensAfter(task->Parent->GetLastAllMemoryPtr());
900 for (unsigned i = 0; i < task->DependencyCount; i++) {
901 task->Dependencies[i].AnnotateBegin();
905 static void completeTask(TaskData *FromTask) {
906 if (!FromTask)
907 return;
908 // Task-end happens after a possible omp_fulfill_event call
909 if (FromTask->isFulfilled())
910 TsanHappensAfter(FromTask->GetTaskPtr());
911 // Included tasks are executed sequentially, no need to track
912 // synchronization
913 if (!FromTask->isIncluded()) {
914 // Task will finish before a barrier in the surrounding parallel region
915 // ...
916 ParallelData *PData = FromTask->Team;
917 TsanHappensBefore(PData->GetBarrierPtr(FromTask->BarrierIndex));
919 // ... and before an eventual taskwait by the parent thread.
920 TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr());
922 if (FromTask->TaskGroup != nullptr) {
923 // This task is part of a taskgroup, so it will finish before the
924 // corresponding taskgroup_end.
925 TsanHappensBefore(FromTask->TaskGroup->GetPtr());
928 // release dependencies
929 releaseDependencies(FromTask);
932 static void suspendTask(TaskData *FromTask) {
933 if (!FromTask)
934 return;
935 // Task may be resumed at a later point in time.
936 TsanHappensBefore(FromTask->GetTaskPtr());
939 static void switchTasks(TaskData *FromTask, TaskData *ToTask) {
940 // Legacy handling for missing reduction callback
941 if (hasReductionCallback < ompt_set_always) {
942 if (FromTask && FromTask->InBarrier) {
943 // We want to ignore writes in the runtime code during barriers,
944 // but not when executing tasks with user code!
945 TsanIgnoreWritesEnd();
947 if (ToTask && ToTask->InBarrier) {
948 // We want to ignore writes in the runtime code during barriers,
949 // but not when executing tasks with user code!
950 TsanIgnoreWritesBegin();
953 //// Not yet used
954 // if (FromTask)
955 // FromTask->deactivate();
956 // if (ToTask)
957 // ToTask->activate();
960 static void endTask(TaskData *FromTask) {
961 if (!FromTask)
962 return;
965 static void startTask(TaskData *ToTask) {
966 if (!ToTask)
967 return;
968 // Handle dependencies on first execution of the task
969 if (ToTask->execution == 0) {
970 ToTask->execution++;
971 acquireDependencies(ToTask);
973 // 1. Task will begin execution after it has been created.
974 // 2. Task will resume after it has been switched away.
975 TsanHappensAfter(ToTask->GetTaskPtr());
978 static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
979 ompt_task_status_t prior_task_status,
980 ompt_data_t *second_task_data) {
983 // The necessary action depends on prior_task_status:
985 // ompt_task_early_fulfill = 5,
986 // -> ignored
988 // ompt_task_late_fulfill = 6,
989 // -> first completed, first freed, second ignored
991 // ompt_task_complete = 1,
992 // ompt_task_cancel = 3,
993 // -> first completed, first freed, second starts
995 // ompt_taskwait_complete = 8,
996 // -> first starts, first completes, first freed, second ignored
998 // ompt_task_detach = 4,
999 // ompt_task_yield = 2,
1000 // ompt_task_switch = 7
1001 // -> first suspended, second starts
1004 TaskData *FromTask = ToTaskData(first_task_data);
1005 TaskData *ToTask = ToTaskData(second_task_data);
1007 switch (prior_task_status) {
1008 case ompt_task_early_fulfill:
1009 TsanHappensBefore(FromTask->GetTaskPtr());
1010 FromTask->setFulfilled();
1011 return;
1012 case ompt_task_late_fulfill:
1013 TsanHappensAfter(FromTask->GetTaskPtr());
1014 completeTask(FromTask);
1015 freeTask(FromTask);
1016 return;
1017 case ompt_taskwait_complete:
1018 acquireDependencies(FromTask);
1019 freeTask(FromTask);
1020 return;
1021 case ompt_task_complete:
1022 completeTask(FromTask);
1023 endTask(FromTask);
1024 switchTasks(FromTask, ToTask);
1025 freeTask(FromTask);
1026 return;
1027 case ompt_task_cancel:
1028 completeTask(FromTask);
1029 endTask(FromTask);
1030 switchTasks(FromTask, ToTask);
1031 freeTask(FromTask);
1032 startTask(ToTask);
1033 return;
1034 case ompt_task_detach:
1035 endTask(FromTask);
1036 suspendTask(FromTask);
1037 switchTasks(FromTask, ToTask);
1038 startTask(ToTask);
1039 return;
1040 case ompt_task_yield:
1041 suspendTask(FromTask);
1042 switchTasks(FromTask, ToTask);
1043 startTask(ToTask);
1044 return;
1045 case ompt_task_switch:
1046 suspendTask(FromTask);
1047 switchTasks(FromTask, ToTask);
1048 startTask(ToTask);
1049 return;
1053 static void ompt_tsan_dependences(ompt_data_t *task_data,
1054 const ompt_dependence_t *deps, int ndeps) {
1055 if (ndeps > 0) {
1056 // Copy the data to use it in task_switch and task_end.
1057 TaskData *Data = ToTaskData(task_data);
1058 if (!Data->Parent) {
1059 // Return since doacross dependences are not supported yet.
1060 return;
1062 if (!Data->Parent->DependencyMap)
1063 Data->Parent->DependencyMap =
1064 new std::unordered_map<void *, DependencyData *>();
1065 Data->Dependencies =
1066 (TaskDependency *)malloc(sizeof(TaskDependency) * ndeps);
1067 Data->DependencyCount = ndeps;
1068 for (int i = 0, d = 0; i < ndeps; i++, d++) {
1069 if (deps[i].dependence_type == ompt_dependence_type_out_all_memory ||
1070 deps[i].dependence_type == ompt_dependence_type_inout_all_memory) {
1071 Data->setAllMemoryDep();
1072 Data->DependencyCount--;
1073 if (!archer_flags->all_memory) {
1074 printf("The application uses omp_all_memory, but Archer was\n"
1075 "started to not consider omp_all_memory. This can lead\n"
1076 "to false data race alerts.\n"
1077 "Include all_memory=1 in ARCHER_OPTIONS to consider\n"
1078 "omp_all_memory from the beginning.\n");
1079 archer_flags->all_memory = 1;
1081 d--;
1082 continue;
1084 auto ret = Data->Parent->DependencyMap->insert(
1085 std::make_pair(deps[i].variable.ptr, nullptr));
1086 if (ret.second) {
1087 ret.first->second = DependencyData::New();
1089 new ((void *)(Data->Dependencies + d))
1090 TaskDependency(ret.first->second, deps[i].dependence_type);
1093 // This callback is executed before this task is first started.
1094 TsanHappensBefore(Data->GetTaskPtr());
1098 /// OMPT event callbacks for handling locking.
1099 static void ompt_tsan_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id,
1100 const void *codeptr_ra) {
1102 // Acquire our own lock to make sure that
1103 // 1. the previous release has finished.
1104 // 2. the next acquire doesn't start before we have finished our release.
1105 LocksMutex.lock();
1106 std::mutex &Lock = Locks[wait_id];
1107 LocksMutex.unlock();
1109 Lock.lock();
1110 TsanHappensAfter(&Lock);
1113 static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id,
1114 const void *codeptr_ra) {
1115 LocksMutex.lock();
1116 std::mutex &Lock = Locks[wait_id];
1117 LocksMutex.unlock();
1118 TsanHappensBefore(&Lock);
1120 Lock.unlock();
// Register the Archer handler ompt_tsan_<event> for ompt_callback_<event>.
//
//   event  - callback name; selects ompt_callback_<event> and ompt_tsan_<event>
//   type   - signature name; selects the handler type ompt_callback_<type>_t
//   result - lvalue that receives the return value of ompt_set_callback
//   level  - required support level; a message is printed if result < level
//
// Expects a callable named `ompt_set_callback` to be in scope at the point of
// use. `result` and `level` are parenthesized so that arbitrary expressions
// can be passed without changing the meaning of the comparison.
#define SET_OPTIONAL_CALLBACK_T(event, type, result, level)                    \
  do {                                                                         \
    ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event;                \
    (result) = ompt_set_callback(ompt_callback_##event,                        \
                                 (ompt_callback_t)tsan_##event);               \
    if ((result) < (level))                                                    \
      printf("Registered callback '" #event "' is not supported at " #level    \
             " (%i)\n",                                                        \
             (result));                                                        \
  } while (0)

// Register a callback that must be supported at level ompt_set_always; the
// result of the registration is only used for the diagnostic.
#define SET_CALLBACK_T(event, type)                                            \
  do {                                                                         \
    int res;                                                                   \
    SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always);                \
  } while (0)

// Shorthand for callbacks whose signature name equals the event name.
#define SET_CALLBACK(event) SET_CALLBACK_T(event, event)
// Resolve the symbol named like the variable `f` with dlsym(RTLD_DEFAULT),
// cast it with `fSig` (a parenthesized function pointer type) and store it in
// `f`. Prints a message when the symbol cannot be found.
#define findTsanFunction(f, fSig)                                              \
  do {                                                                         \
    if (!((f) = fSig dlsym(RTLD_DEFAULT, #f)))                                 \
      printf("Unable to find TSan function " #f ".\n");                        \
  } while (0)

// Same lookup without a diagnostic. The expansion is a single parenthesized
// expression yielding the resolved pointer (null if the symbol is missing), so
// it can be used as a statement or inside a condition.
#define findTsanFunctionSilent(f, fSig) ((f) = fSig dlsym(RTLD_DEFAULT, #f))
1151 static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num,
1152 ompt_data_t *tool_data) {
1153 const char *options = getenv("TSAN_OPTIONS");
1154 TsanFlags tsan_flags(options);
1156 ompt_set_callback_t ompt_set_callback =
1157 (ompt_set_callback_t)lookup("ompt_set_callback");
1158 if (ompt_set_callback == NULL) {
1159 std::cerr << "Could not set callback, exiting..." << std::endl;
1160 std::exit(1);
1162 ompt_get_parallel_info =
1163 (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
1164 ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
1166 if (ompt_get_parallel_info == NULL) {
1167 fprintf(stderr, "Could not get inquiry function 'ompt_get_parallel_info', "
1168 "exiting...\n");
1169 exit(1);
1172 findTsanFunction(AnnotateHappensAfter,
1173 (void (*)(const char *, int, const volatile void *)));
1174 findTsanFunction(AnnotateHappensBefore,
1175 (void (*)(const char *, int, const volatile void *)));
1176 findTsanFunction(AnnotateIgnoreWritesBegin, (void (*)(const char *, int)));
1177 findTsanFunction(AnnotateIgnoreWritesEnd, (void (*)(const char *, int)));
1178 findTsanFunction(
1179 AnnotateNewMemory,
1180 (void (*)(const char *, int, const volatile void *, size_t)));
1181 findTsanFunction(__tsan_func_entry, (void (*)(const void *)));
1182 findTsanFunction(__tsan_func_exit, (void (*)(void)));
1184 SET_CALLBACK(thread_begin);
1185 SET_CALLBACK(thread_end);
1186 SET_CALLBACK(parallel_begin);
1187 SET_CALLBACK(implicit_task);
1188 SET_CALLBACK(sync_region);
1189 SET_CALLBACK(parallel_end);
1191 SET_CALLBACK(task_create);
1192 SET_CALLBACK(task_schedule);
1193 SET_CALLBACK(dependences);
1195 SET_CALLBACK_T(mutex_acquired, mutex);
1196 SET_CALLBACK_T(mutex_released, mutex);
1197 SET_OPTIONAL_CALLBACK_T(reduction, sync_region, hasReductionCallback,
1198 ompt_set_never);
1200 if (!tsan_flags.ignore_noninstrumented_modules)
1201 fprintf(stderr,
1202 "Warning: please export "
1203 "TSAN_OPTIONS='ignore_noninstrumented_modules=1' "
1204 "to avoid false positive reports from the OpenMP runtime!\n");
1205 if (archer_flags->ignore_serial)
1206 TsanIgnoreWritesBegin();
1208 return 1; // success
1211 static void ompt_tsan_finalize(ompt_data_t *tool_data) {
1212 if (archer_flags->ignore_serial)
1213 TsanIgnoreWritesEnd();
1214 if (archer_flags->print_max_rss) {
1215 struct rusage end;
1216 getrusage(RUSAGE_SELF, &end);
1217 printf("MAX RSS[KBytes] during execution: %ld\n", end.ru_maxrss);
1220 if (archer_flags)
1221 delete archer_flags;
1224 extern "C" ompt_start_tool_result_t *
1225 ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
1226 const char *options = getenv("ARCHER_OPTIONS");
1227 archer_flags = new ArcherFlags(options);
1228 if (!archer_flags->enabled) {
1229 if (archer_flags->verbose)
1230 std::cout << "Archer disabled, stopping operation" << std::endl;
1231 delete archer_flags;
1232 return NULL;
1235 pagesize = getpagesize();
1237 static ompt_start_tool_result_t ompt_start_tool_result = {
1238 &ompt_tsan_initialize, &ompt_tsan_finalize, {0}};
1240 // The OMPT start-up code uses dlopen with RTLD_LAZY. Therefore, we cannot
1241 // rely on dlopen to fail if TSan is missing, but would get a runtime error
1242 // for the first TSan call. We use RunningOnValgrind to detect whether
1243 // an implementation of the Annotation interface is available in the
1244 // execution or disable the tool (by returning NULL).
1246 findTsanFunctionSilent(RunningOnValgrind, (int (*)(void)));
1247 if (!RunningOnValgrind) // if we are not running on TSAN, give a different
1248 // tool the chance to be loaded
1250 if (archer_flags->verbose)
1251 std::cout << "Archer detected OpenMP application without TSan "
1252 "stopping operation"
1253 << std::endl;
1254 delete archer_flags;
1255 return NULL;
1258 if (archer_flags->verbose)
1259 std::cout << "Archer detected OpenMP application with TSan, supplying "
1260 "OpenMP synchronization semantics"
1261 << std::endl;
1262 return &ompt_start_tool_result;