/*
 * ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for details.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <dlfcn.h>
#include <inttypes.h>
#include <iostream>
#include <list>
#include <mutex>
#include <new>
#include <sstream>
#include <string>
#include <sys/resource.h>
#include <unistd.h>
#include <unordered_map>
#include <vector>

#include "omp-tools.h"
// Define attribute that indicates that the fall through from the previous
// case label is intentional and should not be diagnosed by a compiler
//   Code from libcxx/include/__config
// Use a function like macro to imply that it must be followed by a semicolon
#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
#define KMP_FALLTHROUGH() [[fallthrough]]
// icc cannot properly tell this attribute is absent so force off
#elif defined(__INTEL_COMPILER)
#define KMP_FALLTHROUGH() ((void)0)
#elif __has_cpp_attribute(clang::fallthrough)
#define KMP_FALLTHROUGH() [[clang::fallthrough]]
#elif __has_attribute(fallthrough) || __GNUC__ >= 7
#define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
#else
#define KMP_FALLTHROUGH() ((void)0)
#endif
// Result level of registering the optional reduction callback; zero-initialized
// (ompt_set_error == 0) until ompt_tsan_initialize sets it.
static int hasReductionCallback;
/// Runtime flags for the Archer tool, parsed from the ARCHER_OPTIONS
/// environment variable (space-separated key=value tokens).
class ArcherFlags {
public:
#if (LLVM_VERSION) >= 40
  int flush_shadow{0};
#endif
  int print_max_rss{0};
  int verbose{0};
  int enabled{1};
  int report_data_leak{0};
  int ignore_serial{0};
  std::atomic<int> all_memory{0};

  /// Parse the option string; a null env pointer leaves all defaults.
  ArcherFlags(const char *env) {
    if (env) {
      std::vector<std::string> tokens;
      std::string token;
      std::string str(env);
      std::istringstream iss(str);
      int tmp_int;
      while (std::getline(iss, token, ' '))
        tokens.push_back(token);

      for (std::vector<std::string>::iterator it = tokens.begin();
           it != tokens.end(); ++it) {
#if (LLVM_VERSION) >= 40
        if (sscanf(it->c_str(), "flush_shadow=%d", &flush_shadow))
          continue;
#endif
        if (sscanf(it->c_str(), "print_max_rss=%d", &print_max_rss))
          continue;
        if (sscanf(it->c_str(), "verbose=%d", &verbose))
          continue;
        if (sscanf(it->c_str(), "report_data_leak=%d", &report_data_leak))
          continue;
        if (sscanf(it->c_str(), "enable=%d", &enabled))
          continue;
        if (sscanf(it->c_str(), "ignore_serial=%d", &ignore_serial))
          continue;
        if (sscanf(it->c_str(), "all_memory=%d", &tmp_int)) {
          // std::atomic cannot be a direct sscanf target; go through tmp_int.
          all_memory = tmp_int;
          continue;
        }
        std::cerr << "Illegal values for ARCHER_OPTIONS variable: " << token
                  << std::endl;
      }
    }
  }
};
/// Minimal parser for the TSAN_OPTIONS environment variable; we only care
/// about ignore_noninstrumented_modules so we can warn when it is unset.
class TsanFlags {
public:
  int ignore_noninstrumented_modules;

  TsanFlags(const char *env) : ignore_noninstrumented_modules(0) {
    if (env) {
      std::vector<std::string> tokens;
      std::string str(env);
      auto end = str.end();
      auto it = str.begin();
      // TSAN_OPTIONS tokens may be separated by several characters.
      auto is_sep = [](char c) {
        return c == ' ' || c == ',' || c == ':' || c == '\n' || c == '\t' ||
               c == '\r';
      };
      while (it != end) {
        auto next_it = std::find_if(it, end, is_sep);
        tokens.emplace_back(it, next_it);
        it = next_it;
        if (it != end) {
          ++it;
        }
      }

      for (const auto &token : tokens) {
        // we are interested in ignore_noninstrumented_modules to print a
        // warning
        if (sscanf(token.c_str(), "ignore_noninstrumented_modules=%d",
                   &ignore_noninstrumented_modules))
          continue;
      }
    }
  }
};
140 #if (LLVM_VERSION) >= 40
142 int __attribute__((weak
)) __archer_get_omp_status();
143 void __attribute__((weak
)) __tsan_flush_memory() {}
146 static ArcherFlags
*archer_flags
;
#ifndef TsanHappensBefore

// Fallback no-op used for every annotation entry point until the real TSan
// implementation is resolved via dlsym in ompt_tsan_initialize.
template <typename... Args> static void __ompt_tsan_func(Args...) {}

#define DECLARE_TSAN_FUNCTION(name, ...)                                       \
  static void (*name)(__VA_ARGS__) = __ompt_tsan_func<__VA_ARGS__>;

// Thread Sanitizer is a tool that finds races in code.
// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
// tsan detects these exact functions by name.
DECLARE_TSAN_FUNCTION(AnnotateHappensAfter, const char *, int,
                      const volatile void *)
DECLARE_TSAN_FUNCTION(AnnotateHappensBefore, const char *, int,
                      const volatile void *)
DECLARE_TSAN_FUNCTION(AnnotateIgnoreWritesBegin, const char *, int)
DECLARE_TSAN_FUNCTION(AnnotateIgnoreWritesEnd, const char *, int)
DECLARE_TSAN_FUNCTION(AnnotateNewMemory, const char *, int,
                      const volatile void *, size_t)
DECLARE_TSAN_FUNCTION(__tsan_func_entry, const void *)
DECLARE_TSAN_FUNCTION(__tsan_func_exit)

// RunningOnValgrind is used to detect absence of TSan and must intentionally be a nullptr.
static int (*RunningOnValgrind)(void);

// This marker is used to define a happens-before arc. The race detector will
// infer an arc from the begin to the end when they share the same pointer
// argument.
#define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)

// This marker defines the destination of a happens-before arc.
#define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)

// Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
#define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)

// Resume checking for racy writes.
#define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)

// We don't really delete the clock for now
#define TsanDeleteClock(cv)

// newMemory
#define TsanNewMemory(addr, size)                                              \
  AnnotateNewMemory(__FILE__, __LINE__, addr, size)
#define TsanFreeMemory(addr, size)                                             \
  AnnotateNewMemory(__FILE__, __LINE__, addr, size)
#endif

// Function entry/exit
#define TsanFuncEntry(pc) __tsan_func_entry(pc)
#define TsanFuncExit() __tsan_func_exit()
202 /// Required OMPT inquiry functions.
203 static ompt_get_parallel_info_t ompt_get_parallel_info
;
204 static ompt_get_thread_data_t ompt_get_thread_data
;
206 typedef char ompt_tsan_clockid
;
/// Return a process-wide unique, monotonically increasing id (thread-safe).
static uint64_t my_next_id() {
  static uint64_t ID = 0;
  uint64_t ret = __sync_fetch_and_add(&ID, 1);
  return ret;
}
// System page size, set from getpagesize() in ompt_start_tool; used to size
// DataPool allocations.
static int pagesize{0};
216 // Data structure to provide a threadsafe pool of reusable objects.
217 // DataPool<Type of objects>
219 template <typename T
> struct DataPool final
{
220 static __thread DataPool
<T
> *ThreadDataPool
;
221 std::mutex DPMutex
{};
223 // store unused objects
224 std::vector
<T
*> DataPointer
{};
225 std::vector
<T
*> RemoteDataPointer
{};
227 // store all allocated memory to finally release
228 std::list
<void *> memory
;
230 // count remotely returned data (RemoteDataPointer.size())
231 std::atomic
<int> remote
{0};
233 // totally allocated data objects in pool
239 int getRemote() { return remoteReturn
+ remote
; }
240 int getLocal() { return localReturn
; }
242 int getTotal() { return total
; }
244 return total
- DataPointer
.size() - RemoteDataPointer
.size();
247 // fill the pool by allocating a page of memory
250 const std::lock_guard
<std::mutex
> lock(DPMutex
);
251 // DataPointer is empty, so just swap the vectors
252 DataPointer
.swap(RemoteDataPointer
);
256 // calculate size of an object including padding to cacheline size
257 size_t elemSize
= sizeof(T
);
258 size_t paddedSize
= (((elemSize
- 1) / 64) + 1) * 64;
259 // number of padded elements to allocate
260 int ndatas
= pagesize
/ paddedSize
;
261 char *datas
= (char *)malloc(ndatas
* paddedSize
);
262 memory
.push_back(datas
);
263 for (int i
= 0; i
< ndatas
; i
++) {
264 DataPointer
.push_back(new (datas
+ i
* paddedSize
) T(this));
269 // get data from the pool
272 if (DataPointer
.empty())
274 ret
= DataPointer
.back();
275 DataPointer
.pop_back();
279 // accesses to the thread-local datapool don't need locks
280 void returnOwnData(T
*data
) {
281 DataPointer
.emplace_back(data
);
287 // returning to a remote datapool using lock
288 void returnData(T
*data
) {
289 const std::lock_guard
<std::mutex
> lock(DPMutex
);
290 RemoteDataPointer
.emplace_back(data
);
298 // we assume all memory is returned when the thread finished / destructor is
300 if (archer_flags
->report_data_leak
&& getMissing() != 0) {
301 printf("ERROR: While freeing DataPool (%s) we are missing %i data "
303 __PRETTY_FUNCTION__
, getMissing());
306 for (auto i
: DataPointer
)
309 for (auto i
: RemoteDataPointer
)
312 for (auto i
: memory
)
318 template <typename T
> struct DataPoolEntry
{
321 static T
*New() { return DataPool
<T
>::ThreadDataPool
->getData(); }
324 static_cast<T
*>(this)->Reset();
325 if (owner
== DataPool
<T
>::ThreadDataPool
)
326 owner
->returnOwnData(static_cast<T
*>(this));
328 owner
->returnData(static_cast<T
*>(this));
331 DataPoolEntry(DataPool
<T
> *dp
) : owner(dp
) {}
334 struct DependencyData
;
335 typedef DataPool
<DependencyData
> DependencyDataPool
;
337 __thread DependencyDataPool
*DependencyDataPool::ThreadDataPool
= nullptr;
339 /// Data structure to store additional information for task dependency.
340 struct DependencyData final
: DataPoolEntry
<DependencyData
> {
341 ompt_tsan_clockid in
;
342 ompt_tsan_clockid out
;
343 ompt_tsan_clockid inoutset
;
344 void *GetInPtr() { return &in
; }
345 void *GetOutPtr() { return &out
; }
346 void *GetInoutsetPtr() { return &inoutset
; }
350 static DependencyData
*New() { return DataPoolEntry
<DependencyData
>::New(); }
352 DependencyData(DataPool
<DependencyData
> *dp
)
353 : DataPoolEntry
<DependencyData
>(dp
) {}
356 struct TaskDependency
{
360 ompt_dependence_type_t type
;
361 TaskDependency(DependencyData
*depData
, ompt_dependence_type_t type
)
362 : inPtr(depData
->GetInPtr()), outPtr(depData
->GetOutPtr()),
363 inoutsetPtr(depData
->GetInoutsetPtr()), type(type
) {}
364 void AnnotateBegin() {
365 if (type
== ompt_dependence_type_out
||
366 type
== ompt_dependence_type_inout
||
367 type
== ompt_dependence_type_mutexinoutset
) {
368 TsanHappensAfter(inPtr
);
369 TsanHappensAfter(outPtr
);
370 TsanHappensAfter(inoutsetPtr
);
371 } else if (type
== ompt_dependence_type_in
) {
372 TsanHappensAfter(outPtr
);
373 TsanHappensAfter(inoutsetPtr
);
374 } else if (type
== ompt_dependence_type_inoutset
) {
375 TsanHappensAfter(inPtr
);
376 TsanHappensAfter(outPtr
);
380 if (type
== ompt_dependence_type_out
||
381 type
== ompt_dependence_type_inout
||
382 type
== ompt_dependence_type_mutexinoutset
) {
383 TsanHappensBefore(outPtr
);
384 } else if (type
== ompt_dependence_type_in
) {
385 TsanHappensBefore(inPtr
);
386 } else if (type
== ompt_dependence_type_inoutset
) {
387 TsanHappensBefore(inoutsetPtr
);
393 typedef DataPool
<ParallelData
> ParallelDataPool
;
395 __thread ParallelDataPool
*ParallelDataPool::ThreadDataPool
= nullptr;
397 /// Data structure to store additional information for parallel regions.
398 struct ParallelData final
: DataPoolEntry
<ParallelData
> {
400 // Parallel fork is just another barrier, use Barrier[1]
402 /// Two addresses for relationships with barriers.
403 ompt_tsan_clockid Barrier
[2];
407 void *GetParallelPtr() { return &(Barrier
[1]); }
409 void *GetBarrierPtr(unsigned Index
) { return &(Barrier
[Index
]); }
411 ParallelData
*Init(const void *codeptr
) {
418 static ParallelData
*New(const void *codeptr
) {
419 return DataPoolEntry
<ParallelData
>::New()->Init(codeptr
);
422 ParallelData(DataPool
<ParallelData
> *dp
) : DataPoolEntry
<ParallelData
>(dp
) {}
425 static inline ParallelData
*ToParallelData(ompt_data_t
*parallel_data
) {
426 return reinterpret_cast<ParallelData
*>(parallel_data
->ptr
);
430 typedef DataPool
<Taskgroup
> TaskgroupPool
;
431 template <> __thread TaskgroupPool
*TaskgroupPool::ThreadDataPool
= nullptr;
433 /// Data structure to support stacking of taskgroups and allow synchronization.
434 struct Taskgroup final
: DataPoolEntry
<Taskgroup
> {
435 /// Its address is used for relationships of the taskgroup's task set.
436 ompt_tsan_clockid Ptr
;
438 /// Reference to the parent taskgroup.
441 void *GetPtr() { return &Ptr
; }
443 Taskgroup
*Init(Taskgroup
*parent
) {
450 static Taskgroup
*New(Taskgroup
*Parent
) {
451 return DataPoolEntry
<Taskgroup
>::New()->Init(Parent
);
454 Taskgroup(DataPool
<Taskgroup
> *dp
) : DataPoolEntry
<Taskgroup
>(dp
) {}
// Extra flag OR'ed into TaskData::TaskType, chosen above all ompt_task_* bits.
enum ArcherTaskFlag { ArcherTaskFulfilled = 0x00010000 };
460 typedef DataPool
<TaskData
> TaskDataPool
;
461 template <> __thread TaskDataPool
*TaskDataPool::ThreadDataPool
= nullptr;
463 /// Data structure to store additional information for tasks.
464 struct TaskData final
: DataPoolEntry
<TaskData
> {
465 /// Its address is used for relationships of this task.
466 ompt_tsan_clockid Task
{0};
468 /// Child tasks use its address to declare a relationship to a taskwait in
470 ompt_tsan_clockid Taskwait
{0};
472 /// Child tasks use its address to model omp_all_memory dependencies
473 ompt_tsan_clockid AllMemory
[2]{0};
475 /// Index of which barrier to use next.
476 char BarrierIndex
{0};
478 /// Whether this task is currently executing a barrier.
479 bool InBarrier
{false};
481 /// Whether this task is an included task.
484 /// count execution phase
487 /// Count how often this structure has been put into child tasks + 1.
488 std::atomic_int RefCount
{1};
490 /// Reference to the parent that created this task.
491 TaskData
*Parent
{nullptr};
493 /// Reference to the team of this task.
494 ParallelData
*Team
{nullptr};
496 /// Reference to the current taskgroup that this task either belongs to or
497 /// that it just created.
498 Taskgroup
*TaskGroup
{nullptr};
500 /// Dependency information for this task.
501 TaskDependency
*Dependencies
{nullptr};
503 /// Number of dependency entries.
504 unsigned DependencyCount
{0};
506 // The dependency-map stores DependencyData objects representing
507 // the dependency variables used on the sibling tasks created from
509 // We expect a rare need for the dependency-map, so alloc on demand
510 std::unordered_map
<void *, DependencyData
*> *DependencyMap
{nullptr};
516 bool isIncluded() { return TaskType
& ompt_task_undeferred
; }
517 bool isUntied() { return TaskType
& ompt_task_untied
; }
518 bool isFinal() { return TaskType
& ompt_task_final
; }
519 bool isMergable() { return TaskType
& ompt_task_mergeable
; }
520 bool isMerged() { return TaskType
& ompt_task_merged
; }
522 bool isExplicit() { return TaskType
& ompt_task_explicit
; }
523 bool isImplicit() { return TaskType
& ompt_task_implicit
; }
524 bool isInitial() { return TaskType
& ompt_task_initial
; }
525 bool isTarget() { return TaskType
& ompt_task_target
; }
527 bool isFulfilled() { return TaskType
& ArcherTaskFulfilled
; }
528 void setFulfilled() { TaskType
|= ArcherTaskFulfilled
; }
530 void setAllMemoryDep() { AllMemory
[0] = 1; }
531 bool hasAllMemoryDep() { return AllMemory
[0]; }
533 void *GetTaskPtr() { return &Task
; }
535 void *GetTaskwaitPtr() { return &Taskwait
; }
537 void *GetLastAllMemoryPtr() { return AllMemory
; }
538 void *GetNextAllMemoryPtr() { return AllMemory
+ 1; }
540 TaskData
*Init(TaskData
*parent
, int taskType
) {
544 BarrierIndex
= Parent
->BarrierIndex
;
545 if (Parent
!= nullptr) {
547 // Copy over pointer to taskgroup. This task may set up its own stack
548 // but for now belongs to its parent's taskgroup.
549 TaskGroup
= Parent
->TaskGroup
;
554 TaskData
*Init(ParallelData
*team
, int taskType
) {
571 for (auto i
: *DependencyMap
)
573 delete DependencyMap
;
575 DependencyMap
= nullptr;
578 Dependencies
= nullptr;
585 static TaskData
*New(TaskData
*parent
, int taskType
) {
586 return DataPoolEntry
<TaskData
>::New()->Init(parent
, taskType
);
589 static TaskData
*New(ParallelData
*team
, int taskType
) {
590 return DataPoolEntry
<TaskData
>::New()->Init(team
, taskType
);
593 TaskData(DataPool
<TaskData
> *dp
) : DataPoolEntry
<TaskData
>(dp
) {}
597 static inline TaskData
*ToTaskData(ompt_data_t
*task_data
) {
599 return reinterpret_cast<TaskData
*>(task_data
->ptr
);
603 /// Store a mutex for each wait_id to resolve race condition with callbacks.
604 static std::unordered_map
<ompt_wait_id_t
, std::mutex
> Locks
;
605 static std::mutex LocksMutex
;
607 static void ompt_tsan_thread_begin(ompt_thread_t thread_type
,
608 ompt_data_t
*thread_data
) {
609 ParallelDataPool::ThreadDataPool
= new ParallelDataPool
;
610 TsanNewMemory(ParallelDataPool::ThreadDataPool
,
611 sizeof(ParallelDataPool::ThreadDataPool
));
612 TaskgroupPool::ThreadDataPool
= new TaskgroupPool
;
613 TsanNewMemory(TaskgroupPool::ThreadDataPool
,
614 sizeof(TaskgroupPool::ThreadDataPool
));
615 TaskDataPool::ThreadDataPool
= new TaskDataPool
;
616 TsanNewMemory(TaskDataPool::ThreadDataPool
,
617 sizeof(TaskDataPool::ThreadDataPool
));
618 DependencyDataPool::ThreadDataPool
= new DependencyDataPool
;
619 TsanNewMemory(DependencyDataPool::ThreadDataPool
,
620 sizeof(DependencyDataPool::ThreadDataPool
));
621 thread_data
->value
= my_next_id();
624 static void ompt_tsan_thread_end(ompt_data_t
*thread_data
) {
625 TsanIgnoreWritesBegin();
626 delete ParallelDataPool::ThreadDataPool
;
627 delete TaskgroupPool::ThreadDataPool
;
628 delete TaskDataPool::ThreadDataPool
;
629 delete DependencyDataPool::ThreadDataPool
;
630 TsanIgnoreWritesEnd();
633 /// OMPT event callbacks for handling parallel regions.
635 static void ompt_tsan_parallel_begin(ompt_data_t
*parent_task_data
,
636 const ompt_frame_t
*parent_task_frame
,
637 ompt_data_t
*parallel_data
,
638 uint32_t requested_team_size
, int flag
,
639 const void *codeptr_ra
) {
640 ParallelData
*Data
= ParallelData::New(codeptr_ra
);
641 parallel_data
->ptr
= Data
;
643 TsanHappensBefore(Data
->GetParallelPtr());
644 if (archer_flags
->ignore_serial
&& ToTaskData(parent_task_data
)->isInitial())
645 TsanIgnoreWritesEnd();
648 static void ompt_tsan_parallel_end(ompt_data_t
*parallel_data
,
649 ompt_data_t
*task_data
, int flag
,
650 const void *codeptr_ra
) {
651 if (archer_flags
->ignore_serial
&& ToTaskData(task_data
)->isInitial())
652 TsanIgnoreWritesBegin();
653 ParallelData
*Data
= ToParallelData(parallel_data
);
654 TsanHappensAfter(Data
->GetBarrierPtr(0));
655 TsanHappensAfter(Data
->GetBarrierPtr(1));
659 #if (LLVM_VERSION >= 40)
660 if (&__archer_get_omp_status
) {
661 if (__archer_get_omp_status() == 0 && archer_flags
->flush_shadow
)
662 __tsan_flush_memory();
667 static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint
,
668 ompt_data_t
*parallel_data
,
669 ompt_data_t
*task_data
,
670 unsigned int team_size
,
671 unsigned int thread_num
, int type
) {
673 case ompt_scope_begin
:
674 if (type
& ompt_task_initial
) {
675 parallel_data
->ptr
= ParallelData::New(nullptr);
677 task_data
->ptr
= TaskData::New(ToParallelData(parallel_data
), type
);
678 TsanHappensAfter(ToParallelData(parallel_data
)->GetParallelPtr());
679 TsanFuncEntry(ToParallelData(parallel_data
)->codePtr
);
681 case ompt_scope_end
: {
682 TaskData
*Data
= ToTaskData(task_data
);
684 assert(Data
->freed
== 0 && "Implicit task end should only be called once!");
687 assert(Data
->RefCount
== 1 &&
688 "All tasks should have finished at the implicit barrier!");
689 if (type
& ompt_task_initial
) {
690 Data
->Team
->Delete();
696 case ompt_scope_beginend
:
697 // Should not occur according to OpenMP 5.1
698 // Tested in OMPT tests
703 static void ompt_tsan_sync_region(ompt_sync_region_t kind
,
704 ompt_scope_endpoint_t endpoint
,
705 ompt_data_t
*parallel_data
,
706 ompt_data_t
*task_data
,
707 const void *codeptr_ra
) {
708 TaskData
*Data
= ToTaskData(task_data
);
710 case ompt_scope_begin
:
711 case ompt_scope_beginend
:
712 TsanFuncEntry(codeptr_ra
);
714 case ompt_sync_region_barrier_implementation
:
715 case ompt_sync_region_barrier_implicit
:
716 case ompt_sync_region_barrier_explicit
:
717 case ompt_sync_region_barrier_implicit_parallel
:
718 case ompt_sync_region_barrier_implicit_workshare
:
719 case ompt_sync_region_barrier_teams
:
720 case ompt_sync_region_barrier
: {
721 char BarrierIndex
= Data
->BarrierIndex
;
722 TsanHappensBefore(Data
->Team
->GetBarrierPtr(BarrierIndex
));
724 if (hasReductionCallback
< ompt_set_always
) {
725 // We ignore writes inside the barrier. These would either occur during
726 // 1. reductions performed by the runtime which are guaranteed to be
728 // 2. execution of another task.
729 // For the latter case we will re-enable tracking in task_switch.
730 Data
->InBarrier
= true;
731 TsanIgnoreWritesBegin();
737 case ompt_sync_region_taskwait
:
740 case ompt_sync_region_taskgroup
:
741 Data
->TaskGroup
= Taskgroup::New(Data
->TaskGroup
);
744 case ompt_sync_region_reduction
:
745 // should never be reached
748 if (endpoint
== ompt_scope_begin
)
754 case ompt_sync_region_barrier_implementation
:
755 case ompt_sync_region_barrier_implicit
:
756 case ompt_sync_region_barrier_explicit
:
757 case ompt_sync_region_barrier_implicit_parallel
:
758 case ompt_sync_region_barrier_implicit_workshare
:
759 case ompt_sync_region_barrier_teams
:
760 case ompt_sync_region_barrier
: {
761 if (hasReductionCallback
< ompt_set_always
) {
762 // We want to track writes after the barrier again.
763 Data
->InBarrier
= false;
764 TsanIgnoreWritesEnd();
767 char BarrierIndex
= Data
->BarrierIndex
;
768 // Barrier will end after it has been entered by all threads.
770 TsanHappensAfter(Data
->Team
->GetBarrierPtr(BarrierIndex
));
772 // It is not guaranteed that all threads have exited this barrier before
773 // we enter the next one. So we will use a different address.
774 // We are however guaranteed that this current barrier is finished
775 // by the time we exit the next one. So we can then reuse the first
777 Data
->BarrierIndex
= (BarrierIndex
+ 1) % 2;
781 case ompt_sync_region_taskwait
: {
782 if (Data
->execution
> 1)
783 TsanHappensAfter(Data
->GetTaskwaitPtr());
787 case ompt_sync_region_taskgroup
: {
788 assert(Data
->TaskGroup
!= nullptr &&
789 "Should have at least one taskgroup!");
791 TsanHappensAfter(Data
->TaskGroup
->GetPtr());
793 // Delete this allocated taskgroup, all descendent task are finished by
795 Taskgroup
*Parent
= Data
->TaskGroup
->Parent
;
796 Data
->TaskGroup
->Delete();
797 Data
->TaskGroup
= Parent
;
801 case ompt_sync_region_reduction
:
802 // Should not occur according to OpenMP 5.1
803 // Tested in OMPT tests
810 static void ompt_tsan_reduction(ompt_sync_region_t kind
,
811 ompt_scope_endpoint_t endpoint
,
812 ompt_data_t
*parallel_data
,
813 ompt_data_t
*task_data
,
814 const void *codeptr_ra
) {
816 case ompt_scope_begin
:
818 case ompt_sync_region_reduction
:
819 TsanIgnoreWritesBegin();
827 case ompt_sync_region_reduction
:
828 TsanIgnoreWritesEnd();
834 case ompt_scope_beginend
:
835 // Should not occur according to OpenMP 5.1
836 // Tested in OMPT tests
837 // Would have no implications for DR detection
842 /// OMPT event callbacks for handling tasks.
844 static void ompt_tsan_task_create(
845 ompt_data_t
*parent_task_data
, /* id of parent task */
846 const ompt_frame_t
*parent_frame
, /* frame data for parent task */
847 ompt_data_t
*new_task_data
, /* id of created task */
848 int type
, int has_dependences
,
849 const void *codeptr_ra
) /* pointer to outlined function */
852 assert(new_task_data
->ptr
== NULL
&&
853 "Task data should be initialized to NULL");
854 if (type
& ompt_task_initial
) {
855 ompt_data_t
*parallel_data
;
857 ompt_get_parallel_info(0, ¶llel_data
, &team_size
);
858 ParallelData
*PData
= ParallelData::New(nullptr);
859 parallel_data
->ptr
= PData
;
861 Data
= TaskData::New(PData
, type
);
862 new_task_data
->ptr
= Data
;
863 } else if (type
& ompt_task_undeferred
) {
864 Data
= TaskData::New(ToTaskData(parent_task_data
), type
);
865 new_task_data
->ptr
= Data
;
866 } else if (type
& ompt_task_explicit
|| type
& ompt_task_target
) {
867 Data
= TaskData::New(ToTaskData(parent_task_data
), type
);
868 new_task_data
->ptr
= Data
;
870 // Use the newly created address. We cannot use a single address from the
871 // parent because that would declare wrong relationships with other
872 // sibling tasks that may be created before this task is started!
873 TsanHappensBefore(Data
->GetTaskPtr());
874 ToTaskData(parent_task_data
)->execution
++;
878 static void freeTask(TaskData
*task
) {
879 while (task
!= nullptr && --task
->RefCount
== 0) {
880 TaskData
*Parent
= task
->Parent
;
886 // LastAllMemoryPtr marks the beginning of an all_memory epoch
887 // NextAllMemoryPtr marks the end of an all_memory epoch
888 // All tasks with depend begin execution after LastAllMemoryPtr
889 // and end before NextAllMemoryPtr
890 static void releaseDependencies(TaskData
*task
) {
891 if (archer_flags
->all_memory
) {
892 if (task
->hasAllMemoryDep()) {
893 TsanHappensBefore(task
->Parent
->GetLastAllMemoryPtr());
894 TsanHappensBefore(task
->Parent
->GetNextAllMemoryPtr());
895 } else if (task
->DependencyCount
)
896 TsanHappensBefore(task
->Parent
->GetNextAllMemoryPtr());
898 for (unsigned i
= 0; i
< task
->DependencyCount
; i
++) {
899 task
->Dependencies
[i
].AnnotateEnd();
903 static void acquireDependencies(TaskData
*task
) {
904 if (archer_flags
->all_memory
) {
905 if (task
->hasAllMemoryDep())
906 TsanHappensAfter(task
->Parent
->GetNextAllMemoryPtr());
907 else if (task
->DependencyCount
)
908 TsanHappensAfter(task
->Parent
->GetLastAllMemoryPtr());
910 for (unsigned i
= 0; i
< task
->DependencyCount
; i
++) {
911 task
->Dependencies
[i
].AnnotateBegin();
915 static void completeTask(TaskData
*FromTask
) {
918 // Task-end happens after a possible omp_fulfill_event call
919 if (FromTask
->isFulfilled())
920 TsanHappensAfter(FromTask
->GetTaskPtr());
921 // Included tasks are executed sequentially, no need to track
923 if (!FromTask
->isIncluded()) {
924 // Task will finish before a barrier in the surrounding parallel region
926 ParallelData
*PData
= FromTask
->Team
;
927 TsanHappensBefore(PData
->GetBarrierPtr(FromTask
->BarrierIndex
));
929 // ... and before an eventual taskwait by the parent thread.
930 TsanHappensBefore(FromTask
->Parent
->GetTaskwaitPtr());
932 if (FromTask
->TaskGroup
!= nullptr) {
933 // This task is part of a taskgroup, so it will finish before the
934 // corresponding taskgroup_end.
935 TsanHappensBefore(FromTask
->TaskGroup
->GetPtr());
938 // release dependencies
939 releaseDependencies(FromTask
);
942 static void suspendTask(TaskData
*FromTask
) {
945 // Task may be resumed at a later point in time.
946 TsanHappensBefore(FromTask
->GetTaskPtr());
949 static void switchTasks(TaskData
*FromTask
, TaskData
*ToTask
) {
950 // Legacy handling for missing reduction callback
951 if (hasReductionCallback
< ompt_set_always
) {
952 if (FromTask
&& FromTask
->InBarrier
) {
953 // We want to ignore writes in the runtime code during barriers,
954 // but not when executing tasks with user code!
955 TsanIgnoreWritesEnd();
957 if (ToTask
&& ToTask
->InBarrier
) {
958 // We want to ignore writes in the runtime code during barriers,
959 // but not when executing tasks with user code!
960 TsanIgnoreWritesBegin();
965 // FromTask->deactivate();
967 // ToTask->activate();
970 static void endTask(TaskData
*FromTask
) {
975 static void startTask(TaskData
*ToTask
) {
978 // Handle dependencies on first execution of the task
979 if (ToTask
->execution
== 0) {
981 acquireDependencies(ToTask
);
983 // 1. Task will begin execution after it has been created.
984 // 2. Task will resume after it has been switched away.
985 TsanHappensAfter(ToTask
->GetTaskPtr());
988 static void ompt_tsan_task_schedule(ompt_data_t
*first_task_data
,
989 ompt_task_status_t prior_task_status
,
990 ompt_data_t
*second_task_data
) {
993 // The necessary action depends on prior_task_status:
995 // ompt_task_early_fulfill = 5,
998 // ompt_task_late_fulfill = 6,
999 // -> first completed, first freed, second ignored
1001 // ompt_task_complete = 1,
1002 // ompt_task_cancel = 3,
1003 // -> first completed, first freed, second starts
1005 // ompt_taskwait_complete = 8,
1006 // -> first starts, first completes, first freed, second ignored
1008 // ompt_task_detach = 4,
1009 // ompt_task_yield = 2,
1010 // ompt_task_switch = 7
1011 // -> first suspended, second starts
1014 TaskData
*FromTask
= ToTaskData(first_task_data
);
1015 TaskData
*ToTask
= ToTaskData(second_task_data
);
1017 switch (prior_task_status
) {
1018 case ompt_task_early_fulfill
:
1019 TsanHappensBefore(FromTask
->GetTaskPtr());
1020 FromTask
->setFulfilled();
1022 case ompt_task_late_fulfill
:
1023 TsanHappensAfter(FromTask
->GetTaskPtr());
1024 completeTask(FromTask
);
1027 case ompt_taskwait_complete
:
1028 acquireDependencies(FromTask
);
1031 case ompt_task_complete
:
1032 completeTask(FromTask
);
1034 switchTasks(FromTask
, ToTask
);
1037 case ompt_task_cancel
:
1038 completeTask(FromTask
);
1040 switchTasks(FromTask
, ToTask
);
1044 case ompt_task_detach
:
1046 suspendTask(FromTask
);
1047 switchTasks(FromTask
, ToTask
);
1050 case ompt_task_yield
:
1051 suspendTask(FromTask
);
1052 switchTasks(FromTask
, ToTask
);
1055 case ompt_task_switch
:
1056 suspendTask(FromTask
);
1057 switchTasks(FromTask
, ToTask
);
1063 static void ompt_tsan_dependences(ompt_data_t
*task_data
,
1064 const ompt_dependence_t
*deps
, int ndeps
) {
1066 // Copy the data to use it in task_switch and task_end.
1067 TaskData
*Data
= ToTaskData(task_data
);
1068 if (!Data
->Parent
) {
1069 // Return since doacross dependences are not supported yet.
1072 if (!Data
->Parent
->DependencyMap
)
1073 Data
->Parent
->DependencyMap
=
1074 new std::unordered_map
<void *, DependencyData
*>();
1075 Data
->Dependencies
=
1076 (TaskDependency
*)malloc(sizeof(TaskDependency
) * ndeps
);
1077 Data
->DependencyCount
= ndeps
;
1078 for (int i
= 0, d
= 0; i
< ndeps
; i
++, d
++) {
1079 if (deps
[i
].dependence_type
== ompt_dependence_type_out_all_memory
||
1080 deps
[i
].dependence_type
== ompt_dependence_type_inout_all_memory
) {
1081 Data
->setAllMemoryDep();
1082 Data
->DependencyCount
--;
1083 if (!archer_flags
->all_memory
) {
1084 printf("The application uses omp_all_memory, but Archer was\n"
1085 "started to not consider omp_all_memory. This can lead\n"
1086 "to false data race alerts.\n"
1087 "Include all_memory=1 in ARCHER_OPTIONS to consider\n"
1088 "omp_all_memory from the beginning.\n");
1089 archer_flags
->all_memory
= 1;
1094 auto ret
= Data
->Parent
->DependencyMap
->insert(
1095 std::make_pair(deps
[i
].variable
.ptr
, nullptr));
1097 ret
.first
->second
= DependencyData::New();
1099 new ((void *)(Data
->Dependencies
+ d
))
1100 TaskDependency(ret
.first
->second
, deps
[i
].dependence_type
);
1103 // This callback is executed before this task is first started.
1104 TsanHappensBefore(Data
->GetTaskPtr());
1108 /// OMPT event callbacks for handling locking.
1109 static void ompt_tsan_mutex_acquired(ompt_mutex_t kind
, ompt_wait_id_t wait_id
,
1110 const void *codeptr_ra
) {
1112 // Acquire our own lock to make sure that
1113 // 1. the previous release has finished.
1114 // 2. the next acquire doesn't start before we have finished our release.
1116 std::mutex
&Lock
= Locks
[wait_id
];
1117 LocksMutex
.unlock();
1120 TsanHappensAfter(&Lock
);
1123 static void ompt_tsan_mutex_released(ompt_mutex_t kind
, ompt_wait_id_t wait_id
,
1124 const void *codeptr_ra
) {
1126 std::mutex
&Lock
= Locks
[wait_id
];
1127 LocksMutex
.unlock();
1128 TsanHappensBefore(&Lock
);
// callback , signature , variable to store result , required support level
#define SET_OPTIONAL_CALLBACK_T(event, type, result, level)                    \
  do {                                                                         \
    ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event;                \
    result = ompt_set_callback(ompt_callback_##event,                          \
                               (ompt_callback_t)tsan_##event);                 \
    if (result < level)                                                        \
      printf("Registered callback '" #event "' is not supported at " #level    \
             " (%i)\n",                                                        \
             result);                                                          \
  } while (0)

#define SET_CALLBACK_T(event, type)                                            \
  do {                                                                         \
    int res;                                                                   \
    SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always);                \
  } while (0)

#define SET_CALLBACK(event) SET_CALLBACK_T(event, event)

// Resolve a TSan interface function by name; warn when it is missing.
#define findTsanFunction(f, fSig)                                              \
  do {                                                                         \
    void *fp = dlsym(RTLD_DEFAULT, #f);                                        \
    if (fp)                                                                    \
      f = fSig fp;                                                             \
    else                                                                       \
      printf("Unable to find TSan function " #f ".\n");                        \
  } while (0)

#define findTsanFunctionSilent(f, fSig) f = fSig dlsym(RTLD_DEFAULT, #f)
1164 static int ompt_tsan_initialize(ompt_function_lookup_t lookup
, int device_num
,
1165 ompt_data_t
*tool_data
) {
1166 const char *options
= getenv("TSAN_OPTIONS");
1167 TsanFlags
tsan_flags(options
);
1169 ompt_set_callback_t ompt_set_callback
=
1170 (ompt_set_callback_t
)lookup("ompt_set_callback");
1171 if (ompt_set_callback
== NULL
) {
1172 std::cerr
<< "Could not set callback, exiting..." << std::endl
;
1175 ompt_get_parallel_info
=
1176 (ompt_get_parallel_info_t
)lookup("ompt_get_parallel_info");
1177 ompt_get_thread_data
= (ompt_get_thread_data_t
)lookup("ompt_get_thread_data");
1179 if (ompt_get_parallel_info
== NULL
) {
1180 fprintf(stderr
, "Could not get inquiry function 'ompt_get_parallel_info', "
1185 findTsanFunction(AnnotateHappensAfter
,
1186 (void (*)(const char *, int, const volatile void *)));
1187 findTsanFunction(AnnotateHappensBefore
,
1188 (void (*)(const char *, int, const volatile void *)));
1189 findTsanFunction(AnnotateIgnoreWritesBegin
, (void (*)(const char *, int)));
1190 findTsanFunction(AnnotateIgnoreWritesEnd
, (void (*)(const char *, int)));
1193 (void (*)(const char *, int, const volatile void *, size_t)));
1194 findTsanFunction(__tsan_func_entry
, (void (*)(const void *)));
1195 findTsanFunction(__tsan_func_exit
, (void (*)(void)));
1197 SET_CALLBACK(thread_begin
);
1198 SET_CALLBACK(thread_end
);
1199 SET_CALLBACK(parallel_begin
);
1200 SET_CALLBACK(implicit_task
);
1201 SET_CALLBACK(sync_region
);
1202 SET_CALLBACK(parallel_end
);
1204 SET_CALLBACK(task_create
);
1205 SET_CALLBACK(task_schedule
);
1206 SET_CALLBACK(dependences
);
1208 SET_CALLBACK_T(mutex_acquired
, mutex
);
1209 SET_CALLBACK_T(mutex_released
, mutex
);
1210 SET_OPTIONAL_CALLBACK_T(reduction
, sync_region
, hasReductionCallback
,
1213 if (!tsan_flags
.ignore_noninstrumented_modules
)
1215 "Warning: please export "
1216 "TSAN_OPTIONS='ignore_noninstrumented_modules=1' "
1217 "to avoid false positive reports from the OpenMP runtime!\n");
1218 if (archer_flags
->ignore_serial
)
1219 TsanIgnoreWritesBegin();
1221 return 1; // success
1224 static void ompt_tsan_finalize(ompt_data_t
*tool_data
) {
1225 if (archer_flags
->ignore_serial
)
1226 TsanIgnoreWritesEnd();
1227 if (archer_flags
->print_max_rss
) {
1229 getrusage(RUSAGE_SELF
, &end
);
1230 printf("MAX RSS[KBytes] during execution: %ld\n", end
.ru_maxrss
);
1234 delete archer_flags
;
1237 extern "C" ompt_start_tool_result_t
*
1238 ompt_start_tool(unsigned int omp_version
, const char *runtime_version
) {
1239 const char *options
= getenv("ARCHER_OPTIONS");
1240 archer_flags
= new ArcherFlags(options
);
1241 if (!archer_flags
->enabled
) {
1242 if (archer_flags
->verbose
)
1243 std::cout
<< "Archer disabled, stopping operation" << std::endl
;
1244 delete archer_flags
;
1248 pagesize
= getpagesize();
1250 static ompt_start_tool_result_t ompt_start_tool_result
= {
1251 &ompt_tsan_initialize
, &ompt_tsan_finalize
, {0}};
1253 // The OMPT start-up code uses dlopen with RTLD_LAZY. Therefore, we cannot
1254 // rely on dlopen to fail if TSan is missing, but would get a runtime error
1255 // for the first TSan call. We use RunningOnValgrind to detect whether
1256 // an implementation of the Annotation interface is available in the
1257 // execution or disable the tool (by returning NULL).
1259 findTsanFunctionSilent(RunningOnValgrind
, (int (*)(void)));
1260 if (!RunningOnValgrind
) // if we are not running on TSAN, give a different
1261 // tool the chance to be loaded
1263 if (archer_flags
->verbose
)
1264 std::cout
<< "Archer detected OpenMP application without TSan; "
1265 "stopping operation"
1267 delete archer_flags
;
1271 if (archer_flags
->verbose
)
1272 std::cout
<< "Archer detected OpenMP application with TSan, supplying "
1273 "OpenMP synchronization semantics"
1275 return &ompt_start_tool_result
;