/*
 * ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for details.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Request the C99 PRI*/SCN* format macros from <cinttypes> before any
// system header is included. The closing #endif was lost in extraction.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
28 #include <sys/resource.h>
30 #include <unordered_map>
34 #include "omp-tools.h"
// Define attribute that indicates that the fall through from the previous
// case label is intentional and should not be diagnosed by a compiler
// Code from libcxx/include/__config
// Use a function like macro to imply that it must be followed by a semicolon
#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
#define KMP_FALLTHROUGH() [[fallthrough]]
// icc cannot properly tell this attribute is absent so force off
#elif defined(__INTEL_COMPILER)
#define KMP_FALLTHROUGH() ((void)0)
#elif __has_cpp_attribute(clang::fallthrough)
#define KMP_FALLTHROUGH() [[clang::fallthrough]]
#elif __has_attribute(fallthrough) || __GNUC__ >= 7
#define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
#else
// Fallback: no-op expression so the trailing semicolon is always legal.
#define KMP_FALLTHROUGH() ((void)0)
#endif
53 static int hasReductionCallback
;
58 #if (LLVM_VERSION) >= 40
64 int report_data_leak
{0};
66 std::atomic
<int> all_memory
{0};
68 ArcherFlags(const char *env
) {
70 std::vector
<std::string
> tokens
;
73 std::istringstream
iss(str
);
75 while (std::getline(iss
, token
, ' '))
76 tokens
.push_back(token
);
78 for (std::vector
<std::string
>::iterator it
= tokens
.begin();
79 it
!= tokens
.end(); ++it
) {
80 #if (LLVM_VERSION) >= 40
81 if (sscanf(it
->c_str(), "flush_shadow=%d", &flush_shadow
))
84 if (sscanf(it
->c_str(), "print_max_rss=%d", &print_max_rss
))
86 if (sscanf(it
->c_str(), "verbose=%d", &verbose
))
88 if (sscanf(it
->c_str(), "report_data_leak=%d", &report_data_leak
))
90 if (sscanf(it
->c_str(), "enable=%d", &enabled
))
92 if (sscanf(it
->c_str(), "ignore_serial=%d", &ignore_serial
))
94 if (sscanf(it
->c_str(), "all_memory=%d", &tmp_int
)) {
98 std::cerr
<< "Illegal values for ARCHER_OPTIONS variable: " << token
107 int ignore_noninstrumented_modules
;
109 TsanFlags(const char *env
) : ignore_noninstrumented_modules(0) {
111 std::vector
<std::string
> tokens
;
112 std::string
str(env
);
113 auto end
= str
.end();
114 auto it
= str
.begin();
115 auto is_sep
= [](char c
) {
116 return c
== ' ' || c
== ',' || c
== ':' || c
== '\n' || c
== '\t' ||
120 auto next_it
= std::find_if(it
, end
, is_sep
);
121 tokens
.emplace_back(it
, next_it
);
128 for (const auto &token
: tokens
) {
129 // we are interested in ignore_noninstrumented_modules to print a
131 if (sscanf(token
.c_str(), "ignore_noninstrumented_modules=%d",
132 &ignore_noninstrumented_modules
))
140 #if (LLVM_VERSION) >= 40
142 int __attribute__((weak
)) __archer_get_omp_status();
143 void __attribute__((weak
)) __tsan_flush_memory() {}
// Global Archer option flags; allocated in ompt_start_tool from the
// ARCHER_OPTIONS environment variable, freed in ompt_tsan_finalize.
146 static ArcherFlags
*archer_flags
;
148 #ifndef TsanHappensBefore
149 // Thread Sanitizer is a tool that finds races in code.
150 // See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
151 // tsan detects these exact functions by name.
153 static void (*AnnotateHappensAfter
)(const char *, int, const volatile void *);
154 static void (*AnnotateHappensBefore
)(const char *, int, const volatile void *);
155 static void (*AnnotateIgnoreWritesBegin
)(const char *, int);
156 static void (*AnnotateIgnoreWritesEnd
)(const char *, int);
157 static void (*AnnotateNewMemory
)(const char *, int, const volatile void *,
159 static void (*__tsan_func_entry
)(const void *);
160 static void (*__tsan_func_exit
)(void);
161 static int (*RunningOnValgrind
)(void);
164 // This marker is used to define a happens-before arc. The race detector will
165 // infer an arc from the begin to the end when they share the same pointer
167 #define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)
169 // This marker defines the destination of a happens-before arc.
170 #define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)
172 // Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
173 #define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
175 // Resume checking for racy writes.
176 #define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
178 // We don't really delete the clock for now
179 #define TsanDeleteClock(cv)
182 #define TsanNewMemory(addr, size) \
183 AnnotateNewMemory(__FILE__, __LINE__, addr, size)
184 #define TsanFreeMemory(addr, size) \
185 AnnotateNewMemory(__FILE__, __LINE__, addr, size)
188 // Function entry/exit
189 #define TsanFuncEntry(pc) __tsan_func_entry(pc)
190 #define TsanFuncExit() __tsan_func_exit()
192 /// Required OMPT inquiry functions.
193 static ompt_get_parallel_info_t ompt_get_parallel_info
;
194 static ompt_get_thread_data_t ompt_get_thread_data
;
196 typedef char ompt_tsan_clockid
;
/// Returns a process-wide unique, monotonically increasing id (0, 1, 2, ...).
/// Used to tag OMPT thread_data with a stable per-thread identifier.
static uint64_t my_next_id() {
  static uint64_t ID = 0;
  // Atomic fetch-and-increment so concurrent thread-begin callbacks
  // never hand out the same id twice.
  uint64_t ret = __sync_fetch_and_add(&ID, 1);
  return ret;
}
204 static int pagesize
{0};
206 // Data structure to provide a threadsafe pool of reusable objects.
207 // DataPool<Type of objects>
209 template <typename T
> struct DataPool final
{
210 static __thread DataPool
<T
> *ThreadDataPool
;
211 std::mutex DPMutex
{};
213 // store unused objects
214 std::vector
<T
*> DataPointer
{};
215 std::vector
<T
*> RemoteDataPointer
{};
217 // store all allocated memory to finally release
218 std::list
<void *> memory
;
220 // count remotely returned data (RemoteDataPointer.size())
221 std::atomic
<int> remote
{0};
223 // totally allocated data objects in pool
229 int getRemote() { return remoteReturn
+ remote
; }
230 int getLocal() { return localReturn
; }
232 int getTotal() { return total
; }
234 return total
- DataPointer
.size() - RemoteDataPointer
.size();
237 // fill the pool by allocating a page of memory
240 const std::lock_guard
<std::mutex
> lock(DPMutex
);
241 // DataPointer is empty, so just swap the vectors
242 DataPointer
.swap(RemoteDataPointer
);
246 // calculate size of an object including padding to cacheline size
247 size_t elemSize
= sizeof(T
);
248 size_t paddedSize
= (((elemSize
- 1) / 64) + 1) * 64;
249 // number of padded elements to allocate
250 int ndatas
= pagesize
/ paddedSize
;
251 char *datas
= (char *)malloc(ndatas
* paddedSize
);
252 memory
.push_back(datas
);
253 for (int i
= 0; i
< ndatas
; i
++) {
254 DataPointer
.push_back(new (datas
+ i
* paddedSize
) T(this));
259 // get data from the pool
262 if (DataPointer
.empty())
264 ret
= DataPointer
.back();
265 DataPointer
.pop_back();
269 // accesses to the thread-local datapool don't need locks
270 void returnOwnData(T
*data
) {
271 DataPointer
.emplace_back(data
);
277 // returning to a remote datapool using lock
278 void returnData(T
*data
) {
279 const std::lock_guard
<std::mutex
> lock(DPMutex
);
280 RemoteDataPointer
.emplace_back(data
);
288 // we assume all memory is returned when the thread finished / destructor is
290 if (archer_flags
->report_data_leak
&& getMissing() != 0) {
291 printf("ERROR: While freeing DataPool (%s) we are missing %i data "
293 __PRETTY_FUNCTION__
, getMissing());
296 for (auto i
: DataPointer
)
299 for (auto i
: RemoteDataPointer
)
302 for (auto i
: memory
)
308 template <typename T
> struct DataPoolEntry
{
311 static T
*New() { return DataPool
<T
>::ThreadDataPool
->getData(); }
314 static_cast<T
*>(this)->Reset();
315 if (owner
== DataPool
<T
>::ThreadDataPool
)
316 owner
->returnOwnData(static_cast<T
*>(this));
318 owner
->returnData(static_cast<T
*>(this));
321 DataPoolEntry(DataPool
<T
> *dp
) : owner(dp
) {}
324 struct DependencyData
;
325 typedef DataPool
<DependencyData
> DependencyDataPool
;
327 __thread DependencyDataPool
*DependencyDataPool::ThreadDataPool
= nullptr;
329 /// Data structure to store additional information for task dependency.
330 struct DependencyData final
: DataPoolEntry
<DependencyData
> {
331 ompt_tsan_clockid in
;
332 ompt_tsan_clockid out
;
333 ompt_tsan_clockid inoutset
;
334 void *GetInPtr() { return &in
; }
335 void *GetOutPtr() { return &out
; }
336 void *GetInoutsetPtr() { return &inoutset
; }
340 static DependencyData
*New() { return DataPoolEntry
<DependencyData
>::New(); }
342 DependencyData(DataPool
<DependencyData
> *dp
)
343 : DataPoolEntry
<DependencyData
>(dp
) {}
346 struct TaskDependency
{
350 ompt_dependence_type_t type
;
351 TaskDependency(DependencyData
*depData
, ompt_dependence_type_t type
)
352 : inPtr(depData
->GetInPtr()), outPtr(depData
->GetOutPtr()),
353 inoutsetPtr(depData
->GetInoutsetPtr()), type(type
) {}
354 void AnnotateBegin() {
355 if (type
== ompt_dependence_type_out
||
356 type
== ompt_dependence_type_inout
||
357 type
== ompt_dependence_type_mutexinoutset
) {
358 TsanHappensAfter(inPtr
);
359 TsanHappensAfter(outPtr
);
360 TsanHappensAfter(inoutsetPtr
);
361 } else if (type
== ompt_dependence_type_in
) {
362 TsanHappensAfter(outPtr
);
363 TsanHappensAfter(inoutsetPtr
);
364 } else if (type
== ompt_dependence_type_inoutset
) {
365 TsanHappensAfter(inPtr
);
366 TsanHappensAfter(outPtr
);
370 if (type
== ompt_dependence_type_out
||
371 type
== ompt_dependence_type_inout
||
372 type
== ompt_dependence_type_mutexinoutset
) {
373 TsanHappensBefore(outPtr
);
374 } else if (type
== ompt_dependence_type_in
) {
375 TsanHappensBefore(inPtr
);
376 } else if (type
== ompt_dependence_type_inoutset
) {
377 TsanHappensBefore(inoutsetPtr
);
383 typedef DataPool
<ParallelData
> ParallelDataPool
;
385 __thread ParallelDataPool
*ParallelDataPool::ThreadDataPool
= nullptr;
387 /// Data structure to store additional information for parallel regions.
388 struct ParallelData final
: DataPoolEntry
<ParallelData
> {
390 // Parallel fork is just another barrier, use Barrier[1]
392 /// Two addresses for relationships with barriers.
393 ompt_tsan_clockid Barrier
[2];
397 void *GetParallelPtr() { return &(Barrier
[1]); }
399 void *GetBarrierPtr(unsigned Index
) { return &(Barrier
[Index
]); }
401 ParallelData
*Init(const void *codeptr
) {
408 static ParallelData
*New(const void *codeptr
) {
409 return DataPoolEntry
<ParallelData
>::New()->Init(codeptr
);
412 ParallelData(DataPool
<ParallelData
> *dp
) : DataPoolEntry
<ParallelData
>(dp
) {}
415 static inline ParallelData
*ToParallelData(ompt_data_t
*parallel_data
) {
416 return reinterpret_cast<ParallelData
*>(parallel_data
->ptr
);
420 typedef DataPool
<Taskgroup
> TaskgroupPool
;
421 template <> __thread TaskgroupPool
*TaskgroupPool::ThreadDataPool
= nullptr;
423 /// Data structure to support stacking of taskgroups and allow synchronization.
424 struct Taskgroup final
: DataPoolEntry
<Taskgroup
> {
425 /// Its address is used for relationships of the taskgroup's task set.
426 ompt_tsan_clockid Ptr
;
428 /// Reference to the parent taskgroup.
431 void *GetPtr() { return &Ptr
; }
433 Taskgroup
*Init(Taskgroup
*parent
) {
440 static Taskgroup
*New(Taskgroup
*Parent
) {
441 return DataPoolEntry
<Taskgroup
>::New()->Init(Parent
);
444 Taskgroup(DataPool
<Taskgroup
> *dp
) : DataPoolEntry
<Taskgroup
>(dp
) {}
447 enum ArcherTaskFlag
{ ArcherTaskFulfilled
= 0x00010000 };
450 typedef DataPool
<TaskData
> TaskDataPool
;
451 template <> __thread TaskDataPool
*TaskDataPool::ThreadDataPool
= nullptr;
453 /// Data structure to store additional information for tasks.
454 struct TaskData final
: DataPoolEntry
<TaskData
> {
455 /// Its address is used for relationships of this task.
456 ompt_tsan_clockid Task
{0};
458 /// Child tasks use its address to declare a relationship to a taskwait in
460 ompt_tsan_clockid Taskwait
{0};
462 /// Child tasks use its address to model omp_all_memory dependencies
463 ompt_tsan_clockid AllMemory
[2]{0};
465 /// Index of which barrier to use next.
466 char BarrierIndex
{0};
468 /// Whether this task is currently executing a barrier.
469 bool InBarrier
{false};
471 /// Whether this task is an included task.
474 /// count execution phase
477 /// Count how often this structure has been put into child tasks + 1.
478 std::atomic_int RefCount
{1};
480 /// Reference to the parent that created this task.
481 TaskData
*Parent
{nullptr};
483 /// Reference to the team of this task.
484 ParallelData
*Team
{nullptr};
486 /// Reference to the current taskgroup that this task either belongs to or
487 /// that it just created.
488 Taskgroup
*TaskGroup
{nullptr};
490 /// Dependency information for this task.
491 TaskDependency
*Dependencies
{nullptr};
493 /// Number of dependency entries.
494 unsigned DependencyCount
{0};
496 // The dependency-map stores DependencyData objects representing
497 // the dependency variables used on the sibling tasks created from
499 // We expect a rare need for the dependency-map, so alloc on demand
500 std::unordered_map
<void *, DependencyData
*> *DependencyMap
{nullptr};
506 bool isIncluded() { return TaskType
& ompt_task_undeferred
; }
507 bool isUntied() { return TaskType
& ompt_task_untied
; }
508 bool isFinal() { return TaskType
& ompt_task_final
; }
509 bool isMergable() { return TaskType
& ompt_task_mergeable
; }
510 bool isMerged() { return TaskType
& ompt_task_merged
; }
512 bool isExplicit() { return TaskType
& ompt_task_explicit
; }
513 bool isImplicit() { return TaskType
& ompt_task_implicit
; }
514 bool isInitial() { return TaskType
& ompt_task_initial
; }
515 bool isTarget() { return TaskType
& ompt_task_target
; }
517 bool isFulfilled() { return TaskType
& ArcherTaskFulfilled
; }
518 void setFulfilled() { TaskType
|= ArcherTaskFulfilled
; }
520 void setAllMemoryDep() { AllMemory
[0] = 1; }
521 bool hasAllMemoryDep() { return AllMemory
[0]; }
523 void *GetTaskPtr() { return &Task
; }
525 void *GetTaskwaitPtr() { return &Taskwait
; }
527 void *GetLastAllMemoryPtr() { return AllMemory
; }
528 void *GetNextAllMemoryPtr() { return AllMemory
+ 1; }
530 TaskData
*Init(TaskData
*parent
, int taskType
) {
534 BarrierIndex
= Parent
->BarrierIndex
;
535 if (Parent
!= nullptr) {
537 // Copy over pointer to taskgroup. This task may set up its own stack
538 // but for now belongs to its parent's taskgroup.
539 TaskGroup
= Parent
->TaskGroup
;
544 TaskData
*Init(ParallelData
*team
, int taskType
) {
561 for (auto i
: *DependencyMap
)
563 delete DependencyMap
;
565 DependencyMap
= nullptr;
568 Dependencies
= nullptr;
575 static TaskData
*New(TaskData
*parent
, int taskType
) {
576 return DataPoolEntry
<TaskData
>::New()->Init(parent
, taskType
);
579 static TaskData
*New(ParallelData
*team
, int taskType
) {
580 return DataPoolEntry
<TaskData
>::New()->Init(team
, taskType
);
583 TaskData(DataPool
<TaskData
> *dp
) : DataPoolEntry
<TaskData
>(dp
) {}
587 static inline TaskData
*ToTaskData(ompt_data_t
*task_data
) {
589 return reinterpret_cast<TaskData
*>(task_data
->ptr
);
593 /// Store a mutex for each wait_id to resolve race condition with callbacks.
// Per-wait-id mutexes used to model OpenMP lock acquire/release ordering
// for TSan; the map itself is guarded by LocksMutex below.
594 static std::unordered_map
<ompt_wait_id_t
, std::mutex
> Locks
;
595 static std::mutex LocksMutex
;
597 static void ompt_tsan_thread_begin(ompt_thread_t thread_type
,
598 ompt_data_t
*thread_data
) {
599 ParallelDataPool::ThreadDataPool
= new ParallelDataPool
;
600 TsanNewMemory(ParallelDataPool::ThreadDataPool
,
601 sizeof(ParallelDataPool::ThreadDataPool
));
602 TaskgroupPool::ThreadDataPool
= new TaskgroupPool
;
603 TsanNewMemory(TaskgroupPool::ThreadDataPool
,
604 sizeof(TaskgroupPool::ThreadDataPool
));
605 TaskDataPool::ThreadDataPool
= new TaskDataPool
;
606 TsanNewMemory(TaskDataPool::ThreadDataPool
,
607 sizeof(TaskDataPool::ThreadDataPool
));
608 DependencyDataPool::ThreadDataPool
= new DependencyDataPool
;
609 TsanNewMemory(DependencyDataPool::ThreadDataPool
,
610 sizeof(DependencyDataPool::ThreadDataPool
));
611 thread_data
->value
= my_next_id();
614 static void ompt_tsan_thread_end(ompt_data_t
*thread_data
) {
615 TsanIgnoreWritesBegin();
616 delete ParallelDataPool::ThreadDataPool
;
617 delete TaskgroupPool::ThreadDataPool
;
618 delete TaskDataPool::ThreadDataPool
;
619 delete DependencyDataPool::ThreadDataPool
;
620 TsanIgnoreWritesEnd();
623 /// OMPT event callbacks for handling parallel regions.
625 static void ompt_tsan_parallel_begin(ompt_data_t
*parent_task_data
,
626 const ompt_frame_t
*parent_task_frame
,
627 ompt_data_t
*parallel_data
,
628 uint32_t requested_team_size
, int flag
,
629 const void *codeptr_ra
) {
630 ParallelData
*Data
= ParallelData::New(codeptr_ra
);
631 parallel_data
->ptr
= Data
;
633 TsanHappensBefore(Data
->GetParallelPtr());
634 if (archer_flags
->ignore_serial
&& ToTaskData(parent_task_data
)->isInitial())
635 TsanIgnoreWritesEnd();
638 static void ompt_tsan_parallel_end(ompt_data_t
*parallel_data
,
639 ompt_data_t
*task_data
, int flag
,
640 const void *codeptr_ra
) {
641 if (archer_flags
->ignore_serial
&& ToTaskData(task_data
)->isInitial())
642 TsanIgnoreWritesBegin();
643 ParallelData
*Data
= ToParallelData(parallel_data
);
644 TsanHappensAfter(Data
->GetBarrierPtr(0));
645 TsanHappensAfter(Data
->GetBarrierPtr(1));
649 #if (LLVM_VERSION >= 40)
650 if (&__archer_get_omp_status
) {
651 if (__archer_get_omp_status() == 0 && archer_flags
->flush_shadow
)
652 __tsan_flush_memory();
657 static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint
,
658 ompt_data_t
*parallel_data
,
659 ompt_data_t
*task_data
,
660 unsigned int team_size
,
661 unsigned int thread_num
, int type
) {
663 case ompt_scope_begin
:
664 if (type
& ompt_task_initial
) {
665 parallel_data
->ptr
= ParallelData::New(nullptr);
667 task_data
->ptr
= TaskData::New(ToParallelData(parallel_data
), type
);
668 TsanHappensAfter(ToParallelData(parallel_data
)->GetParallelPtr());
669 TsanFuncEntry(ToParallelData(parallel_data
)->codePtr
);
671 case ompt_scope_end
: {
672 TaskData
*Data
= ToTaskData(task_data
);
674 assert(Data
->freed
== 0 && "Implicit task end should only be called once!");
677 assert(Data
->RefCount
== 1 &&
678 "All tasks should have finished at the implicit barrier!");
679 if (type
& ompt_task_initial
) {
680 Data
->Team
->Delete();
686 case ompt_scope_beginend
:
687 // Should not occur according to OpenMP 5.1
688 // Tested in OMPT tests
693 static void ompt_tsan_sync_region(ompt_sync_region_t kind
,
694 ompt_scope_endpoint_t endpoint
,
695 ompt_data_t
*parallel_data
,
696 ompt_data_t
*task_data
,
697 const void *codeptr_ra
) {
698 TaskData
*Data
= ToTaskData(task_data
);
700 case ompt_scope_begin
:
701 case ompt_scope_beginend
:
702 TsanFuncEntry(codeptr_ra
);
704 case ompt_sync_region_barrier_implementation
:
705 case ompt_sync_region_barrier_implicit
:
706 case ompt_sync_region_barrier_explicit
:
707 case ompt_sync_region_barrier_implicit_parallel
:
708 case ompt_sync_region_barrier_implicit_workshare
:
709 case ompt_sync_region_barrier_teams
:
710 case ompt_sync_region_barrier
: {
711 char BarrierIndex
= Data
->BarrierIndex
;
712 TsanHappensBefore(Data
->Team
->GetBarrierPtr(BarrierIndex
));
714 if (hasReductionCallback
< ompt_set_always
) {
715 // We ignore writes inside the barrier. These would either occur during
716 // 1. reductions performed by the runtime which are guaranteed to be
718 // 2. execution of another task.
719 // For the latter case we will re-enable tracking in task_switch.
720 Data
->InBarrier
= true;
721 TsanIgnoreWritesBegin();
727 case ompt_sync_region_taskwait
:
730 case ompt_sync_region_taskgroup
:
731 Data
->TaskGroup
= Taskgroup::New(Data
->TaskGroup
);
734 case ompt_sync_region_reduction
:
735 // should never be reached
738 if (endpoint
== ompt_scope_begin
)
744 case ompt_sync_region_barrier_implementation
:
745 case ompt_sync_region_barrier_implicit
:
746 case ompt_sync_region_barrier_explicit
:
747 case ompt_sync_region_barrier_implicit_parallel
:
748 case ompt_sync_region_barrier_implicit_workshare
:
749 case ompt_sync_region_barrier_teams
:
750 case ompt_sync_region_barrier
: {
751 if (hasReductionCallback
< ompt_set_always
) {
752 // We want to track writes after the barrier again.
753 Data
->InBarrier
= false;
754 TsanIgnoreWritesEnd();
757 char BarrierIndex
= Data
->BarrierIndex
;
758 // Barrier will end after it has been entered by all threads.
760 TsanHappensAfter(Data
->Team
->GetBarrierPtr(BarrierIndex
));
762 // It is not guaranteed that all threads have exited this barrier before
763 // we enter the next one. So we will use a different address.
764 // We are however guaranteed that this current barrier is finished
765 // by the time we exit the next one. So we can then reuse the first
767 Data
->BarrierIndex
= (BarrierIndex
+ 1) % 2;
771 case ompt_sync_region_taskwait
: {
772 if (Data
->execution
> 1)
773 TsanHappensAfter(Data
->GetTaskwaitPtr());
777 case ompt_sync_region_taskgroup
: {
778 assert(Data
->TaskGroup
!= nullptr &&
779 "Should have at least one taskgroup!");
781 TsanHappensAfter(Data
->TaskGroup
->GetPtr());
783 // Delete this allocated taskgroup, all descendent task are finished by
785 Taskgroup
*Parent
= Data
->TaskGroup
->Parent
;
786 Data
->TaskGroup
->Delete();
787 Data
->TaskGroup
= Parent
;
791 case ompt_sync_region_reduction
:
792 // Should not occur according to OpenMP 5.1
793 // Tested in OMPT tests
800 static void ompt_tsan_reduction(ompt_sync_region_t kind
,
801 ompt_scope_endpoint_t endpoint
,
802 ompt_data_t
*parallel_data
,
803 ompt_data_t
*task_data
,
804 const void *codeptr_ra
) {
806 case ompt_scope_begin
:
808 case ompt_sync_region_reduction
:
809 TsanIgnoreWritesBegin();
817 case ompt_sync_region_reduction
:
818 TsanIgnoreWritesEnd();
824 case ompt_scope_beginend
:
825 // Should not occur according to OpenMP 5.1
826 // Tested in OMPT tests
827 // Would have no implications for DR detection
832 /// OMPT event callbacks for handling tasks.
834 static void ompt_tsan_task_create(
835 ompt_data_t
*parent_task_data
, /* id of parent task */
836 const ompt_frame_t
*parent_frame
, /* frame data for parent task */
837 ompt_data_t
*new_task_data
, /* id of created task */
838 int type
, int has_dependences
,
839 const void *codeptr_ra
) /* pointer to outlined function */
842 assert(new_task_data
->ptr
== NULL
&&
843 "Task data should be initialized to NULL");
844 if (type
& ompt_task_initial
) {
845 ompt_data_t
*parallel_data
;
847 ompt_get_parallel_info(0, ¶llel_data
, &team_size
);
848 ParallelData
*PData
= ParallelData::New(nullptr);
849 parallel_data
->ptr
= PData
;
851 Data
= TaskData::New(PData
, type
);
852 new_task_data
->ptr
= Data
;
853 } else if (type
& ompt_task_undeferred
) {
854 Data
= TaskData::New(ToTaskData(parent_task_data
), type
);
855 new_task_data
->ptr
= Data
;
856 } else if (type
& ompt_task_explicit
|| type
& ompt_task_target
) {
857 Data
= TaskData::New(ToTaskData(parent_task_data
), type
);
858 new_task_data
->ptr
= Data
;
860 // Use the newly created address. We cannot use a single address from the
861 // parent because that would declare wrong relationships with other
862 // sibling tasks that may be created before this task is started!
863 TsanHappensBefore(Data
->GetTaskPtr());
864 ToTaskData(parent_task_data
)->execution
++;
868 static void freeTask(TaskData
*task
) {
869 while (task
!= nullptr && --task
->RefCount
== 0) {
870 TaskData
*Parent
= task
->Parent
;
876 // LastAllMemoryPtr marks the beginning of an all_memory epoch
877 // NextAllMemoryPtr marks the end of an all_memory epoch
878 // All tasks with depend begin execution after LastAllMemoryPtr
879 // and end before NextAllMemoryPtr
880 static void releaseDependencies(TaskData
*task
) {
881 if (archer_flags
->all_memory
) {
882 if (task
->hasAllMemoryDep()) {
883 TsanHappensBefore(task
->Parent
->GetLastAllMemoryPtr());
884 TsanHappensBefore(task
->Parent
->GetNextAllMemoryPtr());
885 } else if (task
->DependencyCount
)
886 TsanHappensBefore(task
->Parent
->GetNextAllMemoryPtr());
888 for (unsigned i
= 0; i
< task
->DependencyCount
; i
++) {
889 task
->Dependencies
[i
].AnnotateEnd();
893 static void acquireDependencies(TaskData
*task
) {
894 if (archer_flags
->all_memory
) {
895 if (task
->hasAllMemoryDep())
896 TsanHappensAfter(task
->Parent
->GetNextAllMemoryPtr());
897 else if (task
->DependencyCount
)
898 TsanHappensAfter(task
->Parent
->GetLastAllMemoryPtr());
900 for (unsigned i
= 0; i
< task
->DependencyCount
; i
++) {
901 task
->Dependencies
[i
].AnnotateBegin();
905 static void completeTask(TaskData
*FromTask
) {
908 // Task-end happens after a possible omp_fulfill_event call
909 if (FromTask
->isFulfilled())
910 TsanHappensAfter(FromTask
->GetTaskPtr());
911 // Included tasks are executed sequentially, no need to track
913 if (!FromTask
->isIncluded()) {
914 // Task will finish before a barrier in the surrounding parallel region
916 ParallelData
*PData
= FromTask
->Team
;
917 TsanHappensBefore(PData
->GetBarrierPtr(FromTask
->BarrierIndex
));
919 // ... and before an eventual taskwait by the parent thread.
920 TsanHappensBefore(FromTask
->Parent
->GetTaskwaitPtr());
922 if (FromTask
->TaskGroup
!= nullptr) {
923 // This task is part of a taskgroup, so it will finish before the
924 // corresponding taskgroup_end.
925 TsanHappensBefore(FromTask
->TaskGroup
->GetPtr());
928 // release dependencies
929 releaseDependencies(FromTask
);
932 static void suspendTask(TaskData
*FromTask
) {
935 // Task may be resumed at a later point in time.
936 TsanHappensBefore(FromTask
->GetTaskPtr());
939 static void switchTasks(TaskData
*FromTask
, TaskData
*ToTask
) {
940 // Legacy handling for missing reduction callback
941 if (hasReductionCallback
< ompt_set_always
) {
942 if (FromTask
&& FromTask
->InBarrier
) {
943 // We want to ignore writes in the runtime code during barriers,
944 // but not when executing tasks with user code!
945 TsanIgnoreWritesEnd();
947 if (ToTask
&& ToTask
->InBarrier
) {
948 // We want to ignore writes in the runtime code during barriers,
949 // but not when executing tasks with user code!
950 TsanIgnoreWritesBegin();
955 // FromTask->deactivate();
957 // ToTask->activate();
960 static void endTask(TaskData
*FromTask
) {
965 static void startTask(TaskData
*ToTask
) {
968 // Handle dependencies on first execution of the task
969 if (ToTask
->execution
== 0) {
971 acquireDependencies(ToTask
);
973 // 1. Task will begin execution after it has been created.
974 // 2. Task will resume after it has been switched away.
975 TsanHappensAfter(ToTask
->GetTaskPtr());
978 static void ompt_tsan_task_schedule(ompt_data_t
*first_task_data
,
979 ompt_task_status_t prior_task_status
,
980 ompt_data_t
*second_task_data
) {
983 // The necessary action depends on prior_task_status:
985 // ompt_task_early_fulfill = 5,
988 // ompt_task_late_fulfill = 6,
989 // -> first completed, first freed, second ignored
991 // ompt_task_complete = 1,
992 // ompt_task_cancel = 3,
993 // -> first completed, first freed, second starts
995 // ompt_taskwait_complete = 8,
996 // -> first starts, first completes, first freed, second ignored
998 // ompt_task_detach = 4,
999 // ompt_task_yield = 2,
1000 // ompt_task_switch = 7
1001 // -> first suspended, second starts
1004 TaskData
*FromTask
= ToTaskData(first_task_data
);
1005 TaskData
*ToTask
= ToTaskData(second_task_data
);
1007 switch (prior_task_status
) {
1008 case ompt_task_early_fulfill
:
1009 TsanHappensBefore(FromTask
->GetTaskPtr());
1010 FromTask
->setFulfilled();
1012 case ompt_task_late_fulfill
:
1013 TsanHappensAfter(FromTask
->GetTaskPtr());
1014 completeTask(FromTask
);
1017 case ompt_taskwait_complete
:
1018 acquireDependencies(FromTask
);
1021 case ompt_task_complete
:
1022 completeTask(FromTask
);
1024 switchTasks(FromTask
, ToTask
);
1027 case ompt_task_cancel
:
1028 completeTask(FromTask
);
1030 switchTasks(FromTask
, ToTask
);
1034 case ompt_task_detach
:
1036 suspendTask(FromTask
);
1037 switchTasks(FromTask
, ToTask
);
1040 case ompt_task_yield
:
1041 suspendTask(FromTask
);
1042 switchTasks(FromTask
, ToTask
);
1045 case ompt_task_switch
:
1046 suspendTask(FromTask
);
1047 switchTasks(FromTask
, ToTask
);
1053 static void ompt_tsan_dependences(ompt_data_t
*task_data
,
1054 const ompt_dependence_t
*deps
, int ndeps
) {
1056 // Copy the data to use it in task_switch and task_end.
1057 TaskData
*Data
= ToTaskData(task_data
);
1058 if (!Data
->Parent
) {
1059 // Return since doacross dependences are not supported yet.
1062 if (!Data
->Parent
->DependencyMap
)
1063 Data
->Parent
->DependencyMap
=
1064 new std::unordered_map
<void *, DependencyData
*>();
1065 Data
->Dependencies
=
1066 (TaskDependency
*)malloc(sizeof(TaskDependency
) * ndeps
);
1067 Data
->DependencyCount
= ndeps
;
1068 for (int i
= 0, d
= 0; i
< ndeps
; i
++, d
++) {
1069 if (deps
[i
].dependence_type
== ompt_dependence_type_out_all_memory
||
1070 deps
[i
].dependence_type
== ompt_dependence_type_inout_all_memory
) {
1071 Data
->setAllMemoryDep();
1072 Data
->DependencyCount
--;
1073 if (!archer_flags
->all_memory
) {
1074 printf("The application uses omp_all_memory, but Archer was\n"
1075 "started to not consider omp_all_memory. This can lead\n"
1076 "to false data race alerts.\n"
1077 "Include all_memory=1 in ARCHER_OPTIONS to consider\n"
1078 "omp_all_memory from the beginning.\n");
1079 archer_flags
->all_memory
= 1;
1084 auto ret
= Data
->Parent
->DependencyMap
->insert(
1085 std::make_pair(deps
[i
].variable
.ptr
, nullptr));
1087 ret
.first
->second
= DependencyData::New();
1089 new ((void *)(Data
->Dependencies
+ d
))
1090 TaskDependency(ret
.first
->second
, deps
[i
].dependence_type
);
1093 // This callback is executed before this task is first started.
1094 TsanHappensBefore(Data
->GetTaskPtr());
1098 /// OMPT event callbacks for handling locking.
1099 static void ompt_tsan_mutex_acquired(ompt_mutex_t kind
, ompt_wait_id_t wait_id
,
1100 const void *codeptr_ra
) {
1102 // Acquire our own lock to make sure that
1103 // 1. the previous release has finished.
1104 // 2. the next acquire doesn't start before we have finished our release.
1106 std::mutex
&Lock
= Locks
[wait_id
];
1107 LocksMutex
.unlock();
1110 TsanHappensAfter(&Lock
);
1113 static void ompt_tsan_mutex_released(ompt_mutex_t kind
, ompt_wait_id_t wait_id
,
1114 const void *codeptr_ra
) {
1116 std::mutex
&Lock
= Locks
[wait_id
];
1117 LocksMutex
.unlock();
1118 TsanHappensBefore(&Lock
);
1123 // callback , signature , variable to store result , required support level
1124 #define SET_OPTIONAL_CALLBACK_T(event, type, result, level) \
1126 ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event; \
1127 result = ompt_set_callback(ompt_callback_##event, \
1128 (ompt_callback_t)tsan_##event); \
1129 if (result < level) \
1130 printf("Registered callback '" #event "' is not supported at " #level \
1135 #define SET_CALLBACK_T(event, type) \
1138 SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always); \
1141 #define SET_CALLBACK(event) SET_CALLBACK_T(event, event)
1143 #define findTsanFunction(f, fSig) \
1145 if (NULL == (f = fSig dlsym(RTLD_DEFAULT, #f))) \
1146 printf("Unable to find TSan function " #f ".\n"); \
1149 #define findTsanFunctionSilent(f, fSig) f = fSig dlsym(RTLD_DEFAULT, #f)
1151 static int ompt_tsan_initialize(ompt_function_lookup_t lookup
, int device_num
,
1152 ompt_data_t
*tool_data
) {
1153 const char *options
= getenv("TSAN_OPTIONS");
1154 TsanFlags
tsan_flags(options
);
1156 ompt_set_callback_t ompt_set_callback
=
1157 (ompt_set_callback_t
)lookup("ompt_set_callback");
1158 if (ompt_set_callback
== NULL
) {
1159 std::cerr
<< "Could not set callback, exiting..." << std::endl
;
1162 ompt_get_parallel_info
=
1163 (ompt_get_parallel_info_t
)lookup("ompt_get_parallel_info");
1164 ompt_get_thread_data
= (ompt_get_thread_data_t
)lookup("ompt_get_thread_data");
1166 if (ompt_get_parallel_info
== NULL
) {
1167 fprintf(stderr
, "Could not get inquiry function 'ompt_get_parallel_info', "
1172 findTsanFunction(AnnotateHappensAfter
,
1173 (void (*)(const char *, int, const volatile void *)));
1174 findTsanFunction(AnnotateHappensBefore
,
1175 (void (*)(const char *, int, const volatile void *)));
1176 findTsanFunction(AnnotateIgnoreWritesBegin
, (void (*)(const char *, int)));
1177 findTsanFunction(AnnotateIgnoreWritesEnd
, (void (*)(const char *, int)));
1180 (void (*)(const char *, int, const volatile void *, size_t)));
1181 findTsanFunction(__tsan_func_entry
, (void (*)(const void *)));
1182 findTsanFunction(__tsan_func_exit
, (void (*)(void)));
1184 SET_CALLBACK(thread_begin
);
1185 SET_CALLBACK(thread_end
);
1186 SET_CALLBACK(parallel_begin
);
1187 SET_CALLBACK(implicit_task
);
1188 SET_CALLBACK(sync_region
);
1189 SET_CALLBACK(parallel_end
);
1191 SET_CALLBACK(task_create
);
1192 SET_CALLBACK(task_schedule
);
1193 SET_CALLBACK(dependences
);
1195 SET_CALLBACK_T(mutex_acquired
, mutex
);
1196 SET_CALLBACK_T(mutex_released
, mutex
);
1197 SET_OPTIONAL_CALLBACK_T(reduction
, sync_region
, hasReductionCallback
,
1200 if (!tsan_flags
.ignore_noninstrumented_modules
)
1202 "Warning: please export "
1203 "TSAN_OPTIONS='ignore_noninstrumented_modules=1' "
1204 "to avoid false positive reports from the OpenMP runtime!\n");
1205 if (archer_flags
->ignore_serial
)
1206 TsanIgnoreWritesBegin();
1208 return 1; // success
1211 static void ompt_tsan_finalize(ompt_data_t
*tool_data
) {
1212 if (archer_flags
->ignore_serial
)
1213 TsanIgnoreWritesEnd();
1214 if (archer_flags
->print_max_rss
) {
1216 getrusage(RUSAGE_SELF
, &end
);
1217 printf("MAX RSS[KBytes] during execution: %ld\n", end
.ru_maxrss
);
1221 delete archer_flags
;
1224 extern "C" ompt_start_tool_result_t
*
1225 ompt_start_tool(unsigned int omp_version
, const char *runtime_version
) {
1226 const char *options
= getenv("ARCHER_OPTIONS");
1227 archer_flags
= new ArcherFlags(options
);
1228 if (!archer_flags
->enabled
) {
1229 if (archer_flags
->verbose
)
1230 std::cout
<< "Archer disabled, stopping operation" << std::endl
;
1231 delete archer_flags
;
1235 pagesize
= getpagesize();
1237 static ompt_start_tool_result_t ompt_start_tool_result
= {
1238 &ompt_tsan_initialize
, &ompt_tsan_finalize
, {0}};
1240 // The OMPT start-up code uses dlopen with RTLD_LAZY. Therefore, we cannot
1241 // rely on dlopen to fail if TSan is missing, but would get a runtime error
1242 // for the first TSan call. We use RunningOnValgrind to detect whether
1243 // an implementation of the Annotation interface is available in the
1244 // execution or disable the tool (by returning NULL).
1246 findTsanFunctionSilent(RunningOnValgrind
, (int (*)(void)));
1247 if (!RunningOnValgrind
) // if we are not running on TSAN, give a different
1248 // tool the chance to be loaded
1250 if (archer_flags
->verbose
)
1251 std::cout
<< "Archer detected OpenMP application without TSan "
1252 "stopping operation"
1254 delete archer_flags
;
1258 if (archer_flags
->verbose
)
1259 std::cout
<< "Archer detected OpenMP application with TSan, supplying "
1260 "OpenMP synchronization semantics"
1262 return &ompt_start_tool_result
;