Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / compiler-rt / lib / tsan / rtl / tsan_rtl_access.cpp
blob8b20984a01000603e533faa824174fedba6269b6
//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//
14 #include "tsan_rtl.h"
16 namespace __tsan {
18 ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
19 uptr addr, uptr size,
20 AccessType typ) {
21 DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
22 if (!kCollectHistory)
23 return true;
24 EventAccess* ev;
25 if (UNLIKELY(!TraceAcquire(thr, &ev)))
26 return false;
27 u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
28 uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
29 thr->trace_prev_pc = pc;
30 if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
31 ev->is_access = 1;
32 ev->is_read = !!(typ & kAccessRead);
33 ev->is_atomic = !!(typ & kAccessAtomic);
34 ev->size_log = size_log;
35 ev->pc_delta = pc_delta;
36 DCHECK_EQ(ev->pc_delta, pc_delta);
37 ev->addr = CompressAddr(addr);
38 TraceRelease(thr, ev);
39 return true;
41 auto* evex = reinterpret_cast<EventAccessExt*>(ev);
42 evex->is_access = 0;
43 evex->is_func = 0;
44 evex->type = EventType::kAccessExt;
45 evex->is_read = !!(typ & kAccessRead);
46 evex->is_atomic = !!(typ & kAccessAtomic);
47 evex->size_log = size_log;
48 // Note: this is important, see comment in EventAccessExt.
49 evex->_ = 0;
50 evex->addr = CompressAddr(addr);
51 evex->pc = pc;
52 TraceRelease(thr, evex);
53 return true;
56 ALWAYS_INLINE
57 bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
58 AccessType typ) {
59 if (!kCollectHistory)
60 return true;
61 EventAccessRange* ev;
62 if (UNLIKELY(!TraceAcquire(thr, &ev)))
63 return false;
64 thr->trace_prev_pc = pc;
65 ev->is_access = 0;
66 ev->is_func = 0;
67 ev->type = EventType::kAccessRange;
68 ev->is_read = !!(typ & kAccessRead);
69 ev->is_free = !!(typ & kAccessFree);
70 ev->size_lo = size;
71 ev->pc = CompressAddr(pc);
72 ev->addr = CompressAddr(addr);
73 ev->size_hi = size >> EventAccessRange::kSizeLoBits;
74 TraceRelease(thr, ev);
75 return true;
78 void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
79 AccessType typ) {
80 if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
81 return;
82 TraceSwitchPart(thr);
83 UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
84 DCHECK(res);
87 void TraceFunc(ThreadState* thr, uptr pc) {
88 if (LIKELY(TryTraceFunc(thr, pc)))
89 return;
90 TraceSwitchPart(thr);
91 UNUSED bool res = TryTraceFunc(thr, pc);
92 DCHECK(res);
95 NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
96 TraceSwitchPart(thr);
97 FuncEntry(thr, pc);
100 NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
101 TraceSwitchPart(thr);
102 FuncExit(thr);
105 void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
106 StackID stk) {
107 DCHECK(type == EventType::kLock || type == EventType::kRLock);
108 if (!kCollectHistory)
109 return;
110 EventLock ev;
111 ev.is_access = 0;
112 ev.is_func = 0;
113 ev.type = type;
114 ev.pc = CompressAddr(pc);
115 ev.stack_lo = stk;
116 ev.stack_hi = stk >> EventLock::kStackIDLoBits;
117 ev._ = 0;
118 ev.addr = CompressAddr(addr);
119 TraceEvent(thr, ev);
122 void TraceMutexUnlock(ThreadState* thr, uptr addr) {
123 if (!kCollectHistory)
124 return;
125 EventUnlock ev;
126 ev.is_access = 0;
127 ev.is_func = 0;
128 ev.type = EventType::kUnlock;
129 ev._ = 0;
130 ev.addr = CompressAddr(addr);
131 TraceEvent(thr, ev);
134 void TraceTime(ThreadState* thr) {
135 if (!kCollectHistory)
136 return;
137 FastState fast_state = thr->fast_state;
138 EventTime ev;
139 ev.is_access = 0;
140 ev.is_func = 0;
141 ev.type = EventType::kTime;
142 ev.sid = static_cast<u64>(fast_state.sid());
143 ev.epoch = static_cast<u64>(fast_state.epoch());
144 ev._ = 0;
145 TraceEvent(thr, ev);
148 NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
149 Shadow old,
150 AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
151 // For the free shadow markers the first element (that contains kFreeSid)
152 // triggers the race, but the second element contains info about the freeing
153 // thread, take it.
154 if (old.sid() == kFreeSid)
155 old = Shadow(LoadShadow(&shadow_mem[1]));
156 // This prevents trapping on this address in future.
157 for (uptr i = 0; i < kShadowCnt; i++)
158 StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
159 // See the comment in MemoryRangeFreed as to why the slot is locked
160 // for free memory accesses. ReportRace must not be called with
161 // the slot locked because of the fork. But MemoryRangeFreed is not
162 // called during fork because fork sets ignore_reads_and_writes,
163 // so simply unlocking the slot should be fine.
164 if (typ & kAccessSlotLocked)
165 SlotUnlock(thr);
166 ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
167 if (typ & kAccessSlotLocked)
168 SlotLock(thr);
171 #if !TSAN_VECTORIZE
172 ALWAYS_INLINE
173 bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
174 AccessType typ) {
175 for (uptr i = 0; i < kShadowCnt; i++) {
176 auto old = LoadShadow(&s[i]);
177 if (!(typ & kAccessRead)) {
178 if (old == cur.raw())
179 return true;
180 continue;
182 auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
183 static_cast<u32>(Shadow::kRodata));
184 if (masked == cur.raw())
185 return true;
186 if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
187 if (old == Shadow::kRodata)
188 return true;
191 return false;
194 ALWAYS_INLINE
195 bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
196 int unused0, int unused1, AccessType typ) {
197 bool stored = false;
198 for (uptr idx = 0; idx < kShadowCnt; idx++) {
199 RawShadow* sp = &shadow_mem[idx];
200 Shadow old(LoadShadow(sp));
201 if (LIKELY(old.raw() == Shadow::kEmpty)) {
202 if (!(typ & kAccessCheckOnly) && !stored)
203 StoreShadow(sp, cur.raw());
204 return false;
206 if (LIKELY(!(cur.access() & old.access())))
207 continue;
208 if (LIKELY(cur.sid() == old.sid())) {
209 if (!(typ & kAccessCheckOnly) &&
210 LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
211 StoreShadow(sp, cur.raw());
212 stored = true;
214 continue;
216 if (LIKELY(old.IsBothReadsOrAtomic(typ)))
217 continue;
218 if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
219 continue;
220 DoReportRace(thr, shadow_mem, cur, old, typ);
221 return true;
223 // We did not find any races and had already stored
224 // the current access info, so we are done.
225 if (LIKELY(stored))
226 return false;
227 // Choose a random candidate slot and replace it.
228 uptr index =
229 atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
230 StoreShadow(&shadow_mem[index], cur.raw());
231 return false;
234 # define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0
236 #else /* !TSAN_VECTORIZE */
238 ALWAYS_INLINE
239 bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
240 m128 access, AccessType typ) {
241 // Note: we could check if there is a larger access of the same type,
242 // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes)
243 // and now do smaller reads/writes, these can also be considered as "same
244 // access". However, it will make the check more expensive, so it's unclear
245 // if it's worth it. But this would conserve trace space, so it's useful
246 // besides potential speed up.
247 if (!(typ & kAccessRead)) {
248 const m128 same = _mm_cmpeq_epi32(shadow, access);
249 return _mm_movemask_epi8(same);
251 // For reads we need to reset read bit in the shadow,
252 // because we need to match read with both reads and writes.
253 // Shadow::kRodata has only read bit set, so it does what we want.
254 // We also abuse it for rodata check to save few cycles
255 // since we already loaded Shadow::kRodata into a register.
256 // Reads from rodata can't race.
257 // Measurements show that they can be 10-20% of all memory accesses.
258 // Shadow::kRodata has epoch 0 which cannot appear in shadow normally
259 // (thread epochs start from 1). So the same read bit mask
260 // serves as rodata indicator.
261 const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
262 const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
263 m128 same = _mm_cmpeq_epi32(masked_shadow, access);
264 // Range memory accesses check Shadow::kRodata before calling this,
265 // Shadow::kRodatas is not possible for free memory access
266 // and Go does not use Shadow::kRodata.
267 if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
268 const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
269 same = _mm_or_si128(ro, same);
271 return _mm_movemask_epi8(same);
274 NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
275 u32 race_mask, m128 shadow, AccessType typ) {
276 // race_mask points which of the shadow elements raced with the current
277 // access. Extract that element.
278 CHECK_NE(race_mask, 0);
279 u32 old;
280 // Note: _mm_extract_epi32 index must be a constant value.
281 switch (__builtin_ffs(race_mask) / 4) {
282 case 0:
283 old = _mm_extract_epi32(shadow, 0);
284 break;
285 case 1:
286 old = _mm_extract_epi32(shadow, 1);
287 break;
288 case 2:
289 old = _mm_extract_epi32(shadow, 2);
290 break;
291 case 3:
292 old = _mm_extract_epi32(shadow, 3);
293 break;
295 Shadow prev(static_cast<RawShadow>(old));
296 // For the free shadow markers the first element (that contains kFreeSid)
297 // triggers the race, but the second element contains info about the freeing
298 // thread, take it.
299 if (prev.sid() == kFreeSid)
300 prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
301 DoReportRace(thr, shadow_mem, cur, prev, typ);
304 ALWAYS_INLINE
305 bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
306 m128 shadow, m128 access, AccessType typ) {
307 // Note: empty/zero slots don't intersect with any access.
308 const m128 zero = _mm_setzero_si128();
309 const m128 mask_access = _mm_set1_epi32(0x000000ff);
310 const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
311 const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
312 const m128 access_and = _mm_and_si128(access, shadow);
313 const m128 access_xor = _mm_xor_si128(access, shadow);
314 const m128 intersect = _mm_and_si128(access_and, mask_access);
315 const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
316 const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
317 const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
318 const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
319 const m128 no_race =
320 _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
321 const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
322 if (UNLIKELY(race_mask))
323 goto SHARED;
325 STORE : {
326 if (typ & kAccessCheckOnly)
327 return false;
328 // We could also replace different sid's if access is the same,
329 // rw weaker and happens before. However, just checking access below
330 // is not enough because we also need to check that !both_read_or_atomic
331 // (reads from different sids can be concurrent).
332 // Theoretically we could replace smaller accesses with larger accesses,
333 // but it's unclear if it's worth doing.
334 const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
335 const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
336 const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
337 const m128 access_read_atomic =
338 _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
339 const m128 rw_weaker =
340 _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
341 const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
342 const int rewrite_mask = _mm_movemask_epi8(rewrite);
343 int index = __builtin_ffs(rewrite_mask);
344 if (UNLIKELY(index == 0)) {
345 const m128 empty = _mm_cmpeq_epi32(shadow, zero);
346 const int empty_mask = _mm_movemask_epi8(empty);
347 index = __builtin_ffs(empty_mask);
348 if (UNLIKELY(index == 0))
349 index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
351 StoreShadow(&shadow_mem[index / 4], cur.raw());
352 // We could zero other slots determined by rewrite_mask.
353 // That would help other threads to evict better slots,
354 // but it's unclear if it's worth it.
355 return false;
358 SHARED:
359 m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
360 // Need to unwind this because _mm_extract_epi8/_mm_insert_epi32
361 // indexes must be constants.
362 # define LOAD_EPOCH(idx) \
363 if (LIKELY(race_mask & (1 << (idx * 4)))) { \
364 u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1); \
365 u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid))); \
366 thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx); \
368 LOAD_EPOCH(0);
369 LOAD_EPOCH(1);
370 LOAD_EPOCH(2);
371 LOAD_EPOCH(3);
372 # undef LOAD_EPOCH
373 const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
374 const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
375 const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
376 const int concurrent_mask = _mm_movemask_epi8(concurrent);
377 if (LIKELY(concurrent_mask == 0))
378 goto STORE;
380 DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
381 return true;
384 # define LOAD_CURRENT_SHADOW(cur, shadow_mem) \
385 const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw())); \
386 const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
387 #endif
389 char* DumpShadow(char* buf, RawShadow raw) {
390 if (raw == Shadow::kEmpty) {
391 internal_snprintf(buf, 64, "0");
392 return buf;
394 Shadow s(raw);
395 AccessType typ;
396 s.GetAccess(nullptr, nullptr, &typ);
397 internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
398 static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
399 s.access(), static_cast<u32>(typ));
400 return buf;
403 // TryTrace* and TraceRestart* functions allow to turn memory access and func
404 // entry/exit callbacks into leaf functions with all associated performance
405 // benefits. These hottest callbacks do only 2 slow path calls: report a race
406 // and trace part switching. Race reporting is easy to turn into a tail call, we
407 // just always return from the runtime after reporting a race. But trace part
408 // switching is harder because it needs to be in the middle of callbacks. To
409 // turn it into a tail call we immidiately return after TraceRestart* functions,
410 // but TraceRestart* functions themselves recurse into the callback after
411 // switching trace part. As the result the hottest callbacks contain only tail
412 // calls, which effectively makes them leaf functions (can use all registers,
413 // no frame setup, etc).
414 NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
415 uptr size, AccessType typ) {
416 TraceSwitchPart(thr);
417 MemoryAccess(thr, pc, addr, size, typ);
420 ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
421 uptr size, AccessType typ) {
422 RawShadow* shadow_mem = MemToShadow(addr);
423 UNUSED char memBuf[4][64];
424 DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
425 static_cast<int>(thr->fast_state.sid()),
426 static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
427 static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
428 DumpShadow(memBuf[1], shadow_mem[1]),
429 DumpShadow(memBuf[2], shadow_mem[2]),
430 DumpShadow(memBuf[3], shadow_mem[3]));
432 FastState fast_state = thr->fast_state;
433 Shadow cur(fast_state, addr, size, typ);
435 LOAD_CURRENT_SHADOW(cur, shadow_mem);
436 if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
437 return;
438 if (UNLIKELY(fast_state.GetIgnoreBit()))
439 return;
440 if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
441 return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
442 CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
445 void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);
447 NOINLINE
448 void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
449 AccessType typ) {
450 TraceSwitchPart(thr);
451 MemoryAccess16(thr, pc, addr, typ);
454 ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
455 AccessType typ) {
456 const uptr size = 16;
457 FastState fast_state = thr->fast_state;
458 if (UNLIKELY(fast_state.GetIgnoreBit()))
459 return;
460 Shadow cur(fast_state, 0, 8, typ);
461 RawShadow* shadow_mem = MemToShadow(addr);
462 bool traced = false;
464 LOAD_CURRENT_SHADOW(cur, shadow_mem);
465 if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
466 goto SECOND;
467 if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
468 return RestartMemoryAccess16(thr, pc, addr, typ);
469 traced = true;
470 if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
471 return;
473 SECOND:
474 shadow_mem += kShadowCnt;
475 LOAD_CURRENT_SHADOW(cur, shadow_mem);
476 if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
477 return;
478 if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
479 return RestartMemoryAccess16(thr, pc, addr, typ);
480 CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
483 NOINLINE
484 void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
485 uptr size, AccessType typ) {
486 TraceSwitchPart(thr);
487 UnalignedMemoryAccess(thr, pc, addr, size, typ);
490 ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
491 uptr addr, uptr size,
492 AccessType typ) {
493 DCHECK_LE(size, 8);
494 FastState fast_state = thr->fast_state;
495 if (UNLIKELY(fast_state.GetIgnoreBit()))
496 return;
497 RawShadow* shadow_mem = MemToShadow(addr);
498 bool traced = false;
499 uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
501 Shadow cur(fast_state, addr, size1, typ);
502 LOAD_CURRENT_SHADOW(cur, shadow_mem);
503 if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
504 goto SECOND;
505 if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
506 return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
507 traced = true;
508 if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
509 return;
511 SECOND:
512 uptr size2 = size - size1;
513 if (LIKELY(size2 == 0))
514 return;
515 shadow_mem += kShadowCnt;
516 Shadow cur(fast_state, 0, size2, typ);
517 LOAD_CURRENT_SHADOW(cur, shadow_mem);
518 if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
519 return;
520 if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
521 return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
522 CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
525 void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
526 DCHECK_LE(p, end);
527 DCHECK(IsShadowMem(p));
528 DCHECK(IsShadowMem(end));
529 UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
530 DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
531 DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
532 #if !TSAN_VECTORIZE
533 for (; p < end; p += kShadowCnt) {
534 p[0] = v;
535 for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
537 #else
538 m128 vv = _mm_setr_epi32(
539 static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
540 static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
541 m128* vp = reinterpret_cast<m128*>(p);
542 m128* vend = reinterpret_cast<m128*>(end);
543 for (; vp < vend; vp++) _mm_store_si128(vp, vv);
544 #endif
547 static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
548 if (size == 0)
549 return;
550 DCHECK_EQ(addr % kShadowCell, 0);
551 DCHECK_EQ(size % kShadowCell, 0);
552 // If a user passes some insane arguments (memset(0)),
553 // let it just crash as usual.
554 if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
555 return;
556 RawShadow* begin = MemToShadow(addr);
557 RawShadow* end = begin + size / kShadowCell * kShadowCnt;
558 // Don't want to touch lots of shadow memory.
559 // If a program maps 10MB stack, there is no need reset the whole range.
560 // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
561 if (SANITIZER_WINDOWS ||
562 size <= common_flags()->clear_shadow_mmap_threshold) {
563 ShadowSet(begin, end, val);
564 return;
566 // The region is big, reset only beginning and end.
567 const uptr kPageSize = GetPageSizeCached();
568 // Set at least first kPageSize/2 to page boundary.
569 RawShadow* mid1 =
570 Min(end, reinterpret_cast<RawShadow*>(RoundUp(
571 reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
572 ShadowSet(begin, mid1, val);
573 // Reset middle part.
574 RawShadow* mid2 = RoundDown(end, kPageSize);
575 if (mid2 > mid1) {
576 if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
577 Die();
579 // Set the ending.
580 ShadowSet(mid2, end, val);
583 void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
584 uptr addr1 = RoundDown(addr, kShadowCell);
585 uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
586 MemoryRangeSet(addr1, size1, Shadow::kEmpty);
589 void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
590 // Callers must lock the slot to ensure synchronization with the reset.
591 // The problem with "freed" memory is that it's not "monotonic"
592 // with respect to bug detection: freed memory is bad to access,
593 // but then if the heap block is reallocated later, it's good to access.
594 // As the result a garbage "freed" shadow can lead to a false positive
595 // if it happens to match a real free in the thread trace,
596 // but the heap block was reallocated before the current memory access,
597 // so it's still good to access. It's not the case with data races.
598 DCHECK(thr->slot_locked);
599 DCHECK_EQ(addr % kShadowCell, 0);
600 size = RoundUp(size, kShadowCell);
601 // Processing more than 1k (2k of shadow) is expensive,
602 // can cause excessive memory consumption (user does not necessary touch
603 // the whole range) and most likely unnecessary.
604 size = Min<uptr>(size, 1024);
605 const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
606 kAccessCheckOnly | kAccessNoRodata;
607 TraceMemoryAccessRange(thr, pc, addr, size, typ);
608 RawShadow* shadow_mem = MemToShadow(addr);
609 Shadow cur(thr->fast_state, 0, kShadowCell, typ);
610 #if TSAN_VECTORIZE
611 const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
612 const m128 freed = _mm_setr_epi32(
613 static_cast<u32>(Shadow::FreedMarker()),
614 static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
615 for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
616 const m128 shadow = _mm_load_si128((m128*)shadow_mem);
617 if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
618 return;
619 _mm_store_si128((m128*)shadow_mem, freed);
621 #else
622 for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
623 if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
624 return;
625 StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
626 StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
627 StoreShadow(&shadow_mem[2], Shadow::kEmpty);
628 StoreShadow(&shadow_mem[3], Shadow::kEmpty);
630 #endif
633 void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
634 DCHECK_EQ(addr % kShadowCell, 0);
635 size = RoundUp(size, kShadowCell);
636 TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
637 Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
638 MemoryRangeSet(addr, size, cur.raw());
641 void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
642 uptr size) {
643 if (thr->ignore_reads_and_writes == 0)
644 MemoryRangeImitateWrite(thr, pc, addr, size);
645 else
646 MemoryResetRange(thr, pc, addr, size);
649 ALWAYS_INLINE
650 bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
651 AccessType typ) {
652 LOAD_CURRENT_SHADOW(cur, shadow_mem);
653 if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
654 return false;
655 return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
658 template <bool is_read>
659 NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
660 uptr size) {
661 TraceSwitchPart(thr);
662 MemoryAccessRangeT<is_read>(thr, pc, addr, size);
665 template <bool is_read>
666 void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
667 const AccessType typ =
668 (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
669 RawShadow* shadow_mem = MemToShadow(addr);
670 DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
671 (void*)pc, (void*)addr, (int)size, is_read);
673 #if SANITIZER_DEBUG
674 if (!IsAppMem(addr)) {
675 Printf("Access to non app mem %zx\n", addr);
676 DCHECK(IsAppMem(addr));
678 if (!IsAppMem(addr + size - 1)) {
679 Printf("Access to non app mem %zx\n", addr + size - 1);
680 DCHECK(IsAppMem(addr + size - 1));
682 if (!IsShadowMem(shadow_mem)) {
683 Printf("Bad shadow addr %p (%zx)\n", static_cast<void*>(shadow_mem), addr);
684 DCHECK(IsShadowMem(shadow_mem));
686 if (!IsShadowMem(shadow_mem + size * kShadowCnt - 1)) {
687 Printf("Bad shadow addr %p (%zx)\n",
688 static_cast<void*>(shadow_mem + size * kShadowCnt - 1),
689 addr + size - 1);
690 DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt - 1));
692 #endif
694 // Access to .rodata section, no races here.
695 // Measurements show that it can be 10-20% of all memory accesses.
696 // Check here once to not check for every access separately.
697 // Note: we could (and should) do this only for the is_read case
698 // (writes shouldn't go to .rodata). But it happens in Chromium tests:
699 // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
700 // Details are unknown since it happens only on CI machines.
701 if (*shadow_mem == Shadow::kRodata)
702 return;
704 FastState fast_state = thr->fast_state;
705 if (UNLIKELY(fast_state.GetIgnoreBit()))
706 return;
708 if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
709 return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);
711 if (UNLIKELY(addr % kShadowCell)) {
712 // Handle unaligned beginning, if any.
713 uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
714 size -= size1;
715 Shadow cur(fast_state, addr, size1, typ);
716 if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
717 return;
718 shadow_mem += kShadowCnt;
720 // Handle middle part, if any.
721 Shadow cur(fast_state, 0, kShadowCell, typ);
722 for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
723 if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
724 return;
726 // Handle ending, if any.
727 if (UNLIKELY(size)) {
728 Shadow cur(fast_state, 0, size, typ);
729 if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
730 return;
734 template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
735 uptr size);
736 template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
737 uptr size);
739 } // namespace __tsan
741 #if !SANITIZER_GO
742 // Must be included in this file to make sure everything is inlined.
743 # include "tsan_interface.inc"
744 #endif