2 /*--------------------------------------------------------------------*/
3 /*--- LibHB: a library for implementing and checking ---*/
4 /*--- the happens-before relationship in concurrent programs. ---*/
5 /*--- libhb_core.c ---*/
6 /*--------------------------------------------------------------------*/
8 /*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
12 Copyright (C) 2008-2013 OpenWorks Ltd
13 info@open-works.co.uk
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
30 The GNU General Public License is contained in the file COPYING.
31 */
33 #include "pub_tool_basics.h"
34 #include "pub_tool_poolalloc.h"
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcbase.h"
37 #include "pub_tool_libcprint.h"
38 #include "pub_tool_mallocfree.h"
39 #include "pub_tool_wordfm.h"
40 #include "pub_tool_sparsewa.h"
41 #include "pub_tool_xarray.h"
42 #include "pub_tool_oset.h"
43 #include "pub_tool_threadstate.h"
44 #include "pub_tool_aspacemgr.h"
45 #include "pub_tool_execontext.h"
46 #include "pub_tool_errormgr.h"
47 #include "pub_tool_options.h" // VG_(clo_stats)
48 #include "hg_basics.h"
49 #include "hg_wordset.h"
50 #include "hg_lock_n_thread.h"
51 #include "hg_errors.h"
53 #include "libhb.h"
56 /////////////////////////////////////////////////////////////////
57 /////////////////////////////////////////////////////////////////
58 // //
59 // Debugging #defines //
60 // //
61 /////////////////////////////////////////////////////////////////
62 /////////////////////////////////////////////////////////////////
64 /* Check the sanity of shadow values in the core memory state
65 machine. Change #if 0 to #if 1 to enable this. */
66 #if 0
67 # define CHECK_MSM 1
68 #else
69 # define CHECK_MSM 0
70 #endif
73 /* Check sanity (reference counts, etc) in the conflicting access
74 machinery. Change #if 0 to #if 1 to enable this. */
75 #if 0
76 # define CHECK_CEM 1
77 #else
78 # define CHECK_CEM 0
79 #endif
82 /* Check sanity in the compressed shadow memory machinery,
83 particularly in its caching innards. Unfortunately there's no
84 almost-zero-cost way to make them selectable at run time. Hence
85 set the #if 0 to #if 1 and rebuild if you want them. */
86 #if 0
87 # define CHECK_ZSM 1 /* do sanity-check CacheLine stuff */
88 # define inline __attribute__((noinline))
89 /* probably want to ditch -fomit-frame-pointer too */
90 #else
91 # define CHECK_ZSM 0 /* don't sanity-check CacheLine stuff */
92 #endif
95 /////////////////////////////////////////////////////////////////
96 /////////////////////////////////////////////////////////////////
97 // //
98 // data decls: VtsID //
99 // //
100 /////////////////////////////////////////////////////////////////
101 /////////////////////////////////////////////////////////////////
103 /* VtsIDs: Unique small-integer IDs for VTSs. VtsIDs can't exceed 30
104 bits, since they have to be packed into the lowest 30 bits of an
105 SVal. */
106 typedef UInt VtsID;
107 #define VtsID_INVALID 0xFFFFFFFF
111 /////////////////////////////////////////////////////////////////
112 /////////////////////////////////////////////////////////////////
113 // //
114 // data decls: SVal //
115 // //
116 /////////////////////////////////////////////////////////////////
117 /////////////////////////////////////////////////////////////////
119 typedef ULong SVal;
121 /* This value has special significance to the implementation, and callers
122 may not store it in the shadow memory. */
123 #define SVal_INVALID (3ULL << 62)
125 /* This is the default value for shadow memory. Initially the shadow
126 memory contains no accessible areas and so all reads produce this
127 value. TODO: make this caller-definable. */
128 #define SVal_NOACCESS (2ULL << 62)
132 /////////////////////////////////////////////////////////////////
133 /////////////////////////////////////////////////////////////////
134 // //
135 // data decls: ScalarTS //
136 // //
137 /////////////////////////////////////////////////////////////////
138 /////////////////////////////////////////////////////////////////
140 /* Scalar Timestamp. We have to store a lot of these, so there is
141 some effort to make them as small as possible. Logically they are
142 a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
143 We pack it into 64 bits by representing the Thr* using a ThrID, a
144 small integer (18 bits), and a 46 bit integer for the timestamp
145 number. The 46/18 split is arbitrary, but has the effect that
146 Helgrind can only handle programs that create 2^18 or fewer threads
147 over their entire lifetime, and have no more than 2^46 timestamp
148 ticks (synchronisation operations on the same thread).
150 This doesn't seem like much of a limitation. 2^46 ticks is
151 7.04e+13, and if each tick (optimistically) takes the machine 1000
152 cycles to process, then the minimum time to process that many ticks
153 at a clock rate of 5 GHz is 162.9 days. And that's doing nothing
154 but VTS ticks, which isn't realistic.
156 NB1: SCALARTS_N_THRBITS must be 29 or lower. The obvious limit is
157 32 since a ThrID is a UInt. 29 comes from the fact that
158 'Thr_n_RCEC', which records information about old accesses, packs
159 not only a ThrID but also 2+1 other bits (access size and
160 writeness) in a UInt, hence limiting size to 32-(2+1) == 29.
162 NB2: thrid values are issued upwards from 1024, and values less
163 than that aren't valid. This isn't per se necessary (any order
164 will do, so long as they are unique), but it does help ensure they
165 are less likely to get confused with the various other kinds of
166 small-integer thread ids drifting around (eg, TId). See also NB5.
168 NB3: this probably also relies on the fact that Thr's are never
169 deallocated -- they exist forever. Hence the 1-1 mapping from
170 Thr's to thrid values (set up in Thr__new) persists forever.
172 NB4: temp_max_sized_VTS is allocated at startup and never freed.
173 It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
174 ScalarTSs. So we can't make SCALARTS_N_THRBITS too large without
175 making the memory use for this go sky-high. With
176 SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
177 like an OK tradeoff. If more than 256k threads need to be
178 supported, we could change SCALARTS_N_THRBITS to 20, which would
179 facilitate supporting 1 million threads at the cost of 8MB storage
180 for temp_max_sized_VTS.
182 NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
183 ThrID == 0 to denote an empty Thr_n_RCEC record. So ThrID == 0
184 must never be a valid ThrID. Given NB2 that's OK.
185 */
186 #define SCALARTS_N_THRBITS 18 /* valid range: 11 to 29 inclusive */
188 #define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
189 typedef
190 struct {
191 ThrID thrid : SCALARTS_N_THRBITS;
192 ULong tym : SCALARTS_N_TYMBITS;
193 }
194 ScalarTS;
196 #define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
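/* Illustration only (not part of the library): the 18/46 split described
   above, done with explicit shifts and masks to make the bitfield layout
   concrete.  The helper names are hypothetical, and the exact placement
   of a C bitfield is ABI-dependent; this just shows one plausible layout
   with the thread id in the low bits. */
static inline ULong example_pack_ScalarTS ( ThrID thrid, ULong tym ) {
   return ((ULong)thrid & ThrID_MAX_VALID)
          | (tym << SCALARTS_N_THRBITS);
}
static inline ThrID example_unpack_thrid ( ULong w ) {
   return (ThrID)(w & ThrID_MAX_VALID);
}
static inline ULong example_unpack_tym ( ULong w ) {
   return w >> SCALARTS_N_THRBITS;
}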
200 /////////////////////////////////////////////////////////////////
201 /////////////////////////////////////////////////////////////////
202 // //
203 // data decls: Filter //
204 // //
205 /////////////////////////////////////////////////////////////////
206 /////////////////////////////////////////////////////////////////
208 // baseline: 5, 9
209 #define FI_LINE_SZB_LOG2 5
210 #define FI_NUM_LINES_LOG2 10
212 #define FI_LINE_SZB (1 << FI_LINE_SZB_LOG2)
213 #define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)
215 #define FI_TAG_MASK (~(Addr)(FI_LINE_SZB - 1))
216 #define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)
218 #define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
219 & (Addr)(FI_NUM_LINES-1) )
222 /* In the lines, each 8 bytes are treated individually, and are mapped
223 to a UShort. Regardless of endianness of the underlying machine,
224 bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
225 the highest address.
227 In each bit pair, the higher-numbered bit is set if an R has been
228 seen, so the actual layout is:
230 15 14 ... 01 00
232 R W for addr+7 ... R W for addr+0
234 So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
235 */
237 /* tags are separated from lines. tags are Addrs and are
238 the base address of the line. */
239 typedef
240 struct {
241 UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
242 }
243 FiLine;
245 typedef
246 struct {
247 Addr tags[FI_NUM_LINES];
248 FiLine lines[FI_NUM_LINES];
249 }
250 Filter;
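/* Sketch, illustration only: given a Filter and a byte address, find the
   R/W bit pair recorded for that byte, following the macros and the bit
   layout described above.  The helper name and return convention are
   hypothetical; the real filter read/write paths live elsewhere. */
static inline UWord example_filter_bits_for_byte ( Filter* fi, Addr a )
{
   UWord   lineno = FI_GET_LINENO(a);       /* which of the FI_NUM_LINES lines */
   Addr    tag    = FI_GET_TAG(a);          /* base address of that line       */
   FiLine* line   = &fi->lines[lineno];
   UWord   off    = a & (FI_LINE_SZB - 1);  /* byte offset within the line     */
   UShort  u16    = line->u16s[off >> 3];   /* one UShort covers 8 bytes       */
   UWord   shft   = 2 * (off & 7);          /* bit position of the W bit       */
   if (fi->tags[lineno] != tag)
      return 0;                             /* line caches a different range   */
   return (u16 >> shft) & 3;                /* bit 1 = R seen, bit 0 = W seen  */
}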
254 /////////////////////////////////////////////////////////////////
255 /////////////////////////////////////////////////////////////////
256 // //
257 // data decls: Thr, ULong_n_EC //
258 // //
259 /////////////////////////////////////////////////////////////////
260 /////////////////////////////////////////////////////////////////
262 // Records stacks for H1 history mechanism (DRD-style)
263 typedef
264 struct { ULong ull; ExeContext* ec; }
265 ULong_n_EC;
268 /* How many of the above records to collect for each thread? Older
269 ones are dumped when we run out of space. 62.5k requires 1MB per
270 thread, since each ULong_n_EC record is 16 bytes long. When more
271 than N_KWs_N_STACKs_PER_THREAD are present, the older half are
272 deleted to make space. Hence in the worst case we will be able to
273 produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
274 Kw transitions (segments in this thread). For the current setting
275 that gives a guaranteed stack for at least the last 31.25k
276 segments. */
277 #define N_KWs_N_STACKs_PER_THREAD 62500
280 struct _Thr {
281 /* Current VTSs for this thread. They change as we go along. viR
282 is the VTS to be used for reads, viW for writes. Usually they
283 are the same, but can differ when we deal with reader-writer
284 locks. It is always the case that
285 VtsID__cmpLEQ(viW,viR) == True
286 that is, viW must be the same, or lagging behind, viR. */
287 VtsID viR;
288 VtsID viW;
290 /* Is initially False, and is set to True after the thread really
291 has done a low-level exit. When True, we expect to never see
292 any more memory references done by this thread. */
293 Bool llexit_done;
295 /* Is initially False, and is set to True after the thread has been
296 joined with (reaped by some other thread). After this point, we
297 do not expect to see any uses of .viR or .viW, so it is safe to
298 set them to VtsID_INVALID. */
299 Bool joinedwith_done;
301 /* A small integer giving a unique identity to this Thr. See
302 comments on the definition of ScalarTS for details. */
303 ThrID thrid : SCALARTS_N_THRBITS;
305 /* A filter that removes references for which we believe that
306 msmcread/msmcwrite will not change the state, nor report a
307 race. */
308 Filter* filter;
310 /* A pointer back to the top level Thread structure. There is a
311 1-1 mapping between Thread and Thr structures -- each Thr points
312 at its corresponding Thread, and vice versa. Really, Thr and
313 Thread should be merged into a single structure. */
314 Thread* hgthread;
316 /* The ULongs (scalar Kws) in this accumulate in strictly
317 increasing order, without duplicates. This is important because
318 we need to be able to find a given scalar Kw in this array
319 later, by binary search. */
320 XArray* /* ULong_n_EC */ local_Kws_n_stacks;
325 /////////////////////////////////////////////////////////////////
326 /////////////////////////////////////////////////////////////////
327 // //
328 // data decls: SO //
329 // //
330 /////////////////////////////////////////////////////////////////
331 /////////////////////////////////////////////////////////////////
333 // (UInt) `echo "Synchronisation object" | md5sum`
334 #define SO_MAGIC 0x56b3c5b0U
336 struct _SO {
337 struct _SO* admin_prev;
338 struct _SO* admin_next;
339 VtsID viR; /* r-clock of sender */
340 VtsID viW; /* w-clock of sender */
341 UInt magic;
346 /////////////////////////////////////////////////////////////////
347 /////////////////////////////////////////////////////////////////
348 // //
349 // Forward declarations //
350 // //
351 /////////////////////////////////////////////////////////////////
352 /////////////////////////////////////////////////////////////////
354 /* fwds for
355 Globals needed by other parts of the library. These are set
356 once at startup and then never changed. */
357 static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
358 static ExeContext* (*main_get_EC)( Thr* ) = NULL;
360 /* misc fn and data fwdses */
361 static void VtsID__rcinc ( VtsID ii );
362 static void VtsID__rcdec ( VtsID ii );
364 static inline Bool SVal__isC ( SVal s );
365 static inline VtsID SVal__unC_Rmin ( SVal s );
366 static inline VtsID SVal__unC_Wmin ( SVal s );
367 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );
369 /* A double linked list of all the SO's. */
370 SO* admin_SO;
374 /////////////////////////////////////////////////////////////////
375 /////////////////////////////////////////////////////////////////
376 // //
377 // SECTION BEGIN compressed shadow memory //
378 // //
379 /////////////////////////////////////////////////////////////////
380 /////////////////////////////////////////////////////////////////
382 #ifndef __HB_ZSM_H
383 #define __HB_ZSM_H
385 /* Initialise the library. Once initialised, it will (or may) call
386 rcinc and rcdec in response to all the calls below, in order to
387 allow the user to do reference counting on the SVals stored herein.
388 It is important to understand, however, that due to internal
389 caching, the reference counts are in general inaccurate, and can be
390 both above or below the true reference count for an item. In
391 particular, the library may indicate that the reference count for
392 an item is zero, when in fact it is not.
394 To make the reference counting exact and therefore non-pointless,
395 call zsm_flush_cache. Immediately after it returns, the reference
396 counts for all items, as deduced by the caller by observing calls
397 to rcinc and rcdec, will be correct, and so any items with a zero
398 reference count may be freed (or at least considered to be
399 unreferenced by this library).
400 */
401 static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
403 static void zsm_sset_range ( Addr, SizeT, SVal );
404 static void zsm_scopy_range ( Addr, Addr, SizeT );
405 static void zsm_flush_cache ( void );
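/* Usage sketch of the reference-counting contract described above
   (caller side, pseudo-code; not part of the library):

      zsm_flush_cache();   // force all cached lines back to the Z/F store
      // At this point every rcinc/rcdec the caller has observed balances
      // out, so any SVal whose observed count is zero really is
      // unreferenced by this library and may be reclaimed.               */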
407 #endif /* ! __HB_ZSM_H */
410 /* Round a up to the next multiple of N. N must be a power of 2 */
411 #define ROUNDUP(a, N) (((a) + (N) - 1) & ~((N)-1))
412 /* Round a down to the next multiple of N. N must be a power of 2 */
413 #define ROUNDDN(a, N) ((a) & ~((N)-1))
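/* A few worked examples of the two macros above (illustration only;
   N must be a power of 2):
     ROUNDUP(13, 16) == 16     ROUNDDN(13, 16) == 0
     ROUNDUP(32, 16) == 32     ROUNDDN(37, 8)  == 32
   Values that are already multiples of N are left unchanged by both. */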
417 /* ------ User-supplied RC functions ------ */
418 static void(*rcinc)(SVal) = NULL;
419 static void(*rcdec)(SVal) = NULL;
422 /* ------ CacheLine ------ */
424 #define N_LINE_BITS 6 /* must be >= 3 */
425 #define N_LINE_ARANGE (1 << N_LINE_BITS)
426 #define N_LINE_TREES (N_LINE_ARANGE >> 3)
428 typedef
429 struct {
430 UShort descrs[N_LINE_TREES];
431 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
432 }
433 CacheLine;
435 #define TREE_DESCR_16_0 (1<<0)
436 #define TREE_DESCR_32_0 (1<<1)
437 #define TREE_DESCR_16_1 (1<<2)
438 #define TREE_DESCR_64 (1<<3)
439 #define TREE_DESCR_16_2 (1<<4)
440 #define TREE_DESCR_32_1 (1<<5)
441 #define TREE_DESCR_16_3 (1<<6)
442 #define TREE_DESCR_8_0 (1<<7)
443 #define TREE_DESCR_8_1 (1<<8)
444 #define TREE_DESCR_8_2 (1<<9)
445 #define TREE_DESCR_8_3 (1<<10)
446 #define TREE_DESCR_8_4 (1<<11)
447 #define TREE_DESCR_8_5 (1<<12)
448 #define TREE_DESCR_8_6 (1<<13)
449 #define TREE_DESCR_8_7 (1<<14)
450 #define TREE_DESCR_DTY (1<<15)
452 typedef
453 struct {
454 SVal dict[4]; /* can represent up to 4 diff values in the line */
455 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
456 dict indexes */
457 /* if dict[0] == SVal_INVALID then dict[1] is the index of the
458 LineF to use, and dict[2..] are also SVal_INVALID. */
459 }
460 LineZ; /* compressed rep for a cache line */
462 typedef
463 struct {
464 Bool inUse;
465 SVal w64s[N_LINE_ARANGE];
466 }
467 LineF; /* full rep for a cache line */
469 /* Shadow memory.
470 Primary map is a WordFM Addr SecMap*.
471 SecMaps cover some page-size-ish section of address space and hold
472 a compressed representation.
473 CacheLine-sized chunks of SecMaps are copied into a Cache, being
474 decompressed when moved into the cache and recompressed on the
475 way out. Because of this, the cache must operate as a writeback
476 cache, not a writethrough one.
478 Each SecMap must hold a power-of-2 number of CacheLines. Hence
479 N_SECMAP_BITS must be >= N_LINE_BITS.
480 */
481 #define N_SECMAP_BITS 13
482 #define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
484 // # CacheLines held by a SecMap
485 #define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
487 /* The data in the SecMap is held in the array of LineZs. Each LineZ
488 either carries the required data directly, in a compressed
489 representation, or it holds (in .dict[0]) an index to the LineF in
490 .linesF that holds the full representation.
492 Currently-unused LineF's have their .inUse bit set to zero.
493 Since each in-use LineF is referred to by exactly one LineZ,
494 the number of .linesZ[] that refer to .linesF should equal
495 the number of .linesF[] that have .inUse == True.
497 RC obligations: the RCs presented to the user include exactly
498 the values in:
499 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
500 * F reps that are in use (.inUse == True)
502 Hence the following actions at the following transitions are required:
504 F rep: .inUse==True -> .inUse==False -- rcdec_LineF
505 F rep: .inUse==False -> .inUse==True -- rcinc_LineF
506 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
507 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
508 */
509 typedef
510 struct {
511 UInt magic;
512 LineZ linesZ[N_SECMAP_ZLINES];
513 LineF* linesF;
514 UInt linesF_size;
515 }
516 SecMap;
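/* Sketch, illustration only: recover the shadow value for byte offset
   'off' (0 .. N_LINE_ARANGE-1) of LineZ number 'zix' in a SecMap,
   following the Z/F scheme described above.  The helper name is
   hypothetical; cacheline_fetch below does essentially this when it
   decompresses a line into the cache. */
static inline SVal example_secmap_read_sval ( SecMap* sm, UWord zix, UWord off )
{
   LineZ* lineZ = &sm->linesZ[zix];
   if (lineZ->dict[0] != SVal_INVALID) {
      /* compressed rep: each byte has a 2-bit index into dict[0..3] */
      UWord ix = (lineZ->ix2s[off >> 2] >> (2 * (off & 3))) & 3;
      return lineZ->dict[ix];
   } else {
      /* full rep: dict[1] holds the index of the in-use LineF */
      LineF* lineF = &sm->linesF[ (UWord)lineZ->dict[1] ];
      return lineF->w64s[off];
   }
}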
518 #define SecMap_MAGIC 0x571e58cbU
520 __attribute__((unused))
521 static inline Bool is_sane_SecMap ( SecMap* sm ) {
522 return sm != NULL && sm->magic == SecMap_MAGIC;
525 /* ------ Cache ------ */
527 #define N_WAY_BITS 16
528 #define N_WAY_NENT (1 << N_WAY_BITS)
530 /* Each tag is the address of the associated CacheLine, rounded down
531 to a CacheLine address boundary. A CacheLine size must be a power
532 of 2 and must be 8 or more. Hence an easy way to initialise the
533 cache so it is empty is to set all the tag values to any value % 8
534 != 0, eg 1. This means all queries in the cache initially miss.
535 It does however require us to detect and not writeback, any line
536 with a bogus tag. */
537 typedef
538 struct {
539 CacheLine lyns0[N_WAY_NENT];
540 Addr tags0[N_WAY_NENT];
541 }
542 Cache;
544 static inline Bool is_valid_scache_tag ( Addr tag ) {
545 /* a valid tag should be naturally aligned to the start of
546 a CacheLine. */
547 return 0 == (tag & (N_LINE_ARANGE - 1));
551 /* --------- Primary data structures --------- */
553 /* Shadow memory primary map */
554 static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
555 static Cache cache_shmem;
558 static UWord stats__secmaps_search = 0; // # SM finds
559 static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
560 static UWord stats__secmaps_allocd = 0; // # SecMaps issued
561 static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
562 static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
563 static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
564 static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
565 static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
566 static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
567 static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
568 static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
569 static UWord stats__cache_F_fetches = 0; // # F lines fetched
570 static UWord stats__cache_F_wbacks = 0; // # F lines written back
571 static UWord stats__cache_invals = 0; // # cache invals
572 static UWord stats__cache_flushes = 0; // # cache flushes
573 static UWord stats__cache_totrefs = 0; // # total accesses
574 static UWord stats__cache_totmisses = 0; // # misses
575 static ULong stats__cache_make_New_arange = 0; // total arange made New
576 static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
577 static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
578 static UWord stats__cline_cread64s = 0; // # calls to s_m_read64
579 static UWord stats__cline_cread32s = 0; // # calls to s_m_read32
580 static UWord stats__cline_cread16s = 0; // # calls to s_m_read16
581 static UWord stats__cline_cread08s = 0; // # calls to s_m_read8
582 static UWord stats__cline_cwrite64s = 0; // # calls to s_m_write64
583 static UWord stats__cline_cwrite32s = 0; // # calls to s_m_write32
584 static UWord stats__cline_cwrite16s = 0; // # calls to s_m_write16
585 static UWord stats__cline_cwrite08s = 0; // # calls to s_m_write8
586 static UWord stats__cline_sread08s = 0; // # 8-bit shadow reads (sread08)
587 static UWord stats__cline_swrite08s = 0; // # 8-bit shadow writes (swrite08)
588 static UWord stats__cline_swrite16s = 0; // # 16-bit shadow writes (swrite16)
589 static UWord stats__cline_swrite32s = 0; // # 32-bit shadow writes (swrite32)
590 static UWord stats__cline_swrite64s = 0; // # 64-bit shadow writes (swrite64)
591 static UWord stats__cline_scopy08s = 0; // # calls to s_m_copy8
592 static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
593 static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
594 static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
595 static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
596 static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
597 static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
598 static UWord stats__vts__tick = 0; // # calls to VTS__tick
599 static UWord stats__vts__join = 0; // # calls to VTS__join
600 static UWord stats__vts__cmpLEQ = 0; // # calls to VTS__cmpLEQ
601 static UWord stats__vts__cmp_structural = 0; // # calls to VTS__cmp_structural
603 // # calls to VTS__cmp_structural w/ slow case
604 static UWord stats__vts__cmp_structural_slow = 0;
606 // # calls to VTS__indexAt_SLOW
607 static UWord stats__vts__indexat_slow = 0;
609 // # calls to vts_set__find__or__clone_and_add
610 static UWord stats__vts_set__focaa = 0;
612 // # calls to vts_set__find__or__clone_and_add that lead to an
613 // allocation
614 static UWord stats__vts_set__focaa_a = 0;
617 static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
618 return a & ~(N_SECMAP_ARANGE - 1);
620 static inline UWord shmem__get_SecMap_offset ( Addr a ) {
621 return a & (N_SECMAP_ARANGE - 1);
625 /*----------------------------------------------------------------*/
626 /*--- map_shmem :: WordFM Addr SecMap ---*/
627 /*--- shadow memory (low level handlers) (shmem__* fns) ---*/
628 /*----------------------------------------------------------------*/
630 /*--------------- SecMap allocation --------------- */
632 static HChar* shmem__bigchunk_next = NULL;
633 static HChar* shmem__bigchunk_end1 = NULL;
635 static void* shmem__bigchunk_alloc ( SizeT n )
637 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
638 tl_assert(n > 0);
639 n = VG_ROUNDUP(n, 16);
640 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
641 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
642 <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
643 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
644 if (0)
645 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
646 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
647 shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
648 if (shmem__bigchunk_next == NULL)
649 VG_(out_of_memory_NORETURN)(
650 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
651 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
653 tl_assert(shmem__bigchunk_next);
654 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
655 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
656 shmem__bigchunk_next += n;
657 return shmem__bigchunk_next - n;
660 static SecMap* shmem__alloc_SecMap ( void )
662 Word i, j;
663 SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
664 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
665 tl_assert(sm);
666 sm->magic = SecMap_MAGIC;
667 for (i = 0; i < N_SECMAP_ZLINES; i++) {
668 sm->linesZ[i].dict[0] = SVal_NOACCESS;
669 sm->linesZ[i].dict[1] = SVal_INVALID;
670 sm->linesZ[i].dict[2] = SVal_INVALID;
671 sm->linesZ[i].dict[3] = SVal_INVALID;
672 for (j = 0; j < N_LINE_ARANGE/4; j++)
673 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
675 sm->linesF = NULL;
676 sm->linesF_size = 0;
677 stats__secmaps_allocd++;
678 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
679 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
680 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
681 return sm;
684 typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
685 static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
687 static SecMap* shmem__find_SecMap ( Addr ga )
689 SecMap* sm = NULL;
690 Addr gaKey = shmem__round_to_SecMap_base(ga);
691 // Cache
692 stats__secmaps_search++;
693 if (LIKELY(gaKey == smCache[0].gaKey))
694 return smCache[0].sm;
695 if (LIKELY(gaKey == smCache[1].gaKey)) {
696 SMCacheEnt tmp = smCache[0];
697 smCache[0] = smCache[1];
698 smCache[1] = tmp;
699 return smCache[0].sm;
701 if (gaKey == smCache[2].gaKey) {
702 SMCacheEnt tmp = smCache[1];
703 smCache[1] = smCache[2];
704 smCache[2] = tmp;
705 return smCache[1].sm;
707 // end Cache
708 stats__secmaps_search_slow++;
709 if (VG_(lookupFM)( map_shmem,
710 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
711 tl_assert(sm != NULL);
712 smCache[2] = smCache[1];
713 smCache[1] = smCache[0];
714 smCache[0].gaKey = gaKey;
715 smCache[0].sm = sm;
716 } else {
717 tl_assert(sm == NULL);
719 return sm;
722 static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
724 SecMap* sm = shmem__find_SecMap ( ga );
725 if (LIKELY(sm)) {
726 return sm;
727 } else {
728 /* create a new one */
729 Addr gaKey = shmem__round_to_SecMap_base(ga);
730 sm = shmem__alloc_SecMap();
731 tl_assert(sm);
732 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
733 return sm;
738 /* ------------ LineF and LineZ related ------------ */
740 static void rcinc_LineF ( LineF* lineF ) {
741 UWord i;
742 tl_assert(lineF->inUse);
743 for (i = 0; i < N_LINE_ARANGE; i++)
744 rcinc(lineF->w64s[i]);
747 static void rcdec_LineF ( LineF* lineF ) {
748 UWord i;
749 tl_assert(lineF->inUse);
750 for (i = 0; i < N_LINE_ARANGE; i++)
751 rcdec(lineF->w64s[i]);
754 static void rcinc_LineZ ( LineZ* lineZ ) {
755 tl_assert(lineZ->dict[0] != SVal_INVALID);
756 rcinc(lineZ->dict[0]);
757 if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
758 if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
759 if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
762 static void rcdec_LineZ ( LineZ* lineZ ) {
763 tl_assert(lineZ->dict[0] != SVal_INVALID);
764 rcdec(lineZ->dict[0]);
765 if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
766 if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
767 if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
770 inline
771 static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
772 Word bix, shft, mask, prep;
773 tl_assert(ix >= 0);
774 bix = ix >> 2;
775 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
776 mask = 3 << shft;
777 prep = b2 << shft;
778 arr[bix] = (arr[bix] & ~mask) | prep;
781 inline
782 static UWord read_twobit_array ( UChar* arr, UWord ix ) {
783 Word bix, shft;
784 tl_assert(ix >= 0);
785 bix = ix >> 2;
786 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
787 return (arr[bix] >> shft) & 3;
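/* Usage sketch (illustration only, hypothetical helper): four 2-bit
   indices packed into one byte and read back -- the encoding used for
   LineZ.ix2s above. */
__attribute__((unused))
static void example_twobit_roundtrip ( void )
{
   UChar arr[1] = { 0 };
   UWord k;
   for (k = 0; k < 4; k++)
      write_twobit_array( arr, k, k );          /* stores 0,1,2,3      */
   for (k = 0; k < 4; k++)
      tl_assert( read_twobit_array( arr, k ) == k );
   tl_assert( arr[0] == 0xE4 );                 /* 11 10 01 00 in bits */
}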
790 /* Given address 'tag', find either the Z or F line containing relevant
791 data, so it can be read into the cache.
792 */
793 static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
794 /*OUT*/LineF** fp, Addr tag ) {
795 LineZ* lineZ;
796 LineF* lineF;
797 UWord zix;
798 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
799 UWord smoff = shmem__get_SecMap_offset(tag);
800 /* since smoff is derived from a valid tag, it should be
801 cacheline-aligned. */
802 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
803 zix = smoff >> N_LINE_BITS;
804 tl_assert(zix < N_SECMAP_ZLINES);
805 lineZ = &sm->linesZ[zix];
806 lineF = NULL;
807 if (lineZ->dict[0] == SVal_INVALID) {
808 UInt fix = (UInt)lineZ->dict[1];
809 tl_assert(sm->linesF);
810 tl_assert(sm->linesF_size > 0);
811 tl_assert(fix >= 0 && fix < sm->linesF_size);
812 lineF = &sm->linesF[fix];
813 tl_assert(lineF->inUse);
814 lineZ = NULL;
816 *zp = lineZ;
817 *fp = lineF;
820 /* Given address 'tag', return the relevant SecMap and the index of
821 the LineZ within it, in the expectation that the line is to be
822 overwritten. Regardless of whether 'tag' is currently associated
823 with a Z or F representation, to rcdec on the current
824 representation, in recognition of the fact that the contents are
825 just about to be overwritten. */
826 static __attribute__((noinline))
827 void find_Z_for_writing ( /*OUT*/SecMap** smp,
828 /*OUT*/Word* zixp,
829 Addr tag ) {
830 LineZ* lineZ;
831 LineF* lineF;
832 UWord zix;
833 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
834 UWord smoff = shmem__get_SecMap_offset(tag);
835 /* since smoff is derived from a valid tag, it should be
836 cacheline-aligned. */
837 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
838 zix = smoff >> N_LINE_BITS;
839 tl_assert(zix < N_SECMAP_ZLINES);
840 lineZ = &sm->linesZ[zix];
841 lineF = NULL;
842 /* re RCs, we are freeing up this LineZ/LineF so that new data can
843 be parked in it. Hence have to rcdec it accordingly. */
844 /* If lineZ has an associated lineF, free it up. */
845 if (lineZ->dict[0] == SVal_INVALID) {
846 UInt fix = (UInt)lineZ->dict[1];
847 tl_assert(sm->linesF);
848 tl_assert(sm->linesF_size > 0);
849 tl_assert(fix >= 0 && fix < sm->linesF_size);
850 lineF = &sm->linesF[fix];
851 tl_assert(lineF->inUse);
852 rcdec_LineF(lineF);
853 lineF->inUse = False;
854 } else {
855 rcdec_LineZ(lineZ);
857 *smp = sm;
858 *zixp = zix;
861 static __attribute__((noinline))
862 void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
863 UInt i, new_size;
864 LineF* nyu;
866 if (sm->linesF) {
867 tl_assert(sm->linesF_size > 0);
868 } else {
869 tl_assert(sm->linesF_size == 0);
872 if (sm->linesF) {
873 for (i = 0; i < sm->linesF_size; i++) {
874 if (!sm->linesF[i].inUse) {
875 *fixp = (Word)i;
876 return;
881 /* No free F line found. Expand existing array and try again. */
882 new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
883 nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
884 new_size * sizeof(LineF) );
886 stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
887 stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
888 * sizeof(LineF);
890 if (0)
891 VG_(printf)("SM %p: expand F array from %d to %d\n",
892 sm, (Int)sm->linesF_size, new_size);
894 for (i = 0; i < new_size; i++)
895 nyu[i].inUse = False;
897 if (sm->linesF) {
898 for (i = 0; i < sm->linesF_size; i++) {
899 tl_assert(sm->linesF[i].inUse);
900 nyu[i] = sm->linesF[i];
902 VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
903 HG_(free)(sm->linesF);
906 sm->linesF = nyu;
907 sm->linesF_size = new_size;
909 for (i = 0; i < sm->linesF_size; i++) {
910 if (!sm->linesF[i].inUse) {
911 *fixp = (Word)i;
912 return;
916 /*NOTREACHED*/
917 tl_assert(0);
921 /* ------------ CacheLine and implicit-tree related ------------ */
923 __attribute__((unused))
924 static void pp_CacheLine ( CacheLine* cl ) {
925 Word i;
926 if (!cl) {
927 VG_(printf)("%s","pp_CacheLine(NULL)\n");
928 return;
930 for (i = 0; i < N_LINE_TREES; i++)
931 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
932 for (i = 0; i < N_LINE_ARANGE; i++)
933 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
936 static UChar descr_to_validbits ( UShort descr )
938 /* a.k.a Party Time for gcc's constant folder */
939 # define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
940 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
941 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
942 ( (b8_5) << 12) | ( (b8_4) << 11) | \
943 ( (b8_3) << 10) | ( (b8_2) << 9) | \
944 ( (b8_1) << 8) | ( (b8_0) << 7) | \
945 ( (b16_3) << 6) | ( (b32_1) << 5) | \
946 ( (b16_2) << 4) | ( (b64) << 3) | \
947 ( (b16_1) << 2) | ( (b32_0) << 1) | \
948 ( (b16_0) << 0) ) )
950 # define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
951 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
952 ( (bit5) << 5) | ( (bit4) << 4) | \
953 ( (bit3) << 3) | ( (bit2) << 2) | \
954 ( (bit1) << 1) | ( (bit0) << 0) ) )
956 /* these should all get folded out at compile time */
957 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
958 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
959 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
960 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
961 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
962 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
963 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
964 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
965 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
967 switch (descr) {
968 /*
969 +--------------------------------- TREE_DESCR_8_7
970 | +------------------- TREE_DESCR_8_0
971 | | +---------------- TREE_DESCR_16_3
972 | | | +-------------- TREE_DESCR_32_1
973 | | | | +------------ TREE_DESCR_16_2
974 | | | | | +--------- TREE_DESCR_64
975 | | | | | | +------ TREE_DESCR_16_1
976 | | | | | | | +---- TREE_DESCR_32_0
977 | | | | | | | | +-- TREE_DESCR_16_0
978 | | | | | | | | |
979 | | | | | | | | | GRANULARITY, 7 -> 0 */
980 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
981 return BYTE(1,1,1,1,1,1,1,1);
982 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
983 return BYTE(1,1,0,1,1,1,1,1);
984 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
985 return BYTE(0,1,1,1,1,1,1,1);
986 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
987 return BYTE(0,1,0,1,1,1,1,1);
989 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
990 return BYTE(1,1,1,1,1,1,0,1);
991 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
992 return BYTE(1,1,0,1,1,1,0,1);
993 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
994 return BYTE(0,1,1,1,1,1,0,1);
995 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
996 return BYTE(0,1,0,1,1,1,0,1);
998 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
999 return BYTE(1,1,1,1,0,1,1,1);
1000 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
1001 return BYTE(1,1,0,1,0,1,1,1);
1002 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
1003 return BYTE(0,1,1,1,0,1,1,1);
1004 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
1005 return BYTE(0,1,0,1,0,1,1,1);
1007 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
1008 return BYTE(1,1,1,1,0,1,0,1);
1009 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
1010 return BYTE(1,1,0,1,0,1,0,1);
1011 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
1012 return BYTE(0,1,1,1,0,1,0,1);
1013 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
1014 return BYTE(0,1,0,1,0,1,0,1);
1016 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
1017 return BYTE(0,0,0,1,1,1,1,1);
1018 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
1019 return BYTE(0,0,0,1,1,1,0,1);
1020 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
1021 return BYTE(0,0,0,1,0,1,1,1);
1022 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
1023 return BYTE(0,0,0,1,0,1,0,1);
1025 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
1026 return BYTE(1,1,1,1,0,0,0,1);
1027 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
1028 return BYTE(1,1,0,1,0,0,0,1);
1029 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
1030 return BYTE(0,1,1,1,0,0,0,1);
1031 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
1032 return BYTE(0,1,0,1,0,0,0,1);
1034 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
1035 return BYTE(0,0,0,1,0,0,0,1);
1037 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
1038 return BYTE(0,0,0,0,0,0,0,1);
1040 default: return BYTE(0,0,0,0,0,0,0,0);
1041 /* INVALID - any valid descr produces at least one
1042 valid bit in tree[0..7]*/
1044 /* NOTREACHED*/
1045 tl_assert(0);
1047 # undef DESCR
1048 # undef BYTE
1051 __attribute__((unused))
1052 static Bool is_sane_Descr ( UShort descr ) {
1053 return descr_to_validbits(descr) != 0;
1056 static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
1057 VG_(sprintf)(dst,
1058 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
1059 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
1060 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
1061 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
1062 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
1063 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
1064 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
1065 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
1066 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
1067 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
1068 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
1069 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
1070 (Int)((descr & TREE_DESCR_64) ? 1 : 0),
1071 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
1072 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
1073 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
1076 static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
1077 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
1078 (Int)((byte & 128) ? 1 : 0),
1079 (Int)((byte & 64) ? 1 : 0),
1080 (Int)((byte & 32) ? 1 : 0),
1081 (Int)((byte & 16) ? 1 : 0),
1082 (Int)((byte & 8) ? 1 : 0),
1083 (Int)((byte & 4) ? 1 : 0),
1084 (Int)((byte & 2) ? 1 : 0),
1085 (Int)((byte & 1) ? 1 : 0)
1089 static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
1090 Word i;
1091 UChar validbits = descr_to_validbits(descr);
1092 HChar buf[128], buf2[128];
1093 if (validbits == 0)
1094 goto bad;
1095 for (i = 0; i < 8; i++) {
1096 if (validbits & (1<<i)) {
1097 if (tree[i] == SVal_INVALID)
1098 goto bad;
1099 } else {
1100 if (tree[i] != SVal_INVALID)
1101 goto bad;
1104 return True;
1105 bad:
1106 sprintf_Descr( buf, descr );
1107 sprintf_Byte( buf2, validbits );
1108 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
1109 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
1110 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
1111 for (i = 0; i < 8; i++)
1112 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
1113 VG_(printf)("%s","}\n");
1114 return 0;
1117 static Bool is_sane_CacheLine ( CacheLine* cl )
1119 Word tno, cloff;
1121 if (!cl) goto bad;
1123 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1124 UShort descr = cl->descrs[tno];
1125 SVal* tree = &cl->svals[cloff];
1126 if (!is_sane_Descr_and_Tree(descr, tree))
1127 goto bad;
1129 tl_assert(cloff == N_LINE_ARANGE);
1130 return True;
1131 bad:
1132 pp_CacheLine(cl);
1133 return False;
1136 static UShort normalise_tree ( /*MOD*/SVal* tree )
1138 UShort descr;
1139 /* pre: incoming tree[0..7] does not have any invalid shvals, in
1140 particular no zeroes. */
1141 if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
1142 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
1143 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
1144 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
1145 tl_assert(0);
1147 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
1148 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
1149 | TREE_DESCR_8_1 | TREE_DESCR_8_0;
1150 /* build 16-bit layer */
1151 if (tree[1] == tree[0]) {
1152 tree[1] = SVal_INVALID;
1153 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
1154 descr |= TREE_DESCR_16_0;
1156 if (tree[3] == tree[2]) {
1157 tree[3] = SVal_INVALID;
1158 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
1159 descr |= TREE_DESCR_16_1;
1161 if (tree[5] == tree[4]) {
1162 tree[5] = SVal_INVALID;
1163 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
1164 descr |= TREE_DESCR_16_2;
1166 if (tree[7] == tree[6]) {
1167 tree[7] = SVal_INVALID;
1168 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
1169 descr |= TREE_DESCR_16_3;
1171 /* build 32-bit layer */
1172 if (tree[2] == tree[0]
1173 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
1174 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
1175 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
1176 descr |= TREE_DESCR_32_0;
1178 if (tree[6] == tree[4]
1179 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
1180 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
1181 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
1182 descr |= TREE_DESCR_32_1;
1184 /* build 64-bit layer */
1185 if (tree[4] == tree[0]
1186 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
1187 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
1188 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
1189 descr |= TREE_DESCR_64;
1191 return descr;
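/* Worked example for normalise_tree (illustration only): if all eight
   leaves tree[0..7] hold the same SVal, the pairwise merging collapses
   everything and the result is just TREE_DESCR_64, with only tree[0]
   still valid.  If bytes 0..3 share one value but bytes 4..7 hold four
   distinct values, the result is
   TREE_DESCR_32_0 | TREE_DESCR_8_4 | TREE_DESCR_8_5
                   | TREE_DESCR_8_6 | TREE_DESCR_8_7. */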
1194 /* This takes a cacheline where all the data is at the leaves
1195 (w8[..]) and builds a correctly normalised tree. */
1196 static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
1198 Word tno, cloff;
1199 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1200 SVal* tree = &cl->svals[cloff];
1201 cl->descrs[tno] = normalise_tree( tree );
1203 tl_assert(cloff == N_LINE_ARANGE);
1204 if (CHECK_ZSM)
1205 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1206 stats__cline_normalises++;
1210 typedef struct { UChar count; SVal sval; } CountedSVal;
1212 static
1213 void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1214 /*OUT*/Word* dstUsedP,
1215 Word nDst, CacheLine* src )
1217 Word tno, cloff, dstUsed;
1219 tl_assert(nDst == N_LINE_ARANGE);
1220 dstUsed = 0;
1222 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1223 UShort descr = src->descrs[tno];
1224 SVal* tree = &src->svals[cloff];
1226 /* sequentialise the tree described by (descr,tree). */
1227 # define PUT(_n,_v) \
1228 do { dst[dstUsed ].count = (_n); \
1229 dst[dstUsed++].sval = (_v); \
1230 } while (0)
1232 /* byte 0 */
1233 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
1234 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1235 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1236 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
1237 /* byte 1 */
1238 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
1239 /* byte 2 */
1240 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1241 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
1242 /* byte 3 */
1243 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
1244 /* byte 4 */
1245 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1246 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1247 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
1248 /* byte 5 */
1249 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
1250 /* byte 6 */
1251 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1252 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
1253 /* byte 7 */
1254 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
1256 # undef PUT
1257 /* END sequentialise the tree described by (descr,tree). */
1260 tl_assert(cloff == N_LINE_ARANGE);
1261 tl_assert(dstUsed <= nDst);
1263 *dstUsedP = dstUsed;
1266 /* Write the cacheline 'wix' to backing store. Where it ends up
1267 is determined by its tag field. */
1268 static __attribute__((noinline)) void cacheline_wback ( UWord wix )
1270 Word i, j, k, m;
1271 Addr tag;
1272 SecMap* sm;
1273 CacheLine* cl;
1274 LineZ* lineZ;
1275 LineF* lineF;
1276 Word zix, fix, csvalsUsed;
1277 CountedSVal csvals[N_LINE_ARANGE];
1278 SVal sv;
1280 if (0)
1281 VG_(printf)("scache wback line %d\n", (Int)wix);
1283 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1285 tag = cache_shmem.tags0[wix];
1286 cl = &cache_shmem.lyns0[wix];
1288 /* The cache line may have been invalidated; if so, ignore it. */
1289 if (!is_valid_scache_tag(tag))
1290 return;
1292 /* Where are we going to put it? */
1293 sm = NULL;
1294 lineZ = NULL;
1295 lineF = NULL;
1296 zix = fix = -1;
1298 /* find the Z line to write in and rcdec it or the associated F
1299 line. */
1300 find_Z_for_writing( &sm, &zix, tag );
1302 tl_assert(sm);
1303 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1304 lineZ = &sm->linesZ[zix];
1306 /* Generate the data to be stored */
1307 if (CHECK_ZSM)
1308 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1310 csvalsUsed = -1;
1311 sequentialise_CacheLine( csvals, &csvalsUsed,
1312 N_LINE_ARANGE, cl );
1313 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1314 if (0) VG_(printf)("%lu ", csvalsUsed);
1316 lineZ->dict[0] = lineZ->dict[1]
1317 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1319 /* i indexes actual shadow values, k is cursor in csvals */
1320 i = 0;
1321 for (k = 0; k < csvalsUsed; k++) {
1323 sv = csvals[k].sval;
1324 if (CHECK_ZSM)
1325 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1326 /* do we already have it? */
1327 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1328 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1329 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1330 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1331 /* no. look for a free slot. */
1332 if (CHECK_ZSM)
1333 tl_assert(sv != SVal_INVALID);
1334 if (lineZ->dict[0]
1335 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1336 if (lineZ->dict[1]
1337 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1338 if (lineZ->dict[2]
1339 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1340 if (lineZ->dict[3]
1341 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1342 break; /* we'll have to use the f rep */
1343 dict_ok:
1344 m = csvals[k].count;
1345 if (m == 8) {
1346 write_twobit_array( lineZ->ix2s, i+0, j );
1347 write_twobit_array( lineZ->ix2s, i+1, j );
1348 write_twobit_array( lineZ->ix2s, i+2, j );
1349 write_twobit_array( lineZ->ix2s, i+3, j );
1350 write_twobit_array( lineZ->ix2s, i+4, j );
1351 write_twobit_array( lineZ->ix2s, i+5, j );
1352 write_twobit_array( lineZ->ix2s, i+6, j );
1353 write_twobit_array( lineZ->ix2s, i+7, j );
1354 i += 8;
1356 else if (m == 4) {
1357 write_twobit_array( lineZ->ix2s, i+0, j );
1358 write_twobit_array( lineZ->ix2s, i+1, j );
1359 write_twobit_array( lineZ->ix2s, i+2, j );
1360 write_twobit_array( lineZ->ix2s, i+3, j );
1361 i += 4;
1363 else if (m == 1) {
1364 write_twobit_array( lineZ->ix2s, i+0, j );
1365 i += 1;
1367 else if (m == 2) {
1368 write_twobit_array( lineZ->ix2s, i+0, j );
1369 write_twobit_array( lineZ->ix2s, i+1, j );
1370 i += 2;
1372 else {
1373 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1378 if (LIKELY(i == N_LINE_ARANGE)) {
1379 /* Construction of the compressed representation was
1380 successful. */
1381 rcinc_LineZ(lineZ);
1382 stats__cache_Z_wbacks++;
1383 } else {
1384 /* Cannot use the compressed(z) representation. Use the full(f)
1385 rep instead. */
1386 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1387 alloc_F_for_writing( sm, &fix );
1388 tl_assert(sm->linesF);
1389 tl_assert(sm->linesF_size > 0);
1390 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1391 lineF = &sm->linesF[fix];
1392 tl_assert(!lineF->inUse);
1393 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1394 lineZ->dict[1] = (SVal)fix;
1395 lineF->inUse = True;
1396 i = 0;
1397 for (k = 0; k < csvalsUsed; k++) {
1398 if (CHECK_ZSM)
1399 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1400 sv = csvals[k].sval;
1401 if (CHECK_ZSM)
1402 tl_assert(sv != SVal_INVALID);
1403 for (m = csvals[k].count; m > 0; m--) {
1404 lineF->w64s[i] = sv;
1405 i++;
1408 tl_assert(i == N_LINE_ARANGE);
1409 rcinc_LineF(lineF);
1410 stats__cache_F_wbacks++;
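/* Note on the writeback path above: the compressed (Z) representation
   fails exactly when the N_LINE_ARANGE bytes of the line need more than
   four distinct shadow values; only then is the full (F) representation,
   costing 8 shadow bytes per byte of address range, used instead. */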
1414 /* Fetch the cacheline 'wix' from the backing store. The tag
1415 associated with 'wix' is assumed to have already been filled in;
1416 hence that is used to determine where in the backing store to read
1417 from. */
1418 static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1420 Word i;
1421 Addr tag;
1422 CacheLine* cl;
1423 LineZ* lineZ;
1424 LineF* lineF;
1426 if (0)
1427 VG_(printf)("scache fetch line %d\n", (Int)wix);
1429 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1431 tag = cache_shmem.tags0[wix];
1432 cl = &cache_shmem.lyns0[wix];
1434 /* reject nonsense requests */
1435 tl_assert(is_valid_scache_tag(tag));
1437 lineZ = NULL;
1438 lineF = NULL;
1439 find_ZF_for_reading( &lineZ, &lineF, tag );
1440 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1442 /* expand the data into the bottom layer of the tree, then get
1443 cacheline_normalise to build the descriptor array. */
1444 if (lineF) {
1445 tl_assert(lineF->inUse);
1446 for (i = 0; i < N_LINE_ARANGE; i++) {
1447 cl->svals[i] = lineF->w64s[i];
1449 stats__cache_F_fetches++;
1450 } else {
1451 for (i = 0; i < N_LINE_ARANGE; i++) {
1452 SVal sv;
1453 UWord ix = read_twobit_array( lineZ->ix2s, i );
1454 /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
1455 sv = lineZ->dict[ix];
1456 tl_assert(sv != SVal_INVALID);
1457 cl->svals[i] = sv;
1459 stats__cache_Z_fetches++;
1461 normalise_CacheLine( cl );
1464 static void shmem__invalidate_scache ( void ) {
1465 Word wix;
1466 if (0) VG_(printf)("%s","scache inval\n");
1467 tl_assert(!is_valid_scache_tag(1));
1468 for (wix = 0; wix < N_WAY_NENT; wix++) {
1469 cache_shmem.tags0[wix] = 1/*INVALID*/;
1471 stats__cache_invals++;
1474 static void shmem__flush_and_invalidate_scache ( void ) {
1475 Word wix;
1476 Addr tag;
1477 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1478 tl_assert(!is_valid_scache_tag(1));
1479 for (wix = 0; wix < N_WAY_NENT; wix++) {
1480 tag = cache_shmem.tags0[wix];
1481 if (tag == 1/*INVALID*/) {
1482 /* already invalid; nothing to do */
1483 } else {
1484 tl_assert(is_valid_scache_tag(tag));
1485 cacheline_wback( wix );
1487 cache_shmem.tags0[wix] = 1/*INVALID*/;
1489 stats__cache_flushes++;
1490 stats__cache_invals++;
1494 static inline Bool aligned16 ( Addr a ) {
1495 return 0 == (a & 1);
1497 static inline Bool aligned32 ( Addr a ) {
1498 return 0 == (a & 3);
1500 static inline Bool aligned64 ( Addr a ) {
1501 return 0 == (a & 7);
1503 static inline UWord get_cacheline_offset ( Addr a ) {
1504 return (UWord)(a & (N_LINE_ARANGE - 1));
1506 static inline Addr cacheline_ROUNDUP ( Addr a ) {
1507 return ROUNDUP(a, N_LINE_ARANGE);
1509 static inline Addr cacheline_ROUNDDN ( Addr a ) {
1510 return ROUNDDN(a, N_LINE_ARANGE);
1512 static inline UWord get_treeno ( Addr a ) {
1513 return get_cacheline_offset(a) >> 3;
1515 static inline UWord get_tree_offset ( Addr a ) {
1516 return a & 7;
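/* Worked example for the helpers above and get_cacheline below
   (illustration only).  With N_LINE_BITS == 6 and N_WAY_BITS == 16,
   the address a == 0x1234567 decomposes as:
     tag       = a & ~(N_LINE_ARANGE-1)      == 0x1234540
     way index = (a >> N_LINE_BITS) & 0xFFFF == 0x8D15
     tree no   = (a & 63) >> 3               == 4
     tree off  = a & 7                       == 7
   ie, byte 7 of tree 4 within the line based at 0x1234540, held in
   direct-mapped cache slot 0x8D15. */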
1519 static __attribute__((noinline))
1520 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1521 static inline CacheLine* get_cacheline ( Addr a )
1523 /* tag is 'a' with the in-line offset masked out,
1524 eg a[31]..a[4] 0000 */
1525 Addr tag = a & ~(N_LINE_ARANGE - 1);
1526 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1527 stats__cache_totrefs++;
1528 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1529 return &cache_shmem.lyns0[wix];
1530 } else {
1531 return get_cacheline_MISS( a );
1535 static __attribute__((noinline))
1536 CacheLine* get_cacheline_MISS ( Addr a )
1538 /* tag is 'a' with the in-line offset masked out,
1539 eg a[31]..a[4] 0000 */
1541 CacheLine* cl;
1542 Addr* tag_old_p;
1543 Addr tag = a & ~(N_LINE_ARANGE - 1);
1544 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1546 tl_assert(tag != cache_shmem.tags0[wix]);
1548 /* Dump the old line into the backing store. */
1549 stats__cache_totmisses++;
1551 cl = &cache_shmem.lyns0[wix];
1552 tag_old_p = &cache_shmem.tags0[wix];
1554 if (is_valid_scache_tag( *tag_old_p )) {
1555 /* EXPENSIVE and REDUNDANT: callee does it */
1556 if (CHECK_ZSM)
1557 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1558 cacheline_wback( wix );
1560 /* and reload the new one */
1561 *tag_old_p = tag;
1562 cacheline_fetch( wix );
1563 if (CHECK_ZSM)
1564 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1565 return cl;
1568 static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1569 stats__cline_64to32pulldown++;
1570 switch (toff) {
1571 case 0: case 4:
1572 tl_assert(descr & TREE_DESCR_64);
1573 tree[4] = tree[0];
1574 descr &= ~TREE_DESCR_64;
1575 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1576 break;
1577 default:
1578 tl_assert(0);
1580 return descr;
1583 static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1584 stats__cline_32to16pulldown++;
1585 switch (toff) {
1586 case 0: case 2:
1587 if (!(descr & TREE_DESCR_32_0)) {
1588 descr = pulldown_to_32(tree, 0, descr);
1590 tl_assert(descr & TREE_DESCR_32_0);
1591 tree[2] = tree[0];
1592 descr &= ~TREE_DESCR_32_0;
1593 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1594 break;
1595 case 4: case 6:
1596 if (!(descr & TREE_DESCR_32_1)) {
1597 descr = pulldown_to_32(tree, 4, descr);
1599 tl_assert(descr & TREE_DESCR_32_1);
1600 tree[6] = tree[4];
1601 descr &= ~TREE_DESCR_32_1;
1602 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1603 break;
1604 default:
1605 tl_assert(0);
1607 return descr;
1610 static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1611 stats__cline_16to8pulldown++;
1612 switch (toff) {
1613 case 0: case 1:
1614 if (!(descr & TREE_DESCR_16_0)) {
1615 descr = pulldown_to_16(tree, 0, descr);
1617 tl_assert(descr & TREE_DESCR_16_0);
1618 tree[1] = tree[0];
1619 descr &= ~TREE_DESCR_16_0;
1620 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1621 break;
1622 case 2: case 3:
1623 if (!(descr & TREE_DESCR_16_1)) {
1624 descr = pulldown_to_16(tree, 2, descr);
1626 tl_assert(descr & TREE_DESCR_16_1);
1627 tree[3] = tree[2];
1628 descr &= ~TREE_DESCR_16_1;
1629 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1630 break;
1631 case 4: case 5:
1632 if (!(descr & TREE_DESCR_16_2)) {
1633 descr = pulldown_to_16(tree, 4, descr);
1635 tl_assert(descr & TREE_DESCR_16_2);
1636 tree[5] = tree[4];
1637 descr &= ~TREE_DESCR_16_2;
1638 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1639 break;
1640 case 6: case 7:
1641 if (!(descr & TREE_DESCR_16_3)) {
1642 descr = pulldown_to_16(tree, 6, descr);
1644 tl_assert(descr & TREE_DESCR_16_3);
1645 tree[7] = tree[6];
1646 descr &= ~TREE_DESCR_16_3;
1647 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1648 break;
1649 default:
1650 tl_assert(0);
1652 return descr;
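/* Worked example for the pulldown_* functions above (illustration only).
   Suppose the tree holds one 64-bit value (descr == TREE_DESCR_64) and an
   8-bit access touches byte 5.  pulldown_to_8(tree, 5, descr) first calls
   pulldown_to_16(tree, 4, ..), which in turn calls pulldown_to_32(tree, 4, ..):
     64    -> 32_1 | 32_0      (tree[4] = tree[0])
     32_1  -> 16_3 | 16_2      (tree[6] = tree[4])
     16_2  -> 8_5  | 8_4       (tree[5] = tree[4])
   leaving descr == TREE_DESCR_32_0 | TREE_DESCR_16_3
                    | TREE_DESCR_8_5 | TREE_DESCR_8_4. */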
1656 static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1657 UShort mask;
1658 switch (toff) {
1659 case 0:
1660 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1661 tl_assert( (descr & mask) == mask );
1662 descr &= ~mask;
1663 descr |= TREE_DESCR_16_0;
1664 break;
1665 case 2:
1666 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1667 tl_assert( (descr & mask) == mask );
1668 descr &= ~mask;
1669 descr |= TREE_DESCR_16_1;
1670 break;
1671 case 4:
1672 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1673 tl_assert( (descr & mask) == mask );
1674 descr &= ~mask;
1675 descr |= TREE_DESCR_16_2;
1676 break;
1677 case 6:
1678 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1679 tl_assert( (descr & mask) == mask );
1680 descr &= ~mask;
1681 descr |= TREE_DESCR_16_3;
1682 break;
1683 default:
1684 tl_assert(0);
1686 return descr;
1689 static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1690 UShort mask;
1691 switch (toff) {
1692 case 0:
1693 if (!(descr & TREE_DESCR_16_0))
1694 descr = pullup_descr_to_16(descr, 0);
1695 if (!(descr & TREE_DESCR_16_1))
1696 descr = pullup_descr_to_16(descr, 2);
1697 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1698 tl_assert( (descr & mask) == mask );
1699 descr &= ~mask;
1700 descr |= TREE_DESCR_32_0;
1701 break;
1702 case 4:
1703 if (!(descr & TREE_DESCR_16_2))
1704 descr = pullup_descr_to_16(descr, 4);
1705 if (!(descr & TREE_DESCR_16_3))
1706 descr = pullup_descr_to_16(descr, 6);
1707 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1708 tl_assert( (descr & mask) == mask );
1709 descr &= ~mask;
1710 descr |= TREE_DESCR_32_1;
1711 break;
1712 default:
1713 tl_assert(0);
1715 return descr;
1718 static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1719 switch (toff) {
1720 case 0: case 4:
1721 return 0 != (descr & TREE_DESCR_64);
1722 default:
1723 tl_assert(0);
1727 static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1728 switch (toff) {
1729 case 0:
1730 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1731 case 2:
1732 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1733 case 4:
1734 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1735 case 6:
1736 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1737 default:
1738 tl_assert(0);
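/* Worked example (illustrative only, using the TREE_DESCR_* names that
   appear above): suppose a tree currently holds a single valid 64-bit
   value, so descr is just TREE_DESCR_64.  A 1-byte access at tree
   offset 3 calls pulldown_to_8(tree, 3, descr), which recursively
   calls pulldown_to_16(tree, 2, ..) and pulldown_to_32(tree, 0, ..).
   The descriptor evolves as

      TREE_DESCR_64
      -> TREE_DESCR_32_1 | TREE_DESCR_32_0                     (tree[4] = tree[0])
      -> TREE_DESCR_32_1 | TREE_DESCR_16_1 | TREE_DESCR_16_0   (tree[2] = tree[0])
      -> TREE_DESCR_32_1 | TREE_DESCR_16_0
                         | TREE_DESCR_8_3 | TREE_DESCR_8_2     (tree[3] = tree[2])

   The pullup_descr_to_16/32 functions perform the inverse merge on the
   descriptor bits only; recombining the SVals themselves is left to
   their callers. */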
1742 /* ------------ Cache management ------------ */
1744 static void zsm_flush_cache ( void )
1746 shmem__flush_and_invalidate_scache();
1750 static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
1752 tl_assert( sizeof(UWord) == sizeof(Addr) );
1754 rcinc = p_rcinc;
1755 rcdec = p_rcdec;
1757 tl_assert(map_shmem == NULL);
1758 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1759 HG_(free),
1760 NULL/*unboxed UWord cmp*/);
1761 shmem__invalidate_scache();
1763 /* a SecMap must contain an integral number of CacheLines */
1764 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1765 /* also ... a CacheLine holds an integral number of trees */
1766 tl_assert(0 == (N_LINE_ARANGE % 8));
1769 /////////////////////////////////////////////////////////////////
1770 /////////////////////////////////////////////////////////////////
1771 // //
1772 // SECTION END compressed shadow memory //
1773 // //
1774 /////////////////////////////////////////////////////////////////
1775 /////////////////////////////////////////////////////////////////
1779 /////////////////////////////////////////////////////////////////
1780 /////////////////////////////////////////////////////////////////
1781 // //
1782 // SECTION BEGIN vts primitives //
1783 // //
1784 /////////////////////////////////////////////////////////////////
1785 /////////////////////////////////////////////////////////////////
1788 /* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
1789 being compact stand-ins for Thr*'s. Use these functions to map
1790 between them. */
1791 static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
1792 static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
1794 __attribute__((noreturn))
1795 static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
1797 if (due_to_nThrs) {
1798 const HChar* s =
1799 "\n"
1800 "Helgrind: cannot continue, run aborted: too many threads.\n"
1801 "Sorry. Helgrind can only handle programs that create\n"
1802 "%'llu or fewer threads over their entire lifetime.\n"
1803 "\n";
1804 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
1805 } else {
1806 const HChar* s =
1807 "\n"
1808 "Helgrind: cannot continue, run aborted: too many\n"
1809 "synchronisation events. Sorry. Helgrind can only handle\n"
1810 "programs which perform %'llu or fewer\n"
1811 "inter-thread synchronisation events (locks, unlocks, etc).\n"
1812 "\n";
1813 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
1815 VG_(exit)(1);
1816 /*NOTREACHED*/
1817 tl_assert(0); /*wtf?!*/
1821 /* The dead thread (ThrID, actually) table. A thread may only be
1822 listed here if we have been notified thereof by libhb_async_exit.
1823 New entries are added at the end. The order isn't important, but
1824 the ThrID values must be unique. This table lists the identity of
1825 all threads that have ever died -- none are ever removed. We keep
1826 this table so as to be able to prune entries from VTSs. We don't
1827 actually need to keep the set of threads that have ever died --
1828 only the threads that have died since the previous round of
1829 pruning. But it's useful for sanity check purposes to keep the
1830 entire set, so we do. */
1831 static XArray* /* of ThrID */ verydead_thread_table = NULL;
1833 /* Arbitrary total ordering on ThrIDs. */
1834 static Int cmp__ThrID ( const void* v1, const void* v2 ) {
1835 ThrID id1 = *(const ThrID*)v1;
1836 ThrID id2 = *(const ThrID*)v2;
1837 if (id1 < id2) return -1;
1838 if (id1 > id2) return 1;
1839 return 0;
1842 static void verydead_thread_table_init ( void )
1844 tl_assert(!verydead_thread_table);
1845 verydead_thread_table
1846 = VG_(newXA)( HG_(zalloc),
1847 "libhb.verydead_thread_table_init.1",
1848 HG_(free), sizeof(ThrID) );
1849 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
1853 /* A VTS contains .ts, its vector clock, and also .id, a field to hold
1854 a backlink for the caller's convenience. Since we have no idea
1855 what to set that to in the library, it always gets set to
1856 VtsID_INVALID. */
1857 typedef
1858 struct {
1859 VtsID id;
1860 UInt usedTS;
1861 UInt sizeTS;
1862 ScalarTS ts[0];
1864 VTS;
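/* Illustrative reading of this representation: .ts[] is a sparse
   vector clock, sorted by .thrid.  For example a VTS whose entries are
   { (thrid 1024, tym 3), (thrid 1027, tym 1) } says "thread 1024 is at
   local time 3 and thread 1027 at local time 1"; any ThrID not listed
   is implicitly at time zero.  The sortedness (checked by is_sane_VTS
   below) is what lets VTS__tick, VTS__join and VTS__cmpLEQ process two
   clocks in a single merge-style pass. */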
1866 /* Allocate a VTS capable of storing 'sizeTS' entries. */
1867 static VTS* VTS__new ( const HChar* who, UInt sizeTS );
1869 /* Make a clone of 'vts', sizing the new array to exactly match the
1870 number of ScalarTSs present. */
1871 static VTS* VTS__clone ( const HChar* who, VTS* vts );
1873 /* Make a clone of 'vts' with the thrids in 'thrids' removed. The new
1874 array is sized exactly to hold the number of required elements.
1875 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
1876 must be in strictly increasing order. */
1877 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
1879 /* Delete this VTS in its entirety. */
1880 static void VTS__delete ( VTS* vts );
1882 /* Create a new singleton VTS in 'out'. Caller must have
1883 pre-allocated 'out' sufficiently big to hold the result in all
1884 possible cases. */
1885 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
1887 /* Create in 'out' a VTS which is the same as 'vts' except with
1888 vts[me]++, so to speak. Caller must have pre-allocated 'out'
1889 sufficiently big to hold the result in all possible cases. */
1890 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
1892 /* Create in 'out' a VTS which is the join (max) of 'a' and
1893 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
1894 the result in all possible cases. */
1895 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
1897 /* Compute the partial ordering relation of the two args. Although we
1898 could be completely general and return an enumeration value (EQ,
1899 LT, GT, UN), in fact we only need LEQ, and so we may as well
1900 hardwire that fact.
1902 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
1903 invalid ThrID). In the latter case, the returned ThrID indicates
1904 a point at which LEQ fails to hold. There may be more
1905 than one such point, but we only care about seeing one of them, not
1906 all of them. This rather strange convention is used because
1907 sometimes we want to know the actual index at which they first
1908 differ. */
1909 static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
1911 /* Compute an arbitrary structural (total) ordering on the two args,
1912 based on their VCs, so they can be looked up in a table, tree, etc.
1913 Returns -1, 0 or 1. */
1914 static Word VTS__cmp_structural ( VTS* a, VTS* b );
1916 /* Debugging only. Display the given VTS in the buffer. */
1917 static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
1919 /* Debugging only. Return vts[index], so to speak. */
1920 static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
1922 /* Notify the VTS machinery that a thread has been declared
1923 comprehensively dead: that is, it has done an async exit AND it has
1924 been joined with. This should ensure that its local clocks (.viR
1925 and .viW) will never again change, and so all mentions of this
1926 thread from all VTSs in the system may be removed. */
1927 static void VTS__declare_thread_very_dead ( Thr* idx );
1929 /*--------------- to do with Vector Timestamps ---------------*/
1931 static Bool is_sane_VTS ( VTS* vts )
1933 UWord i, n;
1934 ScalarTS *st1, *st2;
1935 if (!vts) return False;
1936 if (vts->usedTS > vts->sizeTS) return False;
1937 n = vts->usedTS;
1938 if (n == 1) {
1939 st1 = &vts->ts[0];
1940 if (st1->tym == 0)
1941 return False;
1943 else
1944 if (n >= 2) {
1945 for (i = 0; i < n-1; i++) {
1946 st1 = &vts->ts[i];
1947 st2 = &vts->ts[i+1];
1948 if (st1->thrid >= st2->thrid)
1949 return False;
1950 if (st1->tym == 0 || st2->tym == 0)
1951 return False;
1954 return True;
1958 /* Create a new, empty VTS.
1960 static VTS* VTS__new ( const HChar* who, UInt sizeTS )
1962 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
1963 tl_assert(vts->usedTS == 0);
1964 vts->sizeTS = sizeTS;
1965 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
1966 return vts;
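/* Note: one ScalarTS slot beyond 'sizeTS' is allocated, and the value
   0x0ddC0ffeeBadF00dULL written into it serves as a canary.
   VTS__clone, VTS__subtract, VTS__delete and the pruning code all
   re-check that this marker is still intact, which catches accidental
   writes past the declared capacity of .ts[]. */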
1969 /* Clone this VTS.
1971 static VTS* VTS__clone ( const HChar* who, VTS* vts )
1973 tl_assert(vts);
1974 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
1975 UInt nTS = vts->usedTS;
1976 VTS* clone = VTS__new(who, nTS);
1977 clone->id = vts->id;
1978 clone->sizeTS = nTS;
1979 clone->usedTS = nTS;
1980 UInt i;
1981 for (i = 0; i < nTS; i++) {
1982 clone->ts[i] = vts->ts[i];
1984 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
1985 return clone;
1989 /* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
1990 must be in strictly increasing order. We could obviously do this
1991 much more efficiently (in linear time) if necessary.
1993 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
1995 UInt i, j;
1996 tl_assert(vts);
1997 tl_assert(thridsToDel);
1998 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
1999 UInt nTS = vts->usedTS;
2000 /* Figure out how many ScalarTSs will remain in the output. */
2001 UInt nReq = nTS;
2002 for (i = 0; i < nTS; i++) {
2003 ThrID thrid = vts->ts[i].thrid;
2004 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2005 nReq--;
2007 tl_assert(nReq <= nTS);
2008 /* Copy the ones that will remain. */
2009 VTS* res = VTS__new(who, nReq);
2010 j = 0;
2011 for (i = 0; i < nTS; i++) {
2012 ThrID thrid = vts->ts[i].thrid;
2013 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2014 continue;
2015 res->ts[j++] = vts->ts[i];
2017 tl_assert(j == nReq);
2018 tl_assert(j == res->sizeTS);
2019 res->usedTS = j;
2020 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2021 return res;
2025 /* Delete this VTS in its entirety.
2027 static void VTS__delete ( VTS* vts )
2029 tl_assert(vts);
2030 tl_assert(vts->usedTS <= vts->sizeTS);
2031 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2032 HG_(free)(vts);
2036 /* Create a new singleton VTS.
2038 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
2040 tl_assert(thr);
2041 tl_assert(tym >= 1);
2042 tl_assert(out);
2043 tl_assert(out->usedTS == 0);
2044 tl_assert(out->sizeTS >= 1);
2045 UInt hi = out->usedTS++;
2046 out->ts[hi].thrid = Thr__to_ThrID(thr);
2047 out->ts[hi].tym = tym;
2051 /* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
2052 not modified.
2054 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
2056 UInt i, n;
2057 ThrID me_thrid;
2058 Bool found = False;
2060 stats__vts__tick++;
2062 tl_assert(out);
2063 tl_assert(out->usedTS == 0);
2064 if (vts->usedTS >= ThrID_MAX_VALID)
2065 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2066 tl_assert(out->sizeTS >= 1 + vts->usedTS);
2068 tl_assert(me);
2069 me_thrid = Thr__to_ThrID(me);
2070 tl_assert(is_sane_VTS(vts));
2071 n = vts->usedTS;
2073 /* Copy all entries which precede 'me'. */
2074 for (i = 0; i < n; i++) {
2075 ScalarTS* here = &vts->ts[i];
2076 if (UNLIKELY(here->thrid >= me_thrid))
2077 break;
2078 UInt hi = out->usedTS++;
2079 out->ts[hi] = *here;
2082 /* 'i' now indicates the next entry to copy, if any.
2083 There are 3 possibilities:
2084 (a) there is no next entry (we used them all up already):
2085 add (me_thrid,1) to the output, and quit
2086 (b) there is a next entry, and its thrid > me_thrid:
2087 add (me_thrid,1) to the output, then copy the remaining entries
2088 (c) there is a next entry, and its thrid == me_thrid:
2089 copy it to the output but increment its timestamp value.
2090 Then copy the remaining entries. (c) is the common case.
2092 tl_assert(i >= 0 && i <= n);
2093 if (i == n) { /* case (a) */
2094 UInt hi = out->usedTS++;
2095 out->ts[hi].thrid = me_thrid;
2096 out->ts[hi].tym = 1;
2097 } else {
2098 /* cases (b) and (c) */
2099 ScalarTS* here = &vts->ts[i];
2100 if (me_thrid == here->thrid) { /* case (c) */
2101 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
2102 /* We're hosed. We have to stop. */
2103 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2105 UInt hi = out->usedTS++;
2106 out->ts[hi].thrid = here->thrid;
2107 out->ts[hi].tym = here->tym + 1;
2108 i++;
2109 found = True;
2110 } else { /* case (b) */
2111 UInt hi = out->usedTS++;
2112 out->ts[hi].thrid = me_thrid;
2113 out->ts[hi].tym = 1;
2115 /* And copy any remaining entries. */
2116 for (/*keepgoing*/; i < n; i++) {
2117 ScalarTS* here2 = &vts->ts[i];
2118 UInt hi = out->usedTS++;
2119 out->ts[hi] = *here2;
2123 tl_assert(is_sane_VTS(out));
2124 tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
2125 tl_assert(out->usedTS <= out->sizeTS);
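/* Worked example (illustrative): with vts = [ (1024,3), (1030,7) ],
   ticking thread 1030 hits case (c) and gives
      out = [ (1024,3), (1030,8) ]
   whereas ticking a thread with ThrID 1027, absent from 'vts', hits
   case (b) and gives
      out = [ (1024,3), (1027,1), (1030,7) ]
   i.e. an absent thread is treated as being at time zero, so its first
   tick introduces the entry (thrid,1). */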
2129 /* Return a new VTS constructed as the join (max) of the 2 args.
2130 Neither arg is modified.
2132 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
2134 UInt ia, ib, useda, usedb;
2135 ULong tyma, tymb, tymMax;
2136 ThrID thrid;
2137 UInt ncommon = 0;
2139 stats__vts__join++;
2141 tl_assert(a);
2142 tl_assert(b);
2143 useda = a->usedTS;
2144 usedb = b->usedTS;
2146 tl_assert(out);
2147 tl_assert(out->usedTS == 0);
2148 /* overly conservative test, but doing better involves comparing
2149 the two VTSs, which we don't want to do at this point. */
2150 if (useda + usedb >= ThrID_MAX_VALID)
2151 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2152 tl_assert(out->sizeTS >= useda + usedb);
2154 ia = ib = 0;
2156 while (1) {
2158 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2159 from a and b in order, where thrid is the next ThrID
2160 occurring in either a or b, and tyma/b are the relevant
2161 scalar timestamps, taking into account implicit zeroes. */
2162 tl_assert(ia >= 0 && ia <= useda);
2163 tl_assert(ib >= 0 && ib <= usedb);
2165 if (ia == useda && ib == usedb) {
2166 /* both empty - done */
2167 break;
2169 } else if (ia == useda && ib != usedb) {
2170 /* a empty, use up b */
2171 ScalarTS* tmpb = &b->ts[ib];
2172 thrid = tmpb->thrid;
2173 tyma = 0;
2174 tymb = tmpb->tym;
2175 ib++;
2177 } else if (ia != useda && ib == usedb) {
2178 /* b empty, use up a */
2179 ScalarTS* tmpa = &a->ts[ia];
2180 thrid = tmpa->thrid;
2181 tyma = tmpa->tym;
2182 tymb = 0;
2183 ia++;
2185 } else {
2186 /* both not empty; extract lowest-ThrID'd triple */
2187 ScalarTS* tmpa = &a->ts[ia];
2188 ScalarTS* tmpb = &b->ts[ib];
2189 if (tmpa->thrid < tmpb->thrid) {
2190 /* a has the lowest unconsidered ThrID */
2191 thrid = tmpa->thrid;
2192 tyma = tmpa->tym;
2193 tymb = 0;
2194 ia++;
2195 } else if (tmpa->thrid > tmpb->thrid) {
2196 /* b has the lowest unconsidered ThrID */
2197 thrid = tmpb->thrid;
2198 tyma = 0;
2199 tymb = tmpb->tym;
2200 ib++;
2201 } else {
2202 /* they both next mention the same ThrID */
2203 tl_assert(tmpa->thrid == tmpb->thrid);
2204 thrid = tmpa->thrid; /* == tmpb->thrid */
2205 tyma = tmpa->tym;
2206 tymb = tmpb->tym;
2207 ia++;
2208 ib++;
2209 ncommon++;
2213 /* having laboriously determined (thr, tyma, tymb), do something
2214 useful with it. */
2215 tymMax = tyma > tymb ? tyma : tymb;
2216 if (tymMax > 0) {
2217 UInt hi = out->usedTS++;
2218 out->ts[hi].thrid = thrid;
2219 out->ts[hi].tym = tymMax;
2224 tl_assert(is_sane_VTS(out));
2225 tl_assert(out->usedTS <= out->sizeTS);
2226 tl_assert(out->usedTS == useda + usedb - ncommon);
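/* Worked example (illustrative): joining
      a = [ (1024,3), (1030,7) ]
      b = [ (1024,5), (1027,2) ]
   enumerates ThrIDs 1024, 1027, 1030 in order, takes the pointwise
   maximum (missing entries count as zero) and yields
      out = [ (1024,5), (1027,2), (1030,7) ].
   Only ThrID 1024 occurs in both inputs, so ncommon == 1 and the final
   size check reads 3 == 2 + 2 - 1. */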
2230 /* Determine if 'a' <= 'b', in the partial ordering. Returns zero if
2231 they are, or the first ThrID for which they are not (no valid ThrID
2232 has the value zero). This rather strange convention is used
2233 because sometimes we want to know the actual index at which they
2234 first differ. */
2235 static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
2237 Word ia, ib, useda, usedb;
2238 ULong tyma, tymb;
2240 stats__vts__cmpLEQ++;
2242 tl_assert(a);
2243 tl_assert(b);
2244 useda = a->usedTS;
2245 usedb = b->usedTS;
2247 ia = ib = 0;
2249 while (1) {
2251 /* This logic is to enumerate doubles (tyma, tymb) drawn
2252 from a and b in order, and tyma/b are the relevant
2253 scalar timestamps, taking into account implicit zeroes. */
2254 ThrID thrid;
2256 tl_assert(ia >= 0 && ia <= useda);
2257 tl_assert(ib >= 0 && ib <= usedb);
2259 if (ia == useda && ib == usedb) {
2260 /* both empty - done */
2261 break;
2263 } else if (ia == useda && ib != usedb) {
2264 /* a empty, use up b */
2265 ScalarTS* tmpb = &b->ts[ib];
2266 tyma = 0;
2267 tymb = tmpb->tym;
2268 thrid = tmpb->thrid;
2269 ib++;
2271 } else if (ia != useda && ib == usedb) {
2272 /* b empty, use up a */
2273 ScalarTS* tmpa = &a->ts[ia];
2274 tyma = tmpa->tym;
2275 thrid = tmpa->thrid;
2276 tymb = 0;
2277 ia++;
2279 } else {
2280 /* both not empty; extract lowest-ThrID'd triple */
2281 ScalarTS* tmpa = &a->ts[ia];
2282 ScalarTS* tmpb = &b->ts[ib];
2283 if (tmpa->thrid < tmpb->thrid) {
2284 /* a has the lowest unconsidered ThrID */
2285 tyma = tmpa->tym;
2286 thrid = tmpa->thrid;
2287 tymb = 0;
2288 ia++;
2290 else
2291 if (tmpa->thrid > tmpb->thrid) {
2292 /* b has the lowest unconsidered ThrID */
2293 tyma = 0;
2294 tymb = tmpb->tym;
2295 thrid = tmpb->thrid;
2296 ib++;
2297 } else {
2298 /* they both next mention the same ThrID */
2299 tl_assert(tmpa->thrid == tmpb->thrid);
2300 tyma = tmpa->tym;
2301 thrid = tmpa->thrid;
2302 tymb = tmpb->tym;
2303 ia++;
2304 ib++;
2308 /* having laboriously determined (tyma, tymb), do something
2309 useful with it. */
2310 if (tyma > tymb) {
2311 /* not LEQ at this index. Quit, since the answer is
2312 determined already. */
2313 tl_assert(thrid >= 1024);
2314 return thrid;
2318 return 0; /* all points are LEQ => return an invalid ThrID */
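/* Worked example (illustrative): with
      a = [ (1024,3), (1030,7) ]
      b = [ (1024,5), (1027,2) ]
   the scan sees tyma 3 <= tymb 5 at ThrID 1024, but at ThrID 1030 it
   finds tyma 7 > tymb 0 (1030 is absent from b), so it returns 1030 --
   meaning "not LEQ, and 1030 is a witness".  Zero is returned only
   when every per-thread timestamp in 'a' is <= the corresponding one
   in 'b'. */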
2322 /* Compute an arbitrary structural (total) ordering on the two args,
2323 based on their VCs, so they can be looked up in a table, tree, etc.
2324 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
2325 performance critical so there is some effort expended to make it as
2326 fast as possible.
2328 Word VTS__cmp_structural ( VTS* a, VTS* b )
2330 /* We just need to generate an arbitrary total ordering based on
2331 a->ts and b->ts. Preferably do it in a way which exposes likely
2332 differences relatively quickly. */
2333 Word i;
2334 Word useda = 0, usedb = 0;
2335 ScalarTS *ctsa = NULL, *ctsb = NULL;
2337 stats__vts__cmp_structural++;
2339 tl_assert(a);
2340 tl_assert(b);
2342 ctsa = &a->ts[0]; useda = a->usedTS;
2343 ctsb = &b->ts[0]; usedb = b->usedTS;
2345 if (LIKELY(useda == usedb)) {
2346 ScalarTS *tmpa = NULL, *tmpb = NULL;
2347 stats__vts__cmp_structural_slow++;
2348 /* Same length vectors. Find the first difference, if any, as
2349 fast as possible. */
2350 for (i = 0; i < useda; i++) {
2351 tmpa = &ctsa[i];
2352 tmpb = &ctsb[i];
2353 if (LIKELY(tmpa->tym == tmpb->tym
2354 && tmpa->thrid == tmpb->thrid))
2355 continue;
2356 else
2357 break;
2359 if (UNLIKELY(i == useda)) {
2360 /* They're identical. */
2361 return 0;
2362 } else {
2363 tl_assert(i >= 0 && i < useda);
2364 if (tmpa->tym < tmpb->tym) return -1;
2365 if (tmpa->tym > tmpb->tym) return 1;
2366 if (tmpa->thrid < tmpb->thrid) return -1;
2367 if (tmpa->thrid > tmpb->thrid) return 1;
2368 /* we just established them as non-identical, hence: */
2370 /*NOTREACHED*/
2371 tl_assert(0);
2374 if (useda < usedb) return -1;
2375 if (useda > usedb) return 1;
2376 /*NOTREACHED*/
2377 tl_assert(0);
2381 /* Debugging only. Display the given VTS in the buffer.
2383 void VTS__show ( HChar* buf, Int nBuf, VTS* vts )
2385 ScalarTS* st;
2386 HChar unit[64];
2387 Word i, n;
2388 Int avail = nBuf;
2389 tl_assert(vts && vts->ts);
2390 tl_assert(nBuf > 16);
2391 buf[0] = '[';
2392 buf[1] = 0;
2393 n = vts->usedTS;
2394 for (i = 0; i < n; i++) {
2395 tl_assert(avail >= 40);
2396 st = &vts->ts[i];
2397 VG_(memset)(unit, 0, sizeof(unit));
2398 VG_(sprintf)(unit, i < n-1 ? "%u:%llu " : "%u:%llu",
2399 st->thrid, (ULong)st->tym);
2400 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
2401 VG_(strcat)(buf, " ...]");
2402 buf[nBuf-1] = 0;
2403 return;
2405 VG_(strcat)(buf, unit);
2406 avail -= VG_(strlen)(unit);
2408 VG_(strcat)(buf, "]");
2409 buf[nBuf-1] = 0;
2413 /* Debugging only. Return vts[index], so to speak.
2415 ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2417 UWord i, n;
2418 ThrID idx_thrid = Thr__to_ThrID(idx);
2419 stats__vts__indexat_slow++;
2420 tl_assert(vts && vts->ts);
2421 n = vts->usedTS;
2422 for (i = 0; i < n; i++) {
2423 ScalarTS* st = &vts->ts[i];
2424 if (st->thrid == idx_thrid)
2425 return st->tym;
2427 return 0;
2431 /* See comment on prototype above.
2433 static void VTS__declare_thread_very_dead ( Thr* thr )
2435 if (0) VG_(printf)("VTQ: tae %p\n", thr);
2437 tl_assert(thr->llexit_done);
2438 tl_assert(thr->joinedwith_done);
2440 ThrID nyu;
2441 nyu = Thr__to_ThrID(thr);
2442 VG_(addToXA)( verydead_thread_table, &nyu );
2444 /* We can only get here if we're assured that we'll never again
2445 need to look at this thread's ::viR or ::viW. Set them to
2446 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2447 mostly so that we don't wind up pruning them (as that would be
2448 nonsensical: the only interesting ScalarTS entry for a dead
2449 thread is its own index, and the pruning will remove that). */
2450 VtsID__rcdec(thr->viR);
2451 VtsID__rcdec(thr->viW);
2452 thr->viR = VtsID_INVALID;
2453 thr->viW = VtsID_INVALID;
2457 /////////////////////////////////////////////////////////////////
2458 /////////////////////////////////////////////////////////////////
2459 // //
2460 // SECTION END vts primitives //
2461 // //
2462 /////////////////////////////////////////////////////////////////
2463 /////////////////////////////////////////////////////////////////
2467 /////////////////////////////////////////////////////////////////
2468 /////////////////////////////////////////////////////////////////
2469 // //
2470 // SECTION BEGIN main library //
2471 // //
2472 /////////////////////////////////////////////////////////////////
2473 /////////////////////////////////////////////////////////////////
2476 /////////////////////////////////////////////////////////
2477 // //
2478 // VTS set //
2479 // //
2480 /////////////////////////////////////////////////////////
2482 static WordFM* /* WordFM VTS* void */ vts_set = NULL;
2484 static void vts_set_init ( void )
2486 tl_assert(!vts_set);
2487 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2488 HG_(free),
2489 (Word(*)(UWord,UWord))VTS__cmp_structural );
2492 /* Given a VTS, look in vts_set to see if we already have a
2493 structurally identical one. If yes, return the pair (True, pointer
2494 to the existing one). If no, clone this one, add the clone to the
2495 set, and return (False, pointer to the clone). */
2496 static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
2498 UWord keyW, valW;
2499 stats__vts_set__focaa++;
2500 tl_assert(cand->id == VtsID_INVALID);
2501 /* lookup cand (by value) */
2502 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2503 /* found it */
2504 tl_assert(valW == 0);
2505 /* if this fails, cand (by ref) was already present (!) */
2506 tl_assert(keyW != (UWord)cand);
2507 *res = (VTS*)keyW;
2508 return True;
2509 } else {
2510 /* not present. Clone, add and return address of clone. */
2511 stats__vts_set__focaa_a++;
2512 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
2513 tl_assert(clone != cand);
2514 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
2515 *res = clone;
2516 return False;
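/* The function above is a simple interning (hash-consing) scheme:
   because vts_set compares VTSs by value (VTS__cmp_structural), at
   most one copy of any given vector clock is ever stored.  A minimal
   usage sketch (illustrative only; real callers go through
   vts_tab__find__or__clone_and_add below):

      VTS* canonical = NULL;
      Bool already = vts_set__find__or__clone_and_add(&canonical, cand);
      // if 'already' is False, 'canonical' is a fresh clone now owned
      // by vts_set; 'cand' itself is untouched and may be a reusable
      // temporary (its .id must be VtsID_INVALID on entry).
*/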
2521 /////////////////////////////////////////////////////////
2522 // //
2523 // VTS table //
2524 // //
2525 /////////////////////////////////////////////////////////
2527 static void VtsID__invalidate_caches ( void ); /* fwds */
2529 /* A type to hold VTS table entries. Invariants:
2530 If .vts == NULL, then this entry is not in use, so:
2531 - .rc == 0
2532 - this entry is on the freelist (unfortunately, does not imply
2533 any constraints on value for .freelink)
2534 If .vts != NULL, then this entry is in use:
2535 - .vts is findable in vts_set
2536 - .vts->id == this entry number
2537 - no specific value for .rc (even 0 is OK)
2538 - this entry is not on freelist, so .freelink == VtsID_INVALID
2540 typedef
2541 struct {
2542 VTS* vts; /* vts, in vts_set */
2543 UWord rc; /* reference count - enough for entire aspace */
2544 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2545 VtsID remap; /* used only during pruning */
2547 VtsTE;
2549 /* The VTS table. */
2550 static XArray* /* of VtsTE */ vts_tab = NULL;
2552 /* An index into the VTS table, indicating the start of the list of
2553 free (available for use) entries. If the list is empty, this is
2554 VtsID_INVALID. */
2555 static VtsID vts_tab_freelist = VtsID_INVALID;
2557 /* Do a GC of vts_tab when the freelist becomes empty AND the size of
2558 vts_tab equals or exceeds this size. After GC, the value here is
2559 set appropriately so as to check for the next GC point. */
2560 static Word vts_next_GC_at = 1000;
2562 static void vts_tab_init ( void )
2564 vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2565 HG_(free), sizeof(VtsTE) );
2566 vts_tab_freelist = VtsID_INVALID;
2569 /* Add ii to the free list, checking that it looks out-of-use. */
2570 static void add_to_free_list ( VtsID ii )
2572 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2573 tl_assert(ie->vts == NULL);
2574 tl_assert(ie->rc == 0);
2575 tl_assert(ie->freelink == VtsID_INVALID);
2576 ie->freelink = vts_tab_freelist;
2577 vts_tab_freelist = ii;
2580 /* Get an entry from the free list. This will return VtsID_INVALID if
2581 the free list is empty. */
2582 static VtsID get_from_free_list ( void )
2584 VtsID ii;
2585 VtsTE* ie;
2586 if (vts_tab_freelist == VtsID_INVALID)
2587 return VtsID_INVALID;
2588 ii = vts_tab_freelist;
2589 ie = VG_(indexXA)( vts_tab, ii );
2590 tl_assert(ie->vts == NULL);
2591 tl_assert(ie->rc == 0);
2592 vts_tab_freelist = ie->freelink;
2593 return ii;
2596 /* Produce a new VtsID that can be used, either by getting it from
2597 the freelist, or, if that is empty, by expanding vts_tab. */
2598 static VtsID get_new_VtsID ( void )
2600 VtsID ii;
2601 VtsTE te;
2602 ii = get_from_free_list();
2603 if (ii != VtsID_INVALID)
2604 return ii;
2605 te.vts = NULL;
2606 te.rc = 0;
2607 te.freelink = VtsID_INVALID;
2608 te.remap = VtsID_INVALID;
2609 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2610 return ii;
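/* Worked example of the free list (illustrative): the list threads
   unused vts_tab entries together through their .freelink fields,
   using table indices rather than pointers.  If entries 5 and then 9
   are freed, we end up with vts_tab_freelist == 9, entry 9's .freelink
   == 5 and entry 5's .freelink == VtsID_INVALID; get_new_VtsID() then
   hands back 9, then 5, and only after that grows vts_tab again. */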
2614 /* Indirect callback from lib_zsm. */
2615 static void VtsID__rcinc ( VtsID ii )
2617 VtsTE* ie;
2618 /* VG_(indexXA) does a range check for us */
2619 ie = VG_(indexXA)( vts_tab, ii );
2620 tl_assert(ie->vts); /* else it's not in use */
2621 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2622 tl_assert(ie->vts->id == ii);
2623 ie->rc++;
2626 /* Indirect callback from lib_zsm. */
2627 static void VtsID__rcdec ( VtsID ii )
2629 VtsTE* ie;
2630 /* VG_(indexXA) does a range check for us */
2631 ie = VG_(indexXA)( vts_tab, ii );
2632 tl_assert(ie->vts); /* else it's not in use */
2633 tl_assert(ie->rc > 0); /* else RC snafu */
2634 tl_assert(ie->vts->id == ii);
2635 ie->rc--;
2639 /* Look up 'cand' in our collection of VTSs. If present, return the
2640 VtsID for the pre-existing version. If not present, clone it, add
2641 the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2642 it, and return that. */
2643 static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
2645 VTS* in_tab = NULL;
2646 tl_assert(cand->id == VtsID_INVALID);
2647 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2648 tl_assert(in_tab);
2649 if (already_have) {
2650 /* We already have a copy of 'cand'. Use that. */
2651 VtsTE* ie;
2652 tl_assert(in_tab->id != VtsID_INVALID);
2653 ie = VG_(indexXA)( vts_tab, in_tab->id );
2654 tl_assert(ie->vts == in_tab);
2655 return in_tab->id;
2656 } else {
2657 VtsID ii = get_new_VtsID();
2658 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2659 ie->vts = in_tab;
2660 ie->rc = 0;
2661 ie->freelink = VtsID_INVALID;
2662 in_tab->id = ii;
2663 return ii;
2668 static void show_vts_stats ( const HChar* caller )
2670 UWord nSet, nTab, nLive;
2671 ULong totrc;
2672 UWord n, i;
2673 nSet = VG_(sizeFM)( vts_set );
2674 nTab = VG_(sizeXA)( vts_tab );
2675 totrc = 0;
2676 nLive = 0;
2677 n = VG_(sizeXA)( vts_tab );
2678 for (i = 0; i < n; i++) {
2679 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2680 if (ie->vts) {
2681 nLive++;
2682 totrc += (ULong)ie->rc;
2683 } else {
2684 tl_assert(ie->rc == 0);
2687 VG_(printf)(" show_vts_stats %s\n", caller);
2688 VG_(printf)(" vts_tab size %4lu\n", nTab);
2689 VG_(printf)(" vts_tab live %4lu\n", nLive);
2690 VG_(printf)(" vts_set size %4lu\n", nSet);
2691 VG_(printf)(" total rc %4llu\n", totrc);
2695 /* --- Helpers for VtsID pruning --- */
2697 static
2698 void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2699 /*MOD*/XArray* /* of VtsTE */ new_tab,
2700 VtsID* ii )
2702 VtsTE *old_te, *new_te;
2703 VtsID old_id, new_id;
2704 /* We're relying here on VG_(indexXA)'s range checking to assert on
2705 any stupid values, in particular *ii == VtsID_INVALID. */
2706 old_id = *ii;
2707 old_te = VG_(indexXA)( old_tab, old_id );
2708 old_te->rc--;
2709 new_id = old_te->remap;
2710 new_te = VG_(indexXA)( new_tab, new_id );
2711 new_te->rc++;
2712 *ii = new_id;
2715 static
2716 void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2717 /*MOD*/XArray* /* of VtsTE */ new_tab,
2718 SVal* s )
2720 SVal old_sv, new_sv;
2721 old_sv = *s;
2722 if (SVal__isC(old_sv)) {
2723 VtsID rMin, wMin;
2724 rMin = SVal__unC_Rmin(old_sv);
2725 wMin = SVal__unC_Wmin(old_sv);
2726 remap_VtsID( old_tab, new_tab, &rMin );
2727 remap_VtsID( old_tab, new_tab, &wMin );
2728 new_sv = SVal__mkC( rMin, wMin );
2729 *s = new_sv;
2734 /* NOT TO BE CALLED FROM WITHIN libzsm. */
2735 __attribute__((noinline))
2736 static void vts_tab__do_GC ( Bool show_stats )
2738 UWord i, nTab, nLive, nFreed;
2740 /* ---------- BEGIN VTS GC ---------- */
2741 /* check this is actually necessary. */
2742 tl_assert(vts_tab_freelist == VtsID_INVALID);
2744 /* empty the caches for partial order checks and binary joins. We
2745 could do better and prune out the entries to be deleted, but it
2746 ain't worth the hassle. */
2747 VtsID__invalidate_caches();
2749 /* First, make the reference counts up to date. */
2750 zsm_flush_cache();
2752 nTab = VG_(sizeXA)( vts_tab );
2754 if (show_stats) {
2755 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2756 show_vts_stats("before GC");
2759 /* Now we can inspect the entire vts_tab. Any entries with zero
2760 .rc fields are now no longer in use and can be put back on the
2761 free list, removed from vts_set, and deleted. */
2762 nFreed = 0;
2763 for (i = 0; i < nTab; i++) {
2764 Bool present;
2765 UWord oldK = 0, oldV = 12345;
2766 VtsTE* te = VG_(indexXA)( vts_tab, i );
2767 if (te->vts == NULL) {
2768 tl_assert(te->rc == 0);
2769 continue; /* already on the free list (presumably) */
2771 if (te->rc > 0)
2772 continue; /* in use */
2773 /* Ok, we got one we can free. */
2774 tl_assert(te->vts->id == i);
2775 /* first, remove it from vts_set. */
2776 present = VG_(delFromFM)( vts_set,
2777 &oldK, &oldV, (UWord)te->vts );
2778 tl_assert(present); /* else it isn't in vts_set ?! */
2779 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2780 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2781 /* now free the VTS itself */
2782 VTS__delete(te->vts);
2783 te->vts = NULL;
2784 /* and finally put this entry on the free list */
2785 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2786 add_to_free_list( i );
2787 nFreed++;
2790 /* Now figure out when the next GC should be. We'll allow the
2791 number of VTSs to double before GCing again. Except of course
2792 that since we can't (or, at least, don't) shrink vts_tab, we
2793 can't set the threshold value smaller than it.
2794 tl_assert(nFreed <= nTab);
2795 nLive = nTab - nFreed;
2796 tl_assert(nLive >= 0 && nLive <= nTab);
2797 vts_next_GC_at = 2 * nLive;
2798 if (vts_next_GC_at < nTab)
2799 vts_next_GC_at = nTab;
2801 if (show_stats) {
2802 show_vts_stats("after GC");
2803 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2806 if (VG_(clo_stats)) {
2807 static UInt ctr = 1;
2808 tl_assert(nTab > 0);
2809 VG_(message)(Vg_DebugMsg,
2810 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
2811 ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
2813 /* ---------- END VTS GC ---------- */
2815 /* Decide whether to do VTS pruning. We have one of three
2816 settings. */
2817 static UInt pruning_auto_ctr = 0; /* do not make non-static */
2819 Bool do_pruning = False;
2820 switch (HG_(clo_vts_pruning)) {
2821 case 0: /* never */
2822 break;
2823 case 1: /* auto */
2824 do_pruning = (++pruning_auto_ctr % 5) == 0;
2825 break;
2826 case 2: /* always */
2827 do_pruning = True;
2828 break;
2829 default:
2830 tl_assert(0);
2833 /* The rest of this routine only handles pruning, so we can
2834 quit at this point if it is not to be done. */
2835 if (!do_pruning)
2836 return;
2838 /* ---------- BEGIN VTS PRUNING ---------- */
2839 /* We begin by sorting the backing table on its .thr values, so as
2840 to (1) check they are unique [else something has gone wrong,
2841 since it means we must have seen some Thr* exiting more than
2842 once, which can't happen], and (2) so that we can quickly look
2843 up the dead-thread entries as we work through the VTSs. */
2844 VG_(sortXA)( verydead_thread_table );
2845 /* Sanity check: the ThrID values must be unique and in increasing order. */
2846 UWord nBT = VG_(sizeXA)( verydead_thread_table );
2847 if (nBT > 0) {
2848 ThrID thrid1, thrid2;
2849 thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, 0 );
2850 for (i = 1; i < nBT; i++) {
2851 thrid1 = thrid2;
2852 thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, i );
2853 tl_assert(thrid1 < thrid2);
2856 /* Ok, so the dead thread table has unique and in-order keys. */
2858 /* We will run through the old table, and create a new table and
2859 set, at the same time setting the .remap entries in the old
2860 table to point to the new entries. Then, visit every VtsID in
2861 the system, and replace all of them with new ones, using the
2862 .remap entries in the old table. Finally, we can delete the old
2863 table and set. */
2865 XArray* /* of VtsTE */ new_tab
2866 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
2867 HG_(free), sizeof(VtsTE) );
2869 /* WordFM VTS* void */
2870 WordFM* new_set
2871 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
2872 HG_(free),
2873 (Word(*)(UWord,UWord))VTS__cmp_structural );
2875 /* Visit each old VTS. For each one:
2877 * make a pruned version
2879 * search new_set for the pruned version, yielding either
2880 Nothing (not present) or the new VtsID for it.
2882 * if not present, allocate a new VtsID for it, insert (pruned
2883 VTS, new VtsID) in the tree, and set
2884 remap_table[old VtsID] = new VtsID.
2886 * if present, set remap_table[old VtsID] = new VtsID, where
2887 new VtsID was determined by the tree lookup. Then free up
2888 the clone.
2891 UWord nBeforePruning = 0, nAfterPruning = 0;
2892 UWord nSTSsBefore = 0, nSTSsAfter = 0;
2893 VtsID new_VtsID_ctr = 0;
2895 for (i = 0; i < nTab; i++) {
2897 /* For each old VTS .. */
2898 VtsTE* old_te = VG_(indexXA)( vts_tab, i );
2899 VTS* old_vts = old_te->vts;
2900 tl_assert(old_te->remap == VtsID_INVALID);
2902 /* Skip it if not in use */
2903 if (old_te->rc == 0) {
2904 tl_assert(old_vts == NULL);
2905 continue;
2907 tl_assert(old_vts != NULL);
2908 tl_assert(old_vts->id == i);
2909 tl_assert(old_vts->ts != NULL);
2911 /* It is in use. Make a pruned version. */
2912 nBeforePruning++;
2913 nSTSsBefore += old_vts->usedTS;
2914 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
2915 old_vts, verydead_thread_table);
2916 tl_assert(new_vts->sizeTS == new_vts->usedTS);
2917 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
2918 == 0x0ddC0ffeeBadF00dULL);
2920 /* Get rid of the old VTS and the tree entry. It's a bit more
2921 complex to incrementally delete the VTSs now than to nuke
2922 them all after we're done, but the upside is that we don't
2923 wind up temporarily storing potentially two complete copies
2924 of each VTS and hence spiking memory use. */
2925 UWord oldK = 0, oldV = 12345;
2926 Bool present = VG_(delFromFM)( vts_set,
2927 &oldK, &oldV, (UWord)old_vts );
2928 tl_assert(present); /* else it isn't in vts_set ?! */
2929 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2930 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
2931 /* now free the VTS itself */
2932 VTS__delete(old_vts);
2933 old_te->vts = NULL;
2934 old_vts = NULL;
2936 /* NO MENTIONS of old_vts allowed beyond this point. */
2938 /* Ok, we have the pruned copy in new_vts. See if a
2939 structurally identical version is already present in new_set.
2940 If so, delete the one we just made and move on; if not, add
2941 it. */
2942 VTS* identical_version = NULL;
2943 UWord valW = 12345;
2944 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
2945 (UWord)new_vts)) {
2946 // already have it
2947 tl_assert(valW == 0);
2948 tl_assert(identical_version != NULL);
2949 tl_assert(identical_version != new_vts);
2950 VTS__delete(new_vts);
2951 new_vts = identical_version;
2952 tl_assert(new_vts->id != VtsID_INVALID);
2953 } else {
2954 tl_assert(valW == 12345);
2955 tl_assert(identical_version == NULL);
2956 new_vts->id = new_VtsID_ctr++;
2957 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
2958 tl_assert(!b);
2959 VtsTE new_te;
2960 new_te.vts = new_vts;
2961 new_te.rc = 0;
2962 new_te.freelink = VtsID_INVALID;
2963 new_te.remap = VtsID_INVALID;
2964 Word j = VG_(addToXA)( new_tab, &new_te );
2965 tl_assert(j <= i);
2966 tl_assert(j == new_VtsID_ctr - 1);
2967 // stats
2968 nAfterPruning++;
2969 nSTSsAfter += new_vts->usedTS;
2971 old_te->remap = new_vts->id;
2973 } /* for (i = 0; i < nTab; i++) */
2975 /* At this point, we have:
2976 * the old VTS table, with its .remap entries set,
2977 and with all .vts == NULL.
2978 * the old VTS tree should be empty, since it and the old VTSs
2979 it contained have been incrementally deleted as we worked
2980 through the old table.
2981 * the new VTS table, with all .rc == 0, all .freelink and .remap
2982 == VtsID_INVALID.
2983 * the new VTS tree.
2985 tl_assert( VG_(sizeFM)(vts_set) == 0 );
2987 /* Now actually apply the mapping. */
2988 /* Visit all the VtsIDs in the entire system. Where do we expect
2989 to find them?
2990 (a) in shadow memory -- the LineZs and LineFs
2991 (b) in our collection of struct _Thrs.
2992 (c) in our collection of struct _SOs.
2993 Nowhere else, AFAICS. Not in the zsm cache, because that just
2994 got invalidated.
2996 Using the .remap fields in vts_tab, map each old VtsID to a new
2997 VtsID. For each old VtsID, dec its rc; and for each new one,
2998 inc it. This sets up the new refcounts, and it also gives a
2999 cheap sanity check of the old ones: all old refcounts should be
3000 zero after this operation.
3003 /* Do the mappings for (a) above: iterate over the Primary shadow
3004 mem map (WordFM Addr SecMap*). */
3005 UWord secmapW = 0;
3006 VG_(initIterFM)( map_shmem );
3007 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3008 UWord j;
3009 SecMap* sm = (SecMap*)secmapW;
3010 tl_assert(sm->magic == SecMap_MAGIC);
3011 /* Deal with the LineZs */
3012 for (i = 0; i < N_SECMAP_ZLINES; i++) {
3013 LineZ* lineZ = &sm->linesZ[i];
3014 if (lineZ->dict[0] == SVal_INVALID)
3015 continue; /* not in use -- data is in F rep instead */
3016 for (j = 0; j < 4; j++)
3017 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3019 /* Deal with the LineFs */
3020 for (i = 0; i < sm->linesF_size; i++) {
3021 LineF* lineF = &sm->linesF[i];
3022 if (!lineF->inUse)
3023 continue;
3024 for (j = 0; j < N_LINE_ARANGE; j++)
3025 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3028 VG_(doneIterFM)( map_shmem );
3030 /* Do the mappings for (b) above: visit our collection of struct
3031 _Thrs. */
3032 Thread* hgthread = get_admin_threads();
3033 tl_assert(hgthread);
3034 while (hgthread) {
3035 Thr* hbthr = hgthread->hbthr;
3036 tl_assert(hbthr);
3037 /* Threads that are listed in the prunable set have their viR
3038 and viW set to VtsID_INVALID, so we can't mess with them. */
3039 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3040 tl_assert(hbthr->viR == VtsID_INVALID);
3041 tl_assert(hbthr->viW == VtsID_INVALID);
3042 hgthread = hgthread->admin;
3043 continue;
3045 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3046 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3047 hgthread = hgthread->admin;
3050 /* Do the mappings for (c) above: visit the struct _SOs. */
3051 SO* so = admin_SO;
3052 while (so) {
3053 if (so->viR != VtsID_INVALID)
3054 remap_VtsID( vts_tab, new_tab, &so->viR );
3055 if (so->viW != VtsID_INVALID)
3056 remap_VtsID( vts_tab, new_tab, &so->viW );
3057 so = so->admin_next;
3060 /* So, we're nearly done (with this incredibly complex operation).
3061 Check the refcounts for the old VtsIDs all fell to zero, as
3062 expected. Any failure is serious. */
3063 for (i = 0; i < nTab; i++) {
3064 VtsTE* te = VG_(indexXA)( vts_tab, i );
3065 tl_assert(te->vts == NULL);
3066 /* This is the assert proper. Note we're also asserting
3067 zeroness for old entries which are unmapped (hence have
3068 .remap == VtsID_INVALID). That's OK. */
3069 tl_assert(te->rc == 0);
3072 /* Install the new table and set. */
3073 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3074 vts_set = new_set;
3075 VG_(deleteXA)( vts_tab );
3076 vts_tab = new_tab;
3078 /* The freelist of vts_tab entries is empty now, because we've
3079 compacted all of the live entries at the low end of the
3080 table. */
3081 vts_tab_freelist = VtsID_INVALID;
3083 /* Sanity check vts_set and vts_tab. */
3085 /* Because all the live entries got slid down to the bottom of vts_tab: */
3086 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3088 /* Assert that the vts_tab and vts_set entries point at each other
3089 in the required way */
3090 UWord wordK = 0, wordV = 0;
3091 VG_(initIterFM)( vts_set );
3092 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3093 tl_assert(wordK != 0);
3094 tl_assert(wordV == 0);
3095 VTS* vts = (VTS*)wordK;
3096 tl_assert(vts->id != VtsID_INVALID);
3097 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3098 tl_assert(te->vts == vts);
3100 VG_(doneIterFM)( vts_set );
3102 /* Also iterate over the table, and check each entry is
3103 plausible. */
3104 nTab = VG_(sizeXA)( vts_tab );
3105 for (i = 0; i < nTab; i++) {
3106 VtsTE* te = VG_(indexXA)( vts_tab, i );
3107 tl_assert(te->vts);
3108 tl_assert(te->vts->id == i);
3109 tl_assert(te->rc > 0); /* 'cos we just GC'd */
3110 tl_assert(te->freelink == VtsID_INVALID); /* in use */
3111 tl_assert(te->remap == VtsID_INVALID); /* not relevant */
3114 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
3115 if (VG_(clo_stats)) {
3116 static UInt ctr = 1;
3117 tl_assert(nTab > 0);
3118 VG_(message)(
3119 Vg_DebugMsg,
3120 "libhb: VTS PR: #%u before %lu (avg sz %lu) "
3121 "after %lu (avg sz %lu)\n",
3122 ctr++,
3123 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3124 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3127 if (0)
3128 VG_(printf)("VTQ: before pruning %lu (avg sz %lu), "
3129 "after pruning %lu (avg sz %lu)\n",
3130 nBeforePruning, nSTSsBefore / nBeforePruning,
3131 nAfterPruning, nSTSsAfter / nAfterPruning);
3132 /* ---------- END VTS PRUNING ---------- */
3136 /////////////////////////////////////////////////////////
3137 // //
3138 // Vts IDs //
3139 // //
3140 /////////////////////////////////////////////////////////
3142 //////////////////////////
3143 /* A temporary, max-sized VTS which is used as a temporary (the first
3144 argument) in VTS__singleton, VTS__tick and VTS__join operations. */
3145 static VTS* temp_max_sized_VTS = NULL;
3147 //////////////////////////
3148 static ULong stats__cmpLEQ_queries = 0;
3149 static ULong stats__cmpLEQ_misses = 0;
3150 static ULong stats__join2_queries = 0;
3151 static ULong stats__join2_misses = 0;
3153 static inline UInt ROL32 ( UInt w, Int n ) {
3154 w = (w << n) | (w >> (32-n));
3155 return w;
3157 static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3158 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3159 return hash % nTab;
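/* hash_VtsIDs mixes the two VtsIDs with different rotations, so that
   (vi1,vi2) and (vi2,vi1) normally land in different slots, and then
   reduces modulo the table size.  The two caches below are
   direct-mapped: a miss simply overwrites whatever occupied the slot,
   and a hit requires both vi1 and vi2 to match exactly.  For example,
   with N_CMPLEQ_CACHE == 1023 the pair (vi1 7, vi2 3) maps to slot
   (ROL32(7,19) ^ ROL32(3,13)) % 1023; the particular slot is
   irrelevant, what matters is that equal pairs always hash alike. */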
3162 #define N_CMPLEQ_CACHE 1023
3163 static
3164 struct { VtsID vi1; VtsID vi2; Bool leq; }
3165 cmpLEQ_cache[N_CMPLEQ_CACHE];
3167 #define N_JOIN2_CACHE 1023
3168 static
3169 struct { VtsID vi1; VtsID vi2; VtsID res; }
3170 join2_cache[N_JOIN2_CACHE];
3172 static void VtsID__invalidate_caches ( void ) {
3173 Int i;
3174 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3175 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3176 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3177 cmpLEQ_cache[i].leq = False;
3179 for (i = 0; i < N_JOIN2_CACHE; i++) {
3180 join2_cache[i].vi1 = VtsID_INVALID;
3181 join2_cache[i].vi2 = VtsID_INVALID;
3182 join2_cache[i].res = VtsID_INVALID;
3185 //////////////////////////
3187 //static Bool VtsID__is_valid ( VtsID vi ) {
3188 // VtsTE* ve;
3189 // if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3190 // return False;
3191 // ve = VG_(indexXA)( vts_tab, vi );
3192 // if (!ve->vts)
3193 // return False;
3194 // tl_assert(ve->vts->id == vi);
3195 // return True;
3198 static VTS* VtsID__to_VTS ( VtsID vi ) {
3199 VtsTE* te = VG_(indexXA)( vts_tab, vi );
3200 tl_assert(te->vts);
3201 return te->vts;
3204 static void VtsID__pp ( VtsID vi ) {
3205 HChar buf[100];
3206 VTS* vts = VtsID__to_VTS(vi);
3207 VTS__show( buf, sizeof(buf)-1, vts );
3208 buf[sizeof(buf)-1] = 0;
3209 VG_(printf)("%s", buf);
3212 /* compute partial ordering relation of vi1 and vi2. */
3213 __attribute__((noinline))
3214 static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
3215 UInt hash;
3216 Bool leq;
3217 VTS *v1, *v2;
3218 //if (vi1 == vi2) return True;
3219 tl_assert(vi1 != vi2);
3220 ////++
3221 stats__cmpLEQ_queries++;
3222 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3223 if (cmpLEQ_cache[hash].vi1 == vi1
3224 && cmpLEQ_cache[hash].vi2 == vi2)
3225 return cmpLEQ_cache[hash].leq;
3226 stats__cmpLEQ_misses++;
3227 ////--
3228 v1 = VtsID__to_VTS(vi1);
3229 v2 = VtsID__to_VTS(vi2);
3230 leq = VTS__cmpLEQ( v1, v2 ) == 0;
3231 ////++
3232 cmpLEQ_cache[hash].vi1 = vi1;
3233 cmpLEQ_cache[hash].vi2 = vi2;
3234 cmpLEQ_cache[hash].leq = leq;
3235 ////--
3236 return leq;
3238 static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3239 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
3242 /* compute binary join */
3243 __attribute__((noinline))
3244 static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3245 UInt hash;
3246 VtsID res;
3247 VTS *vts1, *vts2;
3248 //if (vi1 == vi2) return vi1;
3249 tl_assert(vi1 != vi2);
3250 ////++
3251 stats__join2_queries++;
3252 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3253 if (join2_cache[hash].vi1 == vi1
3254 && join2_cache[hash].vi2 == vi2)
3255 return join2_cache[hash].res;
3256 stats__join2_misses++;
3257 ////--
3258 vts1 = VtsID__to_VTS(vi1);
3259 vts2 = VtsID__to_VTS(vi2);
3260 temp_max_sized_VTS->usedTS = 0;
3261 VTS__join(temp_max_sized_VTS, vts1,vts2);
3262 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3263 ////++
3264 join2_cache[hash].vi1 = vi1;
3265 join2_cache[hash].vi2 = vi2;
3266 join2_cache[hash].res = res;
3267 ////--
3268 return res;
3270 static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
3271 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
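/* Minimal sketch of how these wrappers compose (illustrative only --
   the real call sites are in the state-machine code further below): a
   hypothetical "receive from a sync object, then advance my own clock"
   step might be written as

      thr->viW = VtsID__tick( VtsID__join2( thr->viW, so_viW ), thr );

   where 'so_viW' stands for the sync object's write clock.  Both
   wrappers short-circuit the vi1 == vi2 case, so the caches are only
   consulted for genuinely distinct VtsID pairs. */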
3274 /* create a singleton VTS, namely [thr:1] */
3275 static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
3276 temp_max_sized_VTS->usedTS = 0;
3277 VTS__singleton(temp_max_sized_VTS, thr,tym);
3278 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3281 /* tick operation, creates value 1 if specified index is absent */
3282 static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3283 VTS* vts = VtsID__to_VTS(vi);
3284 temp_max_sized_VTS->usedTS = 0;
3285 VTS__tick(temp_max_sized_VTS, idx,vts);
3286 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3289 /* index into a VTS (only for assertions) */
3290 static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3291 VTS* vts = VtsID__to_VTS(vi);
3292 return VTS__indexAt_SLOW( vts, idx );
3295 /* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
3296 any, really) element in vi1 which is pointwise greater-than the
3297 corresponding element in vi2. If no such element exists, return
3298 NULL. This needs to be fairly quick since it is called every time
3299 a race is detected. */
3300 static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3302 VTS *vts1, *vts2;
3303 Thr* diffthr;
3304 ThrID diffthrid;
3305 tl_assert(vi1 != vi2);
3306 vts1 = VtsID__to_VTS(vi1);
3307 vts2 = VtsID__to_VTS(vi2);
3308 tl_assert(vts1 != vts2);
3309 diffthrid = VTS__cmpLEQ(vts1, vts2);
3310 diffthr = Thr__from_ThrID(diffthrid);
3311 tl_assert(diffthr); /* else they are LEQ ! */
3312 return diffthr;
3316 /////////////////////////////////////////////////////////
3317 // //
3318 // Filters //
3319 // //
3320 /////////////////////////////////////////////////////////
3322 /* Forget everything we know -- clear the filter and let everything
3323 through. This needs to be as fast as possible, since it is called
3324 every time the running thread changes, and every time a thread's
3325 vector clocks change, which can be quite frequent. The obvious
3326 fast way to do this is simply to stuff in tags which we know are
3327 not going to match anything, since they're not aligned to the start
3328 of a line. */
3329 static void Filter__clear ( Filter* fi, const HChar* who )
3331 UWord i;
3332 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
3333 for (i = 0; i < FI_NUM_LINES; i += 8) {
3334 fi->tags[i+0] = 1; /* impossible value -- cannot match */
3335 fi->tags[i+1] = 1;
3336 fi->tags[i+2] = 1;
3337 fi->tags[i+3] = 1;
3338 fi->tags[i+4] = 1;
3339 fi->tags[i+5] = 1;
3340 fi->tags[i+6] = 1;
3341 fi->tags[i+7] = 1;
3343 tl_assert(i == FI_NUM_LINES);
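/* Layout of the filter's accounting bits, as implied by the masks used
   in the handlers below: each FiLine is divided into 8-byte granules
   and each granule gets one 16-bit word in .u16s[].  Within that word,
   byte k of the granule (k = 0..7) owns two bits: bit 2*k+1 means "a
   read of this byte has already been let through" and bit 2*k means
   the same for a write.  The write handlers set and test both bits
   (0x3 << 2*k per byte, 0xFFFF per aligned granule), since once a
   write has been processed, later reads and writes of that byte are
   both skippable; the read handlers touch only the read bits
   (0x2 << 2*k per byte, 0xAAAA per aligned granule).  Filter__clear
   above stuffs the impossible tag value 1 into every line, so every
   subsequent lookup misses and nothing is filtered until lines are
   re-established by the handlers. */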
3346 /* Clearing an arbitrary range in the filter. Unfortunately
3347 we have to do this due to core-supplied new/die-mem events. */
3349 static void Filter__clear_1byte ( Filter* fi, Addr a )
3351 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3352 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3353 FiLine* line = &fi->lines[lineno];
3354 UWord loff = (a - atag) / 8;
3355 UShort mask = 0x3 << (2 * (a & 7));
3356 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3357 if (LIKELY( fi->tags[lineno] == atag )) {
3358 /* hit. clear the bits. */
3359 UShort u16 = line->u16s[loff];
3360 line->u16s[loff] = u16 & ~mask; /* clear them */
3361 } else {
3362 /* miss. The filter doesn't hold this address, so ignore. */
3366 static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
3368 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3369 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3370 FiLine* line = &fi->lines[lineno];
3371 UWord loff = (a - atag) / 8;
3372 if (LIKELY( fi->tags[lineno] == atag )) {
3373 line->u16s[loff] = 0;
3374 } else {
3375 /* miss. The filter doesn't hold this address, so ignore. */
3379 static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3381 //VG_(printf)("%lu ", len);
3382 /* slowly do part preceding 8-alignment */
3383 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3384 Filter__clear_1byte( fi, a );
3385 a++;
3386 len--;
3388 /* vector loop */
3389 while (len >= 8) {
3390 Filter__clear_8bytes_aligned( fi, a );
3391 a += 8;
3392 len -= 8;
3394 /* slowly do tail */
3395 while (UNLIKELY(len > 0)) {
3396 Filter__clear_1byte( fi, a );
3397 a++;
3398 len--;
3403 /* ------ Read handlers for the filter. ------ */
3405 static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3407 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3408 return False;
3410 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3411 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3412 FiLine* line = &fi->lines[lineno];
3413 UWord loff = (a - atag) / 8;
3414 UShort mask = 0xAAAA;
3415 if (LIKELY( fi->tags[lineno] == atag )) {
3416 /* hit. check line and update. */
3417 UShort u16 = line->u16s[loff];
3418 Bool ok = (u16 & mask) == mask; /* all R bits set? */
3419 line->u16s[loff] = u16 | mask; /* set them */
3420 return ok;
3421 } else {
3422 /* miss. nuke existing line and re-use it. */
3423 UWord i;
3424 fi->tags[lineno] = atag;
3425 for (i = 0; i < FI_LINE_SZB / 8; i++)
3426 line->u16s[i] = 0;
3427 line->u16s[loff] = mask;
3428 return False;
3433 static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3435 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3436 return False;
3438 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3439 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3440 FiLine* line = &fi->lines[lineno];
3441 UWord loff = (a - atag) / 8;
3442 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3443 if (LIKELY( fi->tags[lineno] == atag )) {
3444 /* hit. check line and update. */
3445 UShort u16 = line->u16s[loff];
3446 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
3447 line->u16s[loff] = u16 | mask; /* set them */
3448 return ok;
3449 } else {
3450 /* miss. nuke existing line and re-use it. */
3451 UWord i;
3452 fi->tags[lineno] = atag;
3453 for (i = 0; i < FI_LINE_SZB / 8; i++)
3454 line->u16s[i] = 0;
3455 line->u16s[loff] = mask;
3456 return False;
3461 static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3463 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3464 return False;
3466 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3467 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3468 FiLine* line = &fi->lines[lineno];
3469 UWord loff = (a - atag) / 8;
3470 UShort mask = 0xA << (2 * (a & 6));
3471 /* mask is A000, 0A00, 00A0 or 000A */
3472 if (LIKELY( fi->tags[lineno] == atag )) {
3473 /* hit. check line and update. */
3474 UShort u16 = line->u16s[loff];
3475 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
3476 line->u16s[loff] = u16 | mask; /* set them */
3477 return ok;
3478 } else {
3479 /* miss. nuke existing line and re-use it. */
3480 UWord i;
3481 fi->tags[lineno] = atag;
3482 for (i = 0; i < FI_LINE_SZB / 8; i++)
3483 line->u16s[i] = 0;
3484 line->u16s[loff] = mask;
3485 return False;
3490 static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3493 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3494 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3495 FiLine* line = &fi->lines[lineno];
3496 UWord loff = (a - atag) / 8;
3497 UShort mask = 0x2 << (2 * (a & 7));
3498 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3499 if (LIKELY( fi->tags[lineno] == atag )) {
3500 /* hit. check line and update. */
3501 UShort u16 = line->u16s[loff];
3502 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
3503 line->u16s[loff] = u16 | mask; /* set them */
3504 return ok;
3505 } else {
3506 /* miss. nuke existing line and re-use it. */
3507 UWord i;
3508 fi->tags[lineno] = atag;
3509 for (i = 0; i < FI_LINE_SZB / 8; i++)
3510 line->u16s[i] = 0;
3511 line->u16s[loff] = mask;
3512 return False;
3518 /* ------ Write handlers for the filter. ------ */
3520 static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3522 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3523 return False;
3525 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3526 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3527 FiLine* line = &fi->lines[lineno];
3528 UWord loff = (a - atag) / 8;
3529 UShort mask = 0xFFFF;
3530 if (LIKELY( fi->tags[lineno] == atag )) {
3531 /* hit. check line and update. */
3532 UShort u16 = line->u16s[loff];
3533 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
3534 line->u16s[loff] = u16 | mask; /* set them */
3535 return ok;
3536 } else {
3537 /* miss. nuke existing line and re-use it. */
3538 UWord i;
3539 fi->tags[lineno] = atag;
3540 for (i = 0; i < FI_LINE_SZB / 8; i++)
3541 line->u16s[i] = 0;
3542 line->u16s[loff] = mask;
3543 return False;
3548 static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3550 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3551 return False;
3553 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3554 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3555 FiLine* line = &fi->lines[lineno];
3556 UWord loff = (a - atag) / 8;
3557 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3558 if (LIKELY( fi->tags[lineno] == atag )) {
3559 /* hit. check line and update. */
3560 UShort u16 = line->u16s[loff];
3561 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
3562 line->u16s[loff] = u16 | mask; /* set them */
3563 return ok;
3564 } else {
3565 /* miss. nuke existing line and re-use it. */
3566 UWord i;
3567 fi->tags[lineno] = atag;
3568 for (i = 0; i < FI_LINE_SZB / 8; i++)
3569 line->u16s[i] = 0;
3570 line->u16s[loff] = mask;
3571 return False;
3576 static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3578 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3579 return False;
3581 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3582 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3583 FiLine* line = &fi->lines[lineno];
3584 UWord loff = (a - atag) / 8;
3585 UShort mask = 0xF << (2 * (a & 6));
3586 /* mask is F000, 0F00, 00F0 or 000F */
3587 if (LIKELY( fi->tags[lineno] == atag )) {
3588 /* hit. check line and update. */
3589 UShort u16 = line->u16s[loff];
3590 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
3591 line->u16s[loff] = u16 | mask; /* set them */
3592 return ok;
3593 } else {
3594 /* miss. nuke existing line and re-use it. */
3595 UWord i;
3596 fi->tags[lineno] = atag;
3597 for (i = 0; i < FI_LINE_SZB / 8; i++)
3598 line->u16s[i] = 0;
3599 line->u16s[loff] = mask;
3600 return False;
3605 static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
3608 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3609 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3610 FiLine* line = &fi->lines[lineno];
3611 UWord loff = (a - atag) / 8;
3612 UShort mask = 0x3 << (2 * (a & 7));
3613 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3614 if (LIKELY( fi->tags[lineno] == atag )) {
3615 /* hit. check line and update. */
3616 UShort u16 = line->u16s[loff];
3617 Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
3618 line->u16s[loff] = u16 | mask; /* set them */
3619 return ok;
3620 } else {
3621 /* miss. nuke existing line and re-use it. */
3622 UWord i;
3623 fi->tags[lineno] = atag;
3624 for (i = 0; i < FI_LINE_SZB / 8; i++)
3625 line->u16s[i] = 0;
3626 line->u16s[loff] = mask;
3627 return False;
3633 /////////////////////////////////////////////////////////
3634 // //
3635 // Threads //
3636 // //
3637 /////////////////////////////////////////////////////////
3639 /* Maps ThrID values to their Thr*s (which contain ThrID values that
3640 should point back to the relevant slot in the array). Lowest
3641 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
3642 static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
3644 /* And a counter to dole out ThrID values. For rationale/background,
3645 see comments on definition of ScalarTS (far) above. */
3646 static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
3648 static ThrID Thr__to_ThrID ( Thr* thr ) {
3649 return thr->thrid;
3651 static Thr* Thr__from_ThrID ( UInt thrid ) {
3652 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
3653 tl_assert(thr->thrid == thrid);
3654 return thr;
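/* Example of the numbering: the first Thr* allocated by Thr__new gets
   thrid 1024 and occupies slot 0 of thrid_to_thr_map, the second gets
   1025 in slot 1, and so on; Thr__from_ThrID(1026) therefore reads
   slot 1026 - 1024 == 2 and asserts that the stored Thr* points back
   to thrid 1026. */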
3657 static Thr* Thr__new ( void )
3659 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
3660 thr->viR = VtsID_INVALID;
3661 thr->viW = VtsID_INVALID;
3662 thr->llexit_done = False;
3663 thr->joinedwith_done = False;
3664 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
3665 if (HG_(clo_history_level) == 1)
3666 thr->local_Kws_n_stacks
3667 = VG_(newXA)( HG_(zalloc),
3668 "libhb.Thr__new.3 (local_Kws_and_stacks)",
3669 HG_(free), sizeof(ULong_n_EC) );
3671 /* Add this Thr* <-> ThrID binding to the mapping, and
3672 cross-check */
3673 if (!thrid_to_thr_map) {
3674 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
3675 HG_(free), sizeof(Thr*) );
3678 if (thrid_counter >= ThrID_MAX_VALID) {
3679 /* We're hosed. We have to stop. */
3680 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
3683 thr->thrid = thrid_counter++;
3684 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
3685 tl_assert(ix + 1024 == thr->thrid);
3687 return thr;
3690 static void note_local_Kw_n_stack_for ( Thr* thr )
3692 Word nPresent;
3693 ULong_n_EC pair;
3694 tl_assert(thr);
3696 // We only collect this info at history level 1 (approx)
3697 if (HG_(clo_history_level) != 1)
3698 return;
3700 /* This is the scalar Kw for thr. */
3701 pair.ull = VtsID__indexAt( thr->viW, thr );
3702 pair.ec = main_get_EC( thr );
3703 tl_assert(pair.ec);
3704 tl_assert(thr->local_Kws_n_stacks);
3706 /* check that we're not adding duplicates */
3707 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
3709 /* Throw away old stacks, if necessary. We can't accumulate stuff
3710 indefinitely. */
3711 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
3712 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
3713 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
3714 if (0)
3715 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
3716 thr, pair.ull, pair.ec );
3719 if (nPresent > 0) {
3720 ULong_n_EC* prevPair
3721 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
3722 tl_assert( prevPair->ull <= pair.ull );
3725 if (nPresent == 0)
3726 pair.ec = NULL;
3728 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
3730 if (0)
3731 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
3732 thr, pair.ull, pair.ec );
3733 if (0)
3734 VG_(pp_ExeContext)(pair.ec);
3737 static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
3738 const ULong_n_EC* pair2 )
3740 if (pair1->ull < pair2->ull) return -1;
3741 if (pair1->ull > pair2->ull) return 1;
3742 return 0;
3746 /////////////////////////////////////////////////////////
3747 // //
3748 // Shadow Values //
3749 // //
3750 /////////////////////////////////////////////////////////
3752 // type SVal, SVal_INVALID and SVal_NOACCESS are defined by
3753 // hb_zsm.h. We have to do everything else here.
3755 /* SVal is 64 bit unsigned int.
3757 <---------30---------> <---------30--------->
3758 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
3759 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
3760 11 0--------------------0 00 0--------------------0 A: SVal_INVALID
3763 #define SVAL_TAGMASK (3ULL << 62)
3765 static inline Bool SVal__isC ( SVal s ) {
3766 return (0ULL << 62) == (s & SVAL_TAGMASK);
3768 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
3769 //tl_assert(VtsID__is_valid(rmini));
3770 //tl_assert(VtsID__is_valid(wmini));
3771 return (((ULong)rmini) << 32) | ((ULong)wmini);
3773 static inline VtsID SVal__unC_Rmin ( SVal s ) {
3774 tl_assert(SVal__isC(s));
3775 return (VtsID)(s >> 32);
3777 static inline VtsID SVal__unC_Wmin ( SVal s ) {
3778 tl_assert(SVal__isC(s));
3779 return (VtsID)(s & 0xFFFFFFFFULL);
3782 static inline Bool SVal__isA ( SVal s ) {
3783 return (2ULL << 62) == (s & SVAL_TAGMASK);
3785 __attribute__((unused))
3786 static inline SVal SVal__mkA ( void ) {
3787 return 2ULL << 62;
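/* Worked example of the encoding above: SVal__mkC(0x123, 0x456)
   == 0x0000012300000456ULL.  Its top two bits are 00, so SVal__isC
   holds, and SVal__unC_Rmin/SVal__unC_Wmin recover 0x123 and 0x456;
   this relies on VtsIDs fitting in 30 bits, per the layout diagram.
   SVal__mkA() == (2ULL << 62) == 0x8000000000000000ULL, which
   SVAL_TAGMASK classifies as tag 10 (SVal__isA). */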
3790 /* Direct callback from lib_zsm. */
3791 static void SVal__rcinc ( SVal s ) {
3792 if (SVal__isC(s)) {
3793 VtsID__rcinc( SVal__unC_Rmin(s) );
3794 VtsID__rcinc( SVal__unC_Wmin(s) );
3798 /* Direct callback from lib_zsm. */
3799 static void SVal__rcdec ( SVal s ) {
3800 if (SVal__isC(s)) {
3801 VtsID__rcdec( SVal__unC_Rmin(s) );
3802 VtsID__rcdec( SVal__unC_Wmin(s) );
3807 /////////////////////////////////////////////////////////
3808 // //
3809 // Change-event map2 //
3810 // //
3811 /////////////////////////////////////////////////////////
3813 #define EVENT_MAP_GC_DISCARD_FRACTION 0.5
3815 /* This is in two parts:
3817 1. A hash table of RCECs. This is a set of reference-counted stack
3818 traces. When the reference count of a stack trace becomes zero,
3819 it is removed from the set and freed up. The intent is to have
3820 a set of stack traces which can be referred to from (2), but to
3821 only represent each one once. The set is indexed/searched by
3822 ordering on the stack trace vectors.
3824 2. A SparseWA of OldRefs. These store information about each old
3825 ref that we need to record. It is indexed by address of the
3826 location for which the information is recorded. For LRU
3827 purposes, each OldRef also contains a generation number,
3828 indicating when it was most recently accessed.
3830 The important part of an OldRef is, however, its accs[] array.
3831 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
3832 size) triples to RCECs. This allows us to collect the last
3833 access-traceback by up to N_OLDREF_ACCS different triples for
3834 this location. The accs[] array is a MTF-array. If a binding
3835 falls off the end, that's too bad -- we will lose info about
3836 that triple's access to this location.
3838 When the SparseWA becomes too big, we can throw away the OldRefs
3839 whose generation numbers are below some threshold; hence doing
3840 approximate LRU discarding. For each discarded OldRef we must
3841 of course decrement the reference count on all the RCECs it
3842 refers to, in order that entries from (1) eventually get
3843 discarded too.
3845 A major improvement in reliability of this mechanism would be to
3846 have a dynamically sized OldRef.accs[] array, so no entries ever
3847 fall off the end. In investigations (Dec 08) it appears that a
3848 major cause of the non-availability of conflicting-access traces
3849 in race reports is the fixed size of this array. I
3850 suspect for most OldRefs, only a few entries are used, but for a
3851 minority of cases there is an overflow, leading to info lossage.
3852 Investigations also suggest this is very workload and scheduling
3853 sensitive. Therefore a dynamic sizing would be better.
3855 However, dynamic sizing would defeat the use of a PoolAllocator
3856 for OldRef structures. And that's important for performance. So
3857 it's not straightforward to do.
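/* Concretely (see event_map_bind below): on a lookup hit, the matching
   accs[] entry is swapped one place towards index 0 rather than moved
   all the way to the front; on a miss, every entry shifts down one
   slot, the RCEC of the last slot (if occupied) has its reference
   count decremented, and the new (thread, R/W, size) binding is
   written into slot 0.  With N_OLDREF_ACCS == 5 this remembers the
   most recent stack for up to five distinct triples per address. */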
3861 static UWord stats__ctxt_rcdec1 = 0;
3862 static UWord stats__ctxt_rcdec2 = 0;
3863 static UWord stats__ctxt_rcdec3 = 0;
3864 static UWord stats__ctxt_rcdec_calls = 0;
3865 static UWord stats__ctxt_rcdec_discards = 0;
3866 static UWord stats__ctxt_rcdec1_eq = 0;
3868 static UWord stats__ctxt_tab_curr = 0;
3869 static UWord stats__ctxt_tab_max = 0;
3871 static UWord stats__ctxt_tab_qs = 0;
3872 static UWord stats__ctxt_tab_cmps = 0;
3875 ///////////////////////////////////////////////////////
3876 //// Part (1): A hash table of RCECs
3879 #define N_FRAMES 8
3881 // (UInt) `echo "Reference Counted Execution Context" | md5sum`
3882 #define RCEC_MAGIC 0xab88abb2UL
3884 //#define N_RCEC_TAB 98317 /* prime */
3885 #define N_RCEC_TAB 196613 /* prime */
3887 typedef
3888 struct _RCEC {
3889 UWord magic; /* sanity check only */
3890 struct _RCEC* next;
3891 UWord rc;
3892 UWord rcX; /* used for crosschecking */
3893 UWord frames_hash; /* hash of all the frames */
3894 UWord frames[N_FRAMES];
3896 RCEC;
3898 static RCEC** contextTab = NULL; /* hash table of RCEC*s */
3901 /* Gives an arbitrary total order on RCEC .frames fields */
3902 static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
3903 Word i;
3904 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
3905 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
3906 if (ec1->frames_hash < ec2->frames_hash) return -1;
3907 if (ec1->frames_hash > ec2->frames_hash) return 1;
3908 for (i = 0; i < N_FRAMES; i++) {
3909 if (ec1->frames[i] < ec2->frames[i]) return -1;
3910 if (ec1->frames[i] > ec2->frames[i]) return 1;
3912 return 0;
3916 /* Dec the ref of this RCEC. */
3917 static void ctxt__rcdec ( RCEC* ec )
3919 stats__ctxt_rcdec_calls++;
3920 tl_assert(ec && ec->magic == RCEC_MAGIC);
3921 tl_assert(ec->rc > 0);
3922 ec->rc--;
3925 static void ctxt__rcinc ( RCEC* ec )
3927 tl_assert(ec && ec->magic == RCEC_MAGIC);
3928 ec->rc++;
3932 //////////// BEGIN RCEC pool allocator
3933 static PoolAlloc* rcec_pool_allocator;
3935 static RCEC* alloc_RCEC ( void ) {
3936 return VG_(allocEltPA) ( rcec_pool_allocator );
3939 static void free_RCEC ( RCEC* rcec ) {
3940 tl_assert(rcec->magic == RCEC_MAGIC);
3941 VG_(freeEltPA)( rcec_pool_allocator, rcec );
3943 //////////// END RCEC pool allocator
3946 /* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
3947 move it one step closer to the front of the list, so as to make
3948 subsequent searches for it cheaper. */
3949 static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
3951 RCEC *ec0, *ec1, *ec2;
3952 if (ec == *headp)
3953 tl_assert(0); /* already at head of list */
3954 tl_assert(ec != NULL);
3955 ec0 = *headp;
3956 ec1 = NULL;
3957 ec2 = NULL;
3958 while (True) {
3959 if (ec0 == NULL || ec0 == ec) break;
3960 ec2 = ec1;
3961 ec1 = ec0;
3962 ec0 = ec0->next;
3964 tl_assert(ec0 == ec);
3965 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
3966 RCEC* tmp;
3967 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
3968 predecessor. Swap ec0 and ec1, that is, move ec0 one step
3969 closer to the start of the list. */
3970 tl_assert(ec2->next == ec1);
3971 tl_assert(ec1->next == ec0);
3972 tmp = ec0->next;
3973 ec2->next = ec0;
3974 ec0->next = ec1;
3975 ec1->next = tmp;
3977 else
3978 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
3979 /* it's second in the list. */
3980 tl_assert(*headp == ec1);
3981 tl_assert(ec1->next == ec0);
3982 ec1->next = ec0->next;
3983 ec0->next = ec1;
3984 *headp = ec0;
3989 /* Find the given RCEC in the tree, and return a pointer to it. Or,
3990 if not present, add the given one to the tree (by making a copy of
3991 it, so the caller can immediately deallocate the original) and
3992 return a pointer to the copy. The caller can safely have 'example'
3993 on its stack, since we will always return a pointer to a copy of
3994 it, not to the original. Note that the inserted node will have .rc
3995 of zero and so the caller must immediately increment it. */
3996 __attribute__((noinline))
3997 static RCEC* ctxt__find_or_add ( RCEC* example )
3999 UWord hent;
4000 RCEC* copy;
4001 tl_assert(example && example->magic == RCEC_MAGIC);
4002 tl_assert(example->rc == 0);
4004 /* Search the hash table to see if we already have it. */
4005 stats__ctxt_tab_qs++;
4006 hent = example->frames_hash % N_RCEC_TAB;
4007 copy = contextTab[hent];
4008 while (1) {
4009 if (!copy) break;
4010 tl_assert(copy->magic == RCEC_MAGIC);
4011 stats__ctxt_tab_cmps++;
4012 if (0 == RCEC__cmp_by_frames(copy, example)) break;
4013 copy = copy->next;
4016 if (copy) {
4017 tl_assert(copy != example);
4018 /* optimisation: if it's not at the head of its list, move 1
4019 step fwds, to make future searches cheaper */
4020 if (copy != contextTab[hent]) {
4021 move_RCEC_one_step_forward( &contextTab[hent], copy );
4023 } else {
4024 copy = alloc_RCEC();
4025 tl_assert(copy != example);
4026 *copy = *example;
4027 copy->next = contextTab[hent];
4028 contextTab[hent] = copy;
4029 stats__ctxt_tab_curr++;
4030 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4031 stats__ctxt_tab_max = stats__ctxt_tab_curr;
4033 return copy;
4036 static inline UWord ROLW ( UWord w, Int n )
4038 Int bpw = 8 * sizeof(UWord);
4039 w = (w << n) | (w >> (bpw-n));
4040 return w;
4043 __attribute__((noinline))
4044 static RCEC* get_RCEC ( Thr* thr )
4046 UWord hash, i;
4047 RCEC example;
4048 example.magic = RCEC_MAGIC;
4049 example.rc = 0;
4050 example.rcX = 0;
4051 example.next = NULL;
4052 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
4053 hash = 0;
4054 for (i = 0; i < N_FRAMES; i++) {
4055 hash ^= example.frames[i];
4056 hash = ROLW(hash, 19);
4058 example.frames_hash = hash;
4059 return ctxt__find_or_add( &example );
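/* The hash folds the N_FRAMES captured return addresses with an XOR
   followed by a 19-bit left rotation per frame, so it is sensitive to
   frame order as well as content; ctxt__find_or_add then uses
   frames_hash % N_RCEC_TAB to pick the bucket whose chain is searched
   with RCEC__cmp_by_frames. */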
4062 ///////////////////////////////////////////////////////
4063 //// Part (2):
4064 /// A SparseWA guest-addr -> OldRef, that refers to (1)
4067 // (UInt) `echo "Old Reference Information" | md5sum`
4068 #define OldRef_MAGIC 0x30b1f075UL
4070 /* Records an access: a thread, a context (stack trace), the size &
4071 writeness of the access, and the set of locks held in write mode.
4072 The size (1,2,4,8) is encoded as 00 = 1, 01 = 2, 10 = 4, 11 = 8.
4074 typedef
4075 struct {
4076 RCEC* rcec;
4077 WordSetID locksHeldW;
4078 UInt thrid : SCALARTS_N_THRBITS;
4079 UInt szLg2B : 2;
4080 UInt isW : 1;
4082 Thr_n_RCEC;
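/* For instance, a 4-byte write is stored with szLg2B == 2 and
   isW == 1, and the byte size is recovered later as 1 << szLg2B
   (see libhb_event_map_lookup).  A thrid of 0 never occurs for a
   real access; it is reserved to mark unused accs[] slots in the
   OldRef structure below. */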
4084 #define N_OLDREF_ACCS 5
4086 typedef
4087 struct {
4088 UWord magic; /* sanity check only */
4089 UWord gen; /* when most recently accessed */
4090 /* or free list when not in use */
4091 /* unused slots in this array have .thrid == 0, which is invalid */
4092 Thr_n_RCEC accs[N_OLDREF_ACCS];
4094 OldRef;
4097 //////////// BEGIN OldRef pool allocator
4098 static PoolAlloc* oldref_pool_allocator;
4100 static OldRef* alloc_OldRef ( void ) {
4101 return VG_(allocEltPA) ( oldref_pool_allocator );
4104 static void free_OldRef ( OldRef* r ) {
4105 tl_assert(r->magic == OldRef_MAGIC);
4106 VG_(freeEltPA)( oldref_pool_allocator, r );
4108 //////////// END OldRef pool allocator
4111 static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
4112 static UWord oldrefGen = 0; /* current LRU generation # */
4113 static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
4114 static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
4116 inline static UInt min_UInt ( UInt a, UInt b ) {
4117 return a < b ? a : b;
4120 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4121 first interval is lower, 1 if the first interval is higher, and 0
4122 if there is any overlap. The redundant casts are paranoia,
4123 following what looked distinctly like a bug in gcc-4.1.2, in which
4124 some of the comparisons were done signedly instead of
4125 unsignedly. */
4126 /* Copied from exp-ptrcheck/sg_main.c */
4127 static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4128 Addr a2, SizeT n2 ) {
4129 UWord a1w = (UWord)a1;
4130 UWord n1w = (UWord)n1;
4131 UWord a2w = (UWord)a2;
4132 UWord n2w = (UWord)n2;
4133 tl_assert(n1w > 0 && n2w > 0);
4134 if (a1w + n1w <= a2w) return -1L;
4135 if (a2w + n2w <= a1w) return 1L;
4136 return 0;
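/* For example, cmp_nonempty_intervals(0x1000,4, 0x1002,2) == 0 since
   [0x1000,0x1004) and [0x1002,0x1004) overlap, whereas
   cmp_nonempty_intervals(0x1000,4, 0x1004,2) == -1 because the first
   interval ends where the second begins. */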
4139 static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
4141 OldRef* ref;
4142 RCEC* rcec;
4143 Word i, j;
4144 UWord keyW, valW;
4145 Bool b;
4147 tl_assert(thr);
4148 ThrID thrid = thr->thrid;
4149 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4151 WordSetID locksHeldW = thr->hgthread->locksetW;
4153 rcec = get_RCEC( thr );
4154 ctxt__rcinc(rcec);
4156 UInt szLg2B = 0;
4157 switch (szB) {
4158 /* This doesn't look particularly branch-predictor friendly. */
4159 case 1: szLg2B = 0; break;
4160 case 2: szLg2B = 1; break;
4161 case 4: szLg2B = 2; break;
4162 case 8: szLg2B = 3; break;
4163 default: tl_assert(0);
4166 /* Look in the map to see if we already have a record for this
4167 address. */
4168 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
4170 if (b) {
4172 /* We already have a record for this address. We now need to
4173 see if we have a stack trace pertaining to this (thrid, R/W,
4174 size) triple. */
4175 tl_assert(keyW == a);
4176 ref = (OldRef*)valW;
4177 tl_assert(ref->magic == OldRef_MAGIC);
4179 for (i = 0; i < N_OLDREF_ACCS; i++) {
4180 if (ref->accs[i].thrid != thrid)
4181 continue;
4182 if (ref->accs[i].szLg2B != szLg2B)
4183 continue;
4184 if (ref->accs[i].isW != (UInt)(isW & 1))
4185 continue;
4186 /* else we have a match, so stop looking. */
4187 break;
4190 if (i < N_OLDREF_ACCS) {
4191 /* thread 'thr' has an entry at index 'i'. Update its RCEC. */
4192 if (i > 0) {
4193 Thr_n_RCEC tmp = ref->accs[i-1];
4194 ref->accs[i-1] = ref->accs[i];
4195 ref->accs[i] = tmp;
4196 i--;
4198 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
4199 stats__ctxt_rcdec1++;
4200 ctxt__rcdec( ref->accs[i].rcec );
4201 tl_assert(ref->accs[i].thrid == thrid);
4202 /* Update the RCEC and the W-held lockset. */
4203 ref->accs[i].rcec = rcec;
4204 ref->accs[i].locksHeldW = locksHeldW;
4205 } else {
4206 /* No entry for this (thread, R/W, size, nWHeld) quad.
4207 Shuffle all of them down one slot, and put the new entry
4208 at the start of the array. */
4209 if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
4210 /* the last slot is in use. We must dec the rc on the
4211 associated rcec. */
4212 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
4213 stats__ctxt_rcdec2++;
4214 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
4215 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
4216 ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
4217 } else {
4218 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
4220 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
4221 ref->accs[j] = ref->accs[j-1];
4222 ref->accs[0].thrid = thrid;
4223 ref->accs[0].szLg2B = szLg2B;
4224 ref->accs[0].isW = (UInt)(isW & 1);
4225 ref->accs[0].locksHeldW = locksHeldW;
4226 ref->accs[0].rcec = rcec;
4227 /* thrid==0 is used to signify an empty slot, so we can't
4228 add zero thrid (such a ThrID is invalid anyway). */
4229 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
4232 ref->gen = oldrefGen;
4234 } else {
4236 /* We don't have a record for this address. Create a new one. */
4237 if (oldrefTreeN >= oldrefGenIncAt) {
4238 oldrefGen++;
4239 oldrefGenIncAt = oldrefTreeN + 50000;
4240 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
4241 oldrefGen, oldrefTreeN );
4244 ref = alloc_OldRef();
4245 ref->magic = OldRef_MAGIC;
4246 ref->gen = oldrefGen;
4247 ref->accs[0].thrid = thrid;
4248 ref->accs[0].szLg2B = szLg2B;
4249 ref->accs[0].isW = (UInt)(isW & 1);
4250 ref->accs[0].locksHeldW = locksHeldW;
4251 ref->accs[0].rcec = rcec;
4253 /* thrid==0 is used to signify an empty slot, so we can't
4254 add zero thrid (such a ThrID is invalid anyway). */
4255 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
4257 /* Clear out the rest of the entries */
4258 for (j = 1; j < N_OLDREF_ACCS; j++) {
4259 ref->accs[j].rcec = NULL;
4260 ref->accs[j].thrid = 0;
4261 ref->accs[j].szLg2B = 0;
4262 ref->accs[j].isW = 0;
4263 ref->accs[j].locksHeldW = 0;
4265 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
4266 oldrefTreeN++;
4272 /* Extract info from the conflicting-access machinery. */
4273 Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
4274 /*OUT*/Thr** resThr,
4275 /*OUT*/SizeT* resSzB,
4276 /*OUT*/Bool* resIsW,
4277 /*OUT*/WordSetID* locksHeldW,
4278 Thr* thr, Addr a, SizeT szB, Bool isW )
4280 Word i, j;
4281 OldRef* ref;
4282 UWord keyW, valW;
4283 Bool b;
4285 ThrID cand_thrid;
4286 RCEC* cand_rcec;
4287 Bool cand_isW;
4288 SizeT cand_szB;
4289 WordSetID cand_locksHeldW;
4290 Addr cand_a;
4292 Addr toCheck[15];
4293 Int nToCheck = 0;
4295 tl_assert(thr);
4296 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
4298 ThrID thrid = thr->thrid;
4300 toCheck[nToCheck++] = a;
4301 for (i = -7; i < (Word)szB; i++) {
4302 if (i != 0)
4303 toCheck[nToCheck++] = a + i;
4305 tl_assert(nToCheck <= 15);
4307 /* Now see if we can find a suitable matching event for
4308 any of the addresses in toCheck[0 .. nToCheck-1]. */
4309 for (j = 0; j < nToCheck; j++) {
4311 cand_a = toCheck[j];
4312 // VG_(printf)("test %ld %p\n", j, cand_a);
4314 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
4315 if (!b)
4316 continue;
4318 ref = (OldRef*)valW;
4319 tl_assert(keyW == cand_a);
4320 tl_assert(ref->magic == OldRef_MAGIC);
4321 tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */
4323 cand_thrid = 0; /* invalid; see comments in event_map_bind */
4324 cand_rcec = NULL;
4325 cand_isW = False;
4326 cand_szB = 0;
4327 cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */
4329 for (i = 0; i < N_OLDREF_ACCS; i++) {
4330 Thr_n_RCEC* cand = &ref->accs[i];
4331 cand_rcec = cand->rcec;
4332 cand_thrid = cand->thrid;
4333 cand_isW = (Bool)cand->isW;
4334 cand_szB = 1 << cand->szLg2B;
4335 cand_locksHeldW = cand->locksHeldW;
4337 if (cand_thrid == 0)
4338 /* This slot isn't in use. Ignore it. */
4339 continue;
4341 if (cand_thrid == thrid)
4342 /* This is an access by the same thread, but we're only
4343 interested in accesses from other threads. Ignore. */
4344 continue;
4346 if ((!cand_isW) && (!isW))
4347 /* We don't want to report a read racing against another
4348 read; that's stupid. So in this case move on. */
4349 continue;
4351 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
4352 /* No overlap with the access we're asking about. Ignore. */
4353 continue;
4355 /* We have a match. Stop searching. */
4356 break;
4359 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
4361 if (i < N_OLDREF_ACCS) {
4362 Int n, maxNFrames;
4363 /* return with success */
4364 tl_assert(cand_thrid);
4365 tl_assert(cand_rcec);
4366 tl_assert(cand_rcec->magic == RCEC_MAGIC);
4367 tl_assert(cand_szB >= 1);
4368 /* Count how many non-zero frames we have. */
4369 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
4370 for (n = 0; n < maxNFrames; n++) {
4371 if (0 == cand_rcec->frames[n]) break;
4373 *resEC = VG_(make_ExeContext_from_StackTrace)
4374 (cand_rcec->frames, n);
4375 *resThr = Thr__from_ThrID(cand_thrid);
4376 *resSzB = cand_szB;
4377 *resIsW = cand_isW;
4378 *locksHeldW = cand_locksHeldW;
4379 return True;
4382 /* consider next address in toCheck[] */
4383 } /* for (j = 0; j < nToCheck; j++) */
4385 /* really didn't find anything. */
4386 return False;
4389 static void event_map_init ( void )
4391 Word i;
4393 /* Context (RCEC) pool allocator */
4394 rcec_pool_allocator = VG_(newPA) (
4395 sizeof(RCEC),
4396 1000 /* RCECs per pool */,
4397 HG_(zalloc),
4398 "libhb.event_map_init.1 (RCEC pools)",
4399 HG_(free)
4402 /* Context table */
4403 tl_assert(!contextTab);
4404 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
4405 N_RCEC_TAB * sizeof(RCEC*) );
4406 for (i = 0; i < N_RCEC_TAB; i++)
4407 contextTab[i] = NULL;
4409 /* Oldref pool allocator */
4410 oldref_pool_allocator = VG_(newPA)(
4411 sizeof(OldRef),
4412 1000 /* OldRefs per pool */,
4413 HG_(zalloc),
4414 "libhb.event_map_init.3 (OldRef pools)",
4415 HG_(free)
4418 /* Oldref tree */
4419 tl_assert(!oldrefTree);
4420 oldrefTree = VG_(newSWA)(
4421 HG_(zalloc),
4422 "libhb.event_map_init.4 (oldref tree)",
4423 HG_(free)
4426 oldrefGen = 0;
4427 oldrefGenIncAt = 0;
4428 oldrefTreeN = 0;
4431 static void event_map__check_reference_counts ( Bool before )
4433 RCEC* rcec;
4434 OldRef* oldref;
4435 Word i;
4436 UWord nEnts = 0;
4437 UWord keyW, valW;
4439 /* Set the 'check' reference counts to zero. Also, optionally
4440 check that the real reference counts are non-zero. We allow
4441 these to fall to zero before a GC, but the GC must get rid of
4442 all those that are zero, hence none should be zero after a
4443 GC. */
4444 for (i = 0; i < N_RCEC_TAB; i++) {
4445 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4446 nEnts++;
4447 tl_assert(rcec);
4448 tl_assert(rcec->magic == RCEC_MAGIC);
4449 if (!before)
4450 tl_assert(rcec->rc > 0);
4451 rcec->rcX = 0;
4455 /* check that the stats are sane */
4456 tl_assert(nEnts == stats__ctxt_tab_curr);
4457 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
4459 /* visit all the referencing points, inc check ref counts */
4460 VG_(initIterSWA)( oldrefTree );
4461 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
4462 oldref = (OldRef*)valW;
4463 tl_assert(oldref->magic == OldRef_MAGIC);
4464 for (i = 0; i < N_OLDREF_ACCS; i++) {
4465 ThrID aThrID = oldref->accs[i].thrid;
4466 RCEC* aRef = oldref->accs[i].rcec;
4467 if (aThrID != 0) {
4468 tl_assert(aRef);
4469 tl_assert(aRef->magic == RCEC_MAGIC);
4470 aRef->rcX++;
4471 } else {
4472 tl_assert(!aRef);
4477 /* compare check ref counts with actual */
4478 for (i = 0; i < N_RCEC_TAB; i++) {
4479 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4480 tl_assert(rcec->rc == rcec->rcX);
4485 __attribute__((noinline))
4486 static void event_map_maybe_GC ( void )
4488 OldRef* oldref;
4489 UWord keyW, valW, retained, maxGen;
4490 XArray* refs2del;
4491 Word i, j, n2del;
4493 UWord* genMap = NULL;
4494 UWord genMap_min = 0;
4495 UWord genMap_size = 0;
4497 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
4498 return;
4500 if (0)
4501 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
4503 /* Check for sane command line params. Limit values must match
4504 those in hg_process_cmd_line_option. */
4505 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
4506 tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 );
4508 /* Check our counting is sane (expensive) */
4509 if (CHECK_CEM)
4510 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
4512 /* Check the reference counts (expensive) */
4513 if (CHECK_CEM)
4514 event_map__check_reference_counts( True/*before*/ );
4516 /* Compute the distribution of generation values in the ref tree.
4517 There are likely only to be a few different generation numbers
4518 in the whole tree, but we don't know what they are. Hence use a
4519 dynamically resized array of counters. The array is genMap[0
4520 .. genMap_size-1], where genMap[0] is the count for the
4521 generation number genMap_min, genMap[1] is the count for
4522 genMap_min+1, etc. If a new number is seen outside the range
4523 [genMap_min .. genMap_min + genMap_size - 1] then the array is
4524 copied into a larger array, and genMap_min and genMap_size are
4525 adjusted accordingly. */
4527 /* genMap :: generation-number -> count-of-nodes-with-that-number */
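/* Worked example of the resizing below: if the first generation seen
   is 12, then genMap_min == 12 and genMap_size == 1.  A later key of
   9 triggers the "key < genMap_min" case: more == 3, the old counts
   are copied to the upper end of a 4-entry array, genMap_min becomes
   9 and ea == 0.  A subsequent key of 14 triggers the final case:
   more == 14 - (9+4) + 1 == 2, the counts are copied to the lower
   end of a 6-entry array and ea == 5, so the array now covers
   generations 9..14. */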
4529 VG_(initIterSWA)( oldrefTree );
4530 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
4532 UWord ea, key;
4533 oldref = (OldRef*)valW;
4534 key = oldref->gen;
4536 /* BEGIN find 'ea', which is the index in genMap holding the
4537 count for generation number 'key'. */
4538 if (UNLIKELY(genMap == NULL)) {
4539 /* deal with the first key to be seen, so that the following
4540 cases don't need to handle the complexity of a NULL count
4541 array. */
4542 genMap_min = key;
4543 genMap_size = 1;
4544 genMap = HG_(zalloc)( "libhb.emmG.1a",
4545 genMap_size * sizeof(UWord) );
4546 ea = 0;
4547 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
4548 key, genMap_min, genMap_min+genMap_size-1 );
4550 else
4551 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
4552 /* this is the expected (almost-always-happens) case: 'key'
4553 is already mapped in the array. */
4554 ea = key - genMap_min;
4556 else
4557 if (key < genMap_min) {
4558 /* 'key' appears before the start of the current array.
4559 Extend the current array by allocating a larger one and
4560 copying the current one to the upper end of it. */
4561 Word more;
4562 UWord* map2;
4563 more = genMap_min - key;
4564 tl_assert(more > 0);
4565 map2 = HG_(zalloc)( "libhb.emmG.1b",
4566 (genMap_size + more) * sizeof(UWord) );
4567 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
4568 HG_(free)( genMap );
4569 genMap = map2;
4570 genMap_size += more;
4571 genMap_min -= more;
4572 ea = 0;
4573 tl_assert(genMap_min == key);
4574 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
4575 key, genMap_min, genMap_min+genMap_size-1 );
4577 else {
4578 /* 'key' appears after the end of the current array. Extend
4579 the current array by allocating a larger one and copying
4580 the current one to the lower end of it. */
4581 Word more;
4582 UWord* map2;
4583 tl_assert(key >= genMap_min + genMap_size);
4584 more = key - (genMap_min + genMap_size) + 1;
4585 tl_assert(more > 0);
4586 map2 = HG_(zalloc)( "libhb.emmG.1c",
4587 (genMap_size + more) * sizeof(UWord) );
4588 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
4589 HG_(free)( genMap );
4590 genMap = map2;
4591 genMap_size += more;
4592 ea = genMap_size - 1;
4593 tl_assert(genMap_min + genMap_size - 1 == key);
4594 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
4595 key, genMap_min, genMap_min+genMap_size-1 );
4597 /* END find 'ea' from 'key' */
4599 tl_assert(ea >= 0 && ea < genMap_size);
4600 /* and the whole point of this elaborate computation of 'ea' is .. */
4601 genMap[ea]++;
4604 tl_assert(genMap);
4605 tl_assert(genMap_size > 0);
4607 /* Sanity check what we just computed */
4608 { UWord sum = 0;
4609 for (i = 0; i < genMap_size; i++) {
4610 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
4611 i + genMap_min, genMap[i] );
4612 sum += genMap[i];
4614 tl_assert(sum == oldrefTreeN);
4617 /* Figure out how many generations to throw away */
4618 retained = oldrefTreeN;
4619 maxGen = 0;
4621 for (i = 0; i < genMap_size; i++) {
4622 keyW = i + genMap_min;
4623 valW = genMap[i];
4624 tl_assert(keyW > 0); /* can't allow a generation # 0 */
4625 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
4626 tl_assert(keyW >= maxGen);
4627 tl_assert(retained >= valW);
4628 if (retained - valW
4629 > (UWord)(HG_(clo_conflict_cache_size)
4630 * EVENT_MAP_GC_DISCARD_FRACTION)) {
4631 retained -= valW;
4632 maxGen = keyW;
4633 } else {
4634 break;
4638 HG_(free)(genMap);
4640 tl_assert(retained >= 0 && retained <= oldrefTreeN);
4642 /* Now make up a big list of the oldrefTree entries we want to
4643 delete. We can't simultaneously traverse the tree and delete
4644 stuff from it, so first we need to copy them off somewhere
4645 else. (sigh) */
4646 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
4647 HG_(free), sizeof(Addr) );
4649 if (retained < oldrefTreeN) {
4651 /* This is the normal (expected) case. We discard any ref whose
4652 generation number <= maxGen. */
4653 VG_(initIterSWA)( oldrefTree );
4654 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
4655 oldref = (OldRef*)valW;
4656 tl_assert(oldref->magic == OldRef_MAGIC);
4657 if (oldref->gen <= maxGen) {
4658 VG_(addToXA)( refs2del, &keyW );
4661 if (VG_(clo_stats)) {
4662 VG_(message)(Vg_DebugMsg,
4663 "libhb: EvM GC: delete generations %lu and below, "
4664 "retaining %lu entries\n",
4665 maxGen, retained );
4668 } else {
4670 static UInt rand_seed = 0; /* leave as static */
4672 /* Degenerate case: there's only one generation in the entire
4673 tree, so we need to have some other way of deciding which
4674 refs to throw away. Just throw out half of them randomly. */
4675 tl_assert(retained == oldrefTreeN);
4676 VG_(initIterSWA)( oldrefTree );
4677 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
4678 UInt n;
4679 oldref = (OldRef*)valW;
4680 tl_assert(oldref->magic == OldRef_MAGIC);
4681 n = VG_(random)( &rand_seed );
4682 if ((n & 0xFFF) < 0x800) {
4683 VG_(addToXA)( refs2del, &keyW );
4684 retained--;
4687 if (VG_(clo_stats)) {
4688 VG_(message)(Vg_DebugMsg,
4689 "libhb: EvM GC: randomly delete half the entries, "
4690 "retaining %lu entries\n",
4691 retained );
4696 n2del = VG_(sizeXA)( refs2del );
4697 tl_assert(n2del == (Word)(oldrefTreeN - retained));
4699 if (0) VG_(printf)("%s","deleting entries\n");
4700 for (i = 0; i < n2del; i++) {
4701 Bool b;
4702 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
4703 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
4704 tl_assert(b);
4705 tl_assert(keyW == ga2del);
4706 oldref = (OldRef*)valW;
4707 for (j = 0; j < N_OLDREF_ACCS; j++) {
4708 ThrID aThrID = oldref->accs[j].thrid;
4709 RCEC* aRef = oldref->accs[j].rcec;
4710 if (aRef) {
4711 tl_assert(aThrID != 0);
4712 stats__ctxt_rcdec3++;
4713 ctxt__rcdec( aRef );
4714 } else {
4715 tl_assert(aThrID == 0);
4719 free_OldRef( oldref );
4722 VG_(deleteXA)( refs2del );
4724 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
4726 oldrefTreeN = retained;
4727 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
4729 /* Throw away all RCECs with zero reference counts */
4730 for (i = 0; i < N_RCEC_TAB; i++) {
4731 RCEC** pp = &contextTab[i];
4732 RCEC* p = *pp;
4733 while (p) {
4734 if (p->rc == 0) {
4735 *pp = p->next;
4736 free_RCEC(p);
4737 p = *pp;
4738 tl_assert(stats__ctxt_tab_curr > 0);
4739 stats__ctxt_tab_curr--;
4740 } else {
4741 pp = &p->next;
4742 p = p->next;
4747 /* Check the reference counts (expensive) */
4748 if (CHECK_CEM)
4749 event_map__check_reference_counts( False/*after*/ );
4751 //if (0)
4752 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
4753 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
4758 /////////////////////////////////////////////////////////
4759 // //
4760 // Core MSM //
4761 // //
4762 /////////////////////////////////////////////////////////
4764 /* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4765 Nov 08, and again after [...],
4766 June 09. */
4768 static ULong stats__msmcread = 0;
4769 static ULong stats__msmcread_change = 0;
4770 static ULong stats__msmcwrite = 0;
4771 static ULong stats__msmcwrite_change = 0;
4773 /* Some notes on the H1 history mechanism:
4775 Transition rules are:
4777 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4778 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4780 After any access by a thread T to a location L, L's constraint pair
4781 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4783 After a race by thread T conflicting with some previous access by
4784 some other thread U, for a location with constraint (before
4785 processing the later access) (Cr,Cw), Cw[U] is the segment in
4786 which the previous access lies.
4788 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4789 are compared so as to find out which thread(s) this access
4790 conflicts with. Once that is established, we also require the
4791 pre-update Cw for the location, so we can index into it for those
4792 threads, to get the scalar clock values for the point at which the
4793 former accesses were made. (In fact we only bother to do any of
4794 this for an arbitrarily chosen one of the conflicting threads, as
4795 that's simpler, avoids flooding the user with vast amounts of
4796 mostly useless information, and because the program is wrong if it
4797 contains any races at all -- so we don't really need to show all
4798 conflicting access pairs initially, so long as we show none only if
4799 none exist).
4803 That requires the auxiliary proof that
4805 (Cr `join` Kw)[T] == Kw[T]
4807 Why should that be true? Because for any thread T, Kw[T] >= the
4808 scalar clock value for T known by any other thread. In other
4809 words, because T's value for its own scalar clock is at least as up
4810 to date as the value for it known by any other thread (that is true
4811 for both the R- and W- scalar clocks). Hence no other thread will
4812 be able to feed in a value for that element (indirectly via a
4813 constraint) which will exceed Kw[T], and hence the join cannot
4814 cause that particular element to advance.
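/* A small concrete instance of that argument, for two threads T and U:
   suppose Kw == [T:7, U:2] and Cr == [T:5, U:6].  Since no constraint
   can carry a value for T newer than T's own clock, Cr[T] <= Kw[T]
   (5 <= 7 here), so the per-element max gives
   Cr `join` Kw == [T:7, U:6], whose T component is exactly Kw[T]. */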
4817 __attribute__((noinline))
4818 static void record_race_info ( Thr* acc_thr,
4819 Addr acc_addr, SizeT szB, Bool isWrite,
4820 VtsID Cfailed,
4821 VtsID Kfailed,
4822 VtsID Cw )
4824 /* Call here to report a race. We just hand it onwards to
4825 HG_(record_error_Race). If that in turn discovers that the
4826 error is going to be collected, then, at history_level 2, that
4827 queries the conflicting-event map. The alternative would be to
4828 query it right here. But that causes a lot of pointless queries
4829 for errors which will shortly be discarded as duplicates, and
4830 can become a performance overhead; so we defer the query until
4831 we know the error is not a duplicate. */
4833 /* Stacks for the bounds of the (or one of the) conflicting
4834 segment(s). These are only set at history_level 1. */
4835 ExeContext* hist1_seg_start = NULL;
4836 ExeContext* hist1_seg_end = NULL;
4837 Thread* hist1_conf_thr = NULL;
4839 tl_assert(acc_thr);
4840 tl_assert(acc_thr->hgthread);
4841 tl_assert(acc_thr->hgthread->hbthr == acc_thr);
4842 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4844 if (HG_(clo_history_level) == 1) {
4845 Bool found;
4846 Word firstIx, lastIx;
4847 ULong_n_EC key;
4849 /* At history_level 1, we must round up the relevant stack-pair
4850 for the conflicting segment right now. This is because
4851 deferring it is complex; we can't (easily) put Kfailed and
4852 Cfailed into the XError and wait for later without
4853 getting tied up in difficulties with VtsID reference
4854 counting. So just do it now. */
4855 Thr* confThr;
4856 ULong confTym = 0;
4857 /* Which thread are we in conflict with? There may be more than
4858 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4859 (in fact it's the one with the lowest Thr* value). */
4860 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
4861 /* This must exist! since if it was NULL then there's no
4862 conflict (semantics of return value of
4863 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
4864 called us, just checked exactly this -- that there was in
4865 fact a race. */
4866 tl_assert(confThr);
4868 /* Get the scalar clock value that the conflicting thread
4869 introduced into the constraint. A careful examination of the
4870 base machine rules shows that this must be the same as the
4871 conflicting thread's scalar clock when it created this
4872 constraint. Hence we know the scalar clock of the
4873 conflicting thread when the conflicting access was made. */
4874 confTym = VtsID__indexAt( Cfailed, confThr );
4876 /* Using this scalar clock, index into the conflicting thread's
4877 collection of stack traces made each time its vector clock
4878 (hence its scalar clock) changed. This gives the stack
4879 traces at the start and end of the conflicting segment (well,
4880 as per comment just above, of one of the conflicting
4881 segments, if there are more than one). */
4882 key.ull = confTym;
4883 key.ec = NULL;
4884 /* tl_assert(confThr); -- asserted just above */
4885 tl_assert(confThr->local_Kws_n_stacks);
4886 firstIx = lastIx = 0;
4887 found = VG_(lookupXA_UNSAFE)(
4888 confThr->local_Kws_n_stacks,
4889 &key, &firstIx, &lastIx,
4890 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
4892 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
4893 "confTym %llu found %d (%lu,%lu)\n",
4894 Cfailed, Kfailed, Cw,
4895 confThr, confTym, found, firstIx, lastIx);
4896 /* We can't indefinitely collect stack traces at VTS
4897 transitions, since we'd eventually run out of memory. Hence
4898 note_local_Kw_n_stack_for will eventually throw away old
4899 ones, which in turn means we might fail to find index value
4900 confTym in the array. */
4901 if (found) {
4902 ULong_n_EC *pair_start, *pair_end;
4903 pair_start
4904 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
4905 hist1_seg_start = pair_start->ec;
4906 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
4907 pair_end
4908 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
4909 lastIx+1 );
4910 /* from properties of VG_(lookupXA) and the comparison fn used: */
4911 tl_assert(pair_start->ull < pair_end->ull);
4912 hist1_seg_end = pair_end->ec;
4913 /* Could do a bit better here. It may be that pair_end
4914 doesn't have a stack, but the following entries in the
4915 array have the same scalar Kw and do have a stack. So
4916 we should search a bit further along the array than
4917 lastIx+1 if hist1_seg_end is NULL. */
4918 } else {
4919 if (!confThr->llexit_done)
4920 hist1_seg_end = main_get_EC( confThr );
4922 // seg_start could be NULL iff this is the first stack in the thread
4923 //if (seg_start) VG_(pp_ExeContext)(seg_start);
4924 //if (seg_end) VG_(pp_ExeContext)(seg_end);
4925 hist1_conf_thr = confThr->hgthread;
4929 HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
4930 szB, isWrite,
4931 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
4934 static Bool is_sane_SVal_C ( SVal sv ) {
4935 Bool leq;
4936 if (!SVal__isC(sv)) return True;
4937 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
4938 return leq;
4942 /* Compute new state following a read */
4943 static inline SVal msmcread ( SVal svOld,
4944 /* The following are only needed for
4945 creating error reports. */
4946 Thr* acc_thr,
4947 Addr acc_addr, SizeT szB )
4949 SVal svNew = SVal_INVALID;
4950 stats__msmcread++;
4952 /* Redundant sanity check on the constraints */
4953 if (CHECK_MSM) {
4954 tl_assert(is_sane_SVal_C(svOld));
4957 if (LIKELY(SVal__isC(svOld))) {
4958 VtsID tviR = acc_thr->viR;
4959 VtsID tviW = acc_thr->viW;
4960 VtsID rmini = SVal__unC_Rmin(svOld);
4961 VtsID wmini = SVal__unC_Wmin(svOld);
4962 Bool leq = VtsID__cmpLEQ(rmini,tviR);
4963 if (LIKELY(leq)) {
4964 /* no race */
4965 /* Note: RWLOCK subtlety: use tviW, not tviR */
4966 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4967 goto out;
4968 } else {
4969 /* assert on sanity of constraints. */
4970 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4971 tl_assert(leqxx);
4972 // same as in non-race case
4973 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4974 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
4975 rmini, /* Cfailed */
4976 tviR, /* Kfailed */
4977 wmini /* Cw */ );
4978 goto out;
4981 if (SVal__isA(svOld)) {
4982 /* reading no-access memory (sigh); leave unchanged */
4983 /* check for no pollution */
4984 tl_assert(svOld == SVal_NOACCESS);
4985 svNew = SVal_NOACCESS;
4986 goto out;
4988 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
4989 tl_assert(0);
4991 out:
4992 if (CHECK_MSM) {
4993 tl_assert(is_sane_SVal_C(svNew));
4995 if (UNLIKELY(svNew != svOld)) {
4996 tl_assert(svNew != SVal_INVALID);
4997 if (HG_(clo_history_level) >= 2
4998 && SVal__isC(svOld) && SVal__isC(svNew)) {
4999 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
5000 stats__msmcread_change++;
5003 return svNew;
5007 /* Compute new state following a write */
5008 static inline SVal msmcwrite ( SVal svOld,
5009 /* The following are only needed for
5010 creating error reports. */
5011 Thr* acc_thr,
5012 Addr acc_addr, SizeT szB )
5014 SVal svNew = SVal_INVALID;
5015 stats__msmcwrite++;
5017 /* Redundant sanity check on the constraints */
5018 if (CHECK_MSM) {
5019 tl_assert(is_sane_SVal_C(svOld));
5022 if (LIKELY(SVal__isC(svOld))) {
5023 VtsID tviW = acc_thr->viW;
5024 VtsID wmini = SVal__unC_Wmin(svOld);
5025 Bool leq = VtsID__cmpLEQ(wmini,tviW);
5026 if (LIKELY(leq)) {
5027 /* no race */
5028 svNew = SVal__mkC( tviW, tviW );
5029 goto out;
5030 } else {
5031 VtsID rmini = SVal__unC_Rmin(svOld);
5032 /* assert on sanity of constraints. */
5033 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5034 tl_assert(leqxx);
5035 // same as in non-race case
5036 // proof: in the non-race case, we have
5037 // rmini <= wmini (invar on constraints)
5038 // tviW <= tviR (invar on thread clocks)
5039 // wmini <= tviW (from run-time check)
5040 // hence from transitivity of <= we have
5041 // rmini <= wmini <= tviW
5042 // and so join(rmini,tviW) == tviW
5043 // and join(wmini,tviW) == tviW
5044 // qed.
5045 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5046 VtsID__join2(wmini, tviW) );
5047 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
5048 wmini, /* Cfailed */
5049 tviW, /* Kfailed */
5050 wmini /* Cw */ );
5051 goto out;
5054 if (SVal__isA(svOld)) {
5055 /* writing no-access memory (sigh); leave unchanged */
5056 /* check for no pollution */
5057 tl_assert(svOld == SVal_NOACCESS);
5058 svNew = SVal_NOACCESS;
5059 goto out;
5061 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
5062 tl_assert(0);
5064 out:
5065 if (CHECK_MSM) {
5066 tl_assert(is_sane_SVal_C(svNew));
5068 if (UNLIKELY(svNew != svOld)) {
5069 tl_assert(svNew != SVal_INVALID);
5070 if (HG_(clo_history_level) >= 2
5071 && SVal__isC(svOld) && SVal__isC(svNew)) {
5072 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
5073 stats__msmcwrite_change++;
5076 return svNew;
5080 /////////////////////////////////////////////////////////
5081 // //
5082 // Apply core MSM to specific memory locations //
5083 // //
5084 /////////////////////////////////////////////////////////
5086 /*------------- ZSM accesses: 8 bit sapply ------------- */
5088 static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
5089 CacheLine* cl;
5090 UWord cloff, tno, toff;
5091 SVal svOld, svNew;
5092 UShort descr;
5093 stats__cline_cread08s++;
5094 cl = get_cacheline(a);
5095 cloff = get_cacheline_offset(a);
5096 tno = get_treeno(a);
5097 toff = get_tree_offset(a); /* == 0 .. 7 */
5098 descr = cl->descrs[tno];
5099 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5100 SVal* tree = &cl->svals[tno << 3];
5101 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5102 if (CHECK_ZSM)
5103 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5105 svOld = cl->svals[cloff];
5106 svNew = msmcread( svOld, thr,a,1 );
5107 if (CHECK_ZSM)
5108 tl_assert(svNew != SVal_INVALID);
5109 cl->svals[cloff] = svNew;
5112 static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
5113 CacheLine* cl;
5114 UWord cloff, tno, toff;
5115 SVal svOld, svNew;
5116 UShort descr;
5117 stats__cline_cwrite08s++;
5118 cl = get_cacheline(a);
5119 cloff = get_cacheline_offset(a);
5120 tno = get_treeno(a);
5121 toff = get_tree_offset(a); /* == 0 .. 7 */
5122 descr = cl->descrs[tno];
5123 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5124 SVal* tree = &cl->svals[tno << 3];
5125 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5126 if (CHECK_ZSM)
5127 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5129 svOld = cl->svals[cloff];
5130 svNew = msmcwrite( svOld, thr,a,1 );
5131 if (CHECK_ZSM)
5132 tl_assert(svNew != SVal_INVALID);
5133 cl->svals[cloff] = svNew;
5136 /*------------- ZSM accesses: 16 bit sapply ------------- */
5138 static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
5139 CacheLine* cl;
5140 UWord cloff, tno, toff;
5141 SVal svOld, svNew;
5142 UShort descr;
5143 stats__cline_cread16s++;
5144 if (UNLIKELY(!aligned16(a))) goto slowcase;
5145 cl = get_cacheline(a);
5146 cloff = get_cacheline_offset(a);
5147 tno = get_treeno(a);
5148 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5149 descr = cl->descrs[tno];
5150 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5151 if (valid_value_is_below_me_16(descr, toff)) {
5152 goto slowcase;
5153 } else {
5154 SVal* tree = &cl->svals[tno << 3];
5155 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5157 if (CHECK_ZSM)
5158 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5160 svOld = cl->svals[cloff];
5161 svNew = msmcread( svOld, thr,a,2 );
5162 if (CHECK_ZSM)
5163 tl_assert(svNew != SVal_INVALID);
5164 cl->svals[cloff] = svNew;
5165 return;
5166 slowcase: /* misaligned, or must go further down the tree */
5167 stats__cline_16to8splits++;
5168 zsm_sapply08__msmcread( thr, a + 0 );
5169 zsm_sapply08__msmcread( thr, a + 1 );
5172 static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
5173 CacheLine* cl;
5174 UWord cloff, tno, toff;
5175 SVal svOld, svNew;
5176 UShort descr;
5177 stats__cline_cwrite16s++;
5178 if (UNLIKELY(!aligned16(a))) goto slowcase;
5179 cl = get_cacheline(a);
5180 cloff = get_cacheline_offset(a);
5181 tno = get_treeno(a);
5182 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5183 descr = cl->descrs[tno];
5184 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5185 if (valid_value_is_below_me_16(descr, toff)) {
5186 goto slowcase;
5187 } else {
5188 SVal* tree = &cl->svals[tno << 3];
5189 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5191 if (CHECK_ZSM)
5192 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5194 svOld = cl->svals[cloff];
5195 svNew = msmcwrite( svOld, thr,a,2 );
5196 if (CHECK_ZSM)
5197 tl_assert(svNew != SVal_INVALID);
5198 cl->svals[cloff] = svNew;
5199 return;
5200 slowcase: /* misaligned, or must go further down the tree */
5201 stats__cline_16to8splits++;
5202 zsm_sapply08__msmcwrite( thr, a + 0 );
5203 zsm_sapply08__msmcwrite( thr, a + 1 );
5206 /*------------- ZSM accesses: 32 bit sapply ------------- */
5208 static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
5209 CacheLine* cl;
5210 UWord cloff, tno, toff;
5211 SVal svOld, svNew;
5212 UShort descr;
5213 stats__cline_cread32s++;
5214 if (UNLIKELY(!aligned32(a))) goto slowcase;
5215 cl = get_cacheline(a);
5216 cloff = get_cacheline_offset(a);
5217 tno = get_treeno(a);
5218 toff = get_tree_offset(a); /* == 0 or 4 */
5219 descr = cl->descrs[tno];
5220 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5221 if (valid_value_is_above_me_32(descr, toff)) {
5222 SVal* tree = &cl->svals[tno << 3];
5223 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5224 } else {
5225 goto slowcase;
5227 if (CHECK_ZSM)
5228 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5230 svOld = cl->svals[cloff];
5231 svNew = msmcread( svOld, thr,a,4 );
5232 if (CHECK_ZSM)
5233 tl_assert(svNew != SVal_INVALID);
5234 cl->svals[cloff] = svNew;
5235 return;
5236 slowcase: /* misaligned, or must go further down the tree */
5237 stats__cline_32to16splits++;
5238 zsm_sapply16__msmcread( thr, a + 0 );
5239 zsm_sapply16__msmcread( thr, a + 2 );
5242 static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
5243 CacheLine* cl;
5244 UWord cloff, tno, toff;
5245 SVal svOld, svNew;
5246 UShort descr;
5247 stats__cline_cwrite32s++;
5248 if (UNLIKELY(!aligned32(a))) goto slowcase;
5249 cl = get_cacheline(a);
5250 cloff = get_cacheline_offset(a);
5251 tno = get_treeno(a);
5252 toff = get_tree_offset(a); /* == 0 or 4 */
5253 descr = cl->descrs[tno];
5254 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5255 if (valid_value_is_above_me_32(descr, toff)) {
5256 SVal* tree = &cl->svals[tno << 3];
5257 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5258 } else {
5259 goto slowcase;
5261 if (CHECK_ZSM)
5262 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5264 svOld = cl->svals[cloff];
5265 svNew = msmcwrite( svOld, thr,a,4 );
5266 if (CHECK_ZSM)
5267 tl_assert(svNew != SVal_INVALID);
5268 cl->svals[cloff] = svNew;
5269 return;
5270 slowcase: /* misaligned, or must go further down the tree */
5271 stats__cline_32to16splits++;
5272 zsm_sapply16__msmcwrite( thr, a + 0 );
5273 zsm_sapply16__msmcwrite( thr, a + 2 );
5276 /*------------- ZSM accesses: 64 bit sapply ------------- */
5278 static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
5279 CacheLine* cl;
5280 UWord cloff, tno;
5281 //UWord toff;
5282 SVal svOld, svNew;
5283 UShort descr;
5284 stats__cline_cread64s++;
5285 if (UNLIKELY(!aligned64(a))) goto slowcase;
5286 cl = get_cacheline(a);
5287 cloff = get_cacheline_offset(a);
5288 tno = get_treeno(a);
5289 //toff = get_tree_offset(a); /* == 0, unused */
5290 descr = cl->descrs[tno];
5291 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5292 goto slowcase;
5294 svOld = cl->svals[cloff];
5295 svNew = msmcread( svOld, thr,a,8 );
5296 if (CHECK_ZSM)
5297 tl_assert(svNew != SVal_INVALID);
5298 cl->svals[cloff] = svNew;
5299 return;
5300 slowcase: /* misaligned, or must go further down the tree */
5301 stats__cline_64to32splits++;
5302 zsm_sapply32__msmcread( thr, a + 0 );
5303 zsm_sapply32__msmcread( thr, a + 4 );
5306 static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
5307 CacheLine* cl;
5308 UWord cloff, tno;
5309 //UWord toff;
5310 SVal svOld, svNew;
5311 UShort descr;
5312 stats__cline_cwrite64s++;
5313 if (UNLIKELY(!aligned64(a))) goto slowcase;
5314 cl = get_cacheline(a);
5315 cloff = get_cacheline_offset(a);
5316 tno = get_treeno(a);
5317 //toff = get_tree_offset(a); /* == 0, unused */
5318 descr = cl->descrs[tno];
5319 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5320 goto slowcase;
5322 svOld = cl->svals[cloff];
5323 svNew = msmcwrite( svOld, thr,a,8 );
5324 if (CHECK_ZSM)
5325 tl_assert(svNew != SVal_INVALID);
5326 cl->svals[cloff] = svNew;
5327 return;
5328 slowcase: /* misaligned, or must go further down the tree */
5329 stats__cline_64to32splits++;
5330 zsm_sapply32__msmcwrite( thr, a + 0 );
5331 zsm_sapply32__msmcwrite( thr, a + 4 );
5334 /*--------------- ZSM accesses: 8 bit swrite --------------- */
5336 static
5337 void zsm_swrite08 ( Addr a, SVal svNew ) {
5338 CacheLine* cl;
5339 UWord cloff, tno, toff;
5340 UShort descr;
5341 stats__cline_swrite08s++;
5342 cl = get_cacheline(a);
5343 cloff = get_cacheline_offset(a);
5344 tno = get_treeno(a);
5345 toff = get_tree_offset(a); /* == 0 .. 7 */
5346 descr = cl->descrs[tno];
5347 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5348 SVal* tree = &cl->svals[tno << 3];
5349 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5350 if (CHECK_ZSM)
5351 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5353 tl_assert(svNew != SVal_INVALID);
5354 cl->svals[cloff] = svNew;
5357 /*--------------- ZSM accesses: 16 bit swrite --------------- */
5359 static
5360 void zsm_swrite16 ( Addr a, SVal svNew ) {
5361 CacheLine* cl;
5362 UWord cloff, tno, toff;
5363 UShort descr;
5364 stats__cline_swrite16s++;
5365 if (UNLIKELY(!aligned16(a))) goto slowcase;
5366 cl = get_cacheline(a);
5367 cloff = get_cacheline_offset(a);
5368 tno = get_treeno(a);
5369 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5370 descr = cl->descrs[tno];
5371 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5372 if (valid_value_is_below_me_16(descr, toff)) {
5373 /* Writing at this level. Need to fix up 'descr'. */
5374 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5375          /* At this point, the tree does not match cl->descrs[tno] any
5376 more. The assignments below will fix it up. */
5377 } else {
5378 /* We can't indiscriminately write on the w16 node as in the
5379 w64 case, as that might make the node inconsistent with
5380 its parent. So first, pull down to this level. */
5381 SVal* tree = &cl->svals[tno << 3];
5382 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5383 if (CHECK_ZSM)
5384 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5387 tl_assert(svNew != SVal_INVALID);
5388 cl->svals[cloff + 0] = svNew;
5389 cl->svals[cloff + 1] = SVal_INVALID;
5390 return;
5391 slowcase: /* misaligned */
5392 stats__cline_16to8splits++;
5393 zsm_swrite08( a + 0, svNew );
5394 zsm_swrite08( a + 1, svNew );
5397 /*--------------- ZSM accesses: 32 bit swrite --------------- */
5399 static
5400 void zsm_swrite32 ( Addr a, SVal svNew ) {
5401 CacheLine* cl;
5402 UWord cloff, tno, toff;
5403 UShort descr;
5404 stats__cline_swrite32s++;
5405 if (UNLIKELY(!aligned32(a))) goto slowcase;
5406 cl = get_cacheline(a);
5407 cloff = get_cacheline_offset(a);
5408 tno = get_treeno(a);
5409 toff = get_tree_offset(a); /* == 0 or 4 */
5410 descr = cl->descrs[tno];
5411 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5412 if (valid_value_is_above_me_32(descr, toff)) {
5413 /* We can't indiscriminately write on the w32 node as in the
5414 w64 case, as that might make the node inconsistent with
5415 its parent. So first, pull down to this level. */
5416 SVal* tree = &cl->svals[tno << 3];
5417 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5418 if (CHECK_ZSM)
5419 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5420 } else {
5421 /* Writing at this level. Need to fix up 'descr'. */
5422 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5423          /* At this point, the tree does not match cl->descrs[tno] any
5424 more. The assignments below will fix it up. */
5427 tl_assert(svNew != SVal_INVALID);
5428 cl->svals[cloff + 0] = svNew;
5429 cl->svals[cloff + 1] = SVal_INVALID;
5430 cl->svals[cloff + 2] = SVal_INVALID;
5431 cl->svals[cloff + 3] = SVal_INVALID;
5432 return;
5433 slowcase: /* misaligned */
5434 stats__cline_32to16splits++;
5435 zsm_swrite16( a + 0, svNew );
5436 zsm_swrite16( a + 2, svNew );
5439 /*--------------- ZSM accesses: 64 bit swrite --------------- */
5441 static
5442 void zsm_swrite64 ( Addr a, SVal svNew ) {
5443 CacheLine* cl;
5444 UWord cloff, tno;
5445 //UWord toff;
5446 stats__cline_swrite64s++;
5447 if (UNLIKELY(!aligned64(a))) goto slowcase;
5448 cl = get_cacheline(a);
5449 cloff = get_cacheline_offset(a);
5450 tno = get_treeno(a);
5451 //toff = get_tree_offset(a); /* == 0, unused */
5452 cl->descrs[tno] = TREE_DESCR_64;
5453 tl_assert(svNew != SVal_INVALID);
5454 cl->svals[cloff + 0] = svNew;
5455 cl->svals[cloff + 1] = SVal_INVALID;
5456 cl->svals[cloff + 2] = SVal_INVALID;
5457 cl->svals[cloff + 3] = SVal_INVALID;
5458 cl->svals[cloff + 4] = SVal_INVALID;
5459 cl->svals[cloff + 5] = SVal_INVALID;
5460 cl->svals[cloff + 6] = SVal_INVALID;
5461 cl->svals[cloff + 7] = SVal_INVALID;
5462 return;
5463 slowcase: /* misaligned */
5464 stats__cline_64to32splits++;
5465 zsm_swrite32( a + 0, svNew );
5466 zsm_swrite32( a + 4, svNew );
5469 /*------------- ZSM accesses: 8 bit sread/scopy ------------- */
5471 static
5472 SVal zsm_sread08 ( Addr a ) {
5473 CacheLine* cl;
5474 UWord cloff, tno, toff;
5475 UShort descr;
5476 stats__cline_sread08s++;
5477 cl = get_cacheline(a);
5478 cloff = get_cacheline_offset(a);
5479 tno = get_treeno(a);
5480 toff = get_tree_offset(a); /* == 0 .. 7 */
5481 descr = cl->descrs[tno];
5482 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5483 SVal* tree = &cl->svals[tno << 3];
5484 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5486 return cl->svals[cloff];
5489 static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
5490 SVal sv;
5491 stats__cline_scopy08s++;
5492 sv = zsm_sread08( src );
5493 zsm_swrite08( dst, sv );
5497 /* Block-copy states (needed for implementing realloc()). Note this
5498 doesn't change the filtering arrangements. The caller of
5499 zsm_scopy_range needs to attend to that. */
5501 static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
5503 SizeT i;
5504 if (len == 0)
5505 return;
5507 /* assert for non-overlappingness */
5508 tl_assert(src+len <= dst || dst+len <= src);
5510 /* To be simple, just copy byte by byte. But so as not to wreck
5511 performance for later accesses to dst[0 .. len-1], normalise
5512       performance for later accesses to dst[0 .. len-1], normalise
5513       destination lines as we finish with them, and also normalise
      the lines containing the first and last addresses. */
5514 for (i = 0; i < len; i++) {
5515 Bool normalise
5516 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
5517 || i == 0 /* first in range */
5518 || i == len-1; /* last in range */
5519 zsm_scopy08( src+i, dst+i, normalise );
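   /* A worked example of the normalisation policy above (figures
      assume a 64-byte line, i.e. N_LINE_ARANGE == 64): when copying
      100 bytes to a line-aligned dst, 'normalise' is True at i == 0
      (first in range), i == 63 (dst+i is the last byte of its line)
      and i == 99 (last in range).  As the uu_ prefix suggests,
      zsm_scopy08 does not currently act on the flag. */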
5524 /* For setting address ranges to a given value. Has considerable
5525 sophistication so as to avoid generating large numbers of pointless
5526 cache loads/writebacks for large ranges. */
5528 /* Do small ranges in-cache, in the obvious way. */
5529 static
5530 void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
5532 /* fast track a couple of common cases */
5533 if (len == 4 && aligned32(a)) {
5534 zsm_swrite32( a, svNew );
5535 return;
5537 if (len == 8 && aligned64(a)) {
5538 zsm_swrite64( a, svNew );
5539 return;
5542 /* be completely general (but as efficient as possible) */
5543 if (len == 0) return;
5545 if (!aligned16(a) && len >= 1) {
5546 zsm_swrite08( a, svNew );
5547 a += 1;
5548 len -= 1;
5549 tl_assert(aligned16(a));
5551 if (len == 0) return;
5553 if (!aligned32(a) && len >= 2) {
5554 zsm_swrite16( a, svNew );
5555 a += 2;
5556 len -= 2;
5557 tl_assert(aligned32(a));
5559 if (len == 0) return;
5561 if (!aligned64(a) && len >= 4) {
5562 zsm_swrite32( a, svNew );
5563 a += 4;
5564 len -= 4;
5565 tl_assert(aligned64(a));
5567 if (len == 0) return;
5569 if (len >= 8) {
5570 tl_assert(aligned64(a));
5571 while (len >= 8) {
5572 zsm_swrite64( a, svNew );
5573 a += 8;
5574 len -= 8;
5576 tl_assert(aligned64(a));
5578 if (len == 0) return;
5580 if (len >= 4)
5581 tl_assert(aligned32(a));
5582 if (len >= 4) {
5583 zsm_swrite32( a, svNew );
5584 a += 4;
5585 len -= 4;
5587 if (len == 0) return;
5589 if (len >= 2)
5590 tl_assert(aligned16(a));
5591 if (len >= 2) {
5592 zsm_swrite16( a, svNew );
5593 a += 2;
5594 len -= 2;
5596 if (len == 0) return;
5598 if (len >= 1) {
5599 zsm_swrite08( a, svNew );
5600 //a += 1;
5601 len -= 1;
5603 tl_assert(len == 0);
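   /* As an illustration of the alignment ladder above: a call with
      a == 0x1003 and len == 13 does one 8-bit write at 0x1003 (after
      which 'a' is 2- and, here, also 4-aligned), skips the 16-bit
      step, does one 32-bit write at 0x1004 and one 64-bit write at
      0x1008, leaving len == 0. */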
5607 /* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
5608 for larger ranges, try to operate directly on the out-of-cache
5609 representation, rather than dragging lines into the cache,
5610 overwriting them, and forcing them out. This turns out to be an
5611 important performance optimisation.
5613 Note that this doesn't change the filtering arrangements. The
5614 caller of zsm_sset_range needs to attend to that. */
5616 static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
5618 tl_assert(svNew != SVal_INVALID);
5619 stats__cache_make_New_arange += (ULong)len;
5621 if (0 && len > 500)
5622 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
5624 if (0) {
5625 static UWord n_New_in_cache = 0;
5626 static UWord n_New_not_in_cache = 0;
5627 /* tag is 'a' with the in-line offset masked out,
5628 eg a[31]..a[4] 0000 */
5629 Addr tag = a & ~(N_LINE_ARANGE - 1);
5630 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
5631 if (LIKELY(tag == cache_shmem.tags0[wix])) {
5632 n_New_in_cache++;
5633 } else {
5634 n_New_not_in_cache++;
5636 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
5637 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
5638 n_New_in_cache, n_New_not_in_cache );
5641 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
5642 zsm_sset_range_SMALL( a, len, svNew );
5643 } else {
5644 Addr before_start = a;
5645 Addr aligned_start = cacheline_ROUNDUP(a);
5646 Addr after_start = cacheline_ROUNDDN(a + len);
5647 UWord before_len = aligned_start - before_start;
5648 UWord aligned_len = after_start - aligned_start;
5649 UWord after_len = a + len - after_start;
5650 tl_assert(before_start <= aligned_start);
5651 tl_assert(aligned_start <= after_start);
5652 tl_assert(before_len < N_LINE_ARANGE);
5653 tl_assert(after_len < N_LINE_ARANGE);
5654 tl_assert(get_cacheline_offset(aligned_start) == 0);
5655 if (get_cacheline_offset(a) == 0) {
5656 tl_assert(before_len == 0);
5657 tl_assert(a == aligned_start);
5659 if (get_cacheline_offset(a+len) == 0) {
5660 tl_assert(after_len == 0);
5661 tl_assert(after_start == a+len);
5663 if (before_len > 0) {
5664 zsm_sset_range_SMALL( before_start, before_len, svNew );
5666 if (after_len > 0) {
5667 zsm_sset_range_SMALL( after_start, after_len, svNew );
5669 stats__cache_make_New_inZrep += (ULong)aligned_len;
5671 while (1) {
5672 Addr tag;
5673 UWord wix;
5674 if (aligned_start >= after_start)
5675 break;
5676 tl_assert(get_cacheline_offset(aligned_start) == 0);
5677 tag = aligned_start & ~(N_LINE_ARANGE - 1);
5678 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
5679 if (tag == cache_shmem.tags0[wix]) {
5680 UWord i;
5681 for (i = 0; i < N_LINE_ARANGE / 8; i++)
5682 zsm_swrite64( aligned_start + i * 8, svNew );
5683 } else {
5684 UWord i;
5685 Word zix;
5686 SecMap* sm;
5687 LineZ* lineZ;
5688 /* This line is not in the cache. Do not force it in; instead
5689 modify it in-place. */
5690 /* find the Z line to write in and rcdec it or the
5691 associated F line. */
5692 find_Z_for_writing( &sm, &zix, tag );
5693 tl_assert(sm);
5694 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
5695 lineZ = &sm->linesZ[zix];
5696 lineZ->dict[0] = svNew;
5697 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
5698 for (i = 0; i < N_LINE_ARANGE/4; i++)
5699 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
5700 rcinc_LineZ(lineZ);
5702 aligned_start += N_LINE_ARANGE;
5703 aligned_len -= N_LINE_ARANGE;
5705 tl_assert(aligned_start == after_start);
5706 tl_assert(aligned_len == 0);
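   /* Worked example of the splitting above (figures assume
      N_LINE_ARANGE == 64): for a == 0x10030 and len == 0x150 we get
      aligned_start == 0x10040 and after_start == 0x10180, hence
      before_len == 0x10, aligned_len == 0x140 (five whole lines,
      handled by the loop) and after_len == 0.  The 16-byte 'before'
      piece goes through zsm_sset_range_SMALL (there is no 'after'
      piece in this case); the whole lines are written either in-cache
      or directly into their LineZ representation. */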
5711 /////////////////////////////////////////////////////////
5712 // //
5713 // Front-filtering accesses //
5714 // //
5715 /////////////////////////////////////////////////////////
5717 static UWord stats__f_ac = 0;
5718 static UWord stats__f_sk = 0;
5720 #if 0
5721 # define STATS__F_SHOW \
5722 do { \
5723 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
5724 VG_(printf)("filters: ac %lu sk %lu\n", \
5725 stats__f_ac, stats__f_sk); \
5726 } while (0)
5727 #else
5728 # define STATS__F_SHOW /* */
5729 #endif
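/* Each of the _f_ ("filtered") wrappers below first consults the
   calling thread's Filter.  If the filter says the access is
   redundant (already covered since the filter was last cleared), the
   access is skipped and counted in stats__f_sk; otherwise it falls
   through to the corresponding unfiltered zsm_sapply handler above. */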
5731 void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
5732 stats__f_ac++;
5733 STATS__F_SHOW;
5734 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
5735 stats__f_sk++;
5736 return;
5738 zsm_sapply08__msmcwrite(thr, a);
5741 void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
5742 stats__f_ac++;
5743 STATS__F_SHOW;
5744 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
5745 stats__f_sk++;
5746 return;
5748 zsm_sapply16__msmcwrite(thr, a);
5751 void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
5752 stats__f_ac++;
5753 STATS__F_SHOW;
5754 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
5755 stats__f_sk++;
5756 return;
5758 zsm_sapply32__msmcwrite(thr, a);
5761 void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
5762 stats__f_ac++;
5763 STATS__F_SHOW;
5764 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
5765 stats__f_sk++;
5766 return;
5768 zsm_sapply64__msmcwrite(thr, a);
5771 void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
5773 /* fast track a couple of common cases */
5774 if (len == 4 && aligned32(a)) {
5775 zsm_sapply32_f__msmcwrite( thr, a );
5776 return;
5778 if (len == 8 && aligned64(a)) {
5779 zsm_sapply64_f__msmcwrite( thr, a );
5780 return;
5783 /* be completely general (but as efficient as possible) */
5784 if (len == 0) return;
5786 if (!aligned16(a) && len >= 1) {
5787 zsm_sapply08_f__msmcwrite( thr, a );
5788 a += 1;
5789 len -= 1;
5790 tl_assert(aligned16(a));
5792 if (len == 0) return;
5794 if (!aligned32(a) && len >= 2) {
5795 zsm_sapply16_f__msmcwrite( thr, a );
5796 a += 2;
5797 len -= 2;
5798 tl_assert(aligned32(a));
5800 if (len == 0) return;
5802 if (!aligned64(a) && len >= 4) {
5803 zsm_sapply32_f__msmcwrite( thr, a );
5804 a += 4;
5805 len -= 4;
5806 tl_assert(aligned64(a));
5808 if (len == 0) return;
5810 if (len >= 8) {
5811 tl_assert(aligned64(a));
5812 while (len >= 8) {
5813 zsm_sapply64_f__msmcwrite( thr, a );
5814 a += 8;
5815 len -= 8;
5817 tl_assert(aligned64(a));
5819 if (len == 0) return;
5821 if (len >= 4)
5822 tl_assert(aligned32(a));
5823 if (len >= 4) {
5824 zsm_sapply32_f__msmcwrite( thr, a );
5825 a += 4;
5826 len -= 4;
5828 if (len == 0) return;
5830 if (len >= 2)
5831 tl_assert(aligned16(a));
5832 if (len >= 2) {
5833 zsm_sapply16_f__msmcwrite( thr, a );
5834 a += 2;
5835 len -= 2;
5837 if (len == 0) return;
5839 if (len >= 1) {
5840 zsm_sapply08_f__msmcwrite( thr, a );
5841 //a += 1;
5842 len -= 1;
5844 tl_assert(len == 0);
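/* The decomposition above mirrors zsm_sset_range_SMALL: peel off 1-,
   2- and 4-byte pieces until the address is 8-aligned, stride through
   the bulk in 64-bit chunks, then mop up any 4-, 2- and 1-byte tail.
   zsm_sapplyNN_f__msmcread below follows the same pattern for reads. */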
5847 void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
5848 stats__f_ac++;
5849 STATS__F_SHOW;
5850 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
5851 stats__f_sk++;
5852 return;
5854 zsm_sapply08__msmcread(thr, a);
5857 void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
5858 stats__f_ac++;
5859 STATS__F_SHOW;
5860 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
5861 stats__f_sk++;
5862 return;
5864 zsm_sapply16__msmcread(thr, a);
5867 void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
5868 stats__f_ac++;
5869 STATS__F_SHOW;
5870 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
5871 stats__f_sk++;
5872 return;
5874 zsm_sapply32__msmcread(thr, a);
5877 void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
5878 stats__f_ac++;
5879 STATS__F_SHOW;
5880 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
5881 stats__f_sk++;
5882 return;
5884 zsm_sapply64__msmcread(thr, a);
5887 void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
5889 /* fast track a couple of common cases */
5890 if (len == 4 && aligned32(a)) {
5891 zsm_sapply32_f__msmcread( thr, a );
5892 return;
5894 if (len == 8 && aligned64(a)) {
5895 zsm_sapply64_f__msmcread( thr, a );
5896 return;
5899 /* be completely general (but as efficient as possible) */
5900 if (len == 0) return;
5902 if (!aligned16(a) && len >= 1) {
5903 zsm_sapply08_f__msmcread( thr, a );
5904 a += 1;
5905 len -= 1;
5906 tl_assert(aligned16(a));
5908 if (len == 0) return;
5910 if (!aligned32(a) && len >= 2) {
5911 zsm_sapply16_f__msmcread( thr, a );
5912 a += 2;
5913 len -= 2;
5914 tl_assert(aligned32(a));
5916 if (len == 0) return;
5918 if (!aligned64(a) && len >= 4) {
5919 zsm_sapply32_f__msmcread( thr, a );
5920 a += 4;
5921 len -= 4;
5922 tl_assert(aligned64(a));
5924 if (len == 0) return;
5926 if (len >= 8) {
5927 tl_assert(aligned64(a));
5928 while (len >= 8) {
5929 zsm_sapply64_f__msmcread( thr, a );
5930 a += 8;
5931 len -= 8;
5933 tl_assert(aligned64(a));
5935 if (len == 0) return;
5937 if (len >= 4)
5938 tl_assert(aligned32(a));
5939 if (len >= 4) {
5940 zsm_sapply32_f__msmcread( thr, a );
5941 a += 4;
5942 len -= 4;
5944 if (len == 0) return;
5946 if (len >= 2)
5947 tl_assert(aligned16(a));
5948 if (len >= 2) {
5949 zsm_sapply16_f__msmcread( thr, a );
5950 a += 2;
5951 len -= 2;
5953 if (len == 0) return;
5955 if (len >= 1) {
5956 zsm_sapply08_f__msmcread( thr, a );
5957 //a += 1;
5958 len -= 1;
5960 tl_assert(len == 0);
5963 void libhb_Thr_resumes ( Thr* thr )
5965 if (0) VG_(printf)("resume %p\n", thr);
5966 tl_assert(thr);
5967 tl_assert(!thr->llexit_done);
5968 Filter__clear(thr->filter, "libhb_Thr_resumes");
5969 /* A kludge, but .. if this thread doesn't have any marker stacks
5970 at all, get one right now. This is easier than figuring out
5971 exactly when at thread startup we can and can't take a stack
5972 snapshot. */
5973 if (HG_(clo_history_level) == 1) {
5974 tl_assert(thr->local_Kws_n_stacks);
5975 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
5976 note_local_Kw_n_stack_for(thr);
5981 /////////////////////////////////////////////////////////
5982 // //
5983 // Synchronisation objects //
5984 // //
5985 /////////////////////////////////////////////////////////
5987 /* A doubly-linked list of all the SOs. */
5988 SO* admin_SO = NULL;
5990 static SO* SO__Alloc ( void )
5992 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
5993 so->viR = VtsID_INVALID;
5994 so->viW = VtsID_INVALID;
5995 so->magic = SO_MAGIC;
5996 /* Add to double linked list */
5997 if (admin_SO) {
5998 tl_assert(admin_SO->admin_prev == NULL);
5999 admin_SO->admin_prev = so;
6000 so->admin_next = admin_SO;
6001 } else {
6002 so->admin_next = NULL;
6004 so->admin_prev = NULL;
6005 admin_SO = so;
6006 /* */
6007 return so;
6010 static void SO__Dealloc ( SO* so )
6012 tl_assert(so);
6013 tl_assert(so->magic == SO_MAGIC);
6014 if (so->viR == VtsID_INVALID) {
6015 tl_assert(so->viW == VtsID_INVALID);
6016 } else {
6017 tl_assert(so->viW != VtsID_INVALID);
6018 VtsID__rcdec(so->viR);
6019 VtsID__rcdec(so->viW);
6021 so->magic = 0;
6022 /* Del from double linked list */
6023 if (so->admin_prev)
6024 so->admin_prev->admin_next = so->admin_next;
6025 if (so->admin_next)
6026 so->admin_next->admin_prev = so->admin_prev;
6027 if (so == admin_SO)
6028 admin_SO = so->admin_next;
6029 /* */
6030 HG_(free)( so );
6034 /////////////////////////////////////////////////////////
6035 // //
6036 // Top Level API //
6037 // //
6038 /////////////////////////////////////////////////////////
6040 static void show_thread_state ( const HChar* str, Thr* t )
6042 if (1) return;
6043 if (t->viR == t->viW) {
6044 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6045 VtsID__pp( t->viR );
6046 VG_(printf)("%s","\n");
6047 } else {
6048 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6049 VtsID__pp( t->viR );
6050 VG_(printf)(" viW %u==", t->viW);
6051 VtsID__pp( t->viW );
6052 VG_(printf)("%s","\n");
6057 Thr* libhb_init (
6058 void (*get_stacktrace)( Thr*, Addr*, UWord ),
6059 ExeContext* (*get_EC)( Thr* )
6062 Thr* thr;
6063 VtsID vi;
6065    // We will have to store a large number of these,
6066 // so make sure they're the size we expect them to be.
6067 tl_assert(sizeof(ScalarTS) == 8);
6069    /* because the first 1024 ThrIDs are unusable */
6070 tl_assert(SCALARTS_N_THRBITS >= 11);
6071 /* so as to fit in a UInt w/ 3 bits to spare (see defn of
6072 Thr_n_RCEC). */
6073 tl_assert(SCALARTS_N_THRBITS <= 29);
6075 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6076 (32-bit). It's not correctness-critical, but there are a lot of
6077 them, so it's important from a space viewpoint. Unfortunately
6078 we simply can't pack it into 2 words on a 32-bit target. */
6079 if (sizeof(UWord) == 8) {
6080 tl_assert(sizeof(Thr_n_RCEC) == 16);
6081 } else {
6082 tl_assert(sizeof(Thr_n_RCEC) == 12);
6085 /* Word sets really are 32 bits. Even on a 64 bit target. */
6086 tl_assert(sizeof(WordSetID) == 4);
6087 tl_assert(sizeof(WordSet) == sizeof(WordSetID));
6089 tl_assert(get_stacktrace);
6090 tl_assert(get_EC);
6091 main_get_stacktrace = get_stacktrace;
6092 main_get_EC = get_EC;
6094 // No need to initialise hg_wordfm.
6095 // No need to initialise hg_wordset.
6097 /* Allocated once and never deallocated. Used as a temporary in
6098 VTS singleton, tick and join operations. */
6099 temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6100 temp_max_sized_VTS->id = VtsID_INVALID;
6101 verydead_thread_table_init();
6102 vts_set_init();
6103 vts_tab_init();
6104 event_map_init();
6105 VtsID__invalidate_caches();
6107 // initialise shadow memory
6108 zsm_init( SVal__rcinc, SVal__rcdec );
6110 thr = Thr__new();
6111 vi = VtsID__mk_Singleton( thr, 1 );
6112 thr->viR = vi;
6113 thr->viW = vi;
6114 VtsID__rcinc(thr->viR);
6115 VtsID__rcinc(thr->viW);
6117 show_thread_state(" root", thr);
6118 return thr;
6122 Thr* libhb_create ( Thr* parent )
6124 /* The child's VTSs are copies of the parent's VTSs, but ticked at
6125 the child's index. Since the child's index is guaranteed
6126 unique, it has never been seen before, so the implicit value
6127 before the tick is zero and after that is one. */
6128 Thr* child = Thr__new();
6130 child->viR = VtsID__tick( parent->viR, child );
6131 child->viW = VtsID__tick( parent->viW, child );
6132 Filter__clear(child->filter, "libhb_create(child)");
6133 VtsID__rcinc(child->viR);
6134 VtsID__rcinc(child->viW);
6135 /* We need to do note_local_Kw_n_stack_for( child ), but it's too
6136 early for that - it may not have a valid TId yet. So, let
6137 libhb_Thr_resumes pick it up the first time the thread runs. */
6139 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6140 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6142 /* and the parent has to move along too */
6143 VtsID__rcdec(parent->viR);
6144 VtsID__rcdec(parent->viW);
6145 parent->viR = VtsID__tick( parent->viR, parent );
6146 parent->viW = VtsID__tick( parent->viW, parent );
6147 Filter__clear(parent->filter, "libhb_create(parent)");
6148 VtsID__rcinc(parent->viR);
6149 VtsID__rcinc(parent->viW);
6150 note_local_Kw_n_stack_for( parent );
6152 show_thread_state(" child", child);
6153 show_thread_state("parent", parent);
6155 return child;
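/* Illustrative example of the above: if the parent P enters with
   viR == viW == { P:3, Q:5 }, the child C leaves libhb_create with
   viR == viW == { P:3, Q:5, C:1 } (ticked at C's index), and P itself
   moves on to { P:4, Q:5 } (ticked at its own index). */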
6158 /* Shut down the library, and print stats (in fact that's _all_
6159    this is for). */
6160 void libhb_shutdown ( Bool show_stats )
6162 if (show_stats) {
6163 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6164 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6165 stats__secmaps_allocd,
6166 stats__secmap_ga_space_covered);
6167 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6168 stats__secmap_linesZ_allocd,
6169 stats__secmap_linesZ_bytes);
6170 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
6171 stats__secmap_linesF_allocd,
6172 stats__secmap_linesF_bytes);
6173 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
6174 stats__secmap_iterator_steppings);
6175 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6176 stats__secmaps_search, stats__secmaps_search_slow);
6178 VG_(printf)("%s","\n");
6179 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
6180 stats__cache_totrefs, stats__cache_totmisses );
6181 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
6182 stats__cache_Z_fetches, stats__cache_F_fetches );
6183 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
6184 stats__cache_Z_wbacks, stats__cache_F_wbacks );
6185 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
6186 stats__cache_invals, stats__cache_flushes );
6187 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
6188 stats__cache_make_New_arange,
6189 stats__cache_make_New_inZrep);
6191 VG_(printf)("%s","\n");
6192 VG_(printf)(" cline: %'10lu normalises\n",
6193 stats__cline_normalises );
6194 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6195 stats__cline_cread64s,
6196 stats__cline_cread32s,
6197 stats__cline_cread16s,
6198 stats__cline_cread08s );
6199 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6200 stats__cline_cwrite64s,
6201 stats__cline_cwrite32s,
6202 stats__cline_cwrite16s,
6203 stats__cline_cwrite08s );
6204 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6205 stats__cline_swrite64s,
6206 stats__cline_swrite32s,
6207 stats__cline_swrite16s,
6208 stats__cline_swrite08s );
6209 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
6210 stats__cline_sread08s, stats__cline_scopy08s );
6211 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
6212 stats__cline_64to32splits,
6213 stats__cline_32to16splits,
6214 stats__cline_16to8splits );
6215 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
6216 stats__cline_64to32pulldown,
6217 stats__cline_32to16pulldown,
6218 stats__cline_16to8pulldown );
6219 if (0)
6220          VG_(printf)(" cline: sizeof(LineZ) %ld, covers %ld bytes of arange\n",
6221 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
6223 VG_(printf)("%s","\n");
6225 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
6226 stats__msmcread, stats__msmcread_change);
6227 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
6228 stats__msmcwrite, stats__msmcwrite_change);
6229 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6230 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
6231 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
6232 stats__join2_queries, stats__join2_misses);
6234 VG_(printf)("%s","\n");
6235 VG_(printf)( " libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
6236 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
6237 VG_(printf)( " libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6238 stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
6239 VG_(printf)( " libhb: VTSset: find__or__clone_and_add %'lu (%'lu allocd)\n",
6240 stats__vts_set__focaa, stats__vts_set__focaa_a );
6241 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
6242 stats__vts__indexat_slow );
6244 VG_(printf)("%s","\n");
6245 VG_(printf)(
6246 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6247 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6249 VG_(printf)( " libhb: %lu entries in vts_set\n",
6250 VG_(sizeFM)( vts_set ) );
6252 VG_(printf)("%s","\n");
6253 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
6254 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
6255 stats__ctxt_rcdec2,
6256 stats__ctxt_rcdec3 );
6257 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
6258 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
6259 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
6260 (UWord)N_RCEC_TAB,
6261 stats__ctxt_tab_curr );
6262 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
6263 stats__ctxt_tab_qs,
6264 stats__ctxt_tab_cmps );
6265 #if 0
6266 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
6267 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
6268 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
6269 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
6270 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
6271 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
6272 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
6273 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
6274 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
6275 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
6276 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
6277 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
6278 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
6279 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
6281 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
6282 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
6283 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
6284 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
6285 #endif
6287 VG_(printf)("%s","<<< END libhb stats >>>\n");
6288 VG_(printf)("%s","\n");
6293 /* Receive notification that a thread has low-level exited.  The
6294 significance here is that we do not expect to see any more memory
6295 references from it. */
6296 void libhb_async_exit ( Thr* thr )
6298 tl_assert(thr);
6299 tl_assert(!thr->llexit_done);
6300 thr->llexit_done = True;
6302 /* free up Filter and local_Kws_n_stacks (well, actually not the
6303 latter ..) */
6304 tl_assert(thr->filter);
6305 HG_(free)(thr->filter);
6306 thr->filter = NULL;
6308 /* Tell the VTS mechanism this thread has exited, so it can
6309 participate in VTS pruning. Note this can only happen if the
6310 thread has both ll_exited and has been joined with. */
6311 if (thr->joinedwith_done)
6312 VTS__declare_thread_very_dead(thr);
6314 /* Another space-accuracy tradeoff. Do we want to be able to show
6315 H1 history for conflicts in threads which have since exited? If
6316 yes, then we better not free up thr->local_Kws_n_stacks. The
6317 downside is a potential per-thread leak of up to
6318 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6319 XArray average overcommit factor is (1.5 I'd guess). */
6320 // hence:
6321 // VG_(deleteXA)(thr->local_Kws_n_stacks);
6322 // thr->local_Kws_n_stacks = NULL;
6325 /* Receive notification that a thread has been joined with. The
6326 significance here is that we do not expect to see any further
6327 references to its vector clocks (Thr::viR and Thr::viW). */
6328 void libhb_joinedwith_done ( Thr* thr )
6330 tl_assert(thr);
6331 /* Caller must ensure that this is only ever called once per Thr. */
6332 tl_assert(!thr->joinedwith_done);
6333 thr->joinedwith_done = True;
6334 if (thr->llexit_done)
6335 VTS__declare_thread_very_dead(thr);
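/* Taken together with libhb_async_exit above, this means
   VTS__declare_thread_very_dead runs at most once per thread, from
   whichever of the two notifications (low-level exit, joined-with)
   arrives second, since it requires both flags to be set. */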
6339 /* Both Segs and SOs point to VTSs. However, there is no sharing, so
6340 a Seg that points at a VTS is its one-and-only owner, and ditto for
6341 a SO that points at a VTS. */
6343 SO* libhb_so_alloc ( void )
6345 return SO__Alloc();
6348 void libhb_so_dealloc ( SO* so )
6350 tl_assert(so);
6351 tl_assert(so->magic == SO_MAGIC);
6352 SO__Dealloc(so);
6355 /* See comments in libhb.h for details on the meaning of
6356 strong vs weak sends and strong vs weak receives. */
6357 void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6359 /* Copy the VTSs from 'thr' into the sync object, and then move
6360 the thread along one step. */
6362 tl_assert(so);
6363 tl_assert(so->magic == SO_MAGIC);
6365 /* stay sane .. a thread's read-clock must always lead or be the
6366 same as its write-clock */
6367 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6368 tl_assert(leq);
6371 /* since we're overwriting the VtsIDs in the SO, we need to drop
6372 any references made by the previous contents thereof */
6373 if (so->viR == VtsID_INVALID) {
6374 tl_assert(so->viW == VtsID_INVALID);
6375 so->viR = thr->viR;
6376 so->viW = thr->viW;
6377 VtsID__rcinc(so->viR);
6378 VtsID__rcinc(so->viW);
6379 } else {
6380 /* In a strong send, we dump any previous VC in the SO and
6381 install the sending thread's VC instead. For a weak send we
6382 must join2 with what's already there. */
6383 tl_assert(so->viW != VtsID_INVALID);
6384 VtsID__rcdec(so->viR);
6385 VtsID__rcdec(so->viW);
6386 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6387 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6388 VtsID__rcinc(so->viR);
6389 VtsID__rcinc(so->viW);
6392 /* move both parent clocks along */
6393 VtsID__rcdec(thr->viR);
6394 VtsID__rcdec(thr->viW);
6395 thr->viR = VtsID__tick( thr->viR, thr );
6396 thr->viW = VtsID__tick( thr->viW, thr );
6397 if (!thr->llexit_done) {
6398 Filter__clear(thr->filter, "libhb_so_send");
6399 note_local_Kw_n_stack_for(thr);
6401 VtsID__rcinc(thr->viR);
6402 VtsID__rcinc(thr->viW);
6404 if (strong_send)
6405 show_thread_state("s-send", thr);
6406 else
6407 show_thread_state("w-send", thr);
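/* Worked example, assuming VtsID__join2 computes the pointwise
   maximum of two clocks: if so->viR == { T1:4, T2:2 } and the
   sender's viR == { T1:1, T2:7 }, a weak send leaves
   so->viR == { T1:4, T2:7 }, whereas a strong send simply overwrites
   it with { T1:1, T2:7 }.  Either way the sender then ticks its own
   clocks. */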
6410 void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
6412 tl_assert(so);
6413 tl_assert(so->magic == SO_MAGIC);
6415 if (so->viR != VtsID_INVALID) {
6416 tl_assert(so->viW != VtsID_INVALID);
6418 /* Weak receive (basically, an R-acquisition of a R-W lock).
6419 This advances the read-clock of the receiver, but not the
6420 write-clock. */
6421 VtsID__rcdec(thr->viR);
6422 thr->viR = VtsID__join2( thr->viR, so->viR );
6423 VtsID__rcinc(thr->viR);
6425 /* At one point (r10589) it seemed safest to tick the clocks for
6426 the receiving thread after the join. But on reflection, I
6427 wonder if that might cause it to 'overtake' constraints,
6428 which could lead to missing races. So, back out that part of
6429 r10589. */
6430 //VtsID__rcdec(thr->viR);
6431 //thr->viR = VtsID__tick( thr->viR, thr );
6432 //VtsID__rcinc(thr->viR);
6434 /* For a strong receive, we also advance the receiver's write
6435 clock, which means the receive as a whole is essentially
6436 equivalent to a W-acquisition of a R-W lock. */
6437 if (strong_recv) {
6438 VtsID__rcdec(thr->viW);
6439 thr->viW = VtsID__join2( thr->viW, so->viW );
6440 VtsID__rcinc(thr->viW);
6442 /* See comment just above, re r10589. */
6443 //VtsID__rcdec(thr->viW);
6444 //thr->viW = VtsID__tick( thr->viW, thr );
6445 //VtsID__rcinc(thr->viW);
6448 if (thr->filter)
6449 Filter__clear(thr->filter, "libhb_so_recv");
6450 note_local_Kw_n_stack_for(thr);
6452 if (strong_recv)
6453 show_thread_state("s-recv", thr);
6454 else
6455 show_thread_state("w-recv", thr);
6457 } else {
6458 tl_assert(so->viW == VtsID_INVALID);
6459 /* Deal with degenerate case: 'so' has no vts, so there has been
6460 no message posted to it. Just ignore this case. */
6461 show_thread_state("d-recv", thr);
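/* Continuing the example from libhb_so_send: a weak receive joins
   only the receiver's viR with so->viR, ordering the receiver after
   the send for reads only (an R-acquisition), while a strong receive
   additionally joins viW with so->viW, making it behave like a
   W-acquisition of a reader-writer lock. */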
6465 Bool libhb_so_everSent ( SO* so )
6467 if (so->viR == VtsID_INVALID) {
6468 tl_assert(so->viW == VtsID_INVALID);
6469 return False;
6470 } else {
6471 tl_assert(so->viW != VtsID_INVALID);
6472 return True;
6476 #define XXX1 0 // 0x67a106c
6477 #define XXX2 0
6479 static inline Bool TRACEME(Addr a, SizeT szB) {
6480 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
6481 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
6482 return False;
6484 static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
6486 SVal sv = zsm_sread08(a);
6487 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
6488 show_thread_state("", thr);
6489 VG_(printf)("%s","\n");
6492 void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
6494 SVal sv = SVal__mkC(thr->viW, thr->viW);
6495 tl_assert(is_sane_SVal_C(sv));
6496 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
6497 zsm_sset_range( a, szB, sv );
6498 Filter__clear_range( thr->filter, a, szB );
6499 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
6502 void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
6504 /* do nothing */
6507 void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
6509 /* This really does put the requested range in NoAccess. It's
6510 expensive though. */
6511 SVal sv = SVal_NOACCESS;
6512 tl_assert(is_sane_SVal_C(sv));
6513 zsm_sset_range( a, szB, sv );
6514 Filter__clear_range( thr->filter, a, szB );
6517 void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
6519 SVal sv = SVal_NOACCESS;
6520 tl_assert(is_sane_SVal_C(sv));
6521 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
6522 zsm_sset_range( a, szB, sv );
6523 Filter__clear_range( thr->filter, a, szB );
6524 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
6527 Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
6528 tl_assert(thr);
6529 return thr->hgthread;
6532 void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
6533 tl_assert(thr);
6534 thr->hgthread = hgthread;
6537 void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
6539 zsm_scopy_range(src, dst, len);
6540 Filter__clear_range( thr->filter, dst, len );
6543 void libhb_maybe_GC ( void )
6545 event_map_maybe_GC();
6546 /* If there are still freelist entries available, no need for a
6547 GC. */
6548 if (vts_tab_freelist != VtsID_INVALID)
6549 return;
6550 /* So all the table entries are full, and we're having to expand
6551       the table.  But did we hit the threshold point yet? */
6552 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
6553 return;
6554 vts_tab__do_GC( False/*don't show stats*/ );
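/* In other words, a vts_tab GC is attempted only once the VtsID
   freelist is exhausted and the table has grown to at least
   vts_next_GC_at entries; event_map_maybe_GC is called
   unconditionally and makes its own decision. */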
6558 /////////////////////////////////////////////////////////////////
6559 /////////////////////////////////////////////////////////////////
6560 // //
6561 // SECTION END main library //
6562 // //
6563 /////////////////////////////////////////////////////////////////
6564 /////////////////////////////////////////////////////////////////
6566 /*--------------------------------------------------------------------*/
6567 /*--- end libhb_main.c ---*/
6568 /*--------------------------------------------------------------------*/