Update Linux x86 system call number definitions
[valgrind.git] / helgrind / libhb_core.c
blob683c685f24f783fc06cfad70b32d38a3a8376ef4
2 /*--------------------------------------------------------------------*/
3 /*--- LibHB: a library for implementing and checking ---*/
4 /*--- the happens-before relationship in concurrent programs. ---*/
5 /*--- libhb_core.c ---*/
6 /*--------------------------------------------------------------------*/
8 /*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
12 Copyright (C) 2008-2017 OpenWorks Ltd
13 info@open-works.co.uk
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 The GNU General Public License is contained in the file COPYING. */
31 #include "pub_tool_basics.h"
32 #include "pub_tool_poolalloc.h"
33 #include "pub_tool_libcassert.h"
34 #include "pub_tool_libcbase.h"
35 #include "pub_tool_libcprint.h"
36 #include "pub_tool_machine.h"
37 #include "pub_tool_mallocfree.h"
38 #include "pub_tool_wordfm.h"
39 #include "pub_tool_hashtable.h"
40 #include "pub_tool_xarray.h"
41 #include "pub_tool_oset.h"
42 #include "pub_tool_threadstate.h"
43 #include "pub_tool_aspacemgr.h"
44 #include "pub_tool_stacktrace.h"
45 #include "pub_tool_execontext.h"
46 #include "pub_tool_errormgr.h"
47 #include "pub_tool_debuginfo.h"
48 #include "pub_tool_gdbserver.h"
49 #include "pub_tool_options.h" // VG_(clo_stats)
50 #include "hg_basics.h"
51 #include "hg_wordset.h"
52 #include "hg_lock_n_thread.h"
53 #include "hg_errors.h"
55 #include "libhb.h"
58 /////////////////////////////////////////////////////////////////
59 /////////////////////////////////////////////////////////////////
60 // //
61 // Debugging #defines //
62 // //
63 /////////////////////////////////////////////////////////////////
64 /////////////////////////////////////////////////////////////////
/* Compile-time switches for the (expensive) internal sanity checks.
   Each one is enabled by flipping its "#if 0" to "#if 1" and
   rebuilding. */

/* Shadow-value sanity checks in the core memory state machine. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Sanity checks (reference counts, etc) in the conflicting access
   machinery. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Sanity checks in the compressed shadow memory machinery, mostly in
   its caching innards.  There is no almost-zero-cost way to make
   these selectable at run time, hence the rebuild requirement.  When
   enabled, 'inline' is also redefined so that nothing gets inlined. */
#if 0
#  define CHECK_ZSM 1  /* sanity-check the CacheLine machinery */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* no CacheLine sanity checking */
#endif

/* Set to 1 to trace the handling of the cached rcec. */
#define DEBUG_CACHED_RCEC 0
99 /////////////////////////////////////////////////////////////////
100 /////////////////////////////////////////////////////////////////
101 // //
102 // data decls: VtsID //
103 // //
104 /////////////////////////////////////////////////////////////////
105 /////////////////////////////////////////////////////////////////
107 /* VtsIDs: Unique small-integer IDs for VTSs. VtsIDs can't exceed 30
108 bits, since they have to be packed into the lowest 30 bits of an
109 SVal. */
110 typedef UInt VtsID;
111 #define VtsID_INVALID 0xFFFFFFFF
115 /////////////////////////////////////////////////////////////////
116 /////////////////////////////////////////////////////////////////
117 // //
118 // data decls: SVal //
119 // //
120 /////////////////////////////////////////////////////////////////
121 /////////////////////////////////////////////////////////////////
123 typedef ULong SVal;
125 /* This value has special significance to the implementation, and callers
126 may not store it in the shadow memory. */
127 #define SVal_INVALID (3ULL << 62)
129 /* This is the default value for shadow memory. Initially the shadow
130 memory contains no accessible areas and so all reads produce this
131 value. TODO: make this caller-defineable. */
132 #define SVal_NOACCESS (2ULL << 62)
136 /////////////////////////////////////////////////////////////////
137 /////////////////////////////////////////////////////////////////
138 // //
139 // data decls: ScalarTS //
140 // //
141 /////////////////////////////////////////////////////////////////
142 /////////////////////////////////////////////////////////////////
144 /* Scalar Timestamp. We have to store a lot of these, so there is
145 some effort to make them as small as possible. Logically they are
146 a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
147 We pack it into 64 bits by representing the Thr* using a ThrID, a
148 small integer (18 bits), and a 46 bit integer for the timestamp
149 number. The 46/18 split is arbitrary, but has the effect that
150 Helgrind can only handle programs that create 2^18 or fewer threads
151 over their entire lifetime, and have no more than 2^46 timestamp
152 ticks (synchronisation operations on the same thread).
154 This doesn't seem like much of a limitation. 2^46 ticks is
155 7.06e+13, and if each tick (optimistically) takes the machine 1000
156 cycles to process, then the minimum time to process that many ticks
157 at a clock rate of 5 GHz is 162.9 days. And that's doing nothing
158 but VTS ticks, which isn't realistic.
160 NB1: SCALARTS_N_THRBITS must be 27 or lower. The obvious limit is
161 32 since a ThrID is a UInt. 27 comes from the fact that
162 'Thr_n_RCEC', which records information about old accesses, packs
163 in tsw not only a ThrID but also minimum 4+1 other bits (access size
164 and writeness) in a UInt, hence limiting size to 32-(4+1) == 27.
166 NB2: thrid values are issued upwards from 1024, and values less
167 than that aren't valid. This isn't per se necessary (any order
168 will do, so long as they are unique), but it does help ensure they
169 are less likely to get confused with the various other kinds of
170 small-integer thread ids drifting around (eg, TId).
171 So, SCALARTS_N_THRBITS must be 11 or more.
172 See also NB5.
174 NB3: this probably also relies on the fact that Thr's are never
175 deallocated -- they exist forever. Hence the 1-1 mapping from
176 Thr's to thrid values (set up in Thr__new) persists forever.
178 NB4: temp_max_sized_VTS is allocated at startup and never freed.
179 It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
180 ScalarTSs. So we can't make SCALARTS_N_THRBITS too large without
181 making the memory use for this go sky-high. With
182 SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
183 like an OK tradeoff. If more than 256k threads need to be
184 supported, we could change SCALARTS_N_THRBITS to 20, which would
185 facilitate supporting 1 million threads at the cost of 8MB storage
186 for temp_max_sized_VTS.
188 NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
189 ThrID == 0 to denote an empty Thr_n_RCEC record. So ThrID == 0
190 must never be a valid ThrID. Given NB2 that's OK. */
192 #define SCALARTS_N_THRBITS 18 /* valid range: 11 to 27 inclusive,
193 See NB1 and NB2 above. */
195 #define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
196 typedef
197 struct {
198 ThrID thrid : SCALARTS_N_THRBITS;
199 ULong tym : SCALARTS_N_TYMBITS;
201 ScalarTS;
203 #define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
207 /////////////////////////////////////////////////////////////////
208 /////////////////////////////////////////////////////////////////
209 // //
210 // data decls: Filter //
211 // //
212 /////////////////////////////////////////////////////////////////
213 /////////////////////////////////////////////////////////////////
/* Filter geometry.  Baseline values for the two LOG2 settings: 5, 9. */
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

/* Bytes covered by one filter line, and number of lines. */
#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

/* The tag of an address is the address with the within-line offset
   bits cleared. */
#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

/* Index of the filter line that address _a maps to. */
#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )
229 /* In the lines, each 8 bytes are treated individually, and are mapped
230 to a UShort. Regardless of endianness of the underlying machine,
231 bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
232 the highest address.
234 Of each bit pair, the higher numbered bit is set if a R has been
235 seen, so the actual layout is:
237 15 14 ... 01 00
239 R W for addr+7 ... R W for addr+0
241 So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555. */
244 /* tags are separated from lines. tags are Addrs and are
245 the base address of the line. */
246 typedef
247 struct {
248 UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
250 FiLine;
252 typedef
253 struct {
254 Addr tags[FI_NUM_LINES];
255 FiLine lines[FI_NUM_LINES];
257 Filter;
261 /////////////////////////////////////////////////////////////////
262 /////////////////////////////////////////////////////////////////
263 // //
264 // data decls: Thr, ULong_n_EC //
265 // //
266 /////////////////////////////////////////////////////////////////
267 /////////////////////////////////////////////////////////////////
269 // Records stacks for H1 history mechanism (DRD-style)
270 typedef
271 struct { ULong ull; ExeContext* ec; }
272 ULong_n_EC;
/* Number of ULong_n_EC records kept per thread.  At 16 bytes per
   record, 62.5k records cost 1MB per thread.  Once more than
   N_KWs_N_STACKs_PER_THREAD records are present, the older half is
   deleted to make room.  So, in the worst case, a stack can still be
   produced for at least the last N_KWs_N_STACKs_PER_THREAD / 2 Kw
   transitions (segments in this thread); with the current setting
   that is the last 31.25k segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


/* Number of frames held in an RCEC. */
#define N_FRAMES 8

/* Magic number for RCEC sanity checking:
   (UInt) `echo "Reference Counted Execution Context" | md5sum` */
#define RCEC_MAGIC 0xab88abb2UL
291 /* RCEC usage is commented more in details in the section 'Change-event map2'
292 later in this file */
293 typedef
294 struct _RCEC {
295 UWord magic; /* sanity check only */
296 struct _RCEC* next;
297 UWord rc;
298 UWord rcX; /* used for crosschecking */
299 UWord frames_hash; /* hash of all the frames */
300 UWord frames[N_FRAMES];
302 RCEC;
304 struct _Thr {
305 /* Current VTSs for this thread. They change as we go along. viR
306 is the VTS to be used for reads, viW for writes. Usually they
307 are the same, but can differ when we deal with reader-writer
308 locks. It is always the case that
309 VtsID__cmpLEQ(viW,viR) == True
310 that is, viW must be the same, or lagging behind, viR. */
311 VtsID viR;
312 VtsID viW;
314 /* Is initially False, and is set to True after the thread really
315 has done a low-level exit. When True, we expect to never see
316 any more memory references done by this thread. */
317 Bool llexit_done;
319 /* Is initially False, and is set to True after the thread has been
320 joined with (reaped by some other thread). After this point, we
321 do not expect to see any uses of .viR or .viW, so it is safe to
322 set them to VtsID_INVALID. */
323 Bool joinedwith_done;
325 /* A small integer giving a unique identity to this Thr. See
326 comments on the definition of ScalarTS for details. */
327 ThrID thrid : SCALARTS_N_THRBITS;
329 /* A filter that removes references for which we believe that
330 msmcread/msmcwrite will not change the state, nor report a
331 race. */
332 Filter* filter;
334 /* A pointer back to the top level Thread structure. There is a
335 1-1 mapping between Thread and Thr structures -- each Thr points
336 at its corresponding Thread, and vice versa. Really, Thr and
337 Thread should be merged into a single structure. */
338 Thread* hgthread;
340 /* cached_rcec maintains the last RCEC that was retrieved for this thread. */
341 RCEC cached_rcec; // cached_rcec value, not ref-counted.
342 /* The shadow register vex_shadow1 SP register (SP_s1) is used to maintain
343 the validity of the cached rcec.
344 If SP_s1 is 0, then the cached rcec is invalid (cannot be used).
345 If SP_S1 is != 0, then the cached rcec is valid. The valid cached rcec
346 can be used to generate a new RCEC by changing just the last frame. */
348 /* The ULongs (scalar Kws) in this accumulate in strictly
349 increasing order, without duplicates. This is important because
350 we need to be able to find a given scalar Kw in this array
351 later, by binary search. */
352 XArray* /* ULong_n_EC */ local_Kws_n_stacks;
357 /////////////////////////////////////////////////////////////////
358 /////////////////////////////////////////////////////////////////
359 // //
360 // data decls: SO //
361 // //
362 /////////////////////////////////////////////////////////////////
363 /////////////////////////////////////////////////////////////////
365 // (UInt) `echo "Synchronisation object" | md5sum`
366 #define SO_MAGIC 0x56b3c5b0U
368 struct _SO {
369 struct _SO* admin_prev;
370 struct _SO* admin_next;
371 VtsID viR; /* r-clock of sender */
372 VtsID viW; /* w-clock of sender */
373 UInt magic;
378 /////////////////////////////////////////////////////////////////
379 /////////////////////////////////////////////////////////////////
380 // //
381 // Forward declarations //
382 // //
383 /////////////////////////////////////////////////////////////////
384 /////////////////////////////////////////////////////////////////
386 /* fwds for
387 Globals needed by other parts of the library. These are set
388 once at startup and then never changed. */
389 static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
390 static ExeContext* (*main_get_EC)( Thr* ) = NULL;
392 /* misc fn and data fwdses */
393 static void VtsID__rcinc ( VtsID ii );
394 static void VtsID__rcdec ( VtsID ii );
396 static inline Bool SVal__isC ( SVal s );
397 static inline VtsID SVal__unC_Rmin ( SVal s );
398 static inline VtsID SVal__unC_Wmin ( SVal s );
399 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );
400 static inline void SVal__rcinc ( SVal s );
401 static inline void SVal__rcdec ( SVal s );
402 /* SVal in LineZ are used to store various pointers. */
403 static inline void *SVal2Ptr (SVal s);
404 static inline SVal Ptr2SVal (void* ptr);
406 /* A double linked list of all the SO's. */
407 SO* admin_SO;
411 /////////////////////////////////////////////////////////////////
412 /////////////////////////////////////////////////////////////////
413 // //
414 // SECTION BEGIN compressed shadow memory //
415 // //
416 /////////////////////////////////////////////////////////////////
417 /////////////////////////////////////////////////////////////////
419 #ifndef __HB_ZSM_H
420 #define __HB_ZSM_H
422 /* Initialise the library. Once initialised, it will (or may) call
423 SVal__rcinc and SVal__rcdec in response to all the calls below, in order to
424 allow the user to do reference counting on the SVals stored herein.
425 It is important to understand, however, that due to internal
426 caching, the reference counts are in general inaccurate, and can be
427 both above or below the true reference count for an item. In
428 particular, the library may indicate that the reference count for
429 an item is zero, when in fact it is not.
431 To make the reference counting exact and therefore non-pointless,
432 call zsm_flush_cache. Immediately after it returns, the reference
433 counts for all items, as deduced by the caller by observing calls
434 to SVal__rcinc and SVal__rcdec, will be correct, and so any items with a
435 zero reference count may be freed (or at least considered to be
436 unreferenced by this library).
438 static void zsm_init ( void );
440 static void zsm_sset_range ( Addr, SizeT, SVal );
441 static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
442 static void zsm_scopy_range ( Addr, Addr, SizeT );
443 static void zsm_flush_cache ( void );
445 #endif /* ! __HB_ZSM_H */
/* Round a up to the next multiple of N (a itself if already a
   multiple).  N must be a power of 2.  Both arguments are fully
   parenthesised so that expression arguments such as 'x << 1' or
   '1 << k' group as intended; note that N is evaluated twice. */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N) - 1))
/* Round a down to the previous multiple of N (a itself if already a
   multiple).  N must be a power of 2. */
#define ROUNDDN(a, N)   ((a) & ~((N) - 1))
453 /* True if a belongs in range [start, start + szB[
454 (i.e. start + szB is excluded). */
455 static inline Bool address_in_range (Addr a, Addr start, SizeT szB)
457 /* Checking start <= a && a < start + szB.
458 As start and a are unsigned addresses, the condition can
459 be simplified. */
460 if (CHECK_ZSM)
461 tl_assert ((a - start < szB)
462 == (start <= a
463 && a < start + szB));
464 return a - start < szB;
467 /* ------ CacheLine ------ */
/* log2 of the address range covered by one cache line; must be >= 3. */
#define N_LINE_BITS      6
/* Address range covered by one cache line. */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
/* Number of trees per line; each tree covers 8 addresses. */
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)
473 typedef
474 struct {
475 UShort descrs[N_LINE_TREES];
476 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
478 CacheLine;
/* Bit flags making up a tree descriptor; one flag per bit, bits 0
   to 15. */
#define TREE_DESCR_16_0 0x0001  /* bit  0 */
#define TREE_DESCR_32_0 0x0002  /* bit  1 */
#define TREE_DESCR_16_1 0x0004  /* bit  2 */
#define TREE_DESCR_64   0x0008  /* bit  3 */
#define TREE_DESCR_16_2 0x0010  /* bit  4 */
#define TREE_DESCR_32_1 0x0020  /* bit  5 */
#define TREE_DESCR_16_3 0x0040  /* bit  6 */
#define TREE_DESCR_8_0  0x0080  /* bit  7 */
#define TREE_DESCR_8_1  0x0100  /* bit  8 */
#define TREE_DESCR_8_2  0x0200  /* bit  9 */
#define TREE_DESCR_8_3  0x0400  /* bit 10 */
#define TREE_DESCR_8_4  0x0800  /* bit 11 */
#define TREE_DESCR_8_5  0x1000  /* bit 12 */
#define TREE_DESCR_8_6  0x2000  /* bit 13 */
#define TREE_DESCR_8_7  0x4000  /* bit 14 */
#define TREE_DESCR_DTY  0x8000  /* bit 15 */
497 typedef
498 struct {
499 SVal dict[4]; /* can represent up to 4 diff values in the line */
500 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
501 dict indexes */
502 /* if dict[0] == SVal_INVALID then dict[1] is a pointer to the
503 LineF to use, and dict[2..] are also SVal_INVALID. */
505 LineZ; /* compressed rep for a cache line */
507 /* LineZ.dict[1] is used to store various pointers:
508 * In the first lineZ of a free SecMap, it points to the next free SecMap.
509 * In a lineZ for which we need to use a lineF, it points to the lineF. */
512 typedef
513 struct {
514 SVal w64s[N_LINE_ARANGE];
516 LineF; /* full rep for a cache line */
518 /* We use a pool allocator for LineF, as LineF is relatively small,
519 and we will often alloc/release such lines. */
520 static PoolAlloc* LineF_pool_allocator;
522 /* SVal in a lineZ are used to store various pointers.
523 Below are conversion functions to support that. */
524 static inline LineF *LineF_Ptr (LineZ *lineZ)
526 tl_assert(lineZ->dict[0] == SVal_INVALID);
527 return SVal2Ptr (lineZ->dict[1]);
530 /* Shadow memory.
531 Primary map is a WordFM Addr SecMap*.
532 SecMaps cover some page-size-ish section of address space and hold
533 a compressed representation.
534 CacheLine-sized chunks of SecMaps are copied into a Cache, being
535 decompressed when moved into the cache and recompressed on the
536 way out. Because of this, the cache must operate as a writeback
537 cache, not a writethrough one.
539 Each SecMap must hold a power-of-2 number of CacheLines. Hence
540 N_SECMAP_BITS must be >= N_LINE_BITS. */
/* log2 of the address range covered by one SecMap. */
#define N_SECMAP_BITS   13
/* Address range covered by one SecMap. */
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

/* Number of CacheLines held by a SecMap. */
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
548 /* The data in the SecMap is held in the array of LineZs. Each LineZ
549 either carries the required data directly, in a compressed
550 representation, or it holds (in .dict[1]) a pointer to a LineF
551 that holds the full representation.
553 As each in-use LineF is referred to by exactly one LineZ,
554 the number of .linesZ[] that refer to a lineF should equal
555 the number of used lineF.
557 RC obligations: the RCs presented to the user include exactly
558 the values in:
559 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
560 * F reps that are in use
562 Hence the following actions at the following transitions are required:
564 F rep: alloc'd -> freed -- rcdec_LineF
565 F rep: -> alloc'd -- rcinc_LineF
566 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
567 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ */
570 typedef
571 struct {
572 UInt magic;
573 LineZ linesZ[N_SECMAP_ZLINES];
575 SecMap;
577 #define SecMap_MAGIC 0x571e58cbU
579 // (UInt) `echo "Free SecMap" | md5sum`
580 #define SecMap_free_MAGIC 0x5a977f30U
582 __attribute__((unused))
583 static inline Bool is_sane_SecMap ( SecMap* sm ) {
584 return sm != NULL && sm->magic == SecMap_MAGIC;
587 /* ------ Cache ------ */
/* log2 of the number of entries in the cache. */
#define N_WAY_BITS 16
/* Number of entries in the cache. */
#define N_WAY_NENT (1 << N_WAY_BITS)
592 /* Each tag is the address of the associated CacheLine, rounded down
593 to a CacheLine address boundary. A CacheLine size must be a power
594 of 2 and must be 8 or more. Hence an easy way to initialise the
595 cache so it is empty is to set all the tag values to any value % 8
596 != 0, eg 1. This means all queries in the cache initially miss.
597 It does however require us to detect and not writeback, any line
598 with a bogus tag. */
599 typedef
600 struct {
601 CacheLine lyns0[N_WAY_NENT];
602 Addr tags0[N_WAY_NENT];
604 Cache;
606 static inline Bool is_valid_scache_tag ( Addr tag ) {
607 /* a valid tag should be naturally aligned to the start of
608 a CacheLine. */
609 return 0 == (tag & (N_LINE_ARANGE - 1));
613 /* --------- Primary data structures --------- */
615 /* Shadow memory primary map */
616 static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
617 static Cache cache_shmem;
620 static UWord stats__secmaps_search = 0; // # SM finds
621 static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
622 static UWord stats__secmaps_allocd = 0; // # SecMaps issued
623 static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
624 static UWord stats__secmaps_scanGC = 0; // # nr of scan GC done.
625 static UWord stats__secmaps_scanGCed = 0; // # SecMaps GC-ed via scan
626 static UWord stats__secmaps_ssetGCed = 0; // # SecMaps GC-ed via setnoaccess
627 static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
628 static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
629 static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
630 static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
631 static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
632 static UWord stats__cache_F_fetches = 0; // # F lines fetched
633 static UWord stats__cache_F_wbacks = 0; // # F lines written back
634 static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
635 static UWord stats__cache_totrefs = 0; // # total accesses
636 static UWord stats__cache_totmisses = 0; // # misses
637 static ULong stats__cache_make_New_arange = 0; // total arange made New
638 static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
639 static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
640 static UWord stats__cline_cread64s = 0; // # calls to s_m_read64
641 static UWord stats__cline_cread32s = 0; // # calls to s_m_read32
642 static UWord stats__cline_cread16s = 0; // # calls to s_m_read16
643 static UWord stats__cline_cread08s = 0; // # calls to s_m_read8
644 static UWord stats__cline_cwrite64s = 0; // # calls to s_m_write64
645 static UWord stats__cline_cwrite32s = 0; // # calls to s_m_write32
646 static UWord stats__cline_cwrite16s = 0; // # calls to s_m_write16
647 static UWord stats__cline_cwrite08s = 0; // # calls to s_m_write8
648 static UWord stats__cline_sread08s = 0; // # calls to s_m_set8
649 static UWord stats__cline_swrite08s = 0; // # calls to s_m_get8
650 static UWord stats__cline_swrite16s = 0; // # calls to s_m_get8
651 static UWord stats__cline_swrite32s = 0; // # calls to s_m_get8
652 static UWord stats__cline_swrite64s = 0; // # calls to s_m_get8
653 static UWord stats__cline_scopy08s = 0; // # calls to s_m_copy8
654 static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
655 static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
656 static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
657 static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
658 static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
659 static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
660 static UWord stats__vts__tick = 0; // # calls to VTS__tick
661 static UWord stats__vts__join = 0; // # calls to VTS__join
662 static UWord stats__vts__cmpLEQ = 0; // # calls to VTS__cmpLEQ
663 static UWord stats__vts__cmp_structural = 0; // # calls to VTS__cmp_structural
664 static UWord stats__vts_tab_GC = 0; // # nr of vts_tab GC
665 static UWord stats__vts_pruning = 0; // # nr of vts pruning
667 // # calls to VTS__cmp_structural w/ slow case
668 static UWord stats__vts__cmp_structural_slow = 0;
670 // # calls to VTS__indexAt_SLOW
671 static UWord stats__vts__indexat_slow = 0;
673 // # calls to vts_set__find__or__clone_and_add
674 static UWord stats__vts_set__focaa = 0;
676 // # calls to vts_set__find__or__clone_and_add that lead to an
677 // allocation
678 static UWord stats__vts_set__focaa_a = 0;
681 static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
682 return a & ~(N_SECMAP_ARANGE - 1);
684 static inline UWord shmem__get_SecMap_offset ( Addr a ) {
685 return a & (N_SECMAP_ARANGE - 1);
689 /*----------------------------------------------------------------*/
690 /*--- map_shmem :: WordFM Addr SecMap ---*/
691 /*--- shadow memory (low level handlers) (shmem__* fns) ---*/
692 /*----------------------------------------------------------------*/
694 /*--------------- SecMap allocation --------------- */
696 static HChar* shmem__bigchunk_next = NULL;
697 static HChar* shmem__bigchunk_end1 = NULL;
699 static void* shmem__bigchunk_alloc ( SizeT n )
701 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
702 tl_assert(n > 0);
703 n = VG_ROUNDUP(n, 16);
704 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
705 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
706 <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
707 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
708 if (0)
709 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
710 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
711 shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
712 if (shmem__bigchunk_next == NULL)
713 VG_(out_of_memory_NORETURN)(
714 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
715 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
717 tl_assert(shmem__bigchunk_next);
718 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
719 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
720 shmem__bigchunk_next += n;
721 return shmem__bigchunk_next - n;
724 /* SecMap changed to be fully SVal_NOACCESS are inserted in a list of
725 recycled SecMap. When a new SecMap is needed, a recycled SecMap
726 will be used in preference to allocating a new SecMap. */
727 /* We make a linked list of SecMap. The first LineZ is re-used to
728 implement the linked list. */
729 /* Returns the SecMap following sm in the free list.
730 NULL if sm is the last SecMap. sm must be on the free list. */
731 static inline SecMap *SecMap_freelist_next ( SecMap* sm )
733 tl_assert (sm);
734 tl_assert (sm->magic == SecMap_free_MAGIC);
735 return SVal2Ptr (sm->linesZ[0].dict[1]);
737 static inline void set_SecMap_freelist_next ( SecMap* sm, SecMap* next )
739 tl_assert (sm);
740 tl_assert (sm->magic == SecMap_free_MAGIC);
741 tl_assert (next == NULL || next->magic == SecMap_free_MAGIC);
742 sm->linesZ[0].dict[1] = Ptr2SVal (next);
745 static SecMap *SecMap_freelist = NULL;
746 static UWord SecMap_freelist_length(void)
748 SecMap *sm;
749 UWord n = 0;
751 sm = SecMap_freelist;
752 while (sm) {
753 n++;
754 sm = SecMap_freelist_next (sm);
756 return n;
759 static void push_SecMap_on_freelist(SecMap* sm)
761 if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
762 sm->magic = SecMap_free_MAGIC;
763 set_SecMap_freelist_next(sm, SecMap_freelist);
764 SecMap_freelist = sm;
766 /* Returns a free SecMap if there is one.
767 Otherwise, returns NULL. */
768 static SecMap *pop_SecMap_from_freelist(void)
770 SecMap *sm;
772 sm = SecMap_freelist;
773 if (sm) {
774 tl_assert (sm->magic == SecMap_free_MAGIC);
775 SecMap_freelist = SecMap_freelist_next (sm);
776 if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
778 return sm;
781 static SecMap* shmem__alloc_or_recycle_SecMap ( void )
783 Word i, j;
784 SecMap* sm = pop_SecMap_from_freelist();
786 if (!sm) {
787 sm = shmem__bigchunk_alloc( sizeof(SecMap) );
788 stats__secmaps_allocd++;
789 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
790 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
791 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
793 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
794 tl_assert(sm);
795 sm->magic = SecMap_MAGIC;
796 for (i = 0; i < N_SECMAP_ZLINES; i++) {
797 sm->linesZ[i].dict[0] = SVal_NOACCESS;
798 sm->linesZ[i].dict[1] = SVal_INVALID;
799 sm->linesZ[i].dict[2] = SVal_INVALID;
800 sm->linesZ[i].dict[3] = SVal_INVALID;
801 for (j = 0; j < N_LINE_ARANGE/4; j++)
802 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
804 return sm;
807 typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
808 static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
810 static SecMap* shmem__find_SecMap ( Addr ga )
812 SecMap* sm = NULL;
813 Addr gaKey = shmem__round_to_SecMap_base(ga);
814 // Cache
815 stats__secmaps_search++;
816 if (LIKELY(gaKey == smCache[0].gaKey))
817 return smCache[0].sm;
818 if (LIKELY(gaKey == smCache[1].gaKey)) {
819 SMCacheEnt tmp = smCache[0];
820 smCache[0] = smCache[1];
821 smCache[1] = tmp;
822 return smCache[0].sm;
824 if (gaKey == smCache[2].gaKey) {
825 SMCacheEnt tmp = smCache[1];
826 smCache[1] = smCache[2];
827 smCache[2] = tmp;
828 return smCache[1].sm;
830 // end Cache
831 stats__secmaps_search_slow++;
832 if (VG_(lookupFM)( map_shmem,
833 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
834 tl_assert(sm != NULL);
835 smCache[2] = smCache[1];
836 smCache[1] = smCache[0];
837 smCache[0].gaKey = gaKey;
838 smCache[0].sm = sm;
839 } else {
840 tl_assert(sm == NULL);
842 return sm;
845 /* Scan the SecMap and count the SecMap that can be GC-ed.
846 If really, really does the GC of the SecMap. */
847 /* NOT TO BE CALLED FROM WITHIN libzsm. */
848 static UWord next_SecMap_GC_at = 1000;
849 __attribute__((noinline))
850 static UWord shmem__SecMap_do_GC(Bool really)
852 UWord secmapW = 0;
853 Addr gaKey;
854 UWord examined = 0;
855 UWord ok_GCed = 0;
857 /* First invalidate the smCache */
858 smCache[0].gaKey = 1;
859 smCache[1].gaKey = 1;
860 smCache[2].gaKey = 1;
861 STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));
863 VG_(initIterFM)( map_shmem );
864 while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
865 UWord i;
866 UWord j;
867 UWord n_linesF = 0;
868 SecMap* sm = (SecMap*)secmapW;
869 tl_assert(sm->magic == SecMap_MAGIC);
870 Bool ok_to_GC = True;
872 examined++;
874 /* Deal with the LineZs and the possible LineF of a LineZ. */
875 for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
876 LineZ* lineZ = &sm->linesZ[i];
877 if (lineZ->dict[0] != SVal_INVALID) {
878 ok_to_GC = lineZ->dict[0] == SVal_NOACCESS
879 && !SVal__isC (lineZ->dict[1])
880 && !SVal__isC (lineZ->dict[2])
881 && !SVal__isC (lineZ->dict[3]);
882 } else {
883 LineF *lineF = LineF_Ptr(lineZ);
884 n_linesF++;
885 for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
886 ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
889 if (ok_to_GC)
890 ok_GCed++;
891 if (ok_to_GC && really) {
892 SecMap *fm_sm;
893 Addr fm_gaKey;
894 /* We cannot remove a SecMap from map_shmem while iterating.
895 So, stop iteration, remove from map_shmem, recreate the iteration
896 on the next SecMap. */
897 VG_(doneIterFM) ( map_shmem );
898 /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS.
899 We just need to free the lineF referenced by the linesZ. */
900 if (n_linesF > 0) {
901 for (i = 0; i < N_SECMAP_ZLINES && n_linesF > 0; i++) {
902 LineZ* lineZ = &sm->linesZ[i];
903 if (lineZ->dict[0] == SVal_INVALID) {
904 VG_(freeEltPA)( LineF_pool_allocator, LineF_Ptr(lineZ) );
905 n_linesF--;
909 if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
910 tl_assert (0);
911 stats__secmaps_in_map_shmem--;
912 tl_assert (gaKey == fm_gaKey);
913 tl_assert (sm == fm_sm);
914 stats__secmaps_scanGCed++;
915 push_SecMap_on_freelist (sm);
916 VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
919 VG_(doneIterFM)( map_shmem );
921 if (really) {
922 stats__secmaps_scanGC++;
923 /* Next GC when we approach the max allocated */
924 next_SecMap_GC_at = stats__secmaps_allocd - 1000;
925 /* Unless we GCed less than 10%. We then allow to alloc 10%
926 more before GCing. This avoids doing a lot of costly GC
927 for the worst case : the 'growing phase' of an application
928 that allocates a lot of memory.
929 Worst can can be reproduced e.g. by
930 perf/memrw -t 30000000 -b 1000 -r 1 -l 1
931 that allocates around 30Gb of memory. */
932 if (ok_GCed < stats__secmaps_allocd/10)
933 next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;
937 if (VG_(clo_stats) && really) {
938 VG_(message)(Vg_DebugMsg,
939 "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
940 " next GC at %lu\n",
941 stats__secmaps_scanGC, examined, ok_GCed,
942 next_SecMap_GC_at);
945 return ok_GCed;
948 static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
950 SecMap* sm = shmem__find_SecMap ( ga );
951 if (LIKELY(sm)) {
952 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
953 return sm;
954 } else {
955 /* create a new one */
956 Addr gaKey = shmem__round_to_SecMap_base(ga);
957 sm = shmem__alloc_or_recycle_SecMap();
958 tl_assert(sm);
959 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
960 stats__secmaps_in_map_shmem++;
961 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
962 return sm;
966 /* Returns the nr of linesF which are in use. Note: this is scanning
967 the secmap wordFM. So, this is to be used for statistics only. */
968 __attribute__((noinline))
969 static UWord shmem__SecMap_used_linesF(void)
971 UWord secmapW = 0;
972 Addr gaKey;
973 UWord inUse = 0;
975 VG_(initIterFM)( map_shmem );
976 while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
977 UWord i;
978 SecMap* sm = (SecMap*)secmapW;
979 tl_assert(sm->magic == SecMap_MAGIC);
981 for (i = 0; i < N_SECMAP_ZLINES; i++) {
982 LineZ* lineZ = &sm->linesZ[i];
983 if (lineZ->dict[0] == SVal_INVALID)
984 inUse++;
987 VG_(doneIterFM)( map_shmem );
989 return inUse;
992 /* ------------ LineF and LineZ related ------------ */
994 static void rcinc_LineF ( LineF* lineF ) {
995 UWord i;
996 for (i = 0; i < N_LINE_ARANGE; i++)
997 SVal__rcinc(lineF->w64s[i]);
1000 static void rcdec_LineF ( LineF* lineF ) {
1001 UWord i;
1002 for (i = 0; i < N_LINE_ARANGE; i++)
1003 SVal__rcdec(lineF->w64s[i]);
1006 static void rcinc_LineZ ( LineZ* lineZ ) {
1007 tl_assert(lineZ->dict[0] != SVal_INVALID);
1008 SVal__rcinc(lineZ->dict[0]);
1009 if (lineZ->dict[1] != SVal_INVALID) SVal__rcinc(lineZ->dict[1]);
1010 if (lineZ->dict[2] != SVal_INVALID) SVal__rcinc(lineZ->dict[2]);
1011 if (lineZ->dict[3] != SVal_INVALID) SVal__rcinc(lineZ->dict[3]);
1014 static void rcdec_LineZ ( LineZ* lineZ ) {
1015 tl_assert(lineZ->dict[0] != SVal_INVALID);
1016 SVal__rcdec(lineZ->dict[0]);
1017 if (lineZ->dict[1] != SVal_INVALID) SVal__rcdec(lineZ->dict[1]);
1018 if (lineZ->dict[2] != SVal_INVALID) SVal__rcdec(lineZ->dict[2]);
1019 if (lineZ->dict[3] != SVal_INVALID) SVal__rcdec(lineZ->dict[3]);
1022 inline
1023 static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
1024 Word bix, shft, mask, prep;
1025 tl_assert(ix >= 0);
1026 bix = ix >> 2;
1027 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
1028 mask = 3 << shft;
1029 prep = b2 << shft;
1030 arr[bix] = (arr[bix] & ~mask) | prep;
1033 inline
1034 static UWord read_twobit_array ( UChar* arr, UWord ix ) {
1035 Word bix, shft;
1036 tl_assert(ix >= 0);
1037 bix = ix >> 2;
1038 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
1039 return (arr[bix] >> shft) & 3;
1042 /* We cache one free lineF, to avoid pool allocator calls.
1043 Measurement on firefox has shown that this avoids more than 90%
1044 of the PA calls. */
1045 static LineF *free_lineF = NULL;
1047 /* Allocates a lineF for LineZ. Sets lineZ in a state indicating
1048 lineF has to be used. */
1049 static inline LineF *alloc_LineF_for_Z (LineZ *lineZ)
1051 LineF *lineF;
1053 tl_assert(lineZ->dict[0] == SVal_INVALID);
1055 if (LIKELY(free_lineF)) {
1056 lineF = free_lineF;
1057 free_lineF = NULL;
1058 } else {
1059 lineF = VG_(allocEltPA) ( LineF_pool_allocator );
1061 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1062 lineZ->dict[1] = Ptr2SVal (lineF);
1064 return lineF;
1067 /* rcdec the LineF of lineZ, frees the lineF, and sets lineZ
1068 back to its initial state SVal_NOACCESS (i.e. ready to be
1069 read or written just after SecMap allocation). */
1070 static inline void clear_LineF_of_Z (LineZ *lineZ)
1072 LineF *lineF = LineF_Ptr(lineZ);
1074 rcdec_LineF(lineF);
1075 if (UNLIKELY(free_lineF)) {
1076 VG_(freeEltPA)( LineF_pool_allocator, lineF );
1077 } else {
1078 free_lineF = lineF;
1080 lineZ->dict[0] = SVal_NOACCESS;
1081 lineZ->dict[1] = SVal_INVALID;
1084 /* Given address 'tag', find either the Z or F line containing relevant
1085 data, so it can be read into the cache.
1087 static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
1088 /*OUT*/LineF** fp, Addr tag ) {
1089 LineZ* lineZ;
1090 LineF* lineF;
1091 UWord zix;
1092 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
1093 UWord smoff = shmem__get_SecMap_offset(tag);
1094 /* since smoff is derived from a valid tag, it should be
1095 cacheline-aligned. */
1096 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
1097 zix = smoff >> N_LINE_BITS;
1098 tl_assert(zix < N_SECMAP_ZLINES);
1099 lineZ = &sm->linesZ[zix];
1100 lineF = NULL;
1101 if (lineZ->dict[0] == SVal_INVALID) {
1102 lineF = LineF_Ptr (lineZ);
1103 lineZ = NULL;
1105 *zp = lineZ;
1106 *fp = lineF;
1109 /* Given address 'tag', return the relevant SecMap and the index of
1110 the LineZ within it, in the expectation that the line is to be
1111 overwritten. Regardless of whether 'tag' is currently associated
1112 with a Z or F representation, to rcdec on the current
1113 representation, in recognition of the fact that the contents are
1114 just about to be overwritten. */
1115 static __attribute__((noinline))
1116 void find_Z_for_writing ( /*OUT*/SecMap** smp,
1117 /*OUT*/Word* zixp,
1118 Addr tag ) {
1119 LineZ* lineZ;
1120 UWord zix;
1121 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
1122 UWord smoff = shmem__get_SecMap_offset(tag);
1123 /* since smoff is derived from a valid tag, it should be
1124 cacheline-aligned. */
1125 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
1126 zix = smoff >> N_LINE_BITS;
1127 tl_assert(zix < N_SECMAP_ZLINES);
1128 lineZ = &sm->linesZ[zix];
1129 /* re RCs, we are rcdec_LineZ/clear_LineF_of_Z this LineZ so that new data
1130 can be parked in it. Hence have to rcdec it accordingly. */
1131 /* If lineZ has an associated lineF, free it up. */
1132 if (lineZ->dict[0] == SVal_INVALID)
1133 clear_LineF_of_Z(lineZ);
1134 else
1135 rcdec_LineZ(lineZ);
1136 *smp = sm;
1137 *zixp = zix;
1140 /* ------------ CacheLine and implicit-tree related ------------ */
1142 __attribute__((unused))
1143 static void pp_CacheLine ( CacheLine* cl ) {
1144 Word i;
1145 if (!cl) {
1146 VG_(printf)("%s","pp_CacheLine(NULL)\n");
1147 return;
1149 for (i = 0; i < N_LINE_TREES; i++)
1150 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
1151 for (i = 0; i < N_LINE_ARANGE; i++)
1152 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
/* Map a tree descriptor to an 8-bit mask of the tree slots that hold a
   value under that descriptor: bit i of the result corresponds to
   tree[i].  Each 'case' below is one legal way of tiling the 8 bytes of
   a tree with 8-, 16-, 32- and 64-bit nodes; the trailing comment on
   each case lists the node widths from byte 7 down to byte 0.  Any
   descriptor not listed yields 0, which callers treat as "invalid",
   since every legal descriptor sets at least one bit.
   DESCR packs its 15 flag arguments into a UShort (first argument at
   bit 14, last at bit 0); BYTE packs 8 flags into a UChar.  Both macros
   are local to this function. */
1155 static UChar descr_to_validbits ( UShort descr )
1157 /* a.k.a Party Time for gcc's constant folder */
1158 # define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
1159 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
1160 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
1161 ( (b8_5) << 12) | ( (b8_4) << 11) | \
1162 ( (b8_3) << 10) | ( (b8_2) << 9) | \
1163 ( (b8_1) << 8) | ( (b8_0) << 7) | \
1164 ( (b16_3) << 6) | ( (b32_1) << 5) | \
1165 ( (b16_2) << 4) | ( (b64) << 3) | \
1166 ( (b16_1) << 2) | ( (b32_0) << 1) | \
1167 ( (b16_0) << 0) ) )
1169 # define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
1170 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
1171 ( (bit5) << 5) | ( (bit4) << 4) | \
1172 ( (bit3) << 3) | ( (bit2) << 2) | \
1173 ( (bit1) << 1) | ( (bit0) << 0) ) )
1175 /* these should all get folded out at compile time */
1176 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
1177 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
1178 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
1179 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
1180 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
1181 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
1182 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
1183 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
1184 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
1186 switch (descr) {
1188 +--------------------------------- TREE_DESCR_8_7
1189 | +------------------- TREE_DESCR_8_0
1190 | | +---------------- TREE_DESCR_16_3
1191 | | | +-------------- TREE_DESCR_32_1
1192 | | | | +------------ TREE_DESCR_16_2
1193 | | | | | +--------- TREE_DESCR_64
1194 | | | | | | +------ TREE_DESCR_16_1
1195 | | | | | | | +---- TREE_DESCR_32_0
1196 | | | | | | | | +-- TREE_DESCR_16_0
1197 | | | | | | | | |
1198 | | | | | | | | | GRANULARITY, 7 -> 0 */
1199 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
1200 return BYTE(1,1,1,1,1,1,1,1);
1201 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
1202 return BYTE(1,1,0,1,1,1,1,1);
1203 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
1204 return BYTE(0,1,1,1,1,1,1,1);
1205 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
1206 return BYTE(0,1,0,1,1,1,1,1);
1208 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
1209 return BYTE(1,1,1,1,1,1,0,1);
1210 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
1211 return BYTE(1,1,0,1,1,1,0,1);
1212 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
1213 return BYTE(0,1,1,1,1,1,0,1);
1214 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
1215 return BYTE(0,1,0,1,1,1,0,1);
1217 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
1218 return BYTE(1,1,1,1,0,1,1,1);
1219 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
1220 return BYTE(1,1,0,1,0,1,1,1);
1221 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
1222 return BYTE(0,1,1,1,0,1,1,1);
1223 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
1224 return BYTE(0,1,0,1,0,1,1,1);
1226 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
1227 return BYTE(1,1,1,1,0,1,0,1);
1228 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
1229 return BYTE(1,1,0,1,0,1,0,1);
1230 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
1231 return BYTE(0,1,1,1,0,1,0,1);
1232 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
1233 return BYTE(0,1,0,1,0,1,0,1);
1235 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
1236 return BYTE(0,0,0,1,1,1,1,1);
1237 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
1238 return BYTE(0,0,0,1,1,1,0,1);
1239 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
1240 return BYTE(0,0,0,1,0,1,1,1);
1241 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
1242 return BYTE(0,0,0,1,0,1,0,1);
1244 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
1245 return BYTE(1,1,1,1,0,0,0,1);
1246 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
1247 return BYTE(1,1,0,1,0,0,0,1);
1248 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
1249 return BYTE(0,1,1,1,0,0,0,1);
1250 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
1251 return BYTE(0,1,0,1,0,0,0,1);
1253 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
1254 return BYTE(0,0,0,1,0,0,0,1);
1256 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
1257 return BYTE(0,0,0,0,0,0,0,1);
1259 default: return BYTE(0,0,0,0,0,0,0,0);
1260 /* INVALID - any valid descr produces at least one
1261 valid bit in tree[0..7]*/
1263 /* NOTREACHED*/
1264 tl_assert(0);
1266 # undef DESCR
1267 # undef BYTE
1270 __attribute__((unused))
1271 static Bool is_sane_Descr ( UShort descr ) {
1272 return descr_to_validbits(descr) != 0;
1275 static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
1276 VG_(sprintf)(dst,
1277 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
1278 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
1279 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
1280 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
1281 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
1282 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
1283 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
1284 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
1285 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
1286 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
1287 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
1288 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
1289 (Int)((descr & TREE_DESCR_64) ? 1 : 0),
1290 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
1291 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
1292 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
1295 static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
1296 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
1297 (Int)((byte & 128) ? 1 : 0),
1298 (Int)((byte & 64) ? 1 : 0),
1299 (Int)((byte & 32) ? 1 : 0),
1300 (Int)((byte & 16) ? 1 : 0),
1301 (Int)((byte & 8) ? 1 : 0),
1302 (Int)((byte & 4) ? 1 : 0),
1303 (Int)((byte & 2) ? 1 : 0),
1304 (Int)((byte & 1) ? 1 : 0)
1308 static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
1309 Word i;
1310 UChar validbits = descr_to_validbits(descr);
1311 HChar buf[128], buf2[128]; // large enough
1312 if (validbits == 0)
1313 goto bad;
1314 for (i = 0; i < 8; i++) {
1315 if (validbits & (1<<i)) {
1316 if (tree[i] == SVal_INVALID)
1317 goto bad;
1318 } else {
1319 if (tree[i] != SVal_INVALID)
1320 goto bad;
1323 return True;
1324 bad:
1325 sprintf_Descr( buf, descr );
1326 sprintf_Byte( buf2, validbits );
1327 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
1328 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
1329 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
1330 for (i = 0; i < 8; i++)
1331 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
1332 VG_(printf)("%s","}\n");
1333 return 0;
1336 static Bool is_sane_CacheLine ( CacheLine* cl )
1338 Word tno, cloff;
1340 if (!cl) goto bad;
1342 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1343 UShort descr = cl->descrs[tno];
1344 SVal* tree = &cl->svals[cloff];
1345 if (!is_sane_Descr_and_Tree(descr, tree))
1346 goto bad;
1348 tl_assert(cloff == N_LINE_ARANGE);
1349 return True;
1350 bad:
1351 pp_CacheLine(cl);
1352 return False;
1355 static UShort normalise_tree ( /*MOD*/SVal* tree )
1357 UShort descr;
1358 /* pre: incoming tree[0..7] does not have any invalid shvals, in
1359 particular no zeroes. */
1360 if (CHECK_ZSM
1361 && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
1362 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
1363 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
1364 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
1365 tl_assert(0);
1367 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
1368 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
1369 | TREE_DESCR_8_1 | TREE_DESCR_8_0;
1370 /* build 16-bit layer */
1371 if (tree[1] == tree[0]) {
1372 tree[1] = SVal_INVALID;
1373 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
1374 descr |= TREE_DESCR_16_0;
1376 if (tree[3] == tree[2]) {
1377 tree[3] = SVal_INVALID;
1378 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
1379 descr |= TREE_DESCR_16_1;
1381 if (tree[5] == tree[4]) {
1382 tree[5] = SVal_INVALID;
1383 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
1384 descr |= TREE_DESCR_16_2;
1386 if (tree[7] == tree[6]) {
1387 tree[7] = SVal_INVALID;
1388 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
1389 descr |= TREE_DESCR_16_3;
1391 /* build 32-bit layer */
1392 if (tree[2] == tree[0]
1393 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
1394 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
1395 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
1396 descr |= TREE_DESCR_32_0;
1398 if (tree[6] == tree[4]
1399 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
1400 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
1401 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
1402 descr |= TREE_DESCR_32_1;
1404 /* build 64-bit layer */
1405 if (tree[4] == tree[0]
1406 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
1407 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
1408 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
1409 descr |= TREE_DESCR_64;
1411 return descr;
1414 /* This takes a cacheline where all the data is at the leaves
1415 (w8[..]) and builds a correctly normalised tree. */
1416 static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
1418 Word tno, cloff;
1419 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1420 SVal* tree = &cl->svals[cloff];
1421 cl->descrs[tno] = normalise_tree( tree );
1423 tl_assert(cloff == N_LINE_ARANGE);
1424 if (CHECK_ZSM)
1425 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1426 stats__cline_normalises++;
1430 typedef struct { UChar count; SVal sval; } CountedSVal;
1432 static
1433 void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1434 /*OUT*/Word* dstUsedP,
1435 Word nDst, CacheLine* src )
1437 Word tno, cloff, dstUsed;
1439 tl_assert(nDst == N_LINE_ARANGE);
1440 dstUsed = 0;
1442 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1443 UShort descr = src->descrs[tno];
1444 SVal* tree = &src->svals[cloff];
1446 /* sequentialise the tree described by (descr,tree). */
1447 # define PUT(_n,_v) \
1448 do { dst[dstUsed ].count = (_n); \
1449 dst[dstUsed++].sval = (_v); \
1450 } while (0)
1452 /* byte 0 */
1453 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
1454 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1455 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1456 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
1457 /* byte 1 */
1458 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
1459 /* byte 2 */
1460 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1461 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
1462 /* byte 3 */
1463 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
1464 /* byte 4 */
1465 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1466 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1467 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
1468 /* byte 5 */
1469 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
1470 /* byte 6 */
1471 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1472 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
1473 /* byte 7 */
1474 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
1476 # undef PUT
1477 /* END sequentialise the tree described by (descr,tree). */
1480 tl_assert(cloff == N_LINE_ARANGE);
1481 tl_assert(dstUsed <= nDst);
1483 *dstUsedP = dstUsed;
1486 /* Write the cacheline 'wix' to backing store. Where it ends up
1487 is determined by its tag field. */
1488 static __attribute__((noinline)) void cacheline_wback ( UWord wix )
1490 Word i, j, k, m;
1491 Addr tag;
1492 SecMap* sm;
1493 CacheLine* cl;
1494 LineZ* lineZ;
1495 LineF* lineF;
1496 Word zix, fix, csvalsUsed;
1497 CountedSVal csvals[N_LINE_ARANGE];
1498 SVal sv;
1500 if (0)
1501 VG_(printf)("scache wback line %d\n", (Int)wix);
1503 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1505 tag = cache_shmem.tags0[wix];
1506 cl = &cache_shmem.lyns0[wix];
1508 /* The cache line may have been invalidated; if so, ignore it. */
1509 if (!is_valid_scache_tag(tag))
1510 return;
1512 /* Where are we going to put it? */
1513 sm = NULL;
1514 lineZ = NULL;
1515 lineF = NULL;
1516 zix = fix = -1;
1518 /* find the Z line to write in and rcdec it or the associated F
1519 line. */
1520 find_Z_for_writing( &sm, &zix, tag );
1522 tl_assert(sm);
1523 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1524 lineZ = &sm->linesZ[zix];
1526 /* Generate the data to be stored */
1527 if (CHECK_ZSM)
1528 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1530 csvalsUsed = -1;
1531 sequentialise_CacheLine( csvals, &csvalsUsed,
1532 N_LINE_ARANGE, cl );
1533 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1534 if (0) VG_(printf)("%ld ", csvalsUsed);
1536 lineZ->dict[0] = lineZ->dict[1]
1537 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1539 /* i indexes actual shadow values, k is cursor in csvals */
1540 i = 0;
1541 for (k = 0; k < csvalsUsed; k++) {
1543 sv = csvals[k].sval;
1544 if (CHECK_ZSM)
1545 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1546 /* do we already have it? */
1547 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1548 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1549 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1550 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1551 /* no. look for a free slot. */
1552 if (CHECK_ZSM)
1553 tl_assert(sv != SVal_INVALID);
1554 if (lineZ->dict[0]
1555 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1556 if (lineZ->dict[1]
1557 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1558 if (lineZ->dict[2]
1559 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1560 if (lineZ->dict[3]
1561 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1562 break; /* we'll have to use the f rep */
1563 dict_ok:
1564 m = csvals[k].count;
1565 if (m == 8) {
1566 write_twobit_array( lineZ->ix2s, i+0, j );
1567 write_twobit_array( lineZ->ix2s, i+1, j );
1568 write_twobit_array( lineZ->ix2s, i+2, j );
1569 write_twobit_array( lineZ->ix2s, i+3, j );
1570 write_twobit_array( lineZ->ix2s, i+4, j );
1571 write_twobit_array( lineZ->ix2s, i+5, j );
1572 write_twobit_array( lineZ->ix2s, i+6, j );
1573 write_twobit_array( lineZ->ix2s, i+7, j );
1574 i += 8;
1576 else if (m == 4) {
1577 write_twobit_array( lineZ->ix2s, i+0, j );
1578 write_twobit_array( lineZ->ix2s, i+1, j );
1579 write_twobit_array( lineZ->ix2s, i+2, j );
1580 write_twobit_array( lineZ->ix2s, i+3, j );
1581 i += 4;
1583 else if (m == 1) {
1584 write_twobit_array( lineZ->ix2s, i+0, j );
1585 i += 1;
1587 else if (m == 2) {
1588 write_twobit_array( lineZ->ix2s, i+0, j );
1589 write_twobit_array( lineZ->ix2s, i+1, j );
1590 i += 2;
1592 else {
1593 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1598 if (LIKELY(i == N_LINE_ARANGE)) {
1599 /* Construction of the compressed representation was
1600 successful. */
1601 rcinc_LineZ(lineZ);
1602 stats__cache_Z_wbacks++;
1603 } else {
1604 /* Cannot use the compressed(z) representation. Use the full(f)
1605 rep instead. */
1606 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1607 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1608 lineF = alloc_LineF_for_Z (lineZ);
1609 i = 0;
1610 for (k = 0; k < csvalsUsed; k++) {
1611 if (CHECK_ZSM)
1612 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1613 sv = csvals[k].sval;
1614 if (CHECK_ZSM)
1615 tl_assert(sv != SVal_INVALID);
1616 for (m = csvals[k].count; m > 0; m--) {
1617 lineF->w64s[i] = sv;
1618 i++;
1621 tl_assert(i == N_LINE_ARANGE);
1622 rcinc_LineF(lineF);
1623 stats__cache_F_wbacks++;
1627 /* Fetch the cacheline 'wix' from the backing store. The tag
1628 associated with 'wix' is assumed to have already been filled in;
1629 hence that is used to determine where in the backing store to read
1630 from. */
1631 static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1633 Word i;
1634 Addr tag;
1635 CacheLine* cl;
1636 LineZ* lineZ;
1637 LineF* lineF;
1639 if (0)
1640 VG_(printf)("scache fetch line %d\n", (Int)wix);
1642 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1644 tag = cache_shmem.tags0[wix];
1645 cl = &cache_shmem.lyns0[wix];
1647 /* reject nonsense requests */
1648 tl_assert(is_valid_scache_tag(tag));
1650 lineZ = NULL;
1651 lineF = NULL;
1652 find_ZF_for_reading( &lineZ, &lineF, tag );
1653 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1655 /* expand the data into the bottom layer of the tree, then get
1656 cacheline_normalise to build the descriptor array. */
1657 if (lineF) {
1658 for (i = 0; i < N_LINE_ARANGE; i++) {
1659 cl->svals[i] = lineF->w64s[i];
1661 stats__cache_F_fetches++;
1662 } else {
1663 for (i = 0; i < N_LINE_ARANGE; i++) {
1664 UWord ix = read_twobit_array( lineZ->ix2s, i );
1665 if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
1666 cl->svals[i] = lineZ->dict[ix];
1667 if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
1669 stats__cache_Z_fetches++;
1671 normalise_CacheLine( cl );
1674 /* Invalid the cachelines corresponding to the given range, which
1675 must start and end on a cacheline boundary. */
1676 static void shmem__invalidate_scache_range (Addr ga, SizeT szB)
1678 Word wix;
1680 /* ga must be on a cacheline boundary. */
1681 tl_assert (is_valid_scache_tag (ga));
1682 /* szB must be a multiple of cacheline size. */
1683 tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));
1686 Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
1687 Word nwix = szB / N_LINE_ARANGE;
1689 if (nwix > N_WAY_NENT)
1690 nwix = N_WAY_NENT; // no need to check several times the same entry.
1692 for (wix = 0; wix < nwix; wix++) {
1693 if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
1694 cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
1695 ga_ix++;
1696 if (UNLIKELY(ga_ix == N_WAY_NENT))
1697 ga_ix = 0;
1702 static void shmem__flush_and_invalidate_scache ( void ) {
1703 Word wix;
1704 Addr tag;
1705 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1706 tl_assert(!is_valid_scache_tag(1));
1707 for (wix = 0; wix < N_WAY_NENT; wix++) {
1708 tag = cache_shmem.tags0[wix];
1709 if (tag == 1/*INVALID*/) {
1710 /* already invalid; nothing to do */
1711 } else {
1712 tl_assert(is_valid_scache_tag(tag));
1713 cacheline_wback( wix );
1715 cache_shmem.tags0[wix] = 1/*INVALID*/;
1717 stats__cache_flushes_invals++;
1721 static inline Bool aligned16 ( Addr a ) {
1722 return 0 == (a & 1);
1724 static inline Bool aligned32 ( Addr a ) {
1725 return 0 == (a & 3);
1727 static inline Bool aligned64 ( Addr a ) {
1728 return 0 == (a & 7);
1730 static inline UWord get_cacheline_offset ( Addr a ) {
1731 return (UWord)(a & (N_LINE_ARANGE - 1));
1733 static inline Addr cacheline_ROUNDUP ( Addr a ) {
1734 return ROUNDUP(a, N_LINE_ARANGE);
1736 static inline Addr cacheline_ROUNDDN ( Addr a ) {
1737 return ROUNDDN(a, N_LINE_ARANGE);
1739 static inline UWord get_treeno ( Addr a ) {
1740 return get_cacheline_offset(a) >> 3;
1742 static inline UWord get_tree_offset ( Addr a ) {
1743 return a & 7;
1746 static __attribute__((noinline))
1747 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1748 static inline CacheLine* get_cacheline ( Addr a )
1750 /* tag is 'a' with the in-line offset masked out,
1751 eg a[31]..a[4] 0000 */
1752 Addr tag = a & ~(N_LINE_ARANGE - 1);
1753 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1754 stats__cache_totrefs++;
1755 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1756 return &cache_shmem.lyns0[wix];
1757 } else {
1758 return get_cacheline_MISS( a );
1762 static __attribute__((noinline))
1763 CacheLine* get_cacheline_MISS ( Addr a )
1765 /* tag is 'a' with the in-line offset masked out,
1766 eg a[31]..a[4] 0000 */
1768 CacheLine* cl;
1769 Addr* tag_old_p;
1770 Addr tag = a & ~(N_LINE_ARANGE - 1);
1771 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1773 tl_assert(tag != cache_shmem.tags0[wix]);
1775 /* Dump the old line into the backing store. */
1776 stats__cache_totmisses++;
1778 cl = &cache_shmem.lyns0[wix];
1779 tag_old_p = &cache_shmem.tags0[wix];
1781 if (is_valid_scache_tag( *tag_old_p )) {
1782 /* EXPENSIVE and REDUNDANT: callee does it */
1783 if (CHECK_ZSM)
1784 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1785 cacheline_wback( wix );
1787 /* and reload the new one */
1788 *tag_old_p = tag;
1789 cacheline_fetch( wix );
1790 if (CHECK_ZSM)
1791 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1792 return cl;
1795 static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1796 stats__cline_64to32pulldown++;
1797 switch (toff) {
1798 case 0: case 4:
1799 tl_assert(descr & TREE_DESCR_64);
1800 tree[4] = tree[0];
1801 descr &= ~TREE_DESCR_64;
1802 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1803 break;
1804 default:
1805 tl_assert(0);
1807 return descr;
1810 static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1811 stats__cline_32to16pulldown++;
1812 switch (toff) {
1813 case 0: case 2:
1814 if (!(descr & TREE_DESCR_32_0)) {
1815 descr = pulldown_to_32(tree, 0, descr);
1817 tl_assert(descr & TREE_DESCR_32_0);
1818 tree[2] = tree[0];
1819 descr &= ~TREE_DESCR_32_0;
1820 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1821 break;
1822 case 4: case 6:
1823 if (!(descr & TREE_DESCR_32_1)) {
1824 descr = pulldown_to_32(tree, 4, descr);
1826 tl_assert(descr & TREE_DESCR_32_1);
1827 tree[6] = tree[4];
1828 descr &= ~TREE_DESCR_32_1;
1829 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1830 break;
1831 default:
1832 tl_assert(0);
1834 return descr;
1837 static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1838 stats__cline_16to8pulldown++;
1839 switch (toff) {
1840 case 0: case 1:
1841 if (!(descr & TREE_DESCR_16_0)) {
1842 descr = pulldown_to_16(tree, 0, descr);
1844 tl_assert(descr & TREE_DESCR_16_0);
1845 tree[1] = tree[0];
1846 descr &= ~TREE_DESCR_16_0;
1847 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1848 break;
1849 case 2: case 3:
1850 if (!(descr & TREE_DESCR_16_1)) {
1851 descr = pulldown_to_16(tree, 2, descr);
1853 tl_assert(descr & TREE_DESCR_16_1);
1854 tree[3] = tree[2];
1855 descr &= ~TREE_DESCR_16_1;
1856 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1857 break;
1858 case 4: case 5:
1859 if (!(descr & TREE_DESCR_16_2)) {
1860 descr = pulldown_to_16(tree, 4, descr);
1862 tl_assert(descr & TREE_DESCR_16_2);
1863 tree[5] = tree[4];
1864 descr &= ~TREE_DESCR_16_2;
1865 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1866 break;
1867 case 6: case 7:
1868 if (!(descr & TREE_DESCR_16_3)) {
1869 descr = pulldown_to_16(tree, 6, descr);
1871 tl_assert(descr & TREE_DESCR_16_3);
1872 tree[7] = tree[6];
1873 descr &= ~TREE_DESCR_16_3;
1874 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1875 break;
1876 default:
1877 tl_assert(0);
1879 return descr;
1883 static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1884 UShort mask;
1885 switch (toff) {
1886 case 0:
1887 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1888 tl_assert( (descr & mask) == mask );
1889 descr &= ~mask;
1890 descr |= TREE_DESCR_16_0;
1891 break;
1892 case 2:
1893 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1894 tl_assert( (descr & mask) == mask );
1895 descr &= ~mask;
1896 descr |= TREE_DESCR_16_1;
1897 break;
1898 case 4:
1899 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1900 tl_assert( (descr & mask) == mask );
1901 descr &= ~mask;
1902 descr |= TREE_DESCR_16_2;
1903 break;
1904 case 6:
1905 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1906 tl_assert( (descr & mask) == mask );
1907 descr &= ~mask;
1908 descr |= TREE_DESCR_16_3;
1909 break;
1910 default:
1911 tl_assert(0);
1913 return descr;
1916 static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1917 UShort mask;
1918 switch (toff) {
1919 case 0:
1920 if (!(descr & TREE_DESCR_16_0))
1921 descr = pullup_descr_to_16(descr, 0);
1922 if (!(descr & TREE_DESCR_16_1))
1923 descr = pullup_descr_to_16(descr, 2);
1924 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1925 tl_assert( (descr & mask) == mask );
1926 descr &= ~mask;
1927 descr |= TREE_DESCR_32_0;
1928 break;
1929 case 4:
1930 if (!(descr & TREE_DESCR_16_2))
1931 descr = pullup_descr_to_16(descr, 4);
1932 if (!(descr & TREE_DESCR_16_3))
1933 descr = pullup_descr_to_16(descr, 6);
1934 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1935 tl_assert( (descr & mask) == mask );
1936 descr &= ~mask;
1937 descr |= TREE_DESCR_32_1;
1938 break;
1939 default:
1940 tl_assert(0);
1942 return descr;
1945 static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1946 switch (toff) {
1947 case 0: case 4:
1948 return 0 != (descr & TREE_DESCR_64);
1949 default:
1950 tl_assert(0);
1954 static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1955 switch (toff) {
1956 case 0:
1957 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1958 case 2:
1959 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1960 case 4:
1961 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1962 case 6:
1963 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1964 default:
1965 tl_assert(0);
1969 /* ------------ Cache management ------------ */
/* Thin wrapper: hands off to shmem__flush_and_invalidate_scache. */
static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}
1977 static void zsm_init ( void )
1979 tl_assert( sizeof(UWord) == sizeof(Addr) );
1981 tl_assert(map_shmem == NULL);
1982 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1983 HG_(free),
1984 NULL/*unboxed UWord cmp*/);
1985 /* Invalidate all cache entries. */
1986 tl_assert(!is_valid_scache_tag(1));
1987 for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
1988 cache_shmem.tags0[wix] = 1/*INVALID*/;
1991 LineF_pool_allocator = VG_(newPA) (
1992 sizeof(LineF),
1993 /* Nr elements/pool to fill a core arena block
1994 taking some arena overhead into account. */
1995 (4 * 1024 * 1024 - 200)/sizeof(LineF),
1996 HG_(zalloc),
1997 "libhb.LineF_storage.pool",
1998 HG_(free)
2001 /* a SecMap must contain an integral number of CacheLines */
2002 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
2003 /* also ... a CacheLine holds an integral number of trees */
2004 tl_assert(0 == (N_LINE_ARANGE % 8));
2007 /////////////////////////////////////////////////////////////////
2008 /////////////////////////////////////////////////////////////////
2009 // //
2010 // SECTION END compressed shadow memory //
2011 // //
2012 /////////////////////////////////////////////////////////////////
2013 /////////////////////////////////////////////////////////////////
2017 /////////////////////////////////////////////////////////////////
2018 /////////////////////////////////////////////////////////////////
2019 // //
2020 // SECTION BEGIN vts primitives //
2021 // //
2022 /////////////////////////////////////////////////////////////////
2023 /////////////////////////////////////////////////////////////////
2026 /* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
2027 being compact stand-ins for Thr*'s. Use these functions to map
2028 between them. */
2029 static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
2030 static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
2032 __attribute__((noreturn))
2033 static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
2035 if (due_to_nThrs) {
2036 const HChar* s =
2037 "\n"
2038 "Helgrind: cannot continue, run aborted: too many threads.\n"
2039 "Sorry. Helgrind can only handle programs that create\n"
2040 "%'llu or fewer threads over their entire lifetime.\n"
2041 "\n";
2042 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
2043 } else {
2044 const HChar* s =
2045 "\n"
2046 "Helgrind: cannot continue, run aborted: too many\n"
2047 "synchronisation events. Sorry. Helgrind can only handle\n"
2048 "programs which perform %'llu or fewer\n"
2049 "inter-thread synchronisation events (locks, unlocks, etc).\n"
2050 "\n";
2051 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
2053 VG_(exit)(1);
2054 /*NOTREACHED*/
2055 tl_assert(0); /*wtf?!*/
2059 /* The dead thread (ThrID, actually) tables. A thread may only be
2060 listed here if we have been notified thereof by libhb_async_exit.
2061 New entries are added at the end. The order isn't important, but
2062 the ThrID values must be unique.
2063 verydead_thread_table_not_pruned lists the identity of the threads
2064 that died since the previous round of pruning.
2065 Once pruning is done, these ThrID are added in verydead_thread_table.
2066 We don't actually need to keep the set of threads that have ever died --
2067 only the threads that have died since the previous round of
2068 pruning. But it's useful for sanity check purposes to keep the
2069 entire set, so we do. */
2070 static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
2071 static XArray* /* of ThrID */ verydead_thread_table = NULL;
2073 /* Arbitrary total ordering on ThrIDs. */
2074 static Int cmp__ThrID ( const void* v1, const void* v2 ) {
2075 ThrID id1 = *(const ThrID*)v1;
2076 ThrID id2 = *(const ThrID*)v2;
2077 if (id1 < id2) return -1;
2078 if (id1 > id2) return 1;
2079 return 0;
2082 static void verydead_thread_tables_init ( void )
2084 tl_assert(!verydead_thread_table);
2085 tl_assert(!verydead_thread_table_not_pruned);
2086 verydead_thread_table
2087 = VG_(newXA)( HG_(zalloc),
2088 "libhb.verydead_thread_table_init.1",
2089 HG_(free), sizeof(ThrID) );
2090 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
2091 verydead_thread_table_not_pruned
2092 = VG_(newXA)( HG_(zalloc),
2093 "libhb.verydead_thread_table_init.2",
2094 HG_(free), sizeof(ThrID) );
2095 VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
2098 static void verydead_thread_table_sort_and_check (XArray* thrids)
2100 UWord i;
2102 VG_(sortXA)( thrids );
2103 /* Sanity check: check for unique .sts.thr values. */
2104 UWord nBT = VG_(sizeXA)( thrids );
2105 if (nBT > 0) {
2106 ThrID thrid1, thrid2;
2107 thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
2108 for (i = 1; i < nBT; i++) {
2109 thrid1 = thrid2;
2110 thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
2111 tl_assert(thrid1 < thrid2);
2114 /* Ok, so the dead thread table thrids has unique and in-order keys. */
2117 /* A VTS contains .ts, its vector clock, and also .id, a field to hold
2118 a backlink for the caller's convenience. Since we have no idea
2119 what to set that to in the library, it always gets set to
2120 VtsID_INVALID. */
2121 typedef
2122 struct {
2123 VtsID id;
2124 UInt usedTS;
2125 UInt sizeTS;
2126 ScalarTS ts[0];
2128 VTS;
2130 /* Allocate a VTS capable of storing 'sizeTS' entries. */
2131 static VTS* VTS__new ( const HChar* who, UInt sizeTS );
2133 /* Make a clone of 'vts', sizing the new array to exactly match the
2134 number of ScalarTSs present. */
2135 static VTS* VTS__clone ( const HChar* who, VTS* vts );
2137 /* Make a clone of 'vts' with the thrids in 'thrids' removed. The new
2138 array is sized exactly to hold the number of required elements.
2139 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
2140 must be in strictly increasing order. */
2141 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
2143 /* Delete this VTS in its entirety. */
2144 static void VTS__delete ( VTS* vts );
2146 /* Create a new singleton VTS in 'out'. Caller must have
2147 pre-allocated 'out' sufficiently big to hold the result in all
2148 possible cases. */
2149 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
2151 /* Create in 'out' a VTS which is the same as 'vts' except with
2152 vts[me]++, so to speak. Caller must have pre-allocated 'out'
2153 sufficiently big to hold the result in all possible cases. */
2154 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
2156 /* Create in 'out' a VTS which is the join (max) of 'a' and
2157 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
2158 the result in all possible cases. */
2159 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
2161 /* Compute the partial ordering relation of the two args. Although we
2162 could be completely general and return an enumeration value (EQ,
2163 LT, GT, UN), in fact we only need LEQ, and so we may as well
2164 hardwire that fact.
2166 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
2167 invalid ThrID). In the latter case, the returned ThrID indicates
2168 the discovered point for which they are not. There may be more
2169 than one such point, but we only care about seeing one of them, not
2170 all of them. This rather strange convention is used because
2171 sometimes we want to know the actual index at which they first
2172 differ. */
2173 static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
2175 /* Compute an arbitrary structural (total) ordering on the two args,
2176 based on their VCs, so they can be looked up in a table, tree, etc.
2177 Returns -1, 0 or 1. */
2178 static Word VTS__cmp_structural ( VTS* a, VTS* b );
2180 /* Debugging only. Display the given VTS. */
2181 static void VTS__show ( const VTS* vts );
2183 /* Debugging only. Return vts[index], so to speak. */
2184 static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
2186 /* Notify the VTS machinery that a thread has been declared
2187 comprehensively dead: that is, it has done an async exit AND it has
2188 been joined with. This should ensure that its local clocks (.viR
2189 and .viW) will never again change, and so all mentions of this
2190 thread from all VTSs in the system may be removed. */
2191 static void VTS__declare_thread_very_dead ( Thr* idx );
2193 /*--------------- to do with Vector Timestamps ---------------*/
2195 static Bool is_sane_VTS ( VTS* vts )
2197 UWord i, n;
2198 ScalarTS *st1, *st2;
2199 if (!vts) return False;
2200 if (vts->usedTS > vts->sizeTS) return False;
2201 n = vts->usedTS;
2202 if (n == 1) {
2203 st1 = &vts->ts[0];
2204 if (st1->tym == 0)
2205 return False;
2207 else
2208 if (n >= 2) {
2209 for (i = 0; i < n-1; i++) {
2210 st1 = &vts->ts[i];
2211 st2 = &vts->ts[i+1];
2212 if (st1->thrid >= st2->thrid)
2213 return False;
2214 if (st1->tym == 0 || st2->tym == 0)
2215 return False;
2218 return True;
2222 /* Create a new, empty VTS.
2224 static VTS* VTS__new ( const HChar* who, UInt sizeTS )
2226 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
2227 tl_assert(vts->usedTS == 0);
2228 vts->sizeTS = sizeTS;
2229 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
2230 return vts;
2233 /* Clone this VTS.
2235 static VTS* VTS__clone ( const HChar* who, VTS* vts )
2237 tl_assert(vts);
2238 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2239 UInt nTS = vts->usedTS;
2240 VTS* clone = VTS__new(who, nTS);
2241 clone->id = vts->id;
2242 clone->sizeTS = nTS;
2243 clone->usedTS = nTS;
2244 UInt i;
2245 for (i = 0; i < nTS; i++) {
2246 clone->ts[i] = vts->ts[i];
2248 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2249 return clone;
2253 /* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
2254 must be in strictly increasing order. We could obviously do this
2255 much more efficiently (in linear time) if necessary.
2257 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
2259 UInt i, j;
2260 tl_assert(vts);
2261 tl_assert(thridsToDel);
2262 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2263 UInt nTS = vts->usedTS;
2264 /* Figure out how many ScalarTSs will remain in the output. */
2265 UInt nReq = nTS;
2266 for (i = 0; i < nTS; i++) {
2267 ThrID thrid = vts->ts[i].thrid;
2268 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2269 nReq--;
2271 tl_assert(nReq <= nTS);
2272 /* Copy the ones that will remain. */
2273 VTS* res = VTS__new(who, nReq);
2274 j = 0;
2275 for (i = 0; i < nTS; i++) {
2276 ThrID thrid = vts->ts[i].thrid;
2277 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2278 continue;
2279 res->ts[j++] = vts->ts[i];
2281 tl_assert(j == nReq);
2282 tl_assert(j == res->sizeTS);
2283 res->usedTS = j;
2284 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2285 return res;
2289 /* Delete this VTS in its entirety.
2291 static void VTS__delete ( VTS* vts )
2293 tl_assert(vts);
2294 tl_assert(vts->usedTS <= vts->sizeTS);
2295 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2296 HG_(free)(vts);
2300 /* Create a new singleton VTS.
2302 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
2304 tl_assert(thr);
2305 tl_assert(tym >= 1);
2306 tl_assert(out);
2307 tl_assert(out->usedTS == 0);
2308 tl_assert(out->sizeTS >= 1);
2309 UInt hi = out->usedTS++;
2310 out->ts[hi].thrid = Thr__to_ThrID(thr);
2311 out->ts[hi].tym = tym;
2315 /* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
2316 not modified.
2318 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
2320 UInt i, n;
2321 ThrID me_thrid;
2322 Bool found = False;
2324 stats__vts__tick++;
2326 tl_assert(out);
2327 tl_assert(out->usedTS == 0);
2328 if (vts->usedTS >= ThrID_MAX_VALID)
2329 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2330 tl_assert(out->sizeTS >= 1 + vts->usedTS);
2332 tl_assert(me);
2333 me_thrid = Thr__to_ThrID(me);
2334 tl_assert(is_sane_VTS(vts));
2335 n = vts->usedTS;
2337 /* Copy all entries which precede 'me'. */
2338 for (i = 0; i < n; i++) {
2339 ScalarTS* here = &vts->ts[i];
2340 if (UNLIKELY(here->thrid >= me_thrid))
2341 break;
2342 UInt hi = out->usedTS++;
2343 out->ts[hi] = *here;
2346 /* 'i' now indicates the next entry to copy, if any.
2347 There are 3 possibilities:
2348 (a) there is no next entry (we used them all up already):
2349 add (me_thrid,1) to the output, and quit
2350 (b) there is a next entry, and its thrid > me_thrid:
2351 add (me_thrid,1) to the output, then copy the remaining entries
2352 (c) there is a next entry, and its thrid == me_thrid:
2353 copy it to the output but increment its timestamp value.
2354 Then copy the remaining entries. (c) is the common case.
2356 tl_assert(i >= 0 && i <= n);
2357 if (i == n) { /* case (a) */
2358 UInt hi = out->usedTS++;
2359 out->ts[hi].thrid = me_thrid;
2360 out->ts[hi].tym = 1;
2361 } else {
2362 /* cases (b) and (c) */
2363 ScalarTS* here = &vts->ts[i];
2364 if (me_thrid == here->thrid) { /* case (c) */
2365 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
2366 /* We're hosed. We have to stop. */
2367 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2369 UInt hi = out->usedTS++;
2370 out->ts[hi].thrid = here->thrid;
2371 out->ts[hi].tym = here->tym + 1;
2372 i++;
2373 found = True;
2374 } else { /* case (b) */
2375 UInt hi = out->usedTS++;
2376 out->ts[hi].thrid = me_thrid;
2377 out->ts[hi].tym = 1;
2379 /* And copy any remaining entries. */
2380 for (/*keepgoing*/; i < n; i++) {
2381 ScalarTS* here2 = &vts->ts[i];
2382 UInt hi = out->usedTS++;
2383 out->ts[hi] = *here2;
2387 tl_assert(is_sane_VTS(out));
2388 tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
2389 tl_assert(out->usedTS <= out->sizeTS);
2393 /* Return a new VTS constructed as the join (max) of the 2 args.
2394 Neither arg is modified.
2396 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
2398 UInt ia, ib, useda, usedb;
2399 ULong tyma, tymb, tymMax;
2400 ThrID thrid;
2401 UInt ncommon = 0;
2403 stats__vts__join++;
2405 tl_assert(a);
2406 tl_assert(b);
2407 useda = a->usedTS;
2408 usedb = b->usedTS;
2410 tl_assert(out);
2411 tl_assert(out->usedTS == 0);
2412 /* overly conservative test, but doing better involves comparing
2413 the two VTSs, which we don't want to do at this point. */
2414 if (useda + usedb >= ThrID_MAX_VALID)
2415 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2416 tl_assert(out->sizeTS >= useda + usedb);
2418 ia = ib = 0;
2420 while (1) {
2422 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2423 from a and b in order, where thrid is the next ThrID
2424 occurring in either a or b, and tyma/b are the relevant
2425 scalar timestamps, taking into account implicit zeroes. */
2426 tl_assert(ia >= 0 && ia <= useda);
2427 tl_assert(ib >= 0 && ib <= usedb);
2429 if (ia == useda && ib == usedb) {
2430 /* both empty - done */
2431 break;
2433 } else if (ia == useda && ib != usedb) {
2434 /* a empty, use up b */
2435 ScalarTS* tmpb = &b->ts[ib];
2436 thrid = tmpb->thrid;
2437 tyma = 0;
2438 tymb = tmpb->tym;
2439 ib++;
2441 } else if (ia != useda && ib == usedb) {
2442 /* b empty, use up a */
2443 ScalarTS* tmpa = &a->ts[ia];
2444 thrid = tmpa->thrid;
2445 tyma = tmpa->tym;
2446 tymb = 0;
2447 ia++;
2449 } else {
2450 /* both not empty; extract lowest-ThrID'd triple */
2451 ScalarTS* tmpa = &a->ts[ia];
2452 ScalarTS* tmpb = &b->ts[ib];
2453 if (tmpa->thrid < tmpb->thrid) {
2454 /* a has the lowest unconsidered ThrID */
2455 thrid = tmpa->thrid;
2456 tyma = tmpa->tym;
2457 tymb = 0;
2458 ia++;
2459 } else if (tmpa->thrid > tmpb->thrid) {
2460 /* b has the lowest unconsidered ThrID */
2461 thrid = tmpb->thrid;
2462 tyma = 0;
2463 tymb = tmpb->tym;
2464 ib++;
2465 } else {
2466 /* they both next mention the same ThrID */
2467 tl_assert(tmpa->thrid == tmpb->thrid);
2468 thrid = tmpa->thrid; /* == tmpb->thrid */
2469 tyma = tmpa->tym;
2470 tymb = tmpb->tym;
2471 ia++;
2472 ib++;
2473 ncommon++;
2477 /* having laboriously determined (thr, tyma, tymb), do something
2478 useful with it. */
2479 tymMax = tyma > tymb ? tyma : tymb;
2480 if (tymMax > 0) {
2481 UInt hi = out->usedTS++;
2482 out->ts[hi].thrid = thrid;
2483 out->ts[hi].tym = tymMax;
2488 tl_assert(is_sane_VTS(out));
2489 tl_assert(out->usedTS <= out->sizeTS);
2490 tl_assert(out->usedTS == useda + usedb - ncommon);
2494 /* Determine if 'a' <= 'b', in the partial ordering. Returns zero if
2495 they are, or the first ThrID for which they are not (no valid ThrID
2496 has the value zero). This rather strange convention is used
2497 because sometimes we want to know the actual index at which they
2498 first differ. */
2499 static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
2501 Word ia, ib, useda, usedb;
2502 ULong tyma, tymb;
2504 stats__vts__cmpLEQ++;
2506 tl_assert(a);
2507 tl_assert(b);
2508 useda = a->usedTS;
2509 usedb = b->usedTS;
2511 ia = ib = 0;
2513 while (1) {
2515 /* This logic is to enumerate doubles (tyma, tymb) drawn
2516 from a and b in order, and tyma/b are the relevant
2517 scalar timestamps, taking into account implicit zeroes. */
2518 ThrID thrid;
2520 tl_assert(ia >= 0 && ia <= useda);
2521 tl_assert(ib >= 0 && ib <= usedb);
2523 if (ia == useda && ib == usedb) {
2524 /* both empty - done */
2525 break;
2527 } else if (ia == useda && ib != usedb) {
2528 /* a empty, use up b */
2529 ScalarTS* tmpb = &b->ts[ib];
2530 tyma = 0;
2531 tymb = tmpb->tym;
2532 thrid = tmpb->thrid;
2533 ib++;
2535 } else if (ia != useda && ib == usedb) {
2536 /* b empty, use up a */
2537 ScalarTS* tmpa = &a->ts[ia];
2538 tyma = tmpa->tym;
2539 thrid = tmpa->thrid;
2540 tymb = 0;
2541 ia++;
2543 } else {
2544 /* both not empty; extract lowest-ThrID'd triple */
2545 ScalarTS* tmpa = &a->ts[ia];
2546 ScalarTS* tmpb = &b->ts[ib];
2547 if (tmpa->thrid < tmpb->thrid) {
2548 /* a has the lowest unconsidered ThrID */
2549 tyma = tmpa->tym;
2550 thrid = tmpa->thrid;
2551 tymb = 0;
2552 ia++;
2554 else
2555 if (tmpa->thrid > tmpb->thrid) {
2556 /* b has the lowest unconsidered ThrID */
2557 tyma = 0;
2558 tymb = tmpb->tym;
2559 thrid = tmpb->thrid;
2560 ib++;
2561 } else {
2562 /* they both next mention the same ThrID */
2563 tl_assert(tmpa->thrid == tmpb->thrid);
2564 tyma = tmpa->tym;
2565 thrid = tmpa->thrid;
2566 tymb = tmpb->tym;
2567 ia++;
2568 ib++;
2572 /* having laboriously determined (tyma, tymb), do something
2573 useful with it. */
2574 if (tyma > tymb) {
2575 /* not LEQ at this index. Quit, since the answer is
2576 determined already. */
2577 tl_assert(thrid >= 1024);
2578 return thrid;
2582 return 0; /* all points are LEQ => return an invalid ThrID */
2586 /* Compute an arbitrary structural (total) ordering on the two args,
2587 based on their VCs, so they can be looked up in a table, tree, etc.
2588 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
2589 performance critical so there is some effort expended to make it sa
2590 fast as possible.
2592 Word VTS__cmp_structural ( VTS* a, VTS* b )
2594 /* We just need to generate an arbitrary total ordering based on
2595 a->ts and b->ts. Preferably do it in a way which comes across likely
2596 differences relatively quickly. */
2597 Word i;
2598 Word useda = 0, usedb = 0;
2599 ScalarTS *ctsa = NULL, *ctsb = NULL;
2601 stats__vts__cmp_structural++;
2603 tl_assert(a);
2604 tl_assert(b);
2606 ctsa = &a->ts[0]; useda = a->usedTS;
2607 ctsb = &b->ts[0]; usedb = b->usedTS;
2609 if (LIKELY(useda == usedb)) {
2610 ScalarTS *tmpa = NULL, *tmpb = NULL;
2611 stats__vts__cmp_structural_slow++;
2612 /* Same length vectors. Find the first difference, if any, as
2613 fast as possible. */
2614 for (i = 0; i < useda; i++) {
2615 tmpa = &ctsa[i];
2616 tmpb = &ctsb[i];
2617 if (LIKELY(tmpa->tym == tmpb->tym
2618 && tmpa->thrid == tmpb->thrid))
2619 continue;
2620 else
2621 break;
2623 if (UNLIKELY(i == useda)) {
2624 /* They're identical. */
2625 return 0;
2626 } else {
2627 tl_assert(i >= 0 && i < useda);
2628 if (tmpa->tym < tmpb->tym) return -1;
2629 if (tmpa->tym > tmpb->tym) return 1;
2630 if (tmpa->thrid < tmpb->thrid) return -1;
2631 if (tmpa->thrid > tmpb->thrid) return 1;
2632 /* we just established them as non-identical, hence: */
2634 /*NOTREACHED*/
2635 tl_assert(0);
2638 if (useda < usedb) return -1;
2639 if (useda > usedb) return 1;
2640 /*NOTREACHED*/
2641 tl_assert(0);
2645 /* Debugging only. Display the given VTS.
2647 static void VTS__show ( const VTS* vts )
2649 Word i, n;
2650 tl_assert(vts);
2652 VG_(printf)("[");
2653 n = vts->usedTS;
2654 for (i = 0; i < n; i++) {
2655 const ScalarTS *st = &vts->ts[i];
2656 VG_(printf)(i < n-1 ? "%d:%llu " : "%d:%llu", st->thrid, (ULong)st->tym);
2658 VG_(printf)("]");
2662 /* Debugging only. Return vts[index], so to speak.
2664 ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2666 UWord i, n;
2667 ThrID idx_thrid = Thr__to_ThrID(idx);
2668 stats__vts__indexat_slow++;
2669 tl_assert(vts);
2670 n = vts->usedTS;
2671 for (i = 0; i < n; i++) {
2672 ScalarTS* st = &vts->ts[i];
2673 if (st->thrid == idx_thrid)
2674 return st->tym;
2676 return 0;
2680 /* See comment on prototype above.
2682 static void VTS__declare_thread_very_dead ( Thr* thr )
2684 if (0) VG_(printf)("VTQ: tae %p\n", thr);
2686 tl_assert(thr->llexit_done);
2687 tl_assert(thr->joinedwith_done);
2689 ThrID nyu;
2690 nyu = Thr__to_ThrID(thr);
2691 VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );
2693 /* We can only get here if we're assured that we'll never again
2694 need to look at this thread's ::viR or ::viW. Set them to
2695 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2696 mostly so that we don't wind up pruning them (as that would be
2697 nonsensical: the only interesting ScalarTS entry for a dead
2698 thread is its own index, and the pruning will remove that.). */
2699 VtsID__rcdec(thr->viR);
2700 VtsID__rcdec(thr->viW);
2701 thr->viR = VtsID_INVALID;
2702 thr->viW = VtsID_INVALID;
2706 /////////////////////////////////////////////////////////////////
2707 /////////////////////////////////////////////////////////////////
2708 // //
2709 // SECTION END vts primitives //
2710 // //
2711 /////////////////////////////////////////////////////////////////
2712 /////////////////////////////////////////////////////////////////
2716 /////////////////////////////////////////////////////////////////
2717 /////////////////////////////////////////////////////////////////
2718 // //
2719 // SECTION BEGIN main library //
2720 // //
2721 /////////////////////////////////////////////////////////////////
2722 /////////////////////////////////////////////////////////////////
2725 /////////////////////////////////////////////////////////
2726 // //
2727 // VTS set //
2728 // //
2729 /////////////////////////////////////////////////////////
2731 static WordFM* /* WordFM VTS* void */ vts_set = NULL;
2733 static void vts_set_init ( void )
2735 tl_assert(!vts_set);
2736 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2737 HG_(free),
2738 (Word(*)(UWord,UWord))VTS__cmp_structural );
2741 /* Given a VTS, look in vts_set to see if we already have a
2742 structurally identical one. If yes, return the pair (True, pointer
2743 to the existing one). If no, clone this one, add the clone to the
2744 set, and return (False, pointer to the clone). */
2745 static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
2747 UWord keyW, valW;
2748 stats__vts_set__focaa++;
2749 tl_assert(cand->id == VtsID_INVALID);
2750 /* lookup cand (by value) */
2751 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2752 /* found it */
2753 tl_assert(valW == 0);
2754 /* if this fails, cand (by ref) was already present (!) */
2755 tl_assert(keyW != (UWord)cand);
2756 *res = (VTS*)keyW;
2757 return True;
2758 } else {
2759 /* not present. Clone, add and return address of clone. */
2760 stats__vts_set__focaa_a++;
2761 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
2762 tl_assert(clone != cand);
2763 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
2764 *res = clone;
2765 return False;
2770 /////////////////////////////////////////////////////////
2771 // //
2772 // VTS table //
2773 // //
2774 /////////////////////////////////////////////////////////
2776 static void VtsID__invalidate_caches ( void ); /* fwds */
2778 /* A type to hold VTS table entries. Invariants:
2779 If .vts == NULL, then this entry is not in use, so:
2780 - .rc == 0
2781 - this entry is on the freelist (unfortunately, does not imply
2782 any constraints on value for u.freelink)
2783 If .vts != NULL, then this entry is in use:
2784 - .vts is findable in vts_set
2785 - .vts->id == this entry number
2786 - no specific value for .rc (even 0 is OK)
2787 - this entry is not on freelist, so u.freelink == VtsID_INVALID
2789 typedef
2790 struct {
2791 VTS* vts; /* vts, in vts_set */
2792 UWord rc; /* reference count - enough for entire aspace */
2793 union {
2794 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2795 VtsID remap; /* used only during pruning, for used entries */
2796 } u;
2797 /* u.freelink only used when vts == NULL,
2798 u.remap only used when vts != NULL, during pruning. */
2800 VtsTE;
2802 /* The VTS table. */
2803 static XArray* /* of VtsTE */ vts_tab = NULL;
2805 /* An index into the VTS table, indicating the start of the list of
2806 free (available for use) entries. If the list is empty, this is
2807 VtsID_INVALID. */
2808 static VtsID vts_tab_freelist = VtsID_INVALID;
2810 /* Do a GC of vts_tab when the freelist becomes empty AND the size of
2811 vts_tab equals or exceeds this size. After GC, the value here is
2812 set appropriately so as to check for the next GC point. */
2813 static Word vts_next_GC_at = 1000;
2815 static void vts_tab_init ( void )
2817 vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2818 HG_(free), sizeof(VtsTE) );
2819 vts_tab_freelist = VtsID_INVALID;
2822 /* Add ii to the free list, checking that it looks out-of-use. */
2823 static void add_to_free_list ( VtsID ii )
2825 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2826 tl_assert(ie->vts == NULL);
2827 tl_assert(ie->rc == 0);
2828 tl_assert(ie->u.freelink == VtsID_INVALID);
2829 ie->u.freelink = vts_tab_freelist;
2830 vts_tab_freelist = ii;
2833 /* Get an entry from the free list. This will return VtsID_INVALID if
2834 the free list is empty. */
2835 static VtsID get_from_free_list ( void )
2837 VtsID ii;
2838 VtsTE* ie;
2839 if (vts_tab_freelist == VtsID_INVALID)
2840 return VtsID_INVALID;
2841 ii = vts_tab_freelist;
2842 ie = VG_(indexXA)( vts_tab, ii );
2843 tl_assert(ie->vts == NULL);
2844 tl_assert(ie->rc == 0);
2845 vts_tab_freelist = ie->u.freelink;
2846 return ii;
2849 /* Produce a new VtsID that can be used, either by getting it from
2850 the freelist, or, if that is empty, by expanding vts_tab. */
2851 static VtsID get_new_VtsID ( void )
2853 VtsID ii;
2854 VtsTE te;
2855 ii = get_from_free_list();
2856 if (ii != VtsID_INVALID)
2857 return ii;
2858 te.vts = NULL;
2859 te.rc = 0;
2860 te.u.freelink = VtsID_INVALID;
2861 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2862 return ii;
2866 /* Indirect callback from lib_zsm. */
2867 static void VtsID__rcinc ( VtsID ii )
2869 VtsTE* ie;
2870 /* VG_(indexXA) does a range check for us */
2871 ie = VG_(indexXA)( vts_tab, ii );
2872 tl_assert(ie->vts); /* else it's not in use */
2873 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2874 tl_assert(ie->vts->id == ii);
2875 ie->rc++;
2878 /* Indirect callback from lib_zsm. */
2879 static void VtsID__rcdec ( VtsID ii )
2881 VtsTE* ie;
2882 /* VG_(indexXA) does a range check for us */
2883 ie = VG_(indexXA)( vts_tab, ii );
2884 tl_assert(ie->vts); /* else it's not in use */
2885 tl_assert(ie->rc > 0); /* else RC snafu */
2886 tl_assert(ie->vts->id == ii);
2887 ie->rc--;
2891 /* Look up 'cand' in our collection of VTSs. If present, return the
2892 VtsID for the pre-existing version. If not present, clone it, add
2893 the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2894 it, and return that. */
2895 static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
2897 VTS* in_tab = NULL;
2898 tl_assert(cand->id == VtsID_INVALID);
2899 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2900 tl_assert(in_tab);
2901 if (already_have) {
2902 /* We already have a copy of 'cand'. Use that. */
2903 VtsTE* ie;
2904 tl_assert(in_tab->id != VtsID_INVALID);
2905 ie = VG_(indexXA)( vts_tab, in_tab->id );
2906 tl_assert(ie->vts == in_tab);
2907 return in_tab->id;
2908 } else {
2909 VtsID ii = get_new_VtsID();
2910 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2911 ie->vts = in_tab;
2912 ie->rc = 0;
2913 ie->u.freelink = VtsID_INVALID;
2914 in_tab->id = ii;
2915 return ii;
2920 static void show_vts_stats ( const HChar* caller )
2922 UWord nSet, nTab, nLive;
2923 ULong totrc;
2924 UWord n, i;
2925 nSet = VG_(sizeFM)( vts_set );
2926 nTab = VG_(sizeXA)( vts_tab );
2927 totrc = 0;
2928 nLive = 0;
2929 n = VG_(sizeXA)( vts_tab );
2930 for (i = 0; i < n; i++) {
2931 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2932 if (ie->vts) {
2933 nLive++;
2934 totrc += (ULong)ie->rc;
2935 } else {
2936 tl_assert(ie->rc == 0);
2939 VG_(printf)(" show_vts_stats %s\n", caller);
2940 VG_(printf)(" vts_tab size %4lu\n", nTab);
2941 VG_(printf)(" vts_tab live %4lu\n", nLive);
2942 VG_(printf)(" vts_set size %4lu\n", nSet);
2943 VG_(printf)(" total rc %4llu\n", totrc);
2947 /* --- Helpers for VtsID pruning --- */
2949 static
2950 void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2951 /*MOD*/XArray* /* of VtsTE */ new_tab,
2952 VtsID* ii )
2954 VtsTE *old_te, *new_te;
2955 VtsID old_id, new_id;
2956 /* We're relying here on VG_(indexXA)'s range checking to assert on
2957 any stupid values, in particular *ii == VtsID_INVALID. */
2958 old_id = *ii;
2959 old_te = VG_(indexXA)( old_tab, old_id );
2960 old_te->rc--;
2961 new_id = old_te->u.remap;
2962 new_te = VG_(indexXA)( new_tab, new_id );
2963 new_te->rc++;
2964 *ii = new_id;
2967 static
2968 void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2969 /*MOD*/XArray* /* of VtsTE */ new_tab,
2970 SVal* s )
2972 SVal old_sv, new_sv;
2973 old_sv = *s;
2974 if (SVal__isC(old_sv)) {
2975 VtsID rMin, wMin;
2976 rMin = SVal__unC_Rmin(old_sv);
2977 wMin = SVal__unC_Wmin(old_sv);
2978 remap_VtsID( old_tab, new_tab, &rMin );
2979 remap_VtsID( old_tab, new_tab, &wMin );
2980 new_sv = SVal__mkC( rMin, wMin );
2981 *s = new_sv;
2986 /* NOT TO BE CALLED FROM WITHIN libzsm. */
2987 __attribute__((noinline))
2988 static void vts_tab__do_GC ( Bool show_stats )
2990 UWord i, nTab, nLive, nFreed;
2992 /* ---------- BEGIN VTS GC ---------- */
2993 /* check this is actually necessary. */
2994 tl_assert(vts_tab_freelist == VtsID_INVALID);
2996 /* empty the caches for partial order checks and binary joins. We
2997 could do better and prune out the entries to be deleted, but it
2998 ain't worth the hassle. */
2999 VtsID__invalidate_caches();
3001 /* First, make the reference counts up to date. */
3002 zsm_flush_cache();
3004 nTab = VG_(sizeXA)( vts_tab );
3006 if (show_stats) {
3007 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
3008 show_vts_stats("before GC");
3011 /* Now we can inspect the entire vts_tab. Any entries with zero
3012 .rc fields are now no longer in use and can be put back on the
3013 free list, removed from vts_set, and deleted. */
3014 nFreed = 0;
3015 for (i = 0; i < nTab; i++) {
3016 Bool present;
3017 UWord oldK = 0, oldV = 12345;
3018 VtsTE* te = VG_(indexXA)( vts_tab, i );
3019 if (te->vts == NULL) {
3020 tl_assert(te->rc == 0);
3021 continue; /* already on the free list (presumably) */
3023 if (te->rc > 0)
3024 continue; /* in use */
3025 /* Ok, we got one we can free. */
3026 tl_assert(te->vts->id == i);
3027 /* first, remove it from vts_set. */
3028 present = VG_(delFromFM)( vts_set,
3029 &oldK, &oldV, (UWord)te->vts );
3030 tl_assert(present); /* else it isn't in vts_set ?! */
3031 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3032 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
3033 /* now free the VTS itself */
3034 VTS__delete(te->vts);
3035 te->vts = NULL;
3036 /* and finally put this entry on the free list */
3037 tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
3038 add_to_free_list( i );
3039 nFreed++;
3042 /* Now figure out when the next GC should be. We'll allow the
3043 number of VTSs to double before GCing again. Except of course
3044 that since we can't (or, at least, don't) shrink vts_tab, we
3045 can't set the threshold value smaller than it. */
3046 tl_assert(nFreed <= nTab);
3047 nLive = nTab - nFreed;
3048 tl_assert(nLive >= 0 && nLive <= nTab);
3049 vts_next_GC_at = 2 * nLive;
3050 if (vts_next_GC_at < nTab)
3051 vts_next_GC_at = nTab;
3053 if (show_stats) {
3054 show_vts_stats("after GC");
3055 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
3058 stats__vts_tab_GC++;
3059 if (VG_(clo_stats)) {
3060 tl_assert(nTab > 0);
3061 VG_(message)(Vg_DebugMsg,
3062 "libhb: VTS GC: #%lu old size %lu live %lu (%2llu%%)\n",
3063 stats__vts_tab_GC,
3064 nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
3066 /* ---------- END VTS GC ---------- */
3068 /* Decide whether to do VTS pruning. We have one of three
3069 settings. */
3070 static UInt pruning_auto_ctr = 0; /* do not make non-static */
3072 Bool do_pruning = False;
3073 switch (HG_(clo_vts_pruning)) {
3074 case 0: /* never */
3075 break;
3076 case 1: /* auto */
3077 do_pruning = (++pruning_auto_ctr % 5) == 0;
3078 break;
3079 case 2: /* always */
3080 do_pruning = True;
3081 break;
3082 default:
3083 tl_assert(0);
3086 /* The rest of this routine only handles pruning, so we can
3087 quit at this point if it is not to be done. */
3088 if (!do_pruning)
3089 return;
3090 /* No need to do pruning if no thread died since the last pruning as
3091 no VtsTE can be pruned. */
3092 if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
3093 return;
3095 /* ---------- BEGIN VTS PRUNING ---------- */
3096 /* Sort and check the very dead threads that died since the last pruning.
3097 Sorting is used for the check and so that we can quickly look
3098 up the dead-thread entries as we work through the VTSs. */
3099 verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);
3101 /* We will run through the old table, and create a new table and
3102 set, at the same time setting the u.remap entries in the old
3103 table to point to the new entries. Then, visit every VtsID in
3104 the system, and replace all of them with new ones, using the
3105 u.remap entries in the old table. Finally, we can delete the old
3106 table and set. */
3108 XArray* /* of VtsTE */ new_tab
3109 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
3110 HG_(free), sizeof(VtsTE) );
3112 /* WordFM VTS* void */
3113 WordFM* new_set
3114 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
3115 HG_(free),
3116 (Word(*)(UWord,UWord))VTS__cmp_structural );
3118 /* Visit each old VTS. For each one:
3120 * make a pruned version
3122 * search new_set for the pruned version, yielding either
3123 Nothing (not present) or the new VtsID for it.
3125 * if not present, allocate a new VtsID for it, insert (pruned
3126 VTS, new VtsID) in the tree, and set
3127 remap_table[old VtsID] = new VtsID.
3129 * if present, set remap_table[old VtsID] = new VtsID, where
3130 new VtsID was determined by the tree lookup. Then free up
3131 the clone.
3134 UWord nBeforePruning = 0, nAfterPruning = 0;
3135 UWord nSTSsBefore = 0, nSTSsAfter = 0;
3136 VtsID new_VtsID_ctr = 0;
3138 for (i = 0; i < nTab; i++) {
3140 /* For each old VTS .. */
3141 VtsTE* old_te = VG_(indexXA)( vts_tab, i );
3142 VTS* old_vts = old_te->vts;
3144 /* Skip it if not in use */
3145 if (old_te->rc == 0) {
3146 tl_assert(old_vts == NULL);
3147 continue;
3149 tl_assert(old_te->u.remap == VtsID_INVALID);
3150 tl_assert(old_vts != NULL);
3151 tl_assert(old_vts->id == i);
3152 tl_assert(old_vts->ts != NULL);
3154 /* It is in use. Make a pruned version. */
3155 nBeforePruning++;
3156 nSTSsBefore += old_vts->usedTS;
3157 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
3158 old_vts, verydead_thread_table_not_pruned);
3159 tl_assert(new_vts->sizeTS == new_vts->usedTS);
3160 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
3161 == 0x0ddC0ffeeBadF00dULL);
3163 /* Get rid of the old VTS and the tree entry. It's a bit more
3164 complex to incrementally delete the VTSs now than to nuke
3165 them all after we're done, but the upside is that we don't
3166 wind up temporarily storing potentially two complete copies
3167 of each VTS and hence spiking memory use. */
3168 UWord oldK = 0, oldV = 12345;
3169 Bool present = VG_(delFromFM)( vts_set,
3170 &oldK, &oldV, (UWord)old_vts );
3171 tl_assert(present); /* else it isn't in vts_set ?! */
3172 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3173 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
3174 /* now free the VTS itself */
3175 VTS__delete(old_vts);
3176 old_te->vts = NULL;
3177 old_vts = NULL;
3179 /* NO MENTIONS of old_vts allowed beyond this point. */
3181 /* Ok, we have the pruned copy in new_vts. See if a
3182 structurally identical version is already present in new_set.
3183 If so, delete the one we just made and move on; if not, add
3184 it. */
3185 VTS* identical_version = NULL;
3186 UWord valW = 12345;
3187 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
3188 (UWord)new_vts)) {
3189 // already have it
3190 tl_assert(valW == 0);
3191 tl_assert(identical_version != NULL);
3192 tl_assert(identical_version != new_vts);
3193 VTS__delete(new_vts);
3194 new_vts = identical_version;
3195 tl_assert(new_vts->id != VtsID_INVALID);
3196 } else {
3197 tl_assert(valW == 12345);
3198 tl_assert(identical_version == NULL);
3199 new_vts->id = new_VtsID_ctr++;
3200 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
3201 tl_assert(!b);
3202 VtsTE new_te;
3203 new_te.vts = new_vts;
3204 new_te.rc = 0;
3205 new_te.u.freelink = VtsID_INVALID;
3206 Word j = VG_(addToXA)( new_tab, &new_te );
3207 tl_assert(j <= i);
3208 tl_assert(j == new_VtsID_ctr - 1);
3209 // stats
3210 nAfterPruning++;
3211 nSTSsAfter += new_vts->usedTS;
3213 old_te->u.remap = new_vts->id;
3215 } /* for (i = 0; i < nTab; i++) */
3217 /* Move very dead thread from verydead_thread_table_not_pruned to
3218 verydead_thread_table. Sort and check verydead_thread_table
3219 to verify a thread was reported very dead only once. */
3221 UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);
3223 for (i = 0; i < nBT; i++) {
3224 ThrID thrid =
3225 *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
3226 VG_(addToXA)( verydead_thread_table, &thrid );
3228 verydead_thread_table_sort_and_check (verydead_thread_table);
3229 VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
3232 /* At this point, we have:
3233 * the old VTS table, with its u.remap entries set,
3234 and with all .vts == NULL.
3235 * the old VTS tree should be empty, since it and the old VTSs
3236 it contained have been incrementally deleted was we worked
3237 through the old table.
3238 * the new VTS table, with all .rc == 0, all u.freelink and u.remap
3239 == VtsID_INVALID.
3240 * the new VTS tree.
3242 tl_assert( VG_(sizeFM)(vts_set) == 0 );
3244 /* Now actually apply the mapping. */
3245 /* Visit all the VtsIDs in the entire system. Where do we expect
3246 to find them?
3247 (a) in shadow memory -- the LineZs and LineFs
3248 (b) in our collection of struct _Thrs.
3249 (c) in our collection of struct _SOs.
3250 Nowhere else, AFAICS. Not in the zsm cache, because that just
3251 got invalidated.
3253 Using the u.remap fields in vts_tab, map each old VtsID to a new
3254 VtsID. For each old VtsID, dec its rc; and for each new one,
3255 inc it. This sets up the new refcounts, and it also gives a
3256 cheap sanity check of the old ones: all old refcounts should be
3257 zero after this operation.
3260 /* Do the mappings for (a) above: iterate over the Primary shadow
3261 mem map (WordFM Addr SecMap*). */
3262 UWord secmapW = 0;
3263 VG_(initIterFM)( map_shmem );
3264 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3265 UWord j;
3266 SecMap* sm = (SecMap*)secmapW;
3267 tl_assert(sm->magic == SecMap_MAGIC);
3268 /* Deal with the LineZs */
3269 for (i = 0; i < N_SECMAP_ZLINES; i++) {
3270 LineZ* lineZ = &sm->linesZ[i];
3271 if (lineZ->dict[0] != SVal_INVALID) {
3272 for (j = 0; j < 4; j++)
3273 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3274 } else {
3275 LineF* lineF = SVal2Ptr (lineZ->dict[1]);
3276 for (j = 0; j < N_LINE_ARANGE; j++)
3277 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3281 VG_(doneIterFM)( map_shmem );
3283 /* Do the mappings for (b) above: visit our collection of struct
3284 _Thrs. */
3285 Thread* hgthread = get_admin_threads();
3286 tl_assert(hgthread);
3287 while (hgthread) {
3288 Thr* hbthr = hgthread->hbthr;
3289 tl_assert(hbthr);
3290 /* Threads that are listed in the prunable set have their viR
3291 and viW set to VtsID_INVALID, so we can't mess with them. */
3292 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3293 tl_assert(hbthr->viR == VtsID_INVALID);
3294 tl_assert(hbthr->viW == VtsID_INVALID);
3295 hgthread = hgthread->admin;
3296 continue;
3298 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3299 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3300 hgthread = hgthread->admin;
3303 /* Do the mappings for (c) above: visit the struct _SOs. */
3304 SO* so = admin_SO;
3305 while (so) {
3306 if (so->viR != VtsID_INVALID)
3307 remap_VtsID( vts_tab, new_tab, &so->viR );
3308 if (so->viW != VtsID_INVALID)
3309 remap_VtsID( vts_tab, new_tab, &so->viW );
3310 so = so->admin_next;
3313 /* So, we're nearly done (with this incredibly complex operation).
3314 Check the refcounts for the old VtsIDs all fell to zero, as
3315 expected. Any failure is serious. */
3316 for (i = 0; i < nTab; i++) {
3317 VtsTE* te = VG_(indexXA)( vts_tab, i );
3318 tl_assert(te->vts == NULL);
3319 /* This is the assert proper. Note we're also asserting
3320 zeroness for old entries which are unmapped. That's OK. */
3321 tl_assert(te->rc == 0);
3324 /* Install the new table and set. */
3325 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3326 vts_set = new_set;
3327 VG_(deleteXA)( vts_tab );
3328 vts_tab = new_tab;
3330 /* The freelist of vts_tab entries is empty now, because we've
3331 compacted all of the live entries at the low end of the
3332 table. */
3333 vts_tab_freelist = VtsID_INVALID;
3335 /* Sanity check vts_set and vts_tab. */
3337 /* Because all the live entries got slid down to the bottom of vts_tab: */
3338 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3340 /* Assert that the vts_tab and vts_set entries point at each other
3341 in the required way */
3342 UWord wordK = 0, wordV = 0;
3343 VG_(initIterFM)( vts_set );
3344 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3345 tl_assert(wordK != 0);
3346 tl_assert(wordV == 0);
3347 VTS* vts = (VTS*)wordK;
3348 tl_assert(vts->id != VtsID_INVALID);
3349 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3350 tl_assert(te->vts == vts);
3352 VG_(doneIterFM)( vts_set );
3354 /* Also iterate over the table, and check each entry is
3355 plausible. */
3356 nTab = VG_(sizeXA)( vts_tab );
3357 for (i = 0; i < nTab; i++) {
3358 VtsTE* te = VG_(indexXA)( vts_tab, i );
3359 tl_assert(te->vts);
3360 tl_assert(te->vts->id == i);
3361 tl_assert(te->rc > 0); /* 'cos we just GC'd */
3362 tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
3363 /* value of te->u.remap not relevant */
3366 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
3367 stats__vts_pruning++;
3368 if (VG_(clo_stats)) {
3369 tl_assert(nTab > 0);
3370 VG_(message)(
3371 Vg_DebugMsg,
3372 "libhb: VTS PR: #%lu before %lu (avg sz %lu) "
3373 "after %lu (avg sz %lu)\n",
3374 stats__vts_pruning,
3375 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3376 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3379 /* ---------- END VTS PRUNING ---------- */
3383 /////////////////////////////////////////////////////////
3384 // //
3385 // Vts IDs //
3386 // //
3387 /////////////////////////////////////////////////////////
3389 //////////////////////////
3390 /* A temporary, max-sized VTS which is used as a temporary (the first
3391 argument) in VTS__singleton, VTS__tick and VTS__join operations. */
3392 static VTS* temp_max_sized_VTS = NULL;
3394 //////////////////////////
3395 static ULong stats__cmpLEQ_queries = 0;
3396 static ULong stats__cmpLEQ_misses = 0;
3397 static ULong stats__join2_queries = 0;
3398 static ULong stats__join2_misses = 0;
3400 static inline UInt ROL32 ( UInt w, Int n ) {
3401 w = (w << n) | (w >> (32-n));
3402 return w;
3404 static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3405 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3406 return hash % nTab;
3409 #define N_CMPLEQ_CACHE 1023
3410 static
3411 struct { VtsID vi1; VtsID vi2; Bool leq; }
3412 cmpLEQ_cache[N_CMPLEQ_CACHE];
3414 #define N_JOIN2_CACHE 1023
3415 static
3416 struct { VtsID vi1; VtsID vi2; VtsID res; }
3417 join2_cache[N_JOIN2_CACHE];
3419 static void VtsID__invalidate_caches ( void ) {
3420 Int i;
3421 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3422 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3423 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3424 cmpLEQ_cache[i].leq = False;
3426 for (i = 0; i < N_JOIN2_CACHE; i++) {
3427 join2_cache[i].vi1 = VtsID_INVALID;
3428 join2_cache[i].vi2 = VtsID_INVALID;
3429 join2_cache[i].res = VtsID_INVALID;
3432 //////////////////////////
3434 //static Bool VtsID__is_valid ( VtsID vi ) {
3435 // VtsTE* ve;
3436 // if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3437 // return False;
3438 // ve = VG_(indexXA)( vts_tab, vi );
3439 // if (!ve->vts)
3440 // return False;
3441 // tl_assert(ve->vts->id == vi);
3442 // return True;
3445 static VTS* VtsID__to_VTS ( VtsID vi ) {
3446 VtsTE* te = VG_(indexXA)( vts_tab, vi );
3447 tl_assert(te->vts);
3448 return te->vts;
3451 static void VtsID__pp ( VtsID vi ) {
3452 VTS* vts = VtsID__to_VTS(vi);
3453 VTS__show( vts );
3456 /* compute partial ordering relation of vi1 and vi2. */
3457 __attribute__((noinline))
3458 static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
3459 UInt hash;
3460 Bool leq;
3461 VTS *v1, *v2;
3462 //if (vi1 == vi2) return True;
3463 tl_assert(vi1 != vi2);
3464 ////++
3465 stats__cmpLEQ_queries++;
3466 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3467 if (cmpLEQ_cache[hash].vi1 == vi1
3468 && cmpLEQ_cache[hash].vi2 == vi2)
3469 return cmpLEQ_cache[hash].leq;
3470 stats__cmpLEQ_misses++;
3471 ////--
3472 v1 = VtsID__to_VTS(vi1);
3473 v2 = VtsID__to_VTS(vi2);
3474 leq = VTS__cmpLEQ( v1, v2 ) == 0;
3475 ////++
3476 cmpLEQ_cache[hash].vi1 = vi1;
3477 cmpLEQ_cache[hash].vi2 = vi2;
3478 cmpLEQ_cache[hash].leq = leq;
3479 ////--
3480 return leq;
3482 static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3483 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
3486 /* compute binary join */
3487 __attribute__((noinline))
3488 static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3489 UInt hash;
3490 VtsID res;
3491 VTS *vts1, *vts2;
3492 //if (vi1 == vi2) return vi1;
3493 tl_assert(vi1 != vi2);
3494 ////++
3495 stats__join2_queries++;
3496 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3497 if (join2_cache[hash].vi1 == vi1
3498 && join2_cache[hash].vi2 == vi2)
3499 return join2_cache[hash].res;
3500 stats__join2_misses++;
3501 ////--
3502 vts1 = VtsID__to_VTS(vi1);
3503 vts2 = VtsID__to_VTS(vi2);
3504 temp_max_sized_VTS->usedTS = 0;
3505 VTS__join(temp_max_sized_VTS, vts1,vts2);
3506 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3507 ////++
3508 join2_cache[hash].vi1 = vi1;
3509 join2_cache[hash].vi2 = vi2;
3510 join2_cache[hash].res = res;
3511 ////--
3512 return res;
3514 static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
3515 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
3518 /* create a singleton VTS, namely [thr:1] */
3519 static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
3520 temp_max_sized_VTS->usedTS = 0;
3521 VTS__singleton(temp_max_sized_VTS, thr,tym);
3522 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3525 /* tick operation, creates value 1 if specified index is absent */
3526 static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3527 VTS* vts = VtsID__to_VTS(vi);
3528 temp_max_sized_VTS->usedTS = 0;
3529 VTS__tick(temp_max_sized_VTS, idx,vts);
3530 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3533 /* index into a VTS (only for assertions) */
3534 static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3535 VTS* vts = VtsID__to_VTS(vi);
3536 return VTS__indexAt_SLOW( vts, idx );
3539 /* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
3540 any, really) element in vi1 which is pointwise greater-than the
3541 corresponding element in vi2. If no such element exists, return
3542 NULL. This needs to be fairly quick since it is called every time
3543 a race is detected. */
3544 static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3546 VTS *vts1, *vts2;
3547 Thr* diffthr;
3548 ThrID diffthrid;
3549 tl_assert(vi1 != vi2);
3550 vts1 = VtsID__to_VTS(vi1);
3551 vts2 = VtsID__to_VTS(vi2);
3552 tl_assert(vts1 != vts2);
3553 diffthrid = VTS__cmpLEQ(vts1, vts2);
3554 diffthr = Thr__from_ThrID(diffthrid);
3555 tl_assert(diffthr); /* else they are LEQ ! */
3556 return diffthr;
3560 /////////////////////////////////////////////////////////
3561 // //
3562 // Filters //
3563 // //
3564 /////////////////////////////////////////////////////////
3566 /* Forget everything we know -- clear the filter and let everything
3567 through. This needs to be as fast as possible, since it is called
3568 every time the running thread changes, and every time a thread's
3569 vector clocks change, which can be quite frequent. The obvious
3570 fast way to do this is simply to stuff in tags which we know are
3571 not going to match anything, since they're not aligned to the start
3572 of a line. */
3573 static void Filter__clear ( Filter* fi, const HChar* who )
3575 UWord i;
3576 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
3577 for (i = 0; i < FI_NUM_LINES; i += 8) {
3578 fi->tags[i+0] = 1; /* impossible value -- cannot match */
3579 fi->tags[i+1] = 1;
3580 fi->tags[i+2] = 1;
3581 fi->tags[i+3] = 1;
3582 fi->tags[i+4] = 1;
3583 fi->tags[i+5] = 1;
3584 fi->tags[i+6] = 1;
3585 fi->tags[i+7] = 1;
3587 tl_assert(i == FI_NUM_LINES);
3590 /* Clearing an arbitrary range in the filter. Unfortunately
3591 we have to do this due to core-supplied new/die-mem events. */
3593 static void Filter__clear_1byte ( Filter* fi, Addr a )
3595 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3596 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3597 FiLine* line = &fi->lines[lineno];
3598 UWord loff = (a - atag) / 8;
3599 UShort mask = 0x3 << (2 * (a & 7));
3600 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3601 if (LIKELY( fi->tags[lineno] == atag )) {
3602 /* hit. clear the bits. */
3603 UShort u16 = line->u16s[loff];
3604 line->u16s[loff] = u16 & ~mask; /* clear them */
3605 } else {
3606 /* miss. The filter doesn't hold this address, so ignore. */
3610 static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
3612 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3613 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3614 FiLine* line = &fi->lines[lineno];
3615 UWord loff = (a - atag) / 8;
3616 if (LIKELY( fi->tags[lineno] == atag )) {
3617 line->u16s[loff] = 0;
3618 } else {
3619 /* miss. The filter doesn't hold this address, so ignore. */
3623 /* Only used to verify the fast Filter__clear_range */
3624 __attribute__((unused))
3625 static void Filter__clear_range_SLOW ( Filter* fi, Addr a, UWord len )
3627 tl_assert (CHECK_ZSM);
3629 /* slowly do part preceding 8-alignment */
3630 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3631 Filter__clear_1byte( fi, a );
3632 a++;
3633 len--;
3635 /* vector loop */
3636 while (len >= 8) {
3637 Filter__clear_8bytes_aligned( fi, a );
3638 a += 8;
3639 len -= 8;
3641 /* slowly do tail */
3642 while (UNLIKELY(len > 0)) {
3643 Filter__clear_1byte( fi, a );
3644 a++;
3645 len--;
3649 static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3651 # if CHECK_ZSM > 0
3652 /* We check the below more complex algorithm with the simple one.
3653 This check is very expensive : we do first the slow way on a
3654 copy of the data, then do it the fast way. On RETURN, we check
3655 the two values are equal. */
3656 Filter fi_check = *fi;
3657 Filter__clear_range_SLOW(&fi_check, a, len);
3658 # define RETURN goto check_and_return
3659 # else
3660 # define RETURN return
3661 # endif
3663 Addr begtag = FI_GET_TAG(a); /* tag of range begin */
3665 Addr end = a + len - 1;
3666 Addr endtag = FI_GET_TAG(end); /* tag of range end. */
3668 UWord rlen = len; /* remaining length to clear */
3670 Addr c = a; /* Current position we are clearing. */
3671 UWord clineno = FI_GET_LINENO(c); /* Current lineno we are clearing */
3672 FiLine* cline; /* Current line we are clearing */
3673 UWord cloff; /* Current offset in line we are clearing, when clearing
3674 partial lines. */
3676 UShort u16;
3678 STATIC_ASSERT (FI_LINE_SZB == 32);
3679 // Below assumes filter lines are 32 bytes
3681 if (LIKELY(fi->tags[clineno] == begtag)) {
3682 /* LIKELY for the heavy caller VG_(unknown_SP_update). */
3683 /* First filter line matches begtag.
3684 If c is not at the filter line begin, the below will clear
3685 the filter line bytes starting from c. */
3686 cline = &fi->lines[clineno];
3687 cloff = (c - begtag) / 8;
3689 /* First the byte(s) needed to reach 8-alignment */
3690 if (UNLIKELY(!VG_IS_8_ALIGNED(c))) {
3691 /* hiB is the nr of bytes (higher addresses) from c to reach
3692 8-aligment. */
3693 UWord hiB = 8 - (c & 7);
3694 /* Compute 2-bit/byte mask representing hiB bytes [c..c+hiB[
3695 mask is C000 , F000, FC00, FF00, FFC0, FFF0 or FFFC for the byte
3696 range 7..7 6..7 5..7 4..7 3..7 2..7 1..7 */
3697 UShort mask = 0xFFFF << (16 - 2*hiB);
3699 u16 = cline->u16s[cloff];
3700 if (LIKELY(rlen >= hiB)) {
3701 cline->u16s[cloff] = u16 & ~mask; /* clear all hiB from c */
3702 rlen -= hiB;
3703 c += hiB;
3704 cloff += 1;
3705 } else {
3706 /* Only have the bits for rlen bytes bytes. */
3707 mask = mask & ~(0xFFFF << (16 - 2*(hiB-rlen)));
3708 cline->u16s[cloff] = u16 & ~mask; /* clear rlen bytes from c. */
3709 RETURN; // We have cleared all what we can.
3712 /* c is now 8 aligned. Clear by 8 aligned bytes,
3713 till c is filter-line aligned */
3714 while (!VG_IS_32_ALIGNED(c) && rlen >= 8) {
3715 cline->u16s[cloff] = 0;
3716 c += 8;
3717 rlen -= 8;
3718 cloff += 1;
3720 } else {
3721 c = begtag + FI_LINE_SZB;
3722 if (c > end)
3723 RETURN; // We have cleared all what we can.
3724 rlen -= c - a;
3726 // We have changed c, so re-establish clineno.
3727 clineno = FI_GET_LINENO(c);
3729 if (rlen >= FI_LINE_SZB) {
3730 /* Here, c is filter line-aligned. Clear all full lines that
3731 overlap with the range starting at c, made of a full lines */
3732 UWord nfull = rlen / FI_LINE_SZB;
3733 UWord full_len = nfull * FI_LINE_SZB;
3734 rlen -= full_len;
3735 if (nfull > FI_NUM_LINES)
3736 nfull = FI_NUM_LINES; // no need to check several times the same entry.
3738 for (UWord n = 0; n < nfull; n++) {
3739 if (UNLIKELY(address_in_range(fi->tags[clineno], c, full_len))) {
3740 cline = &fi->lines[clineno];
3741 cline->u16s[0] = 0;
3742 cline->u16s[1] = 0;
3743 cline->u16s[2] = 0;
3744 cline->u16s[3] = 0;
3745 STATIC_ASSERT (4 == sizeof(cline->u16s)/sizeof(cline->u16s[0]));
3747 clineno++;
3748 if (UNLIKELY(clineno == FI_NUM_LINES))
3749 clineno = 0;
3752 c += full_len;
3753 clineno = FI_GET_LINENO(c);
3756 if (CHECK_ZSM) {
3757 tl_assert(VG_IS_8_ALIGNED(c));
3758 tl_assert(clineno == FI_GET_LINENO(c));
3761 /* Do the last filter line, if it was not cleared as a full filter line */
3762 if (UNLIKELY(rlen > 0) && fi->tags[clineno] == endtag) {
3763 cline = &fi->lines[clineno];
3764 cloff = (c - endtag) / 8;
3765 if (CHECK_ZSM) tl_assert(FI_GET_TAG(c) == endtag);
3767 /* c is 8 aligned. Clear by 8 aligned bytes, till we have less than
3768 8 bytes. */
3769 while (rlen >= 8) {
3770 cline->u16s[cloff] = 0;
3771 c += 8;
3772 rlen -= 8;
3773 cloff += 1;
3775 /* Then the remaining byte(s) */
3776 if (rlen > 0) {
3777 /* nr of bytes from c to reach end. */
3778 UWord loB = rlen;
3779 /* Compute mask representing loB bytes [c..c+loB[ :
3780 mask is 0003, 000F, 003F, 00FF, 03FF, 0FFF or 3FFF */
3781 UShort mask = 0xFFFF >> (16 - 2*loB);
3783 u16 = cline->u16s[cloff];
3784 cline->u16s[cloff] = u16 & ~mask; /* clear all loB from c */
3788 # if CHECK_ZSM > 0
3789 check_and_return:
3790 tl_assert (VG_(memcmp)(&fi_check, fi, sizeof(fi_check)) == 0);
3791 # endif
3792 # undef RETURN
3795 /* ------ Read handlers for the filter. ------ */
3797 static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3799 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3800 return False;
3802 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3803 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3804 FiLine* line = &fi->lines[lineno];
3805 UWord loff = (a - atag) / 8;
3806 UShort mask = 0xAAAA;
3807 if (LIKELY( fi->tags[lineno] == atag )) {
3808 /* hit. check line and update. */
3809 UShort u16 = line->u16s[loff];
3810 Bool ok = (u16 & mask) == mask; /* all R bits set? */
3811 line->u16s[loff] = u16 | mask; /* set them */
3812 return ok;
3813 } else {
3814 /* miss. nuke existing line and re-use it. */
3815 UWord i;
3816 fi->tags[lineno] = atag;
3817 for (i = 0; i < FI_LINE_SZB / 8; i++)
3818 line->u16s[i] = 0;
3819 line->u16s[loff] = mask;
3820 return False;
3825 static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3827 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3828 return False;
3830 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3831 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3832 FiLine* line = &fi->lines[lineno];
3833 UWord loff = (a - atag) / 8;
3834 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3835 if (LIKELY( fi->tags[lineno] == atag )) {
3836 /* hit. check line and update. */
3837 UShort u16 = line->u16s[loff];
3838 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
3839 line->u16s[loff] = u16 | mask; /* set them */
3840 return ok;
3841 } else {
3842 /* miss. nuke existing line and re-use it. */
3843 UWord i;
3844 fi->tags[lineno] = atag;
3845 for (i = 0; i < FI_LINE_SZB / 8; i++)
3846 line->u16s[i] = 0;
3847 line->u16s[loff] = mask;
3848 return False;
3853 static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3855 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3856 return False;
3858 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3859 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3860 FiLine* line = &fi->lines[lineno];
3861 UWord loff = (a - atag) / 8;
3862 UShort mask = 0xA << (2 * (a & 6));
3863 /* mask is A000, 0A00, 00A0 or 000A */
3864 if (LIKELY( fi->tags[lineno] == atag )) {
3865 /* hit. check line and update. */
3866 UShort u16 = line->u16s[loff];
3867 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
3868 line->u16s[loff] = u16 | mask; /* set them */
3869 return ok;
3870 } else {
3871 /* miss. nuke existing line and re-use it. */
3872 UWord i;
3873 fi->tags[lineno] = atag;
3874 for (i = 0; i < FI_LINE_SZB / 8; i++)
3875 line->u16s[i] = 0;
3876 line->u16s[loff] = mask;
3877 return False;
3882 static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3885 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3886 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3887 FiLine* line = &fi->lines[lineno];
3888 UWord loff = (a - atag) / 8;
3889 UShort mask = 0x2 << (2 * (a & 7));
3890 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3891 if (LIKELY( fi->tags[lineno] == atag )) {
3892 /* hit. check line and update. */
3893 UShort u16 = line->u16s[loff];
3894 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
3895 line->u16s[loff] = u16 | mask; /* set them */
3896 return ok;
3897 } else {
3898 /* miss. nuke existing line and re-use it. */
3899 UWord i;
3900 fi->tags[lineno] = atag;
3901 for (i = 0; i < FI_LINE_SZB / 8; i++)
3902 line->u16s[i] = 0;
3903 line->u16s[loff] = mask;
3904 return False;
3910 /* ------ Write handlers for the filter. ------ */
3912 static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3914 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3915 return False;
3917 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3918 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3919 FiLine* line = &fi->lines[lineno];
3920 UWord loff = (a - atag) / 8;
3921 UShort mask = 0xFFFF;
3922 if (LIKELY( fi->tags[lineno] == atag )) {
3923 /* hit. check line and update. */
3924 UShort u16 = line->u16s[loff];
3925 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
3926 line->u16s[loff] = u16 | mask; /* set them */
3927 return ok;
3928 } else {
3929 /* miss. nuke existing line and re-use it. */
3930 UWord i;
3931 fi->tags[lineno] = atag;
3932 for (i = 0; i < FI_LINE_SZB / 8; i++)
3933 line->u16s[i] = 0;
3934 line->u16s[loff] = mask;
3935 return False;
3940 static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3942 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3943 return False;
3945 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3946 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3947 FiLine* line = &fi->lines[lineno];
3948 UWord loff = (a - atag) / 8;
3949 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3950 if (LIKELY( fi->tags[lineno] == atag )) {
3951 /* hit. check line and update. */
3952 UShort u16 = line->u16s[loff];
3953 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
3954 line->u16s[loff] = u16 | mask; /* set them */
3955 return ok;
3956 } else {
3957 /* miss. nuke existing line and re-use it. */
3958 UWord i;
3959 fi->tags[lineno] = atag;
3960 for (i = 0; i < FI_LINE_SZB / 8; i++)
3961 line->u16s[i] = 0;
3962 line->u16s[loff] = mask;
3963 return False;
3968 static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3970 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3971 return False;
3973 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3974 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3975 FiLine* line = &fi->lines[lineno];
3976 UWord loff = (a - atag) / 8;
3977 UShort mask = 0xF << (2 * (a & 6));
3978 /* mask is F000, 0F00, 00F0 or 000F */
3979 if (LIKELY( fi->tags[lineno] == atag )) {
3980 /* hit. check line and update. */
3981 UShort u16 = line->u16s[loff];
3982 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
3983 line->u16s[loff] = u16 | mask; /* set them */
3984 return ok;
3985 } else {
3986 /* miss. nuke existing line and re-use it. */
3987 UWord i;
3988 fi->tags[lineno] = atag;
3989 for (i = 0; i < FI_LINE_SZB / 8; i++)
3990 line->u16s[i] = 0;
3991 line->u16s[loff] = mask;
3992 return False;
3997 static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
4000 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
4001 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
4002 FiLine* line = &fi->lines[lineno];
4003 UWord loff = (a - atag) / 8;
4004 UShort mask = 0x3 << (2 * (a & 7));
4005 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
4006 if (LIKELY( fi->tags[lineno] == atag )) {
4007 /* hit. check line and update. */
4008 UShort u16 = line->u16s[loff];
4009 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
4010 line->u16s[loff] = u16 | mask; /* set them */
4011 return ok;
4012 } else {
4013 /* miss. nuke existing line and re-use it. */
4014 UWord i;
4015 fi->tags[lineno] = atag;
4016 for (i = 0; i < FI_LINE_SZB / 8; i++)
4017 line->u16s[i] = 0;
4018 line->u16s[loff] = mask;
4019 return False;
4025 /////////////////////////////////////////////////////////
4026 // //
4027 // Threads //
4028 // //
4029 /////////////////////////////////////////////////////////
4031 /* Maps ThrID values to their Thr*s (which contain ThrID values that
4032 should point back to the relevant slot in the array). Lowest
4033 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
4034 static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
4036 /* And a counter to dole out ThrID values. For rationale/background,
4037 see comments on definition of ScalarTS (far) above. */
4038 static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
4040 static ThrID Thr__to_ThrID ( Thr* thr ) {
4041 return thr->thrid;
4043 static Thr* Thr__from_ThrID ( UInt thrid ) {
4044 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
4045 tl_assert(thr->thrid == thrid);
4046 return thr;
4049 /* True if the cached rcec for thr is valid and can be used to build the
4050 current stack trace just by changing the last frame to the current IP. */
4051 static inline Bool cached_rcec_valid(Thr *thr)
4053 UWord cached_stackvalid = VG_(get_SP_s1) (thr->hgthread->coretid);
4054 return cached_stackvalid != 0;
4056 /* Set the validity of the cached rcec of thr. */
4057 static inline void set_cached_rcec_validity(Thr *thr, Bool valid)
4059 VG_(set_SP_s1) (thr->hgthread->coretid, valid);
4062 static Thr* Thr__new ( void )
4064 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
4065 thr->viR = VtsID_INVALID;
4066 thr->viW = VtsID_INVALID;
4067 thr->llexit_done = False;
4068 thr->joinedwith_done = False;
4069 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
4070 if (HG_(clo_history_level) == 1)
4071 thr->local_Kws_n_stacks
4072 = VG_(newXA)( HG_(zalloc),
4073 "libhb.Thr__new.3 (local_Kws_and_stacks)",
4074 HG_(free), sizeof(ULong_n_EC) );
4075 /* Make an 'empty' cached rcec in thr. */
4076 thr->cached_rcec.magic = RCEC_MAGIC;
4077 thr->cached_rcec.rc = 0;
4078 thr->cached_rcec.rcX = 0;
4079 thr->cached_rcec.next = NULL;
4081 /* Add this Thr* <-> ThrID binding to the mapping, and
4082 cross-check */
4083 if (!thrid_to_thr_map) {
4084 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
4085 HG_(free), sizeof(Thr*) );
4088 if (thrid_counter >= ThrID_MAX_VALID) {
4089 /* We're hosed. We have to stop. */
4090 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
4093 thr->thrid = thrid_counter++;
4094 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
4095 tl_assert(ix + 1024 == thr->thrid);
4097 return thr;
4100 static void note_local_Kw_n_stack_for ( Thr* thr )
4102 Word nPresent;
4103 ULong_n_EC pair;
4104 tl_assert(thr);
4106 // We only collect this info at history level 1 (approx)
4107 if (HG_(clo_history_level) != 1)
4108 return;
4110 /* This is the scalar Kw for thr. */
4111 pair.ull = VtsID__indexAt( thr->viW, thr );
4112 pair.ec = main_get_EC( thr );
4113 tl_assert(pair.ec);
4114 tl_assert(thr->local_Kws_n_stacks);
4116 /* check that we're not adding duplicates */
4117 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
4119 /* Throw away old stacks, if necessary. We can't accumulate stuff
4120 indefinitely. */
4121 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
4122 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
4123 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
4124 if (0)
4125 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
4126 thr, pair.ull, pair.ec );
4129 if (nPresent > 0) {
4130 ULong_n_EC* prevPair
4131 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
4132 tl_assert( prevPair->ull <= pair.ull );
4135 if (nPresent == 0)
4136 pair.ec = NULL;
4138 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
4140 if (0)
4141 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
4142 thr, pair.ull, pair.ec );
4143 if (0)
4144 VG_(pp_ExeContext)(pair.ec);
4147 static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
4148 const ULong_n_EC* pair2 )
4150 if (pair1->ull < pair2->ull) return -1;
4151 if (pair1->ull > pair2->ull) return 1;
4152 return 0;
4156 /////////////////////////////////////////////////////////
4157 // //
4158 // Shadow Values //
4159 // //
4160 /////////////////////////////////////////////////////////
4162 // type SVal, SVal_INVALID and SVal_NOACCESS are defined by
4163 // hb_zsm.h. We have to do everything else here.
4165 /* SVal is 64 bit unsigned int.
4167 <---------30---------> <---------30--------->
4168 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
4169 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
4170 11 0--------------------0 00 0--------------------0 A: SVal_INVALID
4171 */
4173 #define SVAL_TAGMASK (3ULL << 62)
4175 static inline Bool SVal__isC ( SVal s ) {
4176 return (0ULL << 62) == (s & SVAL_TAGMASK);
4178 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
4179 //tl_assert(VtsID__is_valid(rmini));
4180 //tl_assert(VtsID__is_valid(wmini));
4181 return (((ULong)rmini) << 32) | ((ULong)wmini);
4183 static inline VtsID SVal__unC_Rmin ( SVal s ) {
4184 tl_assert(SVal__isC(s));
4185 return (VtsID)(s >> 32);
4187 static inline VtsID SVal__unC_Wmin ( SVal s ) {
4188 tl_assert(SVal__isC(s));
4189 return (VtsID)(s & 0xFFFFFFFFULL);
4192 static inline Bool SVal__isA ( SVal s ) {
4193 return (2ULL << 62) == (s & SVAL_TAGMASK);
4195 __attribute__((unused))
4196 static inline SVal SVal__mkA ( void ) {
4197 return 2ULL << 62;
4200 /* Direct callback from lib_zsm. */
4201 static inline void SVal__rcinc ( SVal s ) {
4202 if (SVal__isC(s)) {
4203 VtsID__rcinc( SVal__unC_Rmin(s) );
4204 VtsID__rcinc( SVal__unC_Wmin(s) );
4208 /* Direct callback from lib_zsm. */
4209 static inline void SVal__rcdec ( SVal s ) {
4210 if (SVal__isC(s)) {
4211 VtsID__rcdec( SVal__unC_Rmin(s) );
4212 VtsID__rcdec( SVal__unC_Wmin(s) );
4216 static inline void *SVal2Ptr (SVal s)
4218 return (void*)(UWord)s;
4221 static inline SVal Ptr2SVal (void* ptr)
4223 return (SVal)(UWord)ptr;
4228 /////////////////////////////////////////////////////////
4229 // //
4230 // Change-event map2 //
4231 // //
4232 /////////////////////////////////////////////////////////
4234 /* This is in two parts:
4236 1. A hash table of RCECs. This is a set of reference-counted stack
4237 traces. When the reference count of a stack trace becomes zero,
4238 it is removed from the set and freed up. The intent is to have
4239 a set of stack traces which can be referred to from (2), but to
4240 only represent each one once. The set is indexed/searched by
4241 ordering on the stack trace vectors.
4243 2. A Hash table of OldRefs. These store information about each old
4244 ref that we need to record. Hash table key is the address of the
4245 location for which the information is recorded. For LRU
4246 purposes, each OldRef in the hash table is also on a doubly
4247 linked list maintaining the order in which the OldRef were most
4248 recently accessed.
4249 Each OldRef also maintains the stamp at which it was last accessed.
4250 With these stamps, we can quickly check which of 2 OldRef is the
4251 'newest', without having to scan the full list of LRU OldRef.
4253 The important part of an OldRef is, however, its acc component.
4254 This binds a TSW triple (thread, size, R/W) to an RCEC.
4256 We allocate a maximum of HG_(clo_conflict_cache_size) OldRef.
4257 Then we do exact LRU discarding. For each discarded OldRef we must
4258 of course decrement the reference count on the RCEC it
4259 refers to, in order that entries from (1) eventually get
4260 discarded too.
4261 */
4263 static UWord stats__evm__lookup_found = 0;
4264 static UWord stats__evm__lookup_notfound = 0;
4266 static UWord stats__ctxt_eq_tsw_eq_rcec = 0;
4267 static UWord stats__ctxt_eq_tsw_neq_rcec = 0;
4268 static UWord stats__ctxt_neq_tsw_neq_rcec = 0;
4269 static UWord stats__ctxt_rcdec_calls = 0;
4270 static UWord stats__ctxt_rcec_gc_discards = 0;
4272 static UWord stats__ctxt_tab_curr = 0;
4273 static UWord stats__ctxt_tab_max = 0;
4275 static UWord stats__ctxt_tab_qs = 0;
4276 static UWord stats__ctxt_tab_cmps = 0;
4279 ///////////////////////////////////////////////////////
4280 //// Part (1): A hash table of RCECs
4283 //#define N_RCEC_TAB 98317 /* prime */
4284 #define N_RCEC_TAB 196613 /* prime */
4286 //////////// BEGIN RCEC pool allocator
4287 static PoolAlloc* rcec_pool_allocator;
4288 static RCEC* alloc_RCEC ( void ) {
4289 return VG_(allocEltPA) ( rcec_pool_allocator );
4292 static void free_RCEC ( RCEC* rcec ) {
4293 tl_assert(rcec->magic == RCEC_MAGIC);
4294 VG_(freeEltPA)( rcec_pool_allocator, rcec );
4296 //////////// END RCEC pool allocator
4298 static RCEC** contextTab = NULL; /* hash table of RCEC*s */
4300 /* Count of allocated RCEC having ref count > 0 */
4301 static UWord RCEC_referenced = 0;
4303 /* True if the frames of ec1 and ec2 are different. */
4304 static Bool RCEC__differs_by_frames ( RCEC* ec1, RCEC* ec2 ) {
4305 Word i;
4306 if (CHECK_CEM) {
4307 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
4308 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
4310 if (ec1->frames_hash != ec2->frames_hash) return True;
4311 for (i = 0; i < N_FRAMES; i++) {
4312 if (ec1->frames[i] != ec2->frames[i]) return True;
4314 return False;
4317 /* Dec the ref of this RCEC. */
4318 static void ctxt__rcdec ( RCEC* ec )
4320 stats__ctxt_rcdec_calls++;
4321 if (CHECK_CEM)
4322 tl_assert(ec && ec->magic == RCEC_MAGIC);
4323 tl_assert(ec->rc > 0);
4324 ec->rc--;
4325 if (ec->rc == 0)
4326 RCEC_referenced--;
4329 static void ctxt__rcinc ( RCEC* ec )
4331 if (CHECK_CEM)
4332 tl_assert(ec && ec->magic == RCEC_MAGIC);
4333 if (ec->rc == 0)
4334 RCEC_referenced++;
4335 ec->rc++;
4339 /* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
4340 move it one step closer to the front of the list, so as to make
4341 subsequent searches for it cheaper. */
4342 static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
4344 RCEC *ec0, *ec1, *ec2;
4345 if (ec == *headp)
4346 tl_assert(0); /* already at head of list */
4347 tl_assert(ec != NULL);
4348 ec0 = *headp;
4349 ec1 = NULL;
4350 ec2 = NULL;
4351 while (True) {
4352 if (ec0 == NULL || ec0 == ec) break;
4353 ec2 = ec1;
4354 ec1 = ec0;
4355 ec0 = ec0->next;
4357 tl_assert(ec0 == ec);
4358 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
4359 RCEC* tmp;
4360 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
4361 predecessor. Swap ec0 and ec1, that is, move ec0 one step
4362 closer to the start of the list. */
4363 tl_assert(ec2->next == ec1);
4364 tl_assert(ec1->next == ec0);
4365 tmp = ec0->next;
4366 ec2->next = ec0;
4367 ec0->next = ec1;
4368 ec1->next = tmp;
4370 else
4371 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
4372 /* it's second in the list. */
4373 tl_assert(*headp == ec1);
4374 tl_assert(ec1->next == ec0);
4375 ec1->next = ec0->next;
4376 ec0->next = ec1;
4377 *headp = ec0;
4382 /* Find the given RCEC in the tree, and return a pointer to it. Or,
4383 if not present, add the given one to the tree (by making a copy of
4384 it, so the caller can immediately deallocate the original) and
4385 return a pointer to the copy. The caller can safely have 'example'
4386 on its stack, since we will always return a pointer to a copy of
4387 it, not to the original. Note that the inserted node will have .rc
4388 of zero and so the caller must immediately increment it. */
4389 __attribute__((noinline))
4390 static RCEC* ctxt__find_or_add ( RCEC* example )
4392 UWord hent;
4393 RCEC* copy;
4395 if (CHECK_CEM) {
4396 /* Note that the single caller of ctxt__find_or_add always provides
4397 &thr->cached_rcec as argument. The sanity of thr->cached_rcec is always
4398 checked with a thread terminates. */
4399 tl_assert(example && example->magic == RCEC_MAGIC);
4400 tl_assert(example->rc == 0);
4403 /* Search the hash table to see if we already have it. */
4404 stats__ctxt_tab_qs++;
4405 hent = example->frames_hash % N_RCEC_TAB;
4406 copy = contextTab[hent];
4407 while (1) {
4408 if (!copy) break;
4409 if (CHECK_CEM)
4410 tl_assert(copy->magic == RCEC_MAGIC);
4411 stats__ctxt_tab_cmps++;
4412 if (!RCEC__differs_by_frames(copy, example)) break;
4413 copy = copy->next;
4416 if (copy) {
4417 tl_assert(copy != example);
4418 /* optimisation: if it's not at the head of its list, move 1
4419 step fwds, to make future searches cheaper */
4420 if (copy != contextTab[hent]) {
4421 move_RCEC_one_step_forward( &contextTab[hent], copy );
4423 } else {
4424 copy = alloc_RCEC();
4425 tl_assert(copy != example);
4426 *copy = *example;
4427 copy->next = contextTab[hent];
4428 contextTab[hent] = copy;
4429 stats__ctxt_tab_curr++;
4430 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4431 stats__ctxt_tab_max = stats__ctxt_tab_curr;
4433 return copy;
4436 static inline UWord ROLW ( UWord w, Int n )
4438 Int bpw = 8 * sizeof(UWord);
4439 w = (w << n) | (w >> (bpw-n));
4440 return w;
4443 static UWord stats__cached_rcec_identical = 0;
4444 static UWord stats__cached_rcec_updated = 0;
4445 static UWord stats__cached_rcec_fresh = 0;
4446 static UWord stats__cached_rcec_diff = 0;
4447 static UWord stats__cached_rcec_diff_known_reason = 0;
4449 /* Check if the cached rcec in thr corresponds to the current
4450 stacktrace of the thread. Returns True if ok, False otherwise.
4451 This is just used for debugging the cached rcec logic, activated
4452 using --hg-sanity-flags=xx1xxx i.e. SCE_ACCESS flag.
4453 When this flag is activated, a call to this function will happen each time
4454 a stack trace is needed for a memory access. */
4455 __attribute__((noinline))
4456 static Bool check_cached_rcec_ok (Thr* thr, Addr previous_frame0)
4458 Bool ok = True;
4459 UInt i;
4460 UWord frames[N_FRAMES];
4461 UWord sps[N_FRAMES];
4462 UWord fps[N_FRAMES];
4463 const DiEpoch cur_ep = VG_(current_DiEpoch)();
4465 for (i = 0; i < N_FRAMES; i++)
4466 frames[i] = sps[i] = fps[i] = 0;
4467 VG_(get_StackTrace)( thr->hgthread->coretid, &frames[0], N_FRAMES,
4468 &sps[0], &fps[0], 0);
4469 for (i = 0; i < N_FRAMES; i++) {
4470 if ( thr->cached_rcec.frames[i] != frames[i] ) {
4471 /* There are a bunch of "normal" reasons for which a stack
4472 derived from the cached rcec differs from frames. */
4473 const HChar *reason = NULL;
4475 /* Old linkers (e.g. RHEL5) gave no cfi unwind information in the PLT
4476 section (fix was added in binutils around June 2011).
4477 Without PLT unwind info, stacktrace in the PLT section are
4478 missing an entry. E.g. the cached stacktrace is:
4479 ==4463== at 0x2035C0: ___tls_get_addr (dl-tls.c:753)
4480 ==4463== by 0x33B7F9: __libc_thread_freeres
4481 (in /lib/libc-2.11.2.so)
4482 ==4463== by 0x39BA4F: start_thread (pthread_create.c:307)
4483 ==4463== by 0x2F107D: clone (clone.S:130)
4484 while the 'check stacktrace' is
4485 ==4463== at 0x2035C0: ___tls_get_addr (dl-tls.c:753)
4486 ==4463== by 0x33B82D: strerror_thread_freeres
4487 (in /lib/libc-2.11.2.so)
4488 ==4463== by 0x33B7F9: __libc_thread_freeres
4489 (in /lib/libc-2.11.2.so)
4490 ==4463== by 0x39BA4F: start_thread (pthread_create.c:307)
4491 ==4463== by 0x2F107D: clone (clone.S:130)
4492 No cheap/easy way to detect or fix that. */
4494 /* It seems that sometimes, the CFI unwind info looks wrong
4495 for a 'ret' instruction. E.g. here is the unwind info
4496 for a 'retq' on gcc20 (amd64, Debian 7)
4497 [0x4e3ddfe .. 0x4e3ddfe]: let cfa=oldSP+48 in RA=*(cfa+-8)
4498 SP=cfa+0 BP=*(cfa+-24)
4499 This unwind info looks doubtful, as the RA should be at oldSP.
4500 No easy way to detect this problem.
4501 This gives a difference between cached rcec and
4502 current stack trace: the cached rcec is correct. */
4504 /* When returning from main, unwind info becomes erratic.
4505 So, by default, only report errors for main and above,
4506 unless asked to show below main. */
4507 if (reason == NULL) {
4508 UInt fr_main;
4509 Vg_FnNameKind fr_kind;
4510 for (fr_main = 0; fr_main < N_FRAMES; fr_main++) {
4511 fr_kind = VG_(get_fnname_kind_from_IP)
4512 (cur_ep, frames[fr_main]);
4513 if (fr_kind == Vg_FnNameMain || fr_kind == Vg_FnNameBelowMain)
4514 break;
4516 UInt kh_main;
4517 Vg_FnNameKind kh_kind;
4518 for (kh_main = 0; kh_main < N_FRAMES; kh_main++) {
4519 kh_kind = VG_(get_fnname_kind_from_IP)
4520 (cur_ep, thr->cached_rcec.frames[kh_main]);
4521 if (kh_kind == Vg_FnNameMain || kh_kind == Vg_FnNameBelowMain)
4522 break;
4524 if (kh_main == fr_main
4525 && kh_kind == fr_kind
4526 && (kh_main < i || (kh_main == i
4527 && kh_kind == Vg_FnNameBelowMain))) {
4528 // found main or below main before the difference
4529 reason = "Below main";
4533 /* We have places where the stack is missing some internal
4534 pthread functions. For such stacktraces, GDB reports only
4535 one function, telling:
4536 #0 0xf7fa81fe in _L_unlock_669 ()
4537 from /lib/i386-linux-gnu/libpthread.so.0
4538 Backtrace stopped: previous frame identical to
4539 this frame (corrupt stack?)
4541 This is when sps and fps are identical.
4542 The cached stack trace is then
4543 ==3336== at 0x40641FE: _L_unlock_669
4544 (pthread_mutex_unlock.c:310)
4545 ==3336== by 0x40302BE: pthread_mutex_unlock
4546 (hg_intercepts.c:710)
4547 ==3336== by 0x80486AF: main (cond_timedwait_test.c:14)
4548 while the 'check stacktrace' is
4549 ==3336== at 0x40641FE: _L_unlock_669
4550 (pthread_mutex_unlock.c:310)
4551 ==3336== by 0x4064206: _L_unlock_669
4552 (pthread_mutex_unlock.c:310)
4553 ==3336== by 0x4064132: __pthread_mutex_unlock_usercnt
4554 (pthread_mutex_unlock.c:57)
4555 ==3336== by 0x40302BE: pthread_mutex_unlock
4556 (hg_intercepts.c:710)
4557 ==3336== by 0x80486AF: main (cond_timedwait_test.c:14) */
4558 if (reason == NULL) {
4559 if ((i > 0
4560 && sps[i] == sps[i-1] && fps[i] == fps[i-1])
4561 || (i < N_FRAMES-1
4562 && sps[i] == sps[i+1] && fps[i] == fps[i+1])) {
4563 reason = "previous||next frame: identical sp and fp";
4566 if (reason == NULL) {
4567 if ((i > 0
4568 && fps[i] == fps[i-1])
4569 || (i < N_FRAMES-1
4570 && fps[i] == fps[i+1])) {
4571 reason = "previous||next frame: identical fp";
4575 /* When we have a read or write 'in the middle of a push instruction',
4576 then the normal backtrace is not very good, while the helgrind
4577 stacktrace is better, as it undoes the not yet fully finished
4578 push instruction before getting the stacktrace. */
4579 if (reason == NULL && thr->hgthread->first_sp_delta != 0) {
4580 reason = "fixupSP probably needed for check stacktrace";
4583 /* Unwinding becomes hectic when running the exit handlers.
4584 None of GDB, cached stacktrace and check stacktrace corresponds.
4585 So, if we find __run_exit_handlers, ignore the difference. */
4586 if (reason == NULL) {
4587 const HChar *fnname;
4588 for (UInt f = 0; f < N_FRAMES; f++) {
4589 if (VG_(get_fnname)( cur_ep, frames[f], &fnname)
4590 && VG_(strcmp) ("__run_exit_handlers", fnname) == 0) {
4591 reason = "exit handlers";
4592 break;
4597 // Show what we have found for this difference
4598 if (reason == NULL) {
4599 ok = False;
4600 stats__cached_rcec_diff++;
4601 } else {
4602 ok = True;
4603 stats__cached_rcec_diff_known_reason++;
4605 if (!ok || VG_(clo_verbosity) > 2) {
4606 Bool save_show_below_main = VG_(clo_show_below_main);
4607 VG_(clo_show_below_main) = True;
4608 /* The below error msg reports an unexpected diff in 'frame %d'.
4609 The (maybe wrong) pc found in the cached stacktrace is
4610 'cached_pc %p' while an unwind gives the (maybe wrong)
4611 'check_pc %p'.
4612 After, 'previous_frame0 %p' tells where the cached stacktrace
4613 was taken.
4614 This is then followed by the full resulting cache stack trace
4615 and the full stack trace found doing unwind.
4616 Such a diff can have various origins:
4617 * a bug in the unwinder, when the cached stack trace was taken
4618 at 'previous_frame0'
4619 * a bug in the unwinder, when the check stack trace was taken
4620 (i.e. at current pc).
4621 * a missing 'invalidate cache stack trace' somewhere in the
4622 instructions between 'previous_frame0' and current_pc.
4623 To investigate the last case, typically, disass the range of
4624 instructions where an invalidate cached stack might miss. */
4625 VG_(printf)("%s diff tid %u frame %u "
4626 "cached_pc %p check_pc %p\n",
4627 reason ? reason : "unexpected",
4628 thr->hgthread->coretid,
4630 (void*)thr->cached_rcec.frames[i],
4631 (void*)frames[i]);
4632 VG_(printf)("cached stack trace previous_frame0 %p\n",
4633 (void*)previous_frame0);
4634 VG_(pp_StackTrace)(cur_ep, &previous_frame0, 1);
4635 VG_(printf)("resulting cached stack trace:\n");
4636 VG_(pp_StackTrace)(cur_ep, thr->cached_rcec.frames, N_FRAMES);
4637 VG_(printf)("check stack trace:\n");
4638 VG_(pp_StackTrace)(cur_ep, frames, N_FRAMES);
4640 VG_(show_sched_status) (False, // host_stacktrace
4641 False, // stack_usage
4642 False); // exited_threads
4643 if (VG_(clo_vgdb_error) == 1234567890) // HACK TO ALLOW TO DEBUG
4644 VG_(gdbserver) ( thr->hgthread->coretid );
4645 VG_(clo_show_below_main) = save_show_below_main;
4647 break; // Stop giving more errors for this stacktrace.
4650 return ok;
4653 __attribute__((noinline))
4654 static RCEC* get_RCEC ( Thr* thr )
4656 UInt i;
4657 UWord hash;
4658 Addr previous_frame0 = 0; // Assignment needed to silence gcc
4659 RCEC *res;
4660 const Bool thr_cached_rcec_valid = cached_rcec_valid(thr);
4661 const Addr cur_ip = VG_(get_IP)(thr->hgthread->coretid);
4663 if (DEBUG_CACHED_RCEC)
4664 VG_(printf)("get rcec tid %u at IP %p SP %p"
4665 " first_sp_delta %ld cached valid %d\n",
4666 thr->hgthread->coretid,
4667 (void*)cur_ip,
4668 (void*)VG_(get_SP)(thr->hgthread->coretid),
4669 thr->hgthread->first_sp_delta, thr_cached_rcec_valid);
4671 /* If we have a valid cached rcec, derive the new rcec from the cached one
4672 and update the cached one.
4673 Otherwise, compute a fresh rcec. */
4675 if (thr_cached_rcec_valid) {
4676 /* Update the stacktrace of the cached rcec with the current IP */
4677 previous_frame0 = thr->cached_rcec.frames[0];
4678 thr->cached_rcec.frames[0] = cur_ip;
4680 # if defined(VGP_x86_linux)
4681 // See m_stacktrace.c kludge
4682 extern Addr VG_(client__dl_sysinfo_int80);
4683 /// #include pub_core_clientstate needed for the above ????
4684 /// or move the above into a pub_tool_??? tool_stacktrace.h maybe ????
4685 if (VG_(client__dl_sysinfo_int80) != 0 /* we know its address */
4686 && cur_ip >= VG_(client__dl_sysinfo_int80)
4687 && cur_ip < VG_(client__dl_sysinfo_int80)+3
4689 thr->cached_rcec.frames[0]
4690 = (ULong) *(Addr*)(UWord)VG_(get_SP)(thr->hgthread->coretid);
4692 # endif
4694 if (previous_frame0 == thr->cached_rcec.frames[0])
4695 stats__cached_rcec_identical++;
4696 else
4697 stats__cached_rcec_updated++;
4698 } else {
4699 /* Compute a fresh stacktrace. */
4700 main_get_stacktrace( thr, &thr->cached_rcec.frames[0], N_FRAMES );
4701 if (DEBUG_CACHED_RCEC) {
4702 Bool save_show_below_main = VG_(clo_show_below_main);
4703 VG_(clo_show_below_main) = True;
4704 VG_(printf)("caching stack trace:\n");
4705 VG_(pp_StackTrace)(VG_(current_DiEpoch)(),
4706 &thr->cached_rcec.frames[0], N_FRAMES);
4707 VG_(clo_show_below_main) = save_show_below_main;
4709 stats__cached_rcec_fresh++;
4712 hash = 0;
4713 for (i = 0; i < N_FRAMES; i++) {
4714 hash ^= thr->cached_rcec.frames[i];
4715 hash = ROLW(hash, 19);
4717 thr->cached_rcec.frames_hash = hash;
4718 res = ctxt__find_or_add( &thr->cached_rcec );
4720 if (UNLIKELY(HG_(clo_sanity_flags) & SCE_ACCESS)
4721 && thr_cached_rcec_valid) {
4722 /* In case the cached and check differ, invalidate the cached rcec.
4723 We have less duplicated diffs reported afterwards. */
4724 if (!check_cached_rcec_ok (thr, previous_frame0))
4725 set_cached_rcec_validity(thr, False);
4726 } else {
4727 if (HG_(clo_delta_stacktrace) && !thr_cached_rcec_valid)
4728 set_cached_rcec_validity(thr, True);
4731 return res;
4734 ///////////////////////////////////////////////////////
4735 //// Part (2):
4736 /// A hashtable guest-addr -> OldRef, that refers to (1)
4737 /// Note: we use the guest address as key. This means that the entries
4738 /// for multiple threads accessing the same address will land in the same
4739 /// bucket. It might be nice to have a better distribution of the
4740 /// OldRef in the hashtable by using as key the guest address ^ tsw.
4741 /// The problem is that when a race is reported on a ga, we need to retrieve
4742 /// efficiently the accesses to ga by other threads, only using the ga.
4743 /// Measurements on firefox have shown that the chain length is reasonable.
4745 /* Records an access: a thread, a context (size & writeness) and the
4746 number of held locks. The size (1,2,4,8) is stored as is in szB.
4747 Note that szB uses more bits than needed to store a size up to 8.
4748 This allows to use a TSW as a fully initialised UInt e.g. in
4749 cmp_oldref_tsw. If needed, a more compact representation of szB
4750 can be done (e.g. use only 4 bits, or use only 2 bits and encode the
4751 size (1,2,4,8) as 00 = 1, 01 = 2, 10 = 4, 11 = 8. */
4752 typedef
4753 struct {
4754 UInt thrid : SCALARTS_N_THRBITS;
4755 UInt szB : 32 - SCALARTS_N_THRBITS - 1;
4756 UInt isW : 1;
4757 } TSW; // Thread+Size+Writeness
4758 typedef
4759 struct {
4760 TSW tsw;
4761 WordSetID locksHeldW;
4762 RCEC* rcec;
4764 Thr_n_RCEC;
4766 typedef
4767 struct OldRef {
4768 struct OldRef *ht_next; // to link hash table nodes together.
4769 UWord ga; // hash_table key, == address for which we record an access.
4770 struct OldRef *prev; // to refs older than this one
4771 struct OldRef *next; // to refs newer that this one
4772 UWord stamp; // allows to order (by time of access) 2 OldRef
4773 Thr_n_RCEC acc;
4775 OldRef;
4777 /* Returns the or->tsw as an UInt */
4778 static inline UInt oldref_tsw (const OldRef* or)
4780 return *(const UInt*)(&or->acc.tsw);
4783 /* Compare the tsw component for 2 OldRef.
4784 Used for OldRef hashtable (which already verifies equality of the
4785 'key' part. */
4786 static Word cmp_oldref_tsw (const void* node1, const void* node2 )
4788 const UInt tsw1 = oldref_tsw(node1);
4789 const UInt tsw2 = oldref_tsw(node2);
4791 if (tsw1 < tsw2) return -1;
4792 if (tsw1 > tsw2) return 1;
4793 return 0;
4797 //////////// BEGIN OldRef pool allocator
4798 static PoolAlloc* oldref_pool_allocator;
4799 // Note: We only allocate elements in this pool allocator, we never free them.
4800 // We stop allocating elements at VG_(clo_conflict_cache_size).
4801 //////////// END OldRef pool allocator
4803 static OldRef mru;
4804 static OldRef lru;
4805 // A double linked list, chaining all OldRef in a mru/lru order.
4806 // mru/lru are sentinel nodes.
4807 // Whenever an oldref is re-used, its position is changed as the most recently
4808 // used (i.e. pointed to by mru.prev).
4809 // When a new oldref is needed, it is allocated from the pool
4810 // if we have not yet reached --conflict-cache-size.
4811 // Otherwise, if all oldref have already been allocated,
4812 // the least recently used (i.e. pointed to by lru.next) is re-used.
4813 // When an OldRef is used, it is moved as the most recently used entry
4814 // (i.e. pointed to by mru.prev).
4816 // Removes r from the double linked list
4817 // Note: we do not need to test for special cases such as
4818 // NULL next or prev pointers, because we have sentinel nodes
4819 // at both sides of the list. So, a node is always forward and
4820 // backward linked.
4821 static inline void OldRef_unchain(OldRef *r)
4823 r->next->prev = r->prev;
4824 r->prev->next = r->next;
4827 // Insert new as the newest OldRef
4828 // Similarly to OldRef_unchain, no need to test for NULL
4829 // pointers, as e.g. mru.prev is always guaranteed to point
4830 // to a non NULL node (lru when the list is empty).
4831 static inline void OldRef_newest(OldRef *new)
4833 new->next = &mru;
4834 new->prev = mru.prev;
4835 mru.prev = new;
4836 new->prev->next = new;
4840 static VgHashTable* oldrefHT = NULL; /* Hash table* OldRef* */
4841 static UWord oldrefHTN = 0; /* # elems in oldrefHT */
4842 /* Note: the nr of ref in the oldrefHT will always be equal to
4843 the nr of elements that were allocated from the OldRef pool allocator
4844 as we never free an OldRef : we just re-use them. */
4847 /* allocates a new OldRef or re-use the lru one if all allowed OldRef
4848 have already been allocated. */
4849 static OldRef* alloc_or_reuse_OldRef ( void )
4851 if (oldrefHTN < HG_(clo_conflict_cache_size)) {
4852 oldrefHTN++;
4853 return VG_(allocEltPA) ( oldref_pool_allocator );
4854 } else {
4855 OldRef *oldref_ht;
4856 OldRef *oldref = lru.next;
4858 OldRef_unchain(oldref);
4859 oldref_ht = VG_(HT_gen_remove) (oldrefHT, oldref, cmp_oldref_tsw);
4860 tl_assert (oldref == oldref_ht);
4861 ctxt__rcdec( oldref->acc.rcec );
4862 return oldref;
4867 inline static UInt min_UInt ( UInt a, UInt b ) {
4868 return a < b ? a : b;
4871 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4872 first interval is lower, 1 if the first interval is higher, and 0
4873 if there is any overlap. Redundant paranoia with casting is there
4874 following what looked distinctly like a bug in gcc-4.1.2, in which
4875 some of the comparisons were done signedly instead of
4876 unsignedly. */
4877 /* Copied from exp-ptrcheck/sg_main.c */
4878 static inline Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4879 Addr a2, SizeT n2 ) {
4880 UWord a1w = (UWord)a1;
4881 UWord n1w = (UWord)n1;
4882 UWord a2w = (UWord)a2;
4883 UWord n2w = (UWord)n2;
4884 tl_assert(n1w > 0 && n2w > 0);
4885 if (a1w + n1w <= a2w) return -1L;
4886 if (a2w + n2w <= a1w) return 1L;
4887 return 0;
4890 static UWord event_map_stamp = 0; // Used to stamp each OldRef when touched.
4892 static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
4894 OldRef example;
4895 OldRef* ref;
4896 RCEC* rcec;
4898 tl_assert(thr);
4899 ThrID thrid = thr->thrid;
4900 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4902 WordSetID locksHeldW = thr->hgthread->locksetW;
4904 rcec = get_RCEC( thr );
4906 /* Look in the oldrefHT to see if we already have a record for this
4907 address/thr/sz/isW. */
4908 example.ga = a;
4909 example.acc.tsw = (TSW) {.thrid = thrid,
4910 .szB = szB,
4911 .isW = (UInt)(isW & 1)};
4912 ref = VG_(HT_gen_lookup) (oldrefHT, &example, cmp_oldref_tsw);
4914 if (ref) {
4915 /* We already have a record for this address and this (thrid, R/W,
4916 size) triple. */
4917 tl_assert (ref->ga == a);
4919 /* thread 'thr' has an entry. Update its RCEC, if it differs. */
4920 if (rcec == ref->acc.rcec)
4921 stats__ctxt_eq_tsw_eq_rcec++;
4922 else {
4923 stats__ctxt_eq_tsw_neq_rcec++;
4924 ctxt__rcdec( ref->acc.rcec );
4925 ctxt__rcinc(rcec);
4926 ref->acc.rcec = rcec;
4928 tl_assert(ref->acc.tsw.thrid == thrid);
4929 /* Update the stamp, RCEC and the W-held lockset. */
4930 ref->stamp = event_map_stamp;
4931 ref->acc.locksHeldW = locksHeldW;
4933 OldRef_unchain(ref);
4934 OldRef_newest(ref);
4936 } else {
4937 tl_assert (szB == 4 || szB == 8 ||szB == 1 || szB == 2);
4938 // We only need to check the size the first time we insert a ref.
4939 // Check for most frequent cases first
4940 // Note: we could support a szB up to 1 << (32 - SCALARTS_N_THRBITS - 1)
4942 /* We don't have a record for this address+triple. Create a new one. */
4943 stats__ctxt_neq_tsw_neq_rcec++;
4944 ref = alloc_or_reuse_OldRef();
4945 ref->ga = a;
4946 ref->acc.tsw = (TSW) {.thrid = thrid,
4947 .szB = szB,
4948 .isW = (UInt)(isW & 1)};
4949 ref->stamp = event_map_stamp;
4950 ref->acc.locksHeldW = locksHeldW;
4951 ref->acc.rcec = rcec;
4952 ctxt__rcinc(rcec);
4954 VG_(HT_add_node) ( oldrefHT, ref );
4955 OldRef_newest (ref);
4957 event_map_stamp++;
4961 /* Extract info from the conflicting-access machinery.
4962 Returns the most recent conflicting access with thr/[a, a+szB[/isW. */
4963 Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
4964 /*OUT*/Thr** resThr,
4965 /*OUT*/SizeT* resSzB,
4966 /*OUT*/Bool* resIsW,
4967 /*OUT*/WordSetID* locksHeldW,
4968 Thr* thr, Addr a, SizeT szB, Bool isW )
4970 Word i, j;
4971 OldRef *ref = NULL;
4972 SizeT ref_szB = 0;
4974 OldRef *cand_ref;
4975 SizeT cand_ref_szB;
4976 Addr cand_a;
4978 Addr toCheck[15];
4979 Int nToCheck = 0;
4981 tl_assert(thr);
4982 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
4984 ThrID thrid = thr->thrid;
4986 toCheck[nToCheck++] = a;
4987 for (i = -7; i < (Word)szB; i++) {
4988 if (i != 0)
4989 toCheck[nToCheck++] = a + i;
4991 tl_assert(nToCheck <= 15);
4993 /* Now see if we can find a suitable matching event for
4994 any of the addresses in toCheck[0 .. nToCheck-1]. */
4995 for (j = 0; j < nToCheck; j++) {
4997 cand_a = toCheck[j];
4998 // VG_(printf)("test %ld %p\n", j, cand_a);
5000 /* Find the first HT element for this address.
5001 We might have several of these. They will be linked via ht_next.
5002 We however need to check various elements as the list contains
5003 all elements that map to the same bucket. */
5004 for (cand_ref = VG_(HT_lookup)( oldrefHT, cand_a );
5005 cand_ref; cand_ref = cand_ref->ht_next) {
5006 if (cand_ref->ga != cand_a)
5007 /* OldRef for another address in this HT bucket. Ignore. */
5008 continue;
5010 if (cand_ref->acc.tsw.thrid == thrid)
5011 /* This is an access by the same thread, but we're only
5012 interested in accesses from other threads. Ignore. */
5013 continue;
5015 if ((!cand_ref->acc.tsw.isW) && (!isW))
5016 /* We don't want to report a read racing against another
5017 read; that's stupid. So in this case move on. */
5018 continue;
5020 cand_ref_szB = cand_ref->acc.tsw.szB;
5021 if (cmp_nonempty_intervals(a, szB, cand_a, cand_ref_szB) != 0)
5022 /* No overlap with the access we're asking about. Ignore. */
5023 continue;
5025 /* We have a match. Keep this match if it is newer than
5026 the previous match. Note that stamp are Unsigned Words, and
5027 for long running applications, event_map_stamp might have cycled.
5028 So, 'roll' each stamp using event_map_stamp to have the
5029 stamps in the good order, in case event_map_stamp recycled. */
5030 if (!ref
5031 || (ref->stamp - event_map_stamp)
5032 < (cand_ref->stamp - event_map_stamp)) {
5033 ref = cand_ref;
5034 ref_szB = cand_ref_szB;
5038 if (ref) {
5039 /* return with success */
5040 Int n, maxNFrames;
5041 RCEC* ref_rcec = ref->acc.rcec;
5042 tl_assert(ref->acc.tsw.thrid);
5043 tl_assert(ref_rcec);
5044 tl_assert(ref_rcec->magic == RCEC_MAGIC);
5045 tl_assert(ref_szB >= 1);
5046 /* Count how many non-zero frames we have. */
5047 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
5048 for (n = 0; n < maxNFrames; n++) {
5049 if (0 == ref_rcec->frames[n]) break;
5051 *resEC = VG_(make_ExeContext_from_StackTrace)(ref_rcec->frames,
5053 *resThr = Thr__from_ThrID(ref->acc.tsw.thrid);
5054 *resSzB = ref_szB;
5055 *resIsW = ref->acc.tsw.isW;
5056 *locksHeldW = ref->acc.locksHeldW;
5057 stats__evm__lookup_found++;
5058 return True;
5061 /* consider next address in toCheck[] */
5062 } /* for (j = 0; j < nToCheck; j++) */
5064 /* really didn't find anything. */
5065 stats__evm__lookup_notfound++;
5066 return False;
5070 void libhb_event_map_access_history ( Addr a, SizeT szB, Access_t fn )
5072 OldRef *ref = lru.next;
5073 SizeT ref_szB;
5074 Int n;
5076 while (ref != &mru) {
5077 ref_szB = ref->acc.tsw.szB;
5078 if (cmp_nonempty_intervals(a, szB, ref->ga, ref_szB) == 0) {
5079 RCEC* ref_rcec = ref->acc.rcec;
5080 for (n = 0; n < N_FRAMES; n++) {
5081 if (0 == ref_rcec->frames[n]) {
5082 break;
5085 (*fn)(ref_rcec->frames, n,
5086 Thr__from_ThrID(ref->acc.tsw.thrid),
5087 ref->ga,
5088 ref_szB,
5089 ref->acc.tsw.isW,
5090 ref->acc.locksHeldW);
5092 tl_assert (ref->next == &mru
5093 || ((ref->stamp - event_map_stamp)
5094 < ref->next->stamp - event_map_stamp));
5095 ref = ref->next;
5099 static void event_map_init ( void )
5101 Word i;
5103 /* Context (RCEC) pool allocator */
5104 rcec_pool_allocator = VG_(newPA) (
5105 sizeof(RCEC),
5106 1000 /* RCECs per pool */,
5107 HG_(zalloc),
5108 "libhb.event_map_init.1 (RCEC pools)",
5109 HG_(free)
5112 /* Context table */
5113 tl_assert(!contextTab);
5114 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
5115 N_RCEC_TAB * sizeof(RCEC*) );
5116 for (i = 0; i < N_RCEC_TAB; i++)
5117 contextTab[i] = NULL;
5119 /* Oldref pool allocator */
5120 oldref_pool_allocator = VG_(newPA)(
5121 sizeof(OldRef),
5122 1000 /* OldRefs per pool */,
5123 HG_(zalloc),
5124 "libhb.event_map_init.3 (OldRef pools)",
5125 HG_(free)
5128 /* Oldref hashtable */
5129 tl_assert(!oldrefHT);
5130 oldrefHT = VG_(HT_construct) ("libhb.event_map_init.4 (oldref hashtable)");
5132 oldrefHTN = 0;
5133 mru.prev = &lru;
5134 mru.next = NULL;
5135 lru.prev = NULL;
5136 lru.next = &mru;
5137 mru.acc = (Thr_n_RCEC) {.tsw = {.thrid = 0,
5138 .szB = 0,
5139 .isW = 0},
5140 .locksHeldW = 0,
5141 .rcec = NULL};
5142 lru.acc = mru.acc;
5145 static void event_map__check_reference_counts ( void )
5147 RCEC* rcec;
5148 OldRef* oldref;
5149 Word i;
5150 UWord nEnts = 0;
5152 /* Set the 'check' reference counts to zero. Also, optionally
5153 check that the real reference counts are non-zero. We allow
5154 these to fall to zero before a GC, but the GC must get rid of
5155 all those that are zero, hence none should be zero after a
5156 GC. */
5157 for (i = 0; i < N_RCEC_TAB; i++) {
5158 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
5159 nEnts++;
5160 tl_assert(rcec);
5161 tl_assert(rcec->magic == RCEC_MAGIC);
5162 rcec->rcX = 0;
5166 /* check that the stats are sane */
5167 tl_assert(nEnts == stats__ctxt_tab_curr);
5168 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
5170 /* visit all the referencing points, inc check ref counts */
5171 VG_(HT_ResetIter)( oldrefHT );
5172 oldref = VG_(HT_Next)( oldrefHT );
5173 while (oldref) {
5174 tl_assert (oldref->acc.tsw.thrid);
5175 tl_assert (oldref->acc.rcec);
5176 tl_assert (oldref->acc.rcec->magic == RCEC_MAGIC);
5177 oldref->acc.rcec->rcX++;
5178 oldref = VG_(HT_Next)( oldrefHT );
5181 /* compare check ref counts with actual */
5182 for (i = 0; i < N_RCEC_TAB; i++) {
5183 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
5184 tl_assert(rcec->rc == rcec->rcX);
5189 __attribute__((noinline))
5190 static void do_RCEC_GC ( void )
5192 UInt i;
5194 if (VG_(clo_stats)) {
5195 static UInt ctr = 1;
5196 VG_(message)(Vg_DebugMsg,
5197 "libhb: RCEC GC: #%u %lu slots,"
5198 " %lu cur ents(ref'd %lu),"
5199 " %lu max ents\n",
5200 ctr++,
5201 (UWord)N_RCEC_TAB,
5202 stats__ctxt_tab_curr, RCEC_referenced,
5203 stats__ctxt_tab_max );
5205 tl_assert (stats__ctxt_tab_curr > RCEC_referenced);
5207 /* Throw away all RCECs with zero reference counts */
5208 for (i = 0; i < N_RCEC_TAB; i++) {
5209 RCEC** pp = &contextTab[i];
5210 RCEC* p = *pp;
5211 while (p) {
5212 if (p->rc == 0) {
5213 *pp = p->next;
5214 free_RCEC(p);
5215 p = *pp;
5216 tl_assert(stats__ctxt_tab_curr > 0);
5217 stats__ctxt_rcec_gc_discards++;
5218 stats__ctxt_tab_curr--;
5219 } else {
5220 pp = &p->next;
5221 p = p->next;
5226 tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
5229 /////////////////////////////////////////////////////////
5230 // //
5231 // Core MSM //
5232 // //
5233 /////////////////////////////////////////////////////////
5235 /* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
5236 Nov 08, and again after [...],
5237 June 09. */
5239 static ULong stats__msmcread = 0;
5240 static ULong stats__msmcread_change = 0;
5241 static ULong stats__msmcwrite = 0;
5242 static ULong stats__msmcwrite_change = 0;
/* Some notes on the H1 history mechanism:

   Transition rules are:

   read_{Kr,Kw}(Cr,Cw)  = (Cr,           Cr `join` Kw)
   write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)

   NOTE(review): msmcread and msmcwrite as written compute the second
   component as Cw `join` Kw rather than Cr `join` Kw.  The argument
   at the end of these notes applies equally to either form, but the
   rules above should be reconciled with the code.

   After any access by a thread T to a location L, L's constraint pair
   (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.

   After a race by thread T conflicting with some previous access by
   some other thread U, for a location with constraint (before
   processing the later access) (Cr,Cw), then Cw[U] is the segment in
   which the previous access lies.

   Hence in record_race_info, we pass in Cfailed and Kfailed, which
   are compared so as to find out which thread(s) this access
   conflicts with.  Once that is established, we also require the
   pre-update Cw for the location, so we can index into it for those
   threads, to get the scalar clock values for the point at which the
   former accesses were made.  (In fact we only bother to do any of
   this for an arbitrarily chosen one of the conflicting threads, as
   that's simpler, it avoids flooding the user with vast amounts of
   mostly useless information, and because the program is wrong if it
   contains any races at all -- so we don't really need to show all
   conflicting access pairs initially, so long as we only show none if
   none exist).

   ---

   The first claim above -- that after any access by T the
   location's Cw[T] equals T's Kw[T] -- requires the auxiliary proof
   that

     (Cr `join` Kw)[T] == Kw[T]

   Why should that be true?  Because for any thread T, Kw[T] >= the
   scalar clock value for T known by any other thread.  In other
   words, because T's value for its own scalar clock is at least as up
   to date as the value for it known by any other thread (that is true
   for both the R- and W- scalar clocks).  Hence no other thread will
   be able to feed in a value for that element (indirectly via a
   constraint) which will exceed Kw[T], and hence the join cannot
   cause that particular element to advance.
*/
5288 __attribute__((noinline))
5289 static void record_race_info ( Thr* acc_thr,
5290 Addr acc_addr, SizeT szB, Bool isWrite,
5291 VtsID Cfailed,
5292 VtsID Kfailed,
5293 VtsID Cw )
5295 /* Call here to report a race. We just hand it onwards to
5296 HG_(record_error_Race). If that in turn discovers that the
5297 error is going to be collected, then, at history_level 2, that
5298 queries the conflicting-event map. The alternative would be to
5299 query it right here. But that causes a lot of pointless queries
5300 for errors which will shortly be discarded as duplicates, and
5301 can become a performance overhead; so we defer the query until
5302 we know the error is not a duplicate. */
5304 /* Stacks for the bounds of the (or one of the) conflicting
5305 segment(s). These are only set at history_level 1. */
5306 ExeContext* hist1_seg_start = NULL;
5307 ExeContext* hist1_seg_end = NULL;
5308 Thread* hist1_conf_thr = NULL;
5310 tl_assert(acc_thr);
5311 tl_assert(acc_thr->hgthread);
5312 tl_assert(acc_thr->hgthread->hbthr == acc_thr);
5313 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
5315 if (HG_(clo_history_level) == 1) {
5316 Bool found;
5317 Word firstIx, lastIx;
5318 ULong_n_EC key;
5320 /* At history_level 1, we must round up the relevant stack-pair
5321 for the conflicting segment right now. This is because
5322 deferring it is complex; we can't (easily) put Kfailed and
5323 Cfailed into the XError and wait for later without
5324 getting tied up in difficulties with VtsID reference
5325 counting. So just do it now. */
5326 Thr* confThr;
5327 ULong confTym = 0;
5328 /* Which thread are we in conflict with? There may be more than
5329 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
5330 (in fact it's the one with the lowest Thr* value). */
5331 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
5332 /* This must exist! since if it was NULL then there's no
5333 conflict (semantics of return value of
5334 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
5335 called us, just checked exactly this -- that there was in
5336 fact a race. */
5337 tl_assert(confThr);
5339 /* Get the scalar clock value that the conflicting thread
5340 introduced into the constraint. A careful examination of the
5341 base machine rules shows that this must be the same as the
5342 conflicting thread's scalar clock when it created this
5343 constraint. Hence we know the scalar clock of the
5344 conflicting thread when the conflicting access was made. */
5345 confTym = VtsID__indexAt( Cfailed, confThr );
5347 /* Using this scalar clock, index into the conflicting thread's
5348 collection of stack traces made each time its vector clock
5349 (hence its scalar clock) changed. This gives the stack
5350 traces at the start and end of the conflicting segment (well,
5351 as per comment just above, of one of the conflicting
5352 segments, if there are more than one). */
5353 key.ull = confTym;
5354 key.ec = NULL;
5355 /* tl_assert(confThr); -- asserted just above */
5356 tl_assert(confThr->local_Kws_n_stacks);
5357 firstIx = lastIx = 0;
5358 found = VG_(lookupXA_UNSAFE)(
5359 confThr->local_Kws_n_stacks,
5360 &key, &firstIx, &lastIx,
5361 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
5363 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
5364 "confTym %llu found %d (%ld,%ld)\n",
5365 Cfailed, Kfailed, Cw,
5366 confThr, confTym, found, firstIx, lastIx);
5367 /* We can't indefinitely collect stack traces at VTS
5368 transitions, since we'd eventually run out of memory. Hence
5369 note_local_Kw_n_stack_for will eventually throw away old
5370 ones, which in turn means we might fail to find index value
5371 confTym in the array. */
5372 if (found) {
5373 ULong_n_EC *pair_start, *pair_end;
5374 pair_start
5375 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
5376 hist1_seg_start = pair_start->ec;
5377 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
5378 pair_end
5379 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
5380 lastIx+1 );
5381 /* from properties of VG_(lookupXA) and the comparison fn used: */
5382 tl_assert(pair_start->ull < pair_end->ull);
5383 hist1_seg_end = pair_end->ec;
5384 /* Could do a bit better here. It may be that pair_end
5385 doesn't have a stack, but the following entries in the
5386 array have the same scalar Kw and to have a stack. So
5387 we should search a bit further along the array than
5388 lastIx+1 if hist1_seg_end is NULL. */
5389 } else {
5390 if (!confThr->llexit_done)
5391 hist1_seg_end = main_get_EC( confThr );
5393 // seg_start could be NULL iff this is the first stack in the thread
5394 //if (seg_start) VG_(pp_ExeContext)(seg_start);
5395 //if (seg_end) VG_(pp_ExeContext)(seg_end);
5396 hist1_conf_thr = confThr->hgthread;
5400 HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
5401 szB, isWrite,
5402 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
5405 static Bool is_sane_SVal_C ( SVal sv ) {
5406 Bool leq;
5407 if (!SVal__isC(sv)) return True;
5408 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
5409 return leq;
5413 /* Compute new state following a read */
5414 static inline SVal msmcread ( SVal svOld,
5415 /* The following are only needed for
5416 creating error reports. */
5417 Thr* acc_thr,
5418 Addr acc_addr, SizeT szB )
5420 SVal svNew = SVal_INVALID;
5421 stats__msmcread++;
5423 /* Redundant sanity check on the constraints */
5424 if (CHECK_MSM) {
5425 tl_assert(is_sane_SVal_C(svOld));
5428 if (LIKELY(SVal__isC(svOld))) {
5429 VtsID tviR = acc_thr->viR;
5430 VtsID tviW = acc_thr->viW;
5431 VtsID rmini = SVal__unC_Rmin(svOld);
5432 VtsID wmini = SVal__unC_Wmin(svOld);
5433 Bool leq = VtsID__cmpLEQ(rmini,tviR);
5434 if (LIKELY(leq)) {
5435 /* no race */
5436 /* Note: RWLOCK subtlety: use tviW, not tviR */
5437 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5438 goto out;
5439 } else {
5440 /* assert on sanity of constraints. */
5441 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5442 tl_assert(leqxx);
5443 // same as in non-race case
5444 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5445 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
5446 rmini, /* Cfailed */
5447 tviR, /* Kfailed */
5448 wmini /* Cw */ );
5449 goto out;
5452 if (SVal__isA(svOld)) {
5453 /* reading no-access memory (sigh); leave unchanged */
5454 /* check for no pollution */
5455 tl_assert(svOld == SVal_NOACCESS);
5456 svNew = SVal_NOACCESS;
5457 goto out;
5459 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
5460 tl_assert(0);
5462 out:
5463 if (CHECK_MSM) {
5464 tl_assert(is_sane_SVal_C(svNew));
5466 if (UNLIKELY(svNew != svOld)) {
5467 tl_assert(svNew != SVal_INVALID);
5468 if (HG_(clo_history_level) >= 2
5469 && SVal__isC(svOld) && SVal__isC(svNew)) {
5470 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
5471 stats__msmcread_change++;
5474 return svNew;
5478 /* Compute new state following a write */
5479 static inline SVal msmcwrite ( SVal svOld,
5480 /* The following are only needed for
5481 creating error reports. */
5482 Thr* acc_thr,
5483 Addr acc_addr, SizeT szB )
5485 SVal svNew = SVal_INVALID;
5486 stats__msmcwrite++;
5488 /* Redundant sanity check on the constraints */
5489 if (CHECK_MSM) {
5490 tl_assert(is_sane_SVal_C(svOld));
5493 if (LIKELY(SVal__isC(svOld))) {
5494 VtsID tviW = acc_thr->viW;
5495 VtsID wmini = SVal__unC_Wmin(svOld);
5496 Bool leq = VtsID__cmpLEQ(wmini,tviW);
5497 if (LIKELY(leq)) {
5498 /* no race */
5499 svNew = SVal__mkC( tviW, tviW );
5500 goto out;
5501 } else {
5502 VtsID rmini = SVal__unC_Rmin(svOld);
5503 /* assert on sanity of constraints. */
5504 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5505 tl_assert(leqxx);
5506 // same as in non-race case
5507 // proof: in the non-race case, we have
5508 // rmini <= wmini (invar on constraints)
5509 // tviW <= tviR (invar on thread clocks)
5510 // wmini <= tviW (from run-time check)
5511 // hence from transitivity of <= we have
5512 // rmini <= wmini <= tviW
5513 // and so join(rmini,tviW) == tviW
5514 // and join(wmini,tviW) == tviW
5515 // qed.
5516 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5517 VtsID__join2(wmini, tviW) );
5518 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
5519 wmini, /* Cfailed */
5520 tviW, /* Kfailed */
5521 wmini /* Cw */ );
5522 goto out;
5525 if (SVal__isA(svOld)) {
5526 /* writing no-access memory (sigh); leave unchanged */
5527 /* check for no pollution */
5528 tl_assert(svOld == SVal_NOACCESS);
5529 svNew = SVal_NOACCESS;
5530 goto out;
5532 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
5533 tl_assert(0);
5535 out:
5536 if (CHECK_MSM) {
5537 tl_assert(is_sane_SVal_C(svNew));
5539 if (UNLIKELY(svNew != svOld)) {
5540 tl_assert(svNew != SVal_INVALID);
5541 if (HG_(clo_history_level) >= 2
5542 && SVal__isC(svOld) && SVal__isC(svNew)) {
5543 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
5544 stats__msmcwrite_change++;
5547 return svNew;
5551 /////////////////////////////////////////////////////////
5552 // //
5553 // Apply core MSM to specific memory locations //
5554 // //
5555 /////////////////////////////////////////////////////////
5557 /*------------- ZSM accesses: 8 bit sapply ------------- */
5559 static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
5560 CacheLine* cl;
5561 UWord cloff, tno, toff;
5562 SVal svOld, svNew;
5563 UShort descr;
5564 stats__cline_cread08s++;
5565 cl = get_cacheline(a);
5566 cloff = get_cacheline_offset(a);
5567 tno = get_treeno(a);
5568 toff = get_tree_offset(a); /* == 0 .. 7 */
5569 descr = cl->descrs[tno];
5570 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5571 SVal* tree = &cl->svals[tno << 3];
5572 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5573 if (CHECK_ZSM)
5574 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5576 svOld = cl->svals[cloff];
5577 svNew = msmcread( svOld, thr,a,1 );
5578 if (CHECK_ZSM)
5579 tl_assert(svNew != SVal_INVALID);
5580 cl->svals[cloff] = svNew;
5583 static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
5584 CacheLine* cl;
5585 UWord cloff, tno, toff;
5586 SVal svOld, svNew;
5587 UShort descr;
5588 stats__cline_cwrite08s++;
5589 cl = get_cacheline(a);
5590 cloff = get_cacheline_offset(a);
5591 tno = get_treeno(a);
5592 toff = get_tree_offset(a); /* == 0 .. 7 */
5593 descr = cl->descrs[tno];
5594 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5595 SVal* tree = &cl->svals[tno << 3];
5596 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5597 if (CHECK_ZSM)
5598 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5600 svOld = cl->svals[cloff];
5601 svNew = msmcwrite( svOld, thr,a,1 );
5602 if (CHECK_ZSM)
5603 tl_assert(svNew != SVal_INVALID);
5604 cl->svals[cloff] = svNew;
5607 /*------------- ZSM accesses: 16 bit sapply ------------- */
5609 static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
5610 CacheLine* cl;
5611 UWord cloff, tno, toff;
5612 SVal svOld, svNew;
5613 UShort descr;
5614 stats__cline_cread16s++;
5615 if (UNLIKELY(!aligned16(a))) goto slowcase;
5616 cl = get_cacheline(a);
5617 cloff = get_cacheline_offset(a);
5618 tno = get_treeno(a);
5619 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5620 descr = cl->descrs[tno];
5621 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5622 if (valid_value_is_below_me_16(descr, toff)) {
5623 goto slowcase;
5624 } else {
5625 SVal* tree = &cl->svals[tno << 3];
5626 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5628 if (CHECK_ZSM)
5629 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5631 svOld = cl->svals[cloff];
5632 svNew = msmcread( svOld, thr,a,2 );
5633 if (CHECK_ZSM)
5634 tl_assert(svNew != SVal_INVALID);
5635 cl->svals[cloff] = svNew;
5636 return;
5637 slowcase: /* misaligned, or must go further down the tree */
5638 stats__cline_16to8splits++;
5639 zsm_sapply08__msmcread( thr, a + 0 );
5640 zsm_sapply08__msmcread( thr, a + 1 );
5643 static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
5644 CacheLine* cl;
5645 UWord cloff, tno, toff;
5646 SVal svOld, svNew;
5647 UShort descr;
5648 stats__cline_cwrite16s++;
5649 if (UNLIKELY(!aligned16(a))) goto slowcase;
5650 cl = get_cacheline(a);
5651 cloff = get_cacheline_offset(a);
5652 tno = get_treeno(a);
5653 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5654 descr = cl->descrs[tno];
5655 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5656 if (valid_value_is_below_me_16(descr, toff)) {
5657 goto slowcase;
5658 } else {
5659 SVal* tree = &cl->svals[tno << 3];
5660 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5662 if (CHECK_ZSM)
5663 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5665 svOld = cl->svals[cloff];
5666 svNew = msmcwrite( svOld, thr,a,2 );
5667 if (CHECK_ZSM)
5668 tl_assert(svNew != SVal_INVALID);
5669 cl->svals[cloff] = svNew;
5670 return;
5671 slowcase: /* misaligned, or must go further down the tree */
5672 stats__cline_16to8splits++;
5673 zsm_sapply08__msmcwrite( thr, a + 0 );
5674 zsm_sapply08__msmcwrite( thr, a + 1 );
5677 /*------------- ZSM accesses: 32 bit sapply ------------- */
5679 static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
5680 CacheLine* cl;
5681 UWord cloff, tno, toff;
5682 SVal svOld, svNew;
5683 UShort descr;
5684 stats__cline_cread32s++;
5685 if (UNLIKELY(!aligned32(a))) goto slowcase;
5686 cl = get_cacheline(a);
5687 cloff = get_cacheline_offset(a);
5688 tno = get_treeno(a);
5689 toff = get_tree_offset(a); /* == 0 or 4 */
5690 descr = cl->descrs[tno];
5691 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5692 if (valid_value_is_above_me_32(descr, toff)) {
5693 SVal* tree = &cl->svals[tno << 3];
5694 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5695 } else {
5696 goto slowcase;
5698 if (CHECK_ZSM)
5699 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5701 svOld = cl->svals[cloff];
5702 svNew = msmcread( svOld, thr,a,4 );
5703 if (CHECK_ZSM)
5704 tl_assert(svNew != SVal_INVALID);
5705 cl->svals[cloff] = svNew;
5706 return;
5707 slowcase: /* misaligned, or must go further down the tree */
5708 stats__cline_32to16splits++;
5709 zsm_sapply16__msmcread( thr, a + 0 );
5710 zsm_sapply16__msmcread( thr, a + 2 );
5713 static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
5714 CacheLine* cl;
5715 UWord cloff, tno, toff;
5716 SVal svOld, svNew;
5717 UShort descr;
5718 stats__cline_cwrite32s++;
5719 if (UNLIKELY(!aligned32(a))) goto slowcase;
5720 cl = get_cacheline(a);
5721 cloff = get_cacheline_offset(a);
5722 tno = get_treeno(a);
5723 toff = get_tree_offset(a); /* == 0 or 4 */
5724 descr = cl->descrs[tno];
5725 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5726 if (valid_value_is_above_me_32(descr, toff)) {
5727 SVal* tree = &cl->svals[tno << 3];
5728 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5729 } else {
5730 goto slowcase;
5732 if (CHECK_ZSM)
5733 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5735 svOld = cl->svals[cloff];
5736 svNew = msmcwrite( svOld, thr,a,4 );
5737 if (CHECK_ZSM)
5738 tl_assert(svNew != SVal_INVALID);
5739 cl->svals[cloff] = svNew;
5740 return;
5741 slowcase: /* misaligned, or must go further down the tree */
5742 stats__cline_32to16splits++;
5743 zsm_sapply16__msmcwrite( thr, a + 0 );
5744 zsm_sapply16__msmcwrite( thr, a + 2 );
5747 /*------------- ZSM accesses: 64 bit sapply ------------- */
5749 static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
5750 CacheLine* cl;
5751 UWord cloff, tno;
5752 //UWord toff;
5753 SVal svOld, svNew;
5754 UShort descr;
5755 stats__cline_cread64s++;
5756 if (UNLIKELY(!aligned64(a))) goto slowcase;
5757 cl = get_cacheline(a);
5758 cloff = get_cacheline_offset(a);
5759 tno = get_treeno(a);
5760 //toff = get_tree_offset(a); /* == 0, unused */
5761 descr = cl->descrs[tno];
5762 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5763 goto slowcase;
5765 svOld = cl->svals[cloff];
5766 svNew = msmcread( svOld, thr,a,8 );
5767 if (CHECK_ZSM)
5768 tl_assert(svNew != SVal_INVALID);
5769 cl->svals[cloff] = svNew;
5770 return;
5771 slowcase: /* misaligned, or must go further down the tree */
5772 stats__cline_64to32splits++;
5773 zsm_sapply32__msmcread( thr, a + 0 );
5774 zsm_sapply32__msmcread( thr, a + 4 );
5777 static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
5778 CacheLine* cl;
5779 UWord cloff, tno;
5780 //UWord toff;
5781 SVal svOld, svNew;
5782 UShort descr;
5783 stats__cline_cwrite64s++;
5784 if (UNLIKELY(!aligned64(a))) goto slowcase;
5785 cl = get_cacheline(a);
5786 cloff = get_cacheline_offset(a);
5787 tno = get_treeno(a);
5788 //toff = get_tree_offset(a); /* == 0, unused */
5789 descr = cl->descrs[tno];
5790 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5791 goto slowcase;
5793 svOld = cl->svals[cloff];
5794 svNew = msmcwrite( svOld, thr,a,8 );
5795 if (CHECK_ZSM)
5796 tl_assert(svNew != SVal_INVALID);
5797 cl->svals[cloff] = svNew;
5798 return;
5799 slowcase: /* misaligned, or must go further down the tree */
5800 stats__cline_64to32splits++;
5801 zsm_sapply32__msmcwrite( thr, a + 0 );
5802 zsm_sapply32__msmcwrite( thr, a + 4 );
5805 /*--------------- ZSM accesses: 8 bit swrite --------------- */
5807 static
5808 void zsm_swrite08 ( Addr a, SVal svNew ) {
5809 CacheLine* cl;
5810 UWord cloff, tno, toff;
5811 UShort descr;
5812 stats__cline_swrite08s++;
5813 cl = get_cacheline(a);
5814 cloff = get_cacheline_offset(a);
5815 tno = get_treeno(a);
5816 toff = get_tree_offset(a); /* == 0 .. 7 */
5817 descr = cl->descrs[tno];
5818 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5819 SVal* tree = &cl->svals[tno << 3];
5820 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5821 if (CHECK_ZSM)
5822 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5824 tl_assert(svNew != SVal_INVALID);
5825 cl->svals[cloff] = svNew;
5828 /*--------------- ZSM accesses: 16 bit swrite --------------- */
5830 static
5831 void zsm_swrite16 ( Addr a, SVal svNew ) {
5832 CacheLine* cl;
5833 UWord cloff, tno, toff;
5834 UShort descr;
5835 stats__cline_swrite16s++;
5836 if (UNLIKELY(!aligned16(a))) goto slowcase;
5837 cl = get_cacheline(a);
5838 cloff = get_cacheline_offset(a);
5839 tno = get_treeno(a);
5840 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5841 descr = cl->descrs[tno];
5842 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5843 if (valid_value_is_below_me_16(descr, toff)) {
5844 /* Writing at this level. Need to fix up 'descr'. */
5845 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5846 /* At this point, the tree does not match cl->descr[tno] any
5847 more. The assignments below will fix it up. */
5848 } else {
5849 /* We can't indiscriminately write on the w16 node as in the
5850 w64 case, as that might make the node inconsistent with
5851 its parent. So first, pull down to this level. */
5852 SVal* tree = &cl->svals[tno << 3];
5853 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5854 if (CHECK_ZSM)
5855 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5858 tl_assert(svNew != SVal_INVALID);
5859 cl->svals[cloff + 0] = svNew;
5860 cl->svals[cloff + 1] = SVal_INVALID;
5861 return;
5862 slowcase: /* misaligned */
5863 stats__cline_16to8splits++;
5864 zsm_swrite08( a + 0, svNew );
5865 zsm_swrite08( a + 1, svNew );
5868 /*--------------- ZSM accesses: 32 bit swrite --------------- */
5870 static
5871 void zsm_swrite32 ( Addr a, SVal svNew ) {
5872 CacheLine* cl;
5873 UWord cloff, tno, toff;
5874 UShort descr;
5875 stats__cline_swrite32s++;
5876 if (UNLIKELY(!aligned32(a))) goto slowcase;
5877 cl = get_cacheline(a);
5878 cloff = get_cacheline_offset(a);
5879 tno = get_treeno(a);
5880 toff = get_tree_offset(a); /* == 0 or 4 */
5881 descr = cl->descrs[tno];
5882 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5883 if (valid_value_is_above_me_32(descr, toff)) {
5884 /* We can't indiscriminately write on the w32 node as in the
5885 w64 case, as that might make the node inconsistent with
5886 its parent. So first, pull down to this level. */
5887 SVal* tree = &cl->svals[tno << 3];
5888 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5889 if (CHECK_ZSM)
5890 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5891 } else {
5892 /* Writing at this level. Need to fix up 'descr'. */
5893 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5894 /* At this point, the tree does not match cl->descr[tno] any
5895 more. The assignments below will fix it up. */
5898 tl_assert(svNew != SVal_INVALID);
5899 cl->svals[cloff + 0] = svNew;
5900 cl->svals[cloff + 1] = SVal_INVALID;
5901 cl->svals[cloff + 2] = SVal_INVALID;
5902 cl->svals[cloff + 3] = SVal_INVALID;
5903 return;
5904 slowcase: /* misaligned */
5905 stats__cline_32to16splits++;
5906 zsm_swrite16( a + 0, svNew );
5907 zsm_swrite16( a + 2, svNew );
5910 /*--------------- ZSM accesses: 64 bit swrite --------------- */
5912 static
5913 void zsm_swrite64 ( Addr a, SVal svNew ) {
5914 CacheLine* cl;
5915 UWord cloff, tno;
5916 //UWord toff;
5917 stats__cline_swrite64s++;
5918 if (UNLIKELY(!aligned64(a))) goto slowcase;
5919 cl = get_cacheline(a);
5920 cloff = get_cacheline_offset(a);
5921 tno = get_treeno(a);
5922 //toff = get_tree_offset(a); /* == 0, unused */
5923 cl->descrs[tno] = TREE_DESCR_64;
5924 if (CHECK_ZSM)
5925 tl_assert(svNew != SVal_INVALID); /* EXPENSIVE */
5926 cl->svals[cloff + 0] = svNew;
5927 cl->svals[cloff + 1] = SVal_INVALID;
5928 cl->svals[cloff + 2] = SVal_INVALID;
5929 cl->svals[cloff + 3] = SVal_INVALID;
5930 cl->svals[cloff + 4] = SVal_INVALID;
5931 cl->svals[cloff + 5] = SVal_INVALID;
5932 cl->svals[cloff + 6] = SVal_INVALID;
5933 cl->svals[cloff + 7] = SVal_INVALID;
5934 return;
5935 slowcase: /* misaligned */
5936 stats__cline_64to32splits++;
5937 zsm_swrite32( a + 0, svNew );
5938 zsm_swrite32( a + 4, svNew );
5941 /*------------- ZSM accesses: 8 bit sread/scopy ------------- */
5943 static
5944 SVal zsm_sread08 ( Addr a ) {
5945 CacheLine* cl;
5946 UWord cloff, tno, toff;
5947 UShort descr;
5948 stats__cline_sread08s++;
5949 cl = get_cacheline(a);
5950 cloff = get_cacheline_offset(a);
5951 tno = get_treeno(a);
5952 toff = get_tree_offset(a); /* == 0 .. 7 */
5953 descr = cl->descrs[tno];
5954 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5955 SVal* tree = &cl->svals[tno << 3];
5956 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5958 return cl->svals[cloff];
5961 static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
5962 SVal sv;
5963 stats__cline_scopy08s++;
5964 sv = zsm_sread08( src );
5965 zsm_swrite08( dst, sv );
5969 /* Block-copy states (needed for implementing realloc()). Note this
5970 doesn't change the filtering arrangements. The caller of
5971 zsm_scopy_range needs to attend to that. */
5973 static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
5975 SizeT i;
5976 if (len == 0)
5977 return;
5979 /* assert for non-overlappingness */
5980 tl_assert(src+len <= dst || dst+len <= src);
5982 /* To be simple, just copy byte by byte. But so as not to wreck
5983 performance for later accesses to dst[0 .. len-1], normalise
5984 destination lines as we finish with them, and also normalise the
5985 line containing the first and last address. */
5986 for (i = 0; i < len; i++) {
5987 Bool normalise
5988 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
5989 || i == 0 /* first in range */
5990 || i == len-1; /* last in range */
5991 zsm_scopy08( src+i, dst+i, normalise );
5996 /* For setting address ranges to a given value. Has considerable
5997 sophistication so as to avoid generating large numbers of pointless
5998 cache loads/writebacks for large ranges. */
6000 /* Do small ranges in-cache, in the obvious way. */
6001 static
6002 void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
6004 /* fast track a couple of common cases */
6005 if (len == 4 && aligned32(a)) {
6006 zsm_swrite32( a, svNew );
6007 return;
6009 if (len == 8 && aligned64(a)) {
6010 zsm_swrite64( a, svNew );
6011 return;
6014 /* be completely general (but as efficient as possible) */
6015 if (len == 0) return;
6017 if (!aligned16(a) && len >= 1) {
6018 zsm_swrite08( a, svNew );
6019 a += 1;
6020 len -= 1;
6021 tl_assert(aligned16(a));
6023 if (len == 0) return;
6025 if (!aligned32(a) && len >= 2) {
6026 zsm_swrite16( a, svNew );
6027 a += 2;
6028 len -= 2;
6029 tl_assert(aligned32(a));
6031 if (len == 0) return;
6033 if (!aligned64(a) && len >= 4) {
6034 zsm_swrite32( a, svNew );
6035 a += 4;
6036 len -= 4;
6037 tl_assert(aligned64(a));
6039 if (len == 0) return;
6041 if (len >= 8) {
6042 tl_assert(aligned64(a));
6043 while (len >= 8) {
6044 zsm_swrite64( a, svNew );
6045 a += 8;
6046 len -= 8;
6048 tl_assert(aligned64(a));
6050 if (len == 0) return;
6052 if (len >= 4)
6053 tl_assert(aligned32(a));
6054 if (len >= 4) {
6055 zsm_swrite32( a, svNew );
6056 a += 4;
6057 len -= 4;
6059 if (len == 0) return;
6061 if (len >= 2)
6062 tl_assert(aligned16(a));
6063 if (len >= 2) {
6064 zsm_swrite16( a, svNew );
6065 a += 2;
6066 len -= 2;
6068 if (len == 0) return;
6070 if (len >= 1) {
6071 zsm_swrite08( a, svNew );
6072 //a += 1;
6073 len -= 1;
6075 tl_assert(len == 0);
6079 /* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
6080 for larger ranges, try to operate directly on the out-of-cache
6081 representation, rather than dragging lines into the cache,
6082 overwriting them, and forcing them out. This turns out to be an
6083 important performance optimisation.
6085 Note that this doesn't change the filtering arrangements. The
6086 caller of zsm_sset_range needs to attend to that. */
6088 static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
6090 tl_assert(svNew != SVal_INVALID);
6091 stats__cache_make_New_arange += (ULong)len;
6093 if (0 && len > 500)
6094 VG_(printf)("make New ( %#lx, %lu )\n", a, len );
6096 if (0) {
6097 static UWord n_New_in_cache = 0;
6098 static UWord n_New_not_in_cache = 0;
6099 /* tag is 'a' with the in-line offset masked out,
6100 eg a[31]..a[4] 0000 */
6101 Addr tag = a & ~(N_LINE_ARANGE - 1);
6102 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
6103 if (LIKELY(tag == cache_shmem.tags0[wix])) {
6104 n_New_in_cache++;
6105 } else {
6106 n_New_not_in_cache++;
6108 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
6109 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
6110 n_New_in_cache, n_New_not_in_cache );
6113 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
6114 zsm_sset_range_SMALL( a, len, svNew );
6115 } else {
6116 Addr before_start = a;
6117 Addr aligned_start = cacheline_ROUNDUP(a);
6118 Addr after_start = cacheline_ROUNDDN(a + len);
6119 UWord before_len = aligned_start - before_start;
6120 UWord aligned_len = after_start - aligned_start;
6121 UWord after_len = a + len - after_start;
6122 tl_assert(before_start <= aligned_start);
6123 tl_assert(aligned_start <= after_start);
6124 tl_assert(before_len < N_LINE_ARANGE);
6125 tl_assert(after_len < N_LINE_ARANGE);
6126 tl_assert(get_cacheline_offset(aligned_start) == 0);
6127 if (get_cacheline_offset(a) == 0) {
6128 tl_assert(before_len == 0);
6129 tl_assert(a == aligned_start);
6131 if (get_cacheline_offset(a+len) == 0) {
6132 tl_assert(after_len == 0);
6133 tl_assert(after_start == a+len);
6135 if (before_len > 0) {
6136 zsm_sset_range_SMALL( before_start, before_len, svNew );
6138 if (after_len > 0) {
6139 zsm_sset_range_SMALL( after_start, after_len, svNew );
6141 stats__cache_make_New_inZrep += (ULong)aligned_len;
6143 while (1) {
6144 Addr tag;
6145 UWord wix;
6146 if (aligned_start >= after_start)
6147 break;
6148 tl_assert(get_cacheline_offset(aligned_start) == 0);
6149 tag = aligned_start & ~(N_LINE_ARANGE - 1);
6150 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
6151 if (tag == cache_shmem.tags0[wix]) {
6152 UWord i;
6153 for (i = 0; i < N_LINE_ARANGE / 8; i++)
6154 zsm_swrite64( aligned_start + i * 8, svNew );
6155 } else {
6156 UWord i;
6157 Word zix;
6158 SecMap* sm;
6159 LineZ* lineZ;
6160 /* This line is not in the cache. Do not force it in; instead
6161 modify it in-place. */
6162 /* find the Z line to write in and rcdec it or the
6163 associated F line. */
6164 find_Z_for_writing( &sm, &zix, tag );
6165 tl_assert(sm);
6166 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
6167 lineZ = &sm->linesZ[zix];
6168 lineZ->dict[0] = svNew;
6169 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
6170 for (i = 0; i < N_LINE_ARANGE/4; i++)
6171 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
6172 rcinc_LineZ(lineZ);
6174 aligned_start += N_LINE_ARANGE;
6175 aligned_len -= N_LINE_ARANGE;
6177 tl_assert(aligned_start == after_start);
6178 tl_assert(aligned_len == 0);
6183 /////////////////////////////////////////////////////////
6184 // //
6185 // Front-filtering accesses //
6186 // //
6187 /////////////////////////////////////////////////////////
/* Front-filter statistics.  Each filtered access function below bumps
   stats__f_ac on entry, and bumps stats__f_sk when the thread's filter
   says the access can be skipped. */
6189 static UWord stats__f_ac = 0;
6190 static UWord stats__f_sk = 0;
/* STATS__F_SHOW: optional periodic dump of the two counters above.
   The printing variant is compiled out by the '#if 0'; the definition
   actually in force expands to nothing. */
6192 #if 0
6193 # define STATS__F_SHOW \
6194 do { \
6195 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
6196 VG_(printf)("filters: ac %lu sk %lu\n", \
6197 stats__f_ac, stats__f_sk); \
6198 } while (0)
6199 #else
6200 # define STATS__F_SHOW /* */
6201 #endif
6203 void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
6204 stats__f_ac++;
6205 STATS__F_SHOW;
6206 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
6207 stats__f_sk++;
6208 return;
6210 zsm_sapply08__msmcwrite(thr, a);
6213 void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
6214 stats__f_ac++;
6215 STATS__F_SHOW;
6216 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
6217 stats__f_sk++;
6218 return;
6220 zsm_sapply16__msmcwrite(thr, a);
6223 void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
6224 stats__f_ac++;
6225 STATS__F_SHOW;
6226 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
6227 stats__f_sk++;
6228 return;
6230 zsm_sapply32__msmcwrite(thr, a);
6233 void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
6234 stats__f_ac++;
6235 STATS__F_SHOW;
6236 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
6237 stats__f_sk++;
6238 return;
6240 zsm_sapply64__msmcwrite(thr, a);
6243 void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
6245 /* fast track a couple of common cases */
6246 if (len == 4 && aligned32(a)) {
6247 zsm_sapply32_f__msmcwrite( thr, a );
6248 return;
6250 if (len == 8 && aligned64(a)) {
6251 zsm_sapply64_f__msmcwrite( thr, a );
6252 return;
6255 /* be completely general (but as efficient as possible) */
6256 if (len == 0) return;
6258 if (!aligned16(a) && len >= 1) {
6259 zsm_sapply08_f__msmcwrite( thr, a );
6260 a += 1;
6261 len -= 1;
6262 tl_assert(aligned16(a));
6264 if (len == 0) return;
6266 if (!aligned32(a) && len >= 2) {
6267 zsm_sapply16_f__msmcwrite( thr, a );
6268 a += 2;
6269 len -= 2;
6270 tl_assert(aligned32(a));
6272 if (len == 0) return;
6274 if (!aligned64(a) && len >= 4) {
6275 zsm_sapply32_f__msmcwrite( thr, a );
6276 a += 4;
6277 len -= 4;
6278 tl_assert(aligned64(a));
6280 if (len == 0) return;
6282 if (len >= 8) {
6283 tl_assert(aligned64(a));
6284 while (len >= 8) {
6285 zsm_sapply64_f__msmcwrite( thr, a );
6286 a += 8;
6287 len -= 8;
6289 tl_assert(aligned64(a));
6291 if (len == 0) return;
6293 if (len >= 4)
6294 tl_assert(aligned32(a));
6295 if (len >= 4) {
6296 zsm_sapply32_f__msmcwrite( thr, a );
6297 a += 4;
6298 len -= 4;
6300 if (len == 0) return;
6302 if (len >= 2)
6303 tl_assert(aligned16(a));
6304 if (len >= 2) {
6305 zsm_sapply16_f__msmcwrite( thr, a );
6306 a += 2;
6307 len -= 2;
6309 if (len == 0) return;
6311 if (len >= 1) {
6312 zsm_sapply08_f__msmcwrite( thr, a );
6313 //a += 1;
6314 len -= 1;
6316 tl_assert(len == 0);
6319 void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
6320 stats__f_ac++;
6321 STATS__F_SHOW;
6322 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
6323 stats__f_sk++;
6324 return;
6326 zsm_sapply08__msmcread(thr, a);
6329 void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
6330 stats__f_ac++;
6331 STATS__F_SHOW;
6332 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
6333 stats__f_sk++;
6334 return;
6336 zsm_sapply16__msmcread(thr, a);
6339 void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
6340 stats__f_ac++;
6341 STATS__F_SHOW;
6342 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
6343 stats__f_sk++;
6344 return;
6346 zsm_sapply32__msmcread(thr, a);
6349 void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
6350 stats__f_ac++;
6351 STATS__F_SHOW;
6352 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
6353 stats__f_sk++;
6354 return;
6356 zsm_sapply64__msmcread(thr, a);
6359 void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
6361 /* fast track a couple of common cases */
6362 if (len == 4 && aligned32(a)) {
6363 zsm_sapply32_f__msmcread( thr, a );
6364 return;
6366 if (len == 8 && aligned64(a)) {
6367 zsm_sapply64_f__msmcread( thr, a );
6368 return;
6371 /* be completely general (but as efficient as possible) */
6372 if (len == 0) return;
6374 if (!aligned16(a) && len >= 1) {
6375 zsm_sapply08_f__msmcread( thr, a );
6376 a += 1;
6377 len -= 1;
6378 tl_assert(aligned16(a));
6380 if (len == 0) return;
6382 if (!aligned32(a) && len >= 2) {
6383 zsm_sapply16_f__msmcread( thr, a );
6384 a += 2;
6385 len -= 2;
6386 tl_assert(aligned32(a));
6388 if (len == 0) return;
6390 if (!aligned64(a) && len >= 4) {
6391 zsm_sapply32_f__msmcread( thr, a );
6392 a += 4;
6393 len -= 4;
6394 tl_assert(aligned64(a));
6396 if (len == 0) return;
6398 if (len >= 8) {
6399 tl_assert(aligned64(a));
6400 while (len >= 8) {
6401 zsm_sapply64_f__msmcread( thr, a );
6402 a += 8;
6403 len -= 8;
6405 tl_assert(aligned64(a));
6407 if (len == 0) return;
6409 if (len >= 4)
6410 tl_assert(aligned32(a));
6411 if (len >= 4) {
6412 zsm_sapply32_f__msmcread( thr, a );
6413 a += 4;
6414 len -= 4;
6416 if (len == 0) return;
6418 if (len >= 2)
6419 tl_assert(aligned16(a));
6420 if (len >= 2) {
6421 zsm_sapply16_f__msmcread( thr, a );
6422 a += 2;
6423 len -= 2;
6425 if (len == 0) return;
6427 if (len >= 1) {
6428 zsm_sapply08_f__msmcread( thr, a );
6429 //a += 1;
6430 len -= 1;
6432 tl_assert(len == 0);
6435 void libhb_Thr_resumes ( Thr* thr )
6437 if (0) VG_(printf)("resume %p\n", thr);
6438 tl_assert(thr);
6439 tl_assert(!thr->llexit_done);
6440 Filter__clear(thr->filter, "libhb_Thr_resumes");
6441 /* A kludge, but .. if this thread doesn't have any marker stacks
6442 at all, get one right now. This is easier than figuring out
6443 exactly when at thread startup we can and can't take a stack
6444 snapshot. */
6445 if (HG_(clo_history_level) == 1) {
6446 tl_assert(thr->local_Kws_n_stacks);
6447 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
6448 note_local_Kw_n_stack_for(thr);
6453 /////////////////////////////////////////////////////////
6454 // //
6455 // Synchronisation objects //
6456 // //
6457 /////////////////////////////////////////////////////////
6459 /* A doubly linked list of all the SOs. */
6460 SO* admin_SO = NULL;
6462 static SO* SO__Alloc ( void )
6464 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6465 so->viR = VtsID_INVALID;
6466 so->viW = VtsID_INVALID;
6467 so->magic = SO_MAGIC;
6468 /* Add to double linked list */
6469 if (admin_SO) {
6470 tl_assert(admin_SO->admin_prev == NULL);
6471 admin_SO->admin_prev = so;
6472 so->admin_next = admin_SO;
6473 } else {
6474 so->admin_next = NULL;
6476 so->admin_prev = NULL;
6477 admin_SO = so;
6478 /* */
6479 return so;
6482 static void SO__Dealloc ( SO* so )
6484 tl_assert(so);
6485 tl_assert(so->magic == SO_MAGIC);
6486 if (so->viR == VtsID_INVALID) {
6487 tl_assert(so->viW == VtsID_INVALID);
6488 } else {
6489 tl_assert(so->viW != VtsID_INVALID);
6490 VtsID__rcdec(so->viR);
6491 VtsID__rcdec(so->viW);
6493 so->magic = 0;
6494 /* Del from double linked list */
6495 if (so->admin_prev)
6496 so->admin_prev->admin_next = so->admin_next;
6497 if (so->admin_next)
6498 so->admin_next->admin_prev = so->admin_prev;
6499 if (so == admin_SO)
6500 admin_SO = so->admin_next;
6501 /* */
6502 HG_(free)( so );
6506 /////////////////////////////////////////////////////////
6507 // //
6508 // Top Level API //
6509 // //
6510 /////////////////////////////////////////////////////////
6512 static void show_thread_state ( const HChar* str, Thr* t )
6514 if (1) return;
6515 if (t->viR == t->viW) {
6516 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6517 VtsID__pp( t->viR );
6518 VG_(printf)("%s","\n");
6519 } else {
6520 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6521 VtsID__pp( t->viR );
6522 VG_(printf)(" viW %u==", t->viW);
6523 VtsID__pp( t->viW );
6524 VG_(printf)("%s","\n");
6529 Thr* libhb_init (
6530 void (*get_stacktrace)( Thr*, Addr*, UWord ),
6531 ExeContext* (*get_EC)( Thr* )
6534 Thr* thr;
6535 VtsID vi;
6537 // We will have to have to store a large number of these,
6538 // so make sure they're the size we expect them to be.
6539 STATIC_ASSERT(sizeof(ScalarTS) == 8);
6541 /* because first 1024 unusable */
6542 STATIC_ASSERT(SCALARTS_N_THRBITS >= 11);
6543 /* so as to fit in a UInt w/ 5 bits to spare (see defn of
6544 Thr_n_RCEC and TSW). */
6545 STATIC_ASSERT(SCALARTS_N_THRBITS <= 27);
6547 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6548 (32-bit). It's not correctness-critical, but there are a lot of
6549 them, so it's important from a space viewpoint. Unfortunately
6550 we simply can't pack it into 2 words on a 32-bit target. */
6551 STATIC_ASSERT( (sizeof(UWord) == 8 && sizeof(Thr_n_RCEC) == 16)
6552 || (sizeof(UWord) == 4 && sizeof(Thr_n_RCEC) == 12));
6553 STATIC_ASSERT(sizeof(TSW) == sizeof(UInt));
6555 /* Word sets really are 32 bits. Even on a 64 bit target. */
6556 STATIC_ASSERT(sizeof(WordSetID) == 4);
6557 STATIC_ASSERT(sizeof(WordSet) == sizeof(WordSetID));
6559 tl_assert(get_stacktrace);
6560 tl_assert(get_EC);
6561 main_get_stacktrace = get_stacktrace;
6562 main_get_EC = get_EC;
6564 // No need to initialise hg_wordfm.
6565 // No need to initialise hg_wordset.
6567 /* Allocated once and never deallocated. Used as a temporary in
6568 VTS singleton, tick and join operations. */
6569 temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6570 temp_max_sized_VTS->id = VtsID_INVALID;
6571 verydead_thread_tables_init();
6572 vts_set_init();
6573 vts_tab_init();
6574 event_map_init();
6575 VtsID__invalidate_caches();
6577 // initialise shadow memory
6578 zsm_init( );
6580 thr = Thr__new();
6581 vi = VtsID__mk_Singleton( thr, 1 );
6582 thr->viR = vi;
6583 thr->viW = vi;
6584 VtsID__rcinc(thr->viR);
6585 VtsID__rcinc(thr->viW);
6587 show_thread_state(" root", thr);
6588 return thr;
6592 Thr* libhb_create ( Thr* parent )
6594 /* The child's VTSs are copies of the parent's VTSs, but ticked at
6595 the child's index. Since the child's index is guaranteed
6596 unique, it has never been seen before, so the implicit value
6597 before the tick is zero and after that is one. */
6598 Thr* child = Thr__new();
6600 child->viR = VtsID__tick( parent->viR, child );
6601 child->viW = VtsID__tick( parent->viW, child );
6602 Filter__clear(child->filter, "libhb_create(child)");
6603 VtsID__rcinc(child->viR);
6604 VtsID__rcinc(child->viW);
6605 /* We need to do note_local_Kw_n_stack_for( child ), but it's too
6606 early for that - it may not have a valid TId yet. So, let
6607 libhb_Thr_resumes pick it up the first time the thread runs. */
6609 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6610 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6612 /* and the parent has to move along too */
6613 VtsID__rcdec(parent->viR);
6614 VtsID__rcdec(parent->viW);
6615 parent->viR = VtsID__tick( parent->viR, parent );
6616 parent->viW = VtsID__tick( parent->viW, parent );
6617 Filter__clear(parent->filter, "libhb_create(parent)");
6618 VtsID__rcinc(parent->viR);
6619 VtsID__rcinc(parent->viW);
6620 note_local_Kw_n_stack_for( parent );
6622 show_thread_state(" child", child);
6623 show_thread_state("parent", parent);
6625 return child;
6628 /* Shut down the library, and print stats (in fact that's _all_
6629 this is for). */
6630 void libhb_shutdown ( Bool show_stats )
/* Everything below is conditional on 'show_stats': when it is true a
   statistics report is written with VG_(printf), bracketed by BEGIN
   and END marker lines.  Several consistency assertions are made
   along the way. */
6632 if (show_stats) {
6633 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
/* ---- secondary maps ---- */
6634 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6635 stats__secmaps_allocd,
6636 stats__secmap_ga_space_covered);
6637 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6638 stats__secmap_linesZ_allocd,
6639 stats__secmap_linesZ_bytes);
6640 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)"
6641 " (%'10lu used)\n",
6642 VG_(sizePA) (LineF_pool_allocator),
6643 VG_(sizePA) (LineF_pool_allocator) * sizeof(LineF),
6644 shmem__SecMap_used_linesF());
/* NOTE(review): shmem__SecMap_do_GC is called here, inside the
   argument list, with False; presumably that requests a count of
   collectable secmaps without actually collecting -- confirm against
   its definition. */
6645 VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
6646 " #%lu scanGC \n",
6647 stats__secmaps_in_map_shmem,
6648 shmem__SecMap_do_GC(False /* really do GC */),
6649 stats__secmaps_scanGC);
/* The counter must agree with the actual size of map_shmem. */
6650 tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
6651 VG_(printf)(" secmaps: %'10lu in freelist,"
6652 " total (scanGCed %'lu, ssetGCed %'lu)\n",
6653 SecMap_freelist_length(),
6654 stats__secmaps_scanGCed,
6655 stats__secmaps_ssetGCed);
6656 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6657 stats__secmaps_search, stats__secmaps_search_slow);
/* ---- cache ---- */
6659 VG_(printf)("%s","\n");
6660 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
6661 stats__cache_totrefs, stats__cache_totmisses );
6662 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
6663 stats__cache_Z_fetches, stats__cache_F_fetches );
6664 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
6665 stats__cache_Z_wbacks, stats__cache_F_wbacks );
6666 VG_(printf)(" cache: %'14lu flushes_invals\n",
6667 stats__cache_flushes_invals );
6668 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
6669 stats__cache_make_New_arange,
6670 stats__cache_make_New_inZrep);
/* ---- cache line operations ---- */
6672 VG_(printf)("%s","\n");
6673 VG_(printf)(" cline: %'10lu normalises\n",
6674 stats__cline_normalises );
6675 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6676 stats__cline_cread64s,
6677 stats__cline_cread32s,
6678 stats__cline_cread16s,
6679 stats__cline_cread08s );
6680 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6681 stats__cline_cwrite64s,
6682 stats__cline_cwrite32s,
6683 stats__cline_cwrite16s,
6684 stats__cline_cwrite08s );
6685 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6686 stats__cline_swrite64s,
6687 stats__cline_swrite32s,
6688 stats__cline_swrite16s,
6689 stats__cline_swrite08s );
6690 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
6691 stats__cline_sread08s, stats__cline_scopy08s );
6692 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu"
6693 " 2to1 %'12lu\n",
6694 stats__cline_64to32splits, stats__cline_32to16splits,
6695 stats__cline_16to8splits );
6696 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu"
6697 " 2to1 %'12lu\n",
6698 stats__cline_64to32pulldown, stats__cline_32to16pulldown,
6699 stats__cline_16to8pulldown );
/* Disabled: size of the compressed line representation. */
6700 if (0)
6701 VG_(printf)(" cline: sizeof(CacheLineZ) %ld,"
6702 " covers %ld bytes of arange\n",
6703 (Word)sizeof(LineZ),
6704 (Word)N_LINE_ARANGE);
/* ---- memory state machine and VTS query counters ---- */
6706 VG_(printf)("%s","\n");
6708 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
6709 stats__msmcread, stats__msmcread_change);
6710 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
6711 stats__msmcwrite, stats__msmcwrite_change);
6712 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6713 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
6714 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
6715 stats__join2_queries, stats__join2_misses);
6717 VG_(printf)("%s","\n");
6718 VG_(printf)(" libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
6719 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
6720 VG_(printf)(" libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6721 stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
6722 VG_(printf)(" libhb: VTSset: find__or__clone_and_add %'lu"
6723 " (%'lu allocd)\n",
6724 stats__vts_set__focaa, stats__vts_set__focaa_a );
6725 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
6726 stats__vts__indexat_slow );
/* ---- VTS table and set sizes ---- */
6728 VG_(printf)("%s","\n");
6729 VG_(printf)(
6730 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6731 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6733 VG_(printf)(" libhb: #%lu vts_tab GC #%lu vts pruning\n",
6734 stats__vts_tab_GC, stats__vts_pruning);
6735 VG_(printf)( " libhb: %lu entries in vts_set\n",
6736 VG_(sizeFM)( vts_set ) );
/* ---- thread census ----
   Walk the list returned by get_admin_threads(), following the
   'admin' links, and put each thread's hbthr into exactly one of four
   classes according to its llexit_done and joinedwith_done flags. */
6738 VG_(printf)("%s","\n");
6740 UInt live = 0;
6741 UInt llexit_done = 0;
6742 UInt joinedwith_done = 0;
6743 UInt llexit_and_joinedwith_done = 0;
6745 Thread* hgthread = get_admin_threads();
6746 tl_assert(hgthread);
6747 while (hgthread) {
6748 Thr* hbthr = hgthread->hbthr;
6749 tl_assert(hbthr);
6750 if (hbthr->llexit_done && hbthr->joinedwith_done)
6751 llexit_and_joinedwith_done++;
6752 else if (hbthr->llexit_done)
6753 llexit_done++;
6754 else if (hbthr->joinedwith_done)
6755 joinedwith_done++;
6756 else
6757 live++;
6758 hgthread = hgthread->admin;
6760 VG_(printf)(" libhb: threads live: %u exit_and_joinedwith %u"
6761 " exit %u joinedwith %u\n",
6762 live, llexit_and_joinedwith_done,
6763 llexit_done, joinedwith_done);
6764 VG_(printf)(" libhb: %d verydead_threads, "
6765 "%d verydead_threads_not_pruned\n",
6766 (int) VG_(sizeXA)( verydead_thread_table),
6767 (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
/* Cross-check: the two verydead tables together must hold exactly the
   threads counted above as both exited and joined-with. */
6768 tl_assert (VG_(sizeXA)( verydead_thread_table)
6769 + VG_(sizeXA)( verydead_thread_table_not_pruned)
6770 == llexit_and_joinedwith_done);
/* ---- old-reference table and context table ---- */
6773 VG_(printf)("%s","\n");
6774 VG_(printf)( " libhb: oldrefHTN %lu (%'d bytes)\n",
6775 oldrefHTN, (int)(oldrefHTN * sizeof(OldRef)));
/* The counter must agree with the node count of the hash table. */
6776 tl_assert (oldrefHTN == VG_(HT_count_nodes) (oldrefHT));
6777 VG_(printf)( " libhb: oldref lookup found=%lu notfound=%lu\n",
6778 stats__evm__lookup_found, stats__evm__lookup_notfound);
6779 if (VG_(clo_verbosity) > 1)
6780 VG_(HT_print_stats) (oldrefHT, cmp_oldref_tsw);
6781 VG_(printf)( " libhb: oldref bind tsw/rcec "
6782 "==/==:%'lu ==/!=:%'lu !=/!=:%'lu\n",
6783 stats__ctxt_eq_tsw_eq_rcec, stats__ctxt_eq_tsw_neq_rcec,
6784 stats__ctxt_neq_tsw_neq_rcec);
6785 VG_(printf)( " libhb: ctxt__rcdec calls %'lu. rcec gc discards %'lu\n",
6786 stats__ctxt_rcdec_calls, stats__ctxt_rcec_gc_discards);
6787 VG_(printf)( " libhb: contextTab: %lu slots,"
6788 " %lu cur ents(ref'd %lu),"
6789 " %lu max ents\n",
6790 (UWord)N_RCEC_TAB,
6791 stats__ctxt_tab_curr, RCEC_referenced,
6792 stats__ctxt_tab_max );
6793 VG_(printf) (" libhb: stats__cached_rcec "
6794 "identical %'lu updated %'lu fresh %'lu\n",
6795 stats__cached_rcec_identical, stats__cached_rcec_updated,
6796 stats__cached_rcec_fresh);
/* These two lines only appear when the respective counter is
   non-zero. */
6797 if (stats__cached_rcec_diff > 0)
6798 VG_(printf) (" libhb: stats__cached_rcec diff unk reason%'lu\n",
6799 stats__cached_rcec_diff);
6800 if (stats__cached_rcec_diff_known_reason > 0)
6801 VG_(printf) (" libhb: stats__cached_rcec diff known reason %'lu\n",
6802 stats__cached_rcec_diff_known_reason);
/* ---- contextTab chain-length histogram ----
   chains[n] counts the slots of contextTab whose chain has exactly n
   entries, for n < MAXCHAIN; chains[MAXCHAIN] collects all longer
   chains.  non0chain counts the non-empty slots and is the divisor
   for the average (with 1 substituted if there are none, to avoid
   dividing by zero).  Only non-zero histogram entries are printed. */
6805 # define MAXCHAIN 10
6806 UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
6807 UInt non0chain = 0;
6808 UInt n;
6809 UInt i;
6810 RCEC *p;
6812 for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
6813 for (i = 0; i < N_RCEC_TAB; i++) {
6814 n = 0;
6815 for (p = contextTab[i]; p; p = p->next)
6816 n++;
6817 if (n < MAXCHAIN)
6818 chains[n]++;
6819 else
6820 chains[MAXCHAIN]++;
6821 if (n > 0)
6822 non0chain++;
6824 VG_(printf)( " libhb: contextTab chain of [length]=nchain."
6825 " Avg chain len %3.1f\n"
6826 " ",
6827 (Double)stats__ctxt_tab_curr
6828 / (Double)(non0chain ? non0chain : 1));
6829 for (i = 0; i <= MAXCHAIN; i++) {
6830 if (chains[i] != 0)
6831 VG_(printf)( "[%u%s]=%u ",
6832 i, i == MAXCHAIN ? "+" : "",
6833 chains[i]);
6835 VG_(printf)( "\n");
6836 # undef MAXCHAIN
6838 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
6839 stats__ctxt_tab_qs,
6840 stats__ctxt_tab_cmps );
/* Disabled: sizes of assorted data structures. */
6841 #if 0
6842 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
6843 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
6844 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
6845 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
6846 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
6847 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
6848 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
6849 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
6850 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
6851 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
6852 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
6853 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
6854 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
6855 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
6857 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
6858 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
6859 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
6860 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
6861 #endif
6863 VG_(printf)("%s","<<< END libhb stats >>>\n");
6864 VG_(printf)("%s","\n");
6869 /* Receive notification that a thread has low level exited. The
6870 significance here is that we do not expect to see any more memory
6871 references from it. */
6872 void libhb_async_exit ( Thr* thr )
6874 tl_assert(thr);
6875 tl_assert(!thr->llexit_done);
6876 thr->llexit_done = True;
6878 /* Check nobody messed up with the cached_rcec */
6879 tl_assert (thr->cached_rcec.magic == RCEC_MAGIC);
6880 tl_assert (thr->cached_rcec.rc == 0);
6881 tl_assert (thr->cached_rcec.rcX == 0);
6882 tl_assert (thr->cached_rcec.next == NULL);
6884 /* Just to be sure, declare the cached stack invalid. */
6885 set_cached_rcec_validity(thr, False);
6887 /* free up Filter and local_Kws_n_stacks (well, actually not the
6888 latter ..) */
6889 tl_assert(thr->filter);
6890 HG_(free)(thr->filter);
6891 thr->filter = NULL;
6893 /* Tell the VTS mechanism this thread has exited, so it can
6894 participate in VTS pruning. Note this can only happen if the
6895 thread has both ll_exited and has been joined with. */
6896 if (thr->joinedwith_done)
6897 VTS__declare_thread_very_dead(thr);
6899 /* Another space-accuracy tradeoff. Do we want to be able to show
6900 H1 history for conflicts in threads which have since exited? If
6901 yes, then we better not free up thr->local_Kws_n_stacks. The
6902 downside is a potential per-thread leak of up to
6903 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6904 XArray average overcommit factor is (1.5 I'd guess). */
6905 // hence:
6906 // VG_(deleteXA)(thr->local_Kws_n_stacks);
6907 // thr->local_Kws_n_stacks = NULL;
6910 /* Receive notification that a thread has been joined with. The
6911 significance here is that we do not expect to see any further
6912 references to its vector clocks (Thr::viR and Thr::viW). */
6913 void libhb_joinedwith_done ( Thr* thr )
6915 tl_assert(thr);
6916 /* Caller must ensure that this is only ever called once per Thr. */
6917 tl_assert(!thr->joinedwith_done);
6918 thr->joinedwith_done = True;
6919 if (thr->llexit_done)
6920 VTS__declare_thread_very_dead(thr);
6924 /* Both Segs and SOs point to VTSs. However, there is no sharing, so
6925 a Seg that points at a VTS is its one-and-only owner, and ditto for
6926 a SO that points at a VTS. */
6928 SO* libhb_so_alloc ( void )
6930 return SO__Alloc();
6933 void libhb_so_dealloc ( SO* so )
6935 tl_assert(so);
6936 tl_assert(so->magic == SO_MAGIC);
6937 SO__Dealloc(so);
6940 /* See comments in libhb.h for details on the meaning of
6941 strong vs weak sends and strong vs weak receives. */
6942 void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6944 /* Copy the VTSs from 'thr' into the sync object, and then move
6945 the thread along one step. */
6947 tl_assert(so);
6948 tl_assert(so->magic == SO_MAGIC);
6950 /* stay sane .. a thread's read-clock must always lead or be the
6951 same as its write-clock */
6952 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6953 tl_assert(leq);
6956 /* since we're overwriting the VtsIDs in the SO, we need to drop
6957 any references made by the previous contents thereof */
6958 if (so->viR == VtsID_INVALID) {
6959 tl_assert(so->viW == VtsID_INVALID);
6960 so->viR = thr->viR;
6961 so->viW = thr->viW;
6962 VtsID__rcinc(so->viR);
6963 VtsID__rcinc(so->viW);
6964 } else {
6965 /* In a strong send, we dump any previous VC in the SO and
6966 install the sending thread's VC instead. For a weak send we
6967 must join2 with what's already there. */
6968 tl_assert(so->viW != VtsID_INVALID);
6969 VtsID__rcdec(so->viR);
6970 VtsID__rcdec(so->viW);
6971 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6972 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6973 VtsID__rcinc(so->viR);
6974 VtsID__rcinc(so->viW);
6977 /* move both parent clocks along */
6978 VtsID__rcdec(thr->viR);
6979 VtsID__rcdec(thr->viW);
6980 thr->viR = VtsID__tick( thr->viR, thr );
6981 thr->viW = VtsID__tick( thr->viW, thr );
6982 if (!thr->llexit_done) {
6983 Filter__clear(thr->filter, "libhb_so_send");
6984 note_local_Kw_n_stack_for(thr);
6986 VtsID__rcinc(thr->viR);
6987 VtsID__rcinc(thr->viW);
6989 if (strong_send)
6990 show_thread_state("s-send", thr);
6991 else
6992 show_thread_state("w-send", thr);
6995 void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
6997 tl_assert(so);
6998 tl_assert(so->magic == SO_MAGIC);
7000 if (so->viR != VtsID_INVALID) {
7001 tl_assert(so->viW != VtsID_INVALID);
7003 /* Weak receive (basically, an R-acquisition of a R-W lock).
7004 This advances the read-clock of the receiver, but not the
7005 write-clock. */
7006 VtsID__rcdec(thr->viR);
7007 thr->viR = VtsID__join2( thr->viR, so->viR );
7008 VtsID__rcinc(thr->viR);
7010 /* At one point (r10589) it seemed safest to tick the clocks for
7011 the receiving thread after the join. But on reflection, I
7012 wonder if that might cause it to 'overtake' constraints,
7013 which could lead to missing races. So, back out that part of
7014 r10589. */
7015 //VtsID__rcdec(thr->viR);
7016 //thr->viR = VtsID__tick( thr->viR, thr );
7017 //VtsID__rcinc(thr->viR);
7019 /* For a strong receive, we also advance the receiver's write
7020 clock, which means the receive as a whole is essentially
7021 equivalent to a W-acquisition of a R-W lock. */
7022 if (strong_recv) {
7023 VtsID__rcdec(thr->viW);
7024 thr->viW = VtsID__join2( thr->viW, so->viW );
7025 VtsID__rcinc(thr->viW);
7027 /* See comment just above, re r10589. */
7028 //VtsID__rcdec(thr->viW);
7029 //thr->viW = VtsID__tick( thr->viW, thr );
7030 //VtsID__rcinc(thr->viW);
7033 if (thr->filter)
7034 Filter__clear(thr->filter, "libhb_so_recv");
7035 note_local_Kw_n_stack_for(thr);
7037 if (strong_recv)
7038 show_thread_state("s-recv", thr);
7039 else
7040 show_thread_state("w-recv", thr);
7042 } else {
7043 tl_assert(so->viW == VtsID_INVALID);
7044 /* Deal with degenerate case: 'so' has no vts, so there has been
7045 no message posted to it. Just ignore this case. */
7046 show_thread_state("d-recv", thr);
7050 Bool libhb_so_everSent ( SO* so )
7052 if (so->viR == VtsID_INVALID) {
7053 tl_assert(so->viW == VtsID_INVALID);
7054 return False;
7055 } else {
7056 tl_assert(so->viW != VtsID_INVALID);
7057 return True;
7061 #define XXX1 0 // 0x67a106c
7062 #define XXX2 0
7064 static inline Bool TRACEME(Addr a, SizeT szB) {
7065 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
7066 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
7067 return False;
7069 static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
7071 SVal sv = zsm_sread08(a);
7072 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
7073 show_thread_state("", thr);
7074 VG_(printf)("%s","\n");
7077 void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
7079 SVal sv = SVal__mkC(thr->viW, thr->viW);
7080 tl_assert(is_sane_SVal_C(sv));
7081 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
7082 zsm_sset_range( a, szB, sv );
7083 Filter__clear_range( thr->filter, a, szB );
7084 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
7087 void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
7089 /* do nothing */
7093 /* Set the lines zix_start till zix_end to NOACCESS. */
7094 static void zsm_secmap_line_range_noaccess (SecMap *sm,
7095 UInt zix_start, UInt zix_end)
7097 for (UInt lz = zix_start; lz <= zix_end; lz++) {
7098 LineZ* lineZ;
7099 lineZ = &sm->linesZ[lz];
7100 if (lineZ->dict[0] != SVal_INVALID) {
7101 rcdec_LineZ(lineZ);
7102 lineZ->dict[0] = SVal_NOACCESS;
7103 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
7104 } else {
7105 clear_LineF_of_Z(lineZ);
7107 for (UInt i = 0; i < N_LINE_ARANGE/4; i++)
7108 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
7112 /* Set the given range to SVal_NOACCESS in-place in the secmap.
7113 a must be cacheline aligned. len must be a multiple of a cacheline
7114 and must be < N_SECMAP_ARANGE. */
7115 static void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
7117 tl_assert (is_valid_scache_tag (a));
7118 tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
7119 tl_assert (len < N_SECMAP_ARANGE);
7121 SecMap *sm1 = shmem__find_SecMap (a);
7122 SecMap *sm2 = shmem__find_SecMap (a + len - 1);
7123 UWord zix_start = shmem__get_SecMap_offset(a ) >> N_LINE_BITS;
7124 UWord zix_end = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;
7126 if (sm1) {
7127 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
7128 zsm_secmap_line_range_noaccess (sm1, zix_start,
7129 sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
7131 if (sm2 && sm1 != sm2) {
7132 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
7133 zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
7137 /* Set the given address range to SVal_NOACCESS.
7138 The SecMaps fully set to SVal_NOACCESS will be pushed in SecMap_freelist. */
7139 static void zsm_sset_range_noaccess (Addr addr, SizeT len)
7142 BPC = Before, Partial Cacheline, = addr
7143 (i.e. starting inside a cacheline/inside a SecMap)
7144 BFC = Before, Full Cacheline(s), but not full SecMap
7145 (i.e. starting inside a SecMap)
7146 FSM = Full SecMap(s)
7147 (i.e. starting a SecMap)
7148 AFC = After, Full Cacheline(s), but not full SecMap
7149 (i.e. first address after the full SecMap(s))
7150 APC = After, Partial Cacheline, i.e. first address after the
7151 full CacheLines).
7152 ARE = After Range End = addr+len = first address not part of the range.
7154 If addr starts a Cacheline, then BPC == BFC.
7155 If addr starts a SecMap, then BPC == BFC == FSM.
7156 If addr+len starts a SecMap, then APC == ARE == AFC
7157 If addr+len starts a Cacheline, then APC == ARE
7159 Addr ARE = addr + len;
7160 Addr BPC = addr;
7161 Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
7162 Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
7163 Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
7164 Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
7165 SizeT Plen = len; // Plen will be split between the following:
7166 SizeT BPClen;
7167 SizeT BFClen;
7168 SizeT FSMlen;
7169 SizeT AFClen;
7170 SizeT APClen;
7172 /* Consumes from Plen the nr of bytes between from and to.
7173 from and to must be aligned on a multiple of round.
7174 The length consumed will be a multiple of round, with
7175 a maximum of Plen. */
7176 # define PlenCONSUME(from, to, round, consumed) \
7177 do { \
7178 if (from < to) { \
7179 if (to - from < Plen) \
7180 consumed = to - from; \
7181 else \
7182 consumed = ROUNDDN(Plen, round); \
7183 } else { \
7184 consumed = 0; \
7186 Plen -= consumed; } while (0)
7188 PlenCONSUME(BPC, BFC, 1, BPClen);
7189 PlenCONSUME(BFC, FSM, N_LINE_ARANGE, BFClen);
7190 PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
7191 PlenCONSUME(AFC, APC, N_LINE_ARANGE, AFClen);
7192 PlenCONSUME(APC, ARE, 1, APClen);
7194 if (0)
7195 VG_(printf) ("addr %p[%lu] ARE %p"
7196 " BPC %p[%lu] BFC %p[%lu] FSM %p[%lu]"
7197 " AFC %p[%lu] APC %p[%lu]\n",
7198 (void*)addr, len, (void*)ARE,
7199 (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
7200 (void*)AFC, AFClen, (void*)APC, APClen);
7202 tl_assert (Plen == 0);
7204 /* Set to NOACCESS pieces before and after not covered by entire SecMaps. */
7206 /* First we set the partial cachelines. This is done through the cache. */
7207 if (BPClen > 0)
7208 zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
7209 if (APClen > 0)
7210 zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);
7212 /* After this, we will not use the cache anymore. We will directly work
7213 in-place on the z shadow memory in SecMap(s).
7214 So, we invalidate the cachelines for the whole range we are setting
7215 to NOACCESS below. */
7216 shmem__invalidate_scache_range (BFC, APC - BFC);
7218 if (BFClen > 0)
7219 zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
7220 if (AFClen > 0)
7221 zsm_sset_range_noaccess_in_secmap (AFC, AFClen);
7223 if (FSMlen > 0) {
7224 /* Set to NOACCESS all the SecMaps, pushing the SecMaps to the
7225 free list. */
7226 Addr sm_start = FSM;
7227 while (sm_start < AFC) {
7228 SecMap *sm = shmem__find_SecMap (sm_start);
7229 if (sm) {
7230 Addr gaKey;
7231 SecMap *fm_sm;
7233 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
7234 for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
7235 LineZ *lineZ = &sm->linesZ[lz];
7236 if (LIKELY(lineZ->dict[0] != SVal_INVALID))
7237 rcdec_LineZ(lineZ);
7238 else
7239 clear_LineF_of_Z(lineZ);
7241 if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
7242 tl_assert (0);
7243 stats__secmaps_in_map_shmem--;
7244 tl_assert (gaKey == sm_start);
7245 tl_assert (sm == fm_sm);
7246 stats__secmaps_ssetGCed++;
7247 push_SecMap_on_freelist (sm);
7249 sm_start += N_SECMAP_ARANGE;
7251 tl_assert (sm_start == AFC);
7253 /* The above loop might have kept copies of freed SecMap in the smCache.
7254 => clear them. */
7255 if (address_in_range(smCache[0].gaKey, FSM, FSMlen)) {
7256 smCache[0].gaKey = 1;
7257 smCache[0].sm = NULL;
7259 if (address_in_range(smCache[1].gaKey, FSM, FSMlen)) {
7260 smCache[1].gaKey = 1;
7261 smCache[1].sm = NULL;
7263 if (address_in_range(smCache[2].gaKey, FSM, FSMlen)) {
7264 smCache[2].gaKey = 1;
7265 smCache[2].sm = NULL;
7267 STATIC_ASSERT (3 == sizeof(smCache)/sizeof(SMCacheEnt));
7271 void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
7273 /* This really does put the requested range in NoAccess. It's
7274 expensive though. */
7275 SVal sv = SVal_NOACCESS;
7276 tl_assert(is_sane_SVal_C(sv));
7277 if (LIKELY(szB < 2 * N_LINE_ARANGE))
7278 zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
7279 else
7280 zsm_sset_range_noaccess (a, szB);
7281 Filter__clear_range( thr->filter, a, szB );
7284 /* Works byte at a time. Can be optimised if needed. */
7285 UWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
7287 UWord anr = 0; // nr of bytes addressable.
7289 /* Get the accessibility of each byte. Pay attention to not
7290 create SecMap or LineZ when checking if a byte is addressable.
7292 Note: this is used for client request. Performance deemed not critical.
7293 So for simplicity, we work byte per byte.
7294 Performance could be improved by working with full cachelines
7295 or with full SecMap, when reaching a cacheline or secmap boundary. */
7296 for (SizeT i = 0; i < len; i++) {
7297 SVal sv = SVal_INVALID;
7298 Addr b = a + i;
7299 Addr tag = b & ~(N_LINE_ARANGE - 1);
7300 UWord wix = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
7301 UWord cloff = get_cacheline_offset(b);
7303 /* Note: we do not use get_cacheline(b) to avoid creating cachelines
7304 and/or SecMap for non addressable bytes. */
7305 if (tag == cache_shmem.tags0[wix]) {
7306 CacheLine copy = cache_shmem.lyns0[wix];
7307 /* We work on a copy of the cacheline, as we do not want to
7308 record the client request as a real read.
7309 The below is somewhat similar to zsm_sapply08__msmcread but
7310 avoids side effects on the cache. */
7311 UWord toff = get_tree_offset(b); /* == 0 .. 7 */
7312 UWord tno = get_treeno(b);
7313 UShort descr = copy.descrs[tno];
7314 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
7315 SVal* tree = &copy.svals[tno << 3];
7316 copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
7318 sv = copy.svals[cloff];
7319 } else {
7320 /* Byte not found in the cacheline. Search for a SecMap. */
7321 SecMap *sm = shmem__find_SecMap(b);
7322 LineZ *lineZ;
7323 if (sm == NULL)
7324 sv = SVal_NOACCESS;
7325 else {
7326 UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
7327 lineZ = &sm->linesZ[zix];
7328 if (lineZ->dict[0] == SVal_INVALID) {
7329 LineF *lineF = SVal2Ptr(lineZ->dict[1]);
7330 sv = lineF->w64s[cloff];
7331 } else {
7332 UWord ix = read_twobit_array( lineZ->ix2s, cloff );
7333 sv = lineZ->dict[ix];
7338 tl_assert (sv != SVal_INVALID);
7339 if (sv == SVal_NOACCESS) {
7340 if (abits)
7341 abits[i] = 0x00;
7342 } else {
7343 if (abits)
7344 abits[i] = 0xff;
7345 anr++;
7349 return anr;
7353 void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
7355 SVal sv = SVal_NOACCESS;
7356 tl_assert(is_sane_SVal_C(sv));
7357 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
7358 if (LIKELY(szB < 2 * N_LINE_ARANGE))
7359 zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
7360 else
7361 zsm_sset_range_noaccess (a, szB);
7362 Filter__clear_range( thr->filter, a, szB );
7363 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
7366 Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
7367 tl_assert(thr);
7368 return thr->hgthread;
7371 void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
7372 tl_assert(thr);
7373 thr->hgthread = hgthread;
7376 void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
7378 zsm_scopy_range(src, dst, len);
7379 Filter__clear_range( thr->filter, dst, len );
7382 void libhb_maybe_GC ( void )
7384 /* GC the unreferenced (zero rc) RCECs when
7385 (1) reaching a significant nr of RCECs (to avoid scanning a contextTab
7386 with mostly NULL ptr)
7387 and (2) approaching the max nr of RCEC (as we have in any case
7388 at least that amount of RCEC in the pool allocator)
7389 Note: the margin allows to avoid a small but constant increase
7390 of the max nr of RCEC due to the fact that libhb_maybe_GC is
7391 not called when the current nr of RCEC exactly reaches the max.
7392 and (3) the nr of referenced RCECs is less than 75% than total nr RCECs.
7393 Avoid growing too much the nr of RCEC keeps the memory use low,
7394 and avoids to have too many elements in the (fixed) contextTab hashtable.
7396 if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
7397 && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
7398 && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
7399 do_RCEC_GC();
7401 /* If there are still no entries available (all the table entries are full),
7402 and we hit the threshold point, then do a GC */
7403 Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
7404 && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
7405 if (UNLIKELY (vts_tab_GC))
7406 vts_tab__do_GC( False/*don't show stats*/ );
7408 /* scan GC the SecMaps when
7409 (1) no SecMap in the freelist
7410 and (2) the current nr of live secmaps exceeds the threshold. */
7411 if (UNLIKELY(SecMap_freelist == NULL
7412 && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
7413 // If we did a vts tab GC, then no need to flush the cache again.
7414 if (!vts_tab_GC)
7415 zsm_flush_cache();
7416 shmem__SecMap_do_GC(True);
7419 /* Check the reference counts (expensive) */
7420 if (CHECK_CEM)
7421 event_map__check_reference_counts();
7425 /////////////////////////////////////////////////////////////////
7426 /////////////////////////////////////////////////////////////////
7427 // //
7428 // SECTION END main library //
7429 // //
7430 /////////////////////////////////////////////////////////////////
7431 /////////////////////////////////////////////////////////////////
7433 /*--------------------------------------------------------------------*/
7434 /*--- end libhb_main.c ---*/
7435 /*--------------------------------------------------------------------*/