Add missing zstd.h to coregrind Makefile.am noinst_HEADERS
[valgrind.git] / helgrind / libhb_core.c
blob0a084dd60d4ae7f7f1b9f9735211a76dbdcf015b
2 /*--------------------------------------------------------------------*/
3 /*--- LibHB: a library for implementing and checking ---*/
4 /*--- the happens-before relationship in concurrent programs. ---*/
5 /*--- libhb_core.c ---*/
6 /*--------------------------------------------------------------------*/
8 /*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
12 Copyright (C) 2008-2017 OpenWorks Ltd
13 info@open-works.co.uk
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 The GNU General Public License is contained in the file COPYING.
31 #include "pub_tool_basics.h"
32 #include "pub_tool_poolalloc.h"
33 #include "pub_tool_libcassert.h"
34 #include "pub_tool_libcbase.h"
35 #include "pub_tool_libcprint.h"
36 #include "pub_tool_machine.h"
37 #include "pub_tool_mallocfree.h"
38 #include "pub_tool_wordfm.h"
39 #include "pub_tool_hashtable.h"
40 #include "pub_tool_xarray.h"
41 #include "pub_tool_oset.h"
42 #include "pub_tool_threadstate.h"
43 #include "pub_tool_aspacemgr.h"
44 #include "pub_tool_stacktrace.h"
45 #include "pub_tool_execontext.h"
46 #include "pub_tool_errormgr.h"
47 #include "pub_tool_debuginfo.h"
48 #include "pub_tool_gdbserver.h"
49 #include "pub_tool_options.h" // VG_(clo_stats)
50 #include "hg_basics.h"
51 #include "hg_wordset.h"
52 #include "hg_lock_n_thread.h"
53 #include "hg_errors.h"
55 #include "libhb.h"
58 /////////////////////////////////////////////////////////////////
59 /////////////////////////////////////////////////////////////////
60 // //
61 // Debugging #defines //
62 // //
63 /////////////////////////////////////////////////////////////////
64 /////////////////////////////////////////////////////////////////
66 /* Check the sanity of shadow values in the core memory state
67 machine. Change #if 0 to #if 1 to enable this. */
68 #if 0
69 # define CHECK_MSM 1
70 #else
71 # define CHECK_MSM 0
72 #endif
75 /* Check sanity (reference counts, etc) in the conflicting access
76 machinery. Change #if 0 to #if 1 to enable this. */
77 #if 0
78 # define CHECK_CEM 1
79 #else
80 # define CHECK_CEM 0
81 #endif
84 /* Check sanity in the compressed shadow memory machinery,
85 particularly in its caching innards. Unfortunately there's no
86 almost-zero-cost way to make them selectable at run time. Hence
87 set the #if 0 to #if 1 and rebuild if you want them. */
88 #if 0
89 # define CHECK_ZSM 1 /* do sanity-check CacheLine stuff */
90 # define inline __attribute__((noinline))
91 /* probably want to ditch -fomit-frame-pointer too */
92 #else
93 # define CHECK_ZSM 0 /* don't sanity-check CacheLine stuff */
94 #endif
96 /* Define to 1 to activate tracing cached rcec. */
97 #define DEBUG_CACHED_RCEC 0
99 /////////////////////////////////////////////////////////////////
100 /////////////////////////////////////////////////////////////////
101 // //
102 // data decls: VtsID //
103 // //
104 /////////////////////////////////////////////////////////////////
105 /////////////////////////////////////////////////////////////////
107 /* VtsIDs: Unique small-integer IDs for VTSs. VtsIDs can't exceed 30
108 bits, since they have to be packed into the lowest 30 bits of an
109 SVal. */
110 typedef UInt VtsID;
111 #define VtsID_INVALID 0xFFFFFFFF
115 /////////////////////////////////////////////////////////////////
116 /////////////////////////////////////////////////////////////////
117 // //
118 // data decls: SVal //
119 // //
120 /////////////////////////////////////////////////////////////////
121 /////////////////////////////////////////////////////////////////
123 typedef ULong SVal;
125 /* This value has special significance to the implementation, and callers
126 may not store it in the shadow memory. */
127 #define SVal_INVALID (3ULL << 62)
129 /* This is the default value for shadow memory. Initially the shadow
130 memory contains no accessible areas and so all reads produce this
131 value. TODO: make this caller-defineable. */
132 #define SVal_NOACCESS (2ULL << 62)
136 /////////////////////////////////////////////////////////////////
137 /////////////////////////////////////////////////////////////////
138 // //
139 // data decls: ScalarTS //
140 // //
141 /////////////////////////////////////////////////////////////////
142 /////////////////////////////////////////////////////////////////
144 /* Scalar Timestamp. We have to store a lot of these, so there is
145 some effort to make them as small as possible. Logically they are
146 a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
147 We pack it into 64 bits by representing the Thr* using a ThrID, a
148 small integer (18 bits), and a 46 bit integer for the timestamp
149 number. The 46/18 split is arbitrary, but has the effect that
150 Helgrind can only handle programs that create 2^18 or fewer threads
151 over their entire lifetime, and have no more than 2^46 timestamp
152 ticks (synchronisation operations on the same thread).
154 This doesn't seem like much of a limitation. 2^46 ticks is
155 7.06e+13, and if each tick (optimistically) takes the machine 1000
156 cycles to process, then the minimum time to process that many ticks
157 at a clock rate of 5 GHz is 162.9 days. And that's doing nothing
158 but VTS ticks, which isn't realistic.
160 NB1: SCALARTS_N_THRBITS must be 27 or lower. The obvious limit is
161 32 since a ThrID is a UInt. 27 comes from the fact that
162 'Thr_n_RCEC', which records information about old accesses, packs
163 in tsw not only a ThrID but also minimum 4+1 other bits (access size
164 and writeness) in a UInt, hence limiting size to 32-(4+1) == 27.
166 NB2: thrid values are issued upwards from 1024, and values less
167 than that aren't valid. This isn't per se necessary (any order
168 will do, so long as they are unique), but it does help ensure they
169 are less likely to get confused with the various other kinds of
170 small-integer thread ids drifting around (eg, TId).
171 So, SCALARTS_N_THRBITS must be 11 or more.
172 See also NB5.
174 NB3: this probably also relies on the fact that Thr's are never
175 deallocated -- they exist forever. Hence the 1-1 mapping from
176 Thr's to thrid values (set up in Thr__new) persists forever.
178 NB4: temp_max_sized_VTS is allocated at startup and never freed.
179 It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
180 ScalarTSs. So we can't make SCALARTS_N_THRBITS too large without
181 making the memory use for this go sky-high. With
182 SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
183 like an OK tradeoff. If more than 256k threads need to be
184 supported, we could change SCALARTS_N_THRBITS to 20, which would
185 facilitate supporting 1 million threads at the cost of 8MB storage
186 for temp_max_sized_VTS.
188 NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
189 ThrID == 0 to denote an empty Thr_n_RCEC record. So ThrID == 0
190 must never be a valid ThrID. Given NB2 that's OK.
192 #define SCALARTS_N_THRBITS 18 /* valid range: 11 to 27 inclusive,
193 See NB1 and NB2 above. */
195 #define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
196 typedef
197 struct {
198 ThrID thrid : SCALARTS_N_THRBITS;
199 ULong tym : SCALARTS_N_TYMBITS;
201 ScalarTS;
203 #define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
207 /////////////////////////////////////////////////////////////////
208 /////////////////////////////////////////////////////////////////
209 // //
210 // data decls: Filter //
211 // //
212 /////////////////////////////////////////////////////////////////
213 /////////////////////////////////////////////////////////////////
215 // baseline: 5, 9
216 #define FI_LINE_SZB_LOG2 5
217 #define FI_NUM_LINES_LOG2 10
219 #define FI_LINE_SZB (1 << FI_LINE_SZB_LOG2)
220 #define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)
222 #define FI_TAG_MASK (~(Addr)(FI_LINE_SZB - 1))
223 #define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)
225 #define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
226 & (Addr)(FI_NUM_LINES-1) )
229 /* In the lines, each 8 bytes are treated individually, and are mapped
230 to a UShort. Regardless of endianness of the underlying machine,
231 bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
232 the highest address.
234 Of each bit pair, the higher numbered bit is set if a R has been
235 seen, so the actual layout is:
237 15 14 ... 01 00
239 R W for addr+7 ... R W for addr+0
241 So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
244 /* tags are separated from lines. tags are Addrs and are
245 the base address of the line. */
246 typedef
247 struct {
248 UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
250 FiLine;
252 typedef
253 struct {
254 Addr tags[FI_NUM_LINES];
255 FiLine lines[FI_NUM_LINES];
257 Filter;
261 /////////////////////////////////////////////////////////////////
262 /////////////////////////////////////////////////////////////////
263 // //
264 // data decls: Thr, ULong_n_EC //
265 // //
266 /////////////////////////////////////////////////////////////////
267 /////////////////////////////////////////////////////////////////
269 // Records stacks for H1 history mechanism (DRD-style)
270 typedef
271 struct { ULong ull; ExeContext* ec; }
272 ULong_n_EC;
275 /* How many of the above records to collect for each thread? Older
276 ones are dumped when we run out of space. 62.5k requires 1MB per
277 thread, since each ULong_n_EC record is 16 bytes long. When more
278 than N_KWs_N_STACKs_PER_THREAD are present, the older half are
279 deleted to make space. Hence in the worst case we will be able to
280 produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
281 Kw transitions (segments in this thread). For the current setting
282 that gives a guaranteed stack for at least the last 31.25k
283 segments. */
284 #define N_KWs_N_STACKs_PER_THREAD 62500
287 UInt HG_(clo_history_backtrace_size) = 8;
289 // (UInt) `echo "Reference Counted Execution Context" | md5sum`
290 #define RCEC_MAGIC 0xab88abb2UL
292 /* RCEC usage is commented more in details in the section 'Change-event map2'
293 later in this file */
294 typedef
295 struct _RCEC {
296 UWord magic; /* sanity check only */
297 struct _RCEC* next;
298 UWord rc;
299 UWord rcX; /* used for crosschecking */
300 UWord frames_hash; /* hash of all the frames */
301 UWord frames[0];
302 /* Variable-length array.
303 The size depends on HG_(clo_history_backtrace_size). */
305 RCEC;
307 struct _Thr {
308 /* Current VTSs for this thread. They change as we go along. viR
309 is the VTS to be used for reads, viW for writes. Usually they
310 are the same, but can differ when we deal with reader-writer
311 locks. It is always the case that
312 VtsID__cmpLEQ(viW,viR) == True
313 that is, viW must be the same, or lagging behind, viR. */
314 VtsID viR;
315 VtsID viW;
317 /* Is initially False, and is set to True after the thread really
318 has done a low-level exit. When True, we expect to never see
319 any more memory references done by this thread. */
320 Bool llexit_done;
322 /* Is initially False, and is set to True after the thread has been
323 joined with (reaped by some other thread). After this point, we
324 do not expect to see any uses of .viR or .viW, so it is safe to
325 set them to VtsID_INVALID. */
326 Bool joinedwith_done;
328 /* A small integer giving a unique identity to this Thr. See
329 comments on the definition of ScalarTS for details. */
330 ThrID thrid : SCALARTS_N_THRBITS;
332 /* A filter that removes references for which we believe that
333 msmcread/msmcwrite will not change the state, nor report a
334 race. */
335 Filter* filter;
337 /* A pointer back to the top level Thread structure. There is a
338 1-1 mapping between Thread and Thr structures -- each Thr points
339 at its corresponding Thread, and vice versa. Really, Thr and
340 Thread should be merged into a single structure. */
341 Thread* hgthread;
343 /* The ULongs (scalar Kws) in this accumulate in strictly
344 increasing order, without duplicates. This is important because
345 we need to be able to find a given scalar Kw in this array
346 later, by binary search. */
347 XArray* /* ULong_n_EC */ local_Kws_n_stacks;
349 /* cached_rcec maintains the last RCEC that was retrieved for this thread. */
350 RCEC cached_rcec;
351 // cached_rcec value, not ref-counted.
352 // As the last member of an RCEC is a variable length array, this must be
353 // the last element of the _Thr struct.
355 /* The shadow register vex_shadow1 SP register (SP_s1) is used to maintain
356 the validity of the cached rcec.
357 If SP_s1 is 0, then the cached rcec is invalid (cannot be used).
358 If SP_S1 is != 0, then the cached rcec is valid. The valid cached rcec
359 can be used to generate a new RCEC by changing just the last frame. */
365 /////////////////////////////////////////////////////////////////
366 /////////////////////////////////////////////////////////////////
367 // //
368 // data decls: SO //
369 // //
370 /////////////////////////////////////////////////////////////////
371 /////////////////////////////////////////////////////////////////
373 // (UInt) `echo "Synchronisation object" | md5sum`
374 #define SO_MAGIC 0x56b3c5b0U
376 struct _SO {
377 struct _SO* admin_prev;
378 struct _SO* admin_next;
379 VtsID viR; /* r-clock of sender */
380 VtsID viW; /* w-clock of sender */
381 UInt magic;
386 /////////////////////////////////////////////////////////////////
387 /////////////////////////////////////////////////////////////////
388 // //
389 // Forward declarations //
390 // //
391 /////////////////////////////////////////////////////////////////
392 /////////////////////////////////////////////////////////////////
394 /* fwds for
395 Globals needed by other parts of the library. These are set
396 once at startup and then never changed. */
397 static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
398 static ExeContext* (*main_get_EC)( Thr* ) = NULL;
400 /* misc fn and data fwdses */
401 static void VtsID__rcinc ( VtsID ii );
402 static void VtsID__rcdec ( VtsID ii );
404 static inline Bool SVal__isC ( SVal s );
405 static inline VtsID SVal__unC_Rmin ( SVal s );
406 static inline VtsID SVal__unC_Wmin ( SVal s );
407 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );
408 static inline void SVal__rcinc ( SVal s );
409 static inline void SVal__rcdec ( SVal s );
410 /* SVal in LineZ are used to store various pointers. */
411 static inline void *SVal2Ptr (SVal s);
412 static inline SVal Ptr2SVal (void* ptr);
414 /* A double linked list of all the SO's. */
415 SO* admin_SO;
419 /////////////////////////////////////////////////////////////////
420 /////////////////////////////////////////////////////////////////
421 // //
422 // SECTION BEGIN compressed shadow memory //
423 // //
424 /////////////////////////////////////////////////////////////////
425 /////////////////////////////////////////////////////////////////
427 #ifndef __HB_ZSM_H
428 #define __HB_ZSM_H
430 /* Initialise the library. Once initialised, it will (or may) call
431 SVal__rcinc and SVal__rcdec in response to all the calls below, in order to
432 allow the user to do reference counting on the SVals stored herein.
433 It is important to understand, however, that due to internal
434 caching, the reference counts are in general inaccurate, and can be
435 both above or below the true reference count for an item. In
436 particular, the library may indicate that the reference count for
437 an item is zero, when in fact it is not.
439 To make the reference counting exact and therefore non-pointless,
440 call zsm_flush_cache. Immediately after it returns, the reference
441 counts for all items, as deduced by the caller by observing calls
442 to SVal__rcinc and SVal__rcdec, will be correct, and so any items with a
443 zero reference count may be freed (or at least considered to be
444 unreferenced by this library).
446 static void zsm_init ( void );
448 static void zsm_sset_range ( Addr, SizeT, SVal );
449 static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
450 static void zsm_scopy_range ( Addr, Addr, SizeT );
451 static void zsm_flush_cache ( void );
453 #endif /* ! __HB_ZSM_H */
/* Round a up to the next multiple of N (a itself if it already is one).
   N must be a power of 2.  Both arguments are fully parenthesised so
   that expression arguments such as ROUNDUP(x, 1 << k) expand
   correctly.  N is evaluated twice, so it must be free of side
   effects. */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N) - 1))
/* Round a down to the previous multiple of N (a itself if it already
   is one).  N must be a power of 2 and free of side effects. */
#define ROUNDDN(a, N)   ((a) & ~((N) - 1))
461 /* True if a belongs in range [start, start + szB[
462 (i.e. start + szB is excluded). */
463 static inline Bool address_in_range (Addr a, Addr start, SizeT szB)
465 /* Checking start <= a && a < start + szB.
466 As start and a are unsigned addresses, the condition can
467 be simplified. */
468 if (CHECK_ZSM)
469 tl_assert ((a - start < szB)
470 == (start <= a
471 && a < start + szB));
472 return a - start < szB;
475 /* ------ CacheLine ------ */
477 #define N_LINE_BITS 6 /* must be >= 3 */
478 #define N_LINE_ARANGE (1 << N_LINE_BITS)
479 #define N_LINE_TREES (N_LINE_ARANGE >> 3)
481 typedef
482 struct {
483 UShort descrs[N_LINE_TREES];
484 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
486 CacheLine;
488 #define TREE_DESCR_16_0 (1<<0)
489 #define TREE_DESCR_32_0 (1<<1)
490 #define TREE_DESCR_16_1 (1<<2)
491 #define TREE_DESCR_64 (1<<3)
492 #define TREE_DESCR_16_2 (1<<4)
493 #define TREE_DESCR_32_1 (1<<5)
494 #define TREE_DESCR_16_3 (1<<6)
495 #define TREE_DESCR_8_0 (1<<7)
496 #define TREE_DESCR_8_1 (1<<8)
497 #define TREE_DESCR_8_2 (1<<9)
498 #define TREE_DESCR_8_3 (1<<10)
499 #define TREE_DESCR_8_4 (1<<11)
500 #define TREE_DESCR_8_5 (1<<12)
501 #define TREE_DESCR_8_6 (1<<13)
502 #define TREE_DESCR_8_7 (1<<14)
503 #define TREE_DESCR_DTY (1<<15)
505 typedef
506 struct {
507 SVal dict[4]; /* can represent up to 4 diff values in the line */
508 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
509 dict indexes */
510 /* if dict[0] == SVal_INVALID then dict[1] is a pointer to the
511 LineF to use, and dict[2..] are also SVal_INVALID. */
513 LineZ; /* compressed rep for a cache line */
515 /* LineZ.dict[1] is used to store various pointers:
516 * In the first lineZ of a free SecMap, it points to the next free SecMap.
517 * In a lineZ for which we need to use a lineF, it points to the lineF. */
520 typedef
521 struct {
522 SVal w64s[N_LINE_ARANGE];
524 LineF; /* full rep for a cache line */
526 /* We use a pool allocator for LineF, as LineF is relatively small,
527 and we will often alloc/release such lines. */
528 static PoolAlloc* LineF_pool_allocator;
530 /* SVal in a lineZ are used to store various pointers.
531 Below are conversion functions to support that. */
532 static inline LineF *LineF_Ptr (LineZ *lineZ)
534 tl_assert(lineZ->dict[0] == SVal_INVALID);
535 return SVal2Ptr (lineZ->dict[1]);
538 /* Shadow memory.
539 Primary map is a WordFM Addr SecMap*.
540 SecMaps cover some page-size-ish section of address space and hold
541 a compressed representation.
542 CacheLine-sized chunks of SecMaps are copied into a Cache, being
543 decompressed when moved into the cache and recompressed on the
544 way out. Because of this, the cache must operate as a writeback
545 cache, not a writethrough one.
547 Each SecMap must hold a power-of-2 number of CacheLines. Hence
548 N_SECMAP_BITS must >= N_LINE_BITS.
550 #define N_SECMAP_BITS 13
551 #define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
553 // # CacheLines held by a SecMap
554 #define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
556 /* The data in the SecMap is held in the array of LineZs. Each LineZ
557 either carries the required data directly, in a compressed
558 representation, or it holds (in .dict[1]) a pointer to a LineF
559 that holds the full representation.
561 As each in-use LineF is referred to by exactly one LineZ,
562 the number of .linesZ[] that refer to a lineF should equal
563 the number of used lineF.
565 RC obligations: the RCs presented to the user include exactly
566 the values in:
567 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
568 * F reps that are in use
570 Hence the following actions at the following transitions are required:
572 F rep: alloc'd -> freed -- rcdec_LineF
573 F rep: -> alloc'd -- rcinc_LineF
574 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
575 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
578 typedef
579 struct {
580 UInt magic;
581 LineZ linesZ[N_SECMAP_ZLINES];
583 SecMap;
585 #define SecMap_MAGIC 0x571e58cbU
587 // (UInt) `echo "Free SecMap" | md5sum`
588 #define SecMap_free_MAGIC 0x5a977f30U
590 __attribute__((unused))
591 static inline Bool is_sane_SecMap ( SecMap* sm ) {
592 return sm != NULL && sm->magic == SecMap_MAGIC;
595 /* ------ Cache ------ */
597 #define N_WAY_BITS 16
598 #define N_WAY_NENT (1 << N_WAY_BITS)
600 /* Each tag is the address of the associated CacheLine, rounded down
601 to a CacheLine address boundary. A CacheLine size must be a power
602 of 2 and must be 8 or more. Hence an easy way to initialise the
603 cache so it is empty is to set all the tag values to any value % 8
604 != 0, eg 1. This means all queries in the cache initially miss.
605 It does however require us to detect and not writeback, any line
606 with a bogus tag. */
607 typedef
608 struct {
609 CacheLine lyns0[N_WAY_NENT];
610 Addr tags0[N_WAY_NENT];
612 Cache;
614 static inline Bool is_valid_scache_tag ( Addr tag ) {
615 /* a valid tag should be naturally aligned to the start of
616 a CacheLine. */
617 return 0 == (tag & (N_LINE_ARANGE - 1));
621 /* --------- Primary data structures --------- */
623 /* Shadow memory primary map */
624 static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
625 static Cache cache_shmem;
628 static UWord stats__secmaps_search = 0; // # SM finds
629 static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
630 static UWord stats__secmaps_allocd = 0; // # SecMaps issued
631 static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
632 static UWord stats__secmaps_scanGC = 0; // # nr of scan GC done.
633 static UWord stats__secmaps_scanGCed = 0; // # SecMaps GC-ed via scan
634 static UWord stats__secmaps_ssetGCed = 0; // # SecMaps GC-ed via setnoaccess
635 static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
636 static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
637 static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
638 static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
639 static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
640 static UWord stats__cache_F_fetches = 0; // # F lines fetched
641 static UWord stats__cache_F_wbacks = 0; // # F lines written back
642 static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
643 static UWord stats__cache_totrefs = 0; // # total accesses
644 static UWord stats__cache_totmisses = 0; // # misses
645 static ULong stats__cache_make_New_arange = 0; // total arange made New
646 static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
647 static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
648 static UWord stats__cline_cread64s = 0; // # calls to s_m_read64
649 static UWord stats__cline_cread32s = 0; // # calls to s_m_read32
650 static UWord stats__cline_cread16s = 0; // # calls to s_m_read16
651 static UWord stats__cline_cread08s = 0; // # calls to s_m_read8
652 static UWord stats__cline_cwrite64s = 0; // # calls to s_m_write64
653 static UWord stats__cline_cwrite32s = 0; // # calls to s_m_write32
654 static UWord stats__cline_cwrite16s = 0; // # calls to s_m_write16
655 static UWord stats__cline_cwrite08s = 0; // # calls to s_m_write8
/* NOTE(review): the original trailing comments here looked swapped /
   copy-pasted (the "sread" counter said s_m_set8, every "swrite"
   counter said s_m_get8).  The descriptions below follow the counter
   names; confirm against the sites that increment them. */
656 static UWord stats__cline_sread08s = 0; // # 8-bit "sread" ops (presumably s_m_get8)
657 static UWord stats__cline_swrite08s = 0; // # 8-bit "swrite" ops (presumably s_m_set8)
658 static UWord stats__cline_swrite16s = 0; // # 16-bit "swrite" ops (presumably s_m_set16)
659 static UWord stats__cline_swrite32s = 0; // # 32-bit "swrite" ops (presumably s_m_set32)
660 static UWord stats__cline_swrite64s = 0; // # 64-bit "swrite" ops (presumably s_m_set64)
661 static UWord stats__cline_scopy08s = 0; // # calls to s_m_copy8
662 static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
663 static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
664 static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
665 static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
666 static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
667 static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
668 static UWord stats__vts__tick = 0; // # calls to VTS__tick
669 static UWord stats__vts__join = 0; // # calls to VTS__join
670 static UWord stats__vts__cmpLEQ = 0; // # calls to VTS__cmpLEQ
671 static UWord stats__vts__cmp_structural = 0; // # calls to VTS__cmp_structural
672 static UWord stats__vts_tab_GC = 0; // # nr of vts_tab GC
673 static UWord stats__vts_pruning = 0; // # nr of vts pruning
675 // # calls to VTS__cmp_structural w/ slow case
676 static UWord stats__vts__cmp_structural_slow = 0;
678 // # calls to VTS__indexAt_SLOW
679 static UWord stats__vts__indexat_slow = 0;
681 // # calls to vts_set__find__or__clone_and_add
682 static UWord stats__vts_set__focaa = 0;
684 // # calls to vts_set__find__or__clone_and_add that lead to an
685 // allocation
686 static UWord stats__vts_set__focaa_a = 0;
689 static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
690 return a & ~(N_SECMAP_ARANGE - 1);
692 static inline UWord shmem__get_SecMap_offset ( Addr a ) {
693 return a & (N_SECMAP_ARANGE - 1);
697 /*----------------------------------------------------------------*/
698 /*--- map_shmem :: WordFM Addr SecMap ---*/
699 /*--- shadow memory (low level handlers) (shmem__* fns) ---*/
700 /*----------------------------------------------------------------*/
702 /*--------------- SecMap allocation --------------- */
704 static HChar* shmem__bigchunk_next = NULL;
705 static HChar* shmem__bigchunk_end1 = NULL;
707 static void* shmem__bigchunk_alloc ( SizeT n )
709 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
710 tl_assert(n > 0);
711 n = VG_ROUNDUP(n, 16);
712 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
713 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
714 <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
715 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
716 if (0)
717 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
718 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
719 SysRes sres = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
720 if (sr_isError(sres)) {
721 VG_(out_of_memory_NORETURN)(
722 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE,
723 sr_Err(sres));
725 shmem__bigchunk_next = (void*)(Addr)sr_Res(sres);;
726 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
728 tl_assert(shmem__bigchunk_next);
729 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
730 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
731 shmem__bigchunk_next += n;
732 return shmem__bigchunk_next - n;
735 /* SecMap changed to be fully SVal_NOACCESS are inserted in a list of
736 recycled SecMap. When a new SecMap is needed, a recycled SecMap
737 will be used in preference to allocating a new SecMap. */
738 /* We make a linked list of SecMap. The first LineZ is re-used to
739 implement the linked list. */
740 /* Returns the SecMap following sm in the free list.
741 NULL if sm is the last SecMap. sm must be on the free list. */
742 static inline SecMap *SecMap_freelist_next ( SecMap* sm )
744 tl_assert (sm);
745 tl_assert (sm->magic == SecMap_free_MAGIC);
746 return SVal2Ptr (sm->linesZ[0].dict[1]);
748 static inline void set_SecMap_freelist_next ( SecMap* sm, SecMap* next )
750 tl_assert (sm);
751 tl_assert (sm->magic == SecMap_free_MAGIC);
752 tl_assert (next == NULL || next->magic == SecMap_free_MAGIC);
753 sm->linesZ[0].dict[1] = Ptr2SVal (next);
756 static SecMap *SecMap_freelist = NULL;
757 static UWord SecMap_freelist_length(void)
759 SecMap *sm;
760 UWord n = 0;
762 sm = SecMap_freelist;
763 while (sm) {
764 n++;
765 sm = SecMap_freelist_next (sm);
767 return n;
770 static void push_SecMap_on_freelist(SecMap* sm)
772 if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
773 sm->magic = SecMap_free_MAGIC;
774 set_SecMap_freelist_next(sm, SecMap_freelist);
775 SecMap_freelist = sm;
777 /* Returns a free SecMap if there is one.
778 Otherwise, returns NULL. */
779 static SecMap *pop_SecMap_from_freelist(void)
781 SecMap *sm;
783 sm = SecMap_freelist;
784 if (sm) {
785 tl_assert (sm->magic == SecMap_free_MAGIC);
786 SecMap_freelist = SecMap_freelist_next (sm);
787 if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
789 return sm;
792 static SecMap* shmem__alloc_or_recycle_SecMap ( void )
794 Word i, j;
795 SecMap* sm = pop_SecMap_from_freelist();
797 if (!sm) {
798 sm = shmem__bigchunk_alloc( sizeof(SecMap) );
799 stats__secmaps_allocd++;
800 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
801 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
802 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
804 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
805 tl_assert(sm);
806 sm->magic = SecMap_MAGIC;
807 for (i = 0; i < N_SECMAP_ZLINES; i++) {
808 sm->linesZ[i].dict[0] = SVal_NOACCESS;
809 sm->linesZ[i].dict[1] = SVal_INVALID;
810 sm->linesZ[i].dict[2] = SVal_INVALID;
811 sm->linesZ[i].dict[3] = SVal_INVALID;
812 for (j = 0; j < N_LINE_ARANGE/4; j++)
813 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
815 return sm;
/* One entry of the 3-entry lookup cache consulted by
   shmem__find_SecMap before it falls back to map_shmem: gaKey is a
   SecMap base address and sm is the SecMap found for it. */
818 typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
/* All slots start out with gaKey == 1.  Keys being looked up come from
   shmem__round_to_SecMap_base, which clears the low N_SECMAP_BITS
   bits, so 1 never compares equal to a looked-up key and therefore
   marks a slot as empty. */
819 static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
821 static SecMap* shmem__find_SecMap ( Addr ga )
823 SecMap* sm = NULL;
824 Addr gaKey = shmem__round_to_SecMap_base(ga);
825 // Cache
826 stats__secmaps_search++;
827 if (LIKELY(gaKey == smCache[0].gaKey))
828 return smCache[0].sm;
829 if (LIKELY(gaKey == smCache[1].gaKey)) {
830 SMCacheEnt tmp = smCache[0];
831 smCache[0] = smCache[1];
832 smCache[1] = tmp;
833 return smCache[0].sm;
835 if (gaKey == smCache[2].gaKey) {
836 SMCacheEnt tmp = smCache[1];
837 smCache[1] = smCache[2];
838 smCache[2] = tmp;
839 return smCache[1].sm;
841 // end Cache
842 stats__secmaps_search_slow++;
843 if (VG_(lookupFM)( map_shmem,
844 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
845 tl_assert(sm != NULL);
846 smCache[2] = smCache[1];
847 smCache[1] = smCache[0];
848 smCache[0].gaKey = gaKey;
849 smCache[0].sm = sm;
850 } else {
851 tl_assert(sm == NULL);
853 return sm;
856 /* Scan the SecMap and count the SecMap that can be GC-ed.
857 If really, really does the GC of the SecMap. */
858 /* NOT TO BE CALLED FROM WITHIN libzsm. */
859 static UWord next_SecMap_GC_at = 1000;
860 __attribute__((noinline))
861 static UWord shmem__SecMap_do_GC(Bool really)
863 UWord secmapW = 0;
864 Addr gaKey;
865 UWord examined = 0;
866 UWord ok_GCed = 0;
868 /* First invalidate the smCache */
869 smCache[0].gaKey = 1;
870 smCache[1].gaKey = 1;
871 smCache[2].gaKey = 1;
872 STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));
874 VG_(initIterFM)( map_shmem );
875 while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
876 UWord i;
877 UWord j;
878 UWord n_linesF = 0;
879 SecMap* sm = (SecMap*)secmapW;
880 tl_assert(sm->magic == SecMap_MAGIC);
881 Bool ok_to_GC = True;
883 examined++;
885 /* Deal with the LineZs and the possible LineF of a LineZ. */
886 for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
887 LineZ* lineZ = &sm->linesZ[i];
888 if (lineZ->dict[0] != SVal_INVALID) {
889 ok_to_GC = lineZ->dict[0] == SVal_NOACCESS
890 && !SVal__isC (lineZ->dict[1])
891 && !SVal__isC (lineZ->dict[2])
892 && !SVal__isC (lineZ->dict[3]);
893 } else {
894 LineF *lineF = LineF_Ptr(lineZ);
895 n_linesF++;
896 for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
897 ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
900 if (ok_to_GC)
901 ok_GCed++;
902 if (ok_to_GC && really) {
903 SecMap *fm_sm;
904 Addr fm_gaKey;
905 /* We cannot remove a SecMap from map_shmem while iterating.
906 So, stop iteration, remove from map_shmem, recreate the iteration
907 on the next SecMap. */
908 VG_(doneIterFM) ( map_shmem );
909 /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS.
910 We just need to free the lineF referenced by the linesZ. */
911 if (n_linesF > 0) {
912 for (i = 0; i < N_SECMAP_ZLINES && n_linesF > 0; i++) {
913 LineZ* lineZ = &sm->linesZ[i];
914 if (lineZ->dict[0] == SVal_INVALID) {
915 VG_(freeEltPA)( LineF_pool_allocator, LineF_Ptr(lineZ) );
916 n_linesF--;
920 if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
921 tl_assert (0);
922 stats__secmaps_in_map_shmem--;
923 tl_assert (gaKey == fm_gaKey);
924 tl_assert (sm == fm_sm);
925 stats__secmaps_scanGCed++;
926 push_SecMap_on_freelist (sm);
927 VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
930 VG_(doneIterFM)( map_shmem );
932 if (really) {
933 stats__secmaps_scanGC++;
934 /* Next GC when we approach the max allocated */
935 next_SecMap_GC_at = stats__secmaps_allocd - 1000;
936 /* Unless we GCed less than 10%. We then allow to alloc 10%
937 more before GCing. This avoids doing a lot of costly GC
938 for the worst case : the 'growing phase' of an application
939 that allocates a lot of memory.
940 Worst can can be reproduced e.g. by
941 perf/memrw -t 30000000 -b 1000 -r 1 -l 1
942 that allocates around 30Gb of memory. */
943 if (ok_GCed < stats__secmaps_allocd/10)
944 next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;
948 if (VG_(clo_stats) && really) {
949 VG_(message)(Vg_DebugMsg,
950 "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
951 " next GC at %lu\n",
952 stats__secmaps_scanGC, examined, ok_GCed,
953 next_SecMap_GC_at);
956 return ok_GCed;
959 static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
961 SecMap* sm = shmem__find_SecMap ( ga );
962 if (LIKELY(sm)) {
963 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
964 return sm;
965 } else {
966 /* create a new one */
967 Addr gaKey = shmem__round_to_SecMap_base(ga);
968 sm = shmem__alloc_or_recycle_SecMap();
969 tl_assert(sm);
970 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
971 stats__secmaps_in_map_shmem++;
972 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
973 return sm;
977 /* Returns the nr of linesF which are in use. Note: this is scanning
978 the secmap wordFM. So, this is to be used for statistics only. */
979 __attribute__((noinline))
980 static UWord shmem__SecMap_used_linesF(void)
982 UWord secmapW = 0;
983 Addr gaKey;
984 UWord inUse = 0;
986 VG_(initIterFM)( map_shmem );
987 while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
988 UWord i;
989 SecMap* sm = (SecMap*)secmapW;
990 tl_assert(sm->magic == SecMap_MAGIC);
992 for (i = 0; i < N_SECMAP_ZLINES; i++) {
993 LineZ* lineZ = &sm->linesZ[i];
994 if (lineZ->dict[0] == SVal_INVALID)
995 inUse++;
998 VG_(doneIterFM)( map_shmem );
1000 return inUse;
1003 /* ------------ LineF and LineZ related ------------ */
1005 static void rcinc_LineF ( LineF* lineF ) {
1006 UWord i;
1007 for (i = 0; i < N_LINE_ARANGE; i++)
1008 SVal__rcinc(lineF->w64s[i]);
1011 static void rcdec_LineF ( LineF* lineF ) {
1012 UWord i;
1013 for (i = 0; i < N_LINE_ARANGE; i++)
1014 SVal__rcdec(lineF->w64s[i]);
1017 static void rcinc_LineZ ( LineZ* lineZ ) {
1018 tl_assert(lineZ->dict[0] != SVal_INVALID);
1019 SVal__rcinc(lineZ->dict[0]);
1020 if (lineZ->dict[1] != SVal_INVALID) SVal__rcinc(lineZ->dict[1]);
1021 if (lineZ->dict[2] != SVal_INVALID) SVal__rcinc(lineZ->dict[2]);
1022 if (lineZ->dict[3] != SVal_INVALID) SVal__rcinc(lineZ->dict[3]);
1025 static void rcdec_LineZ ( LineZ* lineZ ) {
1026 tl_assert(lineZ->dict[0] != SVal_INVALID);
1027 SVal__rcdec(lineZ->dict[0]);
1028 if (lineZ->dict[1] != SVal_INVALID) SVal__rcdec(lineZ->dict[1]);
1029 if (lineZ->dict[2] != SVal_INVALID) SVal__rcdec(lineZ->dict[2]);
1030 if (lineZ->dict[3] != SVal_INVALID) SVal__rcdec(lineZ->dict[3]);
1033 inline
1034 static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
1035 Word bix, shft, mask, prep;
1036 bix = ix >> 2;
1037 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
1038 mask = 3 << shft;
1039 prep = b2 << shft;
1040 arr[bix] = (arr[bix] & ~mask) | prep;
1043 inline
1044 static UWord read_twobit_array ( UChar* arr, UWord ix ) {
1045 Word bix, shft;
1046 bix = ix >> 2;
1047 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
1048 return (arr[bix] >> shft) & 3;
1051 /* We cache one free lineF, to avoid pool allocator calls.
1052 Measurement on firefox has shown that this avoids more than 90%
1053 of the PA calls. */
1054 static LineF *free_lineF = NULL;
1056 /* Allocates a lineF for LineZ. Sets lineZ in a state indicating
1057 lineF has to be used. */
1058 static inline LineF *alloc_LineF_for_Z (LineZ *lineZ)
1060 LineF *lineF;
1062 tl_assert(lineZ->dict[0] == SVal_INVALID);
1064 if (LIKELY(free_lineF)) {
1065 lineF = free_lineF;
1066 free_lineF = NULL;
1067 } else {
1068 lineF = VG_(allocEltPA) ( LineF_pool_allocator );
1070 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1071 lineZ->dict[1] = Ptr2SVal (lineF);
1073 return lineF;
1076 /* rcdec the LineF of lineZ, frees the lineF, and sets lineZ
1077 back to its initial state SVal_NOACCESS (i.e. ready to be
1078 read or written just after SecMap allocation). */
1079 static inline void clear_LineF_of_Z (LineZ *lineZ)
1081 LineF *lineF = LineF_Ptr(lineZ);
1083 rcdec_LineF(lineF);
1084 if (UNLIKELY(free_lineF)) {
1085 VG_(freeEltPA)( LineF_pool_allocator, lineF );
1086 } else {
1087 free_lineF = lineF;
1089 lineZ->dict[0] = SVal_NOACCESS;
1090 lineZ->dict[1] = SVal_INVALID;
1093 /* Given address 'tag', find either the Z or F line containing relevant
1094 data, so it can be read into the cache.
1096 static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
1097 /*OUT*/LineF** fp, Addr tag ) {
1098 LineZ* lineZ;
1099 LineF* lineF;
1100 UWord zix;
1101 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
1102 UWord smoff = shmem__get_SecMap_offset(tag);
1103 /* since smoff is derived from a valid tag, it should be
1104 cacheline-aligned. */
1105 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
1106 zix = smoff >> N_LINE_BITS;
1107 tl_assert(zix < N_SECMAP_ZLINES);
1108 lineZ = &sm->linesZ[zix];
1109 lineF = NULL;
1110 if (lineZ->dict[0] == SVal_INVALID) {
1111 lineF = LineF_Ptr (lineZ);
1112 lineZ = NULL;
1114 *zp = lineZ;
1115 *fp = lineF;
1118 /* Given address 'tag', return the relevant SecMap and the index of
1119 the LineZ within it, in the expectation that the line is to be
1120 overwritten. Regardless of whether 'tag' is currently associated
1121 with a Z or F representation, to rcdec on the current
1122 representation, in recognition of the fact that the contents are
1123 just about to be overwritten. */
1124 static __attribute__((noinline))
1125 void find_Z_for_writing ( /*OUT*/SecMap** smp,
1126 /*OUT*/Word* zixp,
1127 Addr tag ) {
1128 LineZ* lineZ;
1129 UWord zix;
1130 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
1131 UWord smoff = shmem__get_SecMap_offset(tag);
1132 /* since smoff is derived from a valid tag, it should be
1133 cacheline-aligned. */
1134 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
1135 zix = smoff >> N_LINE_BITS;
1136 tl_assert(zix < N_SECMAP_ZLINES);
1137 lineZ = &sm->linesZ[zix];
1138 /* re RCs, we are rcdec_LineZ/clear_LineF_of_Z this LineZ so that new data
1139 can be parked in it. Hence have to rcdec it accordingly. */
1140 /* If lineZ has an associated lineF, free it up. */
1141 if (lineZ->dict[0] == SVal_INVALID)
1142 clear_LineF_of_Z(lineZ);
1143 else
1144 rcdec_LineZ(lineZ);
1145 *smp = sm;
1146 *zixp = zix;
1149 /* ------------ CacheLine and implicit-tree related ------------ */
1151 __attribute__((unused))
1152 static void pp_CacheLine ( CacheLine* cl ) {
1153 Word i;
1154 if (!cl) {
1155 VG_(printf)("%s","pp_CacheLine(NULL)\n");
1156 return;
1158 for (i = 0; i < N_LINE_TREES; i++)
1159 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
1160 for (i = 0; i < N_LINE_ARANGE; i++)
1161 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
1164 static UChar descr_to_validbits ( UShort descr )
1166 /* a.k.a Party Time for gcc's constant folder */
1167 # define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
1168 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
1169 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
1170 ( (b8_5) << 12) | ( (b8_4) << 11) | \
1171 ( (b8_3) << 10) | ( (b8_2) << 9) | \
1172 ( (b8_1) << 8) | ( (b8_0) << 7) | \
1173 ( (b16_3) << 6) | ( (b32_1) << 5) | \
1174 ( (b16_2) << 4) | ( (b64) << 3) | \
1175 ( (b16_1) << 2) | ( (b32_0) << 1) | \
1176 ( (b16_0) << 0) ) )
1178 # define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
1179 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
1180 ( (bit5) << 5) | ( (bit4) << 4) | \
1181 ( (bit3) << 3) | ( (bit2) << 2) | \
1182 ( (bit1) << 1) | ( (bit0) << 0) ) )
1184 /* these should all get folded out at compile time */
1185 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
1186 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
1187 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
1188 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
1189 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
1190 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
1191 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
1192 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
1193 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
1195 switch (descr) {
1197 +--------------------------------- TREE_DESCR_8_7
1198 | +------------------- TREE_DESCR_8_0
1199 | | +---------------- TREE_DESCR_16_3
1200 | | | +-------------- TREE_DESCR_32_1
1201 | | | | +------------ TREE_DESCR_16_2
1202 | | | | | +--------- TREE_DESCR_64
1203 | | | | | | +------ TREE_DESCR_16_1
1204 | | | | | | | +---- TREE_DESCR_32_0
1205 | | | | | | | | +-- TREE_DESCR_16_0
1206 | | | | | | | | |
1207 | | | | | | | | | GRANULARITY, 7 -> 0 */
1208 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
1209 return BYTE(1,1,1,1,1,1,1,1);
1210 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
1211 return BYTE(1,1,0,1,1,1,1,1);
1212 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
1213 return BYTE(0,1,1,1,1,1,1,1);
1214 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
1215 return BYTE(0,1,0,1,1,1,1,1);
1217 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
1218 return BYTE(1,1,1,1,1,1,0,1);
1219 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
1220 return BYTE(1,1,0,1,1,1,0,1);
1221 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
1222 return BYTE(0,1,1,1,1,1,0,1);
1223 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
1224 return BYTE(0,1,0,1,1,1,0,1);
1226 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
1227 return BYTE(1,1,1,1,0,1,1,1);
1228 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
1229 return BYTE(1,1,0,1,0,1,1,1);
1230 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
1231 return BYTE(0,1,1,1,0,1,1,1);
1232 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
1233 return BYTE(0,1,0,1,0,1,1,1);
1235 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
1236 return BYTE(1,1,1,1,0,1,0,1);
1237 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
1238 return BYTE(1,1,0,1,0,1,0,1);
1239 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
1240 return BYTE(0,1,1,1,0,1,0,1);
1241 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
1242 return BYTE(0,1,0,1,0,1,0,1);
1244 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
1245 return BYTE(0,0,0,1,1,1,1,1);
1246 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
1247 return BYTE(0,0,0,1,1,1,0,1);
1248 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
1249 return BYTE(0,0,0,1,0,1,1,1);
1250 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
1251 return BYTE(0,0,0,1,0,1,0,1);
1253 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
1254 return BYTE(1,1,1,1,0,0,0,1);
1255 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
1256 return BYTE(1,1,0,1,0,0,0,1);
1257 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
1258 return BYTE(0,1,1,1,0,0,0,1);
1259 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
1260 return BYTE(0,1,0,1,0,0,0,1);
1262 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
1263 return BYTE(0,0,0,1,0,0,0,1);
1265 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
1266 return BYTE(0,0,0,0,0,0,0,1);
1268 default: return BYTE(0,0,0,0,0,0,0,0);
1269 /* INVALID - any valid descr produces at least one
1270 valid bit in tree[0..7]*/
1272 /* NOTREACHED*/
1273 tl_assert(0);
1275 # undef DESCR
1276 # undef BYTE
1279 __attribute__((unused))
1280 static Bool is_sane_Descr ( UShort descr ) {
1281 return descr_to_validbits(descr) != 0;
1284 static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
1285 VG_(sprintf)(dst,
1286 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
1287 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
1288 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
1289 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
1290 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
1291 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
1292 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
1293 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
1294 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
1295 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
1296 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
1297 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
1298 (Int)((descr & TREE_DESCR_64) ? 1 : 0),
1299 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
1300 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
1301 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
1304 static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
1305 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
1306 (Int)((byte & 128) ? 1 : 0),
1307 (Int)((byte & 64) ? 1 : 0),
1308 (Int)((byte & 32) ? 1 : 0),
1309 (Int)((byte & 16) ? 1 : 0),
1310 (Int)((byte & 8) ? 1 : 0),
1311 (Int)((byte & 4) ? 1 : 0),
1312 (Int)((byte & 2) ? 1 : 0),
1313 (Int)((byte & 1) ? 1 : 0)
1317 static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
1318 Word i;
1319 UChar validbits = descr_to_validbits(descr);
1320 HChar buf[128], buf2[128]; // large enough
1321 if (validbits == 0)
1322 goto bad;
1323 for (i = 0; i < 8; i++) {
1324 if (validbits & (1<<i)) {
1325 if (tree[i] == SVal_INVALID)
1326 goto bad;
1327 } else {
1328 if (tree[i] != SVal_INVALID)
1329 goto bad;
1332 return True;
1333 bad:
1334 sprintf_Descr( buf, descr );
1335 sprintf_Byte( buf2, validbits );
1336 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
1337 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
1338 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
1339 for (i = 0; i < 8; i++)
1340 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
1341 VG_(printf)("%s","}\n");
1342 return 0;
1345 static Bool is_sane_CacheLine ( CacheLine* cl )
1347 Word tno, cloff;
1349 if (!cl) goto bad;
1351 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1352 UShort descr = cl->descrs[tno];
1353 SVal* tree = &cl->svals[cloff];
1354 if (!is_sane_Descr_and_Tree(descr, tree))
1355 goto bad;
1357 tl_assert(cloff == N_LINE_ARANGE);
1358 return True;
1359 bad:
1360 pp_CacheLine(cl);
1361 return False;
1364 static UShort normalise_tree ( /*MOD*/SVal* tree )
1366 UShort descr;
1367 /* pre: incoming tree[0..7] does not have any invalid shvals, in
1368 particular no zeroes. */
1369 if (CHECK_ZSM
1370 && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
1371 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
1372 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
1373 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
1374 tl_assert(0);
1376 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
1377 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
1378 | TREE_DESCR_8_1 | TREE_DESCR_8_0;
1379 /* build 16-bit layer */
1380 if (tree[1] == tree[0]) {
1381 tree[1] = SVal_INVALID;
1382 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
1383 descr |= TREE_DESCR_16_0;
1385 if (tree[3] == tree[2]) {
1386 tree[3] = SVal_INVALID;
1387 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
1388 descr |= TREE_DESCR_16_1;
1390 if (tree[5] == tree[4]) {
1391 tree[5] = SVal_INVALID;
1392 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
1393 descr |= TREE_DESCR_16_2;
1395 if (tree[7] == tree[6]) {
1396 tree[7] = SVal_INVALID;
1397 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
1398 descr |= TREE_DESCR_16_3;
1400 /* build 32-bit layer */
1401 if (tree[2] == tree[0]
1402 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
1403 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
1404 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
1405 descr |= TREE_DESCR_32_0;
1407 if (tree[6] == tree[4]
1408 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
1409 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
1410 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
1411 descr |= TREE_DESCR_32_1;
1413 /* build 64-bit layer */
1414 if (tree[4] == tree[0]
1415 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
1416 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
1417 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
1418 descr |= TREE_DESCR_64;
1420 return descr;
1423 /* This takes a cacheline where all the data is at the leaves
1424 (w8[..]) and builds a correctly normalised tree. */
1425 static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
1427 Word tno, cloff;
1428 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1429 SVal* tree = &cl->svals[cloff];
1430 cl->descrs[tno] = normalise_tree( tree );
1432 tl_assert(cloff == N_LINE_ARANGE);
1433 if (CHECK_ZSM)
1434 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1435 stats__cline_normalises++;
1439 typedef struct { UChar count; SVal sval; } CountedSVal;
1441 static
1442 void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1443 /*OUT*/Word* dstUsedP,
1444 Word nDst, CacheLine* src )
1446 Word tno, cloff, dstUsed;
1448 tl_assert(nDst == N_LINE_ARANGE);
1449 dstUsed = 0;
1451 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1452 UShort descr = src->descrs[tno];
1453 SVal* tree = &src->svals[cloff];
1455 /* sequentialise the tree described by (descr,tree). */
1456 # define PUT(_n,_v) \
1457 do { dst[dstUsed ].count = (_n); \
1458 dst[dstUsed++].sval = (_v); \
1459 } while (0)
1461 /* byte 0 */
1462 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
1463 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1464 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1465 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
1466 /* byte 1 */
1467 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
1468 /* byte 2 */
1469 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1470 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
1471 /* byte 3 */
1472 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
1473 /* byte 4 */
1474 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1475 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1476 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
1477 /* byte 5 */
1478 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
1479 /* byte 6 */
1480 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1481 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
1482 /* byte 7 */
1483 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
1485 # undef PUT
1486 /* END sequentialise the tree described by (descr,tree). */
1489 tl_assert(cloff == N_LINE_ARANGE);
1490 tl_assert(dstUsed <= nDst);
1492 *dstUsedP = dstUsed;
1495 /* Write the cacheline 'wix' to backing store. Where it ends up
1496 is determined by its tag field. */
1497 static __attribute__((noinline)) void cacheline_wback ( UWord wix )
1499 Word i, j, k, m;
1500 Addr tag;
1501 SecMap* sm;
1502 CacheLine* cl;
1503 LineZ* lineZ;
1504 LineF* lineF;
1505 Word zix, fix, csvalsUsed;
1506 CountedSVal csvals[N_LINE_ARANGE];
1507 SVal sv;
1509 if (0)
1510 VG_(printf)("scache wback line %d\n", (Int)wix);
1512 tl_assert(wix < N_WAY_NENT);
1514 tag = cache_shmem.tags0[wix];
1515 cl = &cache_shmem.lyns0[wix];
1517 /* The cache line may have been invalidated; if so, ignore it. */
1518 if (!is_valid_scache_tag(tag))
1519 return;
1521 /* Where are we going to put it? */
1522 sm = NULL;
1523 lineZ = NULL;
1524 lineF = NULL;
1525 zix = fix = -1;
1527 /* find the Z line to write in and rcdec it or the associated F
1528 line. */
1529 find_Z_for_writing( &sm, &zix, tag );
1531 tl_assert(sm);
1532 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1533 lineZ = &sm->linesZ[zix];
1535 /* Generate the data to be stored */
1536 if (CHECK_ZSM)
1537 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1539 csvalsUsed = -1;
1540 sequentialise_CacheLine( csvals, &csvalsUsed,
1541 N_LINE_ARANGE, cl );
1542 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1543 if (0) VG_(printf)("%ld ", csvalsUsed);
1545 lineZ->dict[0] = lineZ->dict[1]
1546 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1548 /* i indexes actual shadow values, k is cursor in csvals */
1549 i = 0;
1550 for (k = 0; k < csvalsUsed; k++) {
1552 sv = csvals[k].sval;
1553 if (CHECK_ZSM)
1554 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1555 /* do we already have it? */
1556 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1557 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1558 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1559 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1560 /* no. look for a free slot. */
1561 if (CHECK_ZSM)
1562 tl_assert(sv != SVal_INVALID);
1563 if (lineZ->dict[0]
1564 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1565 if (lineZ->dict[1]
1566 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1567 if (lineZ->dict[2]
1568 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1569 if (lineZ->dict[3]
1570 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1571 break; /* we'll have to use the f rep */
1572 dict_ok:
1573 m = csvals[k].count;
1574 if (m == 8) {
1575 write_twobit_array( lineZ->ix2s, i+0, j );
1576 write_twobit_array( lineZ->ix2s, i+1, j );
1577 write_twobit_array( lineZ->ix2s, i+2, j );
1578 write_twobit_array( lineZ->ix2s, i+3, j );
1579 write_twobit_array( lineZ->ix2s, i+4, j );
1580 write_twobit_array( lineZ->ix2s, i+5, j );
1581 write_twobit_array( lineZ->ix2s, i+6, j );
1582 write_twobit_array( lineZ->ix2s, i+7, j );
1583 i += 8;
1585 else if (m == 4) {
1586 write_twobit_array( lineZ->ix2s, i+0, j );
1587 write_twobit_array( lineZ->ix2s, i+1, j );
1588 write_twobit_array( lineZ->ix2s, i+2, j );
1589 write_twobit_array( lineZ->ix2s, i+3, j );
1590 i += 4;
1592 else if (m == 1) {
1593 write_twobit_array( lineZ->ix2s, i+0, j );
1594 i += 1;
1596 else if (m == 2) {
1597 write_twobit_array( lineZ->ix2s, i+0, j );
1598 write_twobit_array( lineZ->ix2s, i+1, j );
1599 i += 2;
1601 else {
1602 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1607 if (LIKELY(i == N_LINE_ARANGE)) {
1608 /* Construction of the compressed representation was
1609 successful. */
1610 rcinc_LineZ(lineZ);
1611 stats__cache_Z_wbacks++;
1612 } else {
1613 /* Cannot use the compressed(z) representation. Use the full(f)
1614 rep instead. */
1615 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1616 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1617 lineF = alloc_LineF_for_Z (lineZ);
1618 i = 0;
1619 for (k = 0; k < csvalsUsed; k++) {
1620 if (CHECK_ZSM)
1621 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1622 sv = csvals[k].sval;
1623 if (CHECK_ZSM)
1624 tl_assert(sv != SVal_INVALID);
1625 for (m = csvals[k].count; m > 0; m--) {
1626 lineF->w64s[i] = sv;
1627 i++;
1630 tl_assert(i == N_LINE_ARANGE);
1631 rcinc_LineF(lineF);
1632 stats__cache_F_wbacks++;
1636 /* Fetch the cacheline 'wix' from the backing store. The tag
1637 associated with 'wix' is assumed to have already been filled in;
1638 hence that is used to determine where in the backing store to read
1639 from. */
1640 static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1642 Word i;
1643 Addr tag;
1644 CacheLine* cl;
1645 LineZ* lineZ;
1646 LineF* lineF;
1648 if (0)
1649 VG_(printf)("scache fetch line %d\n", (Int)wix);
1651 tl_assert(wix < N_WAY_NENT);
1653 tag = cache_shmem.tags0[wix];
1654 cl = &cache_shmem.lyns0[wix];
1656 /* reject nonsense requests */
1657 tl_assert(is_valid_scache_tag(tag));
1659 lineZ = NULL;
1660 lineF = NULL;
1661 find_ZF_for_reading( &lineZ, &lineF, tag );
1662 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1664 /* expand the data into the bottom layer of the tree, then get
1665 cacheline_normalise to build the descriptor array. */
1666 if (lineF) {
1667 for (i = 0; i < N_LINE_ARANGE; i++) {
1668 cl->svals[i] = lineF->w64s[i];
1670 stats__cache_F_fetches++;
1671 } else {
1672 for (i = 0; i < N_LINE_ARANGE; i++) {
1673 UWord ix = read_twobit_array( lineZ->ix2s, i );
1674 if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
1675 cl->svals[i] = lineZ->dict[ix];
1676 if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
1678 stats__cache_Z_fetches++;
1680 normalise_CacheLine( cl );
1683 /* Invalid the cachelines corresponding to the given range, which
1684 must start and end on a cacheline boundary. */
1685 static void shmem__invalidate_scache_range (Addr ga, SizeT szB)
1687 Word wix;
1689 /* ga must be on a cacheline boundary. */
1690 tl_assert (is_valid_scache_tag (ga));
1691 /* szB must be a multiple of cacheline size. */
1692 tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));
1695 Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
1696 Word nwix = szB / N_LINE_ARANGE;
1698 if (nwix > N_WAY_NENT)
1699 nwix = N_WAY_NENT; // no need to check several times the same entry.
1701 for (wix = 0; wix < nwix; wix++) {
1702 if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
1703 cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
1704 ga_ix++;
1705 if (UNLIKELY(ga_ix == N_WAY_NENT))
1706 ga_ix = 0;
1711 static void shmem__flush_and_invalidate_scache ( void ) {
1712 Word wix;
1713 Addr tag;
1714 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1715 tl_assert(!is_valid_scache_tag(1));
1716 for (wix = 0; wix < N_WAY_NENT; wix++) {
1717 tag = cache_shmem.tags0[wix];
1718 if (tag == 1/*INVALID*/) {
1719 /* already invalid; nothing to do */
1720 } else {
1721 tl_assert(is_valid_scache_tag(tag));
1722 cacheline_wback( wix );
1724 cache_shmem.tags0[wix] = 1/*INVALID*/;
1726 stats__cache_flushes_invals++;
1730 static inline Bool aligned16 ( Addr a ) {
1731 return 0 == (a & 1);
1733 static inline Bool aligned32 ( Addr a ) {
1734 return 0 == (a & 3);
1736 static inline Bool aligned64 ( Addr a ) {
1737 return 0 == (a & 7);
1739 static inline UWord get_cacheline_offset ( Addr a ) {
1740 return (UWord)(a & (N_LINE_ARANGE - 1));
1742 static inline Addr cacheline_ROUNDUP ( Addr a ) {
1743 return ROUNDUP(a, N_LINE_ARANGE);
1745 static inline Addr cacheline_ROUNDDN ( Addr a ) {
1746 return ROUNDDN(a, N_LINE_ARANGE);
1748 static inline UWord get_treeno ( Addr a ) {
1749 return get_cacheline_offset(a) >> 3;
1751 static inline UWord get_tree_offset ( Addr a ) {
1752 return a & 7;
1755 static __attribute__((noinline))
1756 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1757 static inline CacheLine* get_cacheline ( Addr a )
1759 /* tag is 'a' with the in-line offset masked out,
1760 eg a[31]..a[4] 0000 */
1761 Addr tag = a & ~(N_LINE_ARANGE - 1);
1762 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1763 stats__cache_totrefs++;
1764 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1765 return &cache_shmem.lyns0[wix];
1766 } else {
1767 return get_cacheline_MISS( a );
1771 static __attribute__((noinline))
1772 CacheLine* get_cacheline_MISS ( Addr a )
1774 /* tag is 'a' with the in-line offset masked out,
1775 eg a[31]..a[4] 0000 */
1777 CacheLine* cl;
1778 Addr* tag_old_p;
1779 Addr tag = a & ~(N_LINE_ARANGE - 1);
1780 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1782 tl_assert(tag != cache_shmem.tags0[wix]);
1784 /* Dump the old line into the backing store. */
1785 stats__cache_totmisses++;
1787 cl = &cache_shmem.lyns0[wix];
1788 tag_old_p = &cache_shmem.tags0[wix];
1790 if (is_valid_scache_tag( *tag_old_p )) {
1791 /* EXPENSIVE and REDUNDANT: callee does it */
1792 if (CHECK_ZSM)
1793 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1794 cacheline_wback( wix );
1796 /* and reload the new one */
1797 *tag_old_p = tag;
1798 cacheline_fetch( wix );
1799 if (CHECK_ZSM)
1800 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1801 return cl;
1804 static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1805 stats__cline_64to32pulldown++;
1806 switch (toff) {
1807 case 0: case 4:
1808 tl_assert(descr & TREE_DESCR_64);
1809 tree[4] = tree[0];
1810 descr &= ~TREE_DESCR_64;
1811 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1812 break;
1813 default:
1814 tl_assert(0);
1816 return descr;
1819 static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1820 stats__cline_32to16pulldown++;
1821 switch (toff) {
1822 case 0: case 2:
1823 if (!(descr & TREE_DESCR_32_0)) {
1824 descr = pulldown_to_32(tree, 0, descr);
1826 tl_assert(descr & TREE_DESCR_32_0);
1827 tree[2] = tree[0];
1828 descr &= ~TREE_DESCR_32_0;
1829 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1830 break;
1831 case 4: case 6:
1832 if (!(descr & TREE_DESCR_32_1)) {
1833 descr = pulldown_to_32(tree, 4, descr);
1835 tl_assert(descr & TREE_DESCR_32_1);
1836 tree[6] = tree[4];
1837 descr &= ~TREE_DESCR_32_1;
1838 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1839 break;
1840 default:
1841 tl_assert(0);
1843 return descr;
1846 static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1847 stats__cline_16to8pulldown++;
1848 switch (toff) {
1849 case 0: case 1:
1850 if (!(descr & TREE_DESCR_16_0)) {
1851 descr = pulldown_to_16(tree, 0, descr);
1853 tl_assert(descr & TREE_DESCR_16_0);
1854 tree[1] = tree[0];
1855 descr &= ~TREE_DESCR_16_0;
1856 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1857 break;
1858 case 2: case 3:
1859 if (!(descr & TREE_DESCR_16_1)) {
1860 descr = pulldown_to_16(tree, 2, descr);
1862 tl_assert(descr & TREE_DESCR_16_1);
1863 tree[3] = tree[2];
1864 descr &= ~TREE_DESCR_16_1;
1865 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1866 break;
1867 case 4: case 5:
1868 if (!(descr & TREE_DESCR_16_2)) {
1869 descr = pulldown_to_16(tree, 4, descr);
1871 tl_assert(descr & TREE_DESCR_16_2);
1872 tree[5] = tree[4];
1873 descr &= ~TREE_DESCR_16_2;
1874 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1875 break;
1876 case 6: case 7:
1877 if (!(descr & TREE_DESCR_16_3)) {
1878 descr = pulldown_to_16(tree, 6, descr);
1880 tl_assert(descr & TREE_DESCR_16_3);
1881 tree[7] = tree[6];
1882 descr &= ~TREE_DESCR_16_3;
1883 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1884 break;
1885 default:
1886 tl_assert(0);
1888 return descr;
1892 static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1893 UShort mask;
1894 switch (toff) {
1895 case 0:
1896 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1897 tl_assert( (descr & mask) == mask );
1898 descr &= ~mask;
1899 descr |= TREE_DESCR_16_0;
1900 break;
1901 case 2:
1902 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1903 tl_assert( (descr & mask) == mask );
1904 descr &= ~mask;
1905 descr |= TREE_DESCR_16_1;
1906 break;
1907 case 4:
1908 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1909 tl_assert( (descr & mask) == mask );
1910 descr &= ~mask;
1911 descr |= TREE_DESCR_16_2;
1912 break;
1913 case 6:
1914 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1915 tl_assert( (descr & mask) == mask );
1916 descr &= ~mask;
1917 descr |= TREE_DESCR_16_3;
1918 break;
1919 default:
1920 tl_assert(0);
1922 return descr;
1925 static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1926 UShort mask;
1927 switch (toff) {
1928 case 0:
1929 if (!(descr & TREE_DESCR_16_0))
1930 descr = pullup_descr_to_16(descr, 0);
1931 if (!(descr & TREE_DESCR_16_1))
1932 descr = pullup_descr_to_16(descr, 2);
1933 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1934 tl_assert( (descr & mask) == mask );
1935 descr &= ~mask;
1936 descr |= TREE_DESCR_32_0;
1937 break;
1938 case 4:
1939 if (!(descr & TREE_DESCR_16_2))
1940 descr = pullup_descr_to_16(descr, 4);
1941 if (!(descr & TREE_DESCR_16_3))
1942 descr = pullup_descr_to_16(descr, 6);
1943 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1944 tl_assert( (descr & mask) == mask );
1945 descr &= ~mask;
1946 descr |= TREE_DESCR_32_1;
1947 break;
1948 default:
1949 tl_assert(0);
1951 return descr;
1954 static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1955 switch (toff) {
1956 case 0: case 4:
1957 return 0 != (descr & TREE_DESCR_64);
1958 default:
1959 tl_assert(0);
1963 static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1964 switch (toff) {
1965 case 0:
1966 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1967 case 2:
1968 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1969 case 4:
1970 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1971 case 6:
1972 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1973 default:
1974 tl_assert(0);
1978 /* ------------ Cache management ------------ */
/* Flush the shadow-memory cache.  This simply forwards to
   shmem__flush_and_invalidate_scache. */
static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}
1986 static void zsm_init ( void )
1988 tl_assert( sizeof(UWord) == sizeof(Addr) );
1990 tl_assert(map_shmem == NULL);
1991 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1992 HG_(free),
1993 NULL/*unboxed UWord cmp*/);
1994 /* Invalidate all cache entries. */
1995 tl_assert(!is_valid_scache_tag(1));
1996 for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
1997 cache_shmem.tags0[wix] = 1/*INVALID*/;
2000 LineF_pool_allocator = VG_(newPA) (
2001 sizeof(LineF),
2002 /* Nr elements/pool to fill a core arena block
2003 taking some arena overhead into account. */
2004 (4 * 1024 * 1024 - 200)/sizeof(LineF),
2005 HG_(zalloc),
2006 "libhb.LineF_storage.pool",
2007 HG_(free)
2010 /* a SecMap must contain an integral number of CacheLines */
2011 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
2012 /* also ... a CacheLine holds an integral number of trees */
2013 tl_assert(0 == (N_LINE_ARANGE % 8));
2016 /////////////////////////////////////////////////////////////////
2017 /////////////////////////////////////////////////////////////////
2018 // //
2019 // SECTION END compressed shadow memory //
2020 // //
2021 /////////////////////////////////////////////////////////////////
2022 /////////////////////////////////////////////////////////////////
2026 /////////////////////////////////////////////////////////////////
2027 /////////////////////////////////////////////////////////////////
2028 // //
2029 // SECTION BEGIN vts primitives //
2030 // //
2031 /////////////////////////////////////////////////////////////////
2032 /////////////////////////////////////////////////////////////////
2035 /* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
2036 being compact stand-ins for Thr*'s. Use these functions to map
2037 between them. */
2038 static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
2039 static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
2041 __attribute__((noreturn))
2042 static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
2044 if (due_to_nThrs) {
2045 const HChar* s =
2046 "\n"
2047 "Helgrind: cannot continue, run aborted: too many threads.\n"
2048 "Sorry. Helgrind can only handle programs that create\n"
2049 "%'llu or fewer threads over their entire lifetime.\n"
2050 "\n";
2051 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
2052 } else {
2053 const HChar* s =
2054 "\n"
2055 "Helgrind: cannot continue, run aborted: too many\n"
2056 "synchronisation events. Sorry. Helgrind can only handle\n"
2057 "programs which perform %'llu or fewer\n"
2058 "inter-thread synchronisation events (locks, unlocks, etc).\n"
2059 "\n";
2060 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
2062 VG_(exit)(1);
2063 /*NOTREACHED*/
2064 tl_assert(0); /*wtf?!*/
2068 /* The dead thread (ThrID, actually) tables. A thread may only be
2069 listed here if we have been notified thereof by libhb_async_exit.
2070 New entries are added at the end. The order isn't important, but
2071 the ThrID values must be unique.
2072 verydead_thread_table_not_pruned lists the identity of the threads
2073 that died since the previous round of pruning.
2074 Once pruning is done, these ThrIDs are added to verydead_thread_table.
2075 We don't actually need to keep the set of threads that have ever died --
2076 only the threads that have died since the previous round of
2077 pruning. But it's useful for sanity check purposes to keep the
2078 entire set, so we do. */
2079 static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
2080 static XArray* /* of ThrID */ verydead_thread_table = NULL;
2082 /* Arbitrary total ordering on ThrIDs. */
2083 static Int cmp__ThrID ( const void* v1, const void* v2 ) {
2084 ThrID id1 = *(const ThrID*)v1;
2085 ThrID id2 = *(const ThrID*)v2;
2086 if (id1 < id2) return -1;
2087 if (id1 > id2) return 1;
2088 return 0;
2091 static void verydead_thread_tables_init ( void )
2093 tl_assert(!verydead_thread_table);
2094 tl_assert(!verydead_thread_table_not_pruned);
2095 verydead_thread_table
2096 = VG_(newXA)( HG_(zalloc),
2097 "libhb.verydead_thread_table_init.1",
2098 HG_(free), sizeof(ThrID) );
2099 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
2100 verydead_thread_table_not_pruned
2101 = VG_(newXA)( HG_(zalloc),
2102 "libhb.verydead_thread_table_init.2",
2103 HG_(free), sizeof(ThrID) );
2104 VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
2107 static void verydead_thread_table_sort_and_check (XArray* thrids)
2109 UWord i;
2111 VG_(sortXA)( thrids );
2112 /* Sanity check: check for unique .sts.thr values. */
2113 UWord nBT = VG_(sizeXA)( thrids );
2114 if (nBT > 0) {
2115 ThrID thrid1, thrid2;
2116 thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
2117 for (i = 1; i < nBT; i++) {
2118 thrid1 = thrid2;
2119 thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
2120 tl_assert(thrid1 < thrid2);
2123 /* Ok, so the dead thread table thrids has unique and in-order keys. */
2126 /* A VTS contains .ts, its vector clock, and also .id, a field to hold
2127 a backlink for the caller's convenience. Since we have no idea
2128 what to set that to in the library, it always gets set to
2129 VtsID_INVALID. */
2130 typedef
2131 struct {
2132 VtsID id;
2133 UInt usedTS;
2134 UInt sizeTS;
2135 ScalarTS ts[0];
2137 VTS;
2139 /* Allocate a VTS capable of storing 'sizeTS' entries. */
2140 static VTS* VTS__new ( const HChar* who, UInt sizeTS );
2142 /* Make a clone of 'vts', sizing the new array to exactly match the
2143 number of ScalarTSs present. */
2144 static VTS* VTS__clone ( const HChar* who, VTS* vts );
2146 /* Make a clone of 'vts' with the thrids in 'thrids' removed. The new
2147 array is sized exactly to hold the number of required elements.
2148 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
2149 must be in strictly increasing order. */
2150 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
2152 /* Delete this VTS in its entirety. */
2153 static void VTS__delete ( VTS* vts );
2155 /* Create a new singleton VTS in 'out'. Caller must have
2156 pre-allocated 'out' sufficiently big to hold the result in all
2157 possible cases. */
2158 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
2160 /* Create in 'out' a VTS which is the same as 'vts' except with
2161 vts[me]++, so to speak. Caller must have pre-allocated 'out'
2162 sufficiently big to hold the result in all possible cases. */
2163 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
2165 /* Create in 'out' a VTS which is the join (max) of 'a' and
2166 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
2167 the result in all possible cases. */
2168 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
2170 /* Compute the partial ordering relation of the two args. Although we
2171 could be completely general and return an enumeration value (EQ,
2172 LT, GT, UN), in fact we only need LEQ, and so we may as well
2173 hardwire that fact.
2175 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
2176 invalid ThrID). In the latter case, the returned ThrID indicates
2177 the discovered point for which they are not. There may be more
2178 than one such point, but we only care about seeing one of them, not
2179 all of them. This rather strange convention is used because
2180 sometimes we want to know the actual index at which they first
2181 differ. */
2182 static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
2184 /* Compute an arbitrary structural (total) ordering on the two args,
2185 based on their VCs, so they can be looked up in a table, tree, etc.
2186 Returns -1, 0 or 1. */
2187 static Word VTS__cmp_structural ( VTS* a, VTS* b );
2189 /* Debugging only. Display the given VTS. */
2190 static void VTS__show ( const VTS* vts );
2192 /* Debugging only. Return vts[index], so to speak. */
2193 static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
2195 /* Notify the VTS machinery that a thread has been declared
2196 comprehensively dead: that is, it has done an async exit AND it has
2197 been joined with. This should ensure that its local clocks (.viR
2198 and .viW) will never again change, and so all mentions of this
2199 thread from all VTSs in the system may be removed. */
2200 static void VTS__declare_thread_very_dead ( Thr* idx );
2202 /*--------------- to do with Vector Timestamps ---------------*/
2204 static Bool is_sane_VTS ( VTS* vts )
2206 UWord i, n;
2207 ScalarTS *st1, *st2;
2208 if (!vts) return False;
2209 if (vts->usedTS > vts->sizeTS) return False;
2210 n = vts->usedTS;
2211 if (n == 1) {
2212 st1 = &vts->ts[0];
2213 if (st1->tym == 0)
2214 return False;
2216 else
2217 if (n >= 2) {
2218 for (i = 0; i < n-1; i++) {
2219 st1 = &vts->ts[i];
2220 st2 = &vts->ts[i+1];
2221 if (st1->thrid >= st2->thrid)
2222 return False;
2223 if (st1->tym == 0 || st2->tym == 0)
2224 return False;
2227 return True;
2231 /* Create a new, empty VTS.
2233 static VTS* VTS__new ( const HChar* who, UInt sizeTS )
2235 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
2236 tl_assert(vts->usedTS == 0);
2237 vts->sizeTS = sizeTS;
2238 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
2239 return vts;
2242 /* Clone this VTS.
2244 static VTS* VTS__clone ( const HChar* who, VTS* vts )
2246 tl_assert(vts);
2247 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2248 UInt nTS = vts->usedTS;
2249 VTS* clone = VTS__new(who, nTS);
2250 clone->id = vts->id;
2251 clone->sizeTS = nTS;
2252 clone->usedTS = nTS;
2253 UInt i;
2254 for (i = 0; i < nTS; i++) {
2255 clone->ts[i] = vts->ts[i];
2257 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2258 return clone;
2262 /* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
2263 must be in strictly increasing order. We could obviously do this
2264 much more efficiently (in linear time) if necessary.
2266 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
2268 UInt i, j;
2269 tl_assert(vts);
2270 tl_assert(thridsToDel);
2271 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2272 UInt nTS = vts->usedTS;
2273 /* Figure out how many ScalarTSs will remain in the output. */
2274 UInt nReq = nTS;
2275 for (i = 0; i < nTS; i++) {
2276 ThrID thrid = vts->ts[i].thrid;
2277 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2278 nReq--;
2280 tl_assert(nReq <= nTS);
2281 /* Copy the ones that will remain. */
2282 VTS* res = VTS__new(who, nReq);
2283 j = 0;
2284 for (i = 0; i < nTS; i++) {
2285 ThrID thrid = vts->ts[i].thrid;
2286 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2287 continue;
2288 res->ts[j++] = vts->ts[i];
2290 tl_assert(j == nReq);
2291 tl_assert(j == res->sizeTS);
2292 res->usedTS = j;
2293 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2294 return res;
2298 /* Delete this VTS in its entirety.
2300 static void VTS__delete ( VTS* vts )
2302 tl_assert(vts);
2303 tl_assert(vts->usedTS <= vts->sizeTS);
2304 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2305 HG_(free)(vts);
2309 /* Create a new singleton VTS.
2311 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
2313 tl_assert(thr);
2314 tl_assert(tym >= 1);
2315 tl_assert(out);
2316 tl_assert(out->usedTS == 0);
2317 tl_assert(out->sizeTS >= 1);
2318 UInt hi = out->usedTS++;
2319 out->ts[hi].thrid = Thr__to_ThrID(thr);
2320 out->ts[hi].tym = tym;
2324 /* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
2325 not modified.
2327 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
2329 UInt i, n;
2330 ThrID me_thrid;
2331 Bool found = False;
2333 stats__vts__tick++;
2335 tl_assert(out);
2336 tl_assert(out->usedTS == 0);
2337 if (vts->usedTS >= ThrID_MAX_VALID)
2338 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2339 tl_assert(out->sizeTS >= 1 + vts->usedTS);
2341 tl_assert(me);
2342 me_thrid = Thr__to_ThrID(me);
2343 tl_assert(is_sane_VTS(vts));
2344 n = vts->usedTS;
2346 /* Copy all entries which precede 'me'. */
2347 for (i = 0; i < n; i++) {
2348 ScalarTS* here = &vts->ts[i];
2349 if (UNLIKELY(here->thrid >= me_thrid))
2350 break;
2351 UInt hi = out->usedTS++;
2352 out->ts[hi] = *here;
2355 /* 'i' now indicates the next entry to copy, if any.
2356 There are 3 possibilities:
2357 (a) there is no next entry (we used them all up already):
2358 add (me_thrid,1) to the output, and quit
2359 (b) there is a next entry, and its thrid > me_thrid:
2360 add (me_thrid,1) to the output, then copy the remaining entries
2361 (c) there is a next entry, and its thrid == me_thrid:
2362 copy it to the output but increment its timestamp value.
2363 Then copy the remaining entries. (c) is the common case.
2365 tl_assert(i <= n);
2366 if (i == n) { /* case (a) */
2367 UInt hi = out->usedTS++;
2368 out->ts[hi].thrid = me_thrid;
2369 out->ts[hi].tym = 1;
2370 } else {
2371 /* cases (b) and (c) */
2372 ScalarTS* here = &vts->ts[i];
2373 if (me_thrid == here->thrid) { /* case (c) */
2374 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
2375 /* We're hosed. We have to stop. */
2376 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2378 UInt hi = out->usedTS++;
2379 out->ts[hi].thrid = here->thrid;
2380 out->ts[hi].tym = here->tym + 1;
2381 i++;
2382 found = True;
2383 } else { /* case (b) */
2384 UInt hi = out->usedTS++;
2385 out->ts[hi].thrid = me_thrid;
2386 out->ts[hi].tym = 1;
2388 /* And copy any remaining entries. */
2389 for (/*keepgoing*/; i < n; i++) {
2390 ScalarTS* here2 = &vts->ts[i];
2391 UInt hi = out->usedTS++;
2392 out->ts[hi] = *here2;
2396 tl_assert(is_sane_VTS(out));
2397 tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
2398 tl_assert(out->usedTS <= out->sizeTS);
2402 /* Return a new VTS constructed as the join (max) of the 2 args.
2403 Neither arg is modified.
2405 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
2407 UInt ia, ib, useda, usedb;
2408 ULong tyma, tymb, tymMax;
2409 ThrID thrid;
2410 UInt ncommon = 0;
2412 stats__vts__join++;
2414 tl_assert(a);
2415 tl_assert(b);
2416 useda = a->usedTS;
2417 usedb = b->usedTS;
2419 tl_assert(out);
2420 tl_assert(out->usedTS == 0);
2421 /* overly conservative test, but doing better involves comparing
2422 the two VTSs, which we don't want to do at this point. */
2423 if (useda + usedb >= ThrID_MAX_VALID)
2424 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2425 tl_assert(out->sizeTS >= useda + usedb);
2427 ia = ib = 0;
2429 while (1) {
2431 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2432 from a and b in order, where thrid is the next ThrID
2433 occurring in either a or b, and tyma/b are the relevant
2434 scalar timestamps, taking into account implicit zeroes. */
2435 tl_assert(ia <= useda);
2436 tl_assert(ib <= usedb);
2438 if (ia == useda && ib == usedb) {
2439 /* both empty - done */
2440 break;
2442 } else if (ia == useda && ib != usedb) {
2443 /* a empty, use up b */
2444 ScalarTS* tmpb = &b->ts[ib];
2445 thrid = tmpb->thrid;
2446 tyma = 0;
2447 tymb = tmpb->tym;
2448 ib++;
2450 } else if (ia != useda && ib == usedb) {
2451 /* b empty, use up a */
2452 ScalarTS* tmpa = &a->ts[ia];
2453 thrid = tmpa->thrid;
2454 tyma = tmpa->tym;
2455 tymb = 0;
2456 ia++;
2458 } else {
2459 /* both not empty; extract lowest-ThrID'd triple */
2460 ScalarTS* tmpa = &a->ts[ia];
2461 ScalarTS* tmpb = &b->ts[ib];
2462 if (tmpa->thrid < tmpb->thrid) {
2463 /* a has the lowest unconsidered ThrID */
2464 thrid = tmpa->thrid;
2465 tyma = tmpa->tym;
2466 tymb = 0;
2467 ia++;
2468 } else if (tmpa->thrid > tmpb->thrid) {
2469 /* b has the lowest unconsidered ThrID */
2470 thrid = tmpb->thrid;
2471 tyma = 0;
2472 tymb = tmpb->tym;
2473 ib++;
2474 } else {
2475 /* they both next mention the same ThrID */
2476 tl_assert(tmpa->thrid == tmpb->thrid);
2477 thrid = tmpa->thrid; /* == tmpb->thrid */
2478 tyma = tmpa->tym;
2479 tymb = tmpb->tym;
2480 ia++;
2481 ib++;
2482 ncommon++;
2486 /* having laboriously determined (thr, tyma, tymb), do something
2487 useful with it. */
2488 tymMax = tyma > tymb ? tyma : tymb;
2489 if (tymMax > 0) {
2490 UInt hi = out->usedTS++;
2491 out->ts[hi].thrid = thrid;
2492 out->ts[hi].tym = tymMax;
2497 tl_assert(is_sane_VTS(out));
2498 tl_assert(out->usedTS <= out->sizeTS);
2499 tl_assert(out->usedTS == useda + usedb - ncommon);
2503 /* Determine if 'a' <= 'b', in the partial ordering. Returns zero if
2504 they are, or the first ThrID for which they are not (no valid ThrID
2505 has the value zero). This rather strange convention is used
2506 because sometimes we want to know the actual index at which they
2507 first differ. */
2508 static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
2510 Word ia, ib, useda, usedb;
2511 ULong tyma, tymb;
2513 stats__vts__cmpLEQ++;
2515 tl_assert(a);
2516 tl_assert(b);
2517 useda = a->usedTS;
2518 usedb = b->usedTS;
2520 ia = ib = 0;
2522 while (1) {
2524 /* This logic is to enumerate doubles (tyma, tymb) drawn
2525 from a and b in order, and tyma/b are the relevant
2526 scalar timestamps, taking into account implicit zeroes. */
2527 ThrID thrid;
2529 tl_assert(ia >= 0 && ia <= useda);
2530 tl_assert(ib >= 0 && ib <= usedb);
2532 if (ia == useda && ib == usedb) {
2533 /* both empty - done */
2534 break;
2536 } else if (ia == useda && ib != usedb) {
2537 /* a empty, use up b */
2538 ScalarTS* tmpb = &b->ts[ib];
2539 tyma = 0;
2540 tymb = tmpb->tym;
2541 thrid = tmpb->thrid;
2542 ib++;
2544 } else if (ia != useda && ib == usedb) {
2545 /* b empty, use up a */
2546 ScalarTS* tmpa = &a->ts[ia];
2547 tyma = tmpa->tym;
2548 thrid = tmpa->thrid;
2549 tymb = 0;
2550 ia++;
2552 } else {
2553 /* both not empty; extract lowest-ThrID'd triple */
2554 ScalarTS* tmpa = &a->ts[ia];
2555 ScalarTS* tmpb = &b->ts[ib];
2556 if (tmpa->thrid < tmpb->thrid) {
2557 /* a has the lowest unconsidered ThrID */
2558 tyma = tmpa->tym;
2559 thrid = tmpa->thrid;
2560 tymb = 0;
2561 ia++;
2563 else
2564 if (tmpa->thrid > tmpb->thrid) {
2565 /* b has the lowest unconsidered ThrID */
2566 tyma = 0;
2567 tymb = tmpb->tym;
2568 thrid = tmpb->thrid;
2569 ib++;
2570 } else {
2571 /* they both next mention the same ThrID */
2572 tl_assert(tmpa->thrid == tmpb->thrid);
2573 tyma = tmpa->tym;
2574 thrid = tmpa->thrid;
2575 tymb = tmpb->tym;
2576 ia++;
2577 ib++;
2581 /* having laboriously determined (tyma, tymb), do something
2582 useful with it. */
2583 if (tyma > tymb) {
2584 /* not LEQ at this index. Quit, since the answer is
2585 determined already. */
2586 tl_assert(thrid >= 1024);
2587 return thrid;
2591 return 0; /* all points are LEQ => return an invalid ThrID */
2595 /* Compute an arbitrary structural (total) ordering on the two args,
2596 based on their VCs, so they can be looked up in a table, tree, etc.
2597 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
2598 performance critical so there is some effort expended to make it sa
2599 fast as possible.
2601 Word VTS__cmp_structural ( VTS* a, VTS* b )
2603 /* We just need to generate an arbitrary total ordering based on
2604 a->ts and b->ts. Preferably do it in a way which comes across likely
2605 differences relatively quickly. */
2606 Word i;
2607 Word useda = 0, usedb = 0;
2608 ScalarTS *ctsa = NULL, *ctsb = NULL;
2610 stats__vts__cmp_structural++;
2612 tl_assert(a);
2613 tl_assert(b);
2615 ctsa = &a->ts[0]; useda = a->usedTS;
2616 ctsb = &b->ts[0]; usedb = b->usedTS;
2618 if (LIKELY(useda == usedb)) {
2619 ScalarTS *tmpa = NULL, *tmpb = NULL;
2620 stats__vts__cmp_structural_slow++;
2621 /* Same length vectors. Find the first difference, if any, as
2622 fast as possible. */
2623 for (i = 0; i < useda; i++) {
2624 tmpa = &ctsa[i];
2625 tmpb = &ctsb[i];
2626 if (LIKELY(tmpa->tym == tmpb->tym
2627 && tmpa->thrid == tmpb->thrid))
2628 continue;
2629 else
2630 break;
2632 if (UNLIKELY(i == useda)) {
2633 /* They're identical. */
2634 return 0;
2635 } else {
2636 tl_assert(i >= 0 && i < useda);
2637 if (tmpa->tym < tmpb->tym) return -1;
2638 if (tmpa->tym > tmpb->tym) return 1;
2639 if (tmpa->thrid < tmpb->thrid) return -1;
2640 if (tmpa->thrid > tmpb->thrid) return 1;
2641 /* we just established them as non-identical, hence: */
2643 /*NOTREACHED*/
2644 tl_assert(0);
2647 if (useda < usedb) return -1;
2648 if (useda > usedb) return 1;
2649 /*NOTREACHED*/
2650 tl_assert(0);
2654 /* Debugging only. Display the given VTS.
2656 static void VTS__show ( const VTS* vts )
2658 Word i, n;
2659 tl_assert(vts);
2661 VG_(printf)("[");
2662 n = vts->usedTS;
2663 for (i = 0; i < n; i++) {
2664 const ScalarTS *st = &vts->ts[i];
2665 VG_(printf)(i < n-1 ? "%d:%llu " : "%d:%llu", st->thrid, (ULong)st->tym);
2667 VG_(printf)("]");
2671 /* Debugging only. Return vts[index], so to speak.
2673 ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2675 UWord i, n;
2676 ThrID idx_thrid = Thr__to_ThrID(idx);
2677 stats__vts__indexat_slow++;
2678 tl_assert(vts);
2679 n = vts->usedTS;
2680 for (i = 0; i < n; i++) {
2681 ScalarTS* st = &vts->ts[i];
2682 if (st->thrid == idx_thrid)
2683 return st->tym;
2685 return 0;
2689 /* See comment on prototype above.
2691 static void VTS__declare_thread_very_dead ( Thr* thr )
2693 if (0) VG_(printf)("VTQ: tae %p\n", thr);
2695 tl_assert(thr->llexit_done);
2696 tl_assert(thr->joinedwith_done);
2698 ThrID nyu;
2699 nyu = Thr__to_ThrID(thr);
2700 VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );
2702 /* We can only get here if we're assured that we'll never again
2703 need to look at this thread's ::viR or ::viW. Set them to
2704 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2705 mostly so that we don't wind up pruning them (as that would be
2706 nonsensical: the only interesting ScalarTS entry for a dead
2707 thread is its own index, and the pruning will remove that.). */
2708 VtsID__rcdec(thr->viR);
2709 VtsID__rcdec(thr->viW);
2710 thr->viR = VtsID_INVALID;
2711 thr->viW = VtsID_INVALID;
2715 /////////////////////////////////////////////////////////////////
2716 /////////////////////////////////////////////////////////////////
2717 // //
2718 // SECTION END vts primitives //
2719 // //
2720 /////////////////////////////////////////////////////////////////
2721 /////////////////////////////////////////////////////////////////
2725 /////////////////////////////////////////////////////////////////
2726 /////////////////////////////////////////////////////////////////
2727 // //
2728 // SECTION BEGIN main library //
2729 // //
2730 /////////////////////////////////////////////////////////////////
2731 /////////////////////////////////////////////////////////////////
2734 /////////////////////////////////////////////////////////
2735 // //
2736 // VTS set //
2737 // //
2738 /////////////////////////////////////////////////////////
2740 static WordFM* /* WordFM VTS* void */ vts_set = NULL;
2742 static void vts_set_init ( void )
2744 tl_assert(!vts_set);
2745 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2746 HG_(free),
2747 (Word(*)(UWord,UWord))VTS__cmp_structural );
2750 /* Given a VTS, look in vts_set to see if we already have a
2751 structurally identical one. If yes, return the pair (True, pointer
2752 to the existing one). If no, clone this one, add the clone to the
2753 set, and return (False, pointer to the clone). */
2754 static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
2756 UWord keyW, valW;
2757 stats__vts_set__focaa++;
2758 tl_assert(cand->id == VtsID_INVALID);
2759 /* lookup cand (by value) */
2760 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2761 /* found it */
2762 tl_assert(valW == 0);
2763 /* if this fails, cand (by ref) was already present (!) */
2764 tl_assert(keyW != (UWord)cand);
2765 *res = (VTS*)keyW;
2766 return True;
2767 } else {
2768 /* not present. Clone, add and return address of clone. */
2769 stats__vts_set__focaa_a++;
2770 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
2771 tl_assert(clone != cand);
2772 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
2773 *res = clone;
2774 return False;
2779 /////////////////////////////////////////////////////////
2780 // //
2781 // VTS table //
2782 // //
2783 /////////////////////////////////////////////////////////
2785 static void VtsID__invalidate_caches ( void ); /* fwds */
2787 /* A type to hold VTS table entries. Invariants:
2788 If .vts == NULL, then this entry is not in use, so:
2789 - .rc == 0
2790 - this entry is on the freelist (unfortunately, does not imply
2791 any constraints on value for u.freelink)
2792 If .vts != NULL, then this entry is in use:
2793 - .vts is findable in vts_set
2794 - .vts->id == this entry number
2795 - no specific value for .rc (even 0 is OK)
2796 - this entry is not on freelist, so u.freelink == VtsID_INVALID
2798 typedef
2799 struct {
2800 VTS* vts; /* vts, in vts_set */
2801 UWord rc; /* reference count - enough for entire aspace */
2802 union {
2803 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2804 VtsID remap; /* used only during pruning, for used entries */
2805 } u;
2806 /* u.freelink only used when vts == NULL,
2807 u.remap only used when vts != NULL, during pruning. */
2809 VtsTE;
2811 /* The VTS table. */
2812 static XArray* /* of VtsTE */ vts_tab = NULL;
2814 /* An index into the VTS table, indicating the start of the list of
2815 free (available for use) entries. If the list is empty, this is
2816 VtsID_INVALID. */
2817 static VtsID vts_tab_freelist = VtsID_INVALID;
2819 /* Do a GC of vts_tab when the freelist becomes empty AND the size of
2820 vts_tab equals or exceeds this size. After GC, the value here is
2821 set appropriately so as to check for the next GC point. */
2822 static Word vts_next_GC_at = 1000;
2824 static void vts_tab_init ( void )
2826 vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2827 HG_(free), sizeof(VtsTE) );
2828 vts_tab_freelist = VtsID_INVALID;
2831 /* Add ii to the free list, checking that it looks out-of-use. */
2832 static void add_to_free_list ( VtsID ii )
2834 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2835 tl_assert(ie->vts == NULL);
2836 tl_assert(ie->rc == 0);
2837 tl_assert(ie->u.freelink == VtsID_INVALID);
2838 ie->u.freelink = vts_tab_freelist;
2839 vts_tab_freelist = ii;
2842 /* Get an entry from the free list. This will return VtsID_INVALID if
2843 the free list is empty. */
2844 static VtsID get_from_free_list ( void )
2846 VtsID ii;
2847 VtsTE* ie;
2848 if (vts_tab_freelist == VtsID_INVALID)
2849 return VtsID_INVALID;
2850 ii = vts_tab_freelist;
2851 ie = VG_(indexXA)( vts_tab, ii );
2852 tl_assert(ie->vts == NULL);
2853 tl_assert(ie->rc == 0);
2854 vts_tab_freelist = ie->u.freelink;
2855 return ii;
2858 /* Produce a new VtsID that can be used, either by getting it from
2859 the freelist, or, if that is empty, by expanding vts_tab. */
2860 static VtsID get_new_VtsID ( void )
2862 VtsID ii;
2863 VtsTE te;
2864 ii = get_from_free_list();
2865 if (ii != VtsID_INVALID)
2866 return ii;
2867 te.vts = NULL;
2868 te.rc = 0;
2869 te.u.freelink = VtsID_INVALID;
2870 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2871 return ii;
2875 /* Indirect callback from lib_zsm. */
2876 static void VtsID__rcinc ( VtsID ii )
2878 VtsTE* ie;
2879 /* VG_(indexXA) does a range check for us */
2880 ie = VG_(indexXA)( vts_tab, ii );
2881 tl_assert(ie->vts); /* else it's not in use */
2882 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2883 tl_assert(ie->vts->id == ii);
2884 ie->rc++;
2887 /* Indirect callback from lib_zsm. */
2888 static void VtsID__rcdec ( VtsID ii )
2890 VtsTE* ie;
2891 /* VG_(indexXA) does a range check for us */
2892 ie = VG_(indexXA)( vts_tab, ii );
2893 tl_assert(ie->vts); /* else it's not in use */
2894 tl_assert(ie->rc > 0); /* else RC snafu */
2895 tl_assert(ie->vts->id == ii);
2896 ie->rc--;
2900 /* Look up 'cand' in our collection of VTSs. If present, return the
2901 VtsID for the pre-existing version. If not present, clone it, add
2902 the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2903 it, and return that. */
2904 static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
2906 VTS* in_tab = NULL;
2907 tl_assert(cand->id == VtsID_INVALID);
2908 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2909 tl_assert(in_tab);
2910 if (already_have) {
2911 /* We already have a copy of 'cand'. Use that. */
2912 VtsTE* ie;
2913 tl_assert(in_tab->id != VtsID_INVALID);
2914 ie = VG_(indexXA)( vts_tab, in_tab->id );
2915 tl_assert(ie->vts == in_tab);
2916 return in_tab->id;
2917 } else {
2918 VtsID ii = get_new_VtsID();
2919 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2920 ie->vts = in_tab;
2921 ie->rc = 0;
2922 ie->u.freelink = VtsID_INVALID;
2923 in_tab->id = ii;
2924 return ii;
2929 static void show_vts_stats ( const HChar* caller )
2931 UWord nSet, nTab, nLive;
2932 ULong totrc;
2933 UWord n, i;
2934 nSet = VG_(sizeFM)( vts_set );
2935 nTab = VG_(sizeXA)( vts_tab );
2936 totrc = 0;
2937 nLive = 0;
2938 n = VG_(sizeXA)( vts_tab );
2939 for (i = 0; i < n; i++) {
2940 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2941 if (ie->vts) {
2942 nLive++;
2943 totrc += (ULong)ie->rc;
2944 } else {
2945 tl_assert(ie->rc == 0);
2948 VG_(printf)(" show_vts_stats %s\n", caller);
2949 VG_(printf)(" vts_tab size %4lu\n", nTab);
2950 VG_(printf)(" vts_tab live %4lu\n", nLive);
2951 VG_(printf)(" vts_set size %4lu\n", nSet);
2952 VG_(printf)(" total rc %4llu\n", totrc);
2956 /* --- Helpers for VtsID pruning --- */
2958 static
2959 void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2960 /*MOD*/XArray* /* of VtsTE */ new_tab,
2961 VtsID* ii )
2963 VtsTE *old_te, *new_te;
2964 VtsID old_id, new_id;
2965 /* We're relying here on VG_(indexXA)'s range checking to assert on
2966 any stupid values, in particular *ii == VtsID_INVALID. */
2967 old_id = *ii;
2968 old_te = VG_(indexXA)( old_tab, old_id );
2969 old_te->rc--;
2970 new_id = old_te->u.remap;
2971 new_te = VG_(indexXA)( new_tab, new_id );
2972 new_te->rc++;
2973 *ii = new_id;
2976 static
2977 void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2978 /*MOD*/XArray* /* of VtsTE */ new_tab,
2979 SVal* s )
2981 SVal old_sv, new_sv;
2982 old_sv = *s;
2983 if (SVal__isC(old_sv)) {
2984 VtsID rMin, wMin;
2985 rMin = SVal__unC_Rmin(old_sv);
2986 wMin = SVal__unC_Wmin(old_sv);
2987 remap_VtsID( old_tab, new_tab, &rMin );
2988 remap_VtsID( old_tab, new_tab, &wMin );
2989 new_sv = SVal__mkC( rMin, wMin );
2990 *s = new_sv;
2995 /* NOT TO BE CALLED FROM WITHIN libzsm. */
2996 __attribute__((noinline))
2997 static void vts_tab__do_GC ( Bool show_stats )
2999 UWord i, nTab, nLive, nFreed;
3001 /* ---------- BEGIN VTS GC ---------- */
3002 /* check this is actually necessary. */
3003 tl_assert(vts_tab_freelist == VtsID_INVALID);
3005 /* empty the caches for partial order checks and binary joins. We
3006 could do better and prune out the entries to be deleted, but it
3007 ain't worth the hassle. */
3008 VtsID__invalidate_caches();
3010 /* First, make the reference counts up to date. */
3011 zsm_flush_cache();
3013 nTab = VG_(sizeXA)( vts_tab );
3015 if (show_stats) {
3016 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
3017 show_vts_stats("before GC");
3020 /* Now we can inspect the entire vts_tab. Any entries with zero
3021 .rc fields are now no longer in use and can be put back on the
3022 free list, removed from vts_set, and deleted. */
3023 nFreed = 0;
3024 for (i = 0; i < nTab; i++) {
3025 Bool present;
3026 UWord oldK = 0, oldV = 12345;
3027 VtsTE* te = VG_(indexXA)( vts_tab, i );
3028 if (te->vts == NULL) {
3029 tl_assert(te->rc == 0);
3030 continue; /* already on the free list (presumably) */
3032 if (te->rc > 0)
3033 continue; /* in use */
3034 /* Ok, we got one we can free. */
3035 tl_assert(te->vts->id == i);
3036 /* first, remove it from vts_set. */
3037 present = VG_(delFromFM)( vts_set,
3038 &oldK, &oldV, (UWord)te->vts );
3039 tl_assert(present); /* else it isn't in vts_set ?! */
3040 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3041 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
3042 /* now free the VTS itself */
3043 VTS__delete(te->vts);
3044 te->vts = NULL;
3045 /* and finally put this entry on the free list */
3046 tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
3047 add_to_free_list( i );
3048 nFreed++;
3051 /* Now figure out when the next GC should be. We'll allow the
3052 number of VTSs to double before GCing again. Except of course
3053 that since we can't (or, at least, don't) shrink vts_tab, we
3054 can't set the threshold value smaller than it. */
3055 tl_assert(nFreed <= nTab);
3056 nLive = nTab - nFreed;
3057 tl_assert(nLive <= nTab);
3058 vts_next_GC_at = 2 * nLive;
3059 if (vts_next_GC_at < nTab)
3060 vts_next_GC_at = nTab;
3062 if (show_stats) {
3063 show_vts_stats("after GC");
3064 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
3067 stats__vts_tab_GC++;
3068 if (VG_(clo_stats)) {
3069 tl_assert(nTab > 0);
3070 VG_(message)(Vg_DebugMsg,
3071 "libhb: VTS GC: #%lu old size %lu live %lu (%2llu%%)\n",
3072 stats__vts_tab_GC,
3073 nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
3075 /* ---------- END VTS GC ---------- */
3077 /* Decide whether to do VTS pruning. We have one of three
3078 settings. */
3079 static UInt pruning_auto_ctr = 0; /* do not make non-static */
3081 Bool do_pruning = False;
3082 switch (HG_(clo_vts_pruning)) {
3083 case 0: /* never */
3084 break;
3085 case 1: /* auto */
3086 do_pruning = (++pruning_auto_ctr % 5) == 0;
3087 break;
3088 case 2: /* always */
3089 do_pruning = True;
3090 break;
3091 default:
3092 tl_assert(0);
3095 /* The rest of this routine only handles pruning, so we can
3096 quit at this point if it is not to be done. */
3097 if (!do_pruning)
3098 return;
3099 /* No need to do pruning if no thread died since the last pruning as
3100 no VtsTE can be pruned. */
3101 if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
3102 return;
3104 /* ---------- BEGIN VTS PRUNING ---------- */
3105 /* Sort and check the very dead threads that died since the last pruning.
3106 Sorting is used for the check and so that we can quickly look
3107 up the dead-thread entries as we work through the VTSs. */
3108 verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);
3110 /* We will run through the old table, and create a new table and
3111 set, at the same time setting the u.remap entries in the old
3112 table to point to the new entries. Then, visit every VtsID in
3113 the system, and replace all of them with new ones, using the
3114 u.remap entries in the old table. Finally, we can delete the old
3115 table and set. */
3117 XArray* /* of VtsTE */ new_tab
3118 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
3119 HG_(free), sizeof(VtsTE) );
3121 /* WordFM VTS* void */
3122 WordFM* new_set
3123 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
3124 HG_(free),
3125 (Word(*)(UWord,UWord))VTS__cmp_structural );
3127 /* Visit each old VTS. For each one:
3129 * make a pruned version
3131 * search new_set for the pruned version, yielding either
3132 Nothing (not present) or the new VtsID for it.
3134 * if not present, allocate a new VtsID for it, insert (pruned
3135 VTS, new VtsID) in the tree, and set
3136 remap_table[old VtsID] = new VtsID.
3138 * if present, set remap_table[old VtsID] = new VtsID, where
3139 new VtsID was determined by the tree lookup. Then free up
3140 the clone.
3143 UWord nBeforePruning = 0, nAfterPruning = 0;
3144 UWord nSTSsBefore = 0, nSTSsAfter = 0;
3145 VtsID new_VtsID_ctr = 0;
3147 for (i = 0; i < nTab; i++) {
3149 /* For each old VTS .. */
3150 VtsTE* old_te = VG_(indexXA)( vts_tab, i );
3151 VTS* old_vts = old_te->vts;
3153 /* Skip it if not in use */
3154 if (old_te->rc == 0) {
3155 tl_assert(old_vts == NULL);
3156 continue;
3158 tl_assert(old_te->u.remap == VtsID_INVALID);
3159 tl_assert(old_vts != NULL);
3160 tl_assert(old_vts->id == i);
3161 tl_assert(old_vts->ts != NULL);
3163 /* It is in use. Make a pruned version. */
3164 nBeforePruning++;
3165 nSTSsBefore += old_vts->usedTS;
3166 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
3167 old_vts, verydead_thread_table_not_pruned);
3168 tl_assert(new_vts->sizeTS == new_vts->usedTS);
3169 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
3170 == 0x0ddC0ffeeBadF00dULL);
3172 /* Get rid of the old VTS and the tree entry. It's a bit more
3173 complex to incrementally delete the VTSs now than to nuke
3174 them all after we're done, but the upside is that we don't
3175 wind up temporarily storing potentially two complete copies
3176 of each VTS and hence spiking memory use. */
3177 UWord oldK = 0, oldV = 12345;
3178 Bool present = VG_(delFromFM)( vts_set,
3179 &oldK, &oldV, (UWord)old_vts );
3180 tl_assert(present); /* else it isn't in vts_set ?! */
3181 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3182 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
3183 /* now free the VTS itself */
3184 VTS__delete(old_vts);
3185 old_te->vts = NULL;
3186 old_vts = NULL;
3188 /* NO MENTIONS of old_vts allowed beyond this point. */
3190 /* Ok, we have the pruned copy in new_vts. See if a
3191 structurally identical version is already present in new_set.
3192 If so, delete the one we just made and move on; if not, add
3193 it. */
3194 VTS* identical_version = NULL;
3195 UWord valW = 12345;
3196 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
3197 (UWord)new_vts)) {
3198 // already have it
3199 tl_assert(valW == 0);
3200 tl_assert(identical_version != NULL);
3201 tl_assert(identical_version != new_vts);
3202 VTS__delete(new_vts);
3203 new_vts = identical_version;
3204 tl_assert(new_vts->id != VtsID_INVALID);
3205 } else {
3206 tl_assert(valW == 12345);
3207 tl_assert(identical_version == NULL);
3208 new_vts->id = new_VtsID_ctr++;
3209 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
3210 tl_assert(!b);
3211 VtsTE new_te;
3212 new_te.vts = new_vts;
3213 new_te.rc = 0;
3214 new_te.u.freelink = VtsID_INVALID;
3215 Word j = VG_(addToXA)( new_tab, &new_te );
3216 tl_assert(j <= i);
3217 tl_assert(j == new_VtsID_ctr - 1);
3218 // stats
3219 nAfterPruning++;
3220 nSTSsAfter += new_vts->usedTS;
3222 old_te->u.remap = new_vts->id;
3224 } /* for (i = 0; i < nTab; i++) */
3226 /* Move very dead thread from verydead_thread_table_not_pruned to
3227 verydead_thread_table. Sort and check verydead_thread_table
3228 to verify a thread was reported very dead only once. */
3230 UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);
3232 for (i = 0; i < nBT; i++) {
3233 ThrID thrid =
3234 *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
3235 VG_(addToXA)( verydead_thread_table, &thrid );
3237 verydead_thread_table_sort_and_check (verydead_thread_table);
3238 VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
3241 /* At this point, we have:
3242 * the old VTS table, with its u.remap entries set,
3243 and with all .vts == NULL.
3244 * the old VTS tree should be empty, since it and the old VTSs
3245 it contained have been incrementally deleted was we worked
3246 through the old table.
3247 * the new VTS table, with all .rc == 0, all u.freelink and u.remap
3248 == VtsID_INVALID.
3249 * the new VTS tree.
3251 tl_assert( VG_(sizeFM)(vts_set) == 0 );
3253 /* Now actually apply the mapping. */
3254 /* Visit all the VtsIDs in the entire system. Where do we expect
3255 to find them?
3256 (a) in shadow memory -- the LineZs and LineFs
3257 (b) in our collection of struct _Thrs.
3258 (c) in our collection of struct _SOs.
3259 Nowhere else, AFAICS. Not in the zsm cache, because that just
3260 got invalidated.
3262 Using the u.remap fields in vts_tab, map each old VtsID to a new
3263 VtsID. For each old VtsID, dec its rc; and for each new one,
3264 inc it. This sets up the new refcounts, and it also gives a
3265 cheap sanity check of the old ones: all old refcounts should be
3266 zero after this operation.
3269 /* Do the mappings for (a) above: iterate over the Primary shadow
3270 mem map (WordFM Addr SecMap*). */
3271 UWord secmapW = 0;
3272 VG_(initIterFM)( map_shmem );
3273 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3274 UWord j;
3275 SecMap* sm = (SecMap*)secmapW;
3276 tl_assert(sm->magic == SecMap_MAGIC);
3277 /* Deal with the LineZs */
3278 for (i = 0; i < N_SECMAP_ZLINES; i++) {
3279 LineZ* lineZ = &sm->linesZ[i];
3280 if (lineZ->dict[0] != SVal_INVALID) {
3281 for (j = 0; j < 4; j++)
3282 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3283 } else {
3284 LineF* lineF = SVal2Ptr (lineZ->dict[1]);
3285 for (j = 0; j < N_LINE_ARANGE; j++)
3286 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3290 VG_(doneIterFM)( map_shmem );
3292 /* Do the mappings for (b) above: visit our collection of struct
3293 _Thrs. */
3294 Thread* hgthread = get_admin_threads();
3295 tl_assert(hgthread);
3296 while (hgthread) {
3297 Thr* hbthr = hgthread->hbthr;
3298 tl_assert(hbthr);
3299 /* Threads that are listed in the prunable set have their viR
3300 and viW set to VtsID_INVALID, so we can't mess with them. */
3301 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3302 tl_assert(hbthr->viR == VtsID_INVALID);
3303 tl_assert(hbthr->viW == VtsID_INVALID);
3304 hgthread = hgthread->admin;
3305 continue;
3307 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3308 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3309 hgthread = hgthread->admin;
3312 /* Do the mappings for (c) above: visit the struct _SOs. */
3313 SO* so = admin_SO;
3314 while (so) {
3315 if (so->viR != VtsID_INVALID)
3316 remap_VtsID( vts_tab, new_tab, &so->viR );
3317 if (so->viW != VtsID_INVALID)
3318 remap_VtsID( vts_tab, new_tab, &so->viW );
3319 so = so->admin_next;
3322 /* So, we're nearly done (with this incredibly complex operation).
3323 Check the refcounts for the old VtsIDs all fell to zero, as
3324 expected. Any failure is serious. */
3325 for (i = 0; i < nTab; i++) {
3326 VtsTE* te = VG_(indexXA)( vts_tab, i );
3327 tl_assert(te->vts == NULL);
3328 /* This is the assert proper. Note we're also asserting
3329 zeroness for old entries which are unmapped. That's OK. */
3330 tl_assert(te->rc == 0);
3333 /* Install the new table and set. */
3334 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3335 vts_set = new_set;
3336 VG_(deleteXA)( vts_tab );
3337 vts_tab = new_tab;
3339 /* The freelist of vts_tab entries is empty now, because we've
3340 compacted all of the live entries at the low end of the
3341 table. */
3342 vts_tab_freelist = VtsID_INVALID;
3344 /* Sanity check vts_set and vts_tab. */
3346 /* Because all the live entries got slid down to the bottom of vts_tab: */
3347 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3349 /* Assert that the vts_tab and vts_set entries point at each other
3350 in the required way */
3351 UWord wordK = 0, wordV = 0;
3352 VG_(initIterFM)( vts_set );
3353 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3354 tl_assert(wordK != 0);
3355 tl_assert(wordV == 0);
3356 VTS* vts = (VTS*)wordK;
3357 tl_assert(vts->id != VtsID_INVALID);
3358 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3359 tl_assert(te->vts == vts);
3361 VG_(doneIterFM)( vts_set );
3363 /* Also iterate over the table, and check each entry is
3364 plausible. */
3365 nTab = VG_(sizeXA)( vts_tab );
3366 for (i = 0; i < nTab; i++) {
3367 VtsTE* te = VG_(indexXA)( vts_tab, i );
3368 tl_assert(te->vts);
3369 tl_assert(te->vts->id == i);
3370 tl_assert(te->rc > 0); /* 'cos we just GC'd */
3371 tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
3372 /* value of te->u.remap not relevant */
3375 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
3376 stats__vts_pruning++;
3377 if (VG_(clo_stats)) {
3378 tl_assert(nTab > 0);
3379 VG_(message)(
3380 Vg_DebugMsg,
3381 "libhb: VTS PR: #%lu before %lu (avg sz %lu) "
3382 "after %lu (avg sz %lu)\n",
3383 stats__vts_pruning,
3384 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3385 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3388 /* ---------- END VTS PRUNING ---------- */
3392 /////////////////////////////////////////////////////////
3393 // //
3394 // Vts IDs //
3395 // //
3396 /////////////////////////////////////////////////////////
3398 //////////////////////////
3399 /* A temporary, max-sized VTS which is used as a temporary (the first
3400 argument) in VTS__singleton, VTS__tick and VTS__join operations. */
3401 static VTS* temp_max_sized_VTS = NULL;
3403 //////////////////////////
3404 static ULong stats__cmpLEQ_queries = 0;
3405 static ULong stats__cmpLEQ_misses = 0;
3406 static ULong stats__join2_queries = 0;
3407 static ULong stats__join2_misses = 0;
/* Rotate the 32-bit value 'w' left by 'n' bit positions.

   The rotation count is reduced modulo 32, so any 'n' is accepted.
   A count of zero (or any multiple of 32) returns 'w' unchanged; the
   naive formula would shift by 32 in that case, which is undefined
   behaviour in C.  For counts 1..31 the result is the same as before. */
static inline UInt ROL32 ( UInt w, Int n ) {
   UInt k = (UInt)n & 31u;
   if (k == 0)
      return w;
   return (w << k) | (w >> (32u - k));
}
3413 static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3414 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3415 return hash % nTab;
3418 #define N_CMPLEQ_CACHE 1023
3419 static
3420 struct { VtsID vi1; VtsID vi2; Bool leq; }
3421 cmpLEQ_cache[N_CMPLEQ_CACHE];
3423 #define N_JOIN2_CACHE 1023
3424 static
3425 struct { VtsID vi1; VtsID vi2; VtsID res; }
3426 join2_cache[N_JOIN2_CACHE];
3428 static void VtsID__invalidate_caches ( void ) {
3429 Int i;
3430 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3431 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3432 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3433 cmpLEQ_cache[i].leq = False;
3435 for (i = 0; i < N_JOIN2_CACHE; i++) {
3436 join2_cache[i].vi1 = VtsID_INVALID;
3437 join2_cache[i].vi2 = VtsID_INVALID;
3438 join2_cache[i].res = VtsID_INVALID;
3441 //////////////////////////
3443 //static Bool VtsID__is_valid ( VtsID vi ) {
3444 // VtsTE* ve;
3445 // if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3446 // return False;
3447 // ve = VG_(indexXA)( vts_tab, vi );
3448 // if (!ve->vts)
3449 // return False;
3450 // tl_assert(ve->vts->id == vi);
3451 // return True;
3454 static VTS* VtsID__to_VTS ( VtsID vi ) {
3455 VtsTE* te = VG_(indexXA)( vts_tab, vi );
3456 tl_assert(te->vts);
3457 return te->vts;
3460 static void VtsID__pp ( VtsID vi ) {
3461 VTS* vts = VtsID__to_VTS(vi);
3462 VTS__show( vts );
3465 /* compute partial ordering relation of vi1 and vi2. */
3466 __attribute__((noinline))
3467 static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
3468 UInt hash;
3469 Bool leq;
3470 VTS *v1, *v2;
3471 //if (vi1 == vi2) return True;
3472 tl_assert(vi1 != vi2);
3473 ////++
3474 stats__cmpLEQ_queries++;
3475 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3476 if (cmpLEQ_cache[hash].vi1 == vi1
3477 && cmpLEQ_cache[hash].vi2 == vi2)
3478 return cmpLEQ_cache[hash].leq;
3479 stats__cmpLEQ_misses++;
3480 ////--
3481 v1 = VtsID__to_VTS(vi1);
3482 v2 = VtsID__to_VTS(vi2);
3483 leq = VTS__cmpLEQ( v1, v2 ) == 0;
3484 ////++
3485 cmpLEQ_cache[hash].vi1 = vi1;
3486 cmpLEQ_cache[hash].vi2 = vi2;
3487 cmpLEQ_cache[hash].leq = leq;
3488 ////--
3489 return leq;
3491 static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3492 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
3495 /* compute binary join */
3496 __attribute__((noinline))
3497 static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3498 UInt hash;
3499 VtsID res;
3500 VTS *vts1, *vts2;
3501 //if (vi1 == vi2) return vi1;
3502 tl_assert(vi1 != vi2);
3503 ////++
3504 stats__join2_queries++;
3505 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3506 if (join2_cache[hash].vi1 == vi1
3507 && join2_cache[hash].vi2 == vi2)
3508 return join2_cache[hash].res;
3509 stats__join2_misses++;
3510 ////--
3511 vts1 = VtsID__to_VTS(vi1);
3512 vts2 = VtsID__to_VTS(vi2);
3513 temp_max_sized_VTS->usedTS = 0;
3514 VTS__join(temp_max_sized_VTS, vts1,vts2);
3515 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3516 ////++
3517 join2_cache[hash].vi1 = vi1;
3518 join2_cache[hash].vi2 = vi2;
3519 join2_cache[hash].res = res;
3520 ////--
3521 return res;
3523 static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
3524 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
3527 /* create a singleton VTS, namely [thr:1] */
3528 static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
3529 temp_max_sized_VTS->usedTS = 0;
3530 VTS__singleton(temp_max_sized_VTS, thr,tym);
3531 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3534 /* tick operation, creates value 1 if specified index is absent */
3535 static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3536 VTS* vts = VtsID__to_VTS(vi);
3537 temp_max_sized_VTS->usedTS = 0;
3538 VTS__tick(temp_max_sized_VTS, idx,vts);
3539 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3542 /* index into a VTS (only for assertions) */
3543 static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3544 VTS* vts = VtsID__to_VTS(vi);
3545 return VTS__indexAt_SLOW( vts, idx );
3548 /* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
3549 any, really) element in vi1 which is pointwise greater-than the
3550 corresponding element in vi2. If no such element exists, return
3551 NULL. This needs to be fairly quick since it is called every time
3552 a race is detected. */
3553 static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3555 VTS *vts1, *vts2;
3556 Thr* diffthr;
3557 ThrID diffthrid;
3558 tl_assert(vi1 != vi2);
3559 vts1 = VtsID__to_VTS(vi1);
3560 vts2 = VtsID__to_VTS(vi2);
3561 tl_assert(vts1 != vts2);
3562 diffthrid = VTS__cmpLEQ(vts1, vts2);
3563 diffthr = Thr__from_ThrID(diffthrid);
3564 tl_assert(diffthr); /* else they are LEQ ! */
3565 return diffthr;
3569 /////////////////////////////////////////////////////////
3570 // //
3571 // Filters //
3572 // //
3573 /////////////////////////////////////////////////////////
3575 /* Forget everything we know -- clear the filter and let everything
3576 through. This needs to be as fast as possible, since it is called
3577 every time the running thread changes, and every time a thread's
3578 vector clocks change, which can be quite frequent. The obvious
3579 fast way to do this is simply to stuff in tags which we know are
3580 not going to match anything, since they're not aligned to the start
3581 of a line. */
3582 static void Filter__clear ( Filter* fi, const HChar* who )
3584 UWord i;
3585 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
3586 for (i = 0; i < FI_NUM_LINES; i += 8) {
3587 fi->tags[i+0] = 1; /* impossible value -- cannot match */
3588 fi->tags[i+1] = 1;
3589 fi->tags[i+2] = 1;
3590 fi->tags[i+3] = 1;
3591 fi->tags[i+4] = 1;
3592 fi->tags[i+5] = 1;
3593 fi->tags[i+6] = 1;
3594 fi->tags[i+7] = 1;
3596 tl_assert(i == FI_NUM_LINES);
3599 /* Clearing an arbitrary range in the filter. Unfortunately
3600 we have to do this due to core-supplied new/die-mem events. */
3602 static void Filter__clear_1byte ( Filter* fi, Addr a )
3604 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3605 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3606 FiLine* line = &fi->lines[lineno];
3607 UWord loff = (a - atag) / 8;
3608 UShort mask = 0x3 << (2 * (a & 7));
3609 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3610 if (LIKELY( fi->tags[lineno] == atag )) {
3611 /* hit. clear the bits. */
3612 UShort u16 = line->u16s[loff];
3613 line->u16s[loff] = u16 & ~mask; /* clear them */
3614 } else {
3615 /* miss. The filter doesn't hold this address, so ignore. */
3619 static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
3621 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3622 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3623 FiLine* line = &fi->lines[lineno];
3624 UWord loff = (a - atag) / 8;
3625 if (LIKELY( fi->tags[lineno] == atag )) {
3626 line->u16s[loff] = 0;
3627 } else {
3628 /* miss. The filter doesn't hold this address, so ignore. */
3632 /* Only used to verify the fast Filter__clear_range */
3633 __attribute__((unused))
3634 static void Filter__clear_range_SLOW ( Filter* fi, Addr a, UWord len )
3636 tl_assert (CHECK_ZSM);
3638 /* slowly do part preceding 8-alignment */
3639 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3640 Filter__clear_1byte( fi, a );
3641 a++;
3642 len--;
3644 /* vector loop */
3645 while (len >= 8) {
3646 Filter__clear_8bytes_aligned( fi, a );
3647 a += 8;
3648 len -= 8;
3650 /* slowly do tail */
3651 while (UNLIKELY(len > 0)) {
3652 Filter__clear_1byte( fi, a );
3653 a++;
3654 len--;
3658 static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3660 # if CHECK_ZSM > 0
3661 /* We check the below more complex algorithm with the simple one.
3662 This check is very expensive : we do first the slow way on a
3663 copy of the data, then do it the fast way. On RETURN, we check
3664 the two values are equal. */
3665 Filter fi_check = *fi;
3666 Filter__clear_range_SLOW(&fi_check, a, len);
3667 # define RETURN goto check_and_return
3668 # else
3669 # define RETURN return
3670 # endif
3672 Addr begtag = FI_GET_TAG(a); /* tag of range begin */
3674 Addr end = a + len - 1;
3675 Addr endtag = FI_GET_TAG(end); /* tag of range end. */
3677 UWord rlen = len; /* remaining length to clear */
3679 Addr c = a; /* Current position we are clearing. */
3680 UWord clineno = FI_GET_LINENO(c); /* Current lineno we are clearing */
3681 FiLine* cline; /* Current line we are clearing */
3682 UWord cloff; /* Current offset in line we are clearing, when clearing
3683 partial lines. */
3685 UShort u16;
3687 STATIC_ASSERT (FI_LINE_SZB == 32);
3688 // Below assumes filter lines are 32 bytes
3690 if (LIKELY(fi->tags[clineno] == begtag)) {
3691 /* LIKELY for the heavy caller VG_(unknown_SP_update). */
3692 /* First filter line matches begtag.
3693 If c is not at the filter line begin, the below will clear
3694 the filter line bytes starting from c. */
3695 cline = &fi->lines[clineno];
3696 cloff = (c - begtag) / 8;
3698 /* First the byte(s) needed to reach 8-alignment */
3699 if (UNLIKELY(!VG_IS_8_ALIGNED(c))) {
3700 /* hiB is the nr of bytes (higher addresses) from c to reach
3701 8-aligment. */
3702 UWord hiB = 8 - (c & 7);
3703 /* Compute 2-bit/byte mask representing hiB bytes [c..c+hiB[
3704 mask is C000 , F000, FC00, FF00, FFC0, FFF0 or FFFC for the byte
3705 range 7..7 6..7 5..7 4..7 3..7 2..7 1..7 */
3706 UShort mask = 0xFFFF << (16 - 2*hiB);
3708 u16 = cline->u16s[cloff];
3709 if (LIKELY(rlen >= hiB)) {
3710 cline->u16s[cloff] = u16 & ~mask; /* clear all hiB from c */
3711 rlen -= hiB;
3712 c += hiB;
3713 cloff += 1;
3714 } else {
3715 /* Only have the bits for rlen bytes bytes. */
3716 mask = mask & ~(0xFFFF << (16 - 2*(hiB-rlen)));
3717 cline->u16s[cloff] = u16 & ~mask; /* clear rlen bytes from c. */
3718 RETURN; // We have cleared all what we can.
3721 /* c is now 8 aligned. Clear by 8 aligned bytes,
3722 till c is filter-line aligned */
3723 while (!VG_IS_32_ALIGNED(c) && rlen >= 8) {
3724 cline->u16s[cloff] = 0;
3725 c += 8;
3726 rlen -= 8;
3727 cloff += 1;
3729 } else {
3730 c = begtag + FI_LINE_SZB;
3731 if (c > end)
3732 RETURN; // We have cleared all what we can.
3733 rlen -= c - a;
3735 // We have changed c, so re-establish clineno.
3736 clineno = FI_GET_LINENO(c);
3738 if (rlen >= FI_LINE_SZB) {
3739 /* Here, c is filter line-aligned. Clear all full lines that
3740 overlap with the range starting at c, made of a full lines */
3741 UWord nfull = rlen / FI_LINE_SZB;
3742 UWord full_len = nfull * FI_LINE_SZB;
3743 rlen -= full_len;
3744 if (nfull > FI_NUM_LINES)
3745 nfull = FI_NUM_LINES; // no need to check several times the same entry.
3747 for (UWord n = 0; n < nfull; n++) {
3748 if (UNLIKELY(address_in_range(fi->tags[clineno], c, full_len))) {
3749 cline = &fi->lines[clineno];
3750 cline->u16s[0] = 0;
3751 cline->u16s[1] = 0;
3752 cline->u16s[2] = 0;
3753 cline->u16s[3] = 0;
3754 STATIC_ASSERT (4 == sizeof(cline->u16s)/sizeof(cline->u16s[0]));
3756 clineno++;
3757 if (UNLIKELY(clineno == FI_NUM_LINES))
3758 clineno = 0;
3761 c += full_len;
3762 clineno = FI_GET_LINENO(c);
3765 if (CHECK_ZSM) {
3766 tl_assert(VG_IS_8_ALIGNED(c));
3767 tl_assert(clineno == FI_GET_LINENO(c));
3770 /* Do the last filter line, if it was not cleared as a full filter line */
3771 if (UNLIKELY(rlen > 0) && fi->tags[clineno] == endtag) {
3772 cline = &fi->lines[clineno];
3773 cloff = (c - endtag) / 8;
3774 if (CHECK_ZSM) tl_assert(FI_GET_TAG(c) == endtag);
3776 /* c is 8 aligned. Clear by 8 aligned bytes, till we have less than
3777 8 bytes. */
3778 while (rlen >= 8) {
3779 cline->u16s[cloff] = 0;
3780 c += 8;
3781 rlen -= 8;
3782 cloff += 1;
3784 /* Then the remaining byte(s) */
3785 if (rlen > 0) {
3786 /* nr of bytes from c to reach end. */
3787 UWord loB = rlen;
3788 /* Compute mask representing loB bytes [c..c+loB[ :
3789 mask is 0003, 000F, 003F, 00FF, 03FF, 0FFF or 3FFF */
3790 UShort mask = 0xFFFF >> (16 - 2*loB);
3792 u16 = cline->u16s[cloff];
3793 cline->u16s[cloff] = u16 & ~mask; /* clear all loB from c */
3797 # if CHECK_ZSM > 0
3798 check_and_return:
3799 tl_assert (VG_(memcmp)(&fi_check, fi, sizeof(fi_check)) == 0);
3800 # endif
3801 # undef RETURN
3804 /* ------ Read handlers for the filter. ------ */
3806 static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3808 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3809 return False;
3811 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3812 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3813 FiLine* line = &fi->lines[lineno];
3814 UWord loff = (a - atag) / 8;
3815 UShort mask = 0xAAAA;
3816 if (LIKELY( fi->tags[lineno] == atag )) {
3817 /* hit. check line and update. */
3818 UShort u16 = line->u16s[loff];
3819 Bool ok = (u16 & mask) == mask; /* all R bits set? */
3820 line->u16s[loff] = u16 | mask; /* set them */
3821 return ok;
3822 } else {
3823 /* miss. nuke existing line and re-use it. */
3824 UWord i;
3825 fi->tags[lineno] = atag;
3826 for (i = 0; i < FI_LINE_SZB / 8; i++)
3827 line->u16s[i] = 0;
3828 line->u16s[loff] = mask;
3829 return False;
3834 static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3836 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3837 return False;
3839 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3840 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3841 FiLine* line = &fi->lines[lineno];
3842 UWord loff = (a - atag) / 8;
3843 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3844 if (LIKELY( fi->tags[lineno] == atag )) {
3845 /* hit. check line and update. */
3846 UShort u16 = line->u16s[loff];
3847 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
3848 line->u16s[loff] = u16 | mask; /* set them */
3849 return ok;
3850 } else {
3851 /* miss. nuke existing line and re-use it. */
3852 UWord i;
3853 fi->tags[lineno] = atag;
3854 for (i = 0; i < FI_LINE_SZB / 8; i++)
3855 line->u16s[i] = 0;
3856 line->u16s[loff] = mask;
3857 return False;
3862 static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3864 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3865 return False;
3867 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3868 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3869 FiLine* line = &fi->lines[lineno];
3870 UWord loff = (a - atag) / 8;
3871 UShort mask = 0xA << (2 * (a & 6));
3872 /* mask is A000, 0A00, 00A0 or 000A */
3873 if (LIKELY( fi->tags[lineno] == atag )) {
3874 /* hit. check line and update. */
3875 UShort u16 = line->u16s[loff];
3876 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
3877 line->u16s[loff] = u16 | mask; /* set them */
3878 return ok;
3879 } else {
3880 /* miss. nuke existing line and re-use it. */
3881 UWord i;
3882 fi->tags[lineno] = atag;
3883 for (i = 0; i < FI_LINE_SZB / 8; i++)
3884 line->u16s[i] = 0;
3885 line->u16s[loff] = mask;
3886 return False;
3891 static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3894 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3895 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3896 FiLine* line = &fi->lines[lineno];
3897 UWord loff = (a - atag) / 8;
3898 UShort mask = 0x2 << (2 * (a & 7));
3899 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3900 if (LIKELY( fi->tags[lineno] == atag )) {
3901 /* hit. check line and update. */
3902 UShort u16 = line->u16s[loff];
3903 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
3904 line->u16s[loff] = u16 | mask; /* set them */
3905 return ok;
3906 } else {
3907 /* miss. nuke existing line and re-use it. */
3908 UWord i;
3909 fi->tags[lineno] = atag;
3910 for (i = 0; i < FI_LINE_SZB / 8; i++)
3911 line->u16s[i] = 0;
3912 line->u16s[loff] = mask;
3913 return False;
3919 /* ------ Write handlers for the filter. ------ */
3921 static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3923 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3924 return False;
3926 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3927 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3928 FiLine* line = &fi->lines[lineno];
3929 UWord loff = (a - atag) / 8;
3930 UShort mask = 0xFFFF;
3931 if (LIKELY( fi->tags[lineno] == atag )) {
3932 /* hit. check line and update. */
3933 UShort u16 = line->u16s[loff];
3934 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
3935 line->u16s[loff] = u16 | mask; /* set them */
3936 return ok;
3937 } else {
3938 /* miss. nuke existing line and re-use it. */
3939 UWord i;
3940 fi->tags[lineno] = atag;
3941 for (i = 0; i < FI_LINE_SZB / 8; i++)
3942 line->u16s[i] = 0;
3943 line->u16s[loff] = mask;
3944 return False;
3949 static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3951 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3952 return False;
3954 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3955 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3956 FiLine* line = &fi->lines[lineno];
3957 UWord loff = (a - atag) / 8;
3958 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3959 if (LIKELY( fi->tags[lineno] == atag )) {
3960 /* hit. check line and update. */
3961 UShort u16 = line->u16s[loff];
3962 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
3963 line->u16s[loff] = u16 | mask; /* set them */
3964 return ok;
3965 } else {
3966 /* miss. nuke existing line and re-use it. */
3967 UWord i;
3968 fi->tags[lineno] = atag;
3969 for (i = 0; i < FI_LINE_SZB / 8; i++)
3970 line->u16s[i] = 0;
3971 line->u16s[loff] = mask;
3972 return False;
3977 static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3979 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3980 return False;
3982 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3983 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3984 FiLine* line = &fi->lines[lineno];
3985 UWord loff = (a - atag) / 8;
3986 UShort mask = 0xF << (2 * (a & 6));
3987 /* mask is F000, 0F00, 00F0 or 000F */
3988 if (LIKELY( fi->tags[lineno] == atag )) {
3989 /* hit. check line and update. */
3990 UShort u16 = line->u16s[loff];
3991 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
3992 line->u16s[loff] = u16 | mask; /* set them */
3993 return ok;
3994 } else {
3995 /* miss. nuke existing line and re-use it. */
3996 UWord i;
3997 fi->tags[lineno] = atag;
3998 for (i = 0; i < FI_LINE_SZB / 8; i++)
3999 line->u16s[i] = 0;
4000 line->u16s[loff] = mask;
4001 return False;
4006 static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
4009 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
4010 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
4011 FiLine* line = &fi->lines[lineno];
4012 UWord loff = (a - atag) / 8;
4013 UShort mask = 0x3 << (2 * (a & 7));
4014 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
4015 if (LIKELY( fi->tags[lineno] == atag )) {
4016 /* hit. check line and update. */
4017 UShort u16 = line->u16s[loff];
4018 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
4019 line->u16s[loff] = u16 | mask; /* set them */
4020 return ok;
4021 } else {
4022 /* miss. nuke existing line and re-use it. */
4023 UWord i;
4024 fi->tags[lineno] = atag;
4025 for (i = 0; i < FI_LINE_SZB / 8; i++)
4026 line->u16s[i] = 0;
4027 line->u16s[loff] = mask;
4028 return False;
4034 /////////////////////////////////////////////////////////
4035 // //
4036 // Threads //
4037 // //
4038 /////////////////////////////////////////////////////////
4040 /* Maps ThrID values to their Thr*s (which contain ThrID values that
4041 should point back to the relevant slot in the array. Lowest
4042 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
4043 static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
4045 /* And a counter to dole out ThrID values. For rationale/background,
4046 see comments on definition of ScalarTS (far) above. */
4047 static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
4049 static ThrID Thr__to_ThrID ( Thr* thr ) {
4050 return thr->thrid;
4052 static Thr* Thr__from_ThrID ( UInt thrid ) {
4053 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
4054 tl_assert(thr->thrid == thrid);
4055 return thr;
4058 /* True if the cached rcec for thr is valid and can be used to build the
4059 current stack trace just by changing the last frame to the current IP. */
4060 static inline Bool cached_rcec_valid(Thr *thr)
4062 UWord cached_stackvalid = VG_(get_SP_s1) (thr->hgthread->coretid);
4063 return cached_stackvalid != 0;
4065 /* Set the validity of the cached rcec of thr. */
4066 static inline void set_cached_rcec_validity(Thr *thr, Bool valid)
4068 VG_(set_SP_s1) (thr->hgthread->coretid, valid);
4071 static Thr* Thr__new ( void )
4073 Thr* thr = HG_(zalloc)
4074 ( "libhb.Thr__new.1",
4075 sizeof(Thr) + HG_(clo_history_backtrace_size) * sizeof(UWord));
4076 // We need to add the size of the frames in the cached_rcec (last member of
4077 // _Thr).
4079 thr->viR = VtsID_INVALID;
4080 thr->viW = VtsID_INVALID;
4081 thr->llexit_done = False;
4082 thr->joinedwith_done = False;
4083 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
4084 if (HG_(clo_history_level) == 1)
4085 thr->local_Kws_n_stacks
4086 = VG_(newXA)( HG_(zalloc),
4087 "libhb.Thr__new.3 (local_Kws_and_stacks)",
4088 HG_(free), sizeof(ULong_n_EC) );
4089 /* Make an 'empty' cached rcec in thr. */
4090 thr->cached_rcec.magic = RCEC_MAGIC;
4091 thr->cached_rcec.rc = 0;
4092 thr->cached_rcec.rcX = 0;
4093 thr->cached_rcec.next = NULL;
4095 /* Add this Thr* <-> ThrID binding to the mapping, and
4096 cross-check */
4097 if (!thrid_to_thr_map) {
4098 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
4099 HG_(free), sizeof(Thr*) );
4102 if (thrid_counter >= ThrID_MAX_VALID) {
4103 /* We're hosed. We have to stop. */
4104 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
4107 thr->thrid = thrid_counter++;
4108 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
4109 tl_assert(ix + 1024 == thr->thrid);
4111 return thr;
4114 static void note_local_Kw_n_stack_for ( Thr* thr )
4116 Word nPresent;
4117 ULong_n_EC pair;
4118 tl_assert(thr);
4120 // We only collect this info at history level 1 (approx)
4121 if (HG_(clo_history_level) != 1)
4122 return;
4124 /* This is the scalar Kw for thr. */
4125 pair.ull = VtsID__indexAt( thr->viW, thr );
4126 pair.ec = main_get_EC( thr );
4127 tl_assert(pair.ec);
4128 tl_assert(thr->local_Kws_n_stacks);
4130 /* check that we're not adding duplicates */
4131 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
4133 /* Throw away old stacks, if necessary. We can't accumulate stuff
4134 indefinitely. */
4135 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
4136 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
4137 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
4138 if (0)
4139 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
4140 thr, pair.ull, pair.ec );
4143 if (nPresent > 0) {
4144 ULong_n_EC* prevPair
4145 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
4146 tl_assert( prevPair->ull <= pair.ull );
4149 if (nPresent == 0)
4150 pair.ec = NULL;
4152 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
4154 if (0)
4155 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
4156 thr, pair.ull, pair.ec );
4157 if (0)
4158 VG_(pp_ExeContext)(pair.ec);
4161 static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
4162 const ULong_n_EC* pair2 )
4164 if (pair1->ull < pair2->ull) return -1;
4165 if (pair1->ull > pair2->ull) return 1;
4166 return 0;
4170 /////////////////////////////////////////////////////////
4171 // //
4172 // Shadow Values //
4173 // //
4174 /////////////////////////////////////////////////////////
4176 // type SVal, SVal_INVALID and SVal_NOACCESS are defined by
4177 // hb_zsm.h. We have to do everything else here.
/* SVal is 64 bit unsigned int.

      <---------30--------->    <---------30--------->
   00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
   10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
   11 0--------------------0 00 0--------------------0   A: SVal_INVALID

*/
/* Selects the two topmost bits of an SVal, which hold its tag. */
#define SVAL_TAGMASK (3ULL << 62)
4189 static inline Bool SVal__isC ( SVal s ) {
4190 return (0ULL << 62) == (s & SVAL_TAGMASK);
4192 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
4193 //tl_assert(VtsID__is_valid(rmini));
4194 //tl_assert(VtsID__is_valid(wmini));
4195 return (((ULong)rmini) << 32) | ((ULong)wmini);
4197 static inline VtsID SVal__unC_Rmin ( SVal s ) {
4198 tl_assert(SVal__isC(s));
4199 return (VtsID)(s >> 32);
4201 static inline VtsID SVal__unC_Wmin ( SVal s ) {
4202 tl_assert(SVal__isC(s));
4203 return (VtsID)(s & 0xFFFFFFFFULL);
4206 static inline Bool SVal__isA ( SVal s ) {
4207 return (2ULL << 62) == (s & SVAL_TAGMASK);
4209 __attribute__((unused))
4210 static inline SVal SVal__mkA ( void ) {
4211 return 2ULL << 62;
4214 /* Direct callback from lib_zsm. */
4215 static inline void SVal__rcinc ( SVal s ) {
4216 if (SVal__isC(s)) {
4217 VtsID__rcinc( SVal__unC_Rmin(s) );
4218 VtsID__rcinc( SVal__unC_Wmin(s) );
4222 /* Direct callback from lib_zsm. */
4223 static inline void SVal__rcdec ( SVal s ) {
4224 if (SVal__isC(s)) {
4225 VtsID__rcdec( SVal__unC_Rmin(s) );
4226 VtsID__rcdec( SVal__unC_Wmin(s) );
4230 static inline void *SVal2Ptr (SVal s)
4232 return (void*)(UWord)s;
4235 static inline SVal Ptr2SVal (void* ptr)
4237 return (SVal)(UWord)ptr;
4242 /////////////////////////////////////////////////////////
4243 // //
4244 // Change-event map2 //
4245 // //
4246 /////////////////////////////////////////////////////////
4248 /* This is in two parts:
4250 1. A hash table of RCECs. This is a set of reference-counted stack
4251 traces. When the reference count of a stack trace becomes zero,
4252 it is removed from the set and freed up. The intent is to have
4253 a set of stack traces which can be referred to from (2), but to
4254 only represent each one once. The set is indexed/searched by
4255 ordering on the stack trace vectors.
4257 2. A Hash table of OldRefs. These store information about each old
4258 ref that we need to record. Hash table key is the address of the
4259 location for which the information is recorded. For LRU
4260 purposes, each OldRef in the hash table is also on a doubly
4261 linked list maintaining the order in which the OldRef were most
4262 recently accessed.
4263 Each OldRef also maintains the stamp at which it was last accessed.
4264 With these stamps, we can quickly check which of 2 OldRef is the
4265 'newest', without having to scan the full list of LRU OldRef.
4267 The important part of an OldRef is, however, its acc component.
4268 This binds a TSW triple (thread, size, R/W) to an RCEC.
4270 We allocate a maximum of VG_(clo_conflict_cache_size) OldRef.
4271 Then we do exact LRU discarding. For each discarded OldRef we must
4272 of course decrement the reference count on the RCEC it
4273 refers to, in order that entries from (1) eventually get
4274 discarded too.
4277 static UWord stats__evm__lookup_found = 0;
4278 static UWord stats__evm__lookup_notfound = 0;
4280 static UWord stats__ctxt_eq_tsw_eq_rcec = 0;
4281 static UWord stats__ctxt_eq_tsw_neq_rcec = 0;
4282 static UWord stats__ctxt_neq_tsw_neq_rcec = 0;
4283 static UWord stats__ctxt_rcdec_calls = 0;
4284 static UWord stats__ctxt_rcec_gc_discards = 0;
4286 static UWord stats__ctxt_tab_curr = 0;
4287 static UWord stats__ctxt_tab_max = 0;
4289 static UWord stats__ctxt_tab_qs = 0;
4290 static UWord stats__ctxt_tab_cmps = 0;
4293 ///////////////////////////////////////////////////////
4294 //// Part (1): A hash table of RCECs
/* Number of buckets in contextTab; bucket index is
   frames_hash % N_RCEC_TAB. */
//#define N_RCEC_TAB 98317 /* prime */
#define N_RCEC_TAB 196613 /* prime */
4300 //////////// BEGIN RCEC pool allocator
4301 static PoolAlloc* rcec_pool_allocator;
4302 static RCEC* alloc_RCEC ( void ) {
4303 return VG_(allocEltPA) ( rcec_pool_allocator );
4306 static void free_RCEC ( RCEC* rcec ) {
4307 tl_assert(rcec->magic == RCEC_MAGIC);
4308 VG_(freeEltPA)( rcec_pool_allocator, rcec );
4310 //////////// END RCEC pool allocator
4312 static RCEC** contextTab = NULL; /* hash table of RCEC*s */
4314 /* Count of allocated RCEC having ref count > 0 */
4315 static UWord RCEC_referenced = 0;
4317 /* True if the frames of ec1 and ec2 are different. */
4318 static Bool RCEC__differs_by_frames ( RCEC* ec1, RCEC* ec2 ) {
4319 Word i;
4320 if (CHECK_CEM) {
4321 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
4322 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
4324 if (ec1->frames_hash != ec2->frames_hash) return True;
4325 for (i = 0; i < HG_(clo_history_backtrace_size); i++) {
4326 if (ec1->frames[i] != ec2->frames[i]) return True;
4328 return False;
4331 /* Dec the ref of this RCEC. */
4332 static void ctxt__rcdec ( RCEC* ec )
4334 stats__ctxt_rcdec_calls++;
4335 if (CHECK_CEM)
4336 tl_assert(ec && ec->magic == RCEC_MAGIC);
4337 tl_assert(ec->rc > 0);
4338 ec->rc--;
4339 if (ec->rc == 0)
4340 RCEC_referenced--;
4343 static void ctxt__rcinc ( RCEC* ec )
4345 if (CHECK_CEM)
4346 tl_assert(ec && ec->magic == RCEC_MAGIC);
4347 if (ec->rc == 0)
4348 RCEC_referenced++;
4349 ec->rc++;
4353 /* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
4354 move it one step closer to the front of the list, so as to make
4355 subsequent searches for it cheaper. */
4356 static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
4358 RCEC *ec0, *ec1, *ec2;
4359 if (ec == *headp)
4360 tl_assert(0); /* already at head of list */
4361 tl_assert(ec != NULL);
4362 ec0 = *headp;
4363 ec1 = NULL;
4364 ec2 = NULL;
4365 while (True) {
4366 if (ec0 == NULL || ec0 == ec) break;
4367 ec2 = ec1;
4368 ec1 = ec0;
4369 ec0 = ec0->next;
4371 tl_assert(ec0 == ec);
4372 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
4373 RCEC* tmp;
4374 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
4375 predecessor. Swap ec0 and ec1, that is, move ec0 one step
4376 closer to the start of the list. */
4377 tl_assert(ec2->next == ec1);
4378 tl_assert(ec1->next == ec0);
4379 tmp = ec0->next;
4380 ec2->next = ec0;
4381 ec0->next = ec1;
4382 ec1->next = tmp;
4384 else
4385 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
4386 /* it's second in the list. */
4387 tl_assert(*headp == ec1);
4388 tl_assert(ec1->next == ec0);
4389 ec1->next = ec0->next;
4390 ec0->next = ec1;
4391 *headp = ec0;
4396 /* Find the given RCEC in the tree, and return a pointer to it. Or,
4397 if not present, add the given one to the tree (by making a copy of
4398 it, so the caller can immediately deallocate the original) and
4399 return a pointer to the copy. The caller can safely have 'example'
4400 on its stack, since we will always return a pointer to a copy of
4401 it, not to the original. Note that the inserted node will have .rc
4402 of zero and so the caller must immediately increment it. */
4403 __attribute__((noinline))
4404 static RCEC* ctxt__find_or_add ( RCEC* example )
4406 UWord hent;
4407 RCEC* copy;
4409 if (CHECK_CEM) {
4410 /* Note that the single caller of ctxt__find_or_add always provides
4411 &thr->cached_rcec as argument. The sanity of thr->cached_rcec is always
4412 checked with a thread terminates. */
4413 tl_assert(example && example->magic == RCEC_MAGIC);
4414 tl_assert(example->rc == 0);
4417 /* Search the hash table to see if we already have it. */
4418 stats__ctxt_tab_qs++;
4419 hent = example->frames_hash % N_RCEC_TAB;
4420 copy = contextTab[hent];
4421 while (1) {
4422 if (!copy) break;
4423 if (CHECK_CEM)
4424 tl_assert(copy->magic == RCEC_MAGIC);
4425 stats__ctxt_tab_cmps++;
4426 if (!RCEC__differs_by_frames(copy, example)) break;
4427 copy = copy->next;
4430 if (copy) {
4431 tl_assert(copy != example);
4432 /* optimisation: if it's not at the head of its list, move 1
4433 step fwds, to make future searches cheaper */
4434 if (copy != contextTab[hent]) {
4435 move_RCEC_one_step_forward( &contextTab[hent], copy );
4437 } else {
4438 copy = alloc_RCEC();
4439 tl_assert(copy != example);
4440 *copy = *example;
4441 for (Word i = 0; i < HG_(clo_history_backtrace_size); i++)
4442 copy->frames[i] = example->frames[i];
4443 copy->next = contextTab[hent];
4444 contextTab[hent] = copy;
4445 stats__ctxt_tab_curr++;
4446 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4447 stats__ctxt_tab_max = stats__ctxt_tab_curr;
4449 return copy;
4452 static inline UWord ROLW ( UWord w, Int n )
4454 Int bpw = 8 * sizeof(UWord);
4455 w = (w << n) | (w >> (bpw-n));
4456 return w;
4459 static UWord stats__cached_rcec_identical = 0;
4460 static UWord stats__cached_rcec_updated = 0;
4461 static UWord stats__cached_rcec_fresh = 0;
4462 static UWord stats__cached_rcec_diff = 0;
4463 static UWord stats__cached_rcec_diff_known_reason = 0;
4465 /* Check if the cached rcec in thr corresponds to the current
4466 stacktrace of the thread. Returns True if ok, False otherwise.
4467 This is just used for debugging the cached rcec logic, activated
4468 using --hg-sanity-flags=xx1xxx i.e. SCE_ACCESS flag.
4469 When this flag is activated, a call to this function will happen each time
4470 a stack trace is needed for a memory access. */
4471 __attribute__((noinline))
4472 static Bool check_cached_rcec_ok (Thr* thr, Addr previous_frame0)
4474 Bool ok = True;
4475 UInt i;
4476 UWord frames[HG_(clo_history_backtrace_size)];
4477 UWord sps[HG_(clo_history_backtrace_size)];
4478 UWord fps[HG_(clo_history_backtrace_size)];
4479 const DiEpoch cur_ep = VG_(current_DiEpoch)();
4481 for (i = 0; i < HG_(clo_history_backtrace_size); i++)
4482 frames[i] = sps[i] = fps[i] = 0;
4483 VG_(get_StackTrace)( thr->hgthread->coretid, &frames[0],
4484 HG_(clo_history_backtrace_size),
4485 &sps[0], &fps[0], 0);
4486 for (i = 0; i < HG_(clo_history_backtrace_size); i++) {
4487 if ( thr->cached_rcec.frames[i] != frames[i] ) {
4488 /* There are a bunch of "normal" reasons for which a stack
4489 derived from the cached rcec differs from frames. */
4490 const HChar *reason = NULL;
4492 /* Old linkers (e.g. RHEL5) gave no cfi unwind information in the PLT
4493 section (fix was added in binutils around June 2011).
4494 Without PLT unwind info, stacktrace in the PLT section are
4495 missing an entry. E.g. the cached stacktrace is:
4496 ==4463== at 0x2035C0: ___tls_get_addr (dl-tls.c:753)
4497 ==4463== by 0x33B7F9: __libc_thread_freeres
4498 (in /lib/libc-2.11.2.so)
4499 ==4463== by 0x39BA4F: start_thread (pthread_create.c:307)
4500 ==4463== by 0x2F107D: clone (clone.S:130)
4501 while the 'check stacktrace' is
4502 ==4463== at 0x2035C0: ___tls_get_addr (dl-tls.c:753)
4503 ==4463== by 0x33B82D: strerror_thread_freeres
4504 (in /lib/libc-2.11.2.so)
4505 ==4463== by 0x33B7F9: __libc_thread_freeres
4506 (in /lib/libc-2.11.2.so)
4507 ==4463== by 0x39BA4F: start_thread (pthread_create.c:307)
4508 ==4463== by 0x2F107D: clone (clone.S:130)
4509 No cheap/easy way to detect or fix that. */
4511 /* It seems that sometimes, the CFI unwind info looks wrong
4512 for a 'ret' instruction. E.g. here is the unwind info
4513 for a 'retq' on gcc20 (amd64, Debian 7)
4514 [0x4e3ddfe .. 0x4e3ddfe]: let cfa=oldSP+48 in RA=*(cfa+-8)
4515 SP=cfa+0 BP=*(cfa+-24)
4516 This unwind info looks doubtful, as the RA should be at oldSP.
4517 No easy way to detect this problem.
4518 This gives a difference between cached rcec and
4519 current stack trace: the cached rcec is correct. */
4521 /* When returning from main, unwind info becomes erratic.
4522 So, by default, only report errors for main and above,
4523 unless asked to show below main. */
4524 if (reason == NULL) {
4525 UInt fr_main;
4526 Vg_FnNameKind fr_kind = Vg_FnNameNormal;
4527 for (fr_main = 0;
4528 fr_main < HG_(clo_history_backtrace_size);
4529 fr_main++) {
4530 fr_kind = VG_(get_fnname_kind_from_IP)
4531 (cur_ep, frames[fr_main]);
4532 if (fr_kind == Vg_FnNameMain || fr_kind == Vg_FnNameBelowMain)
4533 break;
4535 UInt kh_main;
4536 Vg_FnNameKind kh_kind = Vg_FnNameNormal;
4537 for (kh_main = 0;
4538 kh_main < HG_(clo_history_backtrace_size);
4539 kh_main++) {
4540 kh_kind = VG_(get_fnname_kind_from_IP)
4541 (cur_ep, thr->cached_rcec.frames[kh_main]);
4542 if (kh_kind == Vg_FnNameMain || kh_kind == Vg_FnNameBelowMain)
4543 break;
4545 if (kh_main == fr_main
4546 && kh_kind == fr_kind
4547 && (kh_main < i || (kh_main == i
4548 && kh_kind == Vg_FnNameBelowMain))) {
4549 // found main or below main before the difference
4550 reason = "Below main";
4554 /* We have places where the stack is missing some internal
4555 pthread functions. For such stacktraces, GDB reports only
4556 one function, telling:
4557 #0 0xf7fa81fe in _L_unlock_669 ()
4558 from /lib/i386-linux-gnu/libpthread.so.0
4559 Backtrace stopped: previous frame identical to
4560 this frame (corrupt stack?)
4562 This is when sps and fps are identical.
4563 The cached stack trace is then
4564 ==3336== at 0x40641FE: _L_unlock_669
4565 (pthread_mutex_unlock.c:310)
4566 ==3336== by 0x40302BE: pthread_mutex_unlock
4567 (hg_intercepts.c:710)
4568 ==3336== by 0x80486AF: main (cond_timedwait_test.c:14)
4569 while the 'check stacktrace' is
4570 ==3336== at 0x40641FE: _L_unlock_669
4571 (pthread_mutex_unlock.c:310)
4572 ==3336== by 0x4064206: _L_unlock_669
4573 (pthread_mutex_unlock.c:310)
4574 ==3336== by 0x4064132: __pthread_mutex_unlock_usercnt
4575 (pthread_mutex_unlock.c:57)
4576 ==3336== by 0x40302BE: pthread_mutex_unlock
4577 (hg_intercepts.c:710)
4578 ==3336== by 0x80486AF: main (cond_timedwait_test.c:14) */
4579 if (reason == NULL) {
4580 if ((i > 0
4581 && sps[i] == sps[i-1] && fps[i] == fps[i-1])
4582 || (i < HG_(clo_history_backtrace_size)-1
4583 && sps[i] == sps[i+1] && fps[i] == fps[i+1])) {
4584 reason = "previous||next frame: identical sp and fp";
4587 if (reason == NULL) {
4588 if ((i > 0
4589 && fps[i] == fps[i-1])
4590 || (i < HG_(clo_history_backtrace_size)-1
4591 && fps[i] == fps[i+1])) {
4592 reason = "previous||next frame: identical fp";
4596 /* When we have a read or write 'in the middle of a push instruction',
4597 then the normal backtrace is not very good, while the helgrind
4598 stacktrace is better, as it undoes the not yet fully finished
4599 push instruction before getting the stacktrace. */
4600 if (reason == NULL && thr->hgthread->first_sp_delta != 0) {
4601 reason = "fixupSP probably needed for check stacktrace";
4604 /* Unwinding becomes hectic when running the exit handlers.
4605 None of GDB, cached stacktrace and check stacktrace corresponds.
4606 So, if we find __run_exit_handlers, ignore the difference. */
4607 if (reason == NULL) {
4608 const HChar *fnname;
4609 for (UInt f = 0; f < HG_(clo_history_backtrace_size); f++) {
4610 if (VG_(get_fnname)( cur_ep, frames[f], &fnname)
4611 && VG_(strcmp) ("__run_exit_handlers", fnname) == 0) {
4612 reason = "exit handlers";
4613 break;
4618 // Show what we have found for this difference
4619 if (reason == NULL) {
4620 ok = False;
4621 stats__cached_rcec_diff++;
4622 } else {
4623 ok = True;
4624 stats__cached_rcec_diff_known_reason++;
4626 if (!ok || VG_(clo_verbosity) > 2) {
4627 Bool save_show_below_main = VG_(clo_show_below_main);
4628 VG_(clo_show_below_main) = True;
4629 /* The below error msg reports an unexpected diff in 'frame %d'.
4630 The (maybe wrong) pc found in the cached stacktrace is
4631 'cached_pc %p' while an unwind gives the (maybe wrong)
4632 'check_pc %p'.
4633 After, 'previous_frame0 %p' tells where the cached stacktrace
4634 was taken.
4635 This is then followed by the full resulting cache stack trace
4636 and the full stack trace found doing unwind.
4637 Such a diff can have various origins:
4638 * a bug in the unwinder, when the cached stack trace was taken
4639 at 'previous_frame0'
4640 * a bug in the unwinder, when the check stack trace was taken
4641 (i.e. at current pc).
4642 * a missing 'invalidate cache stack trace' somewhere in the
4643 instructions between 'previous_frame0' and current_pc.
4644 To investigate the last case, typically, disass the range of
4645 instructions where an invalidate cached stack might miss. */
4646 VG_(printf)("%s diff tid %u frame %u "
4647 "cached_pc %p check_pc %p\n",
4648 reason ? reason : "unexpected",
4649 thr->hgthread->coretid,
4651 (void*)thr->cached_rcec.frames[i],
4652 (void*)frames[i]);
4653 VG_(printf)("cached stack trace previous_frame0 %p\n",
4654 (void*)previous_frame0);
4655 VG_(pp_StackTrace)(cur_ep, &previous_frame0, 1);
4656 VG_(printf)("resulting cached stack trace:\n");
4657 VG_(pp_StackTrace)(cur_ep, thr->cached_rcec.frames,
4658 HG_(clo_history_backtrace_size));
4659 VG_(printf)("check stack trace:\n");
4660 VG_(pp_StackTrace)(cur_ep, frames, HG_(clo_history_backtrace_size));
4662 VG_(show_sched_status) (False, // host_stacktrace
4663 False, // stack_usage
4664 False); // exited_threads
4665 if (VG_(clo_vgdb_error) == 1234567890) // HACK TO ALLOW TO DEBUG
4666 VG_(gdbserver) ( thr->hgthread->coretid );
4667 VG_(clo_show_below_main) = save_show_below_main;
4669 break; // Stop giving more errors for this stacktrace.
4672 return ok;
4675 __attribute__((noinline))
4676 static RCEC* get_RCEC ( Thr* thr )
4678 UInt i;
4679 UWord hash;
4680 Addr previous_frame0 = 0; // Assignment needed to silence gcc
4681 RCEC *res;
4682 const Bool thr_cached_rcec_valid = cached_rcec_valid(thr);
4683 const Addr cur_ip = VG_(get_IP)(thr->hgthread->coretid);
4685 if (DEBUG_CACHED_RCEC)
4686 VG_(printf)("get rcec tid %u at IP %p SP %p"
4687 " first_sp_delta %ld cached valid %d\n",
4688 thr->hgthread->coretid,
4689 (void*)cur_ip,
4690 (void*)VG_(get_SP)(thr->hgthread->coretid),
4691 thr->hgthread->first_sp_delta, thr_cached_rcec_valid);
4693 /* If we have a valid cached rcec, derive the new rcec from the cached one
4694 and update the cached one.
4695 Otherwise, compute a fresh rcec. */
4697 if (thr_cached_rcec_valid) {
4698 /* Update the stacktrace of the cached rcec with the current IP */
4699 previous_frame0 = thr->cached_rcec.frames[0];
4700 thr->cached_rcec.frames[0] = cur_ip;
4702 # if defined(VGP_x86_linux)
4703 // See m_stacktrace.c kludge
4704 extern Addr VG_(client__dl_sysinfo_int80);
4705 /// #include pub_core_clientstate needed for the above ????
4706 /// or move the above into a pub_tool_??? tool_stacktrace.h maybe ????
4707 if (VG_(client__dl_sysinfo_int80) != 0 /* we know its address */
4708 && cur_ip >= VG_(client__dl_sysinfo_int80)
4709 && cur_ip < VG_(client__dl_sysinfo_int80)+3
4711 thr->cached_rcec.frames[0]
4712 = (ULong) *(Addr*)(UWord)VG_(get_SP)(thr->hgthread->coretid);
4714 # endif
4716 if (previous_frame0 == thr->cached_rcec.frames[0])
4717 stats__cached_rcec_identical++;
4718 else
4719 stats__cached_rcec_updated++;
4720 } else {
4721 /* Compute a fresh stacktrace. */
4722 main_get_stacktrace( thr, &thr->cached_rcec.frames[0],
4723 HG_(clo_history_backtrace_size) );
4724 if (DEBUG_CACHED_RCEC) {
4725 Bool save_show_below_main = VG_(clo_show_below_main);
4726 VG_(clo_show_below_main) = True;
4727 VG_(printf)("caching stack trace:\n");
4728 VG_(pp_StackTrace)(VG_(current_DiEpoch)(),
4729 &thr->cached_rcec.frames[0],
4730 HG_(clo_history_backtrace_size));
4731 VG_(clo_show_below_main) = save_show_below_main;
4733 stats__cached_rcec_fresh++;
4736 hash = 0;
4737 for (i = 0; i < HG_(clo_history_backtrace_size); i++) {
4738 hash ^= thr->cached_rcec.frames[i];
4739 hash = ROLW(hash, 19);
4741 thr->cached_rcec.frames_hash = hash;
4742 res = ctxt__find_or_add( &thr->cached_rcec );
4744 if (UNLIKELY(HG_(clo_sanity_flags) & SCE_ACCESS)
4745 && thr_cached_rcec_valid) {
4746 /* In case the cached and check differ, invalidate the cached rcec.
4747 We have less duplicated diffs reported afterwards. */
4748 if (!check_cached_rcec_ok (thr, previous_frame0))
4749 set_cached_rcec_validity(thr, False);
4750 } else {
4751 if (HG_(clo_delta_stacktrace) && !thr_cached_rcec_valid)
4752 set_cached_rcec_validity(thr, True);
4755 return res;
4758 ///////////////////////////////////////////////////////
4759 //// Part (2):
4760 /// A hashtable guest-addr -> OldRef, that refers to (1)
4761 /// Note: we use the guest address as key. This means that the entries
4762 /// for multiple threads accessing the same address will land in the same
4763 /// bucket. It might be nice to have a better distribution of the
4764 /// OldRef in the hashtable by using ask key the guestaddress ^ tsw.
4765 /// The problem is that when a race is reported on a ga, we need to retrieve
4766 /// efficiently the accesses to ga by other threads, only using the ga.
4767 /// Measurements on firefox have shown that the chain length is reasonable.
4769 /* Records an access: a thread, a context (size & writeness) and the
4770 number of held locks. The size (1,2,4,8) is stored as is in szB.
4771 Note that szB uses more bits than needed to store a size up to 8.
4772 This allows to use a TSW as a fully initialised UInt e.g. in
4773 cmp_oldref_tsw. If needed, a more compact representation of szB
4774 can be done (e.g. use only 4 bits, or use only 2 bits and encode the
4775 size (1,2,4,8) as 00 = 1, 01 = 2, 10 = 4, 11 = 8. */
4776 typedef
4777 struct {
4778 UInt thrid : SCALARTS_N_THRBITS;
4779 UInt szB : 32 - SCALARTS_N_THRBITS - 1;
4780 UInt isW : 1;
4781 } TSW; // Thread+Size+Writeness
4782 typedef
4783 struct {
4784 TSW tsw;
4785 WordSetID locksHeldW;
4786 RCEC* rcec;
4788 Thr_n_RCEC;
4790 typedef
4791 struct OldRef {
4792 struct OldRef *ht_next; // to link hash table nodes together.
4793 UWord ga; // hash_table key, == address for which we record an access.
4794 struct OldRef *prev; // to refs older than this one
4795 struct OldRef *next; // to refs newer that this one
4796 UWord stamp; // allows to order (by time of access) 2 OldRef
4797 Thr_n_RCEC acc;
4799 OldRef;
4801 /* Returns the or->tsw as an UInt */
4802 static inline UInt oldref_tsw (const OldRef* or)
4804 return *(const UInt*)(&or->acc.tsw);
4807 /* Compare the tsw component for 2 OldRef.
4808 Used for OldRef hashtable (which already verifies equality of the
4809 'key' part. */
4810 static Word cmp_oldref_tsw (const void* node1, const void* node2 )
4812 const UInt tsw1 = oldref_tsw(node1);
4813 const UInt tsw2 = oldref_tsw(node2);
4815 if (tsw1 < tsw2) return -1;
4816 if (tsw1 > tsw2) return 1;
4817 return 0;
4821 //////////// BEGIN OldRef pool allocator
4822 static PoolAlloc* oldref_pool_allocator;
4823 // Note: We only allocate elements in this pool allocator, we never free them.
4824 // We stop allocating elements at VG_(clo_conflict_cache_size).
4825 //////////// END OldRef pool allocator
4827 static OldRef mru;
4828 static OldRef lru;
4829 // A double linked list, chaining all OldREf in a mru/lru order.
4830 // mru/lru are sentinel nodes.
4831 // Whenever an oldref is re-used, its position is changed as the most recently
4832 // used (i.e. pointed to by mru.prev).
4833 // When a new oldref is needed, it is allocated from the pool
4834 // if we have not yet reached --conflict-cache-size.
4835 // Otherwise, if all oldref have already been allocated,
4836 // the least recently used (i.e. pointed to by lru.next) is re-used.
4837 // When an OldRef is used, it is moved as the most recently used entry
4838 // (i.e. pointed to by mru.prev).
4840 // Removes r from the double linked list
4841 // Note: we do not need to test for special cases such as
4842 // NULL next or prev pointers, because we have sentinel nodes
4843 // at both sides of the list. So, a node is always forward and
4844 // backward linked.
4845 static inline void OldRef_unchain(OldRef *r)
4847 r->next->prev = r->prev;
4848 r->prev->next = r->next;
4851 // Insert new as the newest OldRef
4852 // Similarly to OldRef_unchain, no need to test for NULL
4853 // pointers, as e.g. mru.prev is always guaranteed to point
4854 // to a non NULL node (lru when the list is empty).
4855 static inline void OldRef_newest(OldRef *new)
4857 new->next = &mru;
4858 new->prev = mru.prev;
4859 mru.prev = new;
4860 new->prev->next = new;
4864 static VgHashTable* oldrefHT = NULL; /* Hash table* OldRef* */
4865 static UWord oldrefHTN = 0; /* # elems in oldrefHT */
4866 /* Note: the nr of ref in the oldrefHT will always be equal to
4867 the nr of elements that were allocated from the OldRef pool allocator
4868 as we never free an OldRef : we just re-use them. */
4871 /* allocates a new OldRef or re-use the lru one if all allowed OldRef
4872 have already been allocated. */
4873 static OldRef* alloc_or_reuse_OldRef ( void )
4875 if (oldrefHTN < HG_(clo_conflict_cache_size)) {
4876 oldrefHTN++;
4877 return VG_(allocEltPA) ( oldref_pool_allocator );
4878 } else {
4879 OldRef *oldref_ht;
4880 OldRef *oldref = lru.next;
4882 OldRef_unchain(oldref);
4883 oldref_ht = VG_(HT_gen_remove) (oldrefHT, oldref, cmp_oldref_tsw);
4884 tl_assert (oldref == oldref_ht);
4885 ctxt__rcdec( oldref->acc.rcec );
4886 return oldref;
4891 inline static UInt min_UInt ( UInt a, UInt b ) {
4892 return a < b ? a : b;
4895 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4896 first interval is lower, 1 if the first interval is higher, and 0
4897 if there is any overlap. Redundant paranoia with casting is there
4898 following what looked distinctly like a bug in gcc-4.1.2, in which
4899 some of the comparisons were done signedly instead of
4900 unsignedly. */
4901 /* Copied from exp-ptrcheck/sg_main.c */
4902 static inline Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4903 Addr a2, SizeT n2 ) {
4904 UWord a1w = (UWord)a1;
4905 UWord n1w = (UWord)n1;
4906 UWord a2w = (UWord)a2;
4907 UWord n2w = (UWord)n2;
4908 tl_assert(n1w > 0 && n2w > 0);
4909 if (a1w + n1w <= a2w) return -1L;
4910 if (a2w + n2w <= a1w) return 1L;
4911 return 0;
4914 static UWord event_map_stamp = 0; // Used to stamp each OldRef when touched.
4916 static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
4918 OldRef example;
4919 OldRef* ref;
4920 RCEC* rcec;
4922 tl_assert(thr);
4923 ThrID thrid = thr->thrid;
4924 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4926 WordSetID locksHeldW = thr->hgthread->locksetW;
4928 rcec = get_RCEC( thr );
4930 /* Look in the oldrefHT to see if we already have a record for this
4931 address/thr/sz/isW. */
4932 example.ga = a;
4933 example.acc.tsw = (TSW) {.thrid = thrid,
4934 .szB = szB,
4935 .isW = (UInt)(isW & 1)};
4936 ref = VG_(HT_gen_lookup) (oldrefHT, &example, cmp_oldref_tsw);
4938 if (ref) {
4939 /* We already have a record for this address and this (thrid, R/W,
4940 size) triple. */
4941 tl_assert (ref->ga == a);
4943 /* thread 'thr' has an entry. Update its RCEC, if it differs. */
4944 if (rcec == ref->acc.rcec)
4945 stats__ctxt_eq_tsw_eq_rcec++;
4946 else {
4947 stats__ctxt_eq_tsw_neq_rcec++;
4948 ctxt__rcdec( ref->acc.rcec );
4949 ctxt__rcinc(rcec);
4950 ref->acc.rcec = rcec;
4952 tl_assert(ref->acc.tsw.thrid == thrid);
4953 /* Update the stamp, RCEC and the W-held lockset. */
4954 ref->stamp = event_map_stamp;
4955 ref->acc.locksHeldW = locksHeldW;
4957 OldRef_unchain(ref);
4958 OldRef_newest(ref);
4960 } else {
4961 tl_assert (szB == 4 || szB == 8 ||szB == 1 || szB == 2);
4962 // We only need to check the size the first time we insert a ref.
4963 // Check for most frequent cases first
4964 // Note: we could support a szB up to 1 << (32 - SCALARTS_N_THRBITS - 1)
4966 /* We don't have a record for this address+triple. Create a new one. */
4967 stats__ctxt_neq_tsw_neq_rcec++;
4968 ref = alloc_or_reuse_OldRef();
4969 ref->ga = a;
4970 ref->acc.tsw = (TSW) {.thrid = thrid,
4971 .szB = szB,
4972 .isW = (UInt)(isW & 1)};
4973 ref->stamp = event_map_stamp;
4974 ref->acc.locksHeldW = locksHeldW;
4975 ref->acc.rcec = rcec;
4976 ctxt__rcinc(rcec);
4978 VG_(HT_add_node) ( oldrefHT, ref );
4979 OldRef_newest (ref);
4981 event_map_stamp++;
4985 /* Extract info from the conflicting-access machinery.
4986 Returns the most recent conflicting access with thr/[a, a+szB[/isW. */
4987 Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
4988 /*OUT*/Thr** resThr,
4989 /*OUT*/SizeT* resSzB,
4990 /*OUT*/Bool* resIsW,
4991 /*OUT*/WordSetID* locksHeldW,
4992 Thr* thr, Addr a, SizeT szB, Bool isW )
4994 Word i, j;
4995 OldRef *ref = NULL;
4996 SizeT ref_szB = 0;
4998 OldRef *cand_ref;
4999 SizeT cand_ref_szB;
5000 Addr cand_a;
5002 Addr toCheck[15];
5003 Int nToCheck = 0;
5005 tl_assert(thr);
5006 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
5008 ThrID thrid = thr->thrid;
5010 toCheck[nToCheck++] = a;
5011 for (i = -7; i < (Word)szB; i++) {
5012 if (i != 0)
5013 toCheck[nToCheck++] = a + i;
5015 tl_assert(nToCheck <= 15);
5017 /* Now see if we can find a suitable matching event for
5018 any of the addresses in toCheck[0 .. nToCheck-1]. */
5019 for (j = 0; j < nToCheck; j++) {
5021 cand_a = toCheck[j];
5022 // VG_(printf)("test %ld %p\n", j, cand_a);
5024 /* Find the first HT element for this address.
5025 We might have several of these. They will be linked via ht_next.
5026 We however need to check various elements as the list contains
5027 all elements that map to the same bucket. */
5028 for (cand_ref = VG_(HT_lookup)( oldrefHT, cand_a );
5029 cand_ref; cand_ref = cand_ref->ht_next) {
5030 if (cand_ref->ga != cand_a)
5031 /* OldRef for another address in this HT bucket. Ignore. */
5032 continue;
5034 if (cand_ref->acc.tsw.thrid == thrid)
5035 /* This is an access by the same thread, but we're only
5036 interested in accesses from other threads. Ignore. */
5037 continue;
5039 if ((!cand_ref->acc.tsw.isW) && (!isW))
5040 /* We don't want to report a read racing against another
5041 read; that's stupid. So in this case move on. */
5042 continue;
5044 cand_ref_szB = cand_ref->acc.tsw.szB;
5045 if (cmp_nonempty_intervals(a, szB, cand_a, cand_ref_szB) != 0)
5046 /* No overlap with the access we're asking about. Ignore. */
5047 continue;
5049 /* We have a match. Keep this match if it is newer than
5050 the previous match. Note that stamp are Unsigned Words, and
5051 for long running applications, event_map_stamp might have cycled.
5052 So, 'roll' each stamp using event_map_stamp to have the
5053 stamps in the good order, in case event_map_stamp recycled. */
5054 if (!ref
5055 || (ref->stamp - event_map_stamp)
5056 < (cand_ref->stamp - event_map_stamp)) {
5057 ref = cand_ref;
5058 ref_szB = cand_ref_szB;
5062 if (ref) {
5063 /* return with success */
5064 Int n, maxNFrames;
5065 RCEC* ref_rcec = ref->acc.rcec;
5066 tl_assert(ref->acc.tsw.thrid);
5067 tl_assert(ref_rcec);
5068 tl_assert(ref_rcec->magic == RCEC_MAGIC);
5069 tl_assert(ref_szB >= 1);
5070 /* Count how many non-zero frames we have. */
5071 maxNFrames = min_UInt(HG_(clo_history_backtrace_size),
5072 VG_(clo_backtrace_size));
5073 for (n = 0; n < maxNFrames; n++) {
5074 if (0 == ref_rcec->frames[n]) break;
5076 *resEC = VG_(make_ExeContext_from_StackTrace)(&ref_rcec->frames[0],
5078 *resThr = Thr__from_ThrID(ref->acc.tsw.thrid);
5079 *resSzB = ref_szB;
5080 *resIsW = ref->acc.tsw.isW;
5081 *locksHeldW = ref->acc.locksHeldW;
5082 stats__evm__lookup_found++;
5083 return True;
5086 /* consider next address in toCheck[] */
5087 } /* for (j = 0; j < nToCheck; j++) */
5089 /* really didn't find anything. */
5090 stats__evm__lookup_notfound++;
5091 return False;
5095 void libhb_event_map_access_history ( Addr a, SizeT szB, Access_t fn )
5097 OldRef *ref = lru.next;
5098 SizeT ref_szB;
5099 Int n;
5101 while (ref != &mru) {
5102 ref_szB = ref->acc.tsw.szB;
5103 if (cmp_nonempty_intervals(a, szB, ref->ga, ref_szB) == 0) {
5104 RCEC* ref_rcec = ref->acc.rcec;
5105 for (n = 0; n < HG_(clo_history_backtrace_size); n++) {
5106 if (0 == ref_rcec->frames[n]) {
5107 break;
5110 (*fn)(&ref_rcec->frames[0], n,
5111 Thr__from_ThrID(ref->acc.tsw.thrid),
5112 ref->ga,
5113 ref_szB,
5114 ref->acc.tsw.isW,
5115 ref->acc.locksHeldW);
5117 tl_assert (ref->next == &mru
5118 || ((ref->stamp - event_map_stamp)
5119 < ref->next->stamp - event_map_stamp));
5120 ref = ref->next;
5124 static void event_map_init ( void )
5126 Word i;
5128 /* Context (RCEC) pool allocator */
5129 rcec_pool_allocator
5130 = VG_(newPA) (
5131 sizeof(RCEC) + 2 * HG_(clo_history_backtrace_size) * sizeof(UWord),
5132 1000 /* RCECs per pool */,
5133 HG_(zalloc),
5134 "libhb.event_map_init.1 (RCEC pools)",
5135 HG_(free)
5138 /* Context table */
5139 tl_assert(!contextTab);
5140 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
5141 N_RCEC_TAB * sizeof(RCEC*) );
5142 for (i = 0; i < N_RCEC_TAB; i++)
5143 contextTab[i] = NULL;
5145 /* Oldref pool allocator */
5146 oldref_pool_allocator = VG_(newPA)(
5147 sizeof(OldRef),
5148 1000 /* OldRefs per pool */,
5149 HG_(zalloc),
5150 "libhb.event_map_init.3 (OldRef pools)",
5151 HG_(free)
5154 /* Oldref hashtable */
5155 tl_assert(!oldrefHT);
5156 oldrefHT = VG_(HT_construct) ("libhb.event_map_init.4 (oldref hashtable)");
5158 oldrefHTN = 0;
5159 mru.prev = &lru;
5160 mru.next = NULL;
5161 lru.prev = NULL;
5162 lru.next = &mru;
5163 mru.acc = (Thr_n_RCEC) {.tsw = {.thrid = 0,
5164 .szB = 0,
5165 .isW = 0},
5166 .locksHeldW = 0,
5167 .rcec = NULL};
5168 lru.acc = mru.acc;
5171 static void event_map__check_reference_counts ( void )
5173 RCEC* rcec;
5174 OldRef* oldref;
5175 Word i;
5176 UWord nEnts = 0;
5178 /* Set the 'check' reference counts to zero. Also, optionally
5179 check that the real reference counts are non-zero. We allow
5180 these to fall to zero before a GC, but the GC must get rid of
5181 all those that are zero, hence none should be zero after a
5182 GC. */
5183 for (i = 0; i < N_RCEC_TAB; i++) {
5184 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
5185 nEnts++;
5186 tl_assert(rcec);
5187 tl_assert(rcec->magic == RCEC_MAGIC);
5188 rcec->rcX = 0;
5192 /* check that the stats are sane */
5193 tl_assert(nEnts == stats__ctxt_tab_curr);
5194 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
5196 /* visit all the referencing points, inc check ref counts */
5197 VG_(HT_ResetIter)( oldrefHT );
5198 oldref = VG_(HT_Next)( oldrefHT );
5199 while (oldref) {
5200 tl_assert (oldref->acc.tsw.thrid);
5201 tl_assert (oldref->acc.rcec);
5202 tl_assert (oldref->acc.rcec->magic == RCEC_MAGIC);
5203 oldref->acc.rcec->rcX++;
5204 oldref = VG_(HT_Next)( oldrefHT );
5207 /* compare check ref counts with actual */
5208 for (i = 0; i < N_RCEC_TAB; i++) {
5209 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
5210 tl_assert(rcec->rc == rcec->rcX);
5215 __attribute__((noinline))
5216 static void do_RCEC_GC ( void )
5218 UInt i;
5220 if (VG_(clo_stats)) {
5221 static UInt ctr = 1;
5222 VG_(message)(Vg_DebugMsg,
5223 "libhb: RCEC GC: #%u %lu slots,"
5224 " %lu cur ents(ref'd %lu),"
5225 " %lu max ents\n",
5226 ctr++,
5227 (UWord)N_RCEC_TAB,
5228 stats__ctxt_tab_curr, RCEC_referenced,
5229 stats__ctxt_tab_max );
5231 tl_assert (stats__ctxt_tab_curr > RCEC_referenced);
5233 /* Throw away all RCECs with zero reference counts */
5234 for (i = 0; i < N_RCEC_TAB; i++) {
5235 RCEC** pp = &contextTab[i];
5236 RCEC* p = *pp;
5237 while (p) {
5238 if (p->rc == 0) {
5239 *pp = p->next;
5240 free_RCEC(p);
5241 p = *pp;
5242 tl_assert(stats__ctxt_tab_curr > 0);
5243 stats__ctxt_rcec_gc_discards++;
5244 stats__ctxt_tab_curr--;
5245 } else {
5246 pp = &p->next;
5247 p = p->next;
5252 tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
5255 /////////////////////////////////////////////////////////
5256 // //
5257 // Core MSM //
5258 // //
5259 /////////////////////////////////////////////////////////
5261 /* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
5262 Nov 08, and again after [...],
5263 June 09. */
5265 static ULong stats__msmcread = 0;
5266 static ULong stats__msmcread_change = 0;
5267 static ULong stats__msmcwrite = 0;
5268 static ULong stats__msmcwrite_change = 0;
5270 /* Some notes on the H1 history mechanism:
5272 Transition rules are:
5274 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
5275 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
5277 After any access by a thread T to a location L, L's constraint pair
5278 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
   After a race by thread T conflicting with some previous access by
   some other thread U, for a location with constraint (before
   processing the later access) (Cr,Cw), then Cw[U] is the segment in
   which the previous access lies.
5285 Hence in record_race_info, we pass in Cfailed and Kfailed, which
5286 are compared so as to find out which thread(s) this access
5287 conflicts with. Once that is established, we also require the
5288 pre-update Cw for the location, so we can index into it for those
5289 threads, to get the scalar clock values for the point at which the
5290 former accesses were made. (In fact we only bother to do any of
5291 this for an arbitrarily chosen one of the conflicting threads, as
5292 that's simpler, it avoids flooding the user with vast amounts of
5293 mostly useless information, and because the program is wrong if it
5294 contains any races at all -- so we don't really need to show all
5295 conflicting access pairs initially, so long as we only show none if
5296 none exist).
5300 That requires the auxiliary proof that
5302 (Cr `join` Kw)[T] == Kw[T]
5304 Why should that be true? Because for any thread T, Kw[T] >= the
5305 scalar clock value for T known by any other thread. In other
5306 words, because T's value for its own scalar clock is at least as up
5307 to date as the value for it known by any other thread (that is true
5308 for both the R- and W- scalar clocks). Hence no other thread will
5309 be able to feed in a value for that element (indirectly via a
5310 constraint) which will exceed Kw[T], and hence the join cannot
5311 cause that particular element to advance.
5314 __attribute__((noinline))
5315 static void record_race_info ( Thr* acc_thr,
5316 Addr acc_addr, SizeT szB, Bool isWrite,
5317 VtsID Cfailed,
5318 VtsID Kfailed,
5319 VtsID Cw )
5321 /* Call here to report a race. We just hand it onwards to
5322 HG_(record_error_Race). If that in turn discovers that the
5323 error is going to be collected, then, at history_level 2, that
5324 queries the conflicting-event map. The alternative would be to
5325 query it right here. But that causes a lot of pointless queries
5326 for errors which will shortly be discarded as duplicates, and
5327 can become a performance overhead; so we defer the query until
5328 we know the error is not a duplicate. */
5330 /* Stacks for the bounds of the (or one of the) conflicting
5331 segment(s). These are only set at history_level 1. */
5332 ExeContext* hist1_seg_start = NULL;
5333 ExeContext* hist1_seg_end = NULL;
5334 Thread* hist1_conf_thr = NULL;
5336 tl_assert(acc_thr);
5337 tl_assert(acc_thr->hgthread);
5338 tl_assert(acc_thr->hgthread->hbthr == acc_thr);
5339 tl_assert(HG_(clo_history_level) <= 2);
5341 if (HG_(clo_history_level) == 1) {
5342 Bool found;
5343 Word firstIx, lastIx;
5344 ULong_n_EC key;
5346 /* At history_level 1, we must round up the relevant stack-pair
5347 for the conflicting segment right now. This is because
5348 deferring it is complex; we can't (easily) put Kfailed and
5349 Cfailed into the XError and wait for later without
5350 getting tied up in difficulties with VtsID reference
5351 counting. So just do it now. */
5352 Thr* confThr;
5353 ULong confTym = 0;
5354 /* Which thread are we in conflict with? There may be more than
5355 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
5356 (in fact it's the one with the lowest Thr* value). */
5357 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
5358 /* This must exist! since if it was NULL then there's no
5359 conflict (semantics of return value of
5360 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
5361 called us, just checked exactly this -- that there was in
5362 fact a race. */
5363 tl_assert(confThr);
5365 /* Get the scalar clock value that the conflicting thread
5366 introduced into the constraint. A careful examination of the
5367 base machine rules shows that this must be the same as the
5368 conflicting thread's scalar clock when it created this
5369 constraint. Hence we know the scalar clock of the
5370 conflicting thread when the conflicting access was made. */
5371 confTym = VtsID__indexAt( Cfailed, confThr );
5373 /* Using this scalar clock, index into the conflicting thread's
5374 collection of stack traces made each time its vector clock
5375 (hence its scalar clock) changed. This gives the stack
5376 traces at the start and end of the conflicting segment (well,
5377 as per comment just above, of one of the conflicting
5378 segments, if there are more than one). */
5379 key.ull = confTym;
5380 key.ec = NULL;
5381 /* tl_assert(confThr); -- asserted just above */
5382 tl_assert(confThr->local_Kws_n_stacks);
5383 firstIx = lastIx = 0;
5384 found = VG_(lookupXA_UNSAFE)(
5385 confThr->local_Kws_n_stacks,
5386 &key, &firstIx, &lastIx,
5387 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
5389 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
5390 "confTym %llu found %d (%ld,%ld)\n",
5391 Cfailed, Kfailed, Cw,
5392 confThr, confTym, found, firstIx, lastIx);
5393 /* We can't indefinitely collect stack traces at VTS
5394 transitions, since we'd eventually run out of memory. Hence
5395 note_local_Kw_n_stack_for will eventually throw away old
5396 ones, which in turn means we might fail to find index value
5397 confTym in the array. */
5398 if (found) {
5399 ULong_n_EC *pair_start, *pair_end;
5400 pair_start
5401 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
5402 hist1_seg_start = pair_start->ec;
5403 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
5404 pair_end
5405 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
5406 lastIx+1 );
5407 /* from properties of VG_(lookupXA) and the comparison fn used: */
5408 tl_assert(pair_start->ull < pair_end->ull);
5409 hist1_seg_end = pair_end->ec;
5410 /* Could do a bit better here. It may be that pair_end
5411 doesn't have a stack, but the following entries in the
5412 array have the same scalar Kw and to have a stack. So
5413 we should search a bit further along the array than
5414 lastIx+1 if hist1_seg_end is NULL. */
5415 } else {
5416 if (!confThr->llexit_done)
5417 hist1_seg_end = main_get_EC( confThr );
5419 // seg_start could be NULL iff this is the first stack in the thread
5420 //if (seg_start) VG_(pp_ExeContext)(seg_start);
5421 //if (seg_end) VG_(pp_ExeContext)(seg_end);
5422 hist1_conf_thr = confThr->hgthread;
5426 HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
5427 szB, isWrite,
5428 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
5431 static Bool is_sane_SVal_C ( SVal sv ) {
5432 Bool leq;
5433 if (!SVal__isC(sv)) return True;
5434 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
5435 return leq;
5439 /* Compute new state following a read */
5440 static inline SVal msmcread ( SVal svOld,
5441 /* The following are only needed for
5442 creating error reports. */
5443 Thr* acc_thr,
5444 Addr acc_addr, SizeT szB )
5446 SVal svNew = SVal_INVALID;
5447 stats__msmcread++;
5449 /* Redundant sanity check on the constraints */
5450 if (CHECK_MSM) {
5451 tl_assert(is_sane_SVal_C(svOld));
5454 if (LIKELY(SVal__isC(svOld))) {
5455 VtsID tviR = acc_thr->viR;
5456 VtsID tviW = acc_thr->viW;
5457 VtsID rmini = SVal__unC_Rmin(svOld);
5458 VtsID wmini = SVal__unC_Wmin(svOld);
5459 Bool leq = VtsID__cmpLEQ(rmini,tviR);
5460 if (LIKELY(leq)) {
5461 /* no race */
5462 /* Note: RWLOCK subtlety: use tviW, not tviR */
5463 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5464 goto out;
5465 } else {
5466 /* assert on sanity of constraints. */
5467 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5468 tl_assert(leqxx);
5469 // same as in non-race case
5470 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5471 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
5472 rmini, /* Cfailed */
5473 tviR, /* Kfailed */
5474 wmini /* Cw */ );
5475 goto out;
5478 if (SVal__isA(svOld)) {
5479 /* reading no-access memory (sigh); leave unchanged */
5480 /* check for no pollution */
5481 tl_assert(svOld == SVal_NOACCESS);
5482 svNew = SVal_NOACCESS;
5483 goto out;
5485 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
5486 tl_assert(0);
5488 out:
5489 if (CHECK_MSM) {
5490 tl_assert(is_sane_SVal_C(svNew));
5492 if (UNLIKELY(svNew != svOld)) {
5493 tl_assert(svNew != SVal_INVALID);
5494 if (HG_(clo_history_level) >= 2
5495 && SVal__isC(svOld) && SVal__isC(svNew)) {
5496 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
5497 stats__msmcread_change++;
5500 return svNew;
5504 /* Compute new state following a write */
5505 static inline SVal msmcwrite ( SVal svOld,
5506 /* The following are only needed for
5507 creating error reports. */
5508 Thr* acc_thr,
5509 Addr acc_addr, SizeT szB )
5511 SVal svNew = SVal_INVALID;
5512 stats__msmcwrite++;
5514 /* Redundant sanity check on the constraints */
5515 if (CHECK_MSM) {
5516 tl_assert(is_sane_SVal_C(svOld));
5519 if (LIKELY(SVal__isC(svOld))) {
5520 VtsID tviW = acc_thr->viW;
5521 VtsID wmini = SVal__unC_Wmin(svOld);
5522 Bool leq = VtsID__cmpLEQ(wmini,tviW);
5523 if (LIKELY(leq)) {
5524 /* no race */
5525 svNew = SVal__mkC( tviW, tviW );
5526 goto out;
5527 } else {
5528 VtsID rmini = SVal__unC_Rmin(svOld);
5529 /* assert on sanity of constraints. */
5530 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5531 tl_assert(leqxx);
5532 // same as in non-race case
5533 // proof: in the non-race case, we have
5534 // rmini <= wmini (invar on constraints)
5535 // tviW <= tviR (invar on thread clocks)
5536 // wmini <= tviW (from run-time check)
5537 // hence from transitivity of <= we have
5538 // rmini <= wmini <= tviW
5539 // and so join(rmini,tviW) == tviW
5540 // and join(wmini,tviW) == tviW
5541 // qed.
5542 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5543 VtsID__join2(wmini, tviW) );
5544 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
5545 wmini, /* Cfailed */
5546 tviW, /* Kfailed */
5547 wmini /* Cw */ );
5548 goto out;
5551 if (SVal__isA(svOld)) {
5552 /* writing no-access memory (sigh); leave unchanged */
5553 /* check for no pollution */
5554 tl_assert(svOld == SVal_NOACCESS);
5555 svNew = SVal_NOACCESS;
5556 goto out;
5558 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
5559 tl_assert(0);
5561 out:
5562 if (CHECK_MSM) {
5563 tl_assert(is_sane_SVal_C(svNew));
5565 if (UNLIKELY(svNew != svOld)) {
5566 tl_assert(svNew != SVal_INVALID);
5567 if (HG_(clo_history_level) >= 2
5568 && SVal__isC(svOld) && SVal__isC(svNew)) {
5569 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
5570 stats__msmcwrite_change++;
5573 return svNew;
5577 /////////////////////////////////////////////////////////
5578 // //
5579 // Apply core MSM to specific memory locations //
5580 // //
5581 /////////////////////////////////////////////////////////
5583 /*------------- ZSM accesses: 8 bit sapply ------------- */
5585 static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
5586 CacheLine* cl;
5587 UWord cloff, tno, toff;
5588 SVal svOld, svNew;
5589 UShort descr;
5590 stats__cline_cread08s++;
5591 cl = get_cacheline(a);
5592 cloff = get_cacheline_offset(a);
5593 tno = get_treeno(a);
5594 toff = get_tree_offset(a); /* == 0 .. 7 */
5595 descr = cl->descrs[tno];
5596 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5597 SVal* tree = &cl->svals[tno << 3];
5598 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5599 if (CHECK_ZSM)
5600 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5602 svOld = cl->svals[cloff];
5603 svNew = msmcread( svOld, thr,a,1 );
5604 if (CHECK_ZSM)
5605 tl_assert(svNew != SVal_INVALID);
5606 cl->svals[cloff] = svNew;
5609 static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
5610 CacheLine* cl;
5611 UWord cloff, tno, toff;
5612 SVal svOld, svNew;
5613 UShort descr;
5614 stats__cline_cwrite08s++;
5615 cl = get_cacheline(a);
5616 cloff = get_cacheline_offset(a);
5617 tno = get_treeno(a);
5618 toff = get_tree_offset(a); /* == 0 .. 7 */
5619 descr = cl->descrs[tno];
5620 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5621 SVal* tree = &cl->svals[tno << 3];
5622 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5623 if (CHECK_ZSM)
5624 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5626 svOld = cl->svals[cloff];
5627 svNew = msmcwrite( svOld, thr,a,1 );
5628 if (CHECK_ZSM)
5629 tl_assert(svNew != SVal_INVALID);
5630 cl->svals[cloff] = svNew;
5633 /*------------- ZSM accesses: 16 bit sapply ------------- */
5635 static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
5636 CacheLine* cl;
5637 UWord cloff, tno, toff;
5638 SVal svOld, svNew;
5639 UShort descr;
5640 stats__cline_cread16s++;
5641 if (UNLIKELY(!aligned16(a))) goto slowcase;
5642 cl = get_cacheline(a);
5643 cloff = get_cacheline_offset(a);
5644 tno = get_treeno(a);
5645 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5646 descr = cl->descrs[tno];
5647 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5648 if (valid_value_is_below_me_16(descr, toff)) {
5649 goto slowcase;
5650 } else {
5651 SVal* tree = &cl->svals[tno << 3];
5652 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5654 if (CHECK_ZSM)
5655 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5657 svOld = cl->svals[cloff];
5658 svNew = msmcread( svOld, thr,a,2 );
5659 if (CHECK_ZSM)
5660 tl_assert(svNew != SVal_INVALID);
5661 cl->svals[cloff] = svNew;
5662 return;
5663 slowcase: /* misaligned, or must go further down the tree */
5664 stats__cline_16to8splits++;
5665 zsm_sapply08__msmcread( thr, a + 0 );
5666 zsm_sapply08__msmcread( thr, a + 1 );
5669 static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
5670 CacheLine* cl;
5671 UWord cloff, tno, toff;
5672 SVal svOld, svNew;
5673 UShort descr;
5674 stats__cline_cwrite16s++;
5675 if (UNLIKELY(!aligned16(a))) goto slowcase;
5676 cl = get_cacheline(a);
5677 cloff = get_cacheline_offset(a);
5678 tno = get_treeno(a);
5679 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5680 descr = cl->descrs[tno];
5681 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5682 if (valid_value_is_below_me_16(descr, toff)) {
5683 goto slowcase;
5684 } else {
5685 SVal* tree = &cl->svals[tno << 3];
5686 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5688 if (CHECK_ZSM)
5689 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5691 svOld = cl->svals[cloff];
5692 svNew = msmcwrite( svOld, thr,a,2 );
5693 if (CHECK_ZSM)
5694 tl_assert(svNew != SVal_INVALID);
5695 cl->svals[cloff] = svNew;
5696 return;
5697 slowcase: /* misaligned, or must go further down the tree */
5698 stats__cline_16to8splits++;
5699 zsm_sapply08__msmcwrite( thr, a + 0 );
5700 zsm_sapply08__msmcwrite( thr, a + 1 );
5703 /*------------- ZSM accesses: 32 bit sapply ------------- */
5705 static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
5706 CacheLine* cl;
5707 UWord cloff, tno, toff;
5708 SVal svOld, svNew;
5709 UShort descr;
5710 stats__cline_cread32s++;
5711 if (UNLIKELY(!aligned32(a))) goto slowcase;
5712 cl = get_cacheline(a);
5713 cloff = get_cacheline_offset(a);
5714 tno = get_treeno(a);
5715 toff = get_tree_offset(a); /* == 0 or 4 */
5716 descr = cl->descrs[tno];
5717 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5718 if (valid_value_is_above_me_32(descr, toff)) {
5719 SVal* tree = &cl->svals[tno << 3];
5720 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5721 } else {
5722 goto slowcase;
5724 if (CHECK_ZSM)
5725 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5727 svOld = cl->svals[cloff];
5728 svNew = msmcread( svOld, thr,a,4 );
5729 if (CHECK_ZSM)
5730 tl_assert(svNew != SVal_INVALID);
5731 cl->svals[cloff] = svNew;
5732 return;
5733 slowcase: /* misaligned, or must go further down the tree */
5734 stats__cline_32to16splits++;
5735 zsm_sapply16__msmcread( thr, a + 0 );
5736 zsm_sapply16__msmcread( thr, a + 2 );
5739 static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
5740 CacheLine* cl;
5741 UWord cloff, tno, toff;
5742 SVal svOld, svNew;
5743 UShort descr;
5744 stats__cline_cwrite32s++;
5745 if (UNLIKELY(!aligned32(a))) goto slowcase;
5746 cl = get_cacheline(a);
5747 cloff = get_cacheline_offset(a);
5748 tno = get_treeno(a);
5749 toff = get_tree_offset(a); /* == 0 or 4 */
5750 descr = cl->descrs[tno];
5751 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5752 if (valid_value_is_above_me_32(descr, toff)) {
5753 SVal* tree = &cl->svals[tno << 3];
5754 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5755 } else {
5756 goto slowcase;
5758 if (CHECK_ZSM)
5759 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5761 svOld = cl->svals[cloff];
5762 svNew = msmcwrite( svOld, thr,a,4 );
5763 if (CHECK_ZSM)
5764 tl_assert(svNew != SVal_INVALID);
5765 cl->svals[cloff] = svNew;
5766 return;
5767 slowcase: /* misaligned, or must go further down the tree */
5768 stats__cline_32to16splits++;
5769 zsm_sapply16__msmcwrite( thr, a + 0 );
5770 zsm_sapply16__msmcwrite( thr, a + 2 );
5773 /*------------- ZSM accesses: 64 bit sapply ------------- */
5775 static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
5776 CacheLine* cl;
5777 UWord cloff, tno;
5778 //UWord toff;
5779 SVal svOld, svNew;
5780 UShort descr;
5781 stats__cline_cread64s++;
5782 if (UNLIKELY(!aligned64(a))) goto slowcase;
5783 cl = get_cacheline(a);
5784 cloff = get_cacheline_offset(a);
5785 tno = get_treeno(a);
5786 //toff = get_tree_offset(a); /* == 0, unused */
5787 descr = cl->descrs[tno];
5788 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5789 goto slowcase;
5791 svOld = cl->svals[cloff];
5792 svNew = msmcread( svOld, thr,a,8 );
5793 if (CHECK_ZSM)
5794 tl_assert(svNew != SVal_INVALID);
5795 cl->svals[cloff] = svNew;
5796 return;
5797 slowcase: /* misaligned, or must go further down the tree */
5798 stats__cline_64to32splits++;
5799 zsm_sapply32__msmcread( thr, a + 0 );
5800 zsm_sapply32__msmcread( thr, a + 4 );
5803 static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
5804 CacheLine* cl;
5805 UWord cloff, tno;
5806 //UWord toff;
5807 SVal svOld, svNew;
5808 UShort descr;
5809 stats__cline_cwrite64s++;
5810 if (UNLIKELY(!aligned64(a))) goto slowcase;
5811 cl = get_cacheline(a);
5812 cloff = get_cacheline_offset(a);
5813 tno = get_treeno(a);
5814 //toff = get_tree_offset(a); /* == 0, unused */
5815 descr = cl->descrs[tno];
5816 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5817 goto slowcase;
5819 svOld = cl->svals[cloff];
5820 svNew = msmcwrite( svOld, thr,a,8 );
5821 if (CHECK_ZSM)
5822 tl_assert(svNew != SVal_INVALID);
5823 cl->svals[cloff] = svNew;
5824 return;
5825 slowcase: /* misaligned, or must go further down the tree */
5826 stats__cline_64to32splits++;
5827 zsm_sapply32__msmcwrite( thr, a + 0 );
5828 zsm_sapply32__msmcwrite( thr, a + 4 );
5831 /*--------------- ZSM accesses: 8 bit swrite --------------- */
5833 static
5834 void zsm_swrite08 ( Addr a, SVal svNew ) {
5835 CacheLine* cl;
5836 UWord cloff, tno, toff;
5837 UShort descr;
5838 stats__cline_swrite08s++;
5839 cl = get_cacheline(a);
5840 cloff = get_cacheline_offset(a);
5841 tno = get_treeno(a);
5842 toff = get_tree_offset(a); /* == 0 .. 7 */
5843 descr = cl->descrs[tno];
5844 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5845 SVal* tree = &cl->svals[tno << 3];
5846 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5847 if (CHECK_ZSM)
5848 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5850 tl_assert(svNew != SVal_INVALID);
5851 cl->svals[cloff] = svNew;
5854 /*--------------- ZSM accesses: 16 bit swrite --------------- */
5856 static
5857 void zsm_swrite16 ( Addr a, SVal svNew ) {
5858 CacheLine* cl;
5859 UWord cloff, tno, toff;
5860 UShort descr;
5861 stats__cline_swrite16s++;
5862 if (UNLIKELY(!aligned16(a))) goto slowcase;
5863 cl = get_cacheline(a);
5864 cloff = get_cacheline_offset(a);
5865 tno = get_treeno(a);
5866 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5867 descr = cl->descrs[tno];
5868 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5869 if (valid_value_is_below_me_16(descr, toff)) {
5870 /* Writing at this level. Need to fix up 'descr'. */
5871 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5872 /* At this point, the tree does not match cl->descr[tno] any
5873 more. The assignments below will fix it up. */
5874 } else {
5875 /* We can't indiscriminately write on the w16 node as in the
5876 w64 case, as that might make the node inconsistent with
5877 its parent. So first, pull down to this level. */
5878 SVal* tree = &cl->svals[tno << 3];
5879 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5880 if (CHECK_ZSM)
5881 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5884 tl_assert(svNew != SVal_INVALID);
5885 cl->svals[cloff + 0] = svNew;
5886 cl->svals[cloff + 1] = SVal_INVALID;
5887 return;
5888 slowcase: /* misaligned */
5889 stats__cline_16to8splits++;
5890 zsm_swrite08( a + 0, svNew );
5891 zsm_swrite08( a + 1, svNew );
5894 /*--------------- ZSM accesses: 32 bit swrite --------------- */
5896 static
5897 void zsm_swrite32 ( Addr a, SVal svNew ) {
5898 CacheLine* cl;
5899 UWord cloff, tno, toff;
5900 UShort descr;
5901 stats__cline_swrite32s++;
5902 if (UNLIKELY(!aligned32(a))) goto slowcase;
5903 cl = get_cacheline(a);
5904 cloff = get_cacheline_offset(a);
5905 tno = get_treeno(a);
5906 toff = get_tree_offset(a); /* == 0 or 4 */
5907 descr = cl->descrs[tno];
5908 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5909 if (valid_value_is_above_me_32(descr, toff)) {
5910 /* We can't indiscriminately write on the w32 node as in the
5911 w64 case, as that might make the node inconsistent with
5912 its parent. So first, pull down to this level. */
5913 SVal* tree = &cl->svals[tno << 3];
5914 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5915 if (CHECK_ZSM)
5916 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5917 } else {
5918 /* Writing at this level. Need to fix up 'descr'. */
5919 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5920 /* At this point, the tree does not match cl->descr[tno] any
5921 more. The assignments below will fix it up. */
5924 tl_assert(svNew != SVal_INVALID);
5925 cl->svals[cloff + 0] = svNew;
5926 cl->svals[cloff + 1] = SVal_INVALID;
5927 cl->svals[cloff + 2] = SVal_INVALID;
5928 cl->svals[cloff + 3] = SVal_INVALID;
5929 return;
5930 slowcase: /* misaligned */
5931 stats__cline_32to16splits++;
5932 zsm_swrite16( a + 0, svNew );
5933 zsm_swrite16( a + 2, svNew );
5936 /*--------------- ZSM accesses: 64 bit swrite --------------- */
5938 static
5939 void zsm_swrite64 ( Addr a, SVal svNew ) {
5940 CacheLine* cl;
5941 UWord cloff, tno;
5942 //UWord toff;
5943 stats__cline_swrite64s++;
5944 if (UNLIKELY(!aligned64(a))) goto slowcase;
5945 cl = get_cacheline(a);
5946 cloff = get_cacheline_offset(a);
5947 tno = get_treeno(a);
5948 //toff = get_tree_offset(a); /* == 0, unused */
5949 cl->descrs[tno] = TREE_DESCR_64;
5950 if (CHECK_ZSM)
5951 tl_assert(svNew != SVal_INVALID); /* EXPENSIVE */
5952 cl->svals[cloff + 0] = svNew;
5953 cl->svals[cloff + 1] = SVal_INVALID;
5954 cl->svals[cloff + 2] = SVal_INVALID;
5955 cl->svals[cloff + 3] = SVal_INVALID;
5956 cl->svals[cloff + 4] = SVal_INVALID;
5957 cl->svals[cloff + 5] = SVal_INVALID;
5958 cl->svals[cloff + 6] = SVal_INVALID;
5959 cl->svals[cloff + 7] = SVal_INVALID;
5960 return;
5961 slowcase: /* misaligned */
5962 stats__cline_64to32splits++;
5963 zsm_swrite32( a + 0, svNew );
5964 zsm_swrite32( a + 4, svNew );
5967 /*------------- ZSM accesses: 8 bit sread/scopy ------------- */
5969 static
5970 SVal zsm_sread08 ( Addr a ) {
5971 CacheLine* cl;
5972 UWord cloff, tno, toff;
5973 UShort descr;
5974 stats__cline_sread08s++;
5975 cl = get_cacheline(a);
5976 cloff = get_cacheline_offset(a);
5977 tno = get_treeno(a);
5978 toff = get_tree_offset(a); /* == 0 .. 7 */
5979 descr = cl->descrs[tno];
5980 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5981 SVal* tree = &cl->svals[tno << 3];
5982 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5984 return cl->svals[cloff];
5987 static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
5988 SVal sv;
5989 stats__cline_scopy08s++;
5990 sv = zsm_sread08( src );
5991 zsm_swrite08( dst, sv );
5995 /* Block-copy states (needed for implementing realloc()). Note this
5996 doesn't change the filtering arrangements. The caller of
5997 zsm_scopy_range needs to attend to that. */
5999 static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
6001 SizeT i;
6002 if (len == 0)
6003 return;
6005 /* assert for non-overlappingness */
6006 tl_assert(src+len <= dst || dst+len <= src);
6008 /* To be simple, just copy byte by byte. But so as not to wreck
6009 performance for later accesses to dst[0 .. len-1], normalise
6010 destination lines as we finish with them, and also normalise the
6011 line containing the first and last address. */
6012 for (i = 0; i < len; i++) {
6013 Bool normalise
6014 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
6015 || i == 0 /* first in range */
6016 || i == len-1; /* last in range */
6017 zsm_scopy08( src+i, dst+i, normalise );
6022 /* For setting address ranges to a given value. Has considerable
6023 sophistication so as to avoid generating large numbers of pointless
6024 cache loads/writebacks for large ranges. */
6026 /* Do small ranges in-cache, in the obvious way. */
6027 static
6028 void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
6030 /* fast track a couple of common cases */
6031 if (len == 4 && aligned32(a)) {
6032 zsm_swrite32( a, svNew );
6033 return;
6035 if (len == 8 && aligned64(a)) {
6036 zsm_swrite64( a, svNew );
6037 return;
6040 /* be completely general (but as efficient as possible) */
6041 if (len == 0) return;
6043 if (!aligned16(a) && len >= 1) {
6044 zsm_swrite08( a, svNew );
6045 a += 1;
6046 len -= 1;
6047 tl_assert(aligned16(a));
6049 if (len == 0) return;
6051 if (!aligned32(a) && len >= 2) {
6052 zsm_swrite16( a, svNew );
6053 a += 2;
6054 len -= 2;
6055 tl_assert(aligned32(a));
6057 if (len == 0) return;
6059 if (!aligned64(a) && len >= 4) {
6060 zsm_swrite32( a, svNew );
6061 a += 4;
6062 len -= 4;
6063 tl_assert(aligned64(a));
6065 if (len == 0) return;
6067 if (len >= 8) {
6068 tl_assert(aligned64(a));
6069 while (len >= 8) {
6070 zsm_swrite64( a, svNew );
6071 a += 8;
6072 len -= 8;
6074 tl_assert(aligned64(a));
6076 if (len == 0) return;
6078 if (len >= 4)
6079 tl_assert(aligned32(a));
6080 if (len >= 4) {
6081 zsm_swrite32( a, svNew );
6082 a += 4;
6083 len -= 4;
6085 if (len == 0) return;
6087 if (len >= 2)
6088 tl_assert(aligned16(a));
6089 if (len >= 2) {
6090 zsm_swrite16( a, svNew );
6091 a += 2;
6092 len -= 2;
6094 if (len == 0) return;
6096 if (len >= 1) {
6097 zsm_swrite08( a, svNew );
6098 //a += 1;
6099 len -= 1;
6101 tl_assert(len == 0);
6105 /* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
6106 for larger ranges, try to operate directly on the out-of-cache
6107 representation, rather than dragging lines into the cache,
6108 overwriting them, and forcing them out. This turns out to be an
6109 important performance optimisation.
6111 Note that this doesn't change the filtering arrangements. The
6112 caller of zsm_sset_range needs to attend to that. */
6114 static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
6116 tl_assert(svNew != SVal_INVALID);
6117 stats__cache_make_New_arange += (ULong)len;
6119 if (0 && len > 500)
6120 VG_(printf)("make New ( %#lx, %lu )\n", a, len );
6122 if (0) {
6123 static UWord n_New_in_cache = 0;
6124 static UWord n_New_not_in_cache = 0;
6125 /* tag is 'a' with the in-line offset masked out,
6126 eg a[31]..a[4] 0000 */
6127 Addr tag = a & ~(N_LINE_ARANGE - 1);
6128 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
6129 if (LIKELY(tag == cache_shmem.tags0[wix])) {
6130 n_New_in_cache++;
6131 } else {
6132 n_New_not_in_cache++;
6134 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
6135 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
6136 n_New_in_cache, n_New_not_in_cache );
6139 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
6140 zsm_sset_range_SMALL( a, len, svNew );
6141 } else {
6142 Addr before_start = a;
6143 Addr aligned_start = cacheline_ROUNDUP(a);
6144 Addr after_start = cacheline_ROUNDDN(a + len);
6145 UWord before_len = aligned_start - before_start;
6146 UWord aligned_len = after_start - aligned_start;
6147 UWord after_len = a + len - after_start;
6148 tl_assert(before_start <= aligned_start);
6149 tl_assert(aligned_start <= after_start);
6150 tl_assert(before_len < N_LINE_ARANGE);
6151 tl_assert(after_len < N_LINE_ARANGE);
6152 tl_assert(get_cacheline_offset(aligned_start) == 0);
6153 if (get_cacheline_offset(a) == 0) {
6154 tl_assert(before_len == 0);
6155 tl_assert(a == aligned_start);
6157 if (get_cacheline_offset(a+len) == 0) {
6158 tl_assert(after_len == 0);
6159 tl_assert(after_start == a+len);
6161 if (before_len > 0) {
6162 zsm_sset_range_SMALL( before_start, before_len, svNew );
6164 if (after_len > 0) {
6165 zsm_sset_range_SMALL( after_start, after_len, svNew );
6167 stats__cache_make_New_inZrep += (ULong)aligned_len;
6169 while (1) {
6170 Addr tag;
6171 UWord wix;
6172 if (aligned_start >= after_start)
6173 break;
6174 tl_assert(get_cacheline_offset(aligned_start) == 0);
6175 tag = aligned_start & ~(N_LINE_ARANGE - 1);
6176 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
6177 if (tag == cache_shmem.tags0[wix]) {
6178 UWord i;
6179 for (i = 0; i < N_LINE_ARANGE / 8; i++)
6180 zsm_swrite64( aligned_start + i * 8, svNew );
6181 } else {
6182 UWord i;
6183 Word zix;
6184 SecMap* sm;
6185 LineZ* lineZ;
6186 /* This line is not in the cache. Do not force it in; instead
6187 modify it in-place. */
6188 /* find the Z line to write in and rcdec it or the
6189 associated F line. */
6190 find_Z_for_writing( &sm, &zix, tag );
6191 tl_assert(sm);
6192 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
6193 lineZ = &sm->linesZ[zix];
6194 lineZ->dict[0] = svNew;
6195 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
6196 for (i = 0; i < N_LINE_ARANGE/4; i++)
6197 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
6198 rcinc_LineZ(lineZ);
6200 aligned_start += N_LINE_ARANGE;
6201 aligned_len -= N_LINE_ARANGE;
6203 tl_assert(aligned_start == after_start);
6204 tl_assert(aligned_len == 0);
6209 /////////////////////////////////////////////////////////
6210 // //
6211 // Front-filtering accesses //
6212 // //
6213 /////////////////////////////////////////////////////////
6215 static UWord stats__f_ac = 0;
6216 static UWord stats__f_sk = 0;
6218 #if 0
6219 # define STATS__F_SHOW \
6220 do { \
6221 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
6222 VG_(printf)("filters: ac %lu sk %lu\n", \
6223 stats__f_ac, stats__f_sk); \
6224 } while (0)
6225 #else
6226 # define STATS__F_SHOW /* */
6227 #endif
6229 void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
6230 stats__f_ac++;
6231 STATS__F_SHOW;
6232 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
6233 stats__f_sk++;
6234 return;
6236 zsm_sapply08__msmcwrite(thr, a);
6239 void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
6240 stats__f_ac++;
6241 STATS__F_SHOW;
6242 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
6243 stats__f_sk++;
6244 return;
6246 zsm_sapply16__msmcwrite(thr, a);
6249 void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
6250 stats__f_ac++;
6251 STATS__F_SHOW;
6252 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
6253 stats__f_sk++;
6254 return;
6256 zsm_sapply32__msmcwrite(thr, a);
6259 void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
6260 stats__f_ac++;
6261 STATS__F_SHOW;
6262 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
6263 stats__f_sk++;
6264 return;
6266 zsm_sapply64__msmcwrite(thr, a);
6269 void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
6271 /* fast track a couple of common cases */
6272 if (len == 4 && aligned32(a)) {
6273 zsm_sapply32_f__msmcwrite( thr, a );
6274 return;
6276 if (len == 8 && aligned64(a)) {
6277 zsm_sapply64_f__msmcwrite( thr, a );
6278 return;
6281 /* be completely general (but as efficient as possible) */
6282 if (len == 0) return;
6284 if (!aligned16(a) && len >= 1) {
6285 zsm_sapply08_f__msmcwrite( thr, a );
6286 a += 1;
6287 len -= 1;
6288 tl_assert(aligned16(a));
6290 if (len == 0) return;
6292 if (!aligned32(a) && len >= 2) {
6293 zsm_sapply16_f__msmcwrite( thr, a );
6294 a += 2;
6295 len -= 2;
6296 tl_assert(aligned32(a));
6298 if (len == 0) return;
6300 if (!aligned64(a) && len >= 4) {
6301 zsm_sapply32_f__msmcwrite( thr, a );
6302 a += 4;
6303 len -= 4;
6304 tl_assert(aligned64(a));
6306 if (len == 0) return;
6308 if (len >= 8) {
6309 tl_assert(aligned64(a));
6310 while (len >= 8) {
6311 zsm_sapply64_f__msmcwrite( thr, a );
6312 a += 8;
6313 len -= 8;
6315 tl_assert(aligned64(a));
6317 if (len == 0) return;
6319 if (len >= 4)
6320 tl_assert(aligned32(a));
6321 if (len >= 4) {
6322 zsm_sapply32_f__msmcwrite( thr, a );
6323 a += 4;
6324 len -= 4;
6326 if (len == 0) return;
6328 if (len >= 2)
6329 tl_assert(aligned16(a));
6330 if (len >= 2) {
6331 zsm_sapply16_f__msmcwrite( thr, a );
6332 a += 2;
6333 len -= 2;
6335 if (len == 0) return;
6337 if (len >= 1) {
6338 zsm_sapply08_f__msmcwrite( thr, a );
6339 //a += 1;
6340 len -= 1;
6342 tl_assert(len == 0);
6345 void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
6346 stats__f_ac++;
6347 STATS__F_SHOW;
6348 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
6349 stats__f_sk++;
6350 return;
6352 zsm_sapply08__msmcread(thr, a);
6355 void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
6356 stats__f_ac++;
6357 STATS__F_SHOW;
6358 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
6359 stats__f_sk++;
6360 return;
6362 zsm_sapply16__msmcread(thr, a);
6365 void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
6366 stats__f_ac++;
6367 STATS__F_SHOW;
6368 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
6369 stats__f_sk++;
6370 return;
6372 zsm_sapply32__msmcread(thr, a);
6375 void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
6376 stats__f_ac++;
6377 STATS__F_SHOW;
6378 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
6379 stats__f_sk++;
6380 return;
6382 zsm_sapply64__msmcread(thr, a);
6385 void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
6387 /* fast track a couple of common cases */
6388 if (len == 4 && aligned32(a)) {
6389 zsm_sapply32_f__msmcread( thr, a );
6390 return;
6392 if (len == 8 && aligned64(a)) {
6393 zsm_sapply64_f__msmcread( thr, a );
6394 return;
6397 /* be completely general (but as efficient as possible) */
6398 if (len == 0) return;
6400 if (!aligned16(a) && len >= 1) {
6401 zsm_sapply08_f__msmcread( thr, a );
6402 a += 1;
6403 len -= 1;
6404 tl_assert(aligned16(a));
6406 if (len == 0) return;
6408 if (!aligned32(a) && len >= 2) {
6409 zsm_sapply16_f__msmcread( thr, a );
6410 a += 2;
6411 len -= 2;
6412 tl_assert(aligned32(a));
6414 if (len == 0) return;
6416 if (!aligned64(a) && len >= 4) {
6417 zsm_sapply32_f__msmcread( thr, a );
6418 a += 4;
6419 len -= 4;
6420 tl_assert(aligned64(a));
6422 if (len == 0) return;
6424 if (len >= 8) {
6425 tl_assert(aligned64(a));
6426 while (len >= 8) {
6427 zsm_sapply64_f__msmcread( thr, a );
6428 a += 8;
6429 len -= 8;
6431 tl_assert(aligned64(a));
6433 if (len == 0) return;
6435 if (len >= 4)
6436 tl_assert(aligned32(a));
6437 if (len >= 4) {
6438 zsm_sapply32_f__msmcread( thr, a );
6439 a += 4;
6440 len -= 4;
6442 if (len == 0) return;
6444 if (len >= 2)
6445 tl_assert(aligned16(a));
6446 if (len >= 2) {
6447 zsm_sapply16_f__msmcread( thr, a );
6448 a += 2;
6449 len -= 2;
6451 if (len == 0) return;
6453 if (len >= 1) {
6454 zsm_sapply08_f__msmcread( thr, a );
6455 //a += 1;
6456 len -= 1;
6458 tl_assert(len == 0);
6461 void libhb_Thr_resumes ( Thr* thr )
6463 if (0) VG_(printf)("resume %p\n", thr);
6464 tl_assert(thr);
6465 tl_assert(!thr->llexit_done);
6466 Filter__clear(thr->filter, "libhb_Thr_resumes");
6467 /* A kludge, but .. if this thread doesn't have any marker stacks
6468 at all, get one right now. This is easier than figuring out
6469 exactly when at thread startup we can and can't take a stack
6470 snapshot. */
6471 if (HG_(clo_history_level) == 1) {
6472 tl_assert(thr->local_Kws_n_stacks);
6473 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
6474 note_local_Kw_n_stack_for(thr);
6479 /////////////////////////////////////////////////////////
6480 // //
6481 // Synchronisation objects //
6482 // //
6483 /////////////////////////////////////////////////////////
6485 /* A double linked list of all the SO's. */
6486 SO* admin_SO = NULL;
6488 static SO* SO__Alloc ( void )
6490 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6491 so->viR = VtsID_INVALID;
6492 so->viW = VtsID_INVALID;
6493 so->magic = SO_MAGIC;
6494 /* Add to double linked list */
6495 if (admin_SO) {
6496 tl_assert(admin_SO->admin_prev == NULL);
6497 admin_SO->admin_prev = so;
6498 so->admin_next = admin_SO;
6499 } else {
6500 so->admin_next = NULL;
6502 so->admin_prev = NULL;
6503 admin_SO = so;
6504 /* */
6505 return so;
6508 static void SO__Dealloc ( SO* so )
6510 tl_assert(so);
6511 tl_assert(so->magic == SO_MAGIC);
6512 if (so->viR == VtsID_INVALID) {
6513 tl_assert(so->viW == VtsID_INVALID);
6514 } else {
6515 tl_assert(so->viW != VtsID_INVALID);
6516 VtsID__rcdec(so->viR);
6517 VtsID__rcdec(so->viW);
6519 so->magic = 0;
6520 /* Del from double linked list */
6521 if (so->admin_prev)
6522 so->admin_prev->admin_next = so->admin_next;
6523 if (so->admin_next)
6524 so->admin_next->admin_prev = so->admin_prev;
6525 if (so == admin_SO)
6526 admin_SO = so->admin_next;
6527 /* */
6528 HG_(free)( so );
6532 /////////////////////////////////////////////////////////
6533 // //
6534 // Top Level API //
6535 // //
6536 /////////////////////////////////////////////////////////
6538 static void show_thread_state ( const HChar* str, Thr* t )
6540 if (1) return;
6541 if (t->viR == t->viW) {
6542 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6543 VtsID__pp( t->viR );
6544 VG_(printf)("%s","\n");
6545 } else {
6546 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6547 VtsID__pp( t->viR );
6548 VG_(printf)(" viW %u==", t->viW);
6549 VtsID__pp( t->viW );
6550 VG_(printf)("%s","\n");
6555 Thr* libhb_init (
6556 void (*get_stacktrace)( Thr*, Addr*, UWord ),
6557 ExeContext* (*get_EC)( Thr* )
6560 Thr* thr;
6561 VtsID vi;
6563 // We will have to have to store a large number of these,
6564 // so make sure they're the size we expect them to be.
6565 STATIC_ASSERT(sizeof(ScalarTS) == 8);
6567 /* because first 1024 unusable */
6568 STATIC_ASSERT(SCALARTS_N_THRBITS >= 11);
6569 /* so as to fit in a UInt w/ 5 bits to spare (see defn of
6570 Thr_n_RCEC and TSW). */
6571 STATIC_ASSERT(SCALARTS_N_THRBITS <= 27);
6573 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6574 (32-bit). It's not correctness-critical, but there are a lot of
6575 them, so it's important from a space viewpoint. Unfortunately
6576 we simply can't pack it into 2 words on a 32-bit target. */
6577 STATIC_ASSERT( (sizeof(UWord) == 8 && sizeof(Thr_n_RCEC) == 16)
6578 || (sizeof(UWord) == 4 && sizeof(Thr_n_RCEC) == 12));
6579 STATIC_ASSERT(sizeof(TSW) == sizeof(UInt));
6581 /* Word sets really are 32 bits. Even on a 64 bit target. */
6582 STATIC_ASSERT(sizeof(WordSetID) == 4);
6583 STATIC_ASSERT(sizeof(WordSet) == sizeof(WordSetID));
6585 tl_assert(get_stacktrace);
6586 tl_assert(get_EC);
6587 main_get_stacktrace = get_stacktrace;
6588 main_get_EC = get_EC;
6590 // No need to initialise hg_wordfm.
6591 // No need to initialise hg_wordset.
6593 /* Allocated once and never deallocated. Used as a temporary in
6594 VTS singleton, tick and join operations. */
6595 temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6596 temp_max_sized_VTS->id = VtsID_INVALID;
6597 verydead_thread_tables_init();
6598 vts_set_init();
6599 vts_tab_init();
6600 event_map_init();
6601 VtsID__invalidate_caches();
6603 // initialise shadow memory
6604 zsm_init( );
6606 thr = Thr__new();
6607 vi = VtsID__mk_Singleton( thr, 1 );
6608 thr->viR = vi;
6609 thr->viW = vi;
6610 VtsID__rcinc(thr->viR);
6611 VtsID__rcinc(thr->viW);
6613 show_thread_state(" root", thr);
6614 return thr;
6618 Thr* libhb_create ( Thr* parent )
6620 /* The child's VTSs are copies of the parent's VTSs, but ticked at
6621 the child's index. Since the child's index is guaranteed
6622 unique, it has never been seen before, so the implicit value
6623 before the tick is zero and after that is one. */
6624 Thr* child = Thr__new();
6626 child->viR = VtsID__tick( parent->viR, child );
6627 child->viW = VtsID__tick( parent->viW, child );
6628 Filter__clear(child->filter, "libhb_create(child)");
6629 VtsID__rcinc(child->viR);
6630 VtsID__rcinc(child->viW);
6631 /* We need to do note_local_Kw_n_stack_for( child ), but it's too
6632 early for that - it may not have a valid TId yet. So, let
6633 libhb_Thr_resumes pick it up the first time the thread runs. */
6635 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6636 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6638 /* and the parent has to move along too */
6639 VtsID__rcdec(parent->viR);
6640 VtsID__rcdec(parent->viW);
6641 parent->viR = VtsID__tick( parent->viR, parent );
6642 parent->viW = VtsID__tick( parent->viW, parent );
6643 Filter__clear(parent->filter, "libhb_create(parent)");
6644 VtsID__rcinc(parent->viR);
6645 VtsID__rcinc(parent->viW);
6646 note_local_Kw_n_stack_for( parent );
6648 show_thread_state(" child", child);
6649 show_thread_state("parent", parent);
6651 return child;
6654 /* Shut down the library, and print stats (in fact that's _all_
6655 this is for. */
6656 void libhb_shutdown ( Bool show_stats )
6658 if (show_stats) {
6659 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6660 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6661 stats__secmaps_allocd,
6662 stats__secmap_ga_space_covered);
6663 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6664 stats__secmap_linesZ_allocd,
6665 stats__secmap_linesZ_bytes);
6666 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)"
6667 " (%'10lu used)\n",
6668 VG_(sizePA) (LineF_pool_allocator),
6669 VG_(sizePA) (LineF_pool_allocator) * sizeof(LineF),
6670 shmem__SecMap_used_linesF());
6671 VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
6672 " #%lu scanGC \n",
6673 stats__secmaps_in_map_shmem,
6674 shmem__SecMap_do_GC(False /* really do GC */),
6675 stats__secmaps_scanGC);
6676 tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
6677 VG_(printf)(" secmaps: %'10lu in freelist,"
6678 " total (scanGCed %'lu, ssetGCed %'lu)\n",
6679 SecMap_freelist_length(),
6680 stats__secmaps_scanGCed,
6681 stats__secmaps_ssetGCed);
6682 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6683 stats__secmaps_search, stats__secmaps_search_slow);
6685 VG_(printf)("%s","\n");
6686 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
6687 stats__cache_totrefs, stats__cache_totmisses );
6688 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
6689 stats__cache_Z_fetches, stats__cache_F_fetches );
6690 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
6691 stats__cache_Z_wbacks, stats__cache_F_wbacks );
6692 VG_(printf)(" cache: %'14lu flushes_invals\n",
6693 stats__cache_flushes_invals );
6694 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
6695 stats__cache_make_New_arange,
6696 stats__cache_make_New_inZrep);
6698 VG_(printf)("%s","\n");
6699 VG_(printf)(" cline: %'10lu normalises\n",
6700 stats__cline_normalises );
6701 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6702 stats__cline_cread64s,
6703 stats__cline_cread32s,
6704 stats__cline_cread16s,
6705 stats__cline_cread08s );
6706 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6707 stats__cline_cwrite64s,
6708 stats__cline_cwrite32s,
6709 stats__cline_cwrite16s,
6710 stats__cline_cwrite08s );
6711 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6712 stats__cline_swrite64s,
6713 stats__cline_swrite32s,
6714 stats__cline_swrite16s,
6715 stats__cline_swrite08s );
6716 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
6717 stats__cline_sread08s, stats__cline_scopy08s );
6718 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu"
6719 " 2to1 %'12lu\n",
6720 stats__cline_64to32splits, stats__cline_32to16splits,
6721 stats__cline_16to8splits );
6722 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu"
6723 " 2to1 %'12lu\n",
6724 stats__cline_64to32pulldown, stats__cline_32to16pulldown,
6725 stats__cline_16to8pulldown );
6726 if (0)
6727 VG_(printf)(" cline: sizeof(CacheLineZ) %ld,"
6728 " covers %ld bytes of arange\n",
6729 (Word)sizeof(LineZ),
6730 (Word)N_LINE_ARANGE);
6732 VG_(printf)("%s","\n");
6734 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
6735 stats__msmcread, stats__msmcread_change);
6736 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
6737 stats__msmcwrite, stats__msmcwrite_change);
6738 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6739 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
6740 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
6741 stats__join2_queries, stats__join2_misses);
6743 VG_(printf)("%s","\n");
6744 VG_(printf)(" libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
6745 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
6746 VG_(printf)(" libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6747 stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
6748 VG_(printf)(" libhb: VTSset: find__or__clone_and_add %'lu"
6749 " (%'lu allocd)\n",
6750 stats__vts_set__focaa, stats__vts_set__focaa_a );
6751 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
6752 stats__vts__indexat_slow );
6754 VG_(printf)("%s","\n");
6755 VG_(printf)(
6756 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6757 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6759 VG_(printf)(" libhb: #%lu vts_tab GC #%lu vts pruning\n",
6760 stats__vts_tab_GC, stats__vts_pruning);
6761 VG_(printf)( " libhb: %lu entries in vts_set\n",
6762 VG_(sizeFM)( vts_set ) );
6764 VG_(printf)("%s","\n");
6766 UInt live = 0;
6767 UInt llexit_done = 0;
6768 UInt joinedwith_done = 0;
6769 UInt llexit_and_joinedwith_done = 0;
6771 Thread* hgthread = get_admin_threads();
6772 tl_assert(hgthread);
6773 while (hgthread) {
6774 Thr* hbthr = hgthread->hbthr;
6775 tl_assert(hbthr);
6776 if (hbthr->llexit_done && hbthr->joinedwith_done)
6777 llexit_and_joinedwith_done++;
6778 else if (hbthr->llexit_done)
6779 llexit_done++;
6780 else if (hbthr->joinedwith_done)
6781 joinedwith_done++;
6782 else
6783 live++;
6784 hgthread = hgthread->admin;
6786 VG_(printf)(" libhb: threads live: %u exit_and_joinedwith %u"
6787 " exit %u joinedwith %u\n",
6788 live, llexit_and_joinedwith_done,
6789 llexit_done, joinedwith_done);
6790 VG_(printf)(" libhb: %d verydead_threads, "
6791 "%d verydead_threads_not_pruned\n",
6792 (int) VG_(sizeXA)( verydead_thread_table),
6793 (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
6794 tl_assert (VG_(sizeXA)( verydead_thread_table)
6795 + VG_(sizeXA)( verydead_thread_table_not_pruned)
6796 == llexit_and_joinedwith_done);
6799 VG_(printf)("%s","\n");
6800 VG_(printf)( " libhb: oldrefHTN %lu (%'d bytes)\n",
6801 oldrefHTN, (int)(oldrefHTN * sizeof(OldRef)));
6802 tl_assert (oldrefHTN == VG_(HT_count_nodes) (oldrefHT));
6803 VG_(printf)( " libhb: oldref lookup found=%lu notfound=%lu\n",
6804 stats__evm__lookup_found, stats__evm__lookup_notfound);
6805 if (VG_(clo_verbosity) > 1)
6806 VG_(HT_print_stats) (oldrefHT, cmp_oldref_tsw);
6807 VG_(printf)( " libhb: oldref bind tsw/rcec "
6808 "==/==:%'lu ==/!=:%'lu !=/!=:%'lu\n",
6809 stats__ctxt_eq_tsw_eq_rcec, stats__ctxt_eq_tsw_neq_rcec,
6810 stats__ctxt_neq_tsw_neq_rcec);
6811 VG_(printf)( " libhb: ctxt__rcdec calls %'lu. rcec gc discards %'lu\n",
6812 stats__ctxt_rcdec_calls, stats__ctxt_rcec_gc_discards);
6813 VG_(printf)( " libhb: contextTab: %lu slots,"
6814 " %lu cur ents(ref'd %lu),"
6815 " %lu max ents\n",
6816 (UWord)N_RCEC_TAB,
6817 stats__ctxt_tab_curr, RCEC_referenced,
6818 stats__ctxt_tab_max );
6819 VG_(printf) (" libhb: stats__cached_rcec "
6820 "identical %'lu updated %'lu fresh %'lu\n",
6821 stats__cached_rcec_identical, stats__cached_rcec_updated,
6822 stats__cached_rcec_fresh);
6823 if (stats__cached_rcec_diff > 0)
6824 VG_(printf) (" libhb: stats__cached_rcec diff unk reason%'lu\n",
6825 stats__cached_rcec_diff);
6826 if (stats__cached_rcec_diff_known_reason > 0)
6827 VG_(printf) (" libhb: stats__cached_rcec diff known reason %'lu\n",
6828 stats__cached_rcec_diff_known_reason);
6831 # define MAXCHAIN 10
6832 UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
6833 UInt non0chain = 0;
6834 UInt n;
6835 UInt i;
6836 RCEC *p;
6838 for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
6839 for (i = 0; i < N_RCEC_TAB; i++) {
6840 n = 0;
6841 for (p = contextTab[i]; p; p = p->next)
6842 n++;
6843 if (n < MAXCHAIN)
6844 chains[n]++;
6845 else
6846 chains[MAXCHAIN]++;
6847 if (n > 0)
6848 non0chain++;
6850 VG_(printf)( " libhb: contextTab chain of [length]=nchain."
6851 " Avg chain len %3.1f\n"
6852 " ",
6853 (Double)stats__ctxt_tab_curr
6854 / (Double)(non0chain ? non0chain : 1));
6855 for (i = 0; i <= MAXCHAIN; i++) {
6856 if (chains[i] != 0)
6857 VG_(printf)( "[%u%s]=%u ",
6858 i, i == MAXCHAIN ? "+" : "",
6859 chains[i]);
6861 VG_(printf)( "\n");
6862 # undef MAXCHAIN
6864 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
6865 stats__ctxt_tab_qs,
6866 stats__ctxt_tab_cmps );
6867 #if 0
6868 VG_(printf)("sizeof(CacheLine) = %zu\n", sizeof(CacheLine));
6869 VG_(printf)("sizeof(LineZ) = %zu\n", sizeof(LineZ));
6870 VG_(printf)("sizeof(LineF) = %zu\n", sizeof(LineF));
6871 VG_(printf)("sizeof(SecMap) = %zu\n", sizeof(SecMap));
6872 VG_(printf)("sizeof(Cache) = %zu\n", sizeof(Cache));
6873 VG_(printf)("sizeof(SMCacheEnt) = %zu\n", sizeof(SMCacheEnt));
6874 VG_(printf)("sizeof(CountedSVal) = %zu\n", sizeof(CountedSVal));
6875 VG_(printf)("sizeof(VTS) = %zu\n", sizeof(VTS));
6876 VG_(printf)("sizeof(ScalarTS) = %zu\n", sizeof(ScalarTS));
6877 VG_(printf)("sizeof(VtsTE) = %zu\n", sizeof(VtsTE));
6879 VG_(printf)("sizeof(struct _Thr) = %zu\n", sizeof(struct _Thr));
6880 VG_(printf)("sizeof(RCEC) = %zu\n", sizeof(RCEC));
6881 VG_(printf)("sizeof(struct _SO) = %zu\n", sizeof(struct _SO));
6882 #endif
6884 VG_(printf)("%s","<<< END libhb stats >>>\n");
6885 VG_(printf)("%s","\n");
6890 /* Receive notification that a thread has low level exited. The
6891 significance here is that we do not expect to see any more memory
6892 references from it. */
6893 void libhb_async_exit ( Thr* thr )
6895 tl_assert(thr);
6896 tl_assert(!thr->llexit_done);
6897 thr->llexit_done = True;
6899 /* Check nobody messed up with the cached_rcec */
6900 tl_assert (thr->cached_rcec.magic == RCEC_MAGIC);
6901 tl_assert (thr->cached_rcec.rc == 0);
6902 tl_assert (thr->cached_rcec.rcX == 0);
6903 tl_assert (thr->cached_rcec.next == NULL);
6905 /* Just to be sure, declare the cached stack invalid. */
6906 set_cached_rcec_validity(thr, False);
6908 /* free up Filter and local_Kws_n_stacks (well, actually not the
6909 latter ..) */
6910 tl_assert(thr->filter);
6911 HG_(free)(thr->filter);
6912 thr->filter = NULL;
6914 /* Tell the VTS mechanism this thread has exited, so it can
6915 participate in VTS pruning. Note this can only happen if the
6916 thread has both ll_exited and has been joined with. */
6917 if (thr->joinedwith_done)
6918 VTS__declare_thread_very_dead(thr);
6920 /* Another space-accuracy tradeoff. Do we want to be able to show
6921 H1 history for conflicts in threads which have since exited? If
6922 yes, then we better not free up thr->local_Kws_n_stacks. The
6923 downside is a potential per-thread leak of up to
6924 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6925 XArray average overcommit factor is (1.5 I'd guess). */
6926 // hence:
6927 // VG_(deleteXA)(thr->local_Kws_n_stacks);
6928 // thr->local_Kws_n_stacks = NULL;
6931 /* Receive notification that a thread has been joined with. The
6932 significance here is that we do not expect to see any further
6933 references to its vector clocks (Thr::viR and Thr::viW). */
6934 void libhb_joinedwith_done ( Thr* thr )
6936 tl_assert(thr);
6937 /* Caller must ensure that this is only ever called once per Thr. */
6938 tl_assert(!thr->joinedwith_done);
6939 thr->joinedwith_done = True;
6940 if (thr->llexit_done)
6941 VTS__declare_thread_very_dead(thr);
6945 /* Both Segs and SOs point to VTSs. However, there is no sharing, so
6946 a Seg that points at a VTS is its one-and-only owner, and ditto for
6947 a SO that points at a VTS. */
6949 SO* libhb_so_alloc ( void )
6951 return SO__Alloc();
6954 void libhb_so_dealloc ( SO* so )
6956 tl_assert(so);
6957 tl_assert(so->magic == SO_MAGIC);
6958 SO__Dealloc(so);
6961 /* See comments in libhb.h for details on the meaning of
6962 strong vs weak sends and strong vs weak receives. */
6963 void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6965 /* Copy the VTSs from 'thr' into the sync object, and then move
6966 the thread along one step. */
6968 tl_assert(so);
6969 tl_assert(so->magic == SO_MAGIC);
6971 /* stay sane .. a thread's read-clock must always lead or be the
6972 same as its write-clock */
6973 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6974 tl_assert(leq);
6977 /* since we're overwriting the VtsIDs in the SO, we need to drop
6978 any references made by the previous contents thereof */
6979 if (so->viR == VtsID_INVALID) {
6980 tl_assert(so->viW == VtsID_INVALID);
6981 so->viR = thr->viR;
6982 so->viW = thr->viW;
6983 VtsID__rcinc(so->viR);
6984 VtsID__rcinc(so->viW);
6985 } else {
6986 /* In a strong send, we dump any previous VC in the SO and
6987 install the sending thread's VC instead. For a weak send we
6988 must join2 with what's already there. */
6989 tl_assert(so->viW != VtsID_INVALID);
6990 VtsID__rcdec(so->viR);
6991 VtsID__rcdec(so->viW);
6992 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6993 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6994 VtsID__rcinc(so->viR);
6995 VtsID__rcinc(so->viW);
6998 /* move both parent clocks along */
6999 VtsID__rcdec(thr->viR);
7000 VtsID__rcdec(thr->viW);
7001 thr->viR = VtsID__tick( thr->viR, thr );
7002 thr->viW = VtsID__tick( thr->viW, thr );
7003 if (!thr->llexit_done) {
7004 Filter__clear(thr->filter, "libhb_so_send");
7005 note_local_Kw_n_stack_for(thr);
7007 VtsID__rcinc(thr->viR);
7008 VtsID__rcinc(thr->viW);
7010 if (strong_send)
7011 show_thread_state("s-send", thr);
7012 else
7013 show_thread_state("w-send", thr);
7016 void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
7018 tl_assert(so);
7019 tl_assert(so->magic == SO_MAGIC);
7021 if (so->viR != VtsID_INVALID) {
7022 tl_assert(so->viW != VtsID_INVALID);
7024 /* Weak receive (basically, an R-acquisition of a R-W lock).
7025 This advances the read-clock of the receiver, but not the
7026 write-clock. */
7027 VtsID__rcdec(thr->viR);
7028 thr->viR = VtsID__join2( thr->viR, so->viR );
7029 VtsID__rcinc(thr->viR);
7031 /* At one point (r10589) it seemed safest to tick the clocks for
7032 the receiving thread after the join. But on reflection, I
7033 wonder if that might cause it to 'overtake' constraints,
7034 which could lead to missing races. So, back out that part of
7035 r10589. */
7036 //VtsID__rcdec(thr->viR);
7037 //thr->viR = VtsID__tick( thr->viR, thr );
7038 //VtsID__rcinc(thr->viR);
7040 /* For a strong receive, we also advance the receiver's write
7041 clock, which means the receive as a whole is essentially
7042 equivalent to a W-acquisition of a R-W lock. */
7043 if (strong_recv) {
7044 VtsID__rcdec(thr->viW);
7045 thr->viW = VtsID__join2( thr->viW, so->viW );
7046 VtsID__rcinc(thr->viW);
7048 /* See comment just above, re r10589. */
7049 //VtsID__rcdec(thr->viW);
7050 //thr->viW = VtsID__tick( thr->viW, thr );
7051 //VtsID__rcinc(thr->viW);
7054 if (thr->filter)
7055 Filter__clear(thr->filter, "libhb_so_recv");
7056 note_local_Kw_n_stack_for(thr);
7058 if (strong_recv)
7059 show_thread_state("s-recv", thr);
7060 else
7061 show_thread_state("w-recv", thr);
7063 } else {
7064 tl_assert(so->viW == VtsID_INVALID);
7065 /* Deal with degenerate case: 'so' has no vts, so there has been
7066 no message posted to it. Just ignore this case. */
7067 show_thread_state("d-recv", thr);
7071 Bool libhb_so_everSent ( SO* so )
7073 if (so->viR == VtsID_INVALID) {
7074 tl_assert(so->viW == VtsID_INVALID);
7075 return False;
7076 } else {
7077 tl_assert(so->viW != VtsID_INVALID);
7078 return True;
7082 #define XXX1 0 // 0x67a106c
7083 #define XXX2 0
7085 static inline Bool TRACEME(Addr a, SizeT szB) {
7086 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
7087 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
7088 return False;
7090 static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
7092 SVal sv = zsm_sread08(a);
7093 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
7094 show_thread_state("", thr);
7095 VG_(printf)("%s","\n");
7098 void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
7100 SVal sv = SVal__mkC(thr->viW, thr->viW);
7101 tl_assert(is_sane_SVal_C(sv));
7102 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
7103 zsm_sset_range( a, szB, sv );
7104 Filter__clear_range( thr->filter, a, szB );
7105 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
7108 void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
7110 /* do nothing */
7114 /* Set the lines zix_start till zix_end to NOACCESS. */
7115 static void zsm_secmap_line_range_noaccess (SecMap *sm,
7116 UInt zix_start, UInt zix_end)
7118 for (UInt lz = zix_start; lz <= zix_end; lz++) {
7119 LineZ* lineZ;
7120 lineZ = &sm->linesZ[lz];
7121 if (lineZ->dict[0] != SVal_INVALID) {
7122 rcdec_LineZ(lineZ);
7123 lineZ->dict[0] = SVal_NOACCESS;
7124 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
7125 } else {
7126 clear_LineF_of_Z(lineZ);
7128 for (UInt i = 0; i < N_LINE_ARANGE/4; i++)
7129 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
7133 /* Set the given range to SVal_NOACCESS in-place in the secmap.
7134 a must be cacheline aligned. len must be a multiple of a cacheline
7135 and must be < N_SECMAP_ARANGE. */
7136 static void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
7138 tl_assert (is_valid_scache_tag (a));
7139 tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
7140 tl_assert (len < N_SECMAP_ARANGE);
7142 SecMap *sm1 = shmem__find_SecMap (a);
7143 SecMap *sm2 = shmem__find_SecMap (a + len - 1);
7144 UWord zix_start = shmem__get_SecMap_offset(a ) >> N_LINE_BITS;
7145 UWord zix_end = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;
7147 if (sm1) {
7148 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
7149 zsm_secmap_line_range_noaccess (sm1, zix_start,
7150 sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
7152 if (sm2 && sm1 != sm2) {
7153 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
7154 zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
7158 /* Set the given address range to SVal_NOACCESS.
7159 The SecMaps fully set to SVal_NOACCESS will be pushed in SecMap_freelist. */
7160 static void zsm_sset_range_noaccess (Addr addr, SizeT len)
7163 BPC = Before, Partial Cacheline, = addr
7164 (i.e. starting inside a cacheline/inside a SecMap)
7165 BFC = Before, Full Cacheline(s), but not full SecMap
7166 (i.e. starting inside a SecMap)
7167 FSM = Full SecMap(s)
7168 (i.e. starting a SecMap)
7169 AFC = After, Full Cacheline(s), but not full SecMap
7170 (i.e. first address after the full SecMap(s))
7171 APC = After, Partial Cacheline, i.e. first address after the
7172 full CacheLines).
7173 ARE = After Range End = addr+len = first address not part of the range.
7175 If addr starts a Cacheline, then BPC == BFC.
7176 If addr starts a SecMap, then BPC == BFC == FSM.
7177 If addr+len starts a SecMap, then APC == ARE == AFC
7178 If addr+len starts a Cacheline, then APC == ARE
7180 Addr ARE = addr + len;
7181 Addr BPC = addr;
7182 Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
7183 Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
7184 Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
7185 Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
7186 SizeT Plen = len; // Plen will be split between the following:
7187 SizeT BPClen;
7188 SizeT BFClen;
7189 SizeT FSMlen;
7190 SizeT AFClen;
7191 SizeT APClen;
7193 /* Consumes from Plen the nr of bytes between from and to.
7194 from and to must be aligned on a multiple of round.
7195 The length consumed will be a multiple of round, with
7196 a maximum of Plen. */
7197 # define PlenCONSUME(from, to, round, consumed) \
7198 do { \
7199 if (from < to) { \
7200 if (to - from < Plen) \
7201 consumed = to - from; \
7202 else \
7203 consumed = ROUNDDN(Plen, round); \
7204 } else { \
7205 consumed = 0; \
7207 Plen -= consumed; } while (0)
7209 PlenCONSUME(BPC, BFC, 1, BPClen);
7210 PlenCONSUME(BFC, FSM, N_LINE_ARANGE, BFClen);
7211 PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
7212 PlenCONSUME(AFC, APC, N_LINE_ARANGE, AFClen);
7213 PlenCONSUME(APC, ARE, 1, APClen);
7215 if (0)
7216 VG_(printf) ("addr %p[%lu] ARE %p"
7217 " BPC %p[%lu] BFC %p[%lu] FSM %p[%lu]"
7218 " AFC %p[%lu] APC %p[%lu]\n",
7219 (void*)addr, len, (void*)ARE,
7220 (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
7221 (void*)AFC, AFClen, (void*)APC, APClen);
7223 tl_assert (Plen == 0);
7225 /* Set to NOACCESS pieces before and after not covered by entire SecMaps. */
7227 /* First we set the partial cachelines. This is done through the cache. */
7228 if (BPClen > 0)
7229 zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
7230 if (APClen > 0)
7231 zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);
7233 /* After this, we will not use the cache anymore. We will directly work
7234 in-place on the z shadow memory in SecMap(s).
7235 So, we invalidate the cachelines for the whole range we are setting
7236 to NOACCESS below. */
7237 shmem__invalidate_scache_range (BFC, APC - BFC);
7239 if (BFClen > 0)
7240 zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
7241 if (AFClen > 0)
7242 zsm_sset_range_noaccess_in_secmap (AFC, AFClen);
7244 if (FSMlen > 0) {
7245 /* Set to NOACCESS all the SecMaps, pushing the SecMaps to the
7246 free list. */
7247 Addr sm_start = FSM;
7248 while (sm_start < AFC) {
7249 SecMap *sm = shmem__find_SecMap (sm_start);
7250 if (sm) {
7251 Addr gaKey;
7252 SecMap *fm_sm;
7254 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
7255 for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
7256 LineZ *lineZ = &sm->linesZ[lz];
7257 if (LIKELY(lineZ->dict[0] != SVal_INVALID))
7258 rcdec_LineZ(lineZ);
7259 else
7260 clear_LineF_of_Z(lineZ);
7262 if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
7263 tl_assert (0);
7264 stats__secmaps_in_map_shmem--;
7265 tl_assert (gaKey == sm_start);
7266 tl_assert (sm == fm_sm);
7267 stats__secmaps_ssetGCed++;
7268 push_SecMap_on_freelist (sm);
7270 sm_start += N_SECMAP_ARANGE;
7272 tl_assert (sm_start == AFC);
7274 /* The above loop might have kept copies of freed SecMap in the smCache.
7275 => clear them. */
7276 if (address_in_range(smCache[0].gaKey, FSM, FSMlen)) {
7277 smCache[0].gaKey = 1;
7278 smCache[0].sm = NULL;
7280 if (address_in_range(smCache[1].gaKey, FSM, FSMlen)) {
7281 smCache[1].gaKey = 1;
7282 smCache[1].sm = NULL;
7284 if (address_in_range(smCache[2].gaKey, FSM, FSMlen)) {
7285 smCache[2].gaKey = 1;
7286 smCache[2].sm = NULL;
7288 STATIC_ASSERT (3 == sizeof(smCache)/sizeof(SMCacheEnt));
7292 void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
7294 /* This really does put the requested range in NoAccess. It's
7295 expensive though. */
7296 SVal sv = SVal_NOACCESS;
7297 tl_assert(is_sane_SVal_C(sv));
7298 if (LIKELY(szB < 2 * N_LINE_ARANGE))
7299 zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
7300 else
7301 zsm_sset_range_noaccess (a, szB);
7302 Filter__clear_range( thr->filter, a, szB );
7305 /* Works byte at a time. Can be optimised if needed. */
7306 UWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
7308 UWord anr = 0; // nr of bytes addressable.
7310 /* Get the accessibility of each byte. Pay attention to not
7311 create SecMap or LineZ when checking if a byte is addressable.
7313 Note: this is used for client request. Performance deemed not critical.
7314 So for simplicity, we work byte per byte.
7315 Performance could be improved by working with full cachelines
7316 or with full SecMap, when reaching a cacheline or secmap boundary. */
7317 for (SizeT i = 0; i < len; i++) {
7318 SVal sv = SVal_INVALID;
7319 Addr b = a + i;
7320 Addr tag = b & ~(N_LINE_ARANGE - 1);
7321 UWord wix = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
7322 UWord cloff = get_cacheline_offset(b);
7324 /* Note: we do not use get_cacheline(b) to avoid creating cachelines
7325 and/or SecMap for non addressable bytes. */
7326 if (tag == cache_shmem.tags0[wix]) {
7327 CacheLine copy = cache_shmem.lyns0[wix];
7328 /* We work on a copy of the cacheline, as we do not want to
7329 record the client request as a real read.
7330 The below is somewhat similar to zsm_sapply08__msmcread but
7331 avoids side effects on the cache. */
7332 UWord toff = get_tree_offset(b); /* == 0 .. 7 */
7333 UWord tno = get_treeno(b);
7334 UShort descr = copy.descrs[tno];
7335 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
7336 SVal* tree = &copy.svals[tno << 3];
7337 copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
7339 sv = copy.svals[cloff];
7340 } else {
7341 /* Byte not found in the cacheline. Search for a SecMap. */
7342 SecMap *sm = shmem__find_SecMap(b);
7343 LineZ *lineZ;
7344 if (sm == NULL)
7345 sv = SVal_NOACCESS;
7346 else {
7347 UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
7348 lineZ = &sm->linesZ[zix];
7349 if (lineZ->dict[0] == SVal_INVALID) {
7350 LineF *lineF = SVal2Ptr(lineZ->dict[1]);
7351 sv = lineF->w64s[cloff];
7352 } else {
7353 UWord ix = read_twobit_array( lineZ->ix2s, cloff );
7354 sv = lineZ->dict[ix];
7359 tl_assert (sv != SVal_INVALID);
7360 if (sv == SVal_NOACCESS) {
7361 if (abits)
7362 abits[i] = 0x00;
7363 } else {
7364 if (abits)
7365 abits[i] = 0xff;
7366 anr++;
7370 return anr;
7374 void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
7376 SVal sv = SVal_NOACCESS;
7377 tl_assert(is_sane_SVal_C(sv));
7378 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
7379 if (LIKELY(szB < 2 * N_LINE_ARANGE))
7380 zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
7381 else
7382 zsm_sset_range_noaccess (a, szB);
7383 Filter__clear_range( thr->filter, a, szB );
7384 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
7387 Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
7388 tl_assert(thr);
7389 return thr->hgthread;
7392 void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
7393 tl_assert(thr);
7394 thr->hgthread = hgthread;
7397 void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
7399 zsm_scopy_range(src, dst, len);
7400 Filter__clear_range( thr->filter, dst, len );
7403 void libhb_maybe_GC ( void )
7405 /* GC the unreferenced (zero rc) RCECs when
7406 (1) reaching a significant nr of RCECs (to avoid scanning a contextTab
7407 with mostly NULL ptr)
7408 and (2) approaching the max nr of RCEC (as we have in any case
7409 at least that amount of RCEC in the pool allocator)
7410 Note: the margin allows to avoid a small but constant increase
7411 of the max nr of RCEC due to the fact that libhb_maybe_GC is
7412 not called when the current nr of RCEC exactly reaches the max.
7413 and (3) the nr of referenced RCECs is less than 75% than total nr RCECs.
7414 Avoid growing too much the nr of RCEC keeps the memory use low,
7415 and avoids to have too many elements in the (fixed) contextTab hashtable.
7417 if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
7418 && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
7419 && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
7420 do_RCEC_GC();
7422 /* If there are still no entries available (all the table entries are full),
7423 and we hit the threshold point, then do a GC */
7424 Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
7425 && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
7426 if (UNLIKELY (vts_tab_GC))
7427 vts_tab__do_GC( False/*don't show stats*/ );
7429 /* scan GC the SecMaps when
7430 (1) no SecMap in the freelist
7431 and (2) the current nr of live secmaps exceeds the threshold. */
7432 if (UNLIKELY(SecMap_freelist == NULL
7433 && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
7434 // If we did a vts tab GC, then no need to flush the cache again.
7435 if (!vts_tab_GC)
7436 zsm_flush_cache();
7437 shmem__SecMap_do_GC(True);
7440 /* Check the reference counts (expensive) */
7441 if (CHECK_CEM)
7442 event_map__check_reference_counts();
7446 /////////////////////////////////////////////////////////////////
7447 /////////////////////////////////////////////////////////////////
7448 // //
7449 // SECTION END main library //
7450 // //
7451 /////////////////////////////////////////////////////////////////
7452 /////////////////////////////////////////////////////////////////
7454 /*--------------------------------------------------------------------*/
7455 /*--- end libhb_main.c ---*/
7456 /*--------------------------------------------------------------------*/