2 /*--------------------------------------------------------------------*/
3 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
4 /*--- accessibility (A) and validity (V) status of each byte. ---*/
5 /*--- mc_main.c ---*/
6 /*--------------------------------------------------------------------*/
8 /*
9 This file is part of MemCheck, a heavyweight Valgrind tool for
10 detecting memory errors.
12 Copyright (C) 2000-2013 Julian Seward
13 jseward@acm.org
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
30 The GNU General Public License is contained in the file COPYING.
33 #include "pub_tool_basics.h"
34 #include "pub_tool_aspacemgr.h"
35 #include "pub_tool_gdbserver.h"
36 #include "pub_tool_poolalloc.h"
37 #include "pub_tool_hashtable.h" // For mc_include.h
38 #include "pub_tool_libcbase.h"
39 #include "pub_tool_libcassert.h"
40 #include "pub_tool_libcprint.h"
41 #include "pub_tool_machine.h"
42 #include "pub_tool_mallocfree.h"
43 #include "pub_tool_options.h"
44 #include "pub_tool_oset.h"
45 #include "pub_tool_rangemap.h"
46 #include "pub_tool_replacemalloc.h"
47 #include "pub_tool_tooliface.h"
48 #include "pub_tool_threadstate.h"
50 #include "mc_include.h"
51 #include "memcheck.h" /* for client requests */
54 /* Set to 1 to enable handwritten assembly helpers on targets for
55 which it is supported. */
56 #define ENABLE_ASSEMBLY_HELPERS 1
58 /* Set to 1 to do a little more sanity checking */
59 #define VG_DEBUG_MEMORY 0
61 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
63 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
64 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
67 /*------------------------------------------------------------*/
68 /*--- Fast-case knobs ---*/
69 /*------------------------------------------------------------*/
71 // Comment these out to disable the fast cases (don't just set them to zero).
73 #define PERF_FAST_LOADV 1
74 #define PERF_FAST_STOREV 1
76 #define PERF_FAST_SARP 1
78 #define PERF_FAST_STACK 1
79 #define PERF_FAST_STACK2 1
81 /* Change this to 1 to enable assertions on origin tracking cache fast
82 paths */
83 #define OC_ENABLE_ASSERTIONS 0
86 /*------------------------------------------------------------*/
87 /*--- Comments on the origin tracking implementation ---*/
88 /*------------------------------------------------------------*/
90 /* See detailed comment entitled
91 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
92 which is contained further on in this file. */
95 /*------------------------------------------------------------*/
96 /*--- V bits and A bits ---*/
97 /*------------------------------------------------------------*/
99 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
100 thinks the corresponding value bit is defined. And every memory byte
101 has an A bit, which tracks whether Memcheck thinks the program can access
102 it safely (ie. it's mapped, and has at least one of the RWX permission bits
103 set). So every N-bit register is shadowed with N V bits, and every memory
104 byte is shadowed with 8 V bits and one A bit.
106 In the implementation, we use two forms of compression (compressed V bits
107 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
108 for memory.
110 Memcheck also tracks extra information about each heap block that is
111 allocated, for detecting memory leaks and other purposes.
114 /*------------------------------------------------------------*/
115 /*--- Basic A/V bitmap representation. ---*/
116 /*------------------------------------------------------------*/
118 /* All reads and writes are checked against a memory map (a.k.a. shadow
119 memory), which records the state of all memory in the process.
121 On 32-bit machines the memory map is organised as follows.
122 The top 16 bits of an address are used to index into a top-level
123 map table, containing 65536 entries. Each entry is a pointer to a
124 second-level map, which records the accessibility and validity
125 permissions for the 65536 bytes indexed by the lower 16 bits of the
126 address. Each byte is represented by two bits (details are below). So
127 each second-level map contains 16384 bytes. This two-level arrangement
128 conveniently divides the 4G address space into 64k lumps, each of size 64k
129 bytes.
131 All entries in the primary (top-level) map must point to a valid
132 secondary (second-level) map. Since many of the 64kB chunks will
133 have the same status for every bit -- ie. noaccess (for unused
134 address space) or entirely addressable and defined (for code segments) --
135 there are three distinguished secondary maps, which indicate 'noaccess',
136 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
137 map entry points to the relevant distinguished map. In practice,
138 typically more than half of the addressable memory is represented with
139 the 'undefined' or 'defined' distinguished secondary map, so it gives a
140 good saving. It also lets us set the V+A bits of large address regions
141 quickly in set_address_range_perms().
143 On 64-bit machines it's more complicated. If we followed the same basic
144 scheme we'd have a four-level table which would require too many memory
145 accesses. So instead the top-level map table has 2^20 entries (indexed
146 using bits 16..35 of the address); this covers the bottom 64GB. Any
147 accesses above 64GB are handled with a slow, sparse auxiliary table.
148 Valgrind's address space manager tries very hard to keep things below
149 this 64GB barrier so that performance doesn't suffer too much.
151 Note that this file has a lot of different functions for reading and
152 writing shadow memory. Only a couple are strictly necessary (eg.
153 get_vabits2 and set_vabits2), most are just specialised for specific
154 common cases to improve performance.
156 Aside: the V+A bits are less precise than they could be -- we have no way
157 of marking memory as read-only. It would be great if we could add an
158 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
159 which requires 2.3 bits to hold, and there's no way to do that elegantly
160 -- we'd have to double up to 4 bits of metadata per byte, which doesn't
161 seem worth it.
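/* Worked example (added for illustration; not part of the original
   source): on a 32-bit build, consider the address 0x4F12ABCD.  Its
   top 16 bits, 0x4F12, index the 65536-entry primary map and select
   one secondary map covering 0x4F120000..0x4F12FFFF.  Its low 16 bits,
   0xABCD, select the V+A state of the byte within that secondary:
   2 bits per byte, packed 4 bytes per shadow byte, so the state lives
   in vabits8[0xABCD >> 2] = vabits8[0x2AF3], in the bit pair chosen by
   (0xABCD & 3) = 1, i.e. bits [3..2]. */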
164 /* --------------- Basic configuration --------------- */
166 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. */
168 #if VG_WORDSIZE == 4
170 /* cover the entire address space */
171 # define N_PRIMARY_BITS 16
173 #else
175 /* Just handle the first 64G fast and the rest via auxiliary
176 primaries. If you change this, Memcheck will assert at startup.
177 See the definition of UNALIGNED_OR_HIGH for extensive comments. */
178 # define N_PRIMARY_BITS 20
180 #endif
183 /* Do not change this. */
184 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS)
186 /* Do not change this. */
187 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
190 /* --------------- Secondary maps --------------- */
192 // Each byte of memory conceptually has an A bit, which indicates its
193 // addressability, and 8 V bits, which indicate its definedness.
195 // But because very few bytes are partially defined, we can use a nice
196 // compression scheme to reduce the size of shadow memory. Each byte of
197 // memory has 2 bits which indicate its state (ie. V+A bits):
199 // 00: noaccess (unaddressable but treated as fully defined)
200 // 01: undefined (addressable and fully undefined)
201 // 10: defined (addressable and fully defined)
202 // 11: partdefined (addressable and partially defined)
204 // In the "partdefined" case, we use a secondary table to store the V bits.
205 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
206 // bits.
208 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
209 // four bytes (32 bits) of memory are in each chunk. Hence the name
210 // "vabits8". This lets us get the V+A bits for four bytes at a time
211 // easily (without having to do any shifting and/or masking), and that is a
212 // very common operation. (Note that although each vabits8 chunk
213 // is 8 bits in size, it represents 32 bits of memory.)
215 // The representation is "inverse" little-endian... each 4 bytes of
216 // memory is represented by a 1 byte value, where:
218 // - the status of byte (a+0) is held in bits [1..0]
219 // - the status of byte (a+1) is held in bits [3..2]
220 // - the status of byte (a+2) is held in bits [5..4]
221 // - the status of byte (a+3) is held in bits [7..6]
223 // It's "inverse" because endianness normally describes a mapping from
224 // value bits to memory addresses; in this case the mapping is inverted.
225 // Ie. instead of particular value bits being held in certain addresses, in
226 // this case certain addresses are represented by particular value bits.
227 // See insert_vabits2_into_vabits8() for an example.
229 // But note that we don't compress the V bits stored in registers; they
230 // need to be explicit to make the shadow operations possible. Therefore
231 // when moving values between registers and memory we need to convert
232 // between the expanded in-register format and the compressed in-memory
233 // format. This isn't so difficult, it just requires careful attention in a
234 // few places.
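// Worked example (added for illustration; not in the original source):
// suppose byte (a+0) is undefined (01), byte (a+1) is defined (10), and
// bytes (a+2) and (a+3) are noaccess (00).  The packed shadow byte is
//
//    vabits8 = (00 << 6) | (00 << 4) | (10 << 2) | (01 << 0)
//            = 0b00001001
//            = 0x09
//
// and extract_vabits2_from_vabits8(a+1, 0x09) recovers (0x09 >> 2) & 3
// = 10b, i.e. "defined".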
236 // These represent eight bits of memory.
237 #define VA_BITS2_NOACCESS 0x0 // 00b
238 #define VA_BITS2_UNDEFINED 0x1 // 01b
239 #define VA_BITS2_DEFINED 0x2 // 10b
240 #define VA_BITS2_PARTDEFINED 0x3 // 11b
242 // These represent 16 bits of memory.
243 #define VA_BITS4_NOACCESS 0x0 // 00_00b
244 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
245 #define VA_BITS4_DEFINED 0xa // 10_10b
247 // These represent 32 bits of memory.
248 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
249 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
250 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
252 // These represent 64 bits of memory.
253 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
254 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
255 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
258 #define SM_CHUNKS 16384 // Each SM covers 64k of memory.
259 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
260 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3)
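/* Illustrative sketch (added; not part of the original source): how the
   macros above combine with the primary map to read one byte's 2-bit
   state on a 32-bit build, where every address is covered by
   primary_map[] (declared further down).  It restates what
   get_secmap_for_reading() / get_vabits2() below do, so it is kept
   under "#if 0" purely as documentation. */
#if 0
static UChar example_get_vabits2_32bit ( Addr a )
{
   SecMap* sm      = primary_map[a >> 16];    /* top 16 bits pick the SecMap   */
   UWord   sm_off  = SM_OFF(a);               /* (a & 0xffff) >> 2             */
   UChar   vabits8 = sm->vabits8[sm_off];     /* one shadow byte = 4 mem bytes */
   UInt    shift   = (a & 3) << 1;            /* 0, 2, 4 or 6                  */
   return (vabits8 >> shift) & 0x3;           /* a VA_BITS2_* value            */
}
#endif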
262 // Paranoia: it's critical for performance that the requested inlining
263 // occurs. So try extra hard.
264 #define INLINE inline __attribute__((always_inline))
266 static INLINE Addr start_of_this_sm ( Addr a ) {
267 return (a & (~SM_MASK));
269 static INLINE Bool is_start_of_sm ( Addr a ) {
270 return (start_of_this_sm(a) == a);
273 typedef
274 struct {
275 UChar vabits8[SM_CHUNKS];
277 SecMap;
279 // 3 distinguished secondary maps, one for no-access, one for
280 // accessible but undefined, and one for accessible and defined.
281 // Distinguished secondaries may never be modified.
282 #define SM_DIST_NOACCESS 0
283 #define SM_DIST_UNDEFINED 1
284 #define SM_DIST_DEFINED 2
286 static SecMap sm_distinguished[3];
288 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
289 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
292 // Forward declaration
293 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
295 /* dist_sm points to one of our three distinguished secondaries. Make
296 a copy of it so that we can write to it.
298 static SecMap* copy_for_writing ( SecMap* dist_sm )
300 SecMap* new_sm;
301 tl_assert(dist_sm == &sm_distinguished[0]
302 || dist_sm == &sm_distinguished[1]
303 || dist_sm == &sm_distinguished[2]);
305 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
306 if (new_sm == NULL)
307 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
308 sizeof(SecMap) );
309 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
310 update_SM_counts(dist_sm, new_sm);
311 return new_sm;
314 /* --------------- Stats --------------- */
316 static Int n_issued_SMs = 0;
317 static Int n_deissued_SMs = 0;
318 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs
319 static Int n_undefined_SMs = 0;
320 static Int n_defined_SMs = 0;
321 static Int n_non_DSM_SMs = 0;
322 static Int max_noaccess_SMs = 0;
323 static Int max_undefined_SMs = 0;
324 static Int max_defined_SMs = 0;
325 static Int max_non_DSM_SMs = 0;
327 /* # searches initiated in auxmap_L1, and # base cmps required */
328 static ULong n_auxmap_L1_searches = 0;
329 static ULong n_auxmap_L1_cmps = 0;
330 /* # of searches that missed in auxmap_L1 and therefore had to
331 be handed to auxmap_L2. And the number of nodes inserted. */
332 static ULong n_auxmap_L2_searches = 0;
333 static ULong n_auxmap_L2_nodes = 0;
335 static Int n_sanity_cheap = 0;
336 static Int n_sanity_expensive = 0;
338 static Int n_secVBit_nodes = 0;
339 static Int max_secVBit_nodes = 0;
341 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
343 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
344 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
345 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --;
346 else { n_non_DSM_SMs --;
347 n_deissued_SMs ++; }
349 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
350 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
351 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++;
352 else { n_non_DSM_SMs ++;
353 n_issued_SMs ++; }
355 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs;
356 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
357 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs;
358 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs;
361 /* --------------- Primary maps --------------- */
363 /* The main primary map. This covers some initial part of the address
364 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
365 handled using the auxiliary primary map.
367 static SecMap* primary_map[N_PRIMARY_MAP];
370 /* An entry in the auxiliary primary map. base must be a 64k-aligned
371 value, and sm points at the relevant secondary map. As with the
372 main primary map, the secondary may be either a real secondary, or
373 one of the three distinguished secondaries. DO NOT CHANGE THIS
374 LAYOUT: the first word has to be the key for OSet fast lookups.
376 typedef
377 struct {
378 Addr base;
379 SecMap* sm;
381 AuxMapEnt;
383 /* Tunable parameter: How big is the L1 queue? */
384 #define N_AUXMAP_L1 24
386 /* Tunable parameter: How far along the L1 queue to insert
387 entries resulting from L2 lookups? */
388 #define AUXMAP_L1_INSERT_IX 12
390 static struct {
391 Addr base;
392 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
394 auxmap_L1[N_AUXMAP_L1];
396 static OSet* auxmap_L2 = NULL;
398 static void init_auxmap_L1_L2 ( void )
400 Int i;
401 for (i = 0; i < N_AUXMAP_L1; i++) {
402 auxmap_L1[i].base = 0;
403 auxmap_L1[i].ent = NULL;
406 tl_assert(0 == offsetof(AuxMapEnt,base));
407 tl_assert(sizeof(Addr) == sizeof(void*));
408 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
409 /*fastCmp*/ NULL,
410 VG_(malloc), "mc.iaLL.1", VG_(free) );
413 /* Check representation invariants; if OK return NULL; else a
414 descriptive bit of text. Also return the number of
415 non-distinguished secondary maps referred to from the auxiliary
416 primary maps. */
418 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
420 Word i, j;
421 /* On a 32-bit platform, the L2 and L1 tables should
422 both remain empty forever.
424 On a 64-bit platform:
425 In the L2 table:
426 all .base & 0xFFFF == 0
427 all .base > MAX_PRIMARY_ADDRESS
428 In the L1 table:
429 all .base & 0xFFFF == 0
430 all (.base > MAX_PRIMARY_ADDRESS
431 .base & 0xFFFF == 0
432 and .ent points to an AuxMapEnt with the same .base)
434 or (.base == 0 and .ent == NULL)
436 *n_secmaps_found = 0;
437 if (sizeof(void*) == 4) {
438 /* 32-bit platform */
439 if (VG_(OSetGen_Size)(auxmap_L2) != 0)
440 return "32-bit: auxmap_L2 is non-empty";
441 for (i = 0; i < N_AUXMAP_L1; i++)
442 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
443 return "32-bit: auxmap_L1 is non-empty";
444 } else {
445 /* 64-bit platform */
446 UWord elems_seen = 0;
447 AuxMapEnt *elem, *res;
448 AuxMapEnt key;
449 /* L2 table */
450 VG_(OSetGen_ResetIter)(auxmap_L2);
451 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
452 elems_seen++;
453 if (0 != (elem->base & (Addr)0xFFFF))
454 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
455 if (elem->base <= MAX_PRIMARY_ADDRESS)
456 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
457 if (elem->sm == NULL)
458 return "64-bit: .sm in _L2 is NULL";
459 if (!is_distinguished_sm(elem->sm))
460 (*n_secmaps_found)++;
462 if (elems_seen != n_auxmap_L2_nodes)
463 return "64-bit: disagreement on number of elems in _L2";
464 /* Check L1-L2 correspondence */
465 for (i = 0; i < N_AUXMAP_L1; i++) {
466 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
467 continue;
468 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
469 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
470 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
471 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
472 if (auxmap_L1[i].ent == NULL)
473 return "64-bit: .ent is NULL in auxmap_L1";
474 if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
475 return "64-bit: _L1 and _L2 bases are inconsistent";
476 /* Look it up in auxmap_L2. */
477 key.base = auxmap_L1[i].base;
478 key.sm = 0;
479 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
480 if (res == NULL)
481 return "64-bit: _L1 .base not found in _L2";
482 if (res != auxmap_L1[i].ent)
483 return "64-bit: _L1 .ent disagrees with _L2 entry";
485 /* Check L1 contains no duplicates */
486 for (i = 0; i < N_AUXMAP_L1; i++) {
487 if (auxmap_L1[i].base == 0)
488 continue;
489 for (j = i+1; j < N_AUXMAP_L1; j++) {
490 if (auxmap_L1[j].base == 0)
491 continue;
492 if (auxmap_L1[j].base == auxmap_L1[i].base)
493 return "64-bit: duplicate _L1 .base entries";
497 return NULL; /* ok */
500 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
502 Word i;
503 tl_assert(ent);
504 tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
505 for (i = N_AUXMAP_L1-1; i > rank; i--)
506 auxmap_L1[i] = auxmap_L1[i-1];
507 auxmap_L1[rank].base = ent->base;
508 auxmap_L1[rank].ent = ent;
511 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
513 AuxMapEnt key;
514 AuxMapEnt* res;
515 Word i;
517 tl_assert(a > MAX_PRIMARY_ADDRESS);
518 a &= ~(Addr)0xFFFF;
520 /* First search the front-cache, which is a self-organising
521 list containing the most popular entries. */
523 if (LIKELY(auxmap_L1[0].base == a))
524 return auxmap_L1[0].ent;
525 if (LIKELY(auxmap_L1[1].base == a)) {
526 Addr t_base = auxmap_L1[0].base;
527 AuxMapEnt* t_ent = auxmap_L1[0].ent;
528 auxmap_L1[0].base = auxmap_L1[1].base;
529 auxmap_L1[0].ent = auxmap_L1[1].ent;
530 auxmap_L1[1].base = t_base;
531 auxmap_L1[1].ent = t_ent;
532 return auxmap_L1[0].ent;
535 n_auxmap_L1_searches++;
537 for (i = 0; i < N_AUXMAP_L1; i++) {
538 if (auxmap_L1[i].base == a) {
539 break;
542 tl_assert(i >= 0 && i <= N_AUXMAP_L1);
544 n_auxmap_L1_cmps += (ULong)(i+1);
546 if (i < N_AUXMAP_L1) {
547 if (i > 0) {
548 Addr t_base = auxmap_L1[i-1].base;
549 AuxMapEnt* t_ent = auxmap_L1[i-1].ent;
550 auxmap_L1[i-1].base = auxmap_L1[i-0].base;
551 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent;
552 auxmap_L1[i-0].base = t_base;
553 auxmap_L1[i-0].ent = t_ent;
554 i--;
556 return auxmap_L1[i].ent;
559 n_auxmap_L2_searches++;
561 /* First see if we already have it. */
562 key.base = a;
563 key.sm = 0;
565 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
566 if (res)
567 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
568 return res;
571 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
573 AuxMapEnt *nyu, *res;
575 /* First see if we already have it. */
576 res = maybe_find_in_auxmap( a );
577 if (LIKELY(res))
578 return res;
580 /* Ok, there's no entry in the secondary map, so we'll have
581 to allocate one. */
582 a &= ~(Addr)0xFFFF;
584 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
585 nyu->base = a;
586 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS];
587 VG_(OSetGen_Insert)( auxmap_L2, nyu );
588 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
589 n_auxmap_L2_nodes++;
590 return nyu;
593 /* --------------- SecMap fundamentals --------------- */
595 // In all these, 'low' means it's definitely in the main primary map,
596 // 'high' means it's definitely in the auxiliary table.
598 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
600 UWord pm_off = a >> 16;
601 # if VG_DEBUG_MEMORY >= 1
602 tl_assert(pm_off < N_PRIMARY_MAP);
603 # endif
604 return &primary_map[ pm_off ];
607 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
609 AuxMapEnt* am = find_or_alloc_in_auxmap(a);
610 return &am->sm;
613 static INLINE SecMap** get_secmap_ptr ( Addr a )
615 return ( a <= MAX_PRIMARY_ADDRESS
616 ? get_secmap_low_ptr(a)
617 : get_secmap_high_ptr(a));
620 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
622 return *get_secmap_low_ptr(a);
625 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
627 return *get_secmap_high_ptr(a);
630 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
632 SecMap** p = get_secmap_low_ptr(a);
633 if (UNLIKELY(is_distinguished_sm(*p)))
634 *p = copy_for_writing(*p);
635 return *p;
638 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
640 SecMap** p = get_secmap_high_ptr(a);
641 if (UNLIKELY(is_distinguished_sm(*p)))
642 *p = copy_for_writing(*p);
643 return *p;
646 /* Produce the secmap for 'a', either from the primary map or by
647 ensuring there is an entry for it in the aux primary map. The
648 secmap may be a distinguished one as the caller will only want to
649 be able to read it.
651 static INLINE SecMap* get_secmap_for_reading ( Addr a )
653 return ( a <= MAX_PRIMARY_ADDRESS
654 ? get_secmap_for_reading_low (a)
655 : get_secmap_for_reading_high(a) );
658 /* Produce the secmap for 'a', either from the primary map or by
659 ensuring there is an entry for it in the aux primary map. The
660 secmap may not be a distinguished one, since the caller will want
661 to be able to write it. If it is a distinguished secondary, make a
662 writable copy of it, install it, and return the copy instead. (COW
663 semantics).
665 static INLINE SecMap* get_secmap_for_writing ( Addr a )
667 return ( a <= MAX_PRIMARY_ADDRESS
668 ? get_secmap_for_writing_low (a)
669 : get_secmap_for_writing_high(a) );
672 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
673 allocate one if one doesn't already exist. This is used by the
674 leak checker.
676 static SecMap* maybe_get_secmap_for ( Addr a )
678 if (a <= MAX_PRIMARY_ADDRESS) {
679 return get_secmap_for_reading_low(a);
680 } else {
681 AuxMapEnt* am = maybe_find_in_auxmap(a);
682 return am ? am->sm : NULL;
686 /* --------------- Fundamental functions --------------- */
688 static INLINE
689 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
691 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
692 *vabits8 &= ~(0x3 << shift); // mask out the two old bits
693 *vabits8 |= (vabits2 << shift); // mask in the two new bits
696 static INLINE
697 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
699 UInt shift;
700 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
701 shift = (a & 2) << 1; // shift by 0 or 4
702 *vabits8 &= ~(0xf << shift); // mask out the four old bits
703 *vabits8 |= (vabits4 << shift); // mask in the four new bits
706 static INLINE
707 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
709 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
710 vabits8 >>= shift; // shift the two bits to the bottom
711 return 0x3 & vabits8; // mask out the rest
714 static INLINE
715 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
717 UInt shift;
718 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
719 shift = (a & 2) << 1; // shift by 0 or 4
720 vabits8 >>= shift; // shift the four bits to the bottom
721 return 0xf & vabits8; // mask out the rest
724 // Note that these four are only used in slow cases. The fast cases do
725 // clever things like combine the auxmap check (in
726 // get_secmap_{read,writ}able) with alignment checks.
728 // *** WARNING! ***
729 // Any time this function is called, if it is possible that vabits2
730 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
731 // sec-V-bits table must also be set!
732 static INLINE
733 void set_vabits2 ( Addr a, UChar vabits2 )
735 SecMap* sm = get_secmap_for_writing(a);
736 UWord sm_off = SM_OFF(a);
737 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
740 static INLINE
741 UChar get_vabits2 ( Addr a )
743 SecMap* sm = get_secmap_for_reading(a);
744 UWord sm_off = SM_OFF(a);
745 UChar vabits8 = sm->vabits8[sm_off];
746 return extract_vabits2_from_vabits8(a, vabits8);
749 // *** WARNING! ***
750 // Any time this function is called, if it is possible that any of the
751 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
752 // corresponding entry(s) in the sec-V-bits table must also be set!
753 static INLINE
754 UChar get_vabits8_for_aligned_word32 ( Addr a )
756 SecMap* sm = get_secmap_for_reading(a);
757 UWord sm_off = SM_OFF(a);
758 UChar vabits8 = sm->vabits8[sm_off];
759 return vabits8;
762 static INLINE
763 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
765 SecMap* sm = get_secmap_for_writing(a);
766 UWord sm_off = SM_OFF(a);
767 sm->vabits8[sm_off] = vabits8;
771 // Forward declarations
772 static UWord get_sec_vbits8(Addr a);
773 static void set_sec_vbits8(Addr a, UWord vbits8);
775 // Returns False if there was an addressability error.
776 static INLINE
777 Bool set_vbits8 ( Addr a, UChar vbits8 )
779 Bool ok = True;
780 UChar vabits2 = get_vabits2(a);
781 if ( VA_BITS2_NOACCESS != vabits2 ) {
782 // Addressable. Convert in-register format to in-memory format.
783 // Also remove any existing sec V bit entry for the byte if no
784 // longer necessary.
785 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; }
786 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
787 else { vabits2 = VA_BITS2_PARTDEFINED;
788 set_sec_vbits8(a, vbits8); }
789 set_vabits2(a, vabits2);
791 } else {
792 // Unaddressable! Do nothing -- when writing to unaddressable
793 // memory it acts as a black hole, and the V bits can never be seen
794 // again. So we don't have to write them at all.
795 ok = False;
797 return ok;
800 // Returns False if there was an addressability error. In that case, we put
801 // all defined bits into vbits8.
802 static INLINE
803 Bool get_vbits8 ( Addr a, UChar* vbits8 )
805 Bool ok = True;
806 UChar vabits2 = get_vabits2(a);
808 // Convert the in-memory format to in-register format.
809 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; }
810 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
811 else if ( VA_BITS2_NOACCESS == vabits2 ) {
812 *vbits8 = V_BITS8_DEFINED; // Make V bits defined!
813 ok = False;
814 } else {
815 tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
816 *vbits8 = get_sec_vbits8(a);
818 return ok;
822 /* --------------- Secondary V bit table ------------ */
824 // This table holds the full V bit pattern for partially-defined bytes
825 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
826 // memory.
828 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
829 // then overwrite the same address with a fully defined byte, the sec-V-bit
830 // node will not necessarily be removed. This is because checking for
831 // whether removal is necessary would slow down the fast paths.
833 // To avoid the stale nodes building up too much, we periodically (once the
834 // table reaches a certain size) garbage collect (GC) the table by
835 // traversing it and evicting any nodes that no longer contain any PDBs.
836 // If more than a certain proportion of nodes survived, we increase the
837 // table size so that GCs occur less often.
839 // This policy is designed to avoid bad table bloat in the worst case where
840 // a program creates huge numbers of stale PDBs -- we would get this bloat
841 // if we had no GC -- while handling well the case where a node becomes
842 // stale but shortly afterwards is rewritten with a PDB and so becomes
843 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
844 // remove all stale nodes as soon as possible, we just end up re-adding a
845 // lot of them in later again. The "sufficiently stale" approach avoids
846 // this. (If a program has many live PDBs, performance will just suck,
847 // there's no way around that.)
849 // Further comments, JRS 14 Feb 2012. It turns out that the policy of
850 // holding on to stale entries for 2 GCs before discarding them can lead
851 // to massive space leaks. So we're changing to an arrangement where
852 // lines are evicted as soon as they are observed to be stale during a
853 // GC. This also has a side benefit of allowing the sufficiently_stale
854 // field to be removed from the SecVBitNode struct, reducing its size by
855 // 8 bytes, which is a substantial space saving considering that the
856 // struct was previously 32 or so bytes, on a 64 bit target.
858 // In order to try and mitigate the problem that the "sufficiently stale"
859 // heuristic was designed to avoid, the table size is allowed to drift
860 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This
861 // means that nodes will exist in the table longer on average, and hopefully
862 // will be deleted and re-added less frequently.
864 // The previous scaling up mechanism (now called STEPUP) is retained:
865 // if residency exceeds 50%, the table is scaled up, although by a
866 // factor sqrt(2) rather than 2 as before. This effectively doubles the
867 // frequency of GCs when there are many PDBs and reduces the tendency of
868 // stale PDBs to reside for long periods in the table.
870 static OSet* secVBitTable;
872 // Stats
873 static ULong sec_vbits_new_nodes = 0;
874 static ULong sec_vbits_updates = 0;
876 // This must be a power of two; this is checked in mc_pre_clo_init().
877 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
878 // a larger address range) they take more space but we can get multiple
879 // partially-defined bytes in one if they are close to each other, reducing
880 // the number of total nodes. In practice sometimes they are clustered (eg.
881 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
882 // row), but often not. So we choose something intermediate.
883 #define BYTES_PER_SEC_VBIT_NODE 16
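// Worked example (added for illustration; not in the original source):
// with BYTES_PER_SEC_VBIT_NODE == 16, a PDB at address 0x5007 is stored
// in the node keyed VG_ROUNDDN(0x5007, 16) == 0x5000, in slot
// 0x5007 % 16 == 7; any PDBs in 0x5000..0x500F share that one node.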
885 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
886 // more than this many nodes survive a GC.
887 #define STEPUP_SURVIVOR_PROPORTION 0.5
888 #define STEPUP_GROWTH_FACTOR 1.414213562
890 // If the above heuristic doesn't apply, then we may make the table
891 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
892 // this many nodes survive a GC, _and_ the total table size does
893 // not exceed a fixed limit. The numbers are somewhat arbitrary, but
894 // work tolerably well on long Firefox runs. The scaleup ratio of 1.5%
895 // effectively, although gradually, reduces residency and increases the time
896 // between GCs for programs with small numbers of PDBs. The 80000 limit
897 // effectively limits the table size to around 2MB for programs with
898 // small numbers of PDBs, whilst giving a reasonably long lifetime to
899 // entries, to try and reduce the costs resulting from deleting and
900 // re-adding of entries.
901 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
902 #define DRIFTUP_GROWTH_FACTOR 1.015
903 #define DRIFTUP_MAX_SIZE 80000
905 // We GC the table when it gets this many nodes in it, ie. it's effectively
906 // the table size. It can change.
907 static Int secVBitLimit = 1000;
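// Illustrative sketch (added; not part of the original source) of the
// resizing rule that gcSecVBitTable() below applies after each GC,
// using the constants defined above.  With the initial limit of 1000:
// 600 survivors trips STEPUP and the limit becomes 1414; 200 survivors
// trips DRIFTUP and it becomes 1015; 100 survivors leaves it unchanged.
#if 0
static Int example_new_secVBitLimit ( Int limit, Int n_survivors )
{
   if ((Double)n_survivors > (Double)limit * STEPUP_SURVIVOR_PROPORTION)
      return (Int)((Double)limit * (Double)STEPUP_GROWTH_FACTOR);   /* ~ x1.41 */
   if (limit < DRIFTUP_MAX_SIZE
       && (Double)n_survivors > (Double)limit * DRIFTUP_SURVIVOR_PROPORTION)
      return (Int)((Double)limit * (Double)DRIFTUP_GROWTH_FACTOR);  /* x1.015  */
   return limit;
}
#endif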
909 // The number of GCs done, used to age sec-V-bit nodes for eviction.
910 // Because it's unsigned, wrapping doesn't matter -- the right answer will
911 // come out anyway.
912 static UInt GCs_done = 0;
914 typedef
915 struct {
916 Addr a;
917 UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
919 SecVBitNode;
921 static OSet* createSecVBitTable(void)
923 OSet* newSecVBitTable;
924 newSecVBitTable = VG_(OSetGen_Create_With_Pool)
925 ( offsetof(SecVBitNode, a),
926 NULL, // use fast comparisons
927 VG_(malloc), "mc.cSVT.1 (sec VBit table)",
928 VG_(free),
929 1000,
930 sizeof(SecVBitNode));
931 return newSecVBitTable;
934 static void gcSecVBitTable(void)
936 OSet* secVBitTable2;
937 SecVBitNode* n;
938 Int i, n_nodes = 0, n_survivors = 0;
940 GCs_done++;
942 // Create the new table.
943 secVBitTable2 = createSecVBitTable();
945 // Traverse the table, moving fresh nodes into the new table.
946 VG_(OSetGen_ResetIter)(secVBitTable);
947 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
948 // Keep node if any of its bytes are non-stale. Using
949 // get_vabits2() for the lookup is not very efficient, but I don't
950 // think it matters.
951 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
952 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
953 // Found a non-stale byte, so keep =>
954 // Insert a copy of the node into the new table.
955 SecVBitNode* n2 =
956 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
957 *n2 = *n;
958 VG_(OSetGen_Insert)(secVBitTable2, n2);
959 break;
964 // Get the before and after sizes.
965 n_nodes = VG_(OSetGen_Size)(secVBitTable);
966 n_survivors = VG_(OSetGen_Size)(secVBitTable2);
968 // Destroy the old table, and put the new one in its place.
969 VG_(OSetGen_Destroy)(secVBitTable);
970 secVBitTable = secVBitTable2;
972 if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
973 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
974 n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
977 // Increase table size if necessary.
978 if ((Double)n_survivors
979 > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
980 secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
981 if (VG_(clo_verbosity) > 1)
982 VG_(message)(Vg_DebugMsg,
983 "memcheck GC: %d new table size (stepup)\n",
984 secVBitLimit);
986 else
987 if (secVBitLimit < DRIFTUP_MAX_SIZE
988 && (Double)n_survivors
989 > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
990 secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
991 if (VG_(clo_verbosity) > 1)
992 VG_(message)(Vg_DebugMsg,
993 "memcheck GC: %d new table size (driftup)\n",
994 secVBitLimit);
998 static UWord get_sec_vbits8(Addr a)
1000 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1001 Int amod = a % BYTES_PER_SEC_VBIT_NODE;
1002 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1003 UChar vbits8;
1004 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1005 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1006 // make it to the secondary V bits table.
1007 vbits8 = n->vbits8[amod];
1008 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1009 return vbits8;
1012 static void set_sec_vbits8(Addr a, UWord vbits8)
1014 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1015 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE;
1016 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1017 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1018 // make it to the secondary V bits table.
1019 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1020 if (n) {
1021 n->vbits8[amod] = vbits8; // update
1022 sec_vbits_updates++;
1023 } else {
1024 // Do a table GC if necessary. Nb: do this before creating and
1025 // inserting the new node, to avoid erroneously GC'ing the new node.
1026 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1027 gcSecVBitTable();
1030 // New node: assign the specific byte, make the rest invalid (they
1031 // should never be read as-is, but be cautious).
1032 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1033 n->a = aAligned;
1034 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1035 n->vbits8[i] = V_BITS8_UNDEFINED;
1037 n->vbits8[amod] = vbits8;
1039 // Insert the new node.
1040 VG_(OSetGen_Insert)(secVBitTable, n);
1041 sec_vbits_new_nodes++;
1043 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1044 if (n_secVBit_nodes > max_secVBit_nodes)
1045 max_secVBit_nodes = n_secVBit_nodes;
1049 /* --------------- Endianness helpers --------------- */
1051 /* Returns the offset in memory of the byteno-th least significant byte
1052 in a wordszB-sized word, given the specified endianness. */
1053 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1054 UWord byteno ) {
1055 return bigendian ? (wordszB-1-byteno) : byteno;
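/* Worked example (added for illustration; not in the original source):
   for a 4-byte word, byte_offset_w(4, False, 0) is 0 (little-endian)
   and byte_offset_w(4, True, 0) is 3 (big-endian) -- in both cases the
   returned offset is where the least significant byte lives. */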
1059 /* --------------- Ignored address ranges --------------- */
1061 /* Denotes the address-error-reportability status for address ranges:
1062 IAR_NotIgnored: the usual case -- report errors in this range
1063 IAR_CommandLine: don't report errors -- from command line setting
1064 IAR_ClientReq: don't report errors -- from client request
1066 typedef
1067 enum { IAR_INVALID=99,
1068 IAR_NotIgnored,
1069 IAR_CommandLine,
1070 IAR_ClientReq }
1071 IARKind;
1073 static const HChar* showIARKind ( IARKind iark )
1075 switch (iark) {
1076 case IAR_INVALID: return "INVALID";
1077 case IAR_NotIgnored: return "NotIgnored";
1078 case IAR_CommandLine: return "CommandLine";
1079 case IAR_ClientReq: return "ClientReq";
1080 default: return "???";
1084 // RangeMap<IARKind>
1085 static RangeMap* gIgnoredAddressRanges = NULL;
1087 static void init_gIgnoredAddressRanges ( void )
1089 if (LIKELY(gIgnoredAddressRanges != NULL))
1090 return;
1091 gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
1092 VG_(free), IAR_NotIgnored );
1095 Bool MC_(in_ignored_range) ( Addr a )
1097 if (LIKELY(gIgnoredAddressRanges == NULL))
1098 return False;
1099 UWord how = IAR_INVALID;
1100 UWord key_min = ~(UWord)0;
1101 UWord key_max = (UWord)0;
1102 VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
1103 tl_assert(key_min <= a && a <= key_max);
1104 switch (how) {
1105 case IAR_NotIgnored: return False;
1106 case IAR_CommandLine: return True;
1107 case IAR_ClientReq: return True;
1108 default: break; /* invalid */
1110 VG_(tool_panic)("MC_(in_ignore_range)");
1111 /*NOTREACHED*/
1114 /* Parse two Addr separated by a dash, or fail. */
1116 static Bool parse_range ( const HChar** ppc, Addr* result1, Addr* result2 )
1118 Bool ok = VG_(parse_Addr) (ppc, result1);
1119 if (!ok)
1120 return False;
1121 if (**ppc != '-')
1122 return False;
1123 (*ppc)++;
1124 ok = VG_(parse_Addr) (ppc, result2);
1125 if (!ok)
1126 return False;
1127 return True;
1130 /* Parse a set of ranges separated by commas, or
1131 fail. If they are valid, add them to the global set of ignored
1132 ranges. */
1133 static Bool parse_ignore_ranges ( const HChar* str0 )
1135 init_gIgnoredAddressRanges();
1136 const HChar* str = str0;
1137 const HChar** ppc = &str;
1138 while (1) {
1139 Addr start = ~(Addr)0;
1140 Addr end = (Addr)0;
1141 Bool ok = parse_range(ppc, &start, &end);
1142 if (!ok)
1143 return False;
1144 if (start > end)
1145 return False;
1146 VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
1147 if (**ppc == 0)
1148 return True;
1149 if (**ppc != ',')
1150 return False;
1151 (*ppc)++;
1153 /*NOTREACHED*/
1154 return False;
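/* Example (added for illustration; not in the original source) of a
   string this parser accepts, e.g. as the argument of the
   --ignore-ranges= option:

      0x10000000-0x10ffffff,0x30000000-0x300fffff

   Each element is two VG_(parse_Addr)-style addresses joined by '-',
   and elements are separated by ','. */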
1157 /* Add or remove [start, +len) from the set of ignored ranges. */
1158 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
1160 init_gIgnoredAddressRanges();
1161 const Bool verbose = (VG_(clo_verbosity) > 1);
1162 if (len == 0) {
1163 return False;
1165 if (addRange) {
1166 VG_(bindRangeMap)(gIgnoredAddressRanges,
1167 start, start+len-1, IAR_ClientReq);
1168 if (verbose)
1169 VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
1170 (void*)start, (void*)(start+len-1));
1171 } else {
1172 VG_(bindRangeMap)(gIgnoredAddressRanges,
1173 start, start+len-1, IAR_NotIgnored);
1174 if (verbose)
1175 VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
1176 (void*)start, (void*)(start+len-1));
1178 if (verbose) {
1179 VG_(dmsg)("memcheck: now have %ld ranges:\n",
1180 VG_(sizeRangeMap)(gIgnoredAddressRanges));
1181 Word i;
1182 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
1183 UWord val = IAR_INVALID;
1184 UWord key_min = ~(UWord)0;
1185 UWord key_max = (UWord)0;
1186 VG_(indexRangeMap)( &key_min, &key_max, &val,
1187 gIgnoredAddressRanges, i );
1188 VG_(dmsg)("memcheck: [%ld] %016llx-%016llx %s\n",
1189 i, (ULong)key_min, (ULong)key_max, showIARKind(val));
1192 return True;
1196 /* --------------- Load/store slow cases. --------------- */
1198 static
1199 __attribute__((noinline))
1200 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
1201 Addr a, SizeT nBits, Bool bigendian )
1203 ULong pessim[4]; /* only used when p-l-ok=yes */
1204 SSizeT szB = nBits / 8;
1205 SSizeT szL = szB / 8; /* Size in Longs (64-bit units) */
1206 SSizeT i, j; /* Must be signed. */
1207 SizeT n_addrs_bad = 0;
1208 Addr ai;
1209 UChar vbits8;
1210 Bool ok;
1212 /* Code below assumes load size is a power of two and at least 64
1213 bits. */
1214 tl_assert((szB & (szB-1)) == 0 && szL > 0);
1216 /* If this triggers, you probably just need to increase the size of
1217 the pessim array. */
1218 tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
1220 for (j = 0; j < szL; j++) {
1221 pessim[j] = V_BITS64_DEFINED;
1222 res[j] = V_BITS64_UNDEFINED;
1225 /* Make up a result V word, which contains the loaded data for
1226 valid addresses and Defined for invalid addresses. Iterate over
1227 the bytes in the word, from the most significant down to the
1228 least. The vbits to return are calculated into res[]. Also
1229 compute the pessimising value to be used when
1230 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1231 info can be gleaned from the pessim array) but is used as a
1232 cross-check. */
1233 for (j = szL-1; j >= 0; j--) {
1234 ULong vbits64 = V_BITS64_UNDEFINED;
1235 ULong pessim64 = V_BITS64_DEFINED;
1236 UWord long_index = byte_offset_w(szL, bigendian, j);
1237 for (i = 8-1; i >= 0; i--) {
1238 PROF_EVENT(29, "mc_LOADV_128_or_256_slow(loop)");
1239 ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
1240 ok = get_vbits8(ai, &vbits8);
1241 vbits64 <<= 8;
1242 vbits64 |= vbits8;
1243 if (!ok) n_addrs_bad++;
1244 pessim64 <<= 8;
1245 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1247 res[long_index] = vbits64;
1248 pessim[long_index] = pessim64;
1251 /* In the common case, all the addresses involved are valid, so we
1252 just return the computed V bits and have done. */
1253 if (LIKELY(n_addrs_bad == 0))
1254 return;
1256 /* If there's no possibility of getting a partial-loads-ok
1257 exemption, report the error and quit. */
1258 if (!MC_(clo_partial_loads_ok)) {
1259 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1260 return;
1263 /* The partial-loads-ok exemption might apply. Find out if it
1264 does. If so, don't report an addressing error, but do return
1265 Undefined for the bytes that are out of range, so as to avoid
1266 false negatives. If it doesn't apply, just report an addressing
1267 error in the usual way. */
1269 /* Some code steps along byte strings in aligned chunks
1270 even when there is only a partially defined word at the end (eg,
1271 optimised strlen). This is allowed by the memory model of
1272 modern machines, since an aligned load cannot span two pages and
1273 thus cannot "partially fault".
1275 Therefore, a load from a partially-addressable place is allowed
1276 if all of the following hold:
1277 - the command-line flag is set [by default, it isn't]
1278 - it's an aligned load
1279 - at least one of the addresses in the word *is* valid
1281 Since this suppresses the addressing error, we avoid false
1282 negatives by marking bytes undefined when they come from an
1283 invalid address.
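/* Worked example (added for illustration; not in the original source)
   of why the bitwise OR below implements UifU: suppose byte 2 of the
   word came from an unmapped page.  Then pessim[] holds 0xFF for that
   byte (V_BITS8_UNDEFINED) and 0x00 for the others, so res[] |= pessim[]
   forces byte 2 to "undefined" while leaving the properly loaded bytes'
   V bits untouched (V_BIT_DEFINED == 0 is the identity for OR). */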
1286 /* "at least one of the addresses is invalid" */
1287 ok = False;
1288 for (j = 0; j < szL; j++)
1289 ok |= pessim[j] != V_BITS64_DEFINED;
1290 tl_assert(ok);
1292 if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
1293 /* Exemption applies. Use the previously computed pessimising
1294 value and return the combined result, but don't flag an
1295 addressing error. The pessimising value is Defined for valid
1296 addresses and Undefined for invalid addresses. */
1297 /* for assumption that doing bitwise or implements UifU */
1298 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1299 /* (really need "UifU" here...)
1300 vbits[j] UifU= pessim[j] (is pessimised by it, iow) */
1301 for (j = szL-1; j >= 0; j--)
1302 res[j] |= pessim[j];
1303 return;
1306 /* Exemption doesn't apply. Flag an addressing error in the normal
1307 way. */
1308 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1312 static
1313 __attribute__((noinline))
1314 __attribute__((used))
1315 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
1316 this function may get called from hand written assembly. */
1317 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1319 PROF_EVENT(30, "mc_LOADVn_slow");
1321 /* ------------ BEGIN semi-fast cases ------------ */
1322 /* These deal quickly-ish with the common auxiliary primary map
1323 cases on 64-bit platforms. They are merely a speedup hack; they can be
1324 omitted without loss of correctness/functionality. Note that in
1325 both cases the "sizeof(void*) == 8" causes these cases to be
1326 folded out by compilers on 32-bit platforms. These are derived
1327 from LOADV64 and LOADV32.
1329 if (LIKELY(sizeof(void*) == 8
1330 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1331 SecMap* sm = get_secmap_for_reading(a);
1332 UWord sm_off16 = SM_OFF_16(a);
1333 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1334 if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1335 return V_BITS64_DEFINED;
1336 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1337 return V_BITS64_UNDEFINED;
1338 /* else fall into the slow case */
1340 if (LIKELY(sizeof(void*) == 8
1341 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1342 SecMap* sm = get_secmap_for_reading(a);
1343 UWord sm_off = SM_OFF(a);
1344 UWord vabits8 = sm->vabits8[sm_off];
1345 if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1346 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1347 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1348 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1349 /* else fall into slow case */
1351 /* ------------ END semi-fast cases ------------ */
1353 ULong vbits64 = V_BITS64_UNDEFINED; /* result */
1354 ULong pessim64 = V_BITS64_DEFINED; /* only used when p-l-ok=yes */
1355 SSizeT szB = nBits / 8;
1356 SSizeT i; /* Must be signed. */
1357 SizeT n_addrs_bad = 0;
1358 Addr ai;
1359 UChar vbits8;
1360 Bool ok;
1362 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1364 /* Make up a 64-bit result V word, which contains the loaded data
1365 for valid addresses and Defined for invalid addresses. Iterate
1366 over the bytes in the word, from the most significant down to
1367 the least. The vbits to return are calculated into vbits64.
1368 Also compute the pessimising value to be used when
1369 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1370 info can be gleaned from pessim64) but is used as a
1371 cross-check. */
1372 for (i = szB-1; i >= 0; i--) {
1373 PROF_EVENT(31, "mc_LOADVn_slow(loop)");
1374 ai = a + byte_offset_w(szB, bigendian, i);
1375 ok = get_vbits8(ai, &vbits8);
1376 vbits64 <<= 8;
1377 vbits64 |= vbits8;
1378 if (!ok) n_addrs_bad++;
1379 pessim64 <<= 8;
1380 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1383 /* In the common case, all the addresses involved are valid, so we
1384 just return the computed V bits and have done. */
1385 if (LIKELY(n_addrs_bad == 0))
1386 return vbits64;
1388 /* If there's no possibility of getting a partial-loads-ok
1389 exemption, report the error and quit. */
1390 if (!MC_(clo_partial_loads_ok)) {
1391 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1392 return vbits64;
1395 /* The partial-loads-ok exemption might apply. Find out if it
1396 does. If so, don't report an addressing error, but do return
1397 Undefined for the bytes that are out of range, so as to avoid
1398 false negatives. If it doesn't apply, just report an addressing
1399 error in the usual way. */
1401 /* Some code steps along byte strings in aligned word-sized chunks
1402 even when there is only a partially defined word at the end (eg,
1403 optimised strlen). This is allowed by the memory model of
1404 modern machines, since an aligned load cannot span two pages and
1405 thus cannot "partially fault". Despite such behaviour being
1406 declared undefined by ANSI C/C++.
1408 Therefore, a load from a partially-addressable place is allowed
1409 if all of the following hold:
1410 - the command-line flag is set [by default, it isn't]
1411 - it's a word-sized, word-aligned load
1412 - at least one of the addresses in the word *is* valid
1414 Since this suppresses the addressing error, we avoid false
1415 negatives by marking bytes undefined when they come from an
1416 invalid address.
1419 /* "at least one of the addresses is invalid" */
1420 tl_assert(pessim64 != V_BITS64_DEFINED);
1422 if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1423 && n_addrs_bad < VG_WORDSIZE) {
1424 /* Exemption applies. Use the previously computed pessimising
1425 value for vbits64 and return the combined result, but don't
1426 flag an addressing error. The pessimising value is Defined
1427 for valid addresses and Undefined for invalid addresses. */
1428 /* for assumption that doing bitwise or implements UifU */
1429 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1430 /* (really need "UifU" here...)
1431 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1432 vbits64 |= pessim64;
1433 return vbits64;
1436 /* Also, it appears that gcc generates string-stepping code in
1437 32-bit chunks on 64 bit platforms. So, also grant an exception
1438 for this case. Note that the first clause of the conditional
1439 (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1440 will get folded out in 32 bit builds. */
1441 if (VG_WORDSIZE == 8
1442 && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
1443 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1444 /* (really need "UifU" here...)
1445 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1446 vbits64 |= pessim64;
1447 /* Mark the upper 32 bits as undefined, just to be on the safe
1448 side. */
1449 vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1450 return vbits64;
1453 /* Exemption doesn't apply. Flag an addressing error in the normal
1454 way. */
1455 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1457 return vbits64;
1461 static
1462 __attribute__((noinline))
1463 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1465 SizeT szB = nBits / 8;
1466 SizeT i, n_addrs_bad = 0;
1467 UChar vbits8;
1468 Addr ai;
1469 Bool ok;
1471 PROF_EVENT(35, "mc_STOREVn_slow");
1473 /* ------------ BEGIN semi-fast cases ------------ */
1474 /* These deal quickly-ish with the common auxiliary primary map
1475 cases on 64-bit platforms. They are merely a speedup hack; they can be
1476 omitted without loss of correctness/functionality. Note that in
1477 both cases the "sizeof(void*) == 8" causes these cases to be
1478 folded out by compilers on 32-bit platforms. The logic below
1479 is somewhat similar to some cases extensively commented in
1480 MC_(helperc_STOREV8).
1482 if (LIKELY(sizeof(void*) == 8
1483 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1484 SecMap* sm = get_secmap_for_reading(a);
1485 UWord sm_off16 = SM_OFF_16(a);
1486 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1487 if (LIKELY( !is_distinguished_sm(sm) &&
1488 (VA_BITS16_DEFINED == vabits16 ||
1489 VA_BITS16_UNDEFINED == vabits16) )) {
1490 /* Handle common case quickly: a is suitably aligned, */
1491 /* is mapped, and is addressable. */
1492 // Convert full V-bits in register to compact 2-bit form.
1493 if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1494 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1495 return;
1496 } else if (V_BITS64_UNDEFINED == vbytes) {
1497 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1498 return;
1500 /* else fall into the slow case */
1502 /* else fall into the slow case */
1504 if (LIKELY(sizeof(void*) == 8
1505 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1506 SecMap* sm = get_secmap_for_reading(a);
1507 UWord sm_off = SM_OFF(a);
1508 UWord vabits8 = sm->vabits8[sm_off];
1509 if (LIKELY( !is_distinguished_sm(sm) &&
1510 (VA_BITS8_DEFINED == vabits8 ||
1511 VA_BITS8_UNDEFINED == vabits8) )) {
1512 /* Handle common case quickly: a is suitably aligned, */
1513 /* is mapped, and is addressable. */
1514 // Convert full V-bits in register to compact 2-bit form.
1515 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1516 sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1517 return;
1518 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1519 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1520 return;
1522 /* else fall into the slow case */
1524 /* else fall into the slow case */
1526 /* ------------ END semi-fast cases ------------ */
1528 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1530 /* Dump vbytes in memory, iterating from least to most significant
1531 byte. At the same time establish addressability of the location. */
1532 for (i = 0; i < szB; i++) {
1533 PROF_EVENT(36, "mc_STOREVn_slow(loop)");
1534 ai = a + byte_offset_w(szB, bigendian, i);
1535 vbits8 = vbytes & 0xff;
1536 ok = set_vbits8(ai, vbits8);
1537 if (!ok) n_addrs_bad++;
1538 vbytes >>= 8;
1541 /* If an address error has happened, report it. */
1542 if (n_addrs_bad > 0)
1543 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1547 /*------------------------------------------------------------*/
1548 /*--- Setting permissions over address ranges. ---*/
1549 /*------------------------------------------------------------*/
1551 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1552 UWord dsm_num )
1554 UWord sm_off, sm_off16;
1555 UWord vabits2 = vabits16 & 0x3;
1556 SizeT lenA, lenB, len_to_next_secmap;
1557 Addr aNext;
1558 SecMap* sm;
1559 SecMap** sm_ptr;
1560 SecMap* example_dsm;
1562 PROF_EVENT(150, "set_address_range_perms");
1564 /* Check the V+A bits make sense. */
1565 tl_assert(VA_BITS16_NOACCESS == vabits16 ||
1566 VA_BITS16_UNDEFINED == vabits16 ||
1567 VA_BITS16_DEFINED == vabits16);
1569 // This code should never write PDBs; ensure this. (See comment above
1570 // set_vabits2().)
1571 tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1573 if (lenT == 0)
1574 return;
1576 if (lenT > 256 * 1024 * 1024) {
1577 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1578 const HChar* s = "unknown???";
1579 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1580 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1581 if (vabits16 == VA_BITS16_DEFINED ) s = "defined";
1582 VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1583 "large range [0x%lx, 0x%lx) (%s)\n",
1584 a, a + lenT, s);
1588 #ifndef PERF_FAST_SARP
1589 /*------------------ debug-only case ------------------ */
1591 // Endianness doesn't matter here because all bytes are being set to
1592 // the same value.
1593 // Nb: We don't have to worry about updating the sec-V-bits table
1594 // after these set_vabits2() calls because this code never writes
1595 // VA_BITS2_PARTDEFINED values.
1596 SizeT i;
1597 for (i = 0; i < lenT; i++) {
1598 set_vabits2(a + i, vabits2);
1600 return;
1602 #endif
1604 /*------------------ standard handling ------------------ */
1606 /* Get the distinguished secondary that we might want
1607 to use (part of the space-compression scheme). */
1608 example_dsm = &sm_distinguished[dsm_num];
1610 // We have to handle ranges covering various combinations of partial and
1611 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1612 // Cases marked with a '*' are common.
1614 //                  TYPE                                 PARTS USED
1615 //                  ----                                 ----------
1616 // * one partial sec-map                  (p)            1
1617 // - one whole sec-map                    (P)            2
1619 // * two partial sec-maps                 (pp)           1,3
1620 // - one partial, one whole sec-map       (pP)           1,2
1621 // - one whole, one partial sec-map       (Pp)           2,3
1622 // - two whole sec-maps                   (PP)           2,2
1624 // * one partial, one whole, one partial  (pPp)          1,2,3
1625 // - one partial, two whole               (pPP)          1,2,2
1626 // - two whole, one partial               (PPp)          2,2,3
1627 // - three whole                          (PPP)          2,2,2
1629 // * one partial, N-2 whole, one partial  (pP...Pp)      1,2...2,3
1630 // - one partial, N-1 whole               (pP...PP)      1,2...2,2
1631 // - N-1 whole, one partial               (PP...Pp)      2,2...2,3
1632 // - N whole                              (PP...PP)      2,2...2,2
1634 // Break up total length (lenT) into two parts: length in the first
1635 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
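// Illustrative worked example (not from the original source), taking
// SM_SIZE to be 64KB (0x10000) as per the "64KB steps" note in Part 2
// below: with a = 0x1F400 and lenT = 0x23000 we get
//    aNext              = 0x20000  (start of the next sec-map)
//    len_to_next_secmap = 0x00C00
//    lenA               = 0x00C00  (handled by Part 1)
//    lenB               = 0x22400  (two whole sec-maps in Part 2,
//                                   0x2400 bytes left for Part 3)
// and lenA + lenB == lenT, as required.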
1636 aNext = start_of_this_sm(a) + SM_SIZE;
1637 len_to_next_secmap = aNext - a;
1638 if ( lenT <= len_to_next_secmap ) {
1639 // Range entirely within one sec-map. Covers almost all cases.
1640 PROF_EVENT(151, "set_address_range_perms-single-secmap");
1641 lenA = lenT;
1642 lenB = 0;
1643 } else if (is_start_of_sm(a)) {
1644 // Range spans at least one whole sec-map, and starts at the beginning
1645 // of a sec-map; skip to Part 2.
1646 PROF_EVENT(152, "set_address_range_perms-startof-secmap");
1647 lenA = 0;
1648 lenB = lenT;
1649 goto part2;
1650 } else {
1651 // Range spans two or more sec-maps, first one is partial.
1652 PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
1653 lenA = len_to_next_secmap;
1654 lenB = lenT - lenA;
1657 //------------------------------------------------------------------------
1658 // Part 1: Deal with the first sec_map. Most of the time the range will be
1659 // entirely within a sec_map and this part alone will suffice. Also,
1660 // doing it this way lets us avoid repeatedly testing for the crossing of
1661 // a sec-map boundary within these loops.
1662 //------------------------------------------------------------------------
1664 // If it's distinguished, make it undistinguished if necessary.
1665 sm_ptr = get_secmap_ptr(a);
1666 if (is_distinguished_sm(*sm_ptr)) {
1667 if (*sm_ptr == example_dsm) {
1668 // Sec-map already has the V+A bits that we want, so skip.
1669 PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
1670 a = aNext;
1671 lenA = 0;
1672 } else {
1673 PROF_EVENT(155, "set_address_range_perms-dist-sm1");
1674 *sm_ptr = copy_for_writing(*sm_ptr);
1677 sm = *sm_ptr;
1679 // 1 byte steps
1680 while (True) {
1681 if (VG_IS_8_ALIGNED(a)) break;
1682 if (lenA < 1) break;
1683 PROF_EVENT(156, "set_address_range_perms-loop1a");
1684 sm_off = SM_OFF(a);
1685 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1686 a += 1;
1687 lenA -= 1;
1689 // 8-aligned, 8 byte steps
1690 while (True) {
1691 if (lenA < 8) break;
1692 PROF_EVENT(157, "set_address_range_perms-loop8a");
1693 sm_off16 = SM_OFF_16(a);
1694 ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1695 a += 8;
1696 lenA -= 8;
1698 // 1 byte steps
1699 while (True) {
1700 if (lenA < 1) break;
1701 PROF_EVENT(158, "set_address_range_perms-loop1b");
1702 sm_off = SM_OFF(a);
1703 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1704 a += 1;
1705 lenA -= 1;
1708 // We've finished the first sec-map. Is that it?
1709 if (lenB == 0)
1710 return;
1712 //------------------------------------------------------------------------
1713 // Part 2: Fast-set entire sec-maps at a time.
1714 //------------------------------------------------------------------------
1715 part2:
1716 // 64KB-aligned, 64KB steps.
1717 // Nb: we can reach here with lenB < SM_SIZE
1718 tl_assert(0 == lenA);
1719 while (True) {
1720 if (lenB < SM_SIZE) break;
1721 tl_assert(is_start_of_sm(a));
1722 PROF_EVENT(159, "set_address_range_perms-loop64K");
1723 sm_ptr = get_secmap_ptr(a);
1724 if (!is_distinguished_sm(*sm_ptr)) {
1725 PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
1726 // Free the non-distinguished sec-map that we're replacing. This
1727 // case happens moderately often, enough to be worthwhile.
1728 SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1729 tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1731 update_SM_counts(*sm_ptr, example_dsm);
1732 // Make the sec-map entry point to the example DSM
1733 *sm_ptr = example_dsm;
1734 lenB -= SM_SIZE;
1735 a += SM_SIZE;
1738 // We've finished the whole sec-maps. Is that it?
1739 if (lenB == 0)
1740 return;
1742 //------------------------------------------------------------------------
1743 // Part 3: Finish off the final partial sec-map, if necessary.
1744 //------------------------------------------------------------------------
1746 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1748 // If it's distinguished, make it undistinguished if necessary.
1749 sm_ptr = get_secmap_ptr(a);
1750 if (is_distinguished_sm(*sm_ptr)) {
1751 if (*sm_ptr == example_dsm) {
1752 // Sec-map already has the V+A bits that we want, so stop.
1753 PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
1754 return;
1755 } else {
1756 PROF_EVENT(162, "set_address_range_perms-dist-sm2");
1757 *sm_ptr = copy_for_writing(*sm_ptr);
1760 sm = *sm_ptr;
1762 // 8-aligned, 8 byte steps
1763 while (True) {
1764 if (lenB < 8) break;
1765 PROF_EVENT(163, "set_address_range_perms-loop8b");
1766 sm_off16 = SM_OFF_16(a);
1767 ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1768 a += 8;
1769 lenB -= 8;
1771 // 1 byte steps
1772 while (True) {
1773 if (lenB < 1) return;
1774 PROF_EVENT(164, "set_address_range_perms-loop1c");
1775 sm_off = SM_OFF(a);
1776 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1777 a += 1;
1778 lenB -= 1;
1783 /* --- Set permissions for arbitrary address ranges --- */
1785 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1787 PROF_EVENT(40, "MC_(make_mem_noaccess)");
1788 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1789 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1790 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1791 ocache_sarp_Clear_Origins ( a, len );
1794 static void make_mem_undefined ( Addr a, SizeT len )
1796 PROF_EVENT(41, "make_mem_undefined");
1797 DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1798 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1801 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1803 PROF_EVENT(43, "MC_(make_mem_undefined)");
1804 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1805 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1806 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1807 ocache_sarp_Set_Origins ( a, len, otag );
1810 static
1811 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1812 ThreadId tid, UInt okind )
1814 UInt ecu;
1815 ExeContext* here;
1816 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1817 if it is invalid. So no need to do it here. */
1818 tl_assert(okind <= 3);
1819 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1820 tl_assert(here);
1821 ecu = VG_(get_ECU_from_ExeContext)(here);
1822 tl_assert(VG_(is_plausible_ECU)(ecu));
1823 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1826 static
1827 void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
1829 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1832 static
1833 void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
1835 MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1838 void MC_(make_mem_defined) ( Addr a, SizeT len )
1840 PROF_EVENT(42, "MC_(make_mem_defined)");
1841 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1842 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1843 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1844 ocache_sarp_Clear_Origins ( a, len );
1847 __attribute__((unused))
1848 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
1850 MC_(make_mem_defined)(a, len);
1853 /* For each byte in [a,a+len), if the byte is addressable, make it be
1854 defined, but if it isn't addressable, leave it alone. In other
1855 words, a version of MC_(make_mem_defined) that doesn't mess with
1856 addressability. Low-performance implementation. */
1857 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1859 SizeT i;
1860 UChar vabits2;
1861 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1862 for (i = 0; i < len; i++) {
1863 vabits2 = get_vabits2( a+i );
1864 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1865 set_vabits2(a+i, VA_BITS2_DEFINED);
1866 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1867 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1873 /* Similarly (needed for mprotect handling ..) */
1874 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1876 SizeT i;
1877 UChar vabits2;
1878 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1879 for (i = 0; i < len; i++) {
1880 vabits2 = get_vabits2( a+i );
1881 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1882 set_vabits2(a+i, VA_BITS2_DEFINED);
1883 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1884 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1890 /* --- Block-copy permissions (needed for implementing realloc() and
1891 sys_mremap). --- */
1893 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1895 SizeT i, j;
1896 UChar vabits2, vabits8;
1897 Bool aligned, nooverlap;
1899 DEBUG("MC_(copy_address_range_state)\n");
1900 PROF_EVENT(50, "MC_(copy_address_range_state)");
1902 if (len == 0 || src == dst)
1903 return;
1905 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1906 nooverlap = src+len <= dst || dst+len <= src;
1908 if (nooverlap && aligned) {
1910 /* Vectorised fast case, when no overlap and suitably aligned */
1911 /* vector loop */
1912 i = 0;
1913 while (len >= 4) {
1914 vabits8 = get_vabits8_for_aligned_word32( src+i );
1915 set_vabits8_for_aligned_word32( dst+i, vabits8 );
1916 if (LIKELY(VA_BITS8_DEFINED == vabits8
1917 || VA_BITS8_UNDEFINED == vabits8
1918 || VA_BITS8_NOACCESS == vabits8)) {
1919 /* do nothing */
1920 } else {
1921 /* have to copy secondary map info */
1922 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1923 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1924 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1925 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1926 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1927 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1928 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1929 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1931 i += 4;
1932 len -= 4;
1934 /* fixup loop */
1935 while (len >= 1) {
1936 vabits2 = get_vabits2( src+i );
1937 set_vabits2( dst+i, vabits2 );
1938 if (VA_BITS2_PARTDEFINED == vabits2) {
1939 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1941 i++;
1942 len--;
1945 } else {
1947 /* We have to do things the slow way */
1948 if (src < dst) {
1949 for (i = 0, j = len-1; i < len; i++, j--) {
1950 PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
1951 vabits2 = get_vabits2( src+j );
1952 set_vabits2( dst+j, vabits2 );
1953 if (VA_BITS2_PARTDEFINED == vabits2) {
1954 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1959 if (src > dst) {
1960 for (i = 0; i < len; i++) {
1961 PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
1962 vabits2 = get_vabits2( src+i );
1963 set_vabits2( dst+i, vabits2 );
1964 if (VA_BITS2_PARTDEFINED == vabits2) {
1965 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1974 /*------------------------------------------------------------*/
1975 /*--- Origin tracking stuff - cache basics ---*/
1976 /*------------------------------------------------------------*/
1978 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1979 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1981 Note that this implementation draws inspiration from the "origin
1982 tracking by value piggybacking" scheme described in "Tracking Bad
1983 Apples: Reporting the Origin of Null and Undefined Value Errors"
1984 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1985 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1986 implemented completely differently.
1988 Origin tags and ECUs -- about the shadow values
1989 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1991 This implementation tracks the defining point of all uninitialised
1992 values using so called "origin tags", which are 32-bit integers,
1993 rather than using the values themselves to encode the origins. The
1994 latter, so-called "value piggybacking", is what the OOPSLA07 paper
1995 describes.
1997 Origin tags, as tracked by the machinery below, are 32-bit unsigned
1998 ints (UInts), regardless of the machine's word size. Each tag
1999 comprises an upper 30-bit ECU field and a lower 2-bit
2000 'kind' field. The ECU field is a number given out by m_execontext
2001 and has a 1-1 mapping with ExeContext*s. An ECU can be used
2002 directly as an origin tag (otag), but in fact we want to put
2003 additional information in the 'kind' field to indicate roughly where the
2004 tag came from. This helps print more understandable error messages
2005 for the user -- it has no other purpose. In summary:
2007 * Both ECUs and origin tags are represented as 32-bit words
2009 * m_execontext and the core-tool interface deal purely in ECUs.
2010 They have no knowledge of origin tags - that is a purely
2011 Memcheck-internal matter.
2013 * all valid ECUs have the lowest 2 bits zero and at least
2014 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2016 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2017 constants defined in mc_include.h.
2019 * to convert an otag back to an ECU, AND it with ~3
2021 One important fact is that no valid otag is zero. A zero otag is
2022 used by the implementation to indicate "no origin", which could
2023 mean that either the value is defined, or it is undefined but the
2024 implementation somehow managed to lose the origin.
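   To make the two conversions concrete, here is a purely illustrative
   sketch (not part of the original code; the helper names are
   hypothetical, using the tool's UInt type):

      static inline UInt ECU_to_otag ( UInt ecu, UInt okind )
         { return ecu | okind; }   // okind is one of the MC_OKIND_ values
      static inline UInt otag_to_ECU ( UInt otag )
         { return otag & ~3u; }    // strip the 2-bit kind field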
2026 The ECU used for memory created by malloc etc is derived from the
2027 stack trace at the time the malloc etc happens. This means the
2028 mechanism can show the exact allocation point for heap-created
2029 uninitialised values.
2031 In contrast, it is simply too expensive to create a complete
2032 backtrace for each stack allocation. Therefore we merely use a
2033 depth-1 backtrace for stack allocations, which can be done once at
2034 translation time, rather than N times at run time. The result of
2035 this is that, for stack created uninitialised values, Memcheck can
2036 only show the allocating function, and not what called it.
2037 Furthermore, compilers tend to move the stack pointer just once at
2038 the start of the function, to allocate all locals, and so in fact
2039 the stack origin almost always simply points to the opening brace
2040 of the function. Net result is, for stack origins, the mechanism
2041 can tell you in which function the undefined value was created, but
2042 that's all. Users will need to carefully check all locals in the
2043 specified function.
2045 Shadowing registers and memory
2046 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2048 Memory is shadowed using a two level cache structure (ocacheL1 and
2049 ocacheL2). Memory references are first directed to ocacheL1. This
2050 is a traditional 2-way set associative cache with 32-byte lines and
2051 approximate LRU replacement within each set.
2053 A naive implementation would require storing one 32 bit otag for
2054 each byte of memory covered, a 4:1 space overhead. Instead, there
2055 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2056 that shows which of the 4 bytes have that shadow value and which
2057 have a shadow value of zero (indicating no origin). Hence a lot of
2058 space is saved, but the cost is that only one different origin per
2059 4 bytes of address space can be represented. This is a source of
2060 imprecision, but how much of a problem it really is remains to be
2061 seen.
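   To put numbers on the saving (illustrative arithmetic, not a quote
   from the original text): the naive scheme costs 4 shadow bytes per
   byte of memory, i.e. 4:1.  This scheme costs, per 4 bytes of
   memory, one 4-byte otag plus a 4-bit mask, i.e. 4.5 shadow bytes
   per 4 bytes, roughly 1.125:1, ignoring the per-line tag and the
   fixed size of the cache structure itself.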
2063 A cache line that contains all zeroes ("no origins") contains no
2064 useful information, and can be ejected from the L1 cache "for
2065 free", in the sense that a read miss on the L1 causes a line of
2066 zeroes to be installed. However, ejecting a line containing
2067 nonzeroes risks losing origin information permanently. In order to
2068 prevent such lossage, ejected nonzero lines are placed in a
2069 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2070 lines. This can grow arbitrarily large, and so should ensure that
2071 Memcheck runs out of memory in preference to losing useful origin
2072 info due to cache size limitations.
2074 Shadowing registers is a bit tricky, because the shadow values are
2075 32 bits, regardless of the size of the register. That gives a
2076 problem for registers smaller than 32 bits. The solution is to
2077 find spaces in the guest state that are unused, and use those to
2078 shadow guest state fragments smaller than 32 bits. For example, on
2079 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
2080 shadow are allocated for the register's otag, then there are still
2081 12 bytes left over which could be used to shadow 3 other values.
2083 This implies there is some non-obvious mapping from guest state
2084 (start,length) pairs to the relevant shadow offset (for the origin
2085 tags). And it is unfortunately guest-architecture specific. The
2086 mapping is contained in mc_machine.c, which is quite lengthy but
2087 straightforward.
2089 Instrumenting the IR
2090 ~~~~~~~~~~~~~~~~~~~~
2092 Instrumentation is largely straightforward, and done by the
2093 functions schemeE and schemeS in mc_translate.c. These generate
2094 code for handling the origin tags of expressions (E) and statements
2095 (S) respectively. The rather strange names are a reference to the
2096 "compilation schemes" shown in Simon Peyton Jones' book "The
2097 Implementation of Functional Programming Languages" (Prentice Hall,
2098 1987, see
2099 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2101 schemeS merely arranges to move shadow values around the guest
2102 state to track the incoming IR. schemeE is largely trivial too.
2103 The only significant point is how to compute the otag corresponding
2104 to binary (or ternary, quaternary, etc) operator applications. The
2105 rule is simple: just take whichever value is larger (32-bit
2106 unsigned max). Constants get the special value zero. Hence this
2107 rule always propagates a nonzero (known) otag in preference to a
2108 zero (unknown, or more likely, value-is-defined) tag, as we want.
2109 If two different undefined values are inputs to a binary operator
2110 application, then which is propagated is arbitrary, but that
2111 doesn't matter, since the program is erroneous in using either of
2112 the values, and so there's no point in attempting to propagate
2113 both.
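   As an illustrative sketch only (the real instrumentation emits IR
   via schemeE rather than calling C; this helper is hypothetical),
   the propagation rule amounts to:

      static inline UInt otag_for_binop ( UInt otag_x, UInt otag_y )
         { return otag_x > otag_y ? otag_x : otag_y; }  // unsigned 32-bit max

   with constants contributing otag zero, so a known origin always
   wins over "no origin".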
2115 Since constants are abstracted to (otag) zero, much of the
2116 instrumentation code can be folded out without difficulty by the
2117 generic post-instrumentation IR cleanup pass, using these rules:
2118 Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y are
2119 constants is evaluated at JIT time, together with the resulting dead
2120 code removal. In practice this causes surprisingly few Max32Us to
2121 survive through to backend code generation.
2123 Integration with the V-bits machinery
2124 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2126 This is again largely straightforward. Mostly the otag and V bits
2127 stuff are independent. The only point of interaction is when the V
2128 bits instrumenter creates a call to a helper function to report an
2129 uninitialised value error -- in that case it must first use schemeE
2130 to get hold of the origin tag expression for the value, and pass
2131 that to the helper too.
2133 There is the usual stuff to do with setting address range
2134 permissions. When memory is painted undefined, we must also know
2135 the origin tag to paint with, which involves some tedious plumbing,
2136 particularly to do with the fast case stack handlers. When memory
2137 is painted defined or noaccess then the origin tags must be forced
2138 to zero.
2140 One of the goals of the implementation was to ensure that the
2141 non-origin tracking mode isn't slowed down at all. To do this,
2142 various functions to do with memory permissions setting (again,
2143 mostly pertaining to the stack) are duplicated for the with- and
2144 without-otag case.
2146 Dealing with stack redzones, and the NIA cache
2147 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2149 This is one of the few non-obvious parts of the implementation.
2151 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2152 reserved area below the stack pointer, that can be used as scratch
2153 space by compiler generated code for functions. In the Memcheck
2154 sources this is referred to as the "stack redzone". The important
2155 thing here is that such redzones are considered volatile across
2156 function calls and returns. So Memcheck takes care to mark them as
2157 undefined for each call and return, on the afflicted platforms.
2158 Past experience shows this is essential in order to get reliable
2159 messages about uninitialised values that come from the stack.
2161 So the question is, when we paint a redzone undefined, what origin
2162 tag should we use for it? Consider a function f() calling g(). If
2163 we paint the redzone using an otag derived from the ExeContext of
2164 the CALL/BL instruction in f, then any errors in g causing it to
2165 use uninitialised values that happen to lie in the redzone, will be
2166 reported as having their origin in f. Which is highly confusing.
2168 The same applies for returns: if, on a return, we paint the redzone
2169 using an origin tag derived from the ExeContext of the RET/BLR
2170 instruction in g, then any later errors in f causing it to use
2171 uninitialised values in the redzone, will be reported as having
2172 their origin in g. Which is just as confusing.
2174 To do it right, in both cases we need to use an origin tag which
2175 pertains to the instruction which dynamically follows the CALL/BL
2176 or RET/BLR. In short, one derived from the NIA - the "next
2177 instruction address".
2179 To make this work, Memcheck's redzone-painting helper,
2180 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2181 NIA. It converts the NIA to a 1-element ExeContext, and uses that
2182 ExeContext's ECU as the basis for the otag used to paint the
2183 redzone. The expensive part of this is converting an NIA into an
2184 ECU, since this happens once for every call and every return. So
2185 we use a simple 511-line, 2-way set associative cache
2186 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2187 the cost out.
2189 Further background comments
2190 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2192 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
2193 > it really just the address of the relevant ExeContext?
2195 Well, it's not the address, but a value which has a 1-1 mapping
2196 with ExeContexts, and is guaranteed not to be zero, since zero
2197 denotes (to memcheck) "unknown origin or defined value". So these
2198 UInts are just numbers starting at 4 and incrementing by 4; each
2199 ExeContext is given a number when it is created. (*** NOTE this
2200 confuses otags and ECUs; see comments above ***).
2202 Making these otags 32-bit regardless of the machine's word size
2203 makes the 64-bit implementation easier (next para). And it doesn't
2204 really limit us in any way, since for the tags to overflow would
2205 require that the program somehow caused 2^30-1 different
2206 ExeContexts to be created, in which case it is probably in deep
2207 trouble. Not to mention V will have soaked up many tens of
2208 gigabytes of memory merely to store them all.
2210 So having 64-bit origins doesn't really buy you anything, and has
2211 the following downsides:
2213 Suppose that instead, an otag is a UWord. This would mean that, on
2214 a 64-bit target,
2216 1. It becomes hard to shadow any element of guest state which is
2217 smaller than 8 bytes. To do so means you'd need to find some
2218 8-byte-sized hole in the guest state which you don't want to
2219 shadow, and use that instead to hold the otag. On ppc64, the
2220 condition code register(s) are split into 20 UChar sized pieces,
2221 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2222 and so that would entail finding 160 bytes somewhere else in the
2223 guest state.
2225 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2226 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2227 same) and so I had to look for 4 untracked otag-sized areas in
2228 the guest state to make that possible.
2230 The same problem exists of course when origin tags are only 32
2231 bits, but it's less extreme.
2233 2. (More compelling) it doubles the size of the origin shadow
2234 memory. Given that the shadow memory is organised as a fixed
2235 size cache, and that accuracy of tracking is limited by origins
2236 falling out the cache due to space conflicts, this isn't good.
2238 > Another question: is the origin tracking perfect, or are there
2239 > cases where it fails to determine an origin?
2241 It is imperfect for at least the following reasons, and
2242 probably more:
2244 * Insufficient capacity in the origin cache. When a line is
2245 evicted from the cache it is gone forever, and so subsequent
2246 queries for the line produce zero, indicating no origin
2247 information. Interestingly, a line containing all zeroes can be
2248 evicted "free" from the cache, since it contains no useful
2249 information, so there is scope perhaps for some cleverer cache
2250 management schemes. (*** NOTE, with the introduction of the
2251 second level origin tag cache, ocacheL2, this is no longer a
2252 problem. ***)
2254 * The origin cache only stores one otag per 32-bits of address
2255 space, plus 4 bits indicating which of the 4 bytes has that tag
2256 and which are considered defined. The result is that if two
2257 undefined bytes in the same word are stored in memory, the first
2258 stored byte's origin will be lost and replaced by the origin for
2259 the second byte.
2261 * Nonzero origin tags for defined values. Consider a binary
2262 operator application op(x,y). Suppose y is undefined (and so has
2263 a valid nonzero origin tag), and x is defined, but erroneously
2264 has a nonzero origin tag (defined values should have tag zero).
2265 If the erroneous tag has a numeric value greater than y's tag,
2266 then the rule for propagating origin tags through binary
2267 operations, which is simply to take the unsigned max of the two
2268 tags, will erroneously propagate x's tag rather than y's.
2270 * Some obscure uses of x86/amd64 byte registers can cause lossage
2271 or confusion of origins. %AH .. %DH are treated as different
2272 from, and unrelated to, their parent registers, %EAX .. %EDX.
2273 So some weird sequences like
2275 movb undefined-value, %AH
2276 movb defined-value, %AL
2277 .. use %AX or %EAX ..
2279 will cause the origin attributed to %AH to be ignored, since %AL,
2280 %AX, %EAX are treated as the same register, and %AH as a
2281 completely separate one.
2283 But having said all that, it actually seems to work fairly well in
2284 practice.
2287 static UWord stats_ocacheL1_find = 0;
2288 static UWord stats_ocacheL1_found_at_1 = 0;
2289 static UWord stats_ocacheL1_found_at_N = 0;
2290 static UWord stats_ocacheL1_misses = 0;
2291 static UWord stats_ocacheL1_lossage = 0;
2292 static UWord stats_ocacheL1_movefwds = 0;
2294 static UWord stats__ocacheL2_refs = 0;
2295 static UWord stats__ocacheL2_misses = 0;
2296 static UWord stats__ocacheL2_n_nodes_max = 0;
2298 /* Cache of 32-bit values, one every 32 bits of address space */
2300 #define OC_BITS_PER_LINE 5
2301 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
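/* With OC_BITS_PER_LINE == 5, each line covers 32 bytes of address
   space and holds OC_W32S_PER_LINE == 8 otags.  oc_line_offset()
   below selects which of those 8 slots a given address uses, and a
   line's tag is the address with its low 5 bits cleared (which is
   what is_valid_oc_tag() checks). */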
2303 static INLINE UWord oc_line_offset ( Addr a ) {
2304 return (a >> 2) & (OC_W32S_PER_LINE - 1);
2306 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2307 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2310 #define OC_LINES_PER_SET 2
2312 #define OC_N_SET_BITS 20
2313 #define OC_N_SETS (1 << OC_N_SET_BITS)
2315 /* These settings give:
2316 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2317 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
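   As a cross-check on those figures (illustrative arithmetic, not
   part of the original comment, and assuming no padding beyond the
   natural alignment of OCacheLine below): each line carries
   OC_W32S_PER_LINE = 8 otags, i.e. 32 useful bytes, so
   useful = 2^20 sets * 2 lines * 32 = 67,108,864 bytes.  A line
   occupies 8 + 32 + 8 = 48 bytes on a 64-bit host (Addr tag, 8
   UInts, 8 UChars) and 4 + 32 + 8 = 44 bytes on a 32-bit host,
   giving 2^20 * 2 * 48 = 100,663,296 and 2^20 * 2 * 44 = 92,274,688
   bytes respectively.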
2320 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2323 typedef
2324 struct {
2325 Addr tag;
2326 UInt w32[OC_W32S_PER_LINE];
2327 UChar descr[OC_W32S_PER_LINE];
2329 OCacheLine;
2331 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2332 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2333 and 'z' if all the represented tags are zero. */
2334 static UChar classify_OCacheLine ( OCacheLine* line )
2336 UWord i;
2337 if (line->tag == 1/*invalid*/)
2338 return 'e'; /* EMPTY */
2339 tl_assert(is_valid_oc_tag(line->tag));
2340 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2341 tl_assert(0 == ((~0xF) & line->descr[i]));
2342 if (line->w32[i] > 0 && line->descr[i] > 0)
2343 return 'n'; /* NONZERO - contains useful info */
2345 return 'z'; /* ZERO - no useful info */
2348 typedef
2349 struct {
2350 OCacheLine line[OC_LINES_PER_SET];
2352 OCacheSet;
2354 typedef
2355 struct {
2356 OCacheSet set[OC_N_SETS];
2358 OCache;
2360 static OCache* ocacheL1 = NULL;
2361 static UWord ocacheL1_event_ctr = 0;
2363 static void init_ocacheL2 ( void ); /* fwds */
2364 static void init_OCache ( void )
2366 UWord line, set;
2367 tl_assert(MC_(clo_mc_level) >= 3);
2368 tl_assert(ocacheL1 == NULL);
2369 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2370 if (ocacheL1 == NULL) {
2371 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2372 sizeof(OCache) );
2374 tl_assert(ocacheL1 != NULL);
2375 for (set = 0; set < OC_N_SETS; set++) {
2376 for (line = 0; line < OC_LINES_PER_SET; line++) {
2377 ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2380 init_ocacheL2();
2383 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2385 OCacheLine tmp;
2386 stats_ocacheL1_movefwds++;
2387 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2388 tmp = set->line[lineno-1];
2389 set->line[lineno-1] = set->line[lineno];
2390 set->line[lineno] = tmp;
2393 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2394 UWord i;
2395 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2396 line->w32[i] = 0; /* NO ORIGIN */
2397 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2399 line->tag = tag;
2402 //////////////////////////////////////////////////////////////
2403 //// OCache backing store
2405 static OSet* ocacheL2 = NULL;
2407 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2408 return VG_(malloc)(cc, szB);
2410 static void ocacheL2_free ( void* v ) {
2411 VG_(free)( v );
2414 /* Stats: # nodes currently in tree */
2415 static UWord stats__ocacheL2_n_nodes = 0;
2417 static void init_ocacheL2 ( void )
2419 tl_assert(!ocacheL2);
2420 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2421 tl_assert(0 == offsetof(OCacheLine,tag));
2422 ocacheL2
2423 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2424 NULL, /* fast cmp */
2425 ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2426 stats__ocacheL2_n_nodes = 0;
2429 /* Find line with the given tag in the tree, or NULL if not found. */
2430 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2432 OCacheLine* line;
2433 tl_assert(is_valid_oc_tag(tag));
2434 stats__ocacheL2_refs++;
2435 line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2436 return line;
2439 /* Delete the line with the given tag from the tree, if it is present, and
2440 free up the associated memory. */
2441 static void ocacheL2_del_tag ( Addr tag )
2443 OCacheLine* line;
2444 tl_assert(is_valid_oc_tag(tag));
2445 stats__ocacheL2_refs++;
2446 line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2447 if (line) {
2448 VG_(OSetGen_FreeNode)(ocacheL2, line);
2449 tl_assert(stats__ocacheL2_n_nodes > 0);
2450 stats__ocacheL2_n_nodes--;
2454 /* Add a copy of the given line to the tree. It must not already be
2455 present. */
2456 static void ocacheL2_add_line ( OCacheLine* line )
2458 OCacheLine* copy;
2459 tl_assert(is_valid_oc_tag(line->tag));
2460 copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2461 *copy = *line;
2462 stats__ocacheL2_refs++;
2463 VG_(OSetGen_Insert)( ocacheL2, copy );
2464 stats__ocacheL2_n_nodes++;
2465 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2466 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2469 ////
2470 //////////////////////////////////////////////////////////////
2472 __attribute__((noinline))
2473 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2475 OCacheLine *victim, *inL2;
2476 UChar c;
2477 UWord line;
2478 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2479 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2480 UWord tag = a & tagmask;
2481 tl_assert(setno >= 0 && setno < OC_N_SETS);
2483 /* we already tried line == 0; skip therefore. */
2484 for (line = 1; line < OC_LINES_PER_SET; line++) {
2485 if (ocacheL1->set[setno].line[line].tag == tag) {
2486 if (line == 1) {
2487 stats_ocacheL1_found_at_1++;
2488 } else {
2489 stats_ocacheL1_found_at_N++;
2491 if (UNLIKELY(0 == (ocacheL1_event_ctr++
2492 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2493 moveLineForwards( &ocacheL1->set[setno], line );
2494 line--;
2496 return &ocacheL1->set[setno].line[line];
2500 /* A miss. Use the last slot. Implicitly this means we're
2501 ejecting the line in the last slot. */
2502 stats_ocacheL1_misses++;
2503 tl_assert(line == OC_LINES_PER_SET);
2504 line--;
2505 tl_assert(line > 0);
2507 /* First, move the to-be-ejected line to the L2 cache. */
2508 victim = &ocacheL1->set[setno].line[line];
2509 c = classify_OCacheLine(victim);
2510 switch (c) {
2511 case 'e':
2512 /* the line is empty (has invalid tag); ignore it. */
2513 break;
2514 case 'z':
2515 /* line contains zeroes. We must ensure the backing store is
2516 updated accordingly, either by copying the line there
2517 verbatim, or by ensuring it isn't present there. We
2518 choose the latter on the basis that it reduces the size of
2519 the backing store. */
2520 ocacheL2_del_tag( victim->tag );
2521 break;
2522 case 'n':
2523 /* line contains at least one real, useful origin. Copy it
2524 to the backing store. */
2525 stats_ocacheL1_lossage++;
2526 inL2 = ocacheL2_find_tag( victim->tag );
2527 if (inL2) {
2528 *inL2 = *victim;
2529 } else {
2530 ocacheL2_add_line( victim );
2532 break;
2533 default:
2534 tl_assert(0);
2537 /* Now we must reload the L1 cache from the backing tree, if
2538 possible. */
2539 tl_assert(tag != victim->tag); /* stay sane */
2540 inL2 = ocacheL2_find_tag( tag );
2541 if (inL2) {
2542 /* We're in luck. It's in the L2. */
2543 ocacheL1->set[setno].line[line] = *inL2;
2544 } else {
2545 /* Missed at both levels of the cache hierarchy. We have to
2546 declare it as full of zeroes (unknown origins). */
2547 stats__ocacheL2_misses++;
2548 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2551 /* Move it one forwards */
2552 moveLineForwards( &ocacheL1->set[setno], line );
2553 line--;
2555 return &ocacheL1->set[setno].line[line];
2558 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2560 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2561 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2562 UWord tag = a & tagmask;
2564 stats_ocacheL1_find++;
2566 if (OC_ENABLE_ASSERTIONS) {
2567 tl_assert(setno >= 0 && setno < OC_N_SETS);
2568 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2571 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2572 return &ocacheL1->set[setno].line[0];
2575 return find_OCacheLine_SLOW( a );
2578 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2580 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2581 //// Set the origins for a+0 .. a+7
2582 { OCacheLine* line;
2583 UWord lineoff = oc_line_offset(a);
2584 if (OC_ENABLE_ASSERTIONS) {
2585 tl_assert(lineoff >= 0
2586 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2588 line = find_OCacheLine( a );
2589 line->descr[lineoff+0] = 0xF;
2590 line->descr[lineoff+1] = 0xF;
2591 line->w32[lineoff+0] = otag;
2592 line->w32[lineoff+1] = otag;
2594 //// END inlined, specialised version of MC_(helperc_b_store8)
2598 /*------------------------------------------------------------*/
2599 /*--- Aligned fast case permission setters, ---*/
2600 /*--- for dealing with stacks ---*/
2601 /*------------------------------------------------------------*/
2603 /*--------------------- 32-bit ---------------------*/
2605 /* Nb: by "aligned" here we mean 4-byte aligned */
2607 static INLINE void make_aligned_word32_undefined ( Addr a )
2609 PROF_EVENT(300, "make_aligned_word32_undefined");
2611 #ifndef PERF_FAST_STACK2
2612 make_mem_undefined(a, 4);
2613 #else
2615 UWord sm_off;
2616 SecMap* sm;
2618 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2619 PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
2620 make_mem_undefined(a, 4);
2621 return;
2624 sm = get_secmap_for_writing_low(a);
2625 sm_off = SM_OFF(a);
2626 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2628 #endif
2631 static INLINE
2632 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2634 make_aligned_word32_undefined(a);
2635 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2636 //// Set the origins for a+0 .. a+3
2637 { OCacheLine* line;
2638 UWord lineoff = oc_line_offset(a);
2639 if (OC_ENABLE_ASSERTIONS) {
2640 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2642 line = find_OCacheLine( a );
2643 line->descr[lineoff] = 0xF;
2644 line->w32[lineoff] = otag;
2646 //// END inlined, specialised version of MC_(helperc_b_store4)
2649 static INLINE
2650 void make_aligned_word32_noaccess ( Addr a )
2652 PROF_EVENT(310, "make_aligned_word32_noaccess");
2654 #ifndef PERF_FAST_STACK2
2655 MC_(make_mem_noaccess)(a, 4);
2656 #else
2658 UWord sm_off;
2659 SecMap* sm;
2661 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2662 PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
2663 MC_(make_mem_noaccess)(a, 4);
2664 return;
2667 sm = get_secmap_for_writing_low(a);
2668 sm_off = SM_OFF(a);
2669 sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2671 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2672 //// Clear the origins for a+0 .. a+3.
2673 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2674 OCacheLine* line;
2675 UWord lineoff = oc_line_offset(a);
2676 if (OC_ENABLE_ASSERTIONS) {
2677 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2679 line = find_OCacheLine( a );
2680 line->descr[lineoff] = 0;
2682 //// END inlined, specialised version of MC_(helperc_b_store4)
2684 #endif
2687 /*--------------------- 64-bit ---------------------*/
2689 /* Nb: by "aligned" here we mean 8-byte aligned */
2691 static INLINE void make_aligned_word64_undefined ( Addr a )
2693 PROF_EVENT(320, "make_aligned_word64_undefined");
2695 #ifndef PERF_FAST_STACK2
2696 make_mem_undefined(a, 8);
2697 #else
2699 UWord sm_off16;
2700 SecMap* sm;
2702 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2703 PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
2704 make_mem_undefined(a, 8);
2705 return;
2708 sm = get_secmap_for_writing_low(a);
2709 sm_off16 = SM_OFF_16(a);
2710 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2712 #endif
2715 static INLINE
2716 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2718 make_aligned_word64_undefined(a);
2719 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2720 //// Set the origins for a+0 .. a+7
2721 { OCacheLine* line;
2722 UWord lineoff = oc_line_offset(a);
2723 tl_assert(lineoff >= 0
2724 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2725 line = find_OCacheLine( a );
2726 line->descr[lineoff+0] = 0xF;
2727 line->descr[lineoff+1] = 0xF;
2728 line->w32[lineoff+0] = otag;
2729 line->w32[lineoff+1] = otag;
2731 //// END inlined, specialised version of MC_(helperc_b_store8)
2734 static INLINE
2735 void make_aligned_word64_noaccess ( Addr a )
2737 PROF_EVENT(330, "make_aligned_word64_noaccess");
2739 #ifndef PERF_FAST_STACK2
2740 MC_(make_mem_noaccess)(a, 8);
2741 #else
2743 UWord sm_off16;
2744 SecMap* sm;
2746 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2747 PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
2748 MC_(make_mem_noaccess)(a, 8);
2749 return;
2752 sm = get_secmap_for_writing_low(a);
2753 sm_off16 = SM_OFF_16(a);
2754 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2756 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2757 //// Clear the origins for a+0 .. a+7.
2758 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2759 OCacheLine* line;
2760 UWord lineoff = oc_line_offset(a);
2761 tl_assert(lineoff >= 0
2762 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2763 line = find_OCacheLine( a );
2764 line->descr[lineoff+0] = 0;
2765 line->descr[lineoff+1] = 0;
2767 //// END inlined, specialised version of MC_(helperc_b_store8)
2769 #endif
2773 /*------------------------------------------------------------*/
2774 /*--- Stack pointer adjustment ---*/
2775 /*------------------------------------------------------------*/
2777 #ifdef PERF_FAST_STACK
2778 # define MAYBE_USED
2779 #else
2780 # define MAYBE_USED __attribute__((unused))
2781 #endif
2783 /*--------------- adjustment by 4 bytes ---------------*/
2785 MAYBE_USED
2786 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2788 UInt otag = ecu | MC_OKIND_STACK;
2789 PROF_EVENT(110, "new_mem_stack_4");
2790 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2791 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2792 } else {
2793 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2797 MAYBE_USED
2798 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2800 PROF_EVENT(110, "new_mem_stack_4");
2801 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2802 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2803 } else {
2804 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2808 MAYBE_USED
2809 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2811 PROF_EVENT(120, "die_mem_stack_4");
2812 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2813 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2814 } else {
2815 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2819 /*--------------- adjustment by 8 bytes ---------------*/
2821 MAYBE_USED
2822 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2824 UInt otag = ecu | MC_OKIND_STACK;
2825 PROF_EVENT(111, "new_mem_stack_8");
2826 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2827 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2828 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2829 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2830 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2831 } else {
2832 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2836 MAYBE_USED
2837 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2839 PROF_EVENT(111, "new_mem_stack_8");
2840 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2841 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2842 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2843 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2844 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2845 } else {
2846 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2850 MAYBE_USED
2851 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2853 PROF_EVENT(121, "die_mem_stack_8");
2854 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2855 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2856 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2857 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2858 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2859 } else {
2860 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2864 /*--------------- adjustment by 12 bytes ---------------*/
2866 MAYBE_USED
2867 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2869 UInt otag = ecu | MC_OKIND_STACK;
2870 PROF_EVENT(112, "new_mem_stack_12");
2871 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2872 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2873 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2874 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2875 /* from previous test we don't have 8-alignment at offset +0,
2876 hence must have 8 alignment at offsets +4/-4. Hence safe to
2877 do 4 at +0 and then 8 at +4. */
2878 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2879 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2880 } else {
2881 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2885 MAYBE_USED
2886 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2888 PROF_EVENT(112, "new_mem_stack_12");
2889 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2890 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2891 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2892 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2893 /* from previous test we don't have 8-alignment at offset +0,
2894 hence must have 8 alignment at offsets +4/-4. Hence safe to
2895 do 4 at +0 and then 8 at +4. */
2896 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2897 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2898 } else {
2899 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2903 MAYBE_USED
2904 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2906 PROF_EVENT(122, "die_mem_stack_12");
2907 /* Note the -12 in the test */
2908 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2909 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2910 -4. */
2911 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2912 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2913 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2914 /* We have 4-alignment at +0, but we don't have 8-alignment at
2915 -12. So we must have 8-alignment at -8. Hence do 4 at -12
2916 and then 8 at -8. */
2917 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2918 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2919 } else {
2920 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2924 /*--------------- adjustment by 16 bytes ---------------*/
2926 MAYBE_USED
2927 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2929 UInt otag = ecu | MC_OKIND_STACK;
2930 PROF_EVENT(113, "new_mem_stack_16");
2931 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2932 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2933 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2934 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2935 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2936 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2937 Hence do 4 at +0, 8 at +4, 4 at +12. */
2938 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2939 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2940 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2941 } else {
2942 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2946 MAYBE_USED
2947 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2949 PROF_EVENT(113, "new_mem_stack_16");
2950 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2951 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2952 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2953 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2954 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2955 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2956 Hence do 4 at +0, 8 at +4, 4 at +12. */
2957 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2958 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2959 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2960 } else {
2961 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2965 MAYBE_USED
2966 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2968 PROF_EVENT(123, "die_mem_stack_16");
2969 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2970 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2971 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2972 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2973 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2974 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
2975 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2976 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2977 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2978 } else {
2979 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2983 /*--------------- adjustment by 32 bytes ---------------*/
2985 MAYBE_USED
2986 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2988 UInt otag = ecu | MC_OKIND_STACK;
2989 PROF_EVENT(114, "new_mem_stack_32");
2990 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2991 /* Straightforward */
2992 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2993 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2994 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2995 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2996 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2997 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
2998 +0,+28. */
2999 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3000 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3001 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3002 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
3003 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
3004 } else {
3005 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
3009 MAYBE_USED
3010 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
3012 PROF_EVENT(114, "new_mem_stack_32");
3013 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3014 /* Straightforward */
3015 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3016 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3017 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3018 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3019 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3020 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3021 +0,+28. */
3022 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3023 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3024 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3025 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3026 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3027 } else {
3028 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3032 MAYBE_USED
3033 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3035 PROF_EVENT(124, "die_mem_stack_32");
3036 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3037 /* Straightforward */
3038 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3039 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3040 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3041 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3042 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3043 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
3044 4 at -32,-4. */
3045 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3046 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3047 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3048 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3049 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3050 } else {
3051 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3055 /*--------------- adjustment by 112 bytes ---------------*/
3057 MAYBE_USED
3058 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3060 UInt otag = ecu | MC_OKIND_STACK;
3061 PROF_EVENT(115, "new_mem_stack_112");
3062 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3063 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3064 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3065 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3066 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3067 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3068 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3069 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3070 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3071 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3072 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3073 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3074 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3075 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3076 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3077 } else {
3078 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3082 MAYBE_USED
3083 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3085 PROF_EVENT(115, "new_mem_stack_112");
3086 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3087 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3088 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3089 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3090 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3091 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3092 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3093 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3094 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3095 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3096 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3097 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3098 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3099 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3100 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3101 } else {
3102 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3106 MAYBE_USED
3107 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3109 PROF_EVENT(125, "die_mem_stack_112");
3110 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3111 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3112 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3113 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3114 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3115 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3116 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3117 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3118 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3119 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3120 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3121 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3122 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3123 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3124 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3125 } else {
3126 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3130 /*--------------- adjustment by 128 bytes ---------------*/
3132 MAYBE_USED
3133 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3135 UInt otag = ecu | MC_OKIND_STACK;
3136 PROF_EVENT(116, "new_mem_stack_128");
3137 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3138 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3139 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3140 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3141 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3142 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3143 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3144 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3145 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3146 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3147 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3148 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3149 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3150 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3151 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3152 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3153 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3154 } else {
3155 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3159 MAYBE_USED
3160 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3162 PROF_EVENT(116, "new_mem_stack_128");
3163 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3164 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3165 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3166 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3167 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3168 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3169 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3170 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3171 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3172 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3173 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3174 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3175 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3176 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3177 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3178 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3179 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3180 } else {
3181 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3185 MAYBE_USED
3186 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3188 PROF_EVENT(126, "die_mem_stack_128");
3189 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3190 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3191 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3192 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3193 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3194 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3195 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3196 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3197 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3198 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3199 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3200 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3201 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3202 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3203 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3204 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3205 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3206 } else {
3207 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3211 /*--------------- adjustment by 144 bytes ---------------*/
3213 MAYBE_USED
3214 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3216 UInt otag = ecu | MC_OKIND_STACK;
3217 PROF_EVENT(117, "new_mem_stack_144");
3218 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3219 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3220 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3221 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3222 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3223 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3224 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3225 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3226 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3227 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3228 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3229 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3230 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3231 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3232 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3233 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3234 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3235 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3236 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3237 } else {
3238 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3242 MAYBE_USED
3243 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3245 PROF_EVENT(117, "new_mem_stack_144");
3246 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3247 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3248 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3249 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3250 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3251 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3252 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3253 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3254 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3255 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3256 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3257 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3258 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3259 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3260 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3261 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3262 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3263 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3264 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3265 } else {
3266 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3270 MAYBE_USED
3271 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3273 PROF_EVENT(127, "die_mem_stack_144");
3274 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3275 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3276 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3277 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3278 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3279 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3280 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3281 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3282 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3283 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3284 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3285 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3286 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3287 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3288 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3289 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3290 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3291 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3292 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3293 } else {
3294 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3298 /*--------------- adjustment by 160 bytes ---------------*/
3300 MAYBE_USED
3301 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3303 UInt otag = ecu | MC_OKIND_STACK;
3304 PROF_EVENT(118, "new_mem_stack_160");
3305 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3306 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3307 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3308 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3309 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3310 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3311 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3312 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3313 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3314 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3315 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3316 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3317 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3318 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3319 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3320 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3321 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3322 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3323 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3324 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3325 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3326 } else {
3327 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3331 MAYBE_USED
3332 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3334 PROF_EVENT(118, "new_mem_stack_160");
3335 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3336 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3337 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3338 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3339 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3340 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3341 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3342 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3343 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3344 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3345 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3346 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3347 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3348 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3349 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3350 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3351 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3352 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3353 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3354 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3355 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3356 } else {
3357 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3361 MAYBE_USED
3362 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3364 PROF_EVENT(128, "die_mem_stack_160");
3365 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3366 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3367 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3368 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3369 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3370 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3371 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3372 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3373 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3374 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3375 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3376 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3377 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3378 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3379 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3380 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3381 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3382 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3383 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3384 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3385 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3386 } else {
3387 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3391 /*--------------- adjustment by N bytes ---------------*/
3393 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3395 UInt otag = ecu | MC_OKIND_STACK;
3396 PROF_EVENT(115, "new_mem_stack_w_otag");
3397 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3400 static void mc_new_mem_stack ( Addr a, SizeT len )
3402 PROF_EVENT(115, "new_mem_stack");
3403 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3406 static void mc_die_mem_stack ( Addr a, SizeT len )
3408 PROF_EVENT(125, "die_mem_stack");
3409 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
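   /* The size-specific mc_new_mem_stack_N / mc_die_mem_stack_N handlers
      earlier in this file are hand-unrolled fast paths for the three
      generic by-N routines just above: they do the same marking, only
      with aligned word-sized writes instead of a per-byte loop over len. */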
3413 /* The AMD64 ABI says:
3415 "The 128-byte area beyond the location pointed to by %rsp is considered
3416 to be reserved and shall not be modified by signal or interrupt
3417 handlers. Therefore, functions may use this area for temporary data
3418 that is not needed across function calls. In particular, leaf functions
3419 may use this area for their entire stack frame, rather than adjusting
3420 the stack pointer in the prologue and epilogue. This area is known as
3421 red zone [sic]."
3423 So after any call or return we need to mark this redzone as containing
3424 undefined values.
3426 Consider this: we're in function f. f calls g. g moves rsp down
3427 modestly (say 16 bytes) and writes stuff all over the red zone, making it
3428 defined. g returns. f is buggy and reads from parts of the red zone
3429 that it didn't write on. But because g filled that area in, f is going
3430 to be picking up defined V bits and so any errors from reading bits of
3431 the red zone it didn't write, will be missed. The only solution I could
3432 think of was to make the red zone undefined when g returns to f.
3434 This is in accordance with the ABI, which makes it clear the redzone
3435 is volatile across function calls.
3437 The problem occurs the other way round too: f could fill the RZ up
3438 with defined values and g could mistakenly read them. So the RZ
3439 also needs to be nuked on function calls.
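   /* A minimal sketch of what this means in practice (hypothetical call
      site -- the real calls are emitted by the instrumenter as generated
      code): when g returns to f, the tool effectively does

         MC_(helperc_MAKE_STACK_UNINIT)( rsp - 128, 128, 0 );

      i.e. it repaints the 128-byte redzone below the stack pointer as
      undefined, so a later read by f of bytes it never wrote is reported.
      This is also why the helper further down has a special fast path for
      len == 128: that is exactly the AMD64 redzone size quoted above. */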
3443 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3444 improved so as to have a lower miss rate. */
3446 static UWord stats__nia_cache_queries = 0;
3447 static UWord stats__nia_cache_misses = 0;
3449 typedef
3450 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
3451 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3452 WCacheEnt;
3454 #define N_NIA_TO_ECU_CACHE 511
3456 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
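   /* Each slot of the cache holds two (nia, ecu) pairs.  A lookup in
      convert_nia_to_ecu below probes slot (nia % N_NIA_TO_ECU_CACHE); a
      hit on the second pair is swapped into the first, giving a tiny
      2-entry LRU per slot, and a miss builds a depth-1 ExeContext for
      nia and demotes the old first pair into the second. */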
3458 static void init_nia_to_ecu_cache ( void )
3460 UWord i;
3461 Addr zero_addr = 0;
3462 ExeContext* zero_ec;
3463 UInt zero_ecu;
3464 /* Fill all the slots with an entry for address zero, and the
3465 relevant otags accordingly. Hence the cache is initially filled
3466 with valid data. */
3467 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3468 tl_assert(zero_ec);
3469 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3470 tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3471 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3472 nia_to_ecu_cache[i].nia0 = zero_addr;
3473 nia_to_ecu_cache[i].ecu0 = zero_ecu;
3474 nia_to_ecu_cache[i].nia1 = zero_addr;
3475 nia_to_ecu_cache[i].ecu1 = zero_ecu;
3479 static inline UInt convert_nia_to_ecu ( Addr nia )
3481 UWord i;
3482 UInt ecu;
3483 ExeContext* ec;
3485 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3487 stats__nia_cache_queries++;
3488 i = nia % N_NIA_TO_ECU_CACHE;
3489 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3491 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3492 return nia_to_ecu_cache[i].ecu0;
3494 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3495 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3496 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3497 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3498 # undef SWAP
3499 return nia_to_ecu_cache[i].ecu0;
3502 stats__nia_cache_misses++;
3503 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3504 tl_assert(ec);
3505 ecu = VG_(get_ECU_from_ExeContext)(ec);
3506 tl_assert(VG_(is_plausible_ECU)(ecu));
3508 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3509 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3511 nia_to_ecu_cache[i].nia0 = nia;
3512 nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3513 return ecu;
3517 /* Note that this serves both the origin-tracking and
3518 no-origin-tracking modes. We assume that calls to it are
3519 sufficiently infrequent that it isn't worth specialising for the
3520 with/without origin-tracking cases. */
3521 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3523 UInt otag;
3524 tl_assert(sizeof(UWord) == sizeof(SizeT));
3525 if (0)
3526 VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3527 base, len, nia );
3529 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3530 UInt ecu = convert_nia_to_ecu ( nia );
3531 tl_assert(VG_(is_plausible_ECU)(ecu));
3532 otag = ecu | MC_OKIND_STACK;
3533 } else {
3534 tl_assert(nia == 0);
3535 otag = 0;
3538 # if 0
3539 /* Really slow version */
3540 MC_(make_mem_undefined)(base, len, otag);
3541 # endif
3543 # if 0
3544 /* Slow(ish) version, which is fairly easily seen to be correct.
3546 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3547 make_aligned_word64_undefined(base + 0, otag);
3548 make_aligned_word64_undefined(base + 8, otag);
3549 make_aligned_word64_undefined(base + 16, otag);
3550 make_aligned_word64_undefined(base + 24, otag);
3552 make_aligned_word64_undefined(base + 32, otag);
3553 make_aligned_word64_undefined(base + 40, otag);
3554 make_aligned_word64_undefined(base + 48, otag);
3555 make_aligned_word64_undefined(base + 56, otag);
3557 make_aligned_word64_undefined(base + 64, otag);
3558 make_aligned_word64_undefined(base + 72, otag);
3559 make_aligned_word64_undefined(base + 80, otag);
3560 make_aligned_word64_undefined(base + 88, otag);
3562 make_aligned_word64_undefined(base + 96, otag);
3563 make_aligned_word64_undefined(base + 104, otag);
3564 make_aligned_word64_undefined(base + 112, otag);
3565 make_aligned_word64_undefined(base + 120, otag);
3566 } else {
3567 MC_(make_mem_undefined)(base, len, otag);
3569 # endif
3571 /* Idea is: go fast when
3572 * 8-aligned and length is 128
3573 * the sm is available in the main primary map
3574 * the address range falls entirely within a single secondary map
3575 If all those conditions hold, just update the V+A bits by writing
3576 directly into the vabits array. (If the sm was distinguished, this
3577 will make a copy and then write to it.)
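   /* (For scale: each vabits8 byte encodes the V+A state of 4 client
      bytes at 2 bits each, so one UShort covers 8 client bytes -- hence
      the 16 UShort stores below for the 128-byte case, and 36 for the
      288-byte ppc64 case further down.) */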
3580 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3581 /* Now we know the address range is suitably sized and aligned. */
3582 UWord a_lo = (UWord)(base);
3583 UWord a_hi = (UWord)(base + 128 - 1);
3584 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3585 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3586 // Now we know the entire range is within the main primary map.
3587 SecMap* sm = get_secmap_for_writing_low(a_lo);
3588 SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3589 /* Now we know that the entire address range falls within a
3590 single secondary map, and that that secondary 'lives' in
3591 the main primary map. */
3592 if (LIKELY(sm == sm_hi)) {
3593 // Finally, we know that the range is entirely within one secmap.
3594 UWord v_off = SM_OFF(a_lo);
3595 UShort* p = (UShort*)(&sm->vabits8[v_off]);
3596 p[ 0] = VA_BITS16_UNDEFINED;
3597 p[ 1] = VA_BITS16_UNDEFINED;
3598 p[ 2] = VA_BITS16_UNDEFINED;
3599 p[ 3] = VA_BITS16_UNDEFINED;
3600 p[ 4] = VA_BITS16_UNDEFINED;
3601 p[ 5] = VA_BITS16_UNDEFINED;
3602 p[ 6] = VA_BITS16_UNDEFINED;
3603 p[ 7] = VA_BITS16_UNDEFINED;
3604 p[ 8] = VA_BITS16_UNDEFINED;
3605 p[ 9] = VA_BITS16_UNDEFINED;
3606 p[10] = VA_BITS16_UNDEFINED;
3607 p[11] = VA_BITS16_UNDEFINED;
3608 p[12] = VA_BITS16_UNDEFINED;
3609 p[13] = VA_BITS16_UNDEFINED;
3610 p[14] = VA_BITS16_UNDEFINED;
3611 p[15] = VA_BITS16_UNDEFINED;
3612 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3613 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3614 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3615 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3616 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3617 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3618 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3619 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3620 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3621 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3622 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3623 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3624 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3625 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3626 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3627 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3628 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3630 return;
3635 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
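   /* (Presumably 288 because the ppc64 ELF ABI reserves a 288-byte area
      below the stack pointer -- room to save 18 GPRs plus 18 FPRs at 8
      bytes each -- playing the same role as the 128-byte AMD64 redzone.) */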
3636 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3637 /* Now we know the address range is suitably sized and aligned. */
3638 UWord a_lo = (UWord)(base);
3639 UWord a_hi = (UWord)(base + 288 - 1);
3640 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3641 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3642 // Now we know the entire range is within the main primary map.
3643 SecMap* sm = get_secmap_for_writing_low(a_lo);
3644 SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3645 /* Now we know that the entire address range falls within a
3646 single secondary map, and that that secondary 'lives' in
3647 the main primary map. */
3648 if (LIKELY(sm == sm_hi)) {
3649 // Finally, we know that the range is entirely within one secmap.
3650 UWord v_off = SM_OFF(a_lo);
3651 UShort* p = (UShort*)(&sm->vabits8[v_off]);
3652 p[ 0] = VA_BITS16_UNDEFINED;
3653 p[ 1] = VA_BITS16_UNDEFINED;
3654 p[ 2] = VA_BITS16_UNDEFINED;
3655 p[ 3] = VA_BITS16_UNDEFINED;
3656 p[ 4] = VA_BITS16_UNDEFINED;
3657 p[ 5] = VA_BITS16_UNDEFINED;
3658 p[ 6] = VA_BITS16_UNDEFINED;
3659 p[ 7] = VA_BITS16_UNDEFINED;
3660 p[ 8] = VA_BITS16_UNDEFINED;
3661 p[ 9] = VA_BITS16_UNDEFINED;
3662 p[10] = VA_BITS16_UNDEFINED;
3663 p[11] = VA_BITS16_UNDEFINED;
3664 p[12] = VA_BITS16_UNDEFINED;
3665 p[13] = VA_BITS16_UNDEFINED;
3666 p[14] = VA_BITS16_UNDEFINED;
3667 p[15] = VA_BITS16_UNDEFINED;
3668 p[16] = VA_BITS16_UNDEFINED;
3669 p[17] = VA_BITS16_UNDEFINED;
3670 p[18] = VA_BITS16_UNDEFINED;
3671 p[19] = VA_BITS16_UNDEFINED;
3672 p[20] = VA_BITS16_UNDEFINED;
3673 p[21] = VA_BITS16_UNDEFINED;
3674 p[22] = VA_BITS16_UNDEFINED;
3675 p[23] = VA_BITS16_UNDEFINED;
3676 p[24] = VA_BITS16_UNDEFINED;
3677 p[25] = VA_BITS16_UNDEFINED;
3678 p[26] = VA_BITS16_UNDEFINED;
3679 p[27] = VA_BITS16_UNDEFINED;
3680 p[28] = VA_BITS16_UNDEFINED;
3681 p[29] = VA_BITS16_UNDEFINED;
3682 p[30] = VA_BITS16_UNDEFINED;
3683 p[31] = VA_BITS16_UNDEFINED;
3684 p[32] = VA_BITS16_UNDEFINED;
3685 p[33] = VA_BITS16_UNDEFINED;
3686 p[34] = VA_BITS16_UNDEFINED;
3687 p[35] = VA_BITS16_UNDEFINED;
3688 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3689 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3690 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3691 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3692 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3693 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3694 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3695 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3696 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3697 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3698 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3699 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3700 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3701 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3702 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3703 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3704 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3705 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3706 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3707 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3708 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3709 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3710 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3711 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3712 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3713 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3714 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3715 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3716 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3717 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3718 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3719 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3720 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3721 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3722 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3723 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3724 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3726 return;
3731 /* else fall into slow case */
3732 MC_(make_mem_undefined_w_otag)(base, len, otag);
3736 /*------------------------------------------------------------*/
3737 /*--- Checking memory ---*/
3738 /*------------------------------------------------------------*/
3740 typedef
3741 enum {
3742 MC_Ok = 5,
3743 MC_AddrErr = 6,
3744 MC_ValueErr = 7
3746 MC_ReadResult;
3749 /* Check permissions for address range. If inadequate permissions
3750 exist, *bad_addr is set to the offending address, so the caller can
3751 know what it is. */
3753 /* Returns True if [a .. a+len) is not addressable. Otherwise,
3754 returns False, and if bad_addr is non-NULL, sets *bad_addr to
3755 indicate the lowest failing address. Functions below are
3756 similar. */
3757 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3759 SizeT i;
3760 UWord vabits2;
3762 PROF_EVENT(60, "check_mem_is_noaccess");
3763 for (i = 0; i < len; i++) {
3764 PROF_EVENT(61, "check_mem_is_noaccess(loop)");
3765 vabits2 = get_vabits2(a);
3766 if (VA_BITS2_NOACCESS != vabits2) {
3767 if (bad_addr != NULL) *bad_addr = a;
3768 return False;
3770 a++;
3772 return True;
3775 static Bool is_mem_addressable ( Addr a, SizeT len,
3776 /*OUT*/Addr* bad_addr )
3778 SizeT i;
3779 UWord vabits2;
3781 PROF_EVENT(62, "is_mem_addressable");
3782 for (i = 0; i < len; i++) {
3783 PROF_EVENT(63, "is_mem_addressable(loop)");
3784 vabits2 = get_vabits2(a);
3785 if (VA_BITS2_NOACCESS == vabits2) {
3786 if (bad_addr != NULL) *bad_addr = a;
3787 return False;
3789 a++;
3791 return True;
3794 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3795 /*OUT*/Addr* bad_addr,
3796 /*OUT*/UInt* otag )
3798 SizeT i;
3799 UWord vabits2;
3801 PROF_EVENT(64, "is_mem_defined");
3802 DEBUG("is_mem_defined\n");
3804 if (otag) *otag = 0;
3805 if (bad_addr) *bad_addr = 0;
3806 for (i = 0; i < len; i++) {
3807 PROF_EVENT(65, "is_mem_defined(loop)");
3808 vabits2 = get_vabits2(a);
3809 if (VA_BITS2_DEFINED != vabits2) {
3810 // Error! Nb: Report addressability errors in preference to
3811 // definedness errors. And don't report definedness errors unless
3812 // --undef-value-errors=yes.
3813 if (bad_addr) {
3814 *bad_addr = a;
3816 if (VA_BITS2_NOACCESS == vabits2) {
3817 return MC_AddrErr;
3819 if (MC_(clo_mc_level) >= 2) {
3820 if (otag && MC_(clo_mc_level) == 3) {
3821 *otag = MC_(helperc_b_load1)( a );
3823 return MC_ValueErr;
3826 a++;
3828 return MC_Ok;
3832 /* Like is_mem_defined but doesn't give up at the first uninitialised
3833 byte -- the entire range is always checked. This is important for
3834 detecting errors in the case where a checked range strays into
3835 invalid memory, but that fact is not detected by the ordinary
3836 is_mem_defined(), because of an undefined section that precedes the
3837 out of range section, possibly as a result of an alignment hole in
3838 the checked data. This version always checks the entire range and
3839 can report both a definedness and an accessibility error, if
3840 necessary. */
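   /* Example (sketch): a checked 16-byte buffer whose bytes 4..7 are an
      uninitialised alignment hole and whose last 4 bytes run off the end
      of a mapping.  Plain is_mem_defined() stops at byte 4 and reports
      only the definedness error; this routine keeps scanning and also
      reports the addressability error at byte 12. */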
3841 static void is_mem_defined_comprehensive (
3842 Addr a, SizeT len,
3843 /*OUT*/Bool* errorV, /* is there a definedness err? */
3844 /*OUT*/Addr* bad_addrV, /* if so where? */
3845 /*OUT*/UInt* otagV, /* and what's its otag? */
3846 /*OUT*/Bool* errorA, /* is there an addressability err? */
3847 /*OUT*/Addr* bad_addrA /* if so where? */
3850 SizeT i;
3851 UWord vabits2;
3852 Bool already_saw_errV = False;
3854 PROF_EVENT(64, "is_mem_defined"); // fixme
3855 DEBUG("is_mem_defined_comprehensive\n");
3857 tl_assert(!(*errorV || *errorA));
3859 for (i = 0; i < len; i++) {
3860 PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
3861 vabits2 = get_vabits2(a);
3862 switch (vabits2) {
3863 case VA_BITS2_DEFINED:
3864 a++;
3865 break;
3866 case VA_BITS2_UNDEFINED:
3867 case VA_BITS2_PARTDEFINED:
3868 if (!already_saw_errV) {
3869 *errorV = True;
3870 *bad_addrV = a;
3871 if (MC_(clo_mc_level) == 3) {
3872 *otagV = MC_(helperc_b_load1)( a );
3873 } else {
3874 *otagV = 0;
3876 already_saw_errV = True;
3878 a++; /* keep going */
3879 break;
3880 case VA_BITS2_NOACCESS:
3881 *errorA = True;
3882 *bad_addrA = a;
3883 return; /* give up now. */
3884 default:
3885 tl_assert(0);
3891 /* Check a zero-terminated ascii string. Tricky -- don't want to
3892 examine the actual bytes, to find the end, until we're sure it is
3893 safe to do so. */
3895 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3897 UWord vabits2;
3899 PROF_EVENT(66, "mc_is_defined_asciiz");
3900 DEBUG("mc_is_defined_asciiz\n");
3902 if (otag) *otag = 0;
3903 if (bad_addr) *bad_addr = 0;
3904 while (True) {
3905 PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
3906 vabits2 = get_vabits2(a);
3907 if (VA_BITS2_DEFINED != vabits2) {
3908 // Error! Nb: Report addressability errors in preference to
3909 // definedness errors. And don't report definedness errors unless
3910 // --undef-value-errors=yes.
3911 if (bad_addr) {
3912 *bad_addr = a;
3914 if (VA_BITS2_NOACCESS == vabits2) {
3915 return MC_AddrErr;
3917 if (MC_(clo_mc_level) >= 2) {
3918 if (otag && MC_(clo_mc_level) == 3) {
3919 *otag = MC_(helperc_b_load1)( a );
3921 return MC_ValueErr;
3924 /* Ok, a is safe to read. */
3925 if (* ((UChar*)a) == 0) {
3926 return MC_Ok;
3928 a++;
3933 /*------------------------------------------------------------*/
3934 /*--- Memory event handlers ---*/
3935 /*------------------------------------------------------------*/
3937 static
3938 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
3939 Addr base, SizeT size )
3941 Addr bad_addr;
3942 Bool ok = is_mem_addressable ( base, size, &bad_addr );
3944 if (!ok) {
3945 switch (part) {
3946 case Vg_CoreSysCall:
3947 MC_(record_memparam_error) ( tid, bad_addr,
3948 /*isAddrErr*/True, s, 0/*otag*/ );
3949 break;
3951 case Vg_CoreSignal:
3952 MC_(record_core_mem_error)( tid, s );
3953 break;
3955 default:
3956 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3961 static
3962 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
3963 Addr base, SizeT size )
3965 UInt otag = 0;
3966 Addr bad_addr;
3967 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3969 if (MC_Ok != res) {
3970 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3972 switch (part) {
3973 case Vg_CoreSysCall:
3974 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3975 isAddrErr ? 0 : otag );
3976 break;
3978 case Vg_CoreSysCallArgInMem:
3979 MC_(record_regparam_error) ( tid, s, otag );
3980 break;
3982 /* If we're being asked to jump to a silly address, record an error
3983 message before potentially crashing the entire system. */
3984 case Vg_CoreTranslate:
3985 MC_(record_jump_error)( tid, bad_addr );
3986 break;
3988 default:
3989 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3994 static
3995 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3996 const HChar* s, Addr str )
3998 MC_ReadResult res;
3999 Addr bad_addr = 0; // shut GCC up
4000 UInt otag = 0;
4002 tl_assert(part == Vg_CoreSysCall);
4003 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
4004 if (MC_Ok != res) {
4005 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4006 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4007 isAddrErr ? 0 : otag );
4011 /* Handling of mmap and mprotect is not as simple as it seems.
4013 The underlying semantics are that memory obtained from mmap is
4014 always initialised, but may be inaccessible. And changes to the
4015 protection of memory do not change its contents and hence not its
4016 definedness state. Problem is we can't model
4017 inaccessible-but-with-some-definedness state; once we mark memory
4018 as inaccessible we lose all info about definedness, and so can't
4019 restore that if it is later made accessible again.
4021 One obvious thing to do is this:
4023 mmap/mprotect NONE -> noaccess
4024 mmap/mprotect other -> defined
4026 The problem case here is: taking accessible memory, writing
4027 uninitialised data to it, mprotecting it NONE and later mprotecting
4028 it back to some accessible state causes the undefinedness to be
4029 lost.
4031 A better proposal is:
4033 (1) mmap NONE -> make noaccess
4034 (2) mmap other -> make defined
4036 (3) mprotect NONE -> # no change
4037 (4) mprotect other -> change any "noaccess" to "defined"
4039 (2) is OK because memory newly obtained from mmap really is defined
4040 (zeroed out by the kernel -- doing anything else would
4041 constitute a massive security hole.)
4043 (1) is OK because the only way to make the memory usable is via
4044 (4), in which case we also wind up correctly marking it all as
4045 defined.
4047 (3) is the weak case. We choose not to change memory state
4048 (presumably the range is in some mixture of "defined" and
4049 "undefined", viz, accessible but with arbitrary V bits). Doing
4050 nothing means we retain the V bits, so that if the memory is
4051 later mprotected "other", the V bits remain unchanged, so there
4052 can be no false negatives. The bad effect is that if there's
4053 an access in the area, then MC cannot warn; but at least we'll
4054 get a SEGV to show, so it's better than nothing.
4056 Consider the sequence (3) followed by (4). Any memory that was
4057 "defined" or "undefined" previously retains its state (as
4058 required). Any memory that was "noaccess" before can only have
4059 been made that way by (1), and so it's OK to change it to
4060 "defined".
4062 See https://bugs.kde.org/show_bug.cgi?id=205541
4063 and https://bugs.kde.org/show_bug.cgi?id=210268
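   /* A worked client-side example of rule (3) followed by (4) (sketch;
      the variable names are illustrative only):

         char junk;                        // never initialised
         p = mmap(0, 4096, PROT_READ|PROT_WRITE, ...);  // (2): all defined
         p[0] = junk;                      // byte 0 becomes undefined
         mprotect(p, 4096, PROT_NONE);     // (3): V bits left untouched
         mprotect(p, 4096, PROT_READ);     // (4): only noaccess -> defined
         if (p[0]) ...                     // still reported as undefined

      i.e. the undefinedness created before the PROT_NONE round trip
      survives it, which is precisely what rule (3) is protecting. */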
4065 static
4066 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4067 ULong di_handle )
4069 if (rr || ww || xx) {
4070 /* (2) mmap/mprotect other -> defined */
4071 MC_(make_mem_defined)(a, len);
4072 } else {
4073 /* (1) mmap/mprotect NONE -> noaccess */
4074 MC_(make_mem_noaccess)(a, len);
4078 static
4079 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4081 if (rr || ww || xx) {
4082 /* (4) mprotect other -> change any "noaccess" to "defined" */
4083 make_mem_defined_if_noaccess(a, len);
4084 } else {
4085 /* (3) mprotect NONE -> # no change */
4086 /* do nothing */
4091 static
4092 void mc_new_mem_startup( Addr a, SizeT len,
4093 Bool rr, Bool ww, Bool xx, ULong di_handle )
4095 // Because code is defined, initialised variables get put in the data
4096 // segment and are defined, and uninitialised variables get put in the
4097 // bss segment and are auto-zeroed (and so defined).
4099 // It's possible that there will be padding between global variables.
4100 // This will also be auto-zeroed, and marked as defined by Memcheck. If
4101 // a program uses it, Memcheck will not complain. This is arguably a
4102 // false negative, but it's a grey area -- the behaviour is defined (the
4103 // padding is zeroed) but it's probably not what the user intended. And
4104 // we can't avoid it.
4106 // Note: we generally ignore RWX permissions, because we can't track them
4107 // without requiring more than one A bit which would slow things down a
4108 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
4109 // So we mark any such pages as "unaddressable".
4110 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4111 a, (ULong)len, rr, ww, xx);
4112 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4115 static
4116 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4118 MC_(make_mem_defined)(a, len);
4122 /*------------------------------------------------------------*/
4123 /*--- Register event handlers ---*/
4124 /*------------------------------------------------------------*/
4126 /* Try and get a nonzero origin for the guest state section of thread
4127 tid characterised by (offset,size). Return 0 if nothing to show
4128 for it. */
4129 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4130 Int offset, SizeT size )
4132 Int sh2off;
4133 UInt area[3];
4134 UInt otag;
4135 sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4136 if (sh2off == -1)
4137 return 0; /* This piece of guest state is not tracked */
4138 tl_assert(sh2off >= 0);
4139 tl_assert(0 == (sh2off % 4));
4140 area[0] = 0x31313131;
4141 area[2] = 0x27272727;
4142 VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4143 tl_assert(area[0] == 0x31313131);
4144 tl_assert(area[2] == 0x27272727);
4145 otag = area[1];
4146 return otag;
4150 /* When some chunk of guest state is written, mark the corresponding
4151 shadow area as valid. This is used to initialise arbitrarily large
4152 chunks of guest state, hence the _SIZE value, which has to be as
4153 big as the biggest guest state.
4155 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4156 PtrdiffT offset, SizeT size)
4158 # define MAX_REG_WRITE_SIZE 1712
4159 UChar area[MAX_REG_WRITE_SIZE];
4160 tl_assert(size <= MAX_REG_WRITE_SIZE);
4161 VG_(memset)(area, V_BITS8_DEFINED, size);
4162 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4163 # undef MAX_REG_WRITE_SIZE
4166 static
4167 void mc_post_reg_write_clientcall ( ThreadId tid,
4168 PtrdiffT offset, SizeT size, Addr f)
4170 mc_post_reg_write(/*dummy*/0, tid, offset, size);
4173 /* Look at the definedness of the guest's shadow state for
4174 [offset, offset+size). If any part of that is undefined, record
4175 a parameter error.
4177 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4178 PtrdiffT offset, SizeT size)
4180 Int i;
4181 Bool bad;
4182 UInt otag;
4184 UChar area[16];
4185 tl_assert(size <= 16);
4187 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4189 bad = False;
4190 for (i = 0; i < size; i++) {
4191 if (area[i] != V_BITS8_DEFINED) {
4192 bad = True;
4193 break;
4197 if (!bad)
4198 return;
4200 /* We've found some undefinedness. See if we can also find an
4201 origin for it. */
4202 otag = mb_get_origin_for_guest_offset( tid, offset, size );
4203 MC_(record_regparam_error) ( tid, s, otag );
4207 /*------------------------------------------------------------*/
4208 /*--- Register-memory event handlers ---*/
4209 /*------------------------------------------------------------*/
4211 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
4212 PtrdiffT guest_state_offset, SizeT size )
4214 SizeT i;
4215 UChar vbits8;
4216 Int offset;
4217 UInt d32;
4219 /* Slow loop. */
4220 for (i = 0; i < size; i++) {
4221 get_vbits8( a+i, &vbits8 );
4222 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
4223 1, &vbits8 );
4226 if (MC_(clo_mc_level) != 3)
4227 return;
4229 /* Track origins. */
4230 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4231 if (offset == -1)
4232 return;
4234 switch (size) {
4235 case 1:
4236 d32 = MC_(helperc_b_load1)( a );
4237 break;
4238 case 2:
4239 d32 = MC_(helperc_b_load2)( a );
4240 break;
4241 case 4:
4242 d32 = MC_(helperc_b_load4)( a );
4243 break;
4244 case 8:
4245 d32 = MC_(helperc_b_load8)( a );
4246 break;
4247 case 16:
4248 d32 = MC_(helperc_b_load16)( a );
4249 break;
4250 case 32:
4251 d32 = MC_(helperc_b_load32)( a );
4252 break;
4253 default:
4254 tl_assert(0);
4257 VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
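   /* Both mc_copy_mem_to_reg above and mc_copy_reg_to_mem below keep
      origin tracking coarse: a single 32-bit otag is transferred for the
      whole range (shadow area 2), rather than one per copied byte. */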
4260 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
4261 PtrdiffT guest_state_offset, Addr a,
4262 SizeT size )
4264 SizeT i;
4265 UChar vbits8;
4266 Int offset;
4267 UInt d32;
4269 /* Slow loop. */
4270 for (i = 0; i < size; i++) {
4271 VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
4272 guest_state_offset+i, 1 );
4273 set_vbits8( a+i, vbits8 );
4276 if (MC_(clo_mc_level) != 3)
4277 return;
4279 /* Track origins. */
4280 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4281 if (offset == -1)
4282 return;
4284 VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
4285 switch (size) {
4286 case 1:
4287 MC_(helperc_b_store1)( a, d32 );
4288 break;
4289 case 2:
4290 MC_(helperc_b_store2)( a, d32 );
4291 break;
4292 case 4:
4293 MC_(helperc_b_store4)( a, d32 );
4294 break;
4295 case 8:
4296 MC_(helperc_b_store8)( a, d32 );
4297 break;
4298 case 16:
4299 MC_(helperc_b_store16)( a, d32 );
4300 break;
4301 case 32:
4302 MC_(helperc_b_store32)( a, d32 );
4303 break;
4304 default:
4305 tl_assert(0);
4310 /*------------------------------------------------------------*/
4311 /*--- Some static assertions ---*/
4312 /*------------------------------------------------------------*/
4314 /* The handwritten assembly helpers below have baked-in assumptions
4315 about various constant values. These assertions attempt to make
4316 that a bit safer by checking those values and flagging changes that
4317 would make the assembly invalid. Not perfect but it's better than
4318 nothing. */
4320 STATIC_ASSERT(SM_CHUNKS * 4 == 65536);
4322 STATIC_ASSERT(VA_BITS8_DEFINED == 0xAA);
4323 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);
4325 STATIC_ASSERT(V_BITS32_DEFINED == 0x00000000);
4326 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);
4328 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA);
4329 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);
4331 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000);
4332 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);
4334 STATIC_ASSERT(VA_BITS2_DEFINED == 2);
4335 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);
4337 STATIC_ASSERT(V_BITS8_DEFINED == 0x00);
4338 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
4341 /*------------------------------------------------------------*/
4342 /*--- Functions called directly from generated code: ---*/
4343 /*--- Load/store handlers. ---*/
4344 /*------------------------------------------------------------*/
4346 /* Types: LOADV32, LOADV16, LOADV8 are:
4347 UWord fn ( Addr a )
4348 so they return 32-bits on 32-bit machines and 64-bits on
4349 64-bit machines. Addr has the same size as a host word.
4351 LOADV64 is always ULong fn ( Addr a )
4353 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4354 are a UWord, and for STOREV64 they are a ULong.
4357 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4358 naturally '_sz/8'-aligned, or it exceeds the range covered by the
4359 primary map. This is all very tricky (and important!), so let's
4360 work through the maths by hand (below), *and* assert for these
4361 values at startup. */
4362 #define MASK(_szInBytes) \
4363 ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4365 /* MASK only exists so as to define this macro. */
4366 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
4367 ((_a) & MASK((_szInBits>>3)))
4369 /* On a 32-bit machine:
4371 N_PRIMARY_BITS == 16, so
4372 N_PRIMARY_MAP == 0x10000, so
4373 N_PRIMARY_MAP-1 == 0xFFFF, so
4374 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4376 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4377 = ~ ( 0xFFFF | 0xFFFF0000 )
4378 = ~ 0xFFFF'FFFF
4381 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4382 = ~ ( 0xFFFE | 0xFFFF0000 )
4383 = ~ 0xFFFF'FFFE
4386 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4387 = ~ ( 0xFFFC | 0xFFFF0000 )
4388 = ~ 0xFFFF'FFFC
4391 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4392 = ~ ( 0xFFF8 | 0xFFFF0000 )
4393 = ~ 0xFFFF'FFF8
4396 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4397 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for
4398 the 1-byte alignment case, it is always a zero value, since MASK(1)
4399 is zero. All as expected.
4401 On a 64-bit machine, it's more complex, since we're testing
4402 simultaneously for misalignment and for the address being at or
4403 above 64G:
4405 N_PRIMARY_BITS == 20, so
4406 N_PRIMARY_MAP == 0x100000, so
4407 N_PRIMARY_MAP-1 == 0xFFFFF, so
4408 (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4410 MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4411 = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4412 = ~ 0xF'FFFF'FFFF
4413 = 0xFFFF'FFF0'0000'0000
4415 MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4416 = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4417 = ~ 0xF'FFFF'FFFE
4418 = 0xFFFF'FFF0'0000'0001
4420 MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4421 = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4422 = ~ 0xF'FFFF'FFFC
4423 = 0xFFFF'FFF0'0000'0003
4425 MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4426 = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4427 = ~ 0xF'FFFF'FFF8
4428 = 0xFFFF'FFF0'0000'0007
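   /* In short: a single "a & MASK(sz)" test routes to the slow path both
      accesses that are not naturally sz-aligned and (on 64-bit hosts)
      accesses at or above the 64G boundary covered by the primary map. */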
4431 /*------------------------------------------------------------*/
4432 /*--- LOADV256 and LOADV128 ---*/
4433 /*------------------------------------------------------------*/
4435 static INLINE
4436 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4437 Addr a, SizeT nBits, Bool isBigEndian )
4439 PROF_EVENT(200, "mc_LOADV_128_or_256");
4441 #ifndef PERF_FAST_LOADV
4442 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4443 return;
4444 #else
4446 UWord sm_off16, vabits16, j;
4447 UWord nBytes = nBits / 8;
4448 UWord nULongs = nBytes / 8;
4449 SecMap* sm;
4451 if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4452 PROF_EVENT(201, "mc_LOADV_128_or_256-slow1");
4453 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4454 return;
4457 /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4458 suitably aligned, is mapped, and addressable. */
4459 for (j = 0; j < nULongs; j++) {
4460 sm = get_secmap_for_reading_low(a + 8*j);
4461 sm_off16 = SM_OFF_16(a + 8*j);
4462 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4464 // Convert V bits from compact memory form to expanded
4465 // register form.
4466 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4467 res[j] = V_BITS64_DEFINED;
4468 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4469 res[j] = V_BITS64_UNDEFINED;
4470 } else {
4471 /* Slow case: some block of 8 bytes is not all-defined or
4472 all-undefined. */
4473 PROF_EVENT(202, "mc_LOADV_128_or_256-slow2");
4474 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4475 return;
4478 return;
4480 #endif
4483 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4485 mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4487 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4489 mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4492 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4494 mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4496 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4498 mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4501 /*------------------------------------------------------------*/
4502 /*--- LOADV64 ---*/
4503 /*------------------------------------------------------------*/
4505 static INLINE
4506 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4508 PROF_EVENT(200, "mc_LOADV64");
4510 #ifndef PERF_FAST_LOADV
4511 return mc_LOADVn_slow( a, 64, isBigEndian );
4512 #else
4514 UWord sm_off16, vabits16;
4515 SecMap* sm;
4517 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4518 PROF_EVENT(201, "mc_LOADV64-slow1");
4519 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4522 sm = get_secmap_for_reading_low(a);
4523 sm_off16 = SM_OFF_16(a);
4524 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4526 // Handle common case quickly: a is suitably aligned, is mapped, and
4527 // addressable.
4528 // Convert V bits from compact memory form to expanded register form.
4529 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4530 return V_BITS64_DEFINED;
4531 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4532 return V_BITS64_UNDEFINED;
4533 } else {
4534 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4535 PROF_EVENT(202, "mc_LOADV64-slow2");
4536 return mc_LOADVn_slow( a, 64, isBigEndian );
4539 #endif
4542 // Generic for all platforms
4543 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4545 return mc_LOADV64(a, True);
4548 // Non-generic assembly for arm32-linux
4549 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4550 && defined(VGP_arm_linux)
4551 __asm__( /* Derived from the 32 bit assembly helper */
4552 ".text \n"
4553 ".align 2 \n"
4554 ".global vgMemCheck_helperc_LOADV64le \n"
4555 ".type vgMemCheck_helperc_LOADV64le, %function \n"
4556 "vgMemCheck_helperc_LOADV64le: \n"
4557 " tst r0, #7 \n"
4558 " movw r3, #:lower16:primary_map \n"
4559 " bne .LLV64LEc4 \n" // if misaligned
4560 " lsr r2, r0, #16 \n"
4561 " movt r3, #:upper16:primary_map \n"
4562 " ldr r2, [r3, r2, lsl #2] \n"
4563 " uxth r1, r0 \n" // r1 is 0-(16)-0 X-(13)-X 000
4564 " movw r3, #0xAAAA \n"
4565 " lsr r1, r1, #2 \n" // r1 is 0-(16)-0 00 X-(13)-X 0
4566 " ldrh r1, [r2, r1] \n"
4567 " cmp r1, r3 \n" // 0xAAAA == VA_BITS16_DEFINED
4568 " bne .LLV64LEc0 \n" // if !all_defined
4569 " mov r1, #0x0 \n" // 0x0 == V_BITS32_DEFINED
4570 " mov r0, #0x0 \n" // 0x0 == V_BITS32_DEFINED
4571 " bx lr \n"
4572 ".LLV64LEc0: \n"
4573 " movw r3, #0x5555 \n"
4574 " cmp r1, r3 \n" // 0x5555 == VA_BITS16_UNDEFINED
4575 " bne .LLV64LEc4 \n" // if !all_undefined
4576 " mov r1, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4577 " mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4578 " bx lr \n"
4579 ".LLV64LEc4: \n"
4580 " push {r4, lr} \n"
4581 " mov r2, #0 \n"
4582 " mov r1, #64 \n"
4583 " bl mc_LOADVn_slow \n"
4584 " pop {r4, pc} \n"
4585 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
4586 ".previous\n"
4589 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4590 && defined(VGP_x86_linux)
4591 __asm__(
4592 ".text\n"
4593 ".align 16\n"
4594 ".global vgMemCheck_helperc_LOADV64le\n"
4595 ".type vgMemCheck_helperc_LOADV64le, @function\n"
4596 "vgMemCheck_helperc_LOADV64le:\n"
4597 " test $0x7, %eax\n"
4598 " jne .LLV64LE2\n" /* jump if not aligned */
4599 " mov %eax, %ecx\n"
4600 " movzwl %ax, %edx\n"
4601 " shr $0x10, %ecx\n"
4602 " mov primary_map(,%ecx,4), %ecx\n"
4603 " shr $0x3, %edx\n"
4604 " movzwl (%ecx,%edx,2), %edx\n"
4605 " cmp $0xaaaa, %edx\n"
4606 " jne .LLV64LE1\n" /* jump if not all defined */
4607 " xor %eax, %eax\n" /* return 0 in edx:eax */
4608 " xor %edx, %edx\n"
4609 " ret\n"
4610 ".LLV64LE1:\n"
4611 " cmp $0x5555, %edx\n"
4612 " jne .LLV64LE2\n" /* jump if not all undefined */
4613 " or $0xffffffff, %eax\n" /* else return all bits set in edx:eax */
4614 " or $0xffffffff, %edx\n"
4615 " ret\n"
4616 ".LLV64LE2:\n"
4617 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 64, 0) */
4618 " mov $64, %edx\n"
4619 " jmp mc_LOADVn_slow\n"
4620 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
4621 ".previous\n"
4624 #else
4625 // Generic for all platforms except {arm32,x86}-linux
4626 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4628 return mc_LOADV64(a, False);
4630 #endif
4632 /*------------------------------------------------------------*/
4633 /*--- STOREV64 ---*/
4634 /*------------------------------------------------------------*/
4636 static INLINE
4637 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4639 PROF_EVENT(210, "mc_STOREV64");
4641 #ifndef PERF_FAST_STOREV
4642 // XXX: this slow case seems to be marginally faster than the fast case!
4643 // Investigate further.
4644 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4645 #else
4647 UWord sm_off16, vabits16;
4648 SecMap* sm;
4650 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4651 PROF_EVENT(211, "mc_STOREV64-slow1");
4652 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4653 return;
4656 sm = get_secmap_for_reading_low(a);
4657 sm_off16 = SM_OFF_16(a);
4658 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4660 // To understand the below cleverness, see the extensive comments
4661 // in MC_(helperc_STOREV8).
4662 if (LIKELY(V_BITS64_DEFINED == vbits64)) {
4663 if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
4664 return;
4666 if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
4667 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4668 return;
4670 PROF_EVENT(232, "mc_STOREV64-slow2");
4671 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4672 return;
4674 if (V_BITS64_UNDEFINED == vbits64) {
4675 if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
4676 return;
4678 if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
4679 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4680 return;
4682 PROF_EVENT(232, "mc_STOREV64-slow3");
4683 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4684 return;
4687 PROF_EVENT(212, "mc_STOREV64-slow4");
4688 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4690 #endif
4693 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4695 mc_STOREV64(a, vbits64, True);
4697 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4699 mc_STOREV64(a, vbits64, False);
4702 /*------------------------------------------------------------*/
4703 /*--- LOADV32 ---*/
4704 /*------------------------------------------------------------*/
4706 static INLINE
4707 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4709 PROF_EVENT(220, "mc_LOADV32");
4711 #ifndef PERF_FAST_LOADV
4712 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4713 #else
4715 UWord sm_off, vabits8;
4716 SecMap* sm;
4718 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4719 PROF_EVENT(221, "mc_LOADV32-slow1");
4720 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4723 sm = get_secmap_for_reading_low(a);
4724 sm_off = SM_OFF(a);
4725 vabits8 = sm->vabits8[sm_off];
4727 // Handle common case quickly: a is suitably aligned, is mapped, and the
4728 // entire word32 it lives in is addressable.
4729 // Convert V bits from compact memory form to expanded register form.
4730 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
4731 // Almost certainly not necessary, but be paranoid.
4732 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4733 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4734 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4735 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4736 } else {
4737 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4738 PROF_EVENT(222, "mc_LOADV32-slow2");
4739 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4742 #endif
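/* Editorial sketch, not part of the build: a minimal, self-contained model
   of the fast-path test used by mc_LOADV32 above.  It assumes only the
   encodings already stated in this file's comments and asserts:
   VA_BITS8_DEFINED == 0xAA, VA_BITS8_UNDEFINED == 0x55,
   V_BITS32_DEFINED == 0x0, V_BITS32_UNDEFINED == 0xFFFFFFFF.  A vabits8
   byte packs four 2-bit VA fields, one per data byte, so 0xAA means "all
   four bytes defined" and 0x55 means "all four bytes undefined"; anything
   else sends the load to mc_LOADVn_slow. */
#if 0
static unsigned int sketch_loadv32_fastpath ( unsigned char vabits8,
                                              int* need_slow_path )
{
   *need_slow_path = 0;
   if (vabits8 == 0xAA)        /* VA_BITS8_DEFINED */
      return 0x00000000u;      /* V_BITS32_DEFINED */
   if (vabits8 == 0x55)        /* VA_BITS8_UNDEFINED */
      return 0xFFFFFFFFu;      /* V_BITS32_UNDEFINED */
   *need_slow_path = 1;        /* mixed: mc_LOADVn_slow(a, 32, ...) decides */
   return 0xFFFFFFFFu;         /* placeholder; real value comes from slow path */
}
#endif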
4745 // Generic for all platforms
4746 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4748 return mc_LOADV32(a, True);
4751 // Non-generic assembly for arm32-linux
4752 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4753 && defined(VGP_arm_linux)
4754 __asm__( /* Derived from NCode template */
4755 ".text \n"
4756 ".align 2 \n"
4757 ".global vgMemCheck_helperc_LOADV32le \n"
4758 ".type vgMemCheck_helperc_LOADV32le, %function \n"
4759 "vgMemCheck_helperc_LOADV32le: \n"
4760 " tst r0, #3 \n" // 1
4761 " movw r3, #:lower16:primary_map \n" // 1
4762 " bne .LLV32LEc4 \n" // 2 if misaligned
4763 " lsr r2, r0, #16 \n" // 3
4764 " movt r3, #:upper16:primary_map \n" // 3
4765 " ldr r2, [r3, r2, lsl #2] \n" // 4
4766 " uxth r1, r0 \n" // 4
4767 " ldrb r1, [r2, r1, lsr #2] \n" // 5
4768 " cmp r1, #0xAA \n" // 6 0xAA == VA_BITS8_DEFINED
4769 " bne .LLV32LEc0 \n" // 7 if !all_defined
4770 " mov r0, #0x0 \n" // 8 0x0 == V_BITS32_DEFINED
4771 " bx lr \n" // 9
4772 ".LLV32LEc0: \n"
4773 " cmp r1, #0x55 \n" // 0x55 == VA_BITS8_UNDEFINED
4774 " bne .LLV32LEc4 \n" // if !all_undefined
4775 " mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4776 " bx lr \n"
4777 ".LLV32LEc4: \n"
4778 " push {r4, lr} \n"
4779 " mov r2, #0 \n"
4780 " mov r1, #32 \n"
4781 " bl mc_LOADVn_slow \n"
4782 " pop {r4, pc} \n"
4783 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
4784 ".previous\n"
4787 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4788 && defined(VGP_x86_linux)
4789 __asm__(
4790 ".text\n"
4791 ".align 16\n"
4792 ".global vgMemCheck_helperc_LOADV32le\n"
4793 ".type vgMemCheck_helperc_LOADV32le, @function\n"
4794 "vgMemCheck_helperc_LOADV32le:\n"
4795 " test $0x3, %eax\n"
4796 " jnz .LLV32LE2\n" /* jump if misaligned */
4797 " mov %eax, %edx\n"
4798 " shr $16, %edx\n"
4799 " mov primary_map(,%edx,4), %ecx\n"
4800 " movzwl %ax, %edx\n"
4801 " shr $2, %edx\n"
4802 " movzbl (%ecx,%edx,1), %edx\n"
4803 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
4804 " jne .LLV32LE1\n" /* jump if not completely defined */
4805 " xor %eax, %eax\n" /* else return V_BITS32_DEFINED */
4806 " ret\n"
4807 ".LLV32LE1:\n"
4808 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
4809 " jne .LLV32LE2\n" /* jump if not completely undefined */
4810 " or $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */
4811 " ret\n"
4812 ".LLV32LE2:\n"
4813 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 32, 0) */
4814 " mov $32, %edx\n"
4815 " jmp mc_LOADVn_slow\n"
4816 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
4817 ".previous\n"
4820 #else
4821 // Generic for all platforms except {arm32,x86}-linux
4822 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4824 return mc_LOADV32(a, False);
4826 #endif
4828 /*------------------------------------------------------------*/
4829 /*--- STOREV32 ---*/
4830 /*------------------------------------------------------------*/
4832 static INLINE
4833 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4835 PROF_EVENT(230, "mc_STOREV32");
4837 #ifndef PERF_FAST_STOREV
4838 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4839 #else
4841 UWord sm_off, vabits8;
4842 SecMap* sm;
4844 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4845 PROF_EVENT(231, "mc_STOREV32-slow1");
4846 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4847 return;
4850 sm = get_secmap_for_reading_low(a);
4851 sm_off = SM_OFF(a);
4852 vabits8 = sm->vabits8[sm_off];
4854 // To understand the below cleverness, see the extensive comments
4855 // in MC_(helperc_STOREV8).
4856 if (LIKELY(V_BITS32_DEFINED == vbits32)) {
4857 if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
4858 return;
4860 if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
4861 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4862 return;
4864 PROF_EVENT(232, "mc_STOREV32-slow2");
4865 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4866 return;
4868 if (V_BITS32_UNDEFINED == vbits32) {
4869 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4870 return;
4872 if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4873 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4874 return;
4876 PROF_EVENT(233, "mc_STOREV32-slow3");
4877 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4878 return;
4881 PROF_EVENT(234, "mc_STOREV32-slow4");
4882 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4884 #endif
4887 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4889 mc_STOREV32(a, vbits32, True);
4891 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4893 mc_STOREV32(a, vbits32, False);
4896 /*------------------------------------------------------------*/
4897 /*--- LOADV16 ---*/
4898 /*------------------------------------------------------------*/
4900 static INLINE
4901 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4903 PROF_EVENT(240, "mc_LOADV16");
4905 #ifndef PERF_FAST_LOADV
4906 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4907 #else
4909 UWord sm_off, vabits8;
4910 SecMap* sm;
4912 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4913 PROF_EVENT(241, "mc_LOADV16-slow1");
4914 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4917 sm = get_secmap_for_reading_low(a);
4918 sm_off = SM_OFF(a);
4919 vabits8 = sm->vabits8[sm_off];
4920 // Handle common case quickly: a is suitably aligned, is mapped, and is
4921 // addressable.
4922 // Convert V bits from compact memory form to expanded register form
4923 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; }
4924 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
4925 else {
4926 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4927 // the two sub-bytes.
4928 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4929 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
4930 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4931 else {
4932 /* Slow case: the two bytes are not all-defined or all-undefined. */
4933 PROF_EVENT(242, "mc_LOADV16-slow2");
4934 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4938 #endif
4941 // Generic for all platforms
4942 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4944 return mc_LOADV16(a, True);
4947 // Non-generic assembly for arm32-linux
4948 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4949 && defined(VGP_arm_linux)
4950 __asm__( /* Derived from NCode template */
4951 ".text \n"
4952 ".align 2 \n"
4953 ".global vgMemCheck_helperc_LOADV16le \n"
4954 ".type vgMemCheck_helperc_LOADV16le, %function \n"
4955 "vgMemCheck_helperc_LOADV16le: \n" //
4956 " tst r0, #1 \n" //
4957 " bne .LLV16LEc12 \n" // if misaligned
4958 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
4959 " movw r3, #:lower16:primary_map \n" //
4960 " uxth r1, r0 \n" // r1 = sec-map-offB
4961 " movt r3, #:upper16:primary_map \n" //
4962 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
4963 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
4964 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
4965 " bne .LLV16LEc0 \n" // no, goto .LLV16LEc0
4966 ".LLV16LEh9: \n" //
4967 " mov r0, #0xFFFFFFFF \n" //
4968 " lsl r0, r0, #16 \n" // V_BITS16_DEFINED | top16safe
4969 " bx lr \n" //
4970 ".LLV16LEc0: \n" //
4971 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
4972 " bne .LLV16LEc4 \n" //
4973 ".LLV16LEc2: \n" //
4974 " mov r0, #0xFFFFFFFF \n" // V_BITS16_UNDEFINED | top16safe
4975 " bx lr \n" //
4976 ".LLV16LEc4: \n" //
4977 // r1 holds sec-map-VABITS8. r0 holds the address and is 2-aligned.
4978 // Extract the relevant 4 bits and inspect.
4979 " and r2, r0, #2 \n" // addr & 2
4980 " add r2, r2, r2 \n" // 2 * (addr & 2)
4981 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 2))
4982 " and r1, r1, #15 \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
4984 " cmp r1, #0xA \n" // VA_BITS4_DEFINED
4985 " beq .LLV16LEh9 \n" //
4987 " cmp r1, #0x5 \n" // VA_BITS4_UNDEFINED
4988 " beq .LLV16LEc2 \n" //
4990 ".LLV16LEc12: \n" //
4991 " push {r4, lr} \n" //
4992 " mov r2, #0 \n" //
4993 " mov r1, #16 \n" //
4994 " bl mc_LOADVn_slow \n" //
4995 " pop {r4, pc} \n" //
4996 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
4997 ".previous\n"
5000 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5001 && defined(VGP_x86_linux)
5002 __asm__(
5003 ".text\n"
5004 ".align 16\n"
5005 ".global vgMemCheck_helperc_LOADV16le\n"
5006 ".type vgMemCheck_helperc_LOADV16le, @function\n"
5007 "vgMemCheck_helperc_LOADV16le:\n"
5008 " test $0x1, %eax\n"
5009 " jne .LLV16LE5\n" /* jump if not aligned */
5010 " mov %eax, %edx\n"
5011 " shr $0x10, %edx\n"
5012 " mov primary_map(,%edx,4), %ecx\n"
5013 " movzwl %ax, %edx\n"
5014 " shr $0x2, %edx\n"
5015 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5016 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5017 " jne .LLV16LE2\n" /* jump if not all 32bits defined */
5018 ".LLV16LE1:\n"
5019 " mov $0xffff0000,%eax\n" /* V_BITS16_DEFINED | top16safe */
5020 " ret\n"
5021 ".LLV16LE2:\n"
5022 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5023 " jne .LLV16LE4\n" /* jump if not all 32bits undefined */
5024 ".LLV16LE3:\n"
5025 " or $0xffffffff,%eax\n" /* V_BITS16_UNDEFINED | top16safe */
5026 " ret\n"
5027 ".LLV16LE4:\n"
5028 " mov %eax, %ecx\n"
5029 " and $0x2, %ecx\n"
5030 " add %ecx, %ecx\n"
5031 " sar %cl, %edx\n"
5032 " and $0xf, %edx\n"
5033 " cmp $0xa, %edx\n"
5034 " je .LLV16LE1\n" /* jump if all 16bits are defined */
5035 " cmp $0x5, %edx\n"
5036 " je .LLV16LE3\n" /* jump if all 16bits are undefined */
5037 ".LLV16LE5:\n"
5038 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 16, 0) */
5039 " mov $16, %edx\n"
5040 " jmp mc_LOADVn_slow\n"
5041 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5042 ".previous\n"
5045 #else
5046 // Generic for all platforms except {arm32,x86}-linux
5047 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
5049 return mc_LOADV16(a, False);
5051 #endif
5053 /*------------------------------------------------------------*/
5054 /*--- STOREV16 ---*/
5055 /*------------------------------------------------------------*/
5057 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
5058 static INLINE
5059 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
5061 UInt shift;
5062 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
5063 shift = (a & 2) << 1; // shift by 0 or 4
5064 vabits8 >>= shift; // shift the four bits to the bottom
5065 // check 2 x vabits2 != VA_BITS2_NOACCESS
5066 return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
5067 && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
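/* Editorial worked example, not part of the build: the check above with the
   2-bit encoding implied by the constants used elsewhere in this file
   (VA_BITS2_NOACCESS == 0, VA_BITS2_UNDEFINED == 1, VA_BITS2_DEFINED == 2).
   For vabits8 == 0x9A (byte 2 undefined, bytes 0, 1 and 3 defined):
     a % 4 == 0  ->  shift 0, nibble 0xA, fields 2 and 2  ->  accessible
     a % 4 == 2  ->  shift 4, nibble 0x9, fields 1 and 2  ->  accessible
   Only a field equal to 0 (NOACCESS) makes the byte pair inaccessible. */
#if 0
static int sketch_accessible_pair ( unsigned long a, unsigned char vabits8 )
{
   unsigned int shift = (a & 2) << 1;              /* 0 or 4               */
   unsigned int nib   = (vabits8 >> shift) & 0xF;  /* two 2-bit VA fields  */
   return ((nib & 0x3) != 0) && ((nib >> 2) != 0); /* neither is NOACCESS  */
}
#endif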
5070 static INLINE
5071 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
5073 PROF_EVENT(250, "mc_STOREV16");
5075 #ifndef PERF_FAST_STOREV
5076 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5077 #else
5079 UWord sm_off, vabits8;
5080 SecMap* sm;
5082 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5083 PROF_EVENT(251, "mc_STOREV16-slow1");
5084 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5085 return;
5088 sm = get_secmap_for_reading_low(a);
5089 sm_off = SM_OFF(a);
5090 vabits8 = sm->vabits8[sm_off];
5092 // To understand the below cleverness, see the extensive comments
5093 // in MC_(helperc_STOREV8).
5094 if (LIKELY(V_BITS16_DEFINED == vbits16)) {
5095 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5096 return;
5098 if (!is_distinguished_sm(sm)
5099 && accessible_vabits4_in_vabits8(a, vabits8)) {
5100 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
5101 &(sm->vabits8[sm_off]) );
5102 return;
5104 PROF_EVENT(232, "mc_STOREV16-slow2");
5105 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5107 if (V_BITS16_UNDEFINED == vbits16) {
5108 if (vabits8 == VA_BITS8_UNDEFINED) {
5109 return;
5111 if (!is_distinguished_sm(sm)
5112 && accessible_vabits4_in_vabits8(a, vabits8)) {
5113 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
5114 &(sm->vabits8[sm_off]) );
5115 return;
5117 PROF_EVENT(233, "mc_STOREV16-slow3");
5118 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5119 return;
5122 PROF_EVENT(234, "mc_STOREV16-slow4");
5123 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5125 #endif
5129 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
5131 mc_STOREV16(a, vbits16, True);
5133 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
5135 mc_STOREV16(a, vbits16, False);
5138 /*------------------------------------------------------------*/
5139 /*--- LOADV8 ---*/
5140 /*------------------------------------------------------------*/
5142 /* Note: endianness is irrelevant for size == 1 */
5144 // Non-generic assembly for arm32-linux
5145 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5146 && defined(VGP_arm_linux)
5147 __asm__( /* Derived from NCode template */
5148 ".text \n"
5149 ".align 2 \n"
5150 ".global vgMemCheck_helperc_LOADV8 \n"
5151 ".type vgMemCheck_helperc_LOADV8, %function \n"
5152 "vgMemCheck_helperc_LOADV8: \n" //
5153 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5154 " movw r3, #:lower16:primary_map \n" //
5155 " uxth r1, r0 \n" // r1 = sec-map-offB
5156 " movt r3, #:upper16:primary_map \n" //
5157 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5158 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5159 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5160 " bne .LLV8c0 \n" // no, goto .LLV8c0
5161 ".LLV8h9: \n" //
5162 " mov r0, #0xFFFFFF00 \n" // V_BITS8_DEFINED | top24safe
5163 " bx lr \n" //
5164 ".LLV8c0: \n" //
5165 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5166 " bne .LLV8c4 \n" //
5167 ".LLV8c2: \n" //
5168 " mov r0, #0xFFFFFFFF \n" // V_BITS8_UNDEFINED | top24safe
5169 " bx lr \n" //
5170 ".LLV8c4: \n" //
5171 // r1 holds sec-map-VABITS8
5172 // r0 holds the address. Extract the relevant 2 bits and inspect.
5173 " and r2, r0, #3 \n" // addr & 3
5174 " add r2, r2, r2 \n" // 2 * (addr & 3)
5175 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5176 " and r1, r1, #3 \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5178 " cmp r1, #2 \n" // VA_BITS2_DEFINED
5179 " beq .LLV8h9 \n" //
5181 " cmp r1, #1 \n" // VA_BITS2_UNDEFINED
5182 " beq .LLV8c2 \n" //
5184 " push {r4, lr} \n" //
5185 " mov r2, #0 \n" //
5186 " mov r1, #8 \n" //
5187 " bl mc_LOADVn_slow \n" //
5188 " pop {r4, pc} \n" //
5189 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5190 ".previous\n"
5193 /* Non-generic assembly for x86-linux */
5194 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5195 && defined(VGP_x86_linux)
5196 __asm__(
5197 ".text\n"
5198 ".align 16\n"
5199 ".global vgMemCheck_helperc_LOADV8\n"
5200 ".type vgMemCheck_helperc_LOADV8, @function\n"
5201 "vgMemCheck_helperc_LOADV8:\n"
5202 " mov %eax, %edx\n"
5203 " shr $0x10, %edx\n"
5204 " mov primary_map(,%edx,4), %ecx\n"
5205 " movzwl %ax, %edx\n"
5206 " shr $0x2, %edx\n"
5207 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5208 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED? */
5209 " jne .LLV8LE2\n" /* jump if not defined */
5210 ".LLV8LE1:\n"
5211 " mov $0xffffff00, %eax\n" /* V_BITS8_DEFINED | top24safe */
5212 " ret\n"
5213 ".LLV8LE2:\n"
5214 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5215 " jne .LLV8LE4\n" /* jump if not all 32bits are undefined */
5216 ".LLV8LE3:\n"
5217 " or $0xffffffff, %eax\n" /* V_BITS8_UNDEFINED | top24safe */
5218 " ret\n"
5219 ".LLV8LE4:\n"
5220 " mov %eax, %ecx\n"
5221 " and $0x3, %ecx\n"
5222 " add %ecx, %ecx\n"
5223 " sar %cl, %edx\n"
5224 " and $0x3, %edx\n"
5225 " cmp $0x2, %edx\n"
5226 " je .LLV8LE1\n" /* jump if all 8bits are defined */
5227 " cmp $0x1, %edx\n"
5228 " je .LLV8LE3\n" /* jump if all 8bits are undefined */
5229 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 8, 0) */
5230 " mov $0x8, %edx\n"
5231 " jmp mc_LOADVn_slow\n"
5232 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5233 ".previous\n"
5236 #else
5237 // Generic for all platforms except {arm32,x86}-linux
5238 VG_REGPARM(1)
5239 UWord MC_(helperc_LOADV8) ( Addr a )
5241 PROF_EVENT(260, "mc_LOADV8");
5243 #ifndef PERF_FAST_LOADV
5244 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5245 #else
5247 UWord sm_off, vabits8;
5248 SecMap* sm;
5250 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5251 PROF_EVENT(261, "mc_LOADV8-slow1");
5252 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5255 sm = get_secmap_for_reading_low(a);
5256 sm_off = SM_OFF(a);
5257 vabits8 = sm->vabits8[sm_off];
5258 // Convert V bits from compact memory form to expanded register form
5259 // Handle common case quickly: a is mapped, and the entire
5260 // word32 it lives in is addressable.
5261 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; }
5262 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
5263 else {
5264 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5265 // the single byte.
5266 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
5267 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; }
5268 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
5269 else {
5270 /* Slow case: the byte is not all-defined or all-undefined. */
5271 PROF_EVENT(262, "mc_LOADV8-slow2");
5272 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5276 #endif
5278 #endif
5280 /*------------------------------------------------------------*/
5281 /*--- STOREV8 ---*/
5282 /*------------------------------------------------------------*/
5284 VG_REGPARM(2)
5285 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
5287 PROF_EVENT(270, "mc_STOREV8");
5289 #ifndef PERF_FAST_STOREV
5290 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5291 #else
5293 UWord sm_off, vabits8;
5294 SecMap* sm;
5296 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5297 PROF_EVENT(271, "mc_STOREV8-slow1");
5298 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5299 return;
5302 sm = get_secmap_for_reading_low(a);
5303 sm_off = SM_OFF(a);
5304 vabits8 = sm->vabits8[sm_off];
5306 // Clevernesses to speed up storing V bits.
5307 // The 64/32/16 bit cases also have similar clevernesses, but they
5308 // work a little differently from the code below.
5310 // Cleverness 1: sometimes we don't have to write the shadow memory at
5311 // all, if we can tell that what we want to write is the same as what is
5312 // already there. These cases are marked below as "defined on defined" and
5313 // "undefined on undefined".
5315 // Cleverness 2:
5316 // We also avoid calling mc_STOREVn_slow if the V bits can directly
5317 // be written in the secondary map. V bits can be directly written
5318 // if 4 conditions are respected:
5319 // * The address for which V bits are written is naturally aligned
5320 // on 1 byte for STOREV8 (this is always true)
5321 // on 2 bytes for STOREV16
5322 // on 4 bytes for STOREV32
5323 // on 8 bytes for STOREV64.
5324 // * V bits being written are either fully defined or fully undefined.
5325 // (for partially defined V bits, V bits cannot be directly written,
5326 // as the secondary vbits table must be maintained).
5327 // * the secmap is not distinguished (distinguished maps cannot be
5328 // modified).
5329 // * the memory corresponding to the V bits being written is
5330 // accessible (if one or more bytes are not accessible,
5331 // we must call mc_STOREVn_slow in order to report accessibility
5332 // errors).
5333 // Note that for STOREV32 and STOREV64, it is too expensive
5334 // to verify the accessibility of each byte for the benefit it
5335 // brings. Instead, a quicker check is done by comparing to
5336 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
5337 // but misses some opportunities for direct modification.
5338 // Checking the accessibility of each byte was measured for
5339 // STOREV32 with the perf tests, and it slowed down all of them.
5340 // The cases corresponding to cleverness 2 are marked below as
5341 // "direct mod".  (An illustrative sketch follows this function.)
5342 if (LIKELY(V_BITS8_DEFINED == vbits8)) {
5343 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5344 return; // defined on defined
5346 if (!is_distinguished_sm(sm)
5347 && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
5348 // direct mod
5349 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
5350 &(sm->vabits8[sm_off]) );
5351 return;
5353 PROF_EVENT(232, "mc_STOREV8-slow2");
5354 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5355 return;
5357 if (V_BITS8_UNDEFINED == vbits8) {
5358 if (vabits8 == VA_BITS8_UNDEFINED) {
5359 return; // undefined on undefined
5361 if (!is_distinguished_sm(sm)
5362 && (VA_BITS2_NOACCESS
5363 != extract_vabits2_from_vabits8(a, vabits8))) {
5364 // direct mod
5365 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
5366 &(sm->vabits8[sm_off]) );
5367 return;
5369 PROF_EVENT(233, "mc_STOREV8-slow3");
5370 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5371 return;
5374 // Partially defined byte: take the slow path.
5375 PROF_EVENT(234, "mc_STOREV8-slow4");
5376 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5378 #endif
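/* Editorial sketch, not part of the build: a compressed view of the
   decision made by MC_(helperc_STOREV8) above.  It relies only on values
   this file already asserts or states: V_BITS8_DEFINED == 0x00,
   V_BITS8_UNDEFINED == 0xFF, VA_BITS8_DEFINED == 0xAA,
   VA_BITS8_UNDEFINED == 0x55, VA_BITS2_NOACCESS == 0. */
#if 0
typedef enum { SKIP_WRITE, DIRECT_MOD, SLOW_PATH } SketchStorePath;

static SketchStorePath sketch_storev8_path ( unsigned long a,
                                             unsigned char vbits8,
                                             unsigned char vabits8,
                                             int sm_is_distinguished )
{
   /* The 2-bit VA field for this particular byte within its vabits8. */
   unsigned char vabits2 = (vabits8 >> (2 * (a & 3))) & 0x3;

   if (vbits8 == 0x00) {                       /* storing "all defined" */
      if (vabits8 == 0xAA)   return SKIP_WRITE;  /* defined on defined */
      if (!sm_is_distinguished && vabits2 != 0)
                             return DIRECT_MOD;
      return SLOW_PATH;
   }
   if (vbits8 == 0xFF) {                       /* storing "all undefined" */
      if (vabits8 == 0x55)   return SKIP_WRITE;  /* undefined on undefined */
      if (!sm_is_distinguished && vabits2 != 0)
                             return DIRECT_MOD;
      return SLOW_PATH;
   }
   return SLOW_PATH;                           /* partially defined byte */
}
#endif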
5382 /*------------------------------------------------------------*/
5383 /*--- Functions called directly from generated code: ---*/
5384 /*--- Value-check failure handlers. ---*/
5385 /*------------------------------------------------------------*/
5387 /* Call these ones when an origin is available ... */
5388 VG_REGPARM(1)
5389 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
5390 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
5393 VG_REGPARM(1)
5394 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
5395 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
5398 VG_REGPARM(1)
5399 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
5400 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
5403 VG_REGPARM(1)
5404 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
5405 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
5408 VG_REGPARM(2)
5409 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
5410 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
5413 /* ... and these when an origin isn't available. */
5415 VG_REGPARM(0)
5416 void MC_(helperc_value_check0_fail_no_o) ( void ) {
5417 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
5420 VG_REGPARM(0)
5421 void MC_(helperc_value_check1_fail_no_o) ( void ) {
5422 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
5425 VG_REGPARM(0)
5426 void MC_(helperc_value_check4_fail_no_o) ( void ) {
5427 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
5430 VG_REGPARM(0)
5431 void MC_(helperc_value_check8_fail_no_o) ( void ) {
5432 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
5435 VG_REGPARM(1)
5436 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
5437 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
5441 /*------------------------------------------------------------*/
5442 /*--- Metadata get/set functions, for client requests. ---*/
5443 /*------------------------------------------------------------*/
5445 // Nb: this expands the V+A bits out into register-form V bits, even though
5446 // they're in memory. This is for backward compatibility, and because it's
5447 // probably what the user wants.
5449 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5450 error [no longer used], 3 == addressing error. */
5451 /* Nb: We used to issue various definedness/addressability errors from here,
5452 but we took them out because they ranged from not-very-helpful to
5453 downright annoying, and they complicated the error data structures. */
5454 static Int mc_get_or_set_vbits_for_client (
5455 Addr a,
5456 Addr vbits,
5457 SizeT szB,
5458 Bool setting, /* True <=> set vbits, False <=> get vbits */
5459 Bool is_client_request /* True <=> real user request
5460 False <=> internal call from gdbserver */
5463 SizeT i;
5464 Bool ok;
5465 UChar vbits8;
5467 /* Check that the arrays are addressable before doing any getting/setting.
5468 vbits is checked only for a real user request. */
5469 for (i = 0; i < szB; i++) {
5470 if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
5471 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
5472 return 3;
5476 /* Do the copy */
5477 if (setting) {
5478 /* setting */
5479 for (i = 0; i < szB; i++) {
5480 ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
5481 tl_assert(ok);
5483 } else {
5484 /* getting */
5485 for (i = 0; i < szB; i++) {
5486 ok = get_vbits8(a + i, &vbits8);
5487 tl_assert(ok);
5488 ((UChar*)vbits)[i] = vbits8;
5490 if (is_client_request)
5491 // The bytes in vbits[] have now been set, so mark them as such.
5492 MC_(make_mem_defined)(vbits, szB);
5495 return 1;
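/* Editorial client-side sketch, not compiled here: how the two requests
   that reach this function are typically issued by a program running under
   memcheck, assuming the VALGRIND_GET_VBITS / VALGRIND_SET_VBITS macros
   from memcheck.h.  Per the comment above, a return value of 1 means OK
   and 3 means an addressing error. */
#if 0
#include "memcheck.h"

static void sketch_roundtrip_vbits ( void )
{
   int  x;                       /* deliberately uninitialised              */
   char shadow[sizeof(x)];       /* receives one V-bits byte per data byte  */

   if (VALGRIND_GET_VBITS(&x, shadow, sizeof(x)) == 1) {
      /* shadow[] now holds 0x00 for defined bytes, 0xFF for undefined ones
         (register-form V bits, as noted above). */
      VALGRIND_SET_VBITS(&x, shadow, sizeof(x));    /* write them back */
   }
}
#endif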
5499 /*------------------------------------------------------------*/
5500 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
5501 /*------------------------------------------------------------*/
5503 /* For the memory leak detector, say whether an entire 64k chunk of
5504 address space is possibly in use, or not. If in doubt return
5505 True.
5507 Bool MC_(is_within_valid_secondary) ( Addr a )
5509 SecMap* sm = maybe_get_secmap_for ( a );
5510 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
5511 /* Definitely not in use. */
5512 return False;
5513 } else {
5514 return True;
5519 /* For the memory leak detector, say whether or not a given word
5520 address is to be regarded as valid. */
5521 Bool MC_(is_valid_aligned_word) ( Addr a )
5523 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5524 tl_assert(VG_IS_WORD_ALIGNED(a));
5525 if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5526 return False;
5527 if (sizeof(UWord) == 8) {
5528 if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5529 return False;
5531 if (UNLIKELY(MC_(in_ignored_range)(a)))
5532 return False;
5533 else
5534 return True;
5538 /*------------------------------------------------------------*/
5539 /*--- Initialisation ---*/
5540 /*------------------------------------------------------------*/
5542 static void init_shadow_memory ( void )
5544 Int i;
5545 SecMap* sm;
5547 tl_assert(V_BIT_UNDEFINED == 1);
5548 tl_assert(V_BIT_DEFINED == 0);
5549 tl_assert(V_BITS8_UNDEFINED == 0xFF);
5550 tl_assert(V_BITS8_DEFINED == 0);
5552 /* Build the 3 distinguished secondaries */
5553 sm = &sm_distinguished[SM_DIST_NOACCESS];
5554 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5556 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5557 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5559 sm = &sm_distinguished[SM_DIST_DEFINED];
5560 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5562 /* Set up the primary map. */
5563 /* These entries gradually get overwritten as the used address
5564 space expands. */
5565 for (i = 0; i < N_PRIMARY_MAP; i++)
5566 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5568 /* Auxiliary primary maps */
5569 init_auxmap_L1_L2();
5571 /* auxmap_size = auxmap_used = 0;
5572 no ... these are statically initialised */
5574 /* Secondary V bit table */
5575 secVBitTable = createSecVBitTable();
5579 /*------------------------------------------------------------*/
5580 /*--- Sanity check machinery (permanently engaged) ---*/
5581 /*------------------------------------------------------------*/
5583 static Bool mc_cheap_sanity_check ( void )
5585 n_sanity_cheap++;
5586 PROF_EVENT(490, "cheap_sanity_check");
5587 /* Check for sane operating level */
5588 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5589 return False;
5590 /* nothing else useful we can rapidly check */
5591 return True;
5594 static Bool mc_expensive_sanity_check ( void )
5596 Int i;
5597 Word n_secmaps_found;
5598 SecMap* sm;
5599 const HChar* errmsg;
5600 Bool bad = False;
5602 if (0) VG_(printf)("expensive sanity check\n");
5603 if (0) return True;
5605 n_sanity_expensive++;
5606 PROF_EVENT(491, "expensive_sanity_check");
5608 /* Check for sane operating level */
5609 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5610 return False;
5612 /* Check that the 3 distinguished SMs are still as they should be. */
5614 /* Check noaccess DSM. */
5615 sm = &sm_distinguished[SM_DIST_NOACCESS];
5616 for (i = 0; i < SM_CHUNKS; i++)
5617 if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5618 bad = True;
5620 /* Check undefined DSM. */
5621 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5622 for (i = 0; i < SM_CHUNKS; i++)
5623 if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5624 bad = True;
5626 /* Check defined DSM. */
5627 sm = &sm_distinguished[SM_DIST_DEFINED];
5628 for (i = 0; i < SM_CHUNKS; i++)
5629 if (sm->vabits8[i] != VA_BITS8_DEFINED)
5630 bad = True;
5632 if (bad) {
5633 VG_(printf)("memcheck expensive sanity: "
5634 "distinguished_secondaries have changed\n");
5635 return False;
5638 /* If we're not checking for undefined value errors, the secondary V bit
5639 * table should be empty. */
5640 if (MC_(clo_mc_level) == 1) {
5641 if (0 != VG_(OSetGen_Size)(secVBitTable))
5642 return False;
5645 /* check the auxiliary maps, very thoroughly */
5646 n_secmaps_found = 0;
5647 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
5648 if (errmsg) {
5649 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
5650 return False;
5653 /* n_secmaps_found is now the number referred to by the auxiliary
5654 primary map. Now add on the ones referred to by the main
5655 primary map. */
5656 for (i = 0; i < N_PRIMARY_MAP; i++) {
5657 if (primary_map[i] == NULL) {
5658 bad = True;
5659 } else {
5660 if (!is_distinguished_sm(primary_map[i]))
5661 n_secmaps_found++;
5665 /* check that the number of secmaps issued matches the number that
5666 are reachable (iow, no secmap leaks) */
5667 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
5668 bad = True;
5670 if (bad) {
5671 VG_(printf)("memcheck expensive sanity: "
5672 "apparent secmap leakage\n");
5673 return False;
5676 if (bad) {
5677 VG_(printf)("memcheck expensive sanity: "
5678 "auxmap covers wrong address space\n");
5679 return False;
5682 /* there is only one pointer to each secmap (expensive) */
5684 return True;
5687 /*------------------------------------------------------------*/
5688 /*--- Command line args ---*/
5689 /*------------------------------------------------------------*/
5691 /* --partial-loads-ok: enabled by default on MacOS. The MacOS system
5692 graphics libraries are heavily vectorised, and not enabling this by
5693 default causes lots of false errors. */
5694 #if defined(VGO_darwin)
5695 Bool MC_(clo_partial_loads_ok) = True;
5696 #else
5697 Bool MC_(clo_partial_loads_ok) = False;
5698 #endif
5700 Long MC_(clo_freelist_vol) = 20*1000*1000LL;
5701 Long MC_(clo_freelist_big_blocks) = 1*1000*1000LL;
5702 LeakCheckMode MC_(clo_leak_check) = LC_Summary;
5703 VgRes MC_(clo_leak_resolution) = Vg_HighRes;
5704 UInt MC_(clo_show_leak_kinds) = R2S(Possible) | R2S(Unreached);
5705 UInt MC_(clo_error_for_leak_kinds) = R2S(Possible) | R2S(Unreached);
5706 UInt MC_(clo_leak_check_heuristics) = 0;
5707 Bool MC_(clo_workaround_gcc296_bugs) = False;
5708 Int MC_(clo_malloc_fill) = -1;
5709 Int MC_(clo_free_fill) = -1;
5710 KeepStacktraces MC_(clo_keep_stacktraces) = KS_alloc_then_free;
5711 Int MC_(clo_mc_level) = 2;
5712 Bool MC_(clo_show_mismatched_frees) = True;
5714 static const HChar * MC_(parse_leak_heuristics_tokens) =
5715 "-,stdstring,length64,newarray,multipleinheritance";
5716 /* The first heuristic value (LchNone) has no keyword, as this is
5717 a fake heuristic used to collect the blocks found without any
5718 heuristic. */
5720 static Bool mc_process_cmd_line_options(const HChar* arg)
5722 const HChar* tmp_str;
5723 Int tmp_show;
5725 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
5727 /* Set MC_(clo_mc_level):
5728 1 = A bit tracking only
5729 2 = A and V bit tracking, but no V bit origins
5730 3 = A and V bit tracking, and V bit origins
5732 Do this by inspecting --undef-value-errors= and
5733 --track-origins=. Reject the case --undef-value-errors=no
5734 --track-origins=yes as meaningless.
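/* Editorial summary of the branches below (the default comes from the
   declaration above: MC_(clo_mc_level) starts at 2):
     --undef-value-errors=no                          -> level 1
     --undef-value-errors=yes (or default)            -> level 2
     --track-origins=yes (with undef errors enabled)  -> level 3
   Combining --undef-value-errors=no with --track-origins=yes, in either
   order, is rejected via bad_level. */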
5736 if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
5737 if (MC_(clo_mc_level) == 3) {
5738 goto bad_level;
5739 } else {
5740 MC_(clo_mc_level) = 1;
5741 return True;
5744 if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
5745 if (MC_(clo_mc_level) == 1)
5746 MC_(clo_mc_level) = 2;
5747 return True;
5749 if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
5750 if (MC_(clo_mc_level) == 3)
5751 MC_(clo_mc_level) = 2;
5752 return True;
5754 if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
5755 if (MC_(clo_mc_level) == 1) {
5756 goto bad_level;
5757 } else {
5758 MC_(clo_mc_level) = 3;
5759 return True;
5763 if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
5764 else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
5765 MC_(parse_leak_kinds_tokens),
5766 MC_(clo_error_for_leak_kinds)) {}
5767 else if VG_USET_CLO(arg, "--show-leak-kinds",
5768 MC_(parse_leak_kinds_tokens),
5769 MC_(clo_show_leak_kinds)) {}
5770 else if VG_USET_CLO(arg, "--leak-check-heuristics",
5771 MC_(parse_leak_heuristics_tokens),
5772 MC_(clo_leak_check_heuristics)) {}
5773 else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
5774 if (tmp_show) {
5775 MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
5776 } else {
5777 MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
5780 else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
5781 if (tmp_show) {
5782 MC_(clo_show_leak_kinds) |= R2S(Possible);
5783 } else {
5784 MC_(clo_show_leak_kinds) &= ~R2S(Possible);
5787 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
5788 MC_(clo_workaround_gcc296_bugs)) {}
5790 else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol),
5791 0, 10*1000*1000*1000LL) {}
5793 else if VG_BINT_CLO(arg, "--freelist-big-blocks",
5794 MC_(clo_freelist_big_blocks),
5795 0, 10*1000*1000*1000LL) {}
5797 else if VG_XACT_CLO(arg, "--leak-check=no",
5798 MC_(clo_leak_check), LC_Off) {}
5799 else if VG_XACT_CLO(arg, "--leak-check=summary",
5800 MC_(clo_leak_check), LC_Summary) {}
5801 else if VG_XACT_CLO(arg, "--leak-check=yes",
5802 MC_(clo_leak_check), LC_Full) {}
5803 else if VG_XACT_CLO(arg, "--leak-check=full",
5804 MC_(clo_leak_check), LC_Full) {}
5806 else if VG_XACT_CLO(arg, "--leak-resolution=low",
5807 MC_(clo_leak_resolution), Vg_LowRes) {}
5808 else if VG_XACT_CLO(arg, "--leak-resolution=med",
5809 MC_(clo_leak_resolution), Vg_MedRes) {}
5810 else if VG_XACT_CLO(arg, "--leak-resolution=high",
5811 MC_(clo_leak_resolution), Vg_HighRes) {}
5813 else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
5814 Bool ok = parse_ignore_ranges(tmp_str);
5815 if (!ok) {
5816 VG_(message)(Vg_DebugMsg,
5817 "ERROR: --ignore-ranges: "
5818 "invalid syntax, or end <= start in range\n");
5819 return False;
5821 if (gIgnoredAddressRanges) {
5822 Word i;
5823 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
5824 UWord val = IAR_INVALID;
5825 UWord key_min = ~(UWord)0;
5826 UWord key_max = (UWord)0;
5827 VG_(indexRangeMap)( &key_min, &key_max, &val,
5828 gIgnoredAddressRanges, i );
5829 tl_assert(key_min <= key_max);
5830 UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
5831 if (key_max - key_min > limit && val == IAR_CommandLine) {
5832 VG_(message)(Vg_DebugMsg,
5833 "ERROR: --ignore-ranges: suspiciously large range:\n");
5834 VG_(message)(Vg_DebugMsg,
5835 " 0x%lx-0x%lx (size %lu)\n", key_min, key_max,
5836 key_max - key_min + 1);
5837 return False;
5843 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
5844 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}
5846 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
5847 MC_(clo_keep_stacktraces), KS_alloc) {}
5848 else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
5849 MC_(clo_keep_stacktraces), KS_free) {}
5850 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
5851 MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
5852 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
5853 MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
5854 else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
5855 MC_(clo_keep_stacktraces), KS_none) {}
5857 else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
5858 MC_(clo_show_mismatched_frees)) {}
5860 else
5861 return VG_(replacement_malloc_process_cmd_line_option)(arg);
5863 return True;
5866 bad_level:
5867 VG_(fmsg_bad_option)(arg,
5868 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
5871 static void mc_print_usage(void)
5873 const HChar* plo_default = "no";
5874 # if defined(VGO_darwin)
5875 plo_default = "yes";
5876 # endif
5878 VG_(printf)(
5879 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
5880 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
5881 " --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
5882 " [definite,possible]\n"
5883 " --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
5884 " [definite,possible]\n"
5885 " where kind is one of:\n"
5886 " definite indirect possible reachable all none\n"
5887 " --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
5888 " improving leak search false positive [none]\n"
5889 " where heur is one of:\n"
5890 " stdstring length64 newarray multipleinheritance all none\n"
5891 " --show-reachable=yes same as --show-leak-kinds=all\n"
5892 " --show-reachable=no --show-possibly-lost=yes\n"
5893 " same as --show-leak-kinds=definite,possible\n"
5894 " --show-reachable=no --show-possibly-lost=no\n"
5895 " same as --show-leak-kinds=definite\n"
5896 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
5897 " --track-origins=no|yes show origins of undefined values? [no]\n"
5898 " --partial-loads-ok=no|yes too hard to explain here; see manual [%s]\n"
5899 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
5900 " --freelist-big-blocks=<number> releases first blocks with size>= [1000000]\n"
5901 " --workaround-gcc296-bugs=no|yes self explanatory [no]\n"
5902 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
5903 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
5904 " --free-fill=<hexnumber> fill free'd areas with given value\n"
5905 " --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
5906 " stack trace(s) to keep for malloc'd/free'd areas [alloc-then-free]\n"
5907 " --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
5908 , plo_default
5912 static void mc_print_debug_usage(void)
5914 VG_(printf)(
5915 " (none)\n"
5920 /*------------------------------------------------------------*/
5921 /*--- Client blocks ---*/
5922 /*------------------------------------------------------------*/
5924 /* Client block management:
5926 This is managed as an expanding array of client block descriptors.
5927 Indices of live descriptors are issued to the client, so it can ask
5928 to free them later. Therefore we cannot slide live entries down
5929 over dead ones. Instead we must use free/inuse flags and scan for
5930 an empty slot at allocation time. This in turn means allocation is
5931 relatively expensive, so we hope this does not happen too often.
5933 An unused block has start == size == 0
5936 /* type CGenBlock is defined in mc_include.h */
5938 /* This subsystem is self-initialising. */
5939 static UWord cgb_size = 0;
5940 static UWord cgb_used = 0;
5941 static CGenBlock* cgbs = NULL;
5943 /* Stats for this subsystem. */
5944 static ULong cgb_used_MAX = 0; /* Max in use. */
5945 static ULong cgb_allocs = 0; /* Number of allocs. */
5946 static ULong cgb_discards = 0; /* Number of discards. */
5947 static ULong cgb_search = 0; /* Number of searches. */
5950 /* Get access to the client block array. */
5951 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
5952 /*OUT*/UWord* nBlocks )
5954 *blocks = cgbs;
5955 *nBlocks = cgb_used;
5959 static
5960 Int alloc_client_block ( void )
5962 UWord i, sz_new;
5963 CGenBlock* cgbs_new;
5965 cgb_allocs++;
5967 for (i = 0; i < cgb_used; i++) {
5968 cgb_search++;
5969 if (cgbs[i].start == 0 && cgbs[i].size == 0)
5970 return i;
5973 /* Not found. Try to allocate one at the end. */
5974 if (cgb_used < cgb_size) {
5975 cgb_used++;
5976 return cgb_used-1;
5979 /* Ok, we have to allocate a new one. */
5980 tl_assert(cgb_used == cgb_size);
5981 sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
5983 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
5984 for (i = 0; i < cgb_used; i++)
5985 cgbs_new[i] = cgbs[i];
5987 if (cgbs != NULL)
5988 VG_(free)( cgbs );
5989 cgbs = cgbs_new;
5991 cgb_size = sz_new;
5992 cgb_used++;
5993 if (cgb_used > cgb_used_MAX)
5994 cgb_used_MAX = cgb_used;
5995 return cgb_used-1;
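/* Editorial client-side sketch, not compiled here: the index returned by
   alloc_client_block above is the handle that the CREATE_BLOCK / DISCARD
   client requests (handled further down in this file) give back to the
   program.  Assumes the VALGRIND_CREATE_BLOCK and VALGRIND_DISCARD macros
   from memcheck.h. */
#if 0
#include <stdlib.h>
#include "memcheck.h"

static void sketch_client_block ( void )
{
   char* buf = malloc(64);
   /* Ask memcheck to remember a description for this range; address
      descriptions for errors inside it can then mention this text. */
   long handle = VALGRIND_CREATE_BLOCK(buf, 64, "my I/O buffer");
   /* ... use buf ... */
   VALGRIND_DISCARD(handle);     /* 0 if the handle was live, 1 otherwise */
   free(buf);
}
#endif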
5999 static void show_client_block_stats ( void )
6001 VG_(message)(Vg_DebugMsg,
6002 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6003 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
6006 static void print_monitor_help ( void )
6008 VG_(gdb_printf)
6010 "\n"
6011 "memcheck monitor commands:\n"
6012 " get_vbits <addr> [<len>]\n"
6013 " returns validity bits for <len> (or 1) bytes at <addr>\n"
6014 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6015 " Example: get_vbits 0x8049c78 10\n"
6016 " make_memory [noaccess|undefined\n"
6017 " |defined|Definedifaddressable] <addr> [<len>]\n"
6018 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6019 " check_memory [addressable|defined] <addr> [<len>]\n"
6020 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6021 " and outputs a description of <addr>\n"
6022 " leak_check [full*|summary]\n"
6023 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6024 " [heuristics heur1,heur2,...]\n"
6025 " [increased*|changed|any]\n"
6026 " [unlimited*|limited <max_loss_records_output>]\n"
6027 " * = defaults\n"
6028 " where kind is one of:\n"
6029 " definite indirect possible reachable all none\n"
6030 " where heur is one of:\n"
6031 " stdstring length64 newarray multipleinheritance all none*\n"
6032 " Examples: leak_check\n"
6033 " leak_check summary any\n"
6034 " leak_check full kinds indirect,possible\n"
6035 " leak_check full reachable any limited 100\n"
6036 " block_list <loss_record_nr>\n"
6037 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6038 " who_points_at <addr> [<len>]\n"
6039 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6040 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6041 " with len > 1, will also show \"interior pointers\")\n"
6042 "\n");
6045 /* return True if request recognised, False otherwise */
6046 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
6048 HChar* wcmd;
6049 HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
6050 HChar *ssaveptr;
6052 VG_(strcpy) (s, req);
6054 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
6055 /* NB: if possible, avoid introducing a new command below which
6056 starts with the same first letter(s) as an already existing
6057 command. This ensures a shorter abbreviation for the user. */
6058 switch (VG_(keyword_id)
6059 ("help get_vbits leak_check make_memory check_memory "
6060 "block_list who_points_at",
6061 wcmd, kwd_report_duplicated_matches)) {
6062 case -2: /* multiple matches */
6063 return True;
6064 case -1: /* not found */
6065 return False;
6066 case 0: /* help */
6067 print_monitor_help();
6068 return True;
6069 case 1: { /* get_vbits */
6070 Addr address;
6071 SizeT szB = 1;
6072 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6073 UChar vbits;
6074 Int i;
6075 Int unaddressable = 0;
6076 for (i = 0; i < szB; i++) {
6077 Int res = mc_get_or_set_vbits_for_client
6078 (address+i, (Addr) &vbits, 1,
6079 False, /* get them */
6080 False /* is client request */ );
6081 /* we are before the first character on the next line, so print a \n. */
6082 if ((i % 32) == 0 && i != 0)
6083 VG_(printf) ("\n");
6084 /* we are before the start of the next block of 4, so print a space. */
6085 else if ((i % 4) == 0 && i != 0)
6086 VG_(printf) (" ");
6087 if (res == 1) {
6088 VG_(printf) ("%02x", vbits);
6089 } else {
6090 tl_assert(3 == res);
6091 unaddressable++;
6092 VG_(printf) ("__");
6095 VG_(printf) ("\n");
6096 if (unaddressable) {
6097 VG_(printf)
6098 ("Address %p len %ld has %d bytes unaddressable\n",
6099 (void *)address, szB, unaddressable);
6102 return True;
6104 case 2: { /* leak_check */
6105 Int err = 0;
6106 LeakCheckParams lcp;
6107 HChar* kw;
6109 lcp.mode = LC_Full;
6110 lcp.show_leak_kinds = R2S(Possible) | R2S(Unreached);
6111 lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
6112 lcp.heuristics = 0;
6113 lcp.deltamode = LCD_Increased;
6114 lcp.max_loss_records_output = 999999999;
6115 lcp.requested_by_monitor_command = True;
6117 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
6118 kw != NULL;
6119 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6120 switch (VG_(keyword_id)
6121 ("full summary "
6122 "kinds reachable possibleleak definiteleak "
6123 "heuristics "
6124 "increased changed any "
6125 "unlimited limited ",
6126 kw, kwd_report_all)) {
6127 case -2: err++; break;
6128 case -1: err++; break;
6129 case 0: /* full */
6130 lcp.mode = LC_Full; break;
6131 case 1: /* summary */
6132 lcp.mode = LC_Summary; break;
6133 case 2: { /* kinds */
6134 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6135 if (wcmd == NULL
6136 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
6137 True/*allow_all*/,
6138 wcmd,
6139 &lcp.show_leak_kinds)) {
6140 VG_(gdb_printf) ("missing or malformed leak kinds set\n");
6141 err++;
6143 break;
6145 case 3: /* reachable */
6146 lcp.show_leak_kinds = MC_(all_Reachedness)();
6147 break;
6148 case 4: /* possibleleak */
6149 lcp.show_leak_kinds
6150 = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
6151 break;
6152 case 5: /* definiteleak */
6153 lcp.show_leak_kinds = R2S(Unreached);
6154 break;
6155 case 6: { /* heuristics */
6156 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6157 if (wcmd == NULL
6158 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6159 True,/*allow_all*/
6160 wcmd,
6161 &lcp.heuristics)) {
6162 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6163 err++;
6165 break;
6167 case 7: /* increased */
6168 lcp.deltamode = LCD_Increased; break;
6169 case 8: /* changed */
6170 lcp.deltamode = LCD_Changed; break;
6171 case 9: /* any */
6172 lcp.deltamode = LCD_Any; break;
6173 case 10: /* unlimited */
6174 lcp.max_loss_records_output = 999999999; break;
6175 case 11: { /* limited */
6176 Int int_value;
6177 const HChar* endptr;
6179 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6180 if (wcmd == NULL) {
6181 int_value = 0;
6182 endptr = "empty"; /* to report an error below */
6183 } else {
6184 HChar *the_end;
6185 int_value = VG_(strtoll10) (wcmd, &the_end);
6186 endptr = the_end;
6188 if (*endptr != '\0')
6189 VG_(gdb_printf) ("missing or malformed integer value\n");
6190 else if (int_value > 0)
6191 lcp.max_loss_records_output = (UInt) int_value;
6192 else
6193 VG_(gdb_printf) ("max_loss_records_output must be >= 1, got %d\n",
6194 int_value);
6195 break;
6197 default:
6198 tl_assert (0);
6201 if (!err)
6202 MC_(detect_memory_leaks)(tid, &lcp);
6203 return True;
6206 case 3: { /* make_memory */
6207 Addr address;
6208 SizeT szB = 1;
6209 Int kwdid = VG_(keyword_id)
6210 ("noaccess undefined defined Definedifaddressable",
6211 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6212 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6213 return True;
6214 switch (kwdid) {
6215 case -2: break;
6216 case -1: break;
6217 case 0: MC_(make_mem_noaccess) (address, szB); break;
6218 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
6219 MC_OKIND_USER ); break;
6220 case 2: MC_(make_mem_defined) ( address, szB ); break;
6221 case 3: make_mem_defined_if_addressable ( address, szB ); break;
6222 default: tl_assert(0);
6224 return True;
6227 case 4: { /* check_memory */
6228 Addr address;
6229 SizeT szB = 1;
6230 Addr bad_addr;
6231 UInt okind;
6232 const HChar* src;
6233 UInt otag;
6234 UInt ecu;
6235 ExeContext* origin_ec;
6236 MC_ReadResult res;
6238 Int kwdid = VG_(keyword_id)
6239 ("addressable defined",
6240 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6241 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6242 return True;
6243 switch (kwdid) {
6244 case -2: break;
6245 case -1: break;
6246 case 0: /* addressable */
6247 if (is_mem_addressable ( address, szB, &bad_addr ))
6248 VG_(printf) ("Address %p len %ld addressable\n",
6249 (void *)address, szB);
6250 else
6251 VG_(printf)
6252 ("Address %p len %ld not addressable:\nbad address %p\n",
6253 (void *)address, szB, (void *) bad_addr);
6254 MC_(pp_describe_addr) (address);
6255 break;
6256 case 1: /* defined */
6257 res = is_mem_defined ( address, szB, &bad_addr, &otag );
6258 if (MC_AddrErr == res)
6259 VG_(printf)
6260 ("Address %p len %ld not addressable:\nbad address %p\n",
6261 (void *)address, szB, (void *) bad_addr);
6262 else if (MC_ValueErr == res) {
6263 okind = otag & 3;
6264 switch (okind) {
6265 case MC_OKIND_STACK:
6266 src = " was created by a stack allocation"; break;
6267 case MC_OKIND_HEAP:
6268 src = " was created by a heap allocation"; break;
6269 case MC_OKIND_USER:
6270 src = " was created by a client request"; break;
6271 case MC_OKIND_UNKNOWN:
6272 src = ""; break;
6273 default: tl_assert(0);
6275 VG_(printf)
6276 ("Address %p len %ld not defined:\n"
6277 "Uninitialised value at %p%s\n",
6278 (void *)address, szB, (void *) bad_addr, src);
6279 ecu = otag & ~3;
6280 if (VG_(is_plausible_ECU)(ecu)) {
6281 origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
6282 VG_(pp_ExeContext)( origin_ec );
6285 else
6286 VG_(printf) ("Address %p len %ld defined\n",
6287 (void *)address, szB);
6288 MC_(pp_describe_addr) (address);
6289 break;
6290 default: tl_assert(0);
6292 return True;
6295 case 5: { /* block_list */
6296 HChar* wl;
6297 HChar *endptr;
6298 UInt lr_nr = 0;
6299 wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
6300 if (wl != NULL)
6301 lr_nr = VG_(strtoull10) (wl, &endptr);
6302 if (wl == NULL || *endptr != '\0') {
6303 VG_(gdb_printf) ("malformed or missing integer\n");
6304 } else {
6305 // Pass lr_nr-1: what is shown to the user is 1 more than the index in lr_array.
6306 if (lr_nr == 0 || ! MC_(print_block_list) (lr_nr-1))
6307 VG_(gdb_printf) ("invalid loss record nr\n");
6309 return True;
6312 case 6: { /* who_points_at */
6313 Addr address;
6314 SizeT szB = 1;
6316 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6317 return True;
6318 if (address == (Addr) 0) {
6319 VG_(gdb_printf) ("Cannot search who points at 0x0\n");
6320 return True;
6322 MC_(who_points_at) (address, szB);
6323 return True;
6326 default:
6327 tl_assert(0);
6328 return False;
6332 /*------------------------------------------------------------*/
6333 /*--- Client requests ---*/
6334 /*------------------------------------------------------------*/
6336 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
6338 Int i;
6339 Addr bad_addr;
6341 if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
6342 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
6343 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
6344 && VG_USERREQ__FREELIKE_BLOCK != arg[0]
6345 && VG_USERREQ__CREATE_MEMPOOL != arg[0]
6346 && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
6347 && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
6348 && VG_USERREQ__MEMPOOL_FREE != arg[0]
6349 && VG_USERREQ__MEMPOOL_TRIM != arg[0]
6350 && VG_USERREQ__MOVE_MEMPOOL != arg[0]
6351 && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
6352 && VG_USERREQ__MEMPOOL_EXISTS != arg[0]
6353 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]
6354 && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
6355 && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
6356 return False;
6358 switch (arg[0]) {
6359 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
6360 Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
6361 if (!ok)
6362 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
6363 *ret = ok ? (UWord)NULL : bad_addr;
6364 break;
6367 case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
6368 Bool errorV = False;
6369 Addr bad_addrV = 0;
6370 UInt otagV = 0;
6371 Bool errorA = False;
6372 Addr bad_addrA = 0;
6373 is_mem_defined_comprehensive(
6374 arg[1], arg[2],
6375 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
6377 if (errorV) {
6378 MC_(record_user_error) ( tid, bad_addrV,
6379 /*isAddrErr*/False, otagV );
6381 if (errorA) {
6382 MC_(record_user_error) ( tid, bad_addrA,
6383 /*isAddrErr*/True, 0 );
6385 /* Return the lower of the two erring addresses, if any. */
6386 *ret = 0;
6387 if (errorV && !errorA) {
6388 *ret = bad_addrV;
6390 if (!errorV && errorA) {
6391 *ret = bad_addrA;
6393 if (errorV && errorA) {
6394 *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
6396 break;
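/* Editorial client-side sketch, not compiled here: this request is what the
   VALGRIND_CHECK_MEM_IS_DEFINED macro from memcheck.h issues.  As computed
   above, the returned word is 0 when the whole range is addressable and
   defined, otherwise the lowest erring address (an error is also recorded). */
#if 0
#include "memcheck.h"

static int sketch_check_defined ( const void* p, unsigned long n )
{
   unsigned long first_bad = VALGRIND_CHECK_MEM_IS_DEFINED(p, n);
   return first_bad == 0;   /* 1 <=> no undefined or unaddressable bytes seen */
}
#endif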
6399 case VG_USERREQ__DO_LEAK_CHECK: {
6400 LeakCheckParams lcp;
6402 if (arg[1] == 0)
6403 lcp.mode = LC_Full;
6404 else if (arg[1] == 1)
6405 lcp.mode = LC_Summary;
6406 else {
6407 VG_(message)(Vg_UserMsg,
6408 "Warning: unknown memcheck leak search mode\n");
6409 lcp.mode = LC_Full;
6412 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
6413 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
6414 lcp.heuristics = MC_(clo_leak_check_heuristics);
6416 if (arg[2] == 0)
6417 lcp.deltamode = LCD_Any;
6418 else if (arg[2] == 1)
6419 lcp.deltamode = LCD_Increased;
6420 else if (arg[2] == 2)
6421 lcp.deltamode = LCD_Changed;
6422 else {
6423 VG_(message)
6424 (Vg_UserMsg,
6425 "Warning: unknown memcheck leak search deltamode\n");
6426 lcp.deltamode = LCD_Any;
6428 lcp.max_loss_records_output = 999999999;
6429 lcp.requested_by_monitor_command = False;
6431 MC_(detect_memory_leaks)(tid, &lcp);
6432 *ret = 0; /* return value is meaningless */
6433 break;
6436 case VG_USERREQ__MAKE_MEM_NOACCESS:
6437 MC_(make_mem_noaccess) ( arg[1], arg[2] );
6438 *ret = -1;
6439 break;
6441 case VG_USERREQ__MAKE_MEM_UNDEFINED:
6442 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
6443 MC_OKIND_USER );
6444 *ret = -1;
6445 break;
6447 case VG_USERREQ__MAKE_MEM_DEFINED:
6448 MC_(make_mem_defined) ( arg[1], arg[2] );
6449 *ret = -1;
6450 break;
6452 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
6453 make_mem_defined_if_addressable ( arg[1], arg[2] );
6454 *ret = -1;
6455 break;
6457 case VG_USERREQ__CREATE_BLOCK: /* describe a block */
6458 if (arg[1] != 0 && arg[2] != 0) {
6459 i = alloc_client_block();
6460 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
6461 cgbs[i].start = arg[1];
6462 cgbs[i].size = arg[2];
6463 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
6464 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
6465 *ret = i;
6466 } else
6467 *ret = -1;
6468 break;
6470 case VG_USERREQ__DISCARD: /* discard */
6471 if (cgbs == NULL
6472 || arg[2] >= cgb_used ||
6473 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
6474 *ret = 1;
6475 } else {
6476 tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
6477 cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
6478 VG_(free)(cgbs[arg[2]].desc);
6479 cgb_discards++;
6480 *ret = 0;
6482 break;
6484 case VG_USERREQ__GET_VBITS:
6485 *ret = mc_get_or_set_vbits_for_client
6486 ( arg[1], arg[2], arg[3],
6487 False /* get them */,
6488 True /* is client request */ );
6489 break;
6491 case VG_USERREQ__SET_VBITS:
6492 *ret = mc_get_or_set_vbits_for_client
6493 ( arg[1], arg[2], arg[3],
6494 True /* set them */,
6495 True /* is client request */ );
6496 break;
6498 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
6499 UWord** argp = (UWord**)arg;
6500 // MC_(bytes_leaked) et al were set by the last leak check (or zero
6501 // if no prior leak checks performed).
6502 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
6503 *argp[2] = MC_(bytes_dubious);
6504 *argp[3] = MC_(bytes_reachable);
6505 *argp[4] = MC_(bytes_suppressed);
6506 // there is no argp[5]
6507 //*argp[5] = MC_(bytes_indirect);
6508 // XXX need to make *argp[1-4] defined; currently this is done in the
6509 // VALGRIND_COUNT_LEAKS macro by initialising them to zero.
6510 *ret = 0;
6511 return True;
6513 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
6514 UWord** argp = (UWord**)arg;
6515 // MC_(blocks_leaked) et al were set by the last leak check (or zero
6516 // if no prior leak checks performed).
6517 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
6518 *argp[2] = MC_(blocks_dubious);
6519 *argp[3] = MC_(blocks_reachable);
6520 *argp[4] = MC_(blocks_suppressed);
6521 // there is no argp[5]
6522 //*argp[5] = MC_(blocks_indirect);
6523 // XXX need to make *argp[1-4] defined; currently this is done in the
6524 // VALGRIND_COUNT_LEAK_BLOCKS macro by initialising them to zero.
6525 *ret = 0;
6526 return True;
6528 case VG_USERREQ__MALLOCLIKE_BLOCK: {
6529 Addr p = (Addr)arg[1];
6530 SizeT sizeB = arg[2];
6531 UInt rzB = arg[3];
6532 Bool is_zeroed = (Bool)arg[4];
6534 MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
6535 MC_AllocCustom, MC_(malloc_list) );
6536 if (rzB > 0) {
6537 MC_(make_mem_noaccess) ( p - rzB, rzB);
6538 MC_(make_mem_noaccess) ( p + sizeB, rzB);
6540 return True;
6542 case VG_USERREQ__RESIZEINPLACE_BLOCK: {
6543 Addr p = (Addr)arg[1];
6544 SizeT oldSizeB = arg[2];
6545 SizeT newSizeB = arg[3];
6546 UInt rzB = arg[4];
6548 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
6549 return True;
6551 case VG_USERREQ__FREELIKE_BLOCK: {
6552 Addr p = (Addr)arg[1];
6553 UInt rzB = arg[2];
6555 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
6556 return True;
6559 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
6560 HChar* s = (HChar*)arg[1];
6561 Addr dst = (Addr) arg[2];
6562 Addr src = (Addr) arg[3];
6563 SizeT len = (SizeT)arg[4];
6564 MC_(record_overlap_error)(tid, s, src, dst, len);
6565 return True;
6568 case VG_USERREQ__CREATE_MEMPOOL: {
6569 Addr pool = (Addr)arg[1];
6570 UInt rzB = arg[2];
6571 Bool is_zeroed = (Bool)arg[3];
6573 MC_(create_mempool) ( pool, rzB, is_zeroed );
6574 return True;
6577 case VG_USERREQ__DESTROY_MEMPOOL: {
6578 Addr pool = (Addr)arg[1];
6580 MC_(destroy_mempool) ( pool );
6581 return True;
6584 case VG_USERREQ__MEMPOOL_ALLOC: {
6585 Addr pool = (Addr)arg[1];
6586 Addr addr = (Addr)arg[2];
6587 UInt size = arg[3];
6589 MC_(mempool_alloc) ( tid, pool, addr, size );
6590 return True;
6593 case VG_USERREQ__MEMPOOL_FREE: {
6594 Addr pool = (Addr)arg[1];
6595 Addr addr = (Addr)arg[2];
6597 MC_(mempool_free) ( pool, addr );
6598 return True;
6601 case VG_USERREQ__MEMPOOL_TRIM: {
6602 Addr pool = (Addr)arg[1];
6603 Addr addr = (Addr)arg[2];
6604 UInt size = arg[3];
6606 MC_(mempool_trim) ( pool, addr, size );
6607 return True;
6610 case VG_USERREQ__MOVE_MEMPOOL: {
6611 Addr poolA = (Addr)arg[1];
6612 Addr poolB = (Addr)arg[2];
6614 MC_(move_mempool) ( poolA, poolB );
6615 return True;
6618 case VG_USERREQ__MEMPOOL_CHANGE: {
6619 Addr pool = (Addr)arg[1];
6620 Addr addrA = (Addr)arg[2];
6621 Addr addrB = (Addr)arg[3];
6622 UInt size = arg[4];
6624 MC_(mempool_change) ( pool, addrA, addrB, size );
6625 return True;
6628 case VG_USERREQ__MEMPOOL_EXISTS: {
6629 Addr pool = (Addr)arg[1];
6631 *ret = (UWord) MC_(mempool_exists) ( pool );
6632 return True;
6635 case VG_USERREQ__GDB_MONITOR_COMMAND: {
6636 Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
6637 if (handled)
6638 *ret = 1;
6639 else
6640 *ret = 0;
6641 return handled;
6644 case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
6645 case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
6646 Bool addRange
6647 = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
6648 Bool ok
6649 = modify_ignore_ranges(addRange, arg[1], arg[2]);
6650 *ret = ok ? 1 : 0;
6651 return True;
6654 default:
6655 VG_(message)(
6656 Vg_UserMsg,
6657 "Warning: unknown memcheck client request code %llx\n",
6658 (ULong)arg[0]
6660 return False;
6662 return True;
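/* For reference, the requests dispatched above are normally issued from
   client code via the public macros in memcheck.h / valgrind.h.  The
   compiled-out sketch below is client-side code, not part of this tool:
   the arena layout, sizes and function names are invented purely for
   illustration; the macros themselves are the standard client-request
   macros. */
#if 0
#include <valgrind/memcheck.h>   /* typical installed header location */
#include <stddef.h>

static char arena[4096];

static void* my_alloc(size_t n)
{
   /* Hand out one block from a static arena with 16-byte redzones on
      either side.  MALLOCLIKE_BLOCK reaches MC_(new_block) above, and
      the handler then marks both redzones noaccess. */
   char* p = arena + 16;
   VALGRIND_MALLOCLIKE_BLOCK(p, n, /*rzB*/16, /*is_zeroed*/0);
   return p;
}

static void my_free(void* p)
{
   /* Reaches MC_(handle_free) above via VG_USERREQ__FREELIKE_BLOCK. */
   VALGRIND_FREELIKE_BLOCK(p, /*rzB*/16);
}

static void leak_summary(void)
{
   unsigned long leaked = 0, dubious = 0, reachable = 0, suppressed = 0;
   VALGRIND_DO_LEAK_CHECK;   /* VG_USERREQ__DO_LEAK_CHECK, full mode */
   VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
   /* The four counters now hold the byte totals computed by the last
      leak check, as filled in by VG_USERREQ__COUNT_LEAKS above. */
}
#endif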
6666 /*------------------------------------------------------------*/
6667 /*--- Crude profiling machinery. ---*/
6668 /*------------------------------------------------------------*/
6670 // We track a number of interesting events (using PROF_EVENT)
6671 // if MC_PROFILE_MEMORY is defined.
6673 #ifdef MC_PROFILE_MEMORY
6675 UInt MC_(event_ctr)[N_PROF_EVENTS];
6676 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
6678 static void init_prof_mem ( void )
6680 Int i;
6681 for (i = 0; i < N_PROF_EVENTS; i++) {
6682 MC_(event_ctr)[i] = 0;
6683 MC_(event_ctr_name)[i] = NULL;
6687 static void done_prof_mem ( void )
6689 Int i;
6690 Bool spaced = False;
6691 for (i = 0; i < N_PROF_EVENTS; i++) {
6692 if (!spaced && (i % 10) == 0) {
6693 VG_(printf)("\n");
6694 spaced = True;
6696 if (MC_(event_ctr)[i] > 0) {
6697 spaced = False;
6698 VG_(printf)( "prof mem event %3d: %9d %s\n",
6699 i, MC_(event_ctr)[i],
6700 MC_(event_ctr_name)[i]
6701 ? MC_(event_ctr_name)[i] : "unnamed");
6706 #else
6708 static void init_prof_mem ( void ) { }
6709 static void done_prof_mem ( void ) { }
6711 #endif
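/* When MC_PROFILE_MEMORY is defined, done_prof_mem() prints one line at
   exit for every counter that is nonzero, in the format
      prof mem event  12:    123456 <event name>
   (the event number, count and name shown are purely illustrative). */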
6714 /*------------------------------------------------------------*/
6715 /*--- Origin tracking stuff ---*/
6716 /*------------------------------------------------------------*/
6718 /*--------------------------------------------*/
6719 /*--- Origin tracking: load handlers ---*/
6720 /*--------------------------------------------*/
6722 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
6723 return or1 > or2 ? or1 : or2;
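/* Taking the numerically larger of the two otags is an arbitrary but
   cheap and deterministic way of keeping a single origin where two
   candidates meet; the choice only affects which origin is reported,
   never the A/V bit computation. */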
6726 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
6727 OCacheLine* line;
6728 UChar descr;
6729 UWord lineoff = oc_line_offset(a);
6730 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
6732 if (OC_ENABLE_ASSERTIONS) {
6733 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6736 line = find_OCacheLine( a );
6738 descr = line->descr[lineoff];
6739 if (OC_ENABLE_ASSERTIONS) {
6740 tl_assert(descr < 0x10);
6743 if (LIKELY(0 == (descr & (1 << byteoff)))) {
6744 return 0;
6745 } else {
6746 return line->w32[lineoff];
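/* In helperc_b_load1 above and the wider loads below, each bit of
   line->descr[lineoff] covers one byte of the corresponding 32-bit
   word: a set bit means that byte carries the otag held in
   line->w32[lineoff], a clear bit means "no origin recorded".  Hence
   the fast path returns 0 as soon as the bits for all accessed bytes
   are clear. */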
6750 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
6751 OCacheLine* line;
6752 UChar descr;
6753 UWord lineoff, byteoff;
6755 if (UNLIKELY(a & 1)) {
6756 /* Handle misaligned case, slowly. */
6757 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
6758 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
6759 return merge_origins(oLo, oHi);
6762 lineoff = oc_line_offset(a);
6763 byteoff = a & 3; /* 0 or 2 */
6765 if (OC_ENABLE_ASSERTIONS) {
6766 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6768 line = find_OCacheLine( a );
6770 descr = line->descr[lineoff];
6771 if (OC_ENABLE_ASSERTIONS) {
6772 tl_assert(descr < 0x10);
6775 if (LIKELY(0 == (descr & (3 << byteoff)))) {
6776 return 0;
6777 } else {
6778 return line->w32[lineoff];
6782 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
6783 OCacheLine* line;
6784 UChar descr;
6785 UWord lineoff;
6787 if (UNLIKELY(a & 3)) {
6788 /* Handle misaligned case, slowly. */
6789 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
6790 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
6791 return merge_origins(oLo, oHi);
6794 lineoff = oc_line_offset(a);
6795 if (OC_ENABLE_ASSERTIONS) {
6796 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6799 line = find_OCacheLine( a );
6801 descr = line->descr[lineoff];
6802 if (OC_ENABLE_ASSERTIONS) {
6803 tl_assert(descr < 0x10);
6806 if (LIKELY(0 == descr)) {
6807 return 0;
6808 } else {
6809 return line->w32[lineoff];
6813 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
6814 OCacheLine* line;
6815 UChar descrLo, descrHi, descr;
6816 UWord lineoff;
6818 if (UNLIKELY(a & 7)) {
6819 /* Handle misaligned case, slowly. */
6820 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
6821 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
6822 return merge_origins(oLo, oHi);
6825 lineoff = oc_line_offset(a);
6826 if (OC_ENABLE_ASSERTIONS) {
6827 tl_assert(lineoff == (lineoff & 6)); /* 0, 2, 4 or 6, since 8-aligned */
6830 line = find_OCacheLine( a );
6832 descrLo = line->descr[lineoff + 0];
6833 descrHi = line->descr[lineoff + 1];
6834 descr = descrLo | descrHi;
6835 if (OC_ENABLE_ASSERTIONS) {
6836 tl_assert(descr < 0x10);
6839 if (LIKELY(0 == descr)) {
6840 return 0; /* both 32-bit chunks are defined */
6841 } else {
6842 UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
6843 UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
6844 return merge_origins(oLo, oHi);
6848 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
6849 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 );
6850 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 );
6851 UInt oBoth = merge_origins(oLo, oHi);
6852 return (UWord)oBoth;
6855 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
6856 UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 );
6857 UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 );
6858 UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 );
6859 UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 );
6860 UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
6861 merge_origins(oQ2, oQ3));
6862 return (UWord)oAll;
6866 /*--------------------------------------------*/
6867 /*--- Origin tracking: store handlers ---*/
6868 /*--------------------------------------------*/
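/* The handlers below keep at most one otag per 32-bit word of a cache
   line.  Storing otag 0 merely clears the descr bits of the affected
   bytes, whereas storing a nonzero otag sets those bits and overwrites
   the whole w32 slot, so a narrow store can change the origin that is
   subsequently reported for neighbouring bytes of the same word.  This
   is a precision/space trade-off rather than a correctness issue, since
   otags feed only the origin reports and never the A/V bits. */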
6870 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
6871 OCacheLine* line;
6872 UWord lineoff = oc_line_offset(a);
6873 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
6875 if (OC_ENABLE_ASSERTIONS) {
6876 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6879 line = find_OCacheLine( a );
6881 if (d32 == 0) {
6882 line->descr[lineoff] &= ~(1 << byteoff);
6883 } else {
6884 line->descr[lineoff] |= (1 << byteoff);
6885 line->w32[lineoff] = d32;
6889 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
6890 OCacheLine* line;
6891 UWord lineoff, byteoff;
6893 if (UNLIKELY(a & 1)) {
6894 /* Handle misaligned case, slowly. */
6895 MC_(helperc_b_store1)( a + 0, d32 );
6896 MC_(helperc_b_store1)( a + 1, d32 );
6897 return;
6900 lineoff = oc_line_offset(a);
6901 byteoff = a & 3; /* 0 or 2 */
6903 if (OC_ENABLE_ASSERTIONS) {
6904 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6907 line = find_OCacheLine( a );
6909 if (d32 == 0) {
6910 line->descr[lineoff] &= ~(3 << byteoff);
6911 } else {
6912 line->descr[lineoff] |= (3 << byteoff);
6913 line->w32[lineoff] = d32;
6917 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
6918 OCacheLine* line;
6919 UWord lineoff;
6921 if (UNLIKELY(a & 3)) {
6922 /* Handle misaligned case, slowly. */
6923 MC_(helperc_b_store2)( a + 0, d32 );
6924 MC_(helperc_b_store2)( a + 2, d32 );
6925 return;
6928 lineoff = oc_line_offset(a);
6929 if (OC_ENABLE_ASSERTIONS) {
6930 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6933 line = find_OCacheLine( a );
6935 if (d32 == 0) {
6936 line->descr[lineoff] = 0;
6937 } else {
6938 line->descr[lineoff] = 0xF;
6939 line->w32[lineoff] = d32;
6943 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
6944 OCacheLine* line;
6945 UWord lineoff;
6947 if (UNLIKELY(a & 7)) {
6948 /* Handle misaligned case, slowly. */
6949 MC_(helperc_b_store4)( a + 0, d32 );
6950 MC_(helperc_b_store4)( a + 4, d32 );
6951 return;
6954 lineoff = oc_line_offset(a);
6955 if (OC_ENABLE_ASSERTIONS) {
6956 tl_assert(lineoff == (lineoff & 6)); /* 0, 2, 4 or 6, since 8-aligned */
6959 line = find_OCacheLine( a );
6961 if (d32 == 0) {
6962 line->descr[lineoff + 0] = 0;
6963 line->descr[lineoff + 1] = 0;
6964 } else {
6965 line->descr[lineoff + 0] = 0xF;
6966 line->descr[lineoff + 1] = 0xF;
6967 line->w32[lineoff + 0] = d32;
6968 line->w32[lineoff + 1] = d32;
6972 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
6973 MC_(helperc_b_store8)( a + 0, d32 );
6974 MC_(helperc_b_store8)( a + 8, d32 );
6977 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
6978 MC_(helperc_b_store8)( a + 0, d32 );
6979 MC_(helperc_b_store8)( a + 8, d32 );
6980 MC_(helperc_b_store8)( a + 16, d32 );
6981 MC_(helperc_b_store8)( a + 24, d32 );
6985 /*--------------------------------------------*/
6986 /*--- Origin tracking: sarp handlers ---*/
6987 /*--------------------------------------------*/
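/* Both handlers below use the same alignment-peeling pattern: emit 1-
   and 2-byte stores until the address is 4-aligned, stream 4-byte
   stores over the bulk of the range, then mop up with a 2- and/or
   1-byte tail, so that every individual store is naturally aligned. */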
6989 __attribute__((noinline))
6990 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
6991 if ((a & 1) && len >= 1) {
6992 MC_(helperc_b_store1)( a, otag );
6993 a++;
6994 len--;
6996 if ((a & 2) && len >= 2) {
6997 MC_(helperc_b_store2)( a, otag );
6998 a += 2;
6999 len -= 2;
7001 if (len >= 4)
7002 tl_assert(0 == (a & 3));
7003 while (len >= 4) {
7004 MC_(helperc_b_store4)( a, otag );
7005 a += 4;
7006 len -= 4;
7008 if (len >= 2) {
7009 MC_(helperc_b_store2)( a, otag );
7010 a += 2;
7011 len -= 2;
7013 if (len >= 1) {
7014 MC_(helperc_b_store1)( a, otag );
7015 //a++;
7016 len--;
7018 tl_assert(len == 0);
7021 __attribute__((noinline))
7022 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
7023 if ((a & 1) && len >= 1) {
7024 MC_(helperc_b_store1)( a, 0 );
7025 a++;
7026 len--;
7028 if ((a & 2) && len >= 2) {
7029 MC_(helperc_b_store2)( a, 0 );
7030 a += 2;
7031 len -= 2;
7033 if (len >= 4)
7034 tl_assert(0 == (a & 3));
7035 while (len >= 4) {
7036 MC_(helperc_b_store4)( a, 0 );
7037 a += 4;
7038 len -= 4;
7040 if (len >= 2) {
7041 MC_(helperc_b_store2)( a, 0 );
7042 a += 2;
7043 len -= 2;
7045 if (len >= 1) {
7046 MC_(helperc_b_store1)( a, 0 );
7047 //a++;
7048 len--;
7050 tl_assert(len == 0);
7054 /*------------------------------------------------------------*/
7055 /*--- Setup and finalisation ---*/
7056 /*------------------------------------------------------------*/
7058 static void mc_post_clo_init ( void )
7060 /* If we've been asked to emit XML, mash around various other
7061 options so as to constrain the output somewhat. */
7062 if (VG_(clo_xml)) {
7063 /* Extract as much info as possible from the leak checker. */
7064 MC_(clo_leak_check) = LC_Full;
7067 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
7068 VG_(message)(Vg_UserMsg,
7069 "Warning: --freelist-big-blocks value %lld has no effect\n"
7070 "as it is >= to --freelist-vol value %lld\n",
7071 MC_(clo_freelist_big_blocks),
7072 MC_(clo_freelist_vol));
7074 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
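/* As a reminder (and as reflected in the dispatching just below):
   level 1 tracks addressability only (--undef-value-errors=no),
   level 2 additionally tracks definedness, and level 3 additionally
   tracks the origins of undefined values (--track-origins=yes). */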
7076 if (MC_(clo_mc_level) == 3) {
7077 /* We're doing origin tracking. */
7078 # ifdef PERF_FAST_STACK
7079 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU );
7080 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU );
7081 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU );
7082 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU );
7083 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU );
7084 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
7085 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
7086 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
7087 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
7088 # endif
7089 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU );
7090 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_make_ECU );
7091 } else {
7092 /* Not doing origin tracking */
7093 # ifdef PERF_FAST_STACK
7094 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
7095 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 );
7096 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 );
7097 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 );
7098 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 );
7099 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
7100 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
7101 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
7102 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
7103 # endif
7104 VG_(track_new_mem_stack) ( mc_new_mem_stack );
7105 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
7108 // We assume that brk()/sbrk() does not initialise new memory. Is this
7109 // accurate? John Reiser says:
7111 // 0) sbrk() can *decrease* process address space. No zero fill is done
7112 // for a decrease, not even the fragment on the high end of the last page
7113 // that is beyond the new highest address. For maximum safety and
7114 // portability, then the bytes in the last page that reside above [the
7115 // new] sbrk(0) should be considered to be uninitialized, but in practice
7116 // it is exceedingly likely that they will retain their previous
7117 // contents.
7119 // 1) If an increase is large enough to require new whole pages, then
7120 // those new whole pages (like all new pages) are zero-filled by the
7121 // operating system. So if sbrk(0) already is page aligned, then
7122 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
7124 // 2) Any increase that lies within an existing allocated page is not
7125 // changed. So if (x = sbrk(0)) is not page aligned, then
7126 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
7127 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
7128 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
7129 // of them come along for the ride because the operating system deals
7130 // only in whole pages. Again, for maximum safety and portability, then
7131 // anything that lives above [the new] sbrk(0) should be considered
7132 // uninitialized, but in practice will retain previous contents [zero in
7133 // this case.]"
7135 // In short:
7137 // A key property of sbrk/brk is that new whole pages that are supplied
7138 // by the operating system *do* get initialized to zero.
7140 // As for the portability of all this:
7142 // sbrk and brk are not POSIX. However, any system that is a derivative
7143 // of *nix has sbrk and brk, because too much software (such as
7144 // the Bourne shell) relies on the traditional memory map (.text,
7145 // .data+.bss, stack) and the existence of sbrk/brk.
7147 // So we should arguably observe all this. However:
7148 // - The current inaccuracy has caused maybe one complaint in seven years(?)
7149 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
7150 // doubt most programmers know the above information.
7151 // So I'm not terribly unhappy with marking it as undefined. --njn.
7153 // [More: I think most of what John said only applies to sbrk(). It seems
7154 // that brk() always deals in whole pages. And since this event deals
7155 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
7156 // just mark all memory it allocates as defined.]
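// A worked instance of point (2), assuming PAGE_SIZE == 4096 purely for
// illustration: if x = sbrk(0) == 0x5300, then sbrk(PAGE_SIZE) returns
// 0x5300 and moves the break to 0x6300. ((PAGE_SIZE-1) & -x) == 3328
// bytes (0x5300..0x5FFF) lie in the already-mapped page and keep their
// previous contents, while the kernel zero-fills the new page
// 0x6000..0x6FFF, of which ((PAGE_SIZE-1) & x) == 768 bytes
// (0x6000..0x62FF) are covered by the request.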
7158 # if !defined(VGO_solaris)
7159 if (MC_(clo_mc_level) == 3)
7160 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_make_ECU );
7161 else
7162 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_no_ECU );
7163 # else
7164 // On Solaris, brk memory has to be marked as defined, otherwise we get
7165 // many false positives.
7166 VG_(track_new_mem_brk) ( make_mem_defined_w_tid );
7167 # endif
7169 /* This origin tracking cache is huge (~100M), so only initialise
7170 if we need it. */
7171 if (MC_(clo_mc_level) >= 3) {
7172 init_OCache();
7173 tl_assert(ocacheL1 != NULL);
7174 tl_assert(ocacheL2 != NULL);
7175 } else {
7176 tl_assert(ocacheL1 == NULL);
7177 tl_assert(ocacheL2 == NULL);
7180 MC_(chunk_poolalloc) = VG_(newPA)
7181 (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
7182 1000,
7183 VG_(malloc),
7184 "mc.cMC.1 (MC_Chunk pools)",
7185 VG_(free));
7187 /* Do not check definedness of guest state if --undef-value-errors=no */
7188 if (MC_(clo_mc_level) >= 2)
7189 VG_(track_pre_reg_read) ( mc_pre_reg_read );
7192 static void print_SM_info(const HChar* type, Int n_SMs)
7194 VG_(message)(Vg_DebugMsg,
7195 " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
7196 type,
7197 n_SMs,
7198 n_SMs * sizeof(SecMap) / 1024UL,
7199 n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
7202 static void mc_print_stats (void)
7204 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
7206 VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
7207 VG_(free_queue_volume), VG_(free_queue_length));
7208 VG_(message)(Vg_DebugMsg,
7209 " memcheck: sanity checks: %d cheap, %d expensive\n",
7210 n_sanity_cheap, n_sanity_expensive );
7211 VG_(message)(Vg_DebugMsg,
7212 " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
7213 n_auxmap_L2_nodes,
7214 n_auxmap_L2_nodes * 64,
7215 n_auxmap_L2_nodes / 16 );
7216 VG_(message)(Vg_DebugMsg,
7217 " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
7218 n_auxmap_L1_searches, n_auxmap_L1_cmps,
7219 (10ULL * n_auxmap_L1_cmps)
7220 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
7222 VG_(message)(Vg_DebugMsg,
7223 " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
7224 n_auxmap_L2_searches, n_auxmap_L2_nodes
7227 print_SM_info("n_issued ", n_issued_SMs);
7228 print_SM_info("n_deissued ", n_deissued_SMs);
7229 print_SM_info("max_noaccess ", max_noaccess_SMs);
7230 print_SM_info("max_undefined", max_undefined_SMs);
7231 print_SM_info("max_defined ", max_defined_SMs);
7232 print_SM_info("max_non_DSM ", max_non_DSM_SMs);
7234 // Three DSMs, plus the non-DSM ones
7235 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
7236 // The 3*sizeof(Word) bytes are the AVL node metadata overhead per node.
7237 // The VG_ROUNDUP is because the OSet pool allocator will/must align
7238 // the elements on pointer size.
7239 // Note that the pool allocator has some additional small overhead
7240 // which is not counted in the below.
7241 // Hardwiring this logic sucks, but I don't see how else to do it.
7242 max_secVBit_szB = max_secVBit_nodes *
7243 (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
7244 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
7246 VG_(message)(Vg_DebugMsg,
7247 " memcheck: max sec V bit nodes: %d (%ldk, %ldM)\n",
7248 max_secVBit_nodes, max_secVBit_szB / 1024,
7249 max_secVBit_szB / (1024 * 1024));
7250 VG_(message)(Vg_DebugMsg,
7251 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
7252 sec_vbits_new_nodes + sec_vbits_updates,
7253 sec_vbits_new_nodes, sec_vbits_updates );
7254 VG_(message)(Vg_DebugMsg,
7255 " memcheck: max shadow mem size: %ldk, %ldM\n",
7256 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
7258 if (MC_(clo_mc_level) >= 3) {
7259 VG_(message)(Vg_DebugMsg,
7260 " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
7261 stats_ocacheL1_find,
7262 stats_ocacheL1_misses,
7263 stats_ocacheL1_lossage );
7264 VG_(message)(Vg_DebugMsg,
7265 " ocacheL1: %'12lu at 0 %'12lu at 1\n",
7266 stats_ocacheL1_find - stats_ocacheL1_misses
7267 - stats_ocacheL1_found_at_1
7268 - stats_ocacheL1_found_at_N,
7269 stats_ocacheL1_found_at_1 );
7270 VG_(message)(Vg_DebugMsg,
7271 " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
7272 stats_ocacheL1_found_at_N,
7273 stats_ocacheL1_movefwds );
7274 VG_(message)(Vg_DebugMsg,
7275 " ocacheL1: %'12lu sizeB %'12u useful\n",
7276 (UWord)sizeof(OCache),
7277 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
7278 VG_(message)(Vg_DebugMsg,
7279 " ocacheL2: %'12lu refs %'12lu misses\n",
7280 stats__ocacheL2_refs,
7281 stats__ocacheL2_misses );
7282 VG_(message)(Vg_DebugMsg,
7283 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
7284 stats__ocacheL2_n_nodes_max,
7285 stats__ocacheL2_n_nodes );
7286 VG_(message)(Vg_DebugMsg,
7287 " niacache: %'12lu refs %'12lu misses\n",
7288 stats__nia_cache_queries, stats__nia_cache_misses);
7289 } else {
7290 tl_assert(ocacheL1 == NULL);
7291 tl_assert(ocacheL2 == NULL);
7296 static void mc_fini ( Int exitcode )
7298 MC_(print_malloc_stats)();
7300 if (MC_(clo_leak_check) != LC_Off) {
7301 LeakCheckParams lcp;
7302 lcp.mode = MC_(clo_leak_check);
7303 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
7304 lcp.heuristics = MC_(clo_leak_check_heuristics);
7305 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
7306 lcp.deltamode = LCD_Any;
7307 lcp.max_loss_records_output = 999999999;
7308 lcp.requested_by_monitor_command = False;
7309 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
7310 } else {
7311 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7312 VG_(umsg)(
7313 "For a detailed leak analysis, rerun with: --leak-check=full\n"
7314 "\n"
7319 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7320 VG_(message)(Vg_UserMsg,
7321 "For counts of detected and suppressed errors, rerun with: -v\n");
7324 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
7325 && MC_(clo_mc_level) == 2) {
7326 VG_(message)(Vg_UserMsg,
7327 "Use --track-origins=yes to see where "
7328 "uninitialised values come from\n");
7331 /* Print a warning if any client-request generated ignore-ranges
7332 still exist. It would be reasonable to expect that a properly
7333 written program would remove any such ranges before exiting, and
7334 since they are a bit on the dangerous side, let's warn about them. By
7335 contrast ranges which are specified on the command line normally
7336 pertain to hardware mapped into the address space, and so we
7337 can't expect the client to have got rid of them. */
7338 if (gIgnoredAddressRanges) {
7339 Word i, nBad = 0;
7340 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
7341 UWord val = IAR_INVALID;
7342 UWord key_min = ~(UWord)0;
7343 UWord key_max = (UWord)0;
7344 VG_(indexRangeMap)( &key_min, &key_max, &val,
7345 gIgnoredAddressRanges, i );
7346 if (val != IAR_ClientReq)
7347 continue;
7348 /* Print the offending range. Also, if it is the first,
7349 print a banner before it. */
7350 nBad++;
7351 if (nBad == 1) {
7352 VG_(umsg)(
7353 "WARNING: exiting program has the following client-requested\n"
7354 "WARNING: address error disablement range(s) still in force,\n"
7355 "WARNING: "
7356 "possibly as a result of some mistake in the use of the\n"
7357 "WARNING: "
7358 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
7361 VG_(umsg)(" [%ld] 0x%016llx-0x%016llx %s\n",
7362 i, (ULong)key_min, (ULong)key_max, showIARKind(val));
7366 done_prof_mem();
7368 if (VG_(clo_stats))
7369 mc_print_stats();
7371 if (0) {
7372 VG_(message)(Vg_DebugMsg,
7373 "------ Valgrind's client block stats follow ---------------\n" );
7374 show_client_block_stats();
7378 /* mark the given addr/len unaddressable for watchpoint implementation
7379 The PointKind will be handled at access time */
7380 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
7381 Addr addr, SizeT len)
7383 /* GDBTD this is somewhat fishy. We might rather have to save the previous
7384 accessibility and definedness in gdbserver so as to allow restoring it
7385 properly. Currently, we assume that the user only watches things
7386 which are properly addressable and defined */
7387 if (insert)
7388 MC_(make_mem_noaccess) (addr, len);
7389 else
7390 MC_(make_mem_defined) (addr, len);
7391 return True;
7394 static void mc_pre_clo_init(void)
7396 VG_(details_name) ("Memcheck");
7397 VG_(details_version) (NULL);
7398 VG_(details_description) ("a memory error detector");
7399 VG_(details_copyright_author)(
7400 "Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.");
7401 VG_(details_bug_reports_to) (VG_BUGS_TO);
7402 VG_(details_avg_translation_sizeB) ( 640 );
7404 VG_(basic_tool_funcs) (mc_post_clo_init,
7405 MC_(instrument),
7406 mc_fini);
7408 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) );
7411 VG_(needs_core_errors) ();
7412 VG_(needs_tool_errors) (MC_(eq_Error),
7413 MC_(before_pp_Error),
7414 MC_(pp_Error),
7415 True,/*show TIDs for errors*/
7416 MC_(update_Error_extra),
7417 MC_(is_recognised_suppression),
7418 MC_(read_extra_suppression_info),
7419 MC_(error_matches_suppression),
7420 MC_(get_error_name),
7421 MC_(get_extra_suppression_info),
7422 MC_(print_extra_suppression_use),
7423 MC_(update_extra_suppression_use));
7424 VG_(needs_libc_freeres) ();
7425 VG_(needs_command_line_options)(mc_process_cmd_line_options,
7426 mc_print_usage,
7427 mc_print_debug_usage);
7428 VG_(needs_client_requests) (mc_handle_client_request);
7429 VG_(needs_sanity_checks) (mc_cheap_sanity_check,
7430 mc_expensive_sanity_check);
7431 VG_(needs_print_stats) (mc_print_stats);
7432 VG_(needs_info_location) (MC_(pp_describe_addr));
7433 VG_(needs_malloc_replacement) (MC_(malloc),
7434 MC_(__builtin_new),
7435 MC_(__builtin_vec_new),
7436 MC_(memalign),
7437 MC_(calloc),
7438 MC_(free),
7439 MC_(__builtin_delete),
7440 MC_(__builtin_vec_delete),
7441 MC_(realloc),
7442 MC_(malloc_usable_size),
7443 MC_MALLOC_DEFAULT_REDZONE_SZB );
7444 MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
7446 VG_(needs_xml_output) ();
7448 VG_(track_new_mem_startup) ( mc_new_mem_startup );
7450 // Handling of mmap and mprotect isn't simple (well, it is simple,
7451 // but the justification isn't). See comments above, just prior to
7452 // mc_new_mem_mmap.
7453 VG_(track_new_mem_mmap) ( mc_new_mem_mmap );
7454 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
7456 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) );
7458 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
7459 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) );
7460 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) );
7462 /* Defer the specification of the new_mem_stack functions to the
7463 post_clo_init function, since we need to first parse the command
7464 line before deciding which set to use. */
7466 # ifdef PERF_FAST_STACK
7467 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
7468 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 );
7469 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 );
7470 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 );
7471 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 );
7472 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 );
7473 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 );
7474 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 );
7475 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 );
7476 # endif
7477 VG_(track_die_mem_stack) ( mc_die_mem_stack );
7479 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) );
7481 VG_(track_pre_mem_read) ( check_mem_is_defined );
7482 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
7483 VG_(track_pre_mem_write) ( check_mem_is_addressable );
7484 VG_(track_post_mem_write) ( mc_post_mem_write );
7486 VG_(track_post_reg_write) ( mc_post_reg_write );
7487 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
7489 if (MC_(clo_mc_level) >= 2) {
7490 VG_(track_copy_mem_to_reg) ( mc_copy_mem_to_reg );
7491 VG_(track_copy_reg_to_mem) ( mc_copy_reg_to_mem );
7494 VG_(needs_watchpoint) ( mc_mark_unaddressable_for_watchpoint );
7496 init_shadow_memory();
7497 // MC_(chunk_poolalloc) must be allocated in post_clo_init
7498 tl_assert(MC_(chunk_poolalloc) == NULL);
7499 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" );
7500 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
7501 init_prof_mem();
7503 tl_assert( mc_expensive_sanity_check() );
7505 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
7506 tl_assert(sizeof(UWord) == sizeof(Addr));
7507 // Call me paranoid. I don't care.
7508 tl_assert(sizeof(void*) == sizeof(Addr));
7510 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
7511 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
7513 /* This is small. Always initialise it. */
7514 init_nia_to_ecu_cache();
7516 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
7517 if we need to, since the command line args haven't been
7518 processed yet. Hence defer it to mc_post_clo_init. */
7519 tl_assert(ocacheL1 == NULL);
7520 tl_assert(ocacheL2 == NULL);
7522 /* Check some important stuff. See extensive comments above
7523 re UNALIGNED_OR_HIGH for background. */
7524 # if VG_WORDSIZE == 4
7525 tl_assert(sizeof(void*) == 4);
7526 tl_assert(sizeof(Addr) == 4);
7527 tl_assert(sizeof(UWord) == 4);
7528 tl_assert(sizeof(Word) == 4);
7529 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
7530 tl_assert(MASK(1) == 0UL);
7531 tl_assert(MASK(2) == 1UL);
7532 tl_assert(MASK(4) == 3UL);
7533 tl_assert(MASK(8) == 7UL);
7534 # else
7535 tl_assert(VG_WORDSIZE == 8);
7536 tl_assert(sizeof(void*) == 8);
7537 tl_assert(sizeof(Addr) == 8);
7538 tl_assert(sizeof(UWord) == 8);
7539 tl_assert(sizeof(Word) == 8);
7540 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFFULL);
7541 tl_assert(MASK(1) == 0xFFFFFFF000000000ULL);
7542 tl_assert(MASK(2) == 0xFFFFFFF000000001ULL);
7543 tl_assert(MASK(4) == 0xFFFFFFF000000003ULL);
7544 tl_assert(MASK(8) == 0xFFFFFFF000000007ULL);
7545 # endif
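/* Taken together, the assertions above say that on 64-bit targets the
   primary map covers the bottom 2^36 bytes (64 GB) and that MASK(szB)
   is the complement of that range or'd with the low (szB-1) alignment
   bits, so "a & MASK(szB)" is nonzero exactly when an access of size
   szB at a is misaligned or lies outside the primary map; this appears
   to be the invariant that the UNALIGNED_OR_HIGH comments earlier in
   the file rely on.  On 32-bit targets MASK(1) is 0, i.e. the whole
   address space is covered by the primary map. */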
7548 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
7550 /*--------------------------------------------------------------------*/
7551 /*--- end mc_main.c ---*/
7552 /*--------------------------------------------------------------------*/