/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- MemCheck: Maintain bitmaps of memory, tracking the          ---*/
/*--- accessibility (A) and validity (V) status of each byte.     ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2015 Julian Seward

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_gdbserver.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_machine.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_options.h"
#include "pub_tool_oset.h"
#include "pub_tool_rangemap.h"
#include "pub_tool_replacemalloc.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_xarray.h"
#include "pub_tool_xtree.h"
#include "pub_tool_xtmemory.h"

#include "mc_include.h"
#include "memcheck.h"   /* for client requests */

/* Set to 1 to enable handwritten assembly helpers on targets for
   which it is supported. */
#define ENABLE_ASSEMBLY_HELPERS 1

/* Set to 1 to do a little more sanity checking */
#define VG_DEBUG_MEMORY 0

#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)

static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
/*------------------------------------------------------------*/
/*--- Fast-case knobs                                      ---*/
/*------------------------------------------------------------*/

// Comment these out to disable the fast cases (don't just set them to zero).

#define PERF_FAST_LOADV    1
#define PERF_FAST_STOREV   1

#define PERF_FAST_SARP     1

#define PERF_FAST_STACK    1
#define PERF_FAST_STACK2   1

/* Change this to 1 to enable assertions on origin tracking cache fast
   paths */
#define OC_ENABLE_ASSERTIONS 0

/*------------------------------------------------------------*/
/*--- Comments on the origin tracking implementation       ---*/
/*------------------------------------------------------------*/

/* See detailed comment entitled
   AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   which is contained further on in this file. */

/*------------------------------------------------------------*/
/*--- V bits and A bits                                    ---*/
/*------------------------------------------------------------*/

/* Conceptually, every byte value has 8 V bits, which track whether Memcheck
   thinks the corresponding value bit is defined.  And every memory byte
   has an A bit, which tracks whether Memcheck thinks the program can access
   it safely (ie. it's mapped, and has at least one of the RWX permission bits
   set).  So every N-bit register is shadowed with N V bits, and every memory
   byte is shadowed with 8 V bits and one A bit.

   In the implementation, we use two forms of compression (compressed V bits
   and distinguished secondary maps) to avoid the 9-bit-per-byte overhead.

   Memcheck also tracks extra information about each heap block that is
   allocated, for detecting memory leaks and other purposes.
*/
/*------------------------------------------------------------*/
/*--- Basic A/V bitmap representation.                     ---*/
/*------------------------------------------------------------*/

/* All reads and writes are checked against a memory map (a.k.a. shadow
   memory), which records the state of all memory in the process.

   On 32-bit machines the memory map is organised as follows.
   The top 16 bits of an address are used to index into a top-level
   map table, containing 65536 entries.  Each entry is a pointer to a
   second-level map, which records the accessibility and validity
   permissions for the 65536 bytes indexed by the lower 16 bits of the
   address.  Each byte is represented by two bits (details are below).  So
   each second-level map contains 16384 bytes.  This two-level arrangement
   conveniently divides the 4G address space into 64k lumps, each of size
   64k bytes.

   All entries in the primary (top-level) map must point to a valid
   secondary (second-level) map.  Since many of the 64kB chunks will
   have the same status for every byte -- ie. noaccess (for unused
   address space) or entirely addressable and defined (for code segments) --
   there are three distinguished secondary maps, which indicate 'noaccess',
   'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
   map entry points to the relevant distinguished map.  In practice,
   typically more than half of the addressable memory is represented with
   the 'undefined' or 'defined' distinguished secondary map, so it gives a
   good saving.  It also lets us set the V+A bits of large address regions
   quickly in set_address_range_perms().

   On 64-bit machines it's more complicated.  If we followed the same basic
   scheme we'd have a four-level table which would require too many memory
   accesses.  So instead the top-level map table has 2^20 entries (indexed
   using bits 16..35 of the address); this covers the bottom 64GB.  Any
   accesses above 64GB are handled with a slow, sparse auxiliary table.
   Valgrind's address space manager tries very hard to keep things below
   this 64GB barrier so that performance doesn't suffer too much.

   Note that this file has a lot of different functions for reading and
   writing shadow memory.  Only a couple are strictly necessary (eg.
   get_vabits2 and set_vabits2), most are just specialised for specific
   common cases to improve performance.

   Aside: the V+A bits are less precise than they could be -- we have no way
   of marking memory as read-only.  It would be great if we could add an
   extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
   which requires 2.3 bits to hold, and there's no way to do that elegantly
   -- we'd have to double up to 4 bits of metadata per byte, which doesn't
   seem worth it.
*/
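
/* Worked example of the 32-bit two-level lookup described above (added
   illustration; the address is arbitrary).  For a == 0x50001234:
     primary index   = a >> 16    == 0x5000  -- selects one of the 65536
                                                top-level entries
     secondary index = a & 0xFFFF == 0x1234  -- selects the byte's V+A state
                                                within that 64k lump
   On 64-bit targets the same idea applies, but the primary index uses
   bits 16..35 of the address, and anything above 64GB goes to the slow
   auxiliary table instead. */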
/* --------------- Basic configuration --------------- */

/* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */

#if VG_WORDSIZE == 4

/* cover the entire address space */
#  define N_PRIMARY_BITS  16

#else

/* Just handle the first 64G fast and the rest via auxiliary
   primaries.  If you change this, Memcheck will assert at startup.
   See the definition of UNALIGNED_OR_HIGH for extensive comments. */
#  define N_PRIMARY_BITS  20

#endif

/* Do not change this. */
#define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)

/* Do not change this. */
#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
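
/* Illustrative arithmetic (added; not in the original source): with the
   64-bit setting N_PRIMARY_BITS == 20, N_PRIMARY_MAP == 2^20 and the primary
   map spans 65536 * 2^20 == 2^36 bytes, so MAX_PRIMARY_ADDRESS == 0xFFFFFFFFF,
   ie. 64GB - 1.  With the 32-bit setting N_PRIMARY_BITS == 16 it spans
   2^32 bytes, ie. the whole 32-bit address space. */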
/* --------------- Secondary maps --------------- */

// Each byte of memory conceptually has an A bit, which indicates its
// addressability, and 8 V bits, which indicate its definedness.
//
// But because very few bytes are partially defined, we can use a nice
// compression scheme to reduce the size of shadow memory.  Each byte of
// memory has 2 bits which indicate its state (ie. V+A bits):
//
//   00:  noaccess    (unaddressable but treated as fully defined)
//   01:  undefined   (addressable and fully undefined)
//   10:  defined     (addressable and fully defined)
//   11:  partdefined (addressable and partially defined)
//
// In the "partdefined" case, we use a secondary table to store the V bits.
// Each entry in the secondary-V-bits table maps a byte address to its 8 V
// bits.
//
// We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
// four bytes (32 bits) of memory are in each chunk.  Hence the name
// "vabits8".  This lets us get the V+A bits for four bytes at a time
// easily (without having to do any shifting and/or masking), and that is a
// very common operation.  (Note that although each vabits8 chunk
// is 8 bits in size, it represents 32 bits of memory.)
//
// The representation is "inverse" little-endian... each 4 bytes of
// memory is represented by a 1 byte value, where:
//
// - the status of byte (a+0) is held in bits [1..0]
// - the status of byte (a+1) is held in bits [3..2]
// - the status of byte (a+2) is held in bits [5..4]
// - the status of byte (a+3) is held in bits [7..6]
//
// It's "inverse" because endianness normally describes a mapping from
// value bits to memory addresses;  in this case the mapping is inverted.
// Ie. instead of particular value bits being held in certain addresses, in
// this case certain addresses are represented by particular value bits.
// See insert_vabits2_into_vabits8() for an example.
//
// But note that we don't compress the V bits stored in registers;  they
// need to be explicit to make the shadow operations possible.  Therefore
// when moving values between registers and memory we need to convert
// between the expanded in-register format and the compressed in-memory
// format.  This isn't so difficult, it just requires careful attention in a
// few places.
// These represent eight bits of memory.
#define VA_BITS2_NOACCESS     0x0      // 00b
#define VA_BITS2_UNDEFINED    0x1      // 01b
#define VA_BITS2_DEFINED      0x2      // 10b
#define VA_BITS2_PARTDEFINED  0x3      // 11b

// These represent 16 bits of memory.
#define VA_BITS4_NOACCESS     0x0      // 00_00b
#define VA_BITS4_UNDEFINED    0x5      // 01_01b
#define VA_BITS4_DEFINED      0xa      // 10_10b

// These represent 32 bits of memory.
#define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
#define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
#define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b

// These represent 64 bits of memory.
#define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
#define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
#define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2

// These represent 128 bits of memory.
#define VA_BITS32_UNDEFINED   0x55555555  // 01_01_01_01b x 4

#define SM_CHUNKS             16384    // Each SM covers 64k of memory.
#define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
#define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)

// Paranoia:  it's critical for performance that the requested inlining
// occurs.  So try extra hard.
#define INLINE    inline __attribute__((always_inline))
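
/* Worked example for the two offset macros above (added illustration):
   for a == 0x50001234, a & 0xffff == 0x1234, so
     SM_OFF(a)    == 0x1234 >> 2 == 0x48d  -- index of the vabits8 chunk
                                              covering bytes 0x1234..0x1237
     SM_OFF_16(a) == 0x1234 >> 3 == 0x246  -- index of the 16-bit chunk
                                              covering bytes 0x1230..0x1237
   ie. one vabits8 shadows 4 bytes of memory, and one 16-bit access to the
   vabits8 array shadows 8 bytes of memory. */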
static INLINE Addr start_of_this_sm ( Addr a ) {
   return (a & (~SM_MASK));
}
static INLINE Bool is_start_of_sm ( Addr a ) {
   return (start_of_this_sm(a) == a);
}

typedef
   struct {
      UChar vabits8[SM_CHUNKS];
   }
   SecMap;

// 3 distinguished secondary maps, one for no-access, one for
// accessible but undefined, and one for accessible and defined.
// Distinguished secondaries may never be modified.
#define SM_DIST_NOACCESS   0
#define SM_DIST_UNDEFINED  1
#define SM_DIST_DEFINED    2

static SecMap sm_distinguished[3];

static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
   return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
}
// Forward declaration
static void update_SM_counts(SecMap* oldSM, SecMap* newSM);

/* dist_sm points to one of our three distinguished secondaries.  Make
   a copy of it so that we can write to it.
*/
static SecMap* copy_for_writing ( SecMap* dist_sm )
{
   SecMap* new_sm;
   tl_assert(dist_sm == &sm_distinguished[0]
             || dist_sm == &sm_distinguished[1]
             || dist_sm == &sm_distinguished[2]);

   new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
   if (new_sm == NULL)
      VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
                                   sizeof(SecMap) );
   VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
   update_SM_counts(dist_sm, new_sm);
   return new_sm;
}
/* --------------- Stats --------------- */

static Int   n_issued_SMs      = 0;
static Int   n_deissued_SMs    = 0;
static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
static Int   n_undefined_SMs   = 0;
static Int   n_defined_SMs     = 0;
static Int   n_non_DSM_SMs     = 0;
static Int   max_noaccess_SMs  = 0;
static Int   max_undefined_SMs = 0;
static Int   max_defined_SMs   = 0;
static Int   max_non_DSM_SMs   = 0;

/* # searches initiated in auxmap_L1, and # base cmps required */
static ULong n_auxmap_L1_searches  = 0;
static ULong n_auxmap_L1_cmps      = 0;
/* # of searches that missed in auxmap_L1 and therefore had to
   be handed to auxmap_L2.  And the number of nodes inserted. */
static ULong n_auxmap_L2_searches  = 0;
static ULong n_auxmap_L2_nodes     = 0;

static Int   n_sanity_cheap     = 0;
static Int   n_sanity_expensive = 0;

static Int   n_secVBit_nodes   = 0;
static Int   max_secVBit_nodes = 0;
static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
{
   if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
   else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
   else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
   else                                                  { n_non_DSM_SMs  --; }

   if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
   else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
   else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
   else                                                  { n_non_DSM_SMs  ++; }

   if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
   if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
   if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
   if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
}
/* --------------- Primary maps --------------- */

/* The main primary map.  This covers some initial part of the address
   space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
   handled using the auxiliary primary map.
*/
static SecMap* primary_map[N_PRIMARY_MAP];

/* An entry in the auxiliary primary map.  base must be a 64k-aligned
   value, and sm points at the relevant secondary map.  As with the
   main primary map, the secondary may be either a real secondary, or
   one of the three distinguished secondaries.  DO NOT CHANGE THIS
   LAYOUT: the first word has to be the key for OSet fast lookups.
*/
typedef
   struct {
      Addr    base;
      SecMap* sm;
   }
   AuxMapEnt;

/* Tunable parameter: How big is the L1 queue? */
#define N_AUXMAP_L1 24

/* Tunable parameter: How far along the L1 queue to insert
   entries resulting from L2 lookups? */
#define AUXMAP_L1_INSERT_IX 12

static struct {
          Addr       base;
          AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
       }
       auxmap_L1[N_AUXMAP_L1];

static OSet* auxmap_L2 = NULL;
static void init_auxmap_L1_L2 ( void )
{
   Word i;
   for (i = 0; i < N_AUXMAP_L1; i++) {
      auxmap_L1[i].base = 0;
      auxmap_L1[i].ent  = NULL;
   }

   tl_assert(0 == offsetof(AuxMapEnt,base));
   tl_assert(sizeof(Addr) == sizeof(void*));
   auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
                                    /*fastCmp*/ NULL,
                                    VG_(malloc), "mc.iaLL.1", VG_(free) );
}
/* Check representation invariants; if OK return NULL; else a
   descriptive bit of text.  Also return the number of
   non-distinguished secondary maps referred to from the auxiliary
   primary map. */
static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
{
   Word i, j;
   /* On a 32-bit platform, the L2 and L1 tables should
      both remain empty forever.

      On a 64-bit platform:
      in the L2 table:
         all .base & 0xFFFF == 0
         all .base > MAX_PRIMARY_ADDRESS
      in the L1 table:
         all .base & 0xFFFF == 0
         all (.base > MAX_PRIMARY_ADDRESS
              and .ent points to an AuxMapEnt with the same .base)
             or
             (.base == 0 and .ent == NULL)
   */
   *n_secmaps_found = 0;
   if (sizeof(void*) == 4) {
      /* 32-bit platform */
      if (VG_(OSetGen_Size)(auxmap_L2) != 0)
         return "32-bit: auxmap_L2 is non-empty";
      for (i = 0; i < N_AUXMAP_L1; i++)
         if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
            return "32-bit: auxmap_L1 is non-empty";
   } else {
      /* 64-bit platform */
      UWord elems_seen = 0;
      AuxMapEnt *elem, *res;
      AuxMapEnt key;
      /* L2 table */
      VG_(OSetGen_ResetIter)(auxmap_L2);
      while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
         elems_seen++;
         if (0 != (elem->base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
         if (elem->base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
         if (elem->sm == NULL)
            return "64-bit: .sm in _L2 is NULL";
         if (!is_distinguished_sm(elem->sm))
            (*n_secmaps_found)++;
      }
      if (elems_seen != n_auxmap_L2_nodes)
         return "64-bit: disagreement on number of elems in _L2";
      /* Check L1-L2 correspondence */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
            continue;
         if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
         if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
         if (auxmap_L1[i].ent == NULL)
            return "64-bit: .ent is NULL in auxmap_L1";
         if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
            return "64-bit: _L1 and _L2 bases are inconsistent";
         /* Look it up in auxmap_L2. */
         key.base = auxmap_L1[i].base;
         res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
         if (res == NULL)
            return "64-bit: _L1 .base not found in _L2";
         if (res != auxmap_L1[i].ent)
            return "64-bit: _L1 .ent disagrees with _L2 entry";
      }
      /* Check L1 contains no duplicates */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0)
            continue;
         for (j = i+1; j < N_AUXMAP_L1; j++) {
            if (auxmap_L1[j].base == 0)
               continue;
            if (auxmap_L1[j].base == auxmap_L1[i].base)
               return "64-bit: duplicate _L1 .base entries";
         }
      }
   }
   return NULL; /* ok */
}
static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
{
   Word i;
   tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
   for (i = N_AUXMAP_L1-1; i > rank; i--)
      auxmap_L1[i] = auxmap_L1[i-1];
   auxmap_L1[rank].base = ent->base;
   auxmap_L1[rank].ent  = ent;
}
static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
{
   AuxMapEnt  key;
   AuxMapEnt* res;
   Word       i;

   tl_assert(a > MAX_PRIMARY_ADDRESS);
   a &= ~(Addr)0xFFFF;

   /* First search the front-cache, which is a self-organising
      list containing the most popular entries. */

   if (LIKELY(auxmap_L1[0].base == a))
      return auxmap_L1[0].ent;
   if (LIKELY(auxmap_L1[1].base == a)) {
      Addr       t_base = auxmap_L1[0].base;
      AuxMapEnt* t_ent  = auxmap_L1[0].ent;
      auxmap_L1[0].base = auxmap_L1[1].base;
      auxmap_L1[0].ent  = auxmap_L1[1].ent;
      auxmap_L1[1].base = t_base;
      auxmap_L1[1].ent  = t_ent;
      return auxmap_L1[0].ent;
   }
++;
544 for (i
= 0; i
< N_AUXMAP_L1
; i
++) {
545 if (auxmap_L1
[i
].base
== a
) {
549 tl_assert(i
>= 0 && i
<= N_AUXMAP_L1
);
551 n_auxmap_L1_cmps
+= (ULong
)(i
+1);
553 if (i
< N_AUXMAP_L1
) {
555 Addr t_base
= auxmap_L1
[i
-1].base
;
556 AuxMapEnt
* t_ent
= auxmap_L1
[i
-1].ent
;
557 auxmap_L1
[i
-1].base
= auxmap_L1
[i
-0].base
;
558 auxmap_L1
[i
-1].ent
= auxmap_L1
[i
-0].ent
;
559 auxmap_L1
[i
-0].base
= t_base
;
560 auxmap_L1
[i
-0].ent
= t_ent
;
563 return auxmap_L1
[i
].ent
;
566 n_auxmap_L2_searches
++;
   /* First see if we already have it. */
   key.base = a;
   res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
   if (res)
      insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
   return res;
}

static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
{
   AuxMapEnt *nyu, *res;

   /* First see if we already have it. */
   res = maybe_find_in_auxmap( a );
   if (LIKELY(res))
      return res;

   /* Ok, there's no entry in the secondary map, so we'll have
      to allocate one. */
   a &= ~(Addr)0xFFFF;

   nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
   nyu->base = a;
   nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
   VG_(OSetGen_Insert)( auxmap_L2, nyu );
   insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
   n_auxmap_L2_nodes++;
   return nyu;
}
/* --------------- SecMap fundamentals --------------- */

// In all these, 'low' means it's definitely in the main primary map,
// 'high' means it's definitely in the auxiliary table.

static INLINE UWord get_primary_map_low_offset ( Addr a )
{
   UWord pm_off = a >> 16;
   return pm_off;
}

static INLINE SecMap** get_secmap_low_ptr ( Addr a )
{
   UWord pm_off = a >> 16;
#  if VG_DEBUG_MEMORY >= 1
   tl_assert(pm_off < N_PRIMARY_MAP);
#  endif
   return &primary_map[ pm_off ];
}

static INLINE SecMap** get_secmap_high_ptr ( Addr a )
{
   AuxMapEnt* am = find_or_alloc_in_auxmap(a);
   return &am->sm;
}

static INLINE SecMap** get_secmap_ptr ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_low_ptr(a)
          : get_secmap_high_ptr(a));
}

static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
{
   return *get_secmap_low_ptr(a);
}

static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
{
   return *get_secmap_high_ptr(a);
}

static INLINE SecMap* get_secmap_for_writing_low(Addr a)
{
   SecMap** p = get_secmap_low_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
{
   SecMap** p = get_secmap_high_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}
/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may be a distinguished one as the caller will only want to
   be able to read it.
*/
static INLINE SecMap* get_secmap_for_reading ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_reading_low (a)
          : get_secmap_for_reading_high(a) );
}

/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may not be a distinguished one, since the caller will want
   to be able to write it.  If it is a distinguished secondary, make a
   writable copy of it, install it, and return the copy instead.  (COW
   semantics).
*/
static INLINE SecMap* get_secmap_for_writing ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_writing_low (a)
          : get_secmap_for_writing_high(a) );
}

/* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
   allocate one if one doesn't already exist.  This is used by the
   leak checker.
*/
static SecMap* maybe_get_secmap_for ( Addr a )
{
   if (a <= MAX_PRIMARY_ADDRESS) {
      return get_secmap_for_reading_low(a);
   } else {
      AuxMapEnt* am = maybe_find_in_auxmap(a);
      return am ? am->sm : NULL;
   }
}
/* --------------- Fundamental functions --------------- */

static INLINE
void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
{
   UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
   *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
   *vabits8  |=  (vabits2 << shift);   // mask in the two new bits
}

static INLINE
void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift     =  (a & 2)   << 1;        // shift by 0 or 4
   *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
   *vabits8 |=  (vabits4  << shift);   // mask in the four new bits
}

static INLINE
UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
   vabits8 >>= shift;                  // shift the two bits to the bottom
   return 0x3 & vabits8;               // mask out the rest
}

static INLINE
UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift = (a & 2) << 1;               // shift by 0 or 4
   vabits8 >>= shift;                  // shift the four bits to the bottom
   return 0xf & vabits8;               // mask out the rest
}
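
/* A small worked example of the helpers above (added illustration; not
   called by Memcheck, and the function name is made up).  Starting from a
   fully defined 4-byte group (0xaa == 10_10_10_10b), marking byte (a+1)
   as undefined flips bits [3..2] to 01b, giving 0xa6. */
static __attribute__((unused))
void example_vabits8_packing ( void )
{
   UChar vabits8 = VA_BITS8_DEFINED;   /* 0xaa */
   insert_vabits2_into_vabits8( (Addr)1, VA_BITS2_UNDEFINED, &vabits8 );
   tl_assert(vabits8 == 0xa6);
   tl_assert(extract_vabits2_from_vabits8((Addr)1, vabits8)
             == VA_BITS2_UNDEFINED);
   tl_assert(extract_vabits2_from_vabits8((Addr)0, vabits8)
             == VA_BITS2_DEFINED);
}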
// Note that these four are only used in slow cases.  The fast cases do
// clever things like combine the auxmap check (in
// get_secmap_{read,writ}able) with alignment checks.

// Any time this function is called, if it is possible that vabits2
// is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
// sec-V-bits table must also be set!
static INLINE
void set_vabits2 ( Addr a, UChar vabits2 )
{
   SecMap* sm     = get_secmap_for_writing(a);
   UWord   sm_off = SM_OFF(a);
   insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
}

static INLINE
UChar get_vabits2 ( Addr a )
{
   SecMap* sm      = get_secmap_for_reading(a);
   UWord   sm_off  = SM_OFF(a);
   UChar   vabits8 = sm->vabits8[sm_off];
   return extract_vabits2_from_vabits8(a, vabits8);
}

// Any time this function is called, if it is possible that any of the
// 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
// corresponding entry(s) in the sec-V-bits table must also be set!
static INLINE
UChar get_vabits8_for_aligned_word32 ( Addr a )
{
   SecMap* sm      = get_secmap_for_reading(a);
   UWord   sm_off  = SM_OFF(a);
   UChar   vabits8 = sm->vabits8[sm_off];
   return vabits8;
}

static INLINE
void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
{
   SecMap* sm     = get_secmap_for_writing(a);
   UWord   sm_off = SM_OFF(a);
   sm->vabits8[sm_off] = vabits8;
}
// Forward declarations
static UWord get_sec_vbits8(Addr a);
static void  set_sec_vbits8(Addr a, UWord vbits8);

// Returns False if there was an addressability error.
static INLINE
Bool set_vbits8 ( Addr a, UChar vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);
   if ( VA_BITS2_NOACCESS != vabits2 ) {
      // Addressable.  Convert in-register format to in-memory format.
      // Also remove any existing sec V bit entry for the byte if no
      // longer necessary.
      if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
      else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
      else                                    { vabits2 = VA_BITS2_PARTDEFINED;
                                                set_sec_vbits8(a, vbits8);  }
      set_vabits2(a, vabits2);
   } else {
      // Unaddressable!  Do nothing -- when writing to unaddressable
      // memory it acts as a black hole, and the V bits can never be seen
      // again.  So we don't have to write them at all.
      ok = False;
   }
   return ok;
}
// Returns False if there was an addressability error.  In that case, we put
// all defined bits into vbits8.
static INLINE
Bool get_vbits8 ( Addr a, UChar* vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);

   // Convert the in-memory format to in-register format.
   if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
   else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
   else if ( VA_BITS2_NOACCESS  == vabits2 ) {
      *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
      ok = False;
   } else {
      tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
      *vbits8 = get_sec_vbits8(a);
   }
   return ok;
}
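
// Worked example for the two conversions above (added illustration): a byte
// whose low four value bits are undefined and high four defined has
// in-register V bits 0x0F (V bit 1 == undefined).  That is neither
// V_BITS8_DEFINED (0x00) nor V_BITS8_UNDEFINED (0xFF), so set_vbits8 stores
// VA_BITS2_PARTDEFINED in the shadow map and puts the full 0x0F pattern in
// the sec-V-bits table; get_vbits8 later recovers 0x0F via get_sec_vbits8.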
/* --------------- Secondary V bit table ------------ */

// This table holds the full V bit pattern for partially-defined bytes
// (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
// memory.
//
// Note: the nodes in this table can become stale.  Eg. if you write a PDB,
// then overwrite the same address with a fully defined byte, the sec-V-bit
// node will not necessarily be removed.  This is because checking for
// whether removal is necessary would slow down the fast paths.
//
// To avoid the stale nodes building up too much, we periodically (once the
// table reaches a certain size) garbage collect (GC) the table by
// traversing it and evicting any nodes which no longer contain a PDB.
// If more than a certain proportion of nodes survived, we increase the
// table size so that GCs occur less often.
//
// This policy is designed to avoid bad table bloat in the worst case where
// a program creates huge numbers of stale PDBs -- we would get this bloat
// if we had no GC -- while handling well the case where a node becomes
// stale but shortly afterwards is rewritten with a PDB and so becomes
// non-stale again (which happens quite often, eg. in perf/bz2).  If we just
// remove all stale nodes as soon as possible, we end up re-adding a lot of
// them again later.  The "sufficiently stale" approach avoids this.  (If a
// program has many live PDBs, performance will just suck, there's no way
// around that.)
//
// Further comments, JRS 14 Feb 2012.  It turns out that the policy of
// holding on to stale entries for 2 GCs before discarding them can lead
// to massive space leaks.  So we're changing to an arrangement where
// lines are evicted as soon as they are observed to be stale during a
// GC.  This also has a side benefit of allowing the sufficiently_stale
// field to be removed from the SecVBitNode struct, reducing its size by
// 8 bytes, which is a substantial space saving considering that the
// struct was previously 32 or so bytes, on a 64 bit target.
//
// In order to try and mitigate the problem that the "sufficiently stale"
// heuristic was designed to avoid, the table size is allowed to drift
// up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
// means that nodes will exist in the table longer on average, and hopefully
// will be deleted and re-added less frequently.
//
// The previous scaling up mechanism (now called STEPUP) is retained:
// if residency exceeds 50%, the table is scaled up, although by a
// factor sqrt(2) rather than 2 as before.  This effectively doubles the
// frequency of GCs when there are many PDBs and reduces the tendency of
// stale PDBs to reside for long periods in the table.
static OSet* secVBitTable;

static ULong sec_vbits_new_nodes = 0;
static ULong sec_vbits_updates   = 0;

// This must be a power of two;  this is checked in mc_pre_clo_init().
// The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
// a larger address range) they take more space but we can get multiple
// partially-defined bytes in one if they are close to each other, reducing
// the number of total nodes.  In practice sometimes they are clustered (eg.
// perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
// row), but often not.  So we choose something intermediate.
#define BYTES_PER_SEC_VBIT_NODE     16
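
// Worked example (added illustration): with 16-byte nodes, a PDB at address
// 0x1003 lives in the node keyed by aAligned == VG_ROUNDDN(0x1003, 16) ==
// 0x1000, at offset amod == 0x1003 % 16 == 3 within that node's vbits8[]
// array (see get_sec_vbits8/set_sec_vbits8 below).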
// We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
// more than this many nodes survive a GC.
#define STEPUP_SURVIVOR_PROPORTION  0.5
#define STEPUP_GROWTH_FACTOR        1.414213562

// If the above heuristic doesn't apply, then we may make the table
// slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
// this many nodes survive a GC, _and_ the total table size does
// not exceed a fixed limit.  The numbers are somewhat arbitrary, but
// work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
// gradually reduces residency and increases the time between GCs for
// programs with small numbers of PDBs.  The 80000 limit effectively
// limits the table size to around 2MB for programs with small numbers
// of PDBs, whilst giving a reasonably long lifetime to entries, to try
// and reduce the costs resulting from deleting and re-adding of entries.
#define DRIFTUP_SURVIVOR_PROPORTION 0.15
#define DRIFTUP_GROWTH_FACTOR       1.015
#define DRIFTUP_MAX_SIZE            80000
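
/* A minimal sketch of the growth policy described above (added
   illustration; not used by Memcheck -- the real logic lives in
   gcSecVBitTable()).  Given the current table size limit and the number of
   GC survivors, it returns the next limit: STEPUP scaling if residency
   exceeds 50%, otherwise a 1.5% DRIFTUP while under the fixed cap. */
static __attribute__((unused))
Int example_next_secVBit_limit ( Int curLimit, Int nSurvivors )
{
   if ((Double)nSurvivors > (Double)curLimit * STEPUP_SURVIVOR_PROPORTION)
      return (Int)((Double)curLimit * (Double)STEPUP_GROWTH_FACTOR);
   if (curLimit < DRIFTUP_MAX_SIZE
       && (Double)nSurvivors > (Double)curLimit * DRIFTUP_SURVIVOR_PROPORTION)
      return (Int)((Double)curLimit * (Double)DRIFTUP_GROWTH_FACTOR);
   return curLimit;   /* no change */
}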
// We GC the table when it gets this many nodes in it, ie. it's effectively
// the table size.  It can change.
static Int  secVBitLimit = 1000;

// The number of GCs done, used to age sec-V-bit nodes for eviction.
// Because it's unsigned, wrapping doesn't matter -- the right answer will
// come out anyway.
static UInt GCs_done = 0;

typedef
   struct {
      Addr  a;
      UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
   }
   SecVBitNode;
static OSet* createSecVBitTable(void)
{
   OSet* newSecVBitTable;
   newSecVBitTable = VG_(OSetGen_Create_With_Pool)
      ( offsetof(SecVBitNode, a),
        NULL, // use fast comparisons
        VG_(malloc), "mc.cSVT.1 (sec VBit table)",
        VG_(free),
        1000,
        sizeof(SecVBitNode));
   return newSecVBitTable;
}
static void gcSecVBitTable(void)
{
   OSet*        secVBitTable2;
   SecVBitNode* n;
   Int          i, n_nodes = 0, n_survivors = 0;

   // Create the new table.
   secVBitTable2 = createSecVBitTable();

   // Traverse the table, moving fresh nodes into the new table.
   VG_(OSetGen_ResetIter)(secVBitTable);
   while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
      // Keep node if any of its bytes are non-stale.  Using
      // get_vabits2() for the lookup is not very efficient, but I don't
      // think it matters.
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
            // Found a non-stale byte, so keep =>
            // Insert a copy of the node into the new table.
            SecVBitNode* n2 =
               VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
            *n2 = *n;
            VG_(OSetGen_Insert)(secVBitTable2, n2);
            break;
         }
      }
   }

   // Get the before and after sizes.
   n_nodes     = VG_(OSetGen_Size)(secVBitTable);
   n_survivors = VG_(OSetGen_Size)(secVBitTable2);

   // Destroy the old table, and put the new one in its place.
   VG_(OSetGen_Destroy)(secVBitTable);
   secVBitTable = secVBitTable2;

   if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
      VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
                   n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
   }
991 if ((Double
)n_survivors
992 > ((Double
)secVBitLimit
* STEPUP_SURVIVOR_PROPORTION
)) {
993 secVBitLimit
= (Int
)((Double
)secVBitLimit
* (Double
)STEPUP_GROWTH_FACTOR
);
994 if (VG_(clo_verbosity
) > 1)
995 VG_(message
)(Vg_DebugMsg
,
996 "memcheck GC: %d new table size (stepup)\n",
1000 if (secVBitLimit
< DRIFTUP_MAX_SIZE
1001 && (Double
)n_survivors
1002 > ((Double
)secVBitLimit
* DRIFTUP_SURVIVOR_PROPORTION
)) {
1003 secVBitLimit
= (Int
)((Double
)secVBitLimit
* (Double
)DRIFTUP_GROWTH_FACTOR
);
1004 if (VG_(clo_verbosity
) > 1)
1005 VG_(message
)(Vg_DebugMsg
,
1006 "memcheck GC: %d new table size (driftup)\n",
static UWord get_sec_vbits8(Addr a)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   UChar        vbits8;
   tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   vbits8 = n->vbits8[amod];
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   return vbits8;
}
static void set_sec_vbits8(Addr a, UWord vbits8)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   if (n) {
      n->vbits8[amod] = vbits8;     // update
      sec_vbits_updates++;
   } else {
      // Do a table GC if necessary.  Nb: do this before creating and
      // inserting the new node, to avoid erroneously GC'ing the new node.
      if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
         gcSecVBitTable();
      }

      // New node:  assign the specific byte, make the rest invalid (they
      // should never be read as-is, but be cautious).
      n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
      n->a = aAligned;
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         n->vbits8[i] = V_BITS8_UNDEFINED;
      }
      n->vbits8[amod] = vbits8;

      // Insert the new node.
      VG_(OSetGen_Insert)(secVBitTable, n);
      sec_vbits_new_nodes++;

      n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
      if (n_secVBit_nodes > max_secVBit_nodes)
         max_secVBit_nodes = n_secVBit_nodes;
   }
}
/* --------------- Endianness helpers --------------- */

/* Returns the offset in memory of the byteno-th most significant byte
   in a wordszB-sized word, given the specified endianness. */
static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
                                    UWord byteno ) {
   return bigendian ? (wordszB-1-byteno) : byteno;
}
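
/* Worked example (added illustration), straight from the formula above for
   a 4-byte word:
     little-endian:  byteno 0,1,2,3  ->  memory offsets 0,1,2,3
     big-endian:     byteno 0,1,2,3  ->  memory offsets 3,2,1,0
   ie. on a big-endian target the lowest-numbered byte of the value sits at
   the highest memory offset within the word. */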
/* --------------- Ignored address ranges --------------- */

/* Denotes the address-error-reportability status for address ranges:
      IAR_NotIgnored:  the usual case -- report errors in this range
      IAR_CommandLine: don't report errors -- from command line setting
      IAR_ClientReq:   don't report errors -- from client request
*/
typedef
   enum { IAR_INVALID=99,
          IAR_NotIgnored,
          IAR_CommandLine,
          IAR_ClientReq }
   IARKind;

static const HChar* showIARKind ( IARKind iark )
{
   switch (iark) {
      case IAR_INVALID:     return "INVALID";
      case IAR_NotIgnored:  return "NotIgnored";
      case IAR_CommandLine: return "CommandLine";
      case IAR_ClientReq:   return "ClientReq";
      default:              return "???";
   }
}

// RangeMap<IARKind>
static RangeMap* gIgnoredAddressRanges = NULL;

static void init_gIgnoredAddressRanges ( void )
{
   if (LIKELY(gIgnoredAddressRanges != NULL))
      return;
   gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
                                             VG_(free), IAR_NotIgnored );
}
Bool MC_(in_ignored_range) ( Addr a )
{
   if (LIKELY(gIgnoredAddressRanges == NULL))
      return False;
   UWord how     = IAR_INVALID;
   UWord key_min = ~(UWord)0;
   UWord key_max =  (UWord)0;
   VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
   tl_assert(key_min <= a && a <= key_max);
   switch (how) {
      case IAR_NotIgnored:  return False;
      case IAR_CommandLine: return True;
      case IAR_ClientReq:   return True;
      default: break; /* invalid */
   }
   VG_(tool_panic)("MC_(in_ignore_range)");
}
Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB )
{
   if (LIKELY(!MC_(clo_ignore_range_below_sp)))
      return False;
   tl_assert(szB >= 1 && szB <= 32);
   tl_assert(MC_(clo_ignore_range_below_sp__first_offset)
             > MC_(clo_ignore_range_below_sp__last_offset));
   Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset);
   Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset);
   if (range_lo >= range_hi) {
      /* Bizarre.  We have a wraparound situation.  What should we do? */
      return False; // Play safe
   } else {
      /* This is the expected case. */
      if (range_lo <= a && a + szB - 1 <= range_hi)
         return True;
      return False;
   }
   /*NOTREACHED*/
   tl_assert(0);
}
/* Parse two Addrs (in hex) separated by a dash, or fail. */

static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 )
{
   Bool ok = VG_(parse_Addr) (ppc, result1);
   if (!ok)
      return False;
   if (**ppc != '-')
      return False;
   (*ppc)++;
   ok = VG_(parse_Addr) (ppc, result2);
   if (!ok)
      return False;
   return True;
}

/* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
   or fail. */

static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 )
{
   Bool ok = VG_(parse_UInt) (ppc, result1);
   if (!ok)
      return False;
   if (**ppc != '-')
      return False;
   (*ppc)++;
   ok = VG_(parse_UInt) (ppc, result2);
   if (!ok)
      return False;
   return True;
}
/* Parse a set of ranges separated by commas into 'ignoreRanges', or
   fail.  If they are valid, add them to the global set of ignored
   ranges. */
static Bool parse_ignore_ranges ( const HChar* str0 )
{
   init_gIgnoredAddressRanges();
   const HChar*  str = str0;
   const HChar** ppc = &str;
   while (1) {
      Addr start = ~(Addr)0;
      Addr end   = (Addr)0;
      Bool ok    = parse_Addr_pair(ppc, &start, &end);
      if (!ok)
         return False;
      if (start > end)
         return False;
      VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
      if (**ppc == 0)
         return True;
      if (**ppc != ',')
         return False;
      (*ppc)++;
   }
   /*NOTREACHED*/
   return False;
}
/* Add or remove [start, +len) from the set of ignored ranges. */
static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
{
   init_gIgnoredAddressRanges();
   const Bool verbose = (VG_(clo_verbosity) > 1);
   if (addRange) {
      VG_(bindRangeMap)(gIgnoredAddressRanges,
                        start, start+len-1, IAR_ClientReq);
      if (verbose)
         VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
                   (void*)start, (void*)(start+len-1));
   } else {
      VG_(bindRangeMap)(gIgnoredAddressRanges,
                        start, start+len-1, IAR_NotIgnored);
      if (verbose)
         VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
                   (void*)start, (void*)(start+len-1));
   }
   if (verbose) {
      VG_(dmsg)("memcheck: now have %u ranges:\n",
                VG_(sizeRangeMap)(gIgnoredAddressRanges));
      UInt i;
      for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
         UWord val     = IAR_INVALID;
         UWord key_min = ~(UWord)0;
         UWord key_max = (UWord)0;
         VG_(indexRangeMap)( &key_min, &key_max, &val,
                             gIgnoredAddressRanges, i );
         VG_(dmsg)("memcheck: [%u] %016lx-%016lx %s\n",
                   i, key_min, key_max, showIARKind(val));
      }
   }
   return True;
}
/* --------------- Load/store slow cases. --------------- */

static
__attribute__((noinline))
void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
                                Addr a, SizeT nBits, Bool bigendian )
{
   ULong  pessim[4];     /* only used when p-l-ok=yes */
   SSizeT szB            = nBits / 8;
   SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
   SSizeT i, j;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   /* Code below assumes load size is a power of two and at least 64
      bits. */
   tl_assert((szB & (szB-1)) == 0 && szL > 0);

   /* If this triggers, you probably just need to increase the size of
      the pessim array. */
   tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));

   for (j = 0; j < szL; j++) {
      pessim[j] = V_BITS64_DEFINED;
      res[j]    = V_BITS64_UNDEFINED;
   }

   /* Make up a result V word, which contains the loaded data for
      valid addresses and Defined for invalid addresses.  Iterate over
      the bytes in the word, from the most significant down to the
      least.  The vbits to return are calculated into vbits128.  Also
      compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from the pessim array) but is used as a
      cross-check. */
   for (j = szL-1; j >= 0; j--) {
      ULong vbits64    = V_BITS64_UNDEFINED;
      ULong pessim64   = V_BITS64_DEFINED;
      UWord long_index = byte_offset_w(szL, bigendian, j);
      for (i = 8-1; i >= 0; i--) {
         PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
         ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
         ok = get_vbits8(ai, &vbits8);
         vbits64 <<= 8;
         vbits64 |= vbits8;
         if (!ok) n_addrs_bad++;
         pessim64 <<= 8;
         pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
      }
      res[long_index]    = vbits64;
      pessim[long_index] = pessim64;
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault".

      Therefore, a load from a partially-addressable place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's an aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address. */

   /* "at least one of the addresses is invalid" */
   ok = False;
   for (j = 0; j < szL; j++)
      ok |= pessim[j] != V_BITS64_DEFINED;
   tl_assert(ok);

   if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
      /* Exemption applies.  Use the previously computed pessimising
         value and return the combined result, but don't flag an
         addressing error.  The pessimising value is Defined for valid
         addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits[j] UifU= pessim[j] (is pessimised by it, iow) */
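      /* Illustrative example (added; assumes a little-endian target): for
         an aligned 16-byte load whose last four bytes are unaddressable,
         pessim[0] == 0 (all eight low bytes Defined) and
         pessim[1] == 0xFFFFFFFF00000000 (Defined for bytes 8..11,
         Undefined for bytes 12..15).  The OR below therefore forces the
         V bits of the inaccessible bytes to Undefined while leaving the
         V bits of the accessible bytes unchanged. */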
      for (j = szL-1; j >= 0; j--)
         res[j] |= pessim[j];
      return;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
}
static
__attribute__((noinline))
__attribute__((used))
VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
                 this function may get called from hand written assembly. */
ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
{
   PROF_EVENT(MCPE_LOADVN_SLOW);

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  Are merely a speedup hack; can be
      omitted without loss of correctness/functionality.  Note that in
      both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  These are derived
      from LOADV64 and LOADV32.
   */
   if (LIKELY(sizeof(void*) == 8
              && nBits == 64 && VG_IS_8_ALIGNED(a))) {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
      if (LIKELY(vabits16 == VA_BITS16_DEFINED))
         return V_BITS64_DEFINED;
      if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
         return V_BITS64_UNDEFINED;
      /* else fall into the slow case */
   }
   if (LIKELY(sizeof(void*) == 8
              && nBits == 32 && VG_IS_4_ALIGNED(a))) {
      SecMap* sm      = get_secmap_for_reading(a);
      UWord   sm_off  = SM_OFF(a);
      UWord   vabits8 = sm->vabits8[sm_off];
      if (LIKELY(vabits8 == VA_BITS8_DEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      /* else fall into slow case */
   }
   /* ------------ END semi-fast cases ------------ */
   ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   SSizeT szB         = nBits / 8;
   SSizeT i;                                /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Make up a 64-bit result V word, which contains the loaded data
      for valid addresses and Defined for invalid addresses.  Iterate
      over the bytes in the word, from the most significant down to
      the least.  The vbits to return are calculated into vbits64.
      Also compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from pessim64) but is used as a
      cross-check. */
   for (i = szB-1; i >= 0; i--) {
      PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
      ai = a + byte_offset_w(szB, bigendian, i);
      ok = get_vbits8(ai, &vbits8);
      vbits64 <<= 8;
      vbits64 |= vbits8;
      if (!ok) n_addrs_bad++;
      pessim64 <<= 8;
      pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return vbits64;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return vbits64;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned word-sized chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault", despite such behaviour being
      declared undefined by ANSI C/C++.

      Therefore, a load from a partially-addressable place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's a word-sized, word-aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address. */

   /* "at least one of the addresses is invalid" */
   tl_assert(pessim64 != V_BITS64_DEFINED);

   if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
       && n_addrs_bad < VG_WORDSIZE) {
      /* Exemption applies.  Use the previously computed pessimising
         value for vbits64 and return the combined result, but don't
         flag an addressing error.  The pessimising value is Defined
         for valid addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits64 UifU= pessim64 (is pessimised by it, iow) */
      vbits64 |= pessim64;
      return vbits64;
   }

   /* Also, it appears that gcc generates string-stepping code in
      32-bit chunks on 64 bit platforms.  So, also grant an exception
      for this case.  Note that the first clause of the conditional
      (VG_WORDSIZE == 8) is known at compile time, so the whole clause
      will get folded out in 32 bit builds. */
   if (VG_WORDSIZE == 8
       && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits64 UifU= pessim64 (is pessimised by it, iow) */
      vbits64 |= pessim64;
      /* Mark the upper 32 bits as undefined, just to be on the safe
         side. */
      vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
      return vbits64;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );

   return vbits64;
}
static
__attribute__((noinline))
void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
{
   SizeT szB = nBits / 8;
   SizeT i, n_addrs_bad = 0;
   UChar vbits8;
   Addr  ai;
   Bool  ok;

   PROF_EVENT(MCPE_STOREVN_SLOW);

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  Are merely a speedup hack; can be
      omitted without loss of correctness/functionality.  Note that in
      both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  The logic below
      is somewhat similar to some cases extensively commented in
      MC_(helperc_STOREV8).
   */
   if (LIKELY(sizeof(void*) == 8
              && nBits == 64 && VG_IS_8_ALIGNED(a))) {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
      if (LIKELY( !is_distinguished_sm(sm) &&
                  (VA_BITS16_DEFINED   == vabits16 ||
                   VA_BITS16_UNDEFINED == vabits16) )) {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (LIKELY(V_BITS64_DEFINED == vbytes)) {
            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
            return;
         } else if (V_BITS64_UNDEFINED == vbytes) {
            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
            return;
         }
         /* else fall into the slow case */
      }
      /* else fall into the slow case */
   }
   if (LIKELY(sizeof(void*) == 8
              && nBits == 32 && VG_IS_4_ALIGNED(a))) {
      SecMap* sm      = get_secmap_for_reading(a);
      UWord   sm_off  = SM_OFF(a);
      UWord   vabits8 = sm->vabits8[sm_off];
      if (LIKELY( !is_distinguished_sm(sm) &&
                  (VA_BITS8_DEFINED   == vabits8 ||
                   VA_BITS8_UNDEFINED == vabits8) )) {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
            sm->vabits8[sm_off] = VA_BITS8_DEFINED;
            return;
         } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
            sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
            return;
         }
         /* else fall into the slow case */
      }
      /* else fall into the slow case */
   }
   /* ------------ END semi-fast cases ------------ */
   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Dump vbytes in memory, iterating from least to most significant
      byte.  At the same time establish addressability of the location. */
   for (i = 0; i < szB; i++) {
      PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
      ai     = a + byte_offset_w(szB, bigendian, i);
      vbits8 = vbytes & 0xff;
      ok     = set_vbits8(ai, vbits8);
      if (!ok) n_addrs_bad++;
      vbytes >>= 8;
   }

   /* If an address error has happened, report it. */
   if (n_addrs_bad > 0)
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
}
/*------------------------------------------------------------*/
/*--- Setting permissions over address ranges.             ---*/
/*------------------------------------------------------------*/

static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
                                      UWord dsm_num )
{
   UWord    sm_off, sm_off16;
   UWord    vabits2 = vabits16 & 0x3;
   SizeT    lenA, lenB, len_to_next_secmap;
   Addr     aNext;
   SecMap*  sm;
   SecMap** sm_ptr;
   SecMap*  example_dsm;

   PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);

   /* Check the V+A bits make sense. */
   tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
             VA_BITS16_UNDEFINED == vabits16 ||
             VA_BITS16_DEFINED   == vabits16);

   // This code should never write PDBs;  ensure this.  (See comment above
   // set_vabits2().)
   tl_assert(VA_BITS2_PARTDEFINED != vabits2);

   if (lenT > 256 * 1024 * 1024) {
      if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
         const HChar* s = "unknown???";
         if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
         if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
         if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
         VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
                                  "large range [0x%lx, 0x%lx) (%s)\n",
                                  a, a + lenT, s);
      }
   }
#ifndef PERF_FAST_SARP
   /*------------------ debug-only case ------------------ */
   {
      // Endianness doesn't matter here because all bytes are being set to
      // the same value.
      // Nb: We don't have to worry about updating the sec-V-bits table
      // after these set_vabits2() calls because this code never writes
      // VA_BITS2_PARTDEFINED values.
      SizeT i;
      for (i = 0; i < lenT; i++) {
         set_vabits2(a + i, vabits2);
      }
      return;
   }
#endif
   /*------------------ standard handling ------------------ */

   /* Get the distinguished secondary that we might want
      to use (part of the space-compression scheme). */
   example_dsm = &sm_distinguished[dsm_num];

   // We have to handle ranges covering various combinations of partial and
   // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
   // Cases marked with a '*' are common.
   //
   // * one partial sec-map                  (p)         1
   // - one whole sec-map                    (P)         2
   //
   // * two partial sec-maps                 (pp)        1,3
   // - one partial, one whole sec-map       (pP)        1,2
   // - one whole, one partial sec-map       (Pp)        2,3
   // - two whole sec-maps                   (PP)        2,2
   //
   // * one partial, one whole, one partial  (pPp)       1,2,3
   // - one partial, two whole               (pPP)       1,2,2
   // - two whole, one partial               (PPp)       2,2,3
   // - three whole                          (PPP)       2,2,2
   //
   // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   // - N whole                              (PP...PP)   2,2...2,2
   //
   // Break up total length (lenT) into two parts:  length in the first
   // sec-map (lenA), and the rest (lenB);  lenT == lenA + lenB.
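   // Worked example (added illustration): for a == 0x4fff0 and lenT == 0x30,
   // aNext == 0x50000 and len_to_next_secmap == 0x10, so the range crosses a
   // sec-map boundary and splits into lenA == 0x10 (handled by Part 1 in the
   // first sec-map) and lenB == 0x20 (smaller than a whole sec-map, so Part 2
   // does nothing and Part 3 finishes it off) -- a 'pp' case in the table
   // above, using parts 1 and 3.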
   aNext = start_of_this_sm(a) + SM_SIZE;
   len_to_next_secmap = aNext - a;
   if ( lenT <= len_to_next_secmap ) {
      // Range entirely within one sec-map.  Covers almost all cases.
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
      lenA = lenT;
      lenB = 0;
   } else if (is_start_of_sm(a)) {
      // Range spans at least one whole sec-map, and starts at the beginning
      // of a sec-map; skip to Part 2.
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
      lenA = 0;
      lenB = lenT;
      goto part2;
   } else {
      // Range spans two or more sec-maps, first one is partial.
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
      lenA = len_to_next_secmap;
      lenB = lenT - lenA;
   }
   //------------------------------------------------------------------------
   // Part 1: Deal with the first sec_map.  Most of the time the range will be
   // entirely within a sec_map and this part alone will suffice.  Also,
   // doing it this way lets us avoid repeatedly testing for the crossing of
   // a sec-map boundary within these loops.
   //------------------------------------------------------------------------

   // If it's distinguished, make it undistinguished if necessary.
   sm_ptr = get_secmap_ptr(a);
   if (is_distinguished_sm(*sm_ptr)) {
      if (*sm_ptr == example_dsm) {
         // Sec-map already has the V+A bits that we want, so skip.
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
         a    = aNext;
         lenA = 0;
      } else {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
         *sm_ptr = copy_for_writing(*sm_ptr);
      }
   }
   sm = *sm_ptr;

   while (True) {
      if (VG_IS_8_ALIGNED(a)) break;
      if (lenA < 1)           break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenA -= 1;
   }
   // 8-aligned, 8 byte steps
   while (True) {
      if (lenA < 8) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
      sm_off16 = SM_OFF_16(a);
      ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
      a    += 8;
      lenA -= 8;
   }
   while (True) {
      if (lenA < 1) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenA -= 1;
   }
1761 // We've finished the first sec-map. Is that it?
1765 //------------------------------------------------------------------------
1766 // Part 2: Fast-set entire sec-maps at a time.
1767 //------------------------------------------------------------------------
1769 // 64KB-aligned, 64KB steps.
1770 // Nb: we can reach here with lenB < SM_SIZE
1771 tl_assert(0 == lenA
);
1773 if (lenB
< SM_SIZE
) break;
1774 tl_assert(is_start_of_sm(a
));
1775 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K
);
1776 sm_ptr
= get_secmap_ptr(a
);
1777 if (!is_distinguished_sm(*sm_ptr
)) {
1778 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM
);
1779 // Free the non-distinguished sec-map that we're replacing. This
1780 // case happens moderately often, enough to be worthwhile.
1781 SysRes sres
= VG_(am_munmap_valgrind
)((Addr
)*sm_ptr
, sizeof(SecMap
));
1782 tl_assert2(! sr_isError(sres
), "SecMap valgrind munmap failure\n");
1784 update_SM_counts(*sm_ptr
, example_dsm
);
1785 // Make the sec-map entry point to the example DSM
1786 *sm_ptr
= example_dsm
;
1791 // We've finished the whole sec-maps. Is that it?
1795 //------------------------------------------------------------------------
1796 // Part 3: Finish off the final partial sec-map, if necessary.
1797 //------------------------------------------------------------------------
1799 tl_assert(is_start_of_sm(a
) && lenB
< SM_SIZE
);
1801 // If it's distinguished, make it undistinguished if necessary.
1802 sm_ptr
= get_secmap_ptr(a
);
1803 if (is_distinguished_sm(*sm_ptr
)) {
1804 if (*sm_ptr
== example_dsm
) {
1805 // Sec-map already has the V+A bits that we want, so stop.
1806 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK
);
1809 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2
);
1810 *sm_ptr
= copy_for_writing(*sm_ptr
);
1815 // 8-aligned, 8 byte steps
1817 if (lenB
< 8) break;
1818 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B
);
1819 sm_off16
= SM_OFF_16(a
);
1820 ((UShort
*)(sm
->vabits8
))[sm_off16
] = vabits16
;
1826 if (lenB
< 1) return;
1827 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C
);
1829 insert_vabits2_into_vabits8( a
, vabits2
, &(sm
->vabits8
[sm_off
]) );
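/* Illustrative sketch (not part of Memcheck): how the lenA/lenB split used
   by set_address_range_perms works out for concrete ranges, assuming the
   64KB sec-map granularity used above.  All names here are local to the
   sketch and purely hypothetical. */
#if 0
#include <assert.h>
#include <stdio.h>

#define SKETCH_SM_SIZE 0x10000UL   /* assumed sec-map size: 64KB */

static void sketch_split ( unsigned long a, unsigned long lenT )
{
   /* First sec-map boundary at or after 'a'. */
   unsigned long aNext = (a & ~(SKETCH_SM_SIZE-1)) + SKETCH_SM_SIZE;
   unsigned long len_to_next_secmap = aNext - a;
   unsigned long lenA, lenB;
   if (lenT <= len_to_next_secmap) {        /* Part 1 only */
      lenA = lenT;  lenB = 0;
   } else if (a % SKETCH_SM_SIZE == 0) {    /* Parts 2 (and maybe 3) only */
      lenA = 0;     lenB = lenT;
   } else {                                 /* Parts 1, 2, 3 */
      lenA = len_to_next_secmap;  lenB = lenT - lenA;
   }
   assert(lenA + lenB == lenT);
   printf("a=%#lx lenT=%#lx -> lenA=%#lx lenB=%#lx\n", a, lenT, lenA, lenB);
}

int main ( void )
{
   sketch_split(0x400123, 0x100);     /* p      : handled by part 1 only  */
   sketch_split(0x400000, 0x30000);   /* PPP    : handled by part 2 only  */
   sketch_split(0x400123, 0x30000);   /* pP..Pp : parts 1, 2 and 3        */
   return 0;
}
#endif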
/* --- Set permissions for arbitrary address ranges --- */

void MC_(make_mem_noaccess) ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
   DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Clear_Origins ( a, len );
}

static void make_mem_undefined ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
   DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
}

void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
{
   PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
   DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Set_Origins ( a, len, otag );
}

static
void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
                                          ThreadId tid, UInt okind )
{
   UInt        ecu;
   ExeContext* here;
   /* VG_(record_ExeContext) checks for validity of tid, and asserts
      if it is invalid.  So no need to do it here. */
   tl_assert(okind <= 3);
   here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   ecu  = VG_(get_ECU_from_ExeContext)(here);
   tl_assert(VG_(is_plausible_ECU)(ecu));
   MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
}

static
void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
{
   make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
}

static
void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
{
   MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
}

void MC_(make_mem_defined) ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
   DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Clear_Origins ( a, len );
}

__attribute__((unused))
static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
{
   MC_(make_mem_defined)(a, len);
}
/* For each byte in [a,a+len), if the byte is addressable, make it be
   defined, but if it isn't addressable, leave it alone.  In other
   words a version of MC_(make_mem_defined) that doesn't mess with
   addressability.  Low-performance implementation. */
static void make_mem_defined_if_addressable ( Addr a, SizeT len )
{
   SizeT i;
   UChar vabits2;
   DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
   for (i = 0; i < len; i++) {
      vabits2 = get_vabits2( a+i );
      if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
         set_vabits2(a+i, VA_BITS2_DEFINED);
         if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
            MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
         }
      }
   }
}

/* Similarly (needed for mprotect handling ..) */
static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
{
   SizeT i;
   UChar vabits2;
   DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
   for (i = 0; i < len; i++) {
      vabits2 = get_vabits2( a+i );
      if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
         set_vabits2(a+i, VA_BITS2_DEFINED);
         if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
            MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
         }
      }
   }
}
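/* Illustrative sketch (not part of Memcheck): the difference between the
   two helpers above, modelled on a tiny array of per-byte state codes.
   The enum values are stand-ins for the real VA_BITS2_* constants. */
#if 0
#include <stdio.h>

enum { SK_NOACCESS = 0, SK_UNDEFINED = 1, SK_DEFINED = 2, SK_PARTDEFINED = 3 };

/* if_addressable: every byte that is not NOACCESS becomes DEFINED.
   if_noaccess:    only NOACCESS bytes become DEFINED (cf. mprotect). */
static void defined_if_addressable ( int* v, int n )
{ for (int i = 0; i < n; i++) if (v[i] != SK_NOACCESS) v[i] = SK_DEFINED; }

static void defined_if_noaccess ( int* v, int n )
{ for (int i = 0; i < n; i++) if (v[i] == SK_NOACCESS) v[i] = SK_DEFINED; }

int main ( void )
{
   int a[4] = { SK_NOACCESS, SK_UNDEFINED, SK_PARTDEFINED, SK_DEFINED };
   int b[4] = { SK_NOACCESS, SK_UNDEFINED, SK_PARTDEFINED, SK_DEFINED };
   defined_if_addressable(a, 4);   /* -> 0 2 2 2 */
   defined_if_noaccess(b, 4);      /* -> 2 1 3 2 */
   printf("%d %d %d %d / %d %d %d %d\n",
          a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
   return 0;
}
#endif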
/* --- Block-copy permissions (needed for implementing realloc() and
       similar operations). --- */

void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
{
   SizeT i, j;
   UChar vabits2, vabits8;
   Bool  aligned, nooverlap;

   DEBUG("MC_(copy_address_range_state)\n");
   PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);

   if (len == 0 || src == dst)
      return;

   aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
   nooverlap = src+len <= dst || dst+len <= src;

   if (nooverlap && aligned) {

      /* Vectorised fast case, when no overlap and suitably aligned */
      /* vector loop */
      i = 0;
      while (len >= 4) {
         vabits8 = get_vabits8_for_aligned_word32( src+i );
         set_vabits8_for_aligned_word32( dst+i, vabits8 );
         if (LIKELY(VA_BITS8_DEFINED      == vabits8
                    || VA_BITS8_UNDEFINED == vabits8
                    || VA_BITS8_NOACCESS  == vabits8)) {
            /* do nothing */
         } else {
            /* have to copy secondary map info */
            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
               set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
               set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
               set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
            if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
               set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
         }
         i   += 4;
         len -= 4;
      }
      /* fixup loop */
      while (len >= 1) {
         vabits2 = get_vabits2( src+i );
         set_vabits2( dst+i, vabits2 );
         if (VA_BITS2_PARTDEFINED == vabits2) {
            set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
         }
         i++;
         len--;
      }

   } else {

      /* We have to do things the slow way */
      if (src < dst) {
         for (i = 0, j = len-1; i < len; i++, j--) {
            PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
            vabits2 = get_vabits2( src+j );
            set_vabits2( dst+j, vabits2 );
            if (VA_BITS2_PARTDEFINED == vabits2) {
               set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
            }
         }
      }

      if (src > dst) {
         for (i = 0; i < len; i++) {
            PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
            vabits2 = get_vabits2( src+i );
            set_vabits2( dst+i, vabits2 );
            if (VA_BITS2_PARTDEFINED == vabits2) {
               set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
            }
         }
      }
   }
}
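/* Illustrative sketch (not part of Memcheck): why the slow path above picks
   the copy direction by comparing src and dst.  With an overlapping pair,
   copying in the wrong direction clobbers shadow state before it is read,
   exactly as with memmove() versus memcpy().  Plain bytes stand in for the
   vabits here. */
#if 0
#include <stdio.h>

int main ( void )
{
   char buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
   /* Copy buf[0..5] to buf[2..7]: src < dst and the ranges overlap.      */
   /* A forward copy (i = 0,1,2,...) reads already-overwritten bytes and  */
   /* would produce 1 2 1 2 1 2 1 2 -- wrong.                             */
   /* A backward copy (j = len-1 down to 0) reads each byte before it is  */
   /* overwritten:                                                        */
   for (int j = 5; j >= 0; j--)
      buf[2 + j] = buf[0 + j];
   printf("%d %d %d %d %d %d %d %d\n", buf[0], buf[1], buf[2], buf[3],
          buf[4], buf[5], buf[6], buf[7]);   /* 1 2 1 2 3 4 5 6 */
   return 0;
}
#endif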
/*------------------------------------------------------------*/
/*--- Origin tracking stuff - cache basics                 ---*/
/*------------------------------------------------------------*/

/* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   Note that this implementation draws inspiration from the "origin
   tracking by value piggybacking" scheme described in "Tracking Bad
   Apples: Reporting the Origin of Null and Undefined Value Errors"
   (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
   Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
   implemented completely differently.

   Origin tags and ECUs -- about the shadow values
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   This implementation tracks the defining point of all uninitialised
   values using so called "origin tags", which are 32-bit integers,
   rather than using the values themselves to encode the origins.  The
   latter, so-called "value piggybacking", is what the OOPSLA07 paper
   describes.

   Origin tags, as tracked by the machinery below, are 32-bit unsigned
   ints (UInts), regardless of the machine's word size.  Each tag
   comprises an upper 30-bit ECU field and a lower 2-bit
   'kind' field.  The ECU field is a number given out by m_execontext
   and has a 1-1 mapping with ExeContext*s.  An ECU can be used
   directly as an origin tag (otag), but in fact we want to put
   additional information, the 'kind' field, into it to indicate
   roughly where the tag came from.  This helps print more
   understandable error messages for the user -- it has no other
   purpose.  In summary:

   * Both ECUs and origin tags are represented as 32-bit words

   * m_execontext and the core-tool interface deal purely in ECUs.
     They have no knowledge of origin tags - that is a purely
     Memcheck-internal matter.

   * all valid ECUs have the lowest 2 bits zero and at least
     one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))

   * to convert from an ECU to an otag, OR in one of the MC_OKIND_
     constants defined in mc_include.h.

   * to convert an otag back to an ECU, AND it with ~3

   One important fact is that no valid otag is zero.  A zero otag is
   used by the implementation to indicate "no origin", which could
   mean that either the value is defined, or it is undefined but the
   implementation somehow managed to lose the origin.

   The ECU used for memory created by malloc etc is derived from the
   stack trace at the time the malloc etc happens.  This means the
   mechanism can show the exact allocation point for heap-created
   uninitialised values.

   In contrast, it is simply too expensive to create a complete
   backtrace for each stack allocation.  Therefore we merely use a
   depth-1 backtrace for stack allocations, which can be done once at
   translation time, rather than N times at run time.  The result of
   this is that, for stack created uninitialised values, Memcheck can
   only show the allocating function, and not what called it.
   Furthermore, compilers tend to move the stack pointer just once at
   the start of the function, to allocate all locals, and so in fact
   the stack origin almost always simply points to the opening brace
   of the function.  Net result is, for stack origins, the mechanism
   can tell you in which function the undefined value was created, but
   that's all.  Users will need to carefully check all locals in the
   relevant function.

   Shadowing registers and memory
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   Memory is shadowed using a two level cache structure (ocacheL1 and
   ocacheL2).  Memory references are first directed to ocacheL1.  This
   is a traditional 2-way set associative cache with 32-byte lines and
   approximate LRU replacement within each set.

   A naive implementation would require storing one 32 bit otag for
   each byte of memory covered, a 4:1 space overhead.  Instead, there
   is one otag for every 4 bytes of memory covered, plus a 4-bit mask
   that shows which of the 4 bytes have that shadow value and which
   have a shadow value of zero (indicating no origin).  Hence a lot of
   space is saved, but the cost is that only one different origin per
   4 bytes of address space can be represented.  This is a source of
   imprecision, but how much of a problem it really is remains to be
   seen.

   A cache line that contains all zeroes ("no origins") contains no
   useful information, and can be ejected from the L1 cache "for
   free", in the sense that a read miss on the L1 causes a line of
   zeroes to be installed.  However, ejecting a line containing
   nonzeroes risks losing origin information permanently.  In order to
   prevent such lossage, ejected nonzero lines are placed in a
   secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
   lines.  This can grow arbitrarily large, and so should ensure that
   Memcheck runs out of memory in preference to losing useful origin
   info due to cache size limitations.

   Shadowing registers is a bit tricky, because the shadow values are
   32 bits, regardless of the size of the register.  That gives a
   problem for registers smaller than 32 bits.  The solution is to
   find spaces in the guest state that are unused, and use those to
   shadow guest state fragments smaller than 32 bits.  For example, on
   ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
   shadow are allocated for the register's otag, then there are still
   12 bytes left over which could be used to shadow 3 other values.

   This implies there is some non-obvious mapping from guest state
   (start,length) pairs to the relevant shadow offset (for the origin
   tags).  And it is unfortunately guest-architecture specific.  The
   mapping is contained in mc_machine.c, which is quite lengthy but
   straightforward.

   Instrumenting the IR
   ~~~~~~~~~~~~~~~~~~~~

   Instrumentation is largely straightforward, and done by the
   functions schemeE and schemeS in mc_translate.c.  These generate
   code for handling the origin tags of expressions (E) and statements
   (S) respectively.  The rather strange names are a reference to the
   "compilation schemes" shown in Simon Peyton Jones' book "The
   Implementation of Functional Programming Languages" (Prentice Hall,
   1987, see
   http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).

   schemeS merely arranges to move shadow values around the guest
   state to track the incoming IR.  schemeE is largely trivial too.
   The only significant point is how to compute the otag corresponding
   to binary (or ternary, quaternary, etc) operator applications.  The
   rule is simple: just take whichever value is larger (32-bit
   unsigned max).  Constants get the special value zero.  Hence this
   rule always propagates a nonzero (known) otag in preference to a
   zero (unknown, or more likely, value-is-defined) tag, as we want.
   If two different undefined values are inputs to a binary operator
   application, then which is propagated is arbitrary, but that
   doesn't matter, since the program is erroneous in using either of
   the values, and so there's no point in attempting to propagate
   both.

   Since constants are abstracted to (otag) zero, much of the
   instrumentation code can be folded out without difficulty by the
   generic post-instrumentation IR cleanup pass, using these rules:
   Max32U(0,x) -> x, Max32U(x,0) -> x, Max32(x,y) where x and y are
   constants is evaluated at JIT time, and the resulting dead code is
   removed.  In practice this causes surprisingly few Max32Us to
   survive through to backend code generation.

   Integration with the V-bits machinery
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   This is again largely straightforward.  Mostly the otag and V bits
   stuff are independent.  The only point of interaction is when the V
   bits instrumenter creates a call to a helper function to report an
   uninitialised value error -- in that case it must first use schemeE
   to get hold of the origin tag expression for the value, and pass
   that to the helper too.

   There is the usual stuff to do with setting address range
   permissions.  When memory is painted undefined, we must also know
   the origin tag to paint with, which involves some tedious plumbing,
   particularly to do with the fast case stack handlers.  When memory
   is painted defined or noaccess then the origin tags must be forced
   to zero.

   One of the goals of the implementation was to ensure that the
   non-origin tracking mode isn't slowed down at all.  To do this,
   various functions to do with memory permissions setting (again,
   mostly pertaining to the stack) are duplicated for the with- and
   without-otag cases.

   Dealing with stack redzones, and the NIA cache
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   This is one of the few non-obvious parts of the implementation.

   Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
   reserved area below the stack pointer, that can be used as scratch
   space by compiler generated code for functions.  In the Memcheck
   sources this is referred to as the "stack redzone".  The important
   thing here is that such redzones are considered volatile across
   function calls and returns.  So Memcheck takes care to mark them as
   undefined for each call and return, on the afflicted platforms.
   Past experience shows this is essential in order to get reliable
   messages about uninitialised values that come from the stack.

   So the question is, when we paint a redzone undefined, what origin
   tag should we use for it?  Consider a function f() calling g().  If
   we paint the redzone using an otag derived from the ExeContext of
   the CALL/BL instruction in f, then any errors in g causing it to
   use uninitialised values that happen to lie in the redzone, will be
   reported as having their origin in f.  Which is highly confusing.

   The same applies for returns: if, on a return, we paint the redzone
   using an origin tag derived from the ExeContext of the RET/BLR
   instruction in g, then any later errors in f causing it to use
   uninitialised values in the redzone, will be reported as having
   their origin in g.  Which is just as confusing.

   To do it right, in both cases we need to use an origin tag which
   pertains to the instruction which dynamically follows the CALL/BL
   or RET/BLR.  In short, one derived from the NIA - the "next
   instruction address".

   To make this work, Memcheck's redzone-painting helper,
   MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
   NIA.  It converts the NIA to a 1-element ExeContext, and uses that
   ExeContext's ECU as the basis for the otag used to paint the
   redzone.  The expensive part of this is converting an NIA into an
   ECU, since this happens once for every call and every return.  So
   we use a simple 511-line, 2-way set associative cache
   (nia_to_ecu_cache) to cache the mappings, and that knocks most of
   that cost off.

   Further background comments
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~

   > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
   > it really just the address of the relevant ExeContext?

   Well, it's not the address, but a value which has a 1-1 mapping
   with ExeContexts, and is guaranteed not to be zero, since zero
   denotes (to memcheck) "unknown origin or defined value".  So these
   UInts are just numbers starting at 4 and incrementing by 4; each
   ExeContext is given a number when it is created.  (*** NOTE this
   confuses otags and ECUs; see comments above ***).

   Making these otags 32-bit regardless of the machine's word size
   makes the 64-bit implementation easier (next para).  And it doesn't
   really limit us in any way, since for the tags to overflow would
   require that the program somehow caused 2^30-1 different
   ExeContexts to be created, in which case it is probably in deep
   trouble.  Not to mention V will have soaked up many tens of
   gigabytes of memory merely to store them all.

   So having 64-bit origins doesn't really buy you anything, and has
   the following downsides:

   Suppose that instead, an otag is a UWord.  This would mean that, on
   a 64-bit target:

   1. It becomes hard to shadow any element of guest state which is
      smaller than 8 bytes.  To do so means you'd need to find some
      8-byte-sized hole in the guest state which you don't want to
      shadow, and use that instead to hold the otag.  On ppc64, the
      condition code register(s) are split into 20 UChar sized pieces,
      all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
      and so that would entail finding 160 bytes somewhere else in the
      guest state.

      Even on x86, I want to track origins for %AH .. %DH (bits 15:8
      of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
      same) and so I had to look for 4 untracked otag-sized areas in
      the guest state to make that possible.

      The same problem exists of course when origin tags are only 32
      bits, but it's less extreme.

   2. (More compelling) it doubles the size of the origin shadow
      memory.  Given that the shadow memory is organised as a fixed
      size cache, and that accuracy of tracking is limited by origins
      falling out the cache due to space conflicts, this isn't good.

   > Another question: is the origin tracking perfect, or are there
   > cases where it fails to determine an origin?

   It is imperfect for at least the following reasons, and probably
   others:

   * Insufficient capacity in the origin cache.  When a line is
     evicted from the cache it is gone forever, and so subsequent
     queries for the line produce zero, indicating no origin
     information.  Interestingly, a line containing all zeroes can be
     evicted "free" from the cache, since it contains no useful
     information, so there is scope perhaps for some cleverer cache
     management schemes.  (*** NOTE, with the introduction of the
     second level origin tag cache, ocacheL2, this is no longer a
     problem. ***)

   * The origin cache only stores one otag per 32-bits of address
     space, plus 4 bits indicating which of the 4 bytes has that tag
     and which are considered defined.  The result is that if two
     undefined bytes in the same word are stored in memory, the first
     stored byte's origin will be lost and replaced by the origin for
     the second.

   * Nonzero origin tags for defined values.  Consider a binary
     operator application op(x,y).  Suppose y is undefined (and so has
     a valid nonzero origin tag), and x is defined, but erroneously
     has a nonzero origin tag (defined values should have tag zero).
     If the erroneous tag has a numeric value greater than y's tag,
     then the rule for propagating origin tags through binary
     operations, which is simply to take the unsigned max of the two
     tags, will erroneously propagate x's tag rather than y's.

   * Some obscure uses of x86/amd64 byte registers can cause lossage
     or confusion of origins.  %AH .. %DH are treated as different
     from, and unrelated to, their parent registers, %EAX .. %EDX.
     So some weird sequences like

        movb undefined-value, %AH
        movb defined-value, %AL
        .. use %AX or %EAX ..

     will cause the origin attributed to %AH to be ignored, since %AL,
     %AX, %EAX are treated as the same register, and %AH as a
     completely separate one.

   But having said all that, it actually seems to work fairly well in
   practice.  */
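/* Illustrative sketch (not part of Memcheck): the ECU <-> otag conversions
   and the max-propagation rule described in the comment above, written out
   as plain C.  The specific kind value and ECU number used below are only
   for the example; the real constants live in mc_include.h and
   m_execontext. */
#if 0
#include <assert.h>

typedef unsigned int UInt32;

#define SK_OKIND_STACK 1u              /* stand-in for an MC_OKIND_ constant */

static UInt32 ecu_to_otag ( UInt32 ecu, UInt32 okind )
{
   assert((ecu & 3) == 0 && ecu != 0); /* cf. VG_(is_plausible_ECU) */
   return ecu | okind;                 /* OR in the 2-bit kind */
}

static UInt32 otag_to_ecu ( UInt32 otag )
{
   return otag & ~3u;                  /* AND with ~3 recovers the ECU */
}

/* Otag propagation through a binary operation: unsigned max, so a nonzero
   (known) origin always wins over zero (defined / unknown origin). */
static UInt32 max32u ( UInt32 x, UInt32 y ) { return x > y ? x : y; }

int main ( void )
{
   UInt32 otag = ecu_to_otag(0x1230, SK_OKIND_STACK);  /* 0x1231 */
   assert(otag_to_ecu(otag) == 0x1230);
   assert(max32u(0, otag) == otag);   /* the defined (zero-tagged) input loses */
   assert(max32u(otag, 0) == otag);
   return 0;
}
#endif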
static UWord stats_ocacheL1_find           = 0;
static UWord stats_ocacheL1_found_at_1     = 0;
static UWord stats_ocacheL1_found_at_N     = 0;
static UWord stats_ocacheL1_misses         = 0;
static UWord stats_ocacheL1_lossage        = 0;
static UWord stats_ocacheL1_movefwds       = 0;

static UWord stats__ocacheL2_refs          = 0;
static UWord stats__ocacheL2_misses        = 0;
static UWord stats__ocacheL2_n_nodes_max   = 0;
/* Cache of 32-bit values, one every 32 bits of address space */

#define OC_BITS_PER_LINE 5
#define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))

static INLINE UWord oc_line_offset ( Addr a ) {
   return (a >> 2) & (OC_W32S_PER_LINE - 1);
}

static INLINE Bool is_valid_oc_tag ( Addr tag ) {
   return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
}
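/* Illustrative sketch (not part of Memcheck): how an address is carved up
   by the macros above.  With OC_BITS_PER_LINE == 5, a line covers 32 bytes,
   i.e. 8 32-bit words: bits [1:0] select the byte within a word, bits [4:2]
   the word within the line, and everything above bit 4 forms the line tag
   (the set number, used further below, is taken from the bits just above
   the line offset). */
#if 0
#include <assert.h>

int main ( void )
{
   unsigned long a       = 0x400123;
   unsigned long byteoff = a & 3;                      /* byte in word: 3  */
   unsigned long lineoff = (a >> 2) & ((1 << 3) - 1);  /* word in line: 0  */
   unsigned long tag     = a & ~((1UL << 5) - 1);      /* line tag         */
   assert(byteoff == 3 && lineoff == 0 && tag == 0x400120);
   return 0;
}
#endif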
#define OC_LINES_PER_SET 2

#define OC_N_SET_BITS    20
#define OC_N_SETS        (1 << OC_N_SET_BITS)

/* These settings give:
   64 bit host: ocache:  100,663,296 sizeB  67,108,864 useful
   32 bit host: ocache:   92,274,688 sizeB  67,108,864 useful
*/

#define OC_MOVE_FORWARDS_EVERY_BITS 7


typedef
   struct {
      Addr  tag;
      UInt  w32[OC_W32S_PER_LINE];
      UChar descr[OC_W32S_PER_LINE];
   }
   OCacheLine;

/* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
   in use, 'n' (nonzero) if it contains at least one valid origin tag,
   and 'z' if all the represented tags are zero. */
static UChar classify_OCacheLine ( OCacheLine* line )
{
   UWord i;
   if (line->tag == 1/*invalid*/)
      return 'e'; /* EMPTY */
   tl_assert(is_valid_oc_tag(line->tag));
   for (i = 0; i < OC_W32S_PER_LINE; i++) {
      tl_assert(0 == ((~0xF) & line->descr[i]));
      if (line->w32[i] > 0 && line->descr[i] > 0)
         return 'n'; /* NONZERO - contains useful info */
   }
   return 'z'; /* ZERO - no useful info */
}

typedef
   struct {
      OCacheLine line[OC_LINES_PER_SET];
   }
   OCacheSet;

typedef
   struct {
      OCacheSet set[OC_N_SETS];
   }
   OCache;
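/* Illustrative sketch (not part of Memcheck): where the sizes quoted in the
   "These settings give" comment come from, given the structures just
   defined.  On a 64-bit host sizeof(Addr) == 8, so an OCacheLine is
   8 + 8*4 + 8*1 = 48 bytes; with 2 lines per set and 2^20 sets that is
   96 * 1048576 = 100,663,296 bytes, of which only the w32[] payload
   (32 bytes per line, 67,108,864 bytes in total) holds origin tags.  On a
   32-bit host the tag shrinks to 4 bytes, giving 44-byte lines and
   92,274,688 bytes. */
#if 0
#include <assert.h>
#include <stddef.h>

int main ( void )
{
   size_t line64 = 8 + 8*sizeof(unsigned int) + 8*sizeof(unsigned char); /* 48 */
   size_t line32 = 4 + 8*sizeof(unsigned int) + 8*sizeof(unsigned char); /* 44 */
   size_t sets   = (size_t)1 << 20;
   assert(2 * line64 * sets == 100663296);                 /* 64-bit host */
   assert(2 * line32 * sets ==  92274688);                 /* 32-bit host */
   assert(2 * (8*sizeof(unsigned int)) * sets == 67108864); /* useful bytes */
   return 0;
}
#endif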
static OCache* ocacheL1 = NULL;
static UWord   ocacheL1_event_ctr = 0;

static void init_ocacheL2 ( void ); /* fwds */
static void init_OCache ( void )
{
   UWord line, set;
   tl_assert(MC_(clo_mc_level) >= 3);
   tl_assert(ocacheL1 == NULL);
   ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
   if (ocacheL1 == NULL) {
      VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
                                   sizeof(OCache) );
   }
   tl_assert(ocacheL1 != NULL);
   for (set = 0; set < OC_N_SETS; set++) {
      for (line = 0; line < OC_LINES_PER_SET; line++) {
         ocacheL1->set[set].line[line].tag = 1/*invalid*/;
      }
   }
   init_ocacheL2();
}

static void moveLineForwards ( OCacheSet* set, UWord lineno )
{
   OCacheLine tmp;
   stats_ocacheL1_movefwds++;
   tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
   tmp = set->line[lineno-1];
   set->line[lineno-1] = set->line[lineno];
   set->line[lineno] = tmp;
}

static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
   UWord i;
   for (i = 0; i < OC_W32S_PER_LINE; i++) {
      line->w32[i]   = 0; /* NO ORIGIN */
      line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
   }
   line->tag = tag;
}
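/* Illustrative sketch (not part of Memcheck): reading the origin of a single
   byte out of a line, given the representation set up above -- one otag per
   32-bit word plus a 4-bit descr mask saying which of the 4 bytes actually
   carry that otag.  This is a standalone model of how descr/w32 are consulted
   by the per-byte helpers, not the real helper code; the bit-within-descr
   convention shown here is an assumption of the sketch. */
#if 0
#include <assert.h>

typedef struct {
   unsigned long tag;
   unsigned int  w32[8];
   unsigned char descr[8];
} SketchLine;

static unsigned int sketch_load1_origin ( const SketchLine* line,
                                          unsigned long a )
{
   unsigned long lineoff = (a >> 2) & 7;   /* which 32-bit word      */
   unsigned long byteoff = a & 3;          /* which byte in the word */
   if (line->descr[lineoff] & (1u << byteoff))
      return line->w32[lineoff];           /* byte carries the otag  */
   else
      return 0;                            /* no origin recorded     */
}

int main ( void )
{
   SketchLine line = { .tag = 0x400120, .w32 = { 0x1231 }, .descr = { 0x5 } };
   assert(sketch_load1_origin(&line, 0x400120) == 0x1231);  /* descr bit 0 set   */
   assert(sketch_load1_origin(&line, 0x400121) == 0);       /* descr bit 1 clear */
   return 0;
}
#endif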
//////////////////////////////////////////////////////////////
//// OCache backing store

static OSet* ocacheL2 = NULL;

static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
   return VG_(malloc)(cc, szB);
}
static void ocacheL2_free ( void* v ) {
   VG_(free)( v );
}

/* Stats: # nodes currently in tree */
static UWord stats__ocacheL2_n_nodes = 0;

static void init_ocacheL2 ( void )
{
   tl_assert(!ocacheL2);
   tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
   tl_assert(0 == offsetof(OCacheLine,tag));
   ocacheL2
      = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
                             NULL, /* fast cmp */
                             ocacheL2_malloc, "mc.ioL2", ocacheL2_free );
   stats__ocacheL2_n_nodes = 0;
}

/* Find line with the given tag in the tree, or NULL if not found. */
static OCacheLine* ocacheL2_find_tag ( Addr tag )
{
   OCacheLine* line;
   tl_assert(is_valid_oc_tag(tag));
   stats__ocacheL2_refs++;
   line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
   return line;
}

/* Delete the line with the given tag from the tree, if it is present, and
   free up the associated memory. */
static void ocacheL2_del_tag ( Addr tag )
{
   OCacheLine* line;
   tl_assert(is_valid_oc_tag(tag));
   stats__ocacheL2_refs++;
   line = VG_(OSetGen_Remove)( ocacheL2, &tag );
   if (line) {
      VG_(OSetGen_FreeNode)(ocacheL2, line);
      tl_assert(stats__ocacheL2_n_nodes > 0);
      stats__ocacheL2_n_nodes--;
   }
}

/* Add a copy of the given line to the tree.  It must not already be
   present. */
static void ocacheL2_add_line ( OCacheLine* line )
{
   OCacheLine* copy;
   tl_assert(is_valid_oc_tag(line->tag));
   copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
   *copy = *line;
   stats__ocacheL2_refs++;
   VG_(OSetGen_Insert)( ocacheL2, copy );
   stats__ocacheL2_n_nodes++;
   if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
      stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
}
////
//////////////////////////////////////////////////////////////

__attribute__((noinline))
static OCacheLine* find_OCacheLine_SLOW ( Addr a )
{
   OCacheLine *victim, *inL2;
   UChar c;
   UWord line;
   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   UWord tag     = a & tagmask;
   tl_assert(setno >= 0 && setno < OC_N_SETS);

   /* we already tried line == 0; skip therefore. */
   for (line = 1; line < OC_LINES_PER_SET; line++) {
      if (ocacheL1->set[setno].line[line].tag == tag) {
         if (line == 1) {
            stats_ocacheL1_found_at_1++;
         } else {
            stats_ocacheL1_found_at_N++;
         }
         if (UNLIKELY(0 == (ocacheL1_event_ctr++
                            & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
            moveLineForwards( &ocacheL1->set[setno], line );
            line--;
         }
         return &ocacheL1->set[setno].line[line];
      }
   }

   /* A miss.  Use the last slot.  Implicitly this means we're
      ejecting the line in the last slot. */
   stats_ocacheL1_misses++;
   tl_assert(line == OC_LINES_PER_SET);
   line--;
   tl_assert(line > 0);

   /* First, move the to-be-ejected line to the L2 cache. */
   victim = &ocacheL1->set[setno].line[line];
   c = classify_OCacheLine(victim);
   switch (c) {
      case 'e':
         /* the line is empty (has invalid tag); ignore it. */
         break;
      case 'z':
         /* line contains zeroes.  We must ensure the backing store is
            updated accordingly, either by copying the line there
            verbatim, or by ensuring it isn't present there.  We
            choose the latter on the basis that it reduces the size of
            the backing store. */
         ocacheL2_del_tag( victim->tag );
         break;
      case 'n':
         /* line contains at least one real, useful origin.  Copy it
            to the backing store. */
         stats_ocacheL1_lossage++;
         inL2 = ocacheL2_find_tag( victim->tag );
         if (inL2) {
            *inL2 = *victim;
         } else {
            ocacheL2_add_line( victim );
         }
         break;
      default:
         tl_assert(0);
   }

   /* Now we must reload the L1 cache from the backing tree, if
      possible. */
   tl_assert(tag != victim->tag); /* stay sane */
   inL2 = ocacheL2_find_tag( tag );
   if (inL2) {
      /* We're in luck.  It's in the L2. */
      ocacheL1->set[setno].line[line] = *inL2;
   } else {
      /* Missed at both levels of the cache hierarchy.  We have to
         declare it as full of zeroes (unknown origins). */
      stats__ocacheL2_misses++;
      zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
   }

   /* Move it one forwards */
   moveLineForwards( &ocacheL1->set[setno], line );
   line--;

   return &ocacheL1->set[setno].line[line];
}

static INLINE OCacheLine* find_OCacheLine ( Addr a )
{
   UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   UWord tag     = a & tagmask;

   stats_ocacheL1_find++;

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(setno >= 0 && setno < OC_N_SETS);
      tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
   }

   if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
      return &ocacheL1->set[setno].line[0];
   }

   return find_OCacheLine_SLOW( a );
}

static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
{
   //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   //// Set the origins for a+0 .. a+7
   { OCacheLine* line;
     UWord lineoff = oc_line_offset(a);
     if (OC_ENABLE_ASSERTIONS) {
        tl_assert(lineoff >= 0
                  && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
     }
     line = find_OCacheLine( a );
     line->descr[lineoff+0] = 0xF;
     line->descr[lineoff+1] = 0xF;
     line->w32[lineoff+0]   = otag;
     line->w32[lineoff+1]   = otag;
   }
   //// END inlined, specialised version of MC_(helperc_b_store8)
}
/*------------------------------------------------------------*/
/*--- Aligned fast case permission setters,                ---*/
/*--- for dealing with stacks                               ---*/
/*------------------------------------------------------------*/

/*--------------------- 32-bit ---------------------*/

/* Nb: by "aligned" here we mean 4-byte aligned */

static INLINE void make_aligned_word32_undefined ( Addr a )
{
   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);

#ifndef PERF_FAST_STACK2
   make_mem_undefined(a, 4);
#else
   {
      UWord   sm_off;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
         make_mem_undefined(a, 4);
         return;
      }

      sm                  = get_secmap_for_writing_low(a);
      sm_off              = SM_OFF(a);
      sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   }
#endif
}

static INLINE
void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
{
   make_aligned_word32_undefined(a);
   //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   //// Set the origins for a+0 .. a+3
   { OCacheLine* line;
     UWord lineoff = oc_line_offset(a);
     if (OC_ENABLE_ASSERTIONS) {
        tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
     }
     line = find_OCacheLine( a );
     line->descr[lineoff] = 0xF;
     line->w32[lineoff]   = otag;
   }
   //// END inlined, specialised version of MC_(helperc_b_store4)
}

static INLINE
void make_aligned_word32_noaccess ( Addr a )
{
   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);

#ifndef PERF_FAST_STACK2
   MC_(make_mem_noaccess)(a, 4);
#else
   {
      UWord   sm_off;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
         MC_(make_mem_noaccess)(a, 4);
         return;
      }

      sm                  = get_secmap_for_writing_low(a);
      sm_off              = SM_OFF(a);
      sm->vabits8[sm_off] = VA_BITS8_NOACCESS;

      //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
      //// Set the origins for a+0 .. a+3.
      if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
         OCacheLine* line;
         UWord lineoff = oc_line_offset(a);
         if (OC_ENABLE_ASSERTIONS) {
            tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
         }
         line = find_OCacheLine( a );
         line->descr[lineoff] = 0;
      }
      //// END inlined, specialised version of MC_(helperc_b_store4)
   }
#endif
}
/*--------------------- 64-bit ---------------------*/

/* Nb: by "aligned" here we mean 8-byte aligned */

static INLINE void make_aligned_word64_undefined ( Addr a )
{
   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);

#ifndef PERF_FAST_STACK2
   make_mem_undefined(a, 8);
#else
   {
      UWord   sm_off16;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
         make_mem_undefined(a, 8);
         return;
      }

      sm       = get_secmap_for_writing_low(a);
      sm_off16 = SM_OFF_16(a);
      ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
   }
#endif
}

static INLINE
void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
{
   make_aligned_word64_undefined(a);
   //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   //// Set the origins for a+0 .. a+7
   { OCacheLine* line;
     UWord lineoff = oc_line_offset(a);
     tl_assert(lineoff >= 0
               && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
     line = find_OCacheLine( a );
     line->descr[lineoff+0] = 0xF;
     line->descr[lineoff+1] = 0xF;
     line->w32[lineoff+0]   = otag;
     line->w32[lineoff+1]   = otag;
   }
   //// END inlined, specialised version of MC_(helperc_b_store8)
}

static INLINE
void make_aligned_word64_noaccess ( Addr a )
{
   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);

#ifndef PERF_FAST_STACK2
   MC_(make_mem_noaccess)(a, 8);
#else
   {
      UWord   sm_off16;
      SecMap* sm;

      if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
         PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
         MC_(make_mem_noaccess)(a, 8);
         return;
      }

      sm       = get_secmap_for_writing_low(a);
      sm_off16 = SM_OFF_16(a);
      ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;

      //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
      //// Clear the origins for a+0 .. a+7.
      if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
         OCacheLine* line;
         UWord lineoff = oc_line_offset(a);
         tl_assert(lineoff >= 0
                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
         line = find_OCacheLine( a );
         line->descr[lineoff+0] = 0;
         line->descr[lineoff+1] = 0;
      }
      //// END inlined, specialised version of MC_(helperc_b_store8)
   }
#endif
}
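/* Illustrative sketch (not part of Memcheck): why the 64-bit setters above
   can paint 8 bytes of shadow with a single UShort store.  Each byte of
   memory has a 2-bit V+A code, so 8 bytes pack into 16 bits, and the
   VA_BITS16_* constants are just the per-byte 2-bit code replicated eight
   times.  The concrete code values used below are stand-ins, not the real
   VA_BITS2_* encodings. */
#if 0
#include <assert.h>

static unsigned short replicate8 ( unsigned int vabits2 )
{
   unsigned short r = 0;
   for (int i = 0; i < 8; i++)
      r |= (unsigned short)((vabits2 & 3) << (2*i));
   return r;
}

int main ( void )
{
   /* With a 2-bit code of 0x1 the 16-bit pattern is 0x5555; with 0x2 it is
      0xAAAA -- either way, one 16-bit store covers 8 bytes' worth of
      shadow. */
   assert(replicate8(0x1) == 0x5555);
   assert(replicate8(0x2) == 0xAAAA);
   return 0;
}
#endif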
/*------------------------------------------------------------*/
/*--- Stack pointer adjustment                              ---*/
/*------------------------------------------------------------*/

#ifdef PERF_FAST_STACK
#  define MAYBE_USED
#else
#  define MAYBE_USED __attribute__((unused))
#endif

/*--------------- adjustment by 4 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_4);
   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_4);
   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_4);
   if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
   }
}
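/* Illustrative sketch (not part of Memcheck): the address ranges the 4-byte
   handlers above touch, for a stack growing downwards and (for simplicity)
   a zero-sized redzone.  On a 4-byte allocation SP moves down by 4, so the
   newly-live bytes are [new_SP, new_SP+4); on a 4-byte deallocation SP moves
   up by 4, so the newly-dead bytes are [new_SP-4, new_SP) -- hence the "-4"
   in mc_die_mem_stack_4. */
#if 0
#include <assert.h>

int main ( void )
{
   unsigned long old_SP = 0x7fff1000, redzone = 0;

   /* allocation: push 4 bytes */
   unsigned long new_SP = old_SP - 4;
   unsigned long lo = new_SP - redzone;           /* first byte made undefined */
   assert(lo == 0x7fff0ffc && lo + 4 == old_SP);

   /* deallocation: pop the same 4 bytes */
   unsigned long new_SP2 = new_SP + 4;
   unsigned long dead_lo = new_SP2 - redzone - 4; /* first byte made noaccess */
   assert(dead_lo == new_SP && dead_lo + 4 == new_SP2);
   return 0;
}
#endif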
/*--------------- adjustment by 8 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_8);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_8);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP   );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_8);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
   }
}
/*--------------- adjustment by 12 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_12);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* from previous test we don't have 8-alignment at offset +0,
         hence must have 8 alignment at offsets +4/-4.  Hence safe to
         do 4 at +0 and then 8 at +4/. */
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_12);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP   );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* from previous test we don't have 8-alignment at offset +0,
         hence must have 8 alignment at offsets +4/-4.  Hence safe to
         do 4 at +0 and then 8 at +4/. */
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_12);
   /* Note the -12 in the test */
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
      /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
         -4. */
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* We have 4-alignment at +0, but we don't have 8-alignment at
         -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
         and then 8 at -8. */
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
   }
}
/*--------------- adjustment by 16 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_16);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
         Hence do 4 at +0, 8 at +4, 4 at +12. */
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_16);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP   );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
         Hence do 4 at +0, 8 at +4, 4 at +12. */
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP    );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_16);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
   }
}
/*--------------- adjustment by 32 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_32);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Straightforward */
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
         +0 and +28. */
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_32);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Straightforward */
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP    );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
         +0 and +28. */
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP    );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_32);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Straightforward */
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
         4 at -32 and -4. */
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
   }
}
3108 /*--------------- adjustment by 112 bytes ---------------*/
3111 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP
, UInt ecu
)
3113 UInt otag
= ecu
| MC_OKIND_STACK
;
3114 PROF_EVENT(MCPE_NEW_MEM_STACK_112
);
3115 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3116 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3117 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 , otag
);
3118 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3119 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3120 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+32, otag
);
3121 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+40, otag
);
3122 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+48, otag
);
3123 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+56, otag
);
3124 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+64, otag
);
3125 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+72, otag
);
3126 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+80, otag
);
3127 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+88, otag
);
3128 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+96, otag
);
3129 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+104, otag
);
3131 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 112, otag
);
3136 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP
)
3138 PROF_EVENT(MCPE_NEW_MEM_STACK_112
);
3139 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3140 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3141 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3142 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3143 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3144 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+32 );
3145 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+40 );
3146 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+48 );
3147 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+56 );
3148 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+64 );
3149 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+72 );
3150 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+80 );
3151 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+88 );
3152 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+96 );
3153 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+104 );
3155 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 112 );
3160 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP
)
3162 PROF_EVENT(MCPE_DIE_MEM_STACK_112
);
3163 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3164 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-112);
3165 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-104);
3166 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-96 );
3167 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-88 );
3168 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-80 );
3169 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-72 );
3170 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-64 );
3171 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-56 );
3172 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-48 );
3173 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-40 );
3174 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3175 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-24 );
3176 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3177 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
- 8 );
3179 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-112, 112 );
3183 /*--------------- adjustment by 128 bytes ---------------*/
3186 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP
, UInt ecu
)
3188 UInt otag
= ecu
| MC_OKIND_STACK
;
3189 PROF_EVENT(MCPE_NEW_MEM_STACK_128
);
3190 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3191 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3192 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 , otag
);
3193 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3194 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3195 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+32, otag
);
3196 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+40, otag
);
3197 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+48, otag
);
3198 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+56, otag
);
3199 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+64, otag
);
3200 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+72, otag
);
3201 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+80, otag
);
3202 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+88, otag
);
3203 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+96, otag
);
3204 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+104, otag
);
3205 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+112, otag
);
3206 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+120, otag
);
3208 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 128, otag
);
3213 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP
)
3215 PROF_EVENT(MCPE_NEW_MEM_STACK_128
);
3216 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3217 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3218 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3219 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3220 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3221 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+32 );
3222 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+40 );
3223 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+48 );
3224 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+56 );
3225 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+64 );
3226 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+72 );
3227 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+80 );
3228 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+88 );
3229 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+96 );
3230 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+104 );
3231 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+112 );
3232 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+120 );
3234 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 128 );
static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_128);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   }
}
/*--------------- adjustment by 144 bytes ---------------*/

static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_144);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   }
}
static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_144);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   }
}
static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_144);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   }
}
/*--------------- adjustment by 160 bytes ---------------*/

static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   }
}
static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   }
}
static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   }
}
/*--------------- adjustment by N bytes ---------------*/

static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK);
   MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
}

static void mc_new_mem_stack ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_NEW_MEM_STACK);
   make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
}

static void mc_die_mem_stack ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_DIE_MEM_STACK);
   MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
}
/* The AMD64 ABI says:

   "The 128-byte area beyond the location pointed to by %rsp is considered
    to be reserved and shall not be modified by signal or interrupt
    handlers.  Therefore, functions may use this area for temporary data
    that is not needed across function calls.  In particular, leaf functions
    may use this area for their entire stack frame, rather than adjusting
    the stack pointer in the prologue and epilogue.  This area is known as
    the red zone."

   So after any call or return we need to mark this redzone as containing
   undefined values.

   Consider this: we're in function f.  f calls g.  g moves rsp down
   modestly (say 16 bytes) and writes stuff all over the red zone, making it
   defined.  g returns.  f is buggy and reads from parts of the red zone
   that it didn't write on.  But because g filled that area in, f is going
   to be picking up defined V bits and so any errors from reading bits of
   the red zone it didn't write, will be missed.  The only solution I could
   think of was to make the red zone undefined when g returns to f.

   This is in accordance with the ABI, which makes it clear the redzone
   is volatile across function calls.

   The problem occurs the other way round too: f could fill the RZ up
   with defined values and g could mistakenly read them.  So the RZ
   also needs to be nuked on function calls.
*/
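/* As an illustrative sketch only (not part of Memcheck, and whether the
   temporary really lands in the red zone depends on the compiler), here
   is the f/g scenario above at the C level:

      static long g ( void )
      {
         long tmp = 42;     // may be spilled into g's red zone; those
         return tmp;        // bytes then become *defined* in the shadow map
      }

      long f ( void )
      {
         long x = g();
         // A buggy read below f's stack pointer would now see the defined
         // bytes g left behind, and Memcheck would stay silent, unless the
         // helpers above re-mark the red zone as undefined when g returns.
         return x;
      }
*/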
/* Here's a simple cache to hold nia -> ECU mappings.  It could be
   improved so as to have a lower miss rate. */

static UWord stats__nia_cache_queries = 0;
static UWord stats__nia_cache_misses  = 0;

typedef
   struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
            UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   WCacheEnt;

#define N_NIA_TO_ECU_CACHE 511

static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
static void init_nia_to_ecu_cache ( void )
{
   UWord       i;
   Addr        zero_addr = 0;
   ExeContext* zero_ec;
   UInt        zero_ecu;
   /* Fill all the slots with an entry for address zero, and the
      relevant otags accordingly.  Hence the cache is initially filled
      with valid data. */
   zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   tl_assert(zero_ec);
   zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
      nia_to_ecu_cache[i].nia0 = zero_addr;
      nia_to_ecu_cache[i].ecu0 = zero_ecu;
      nia_to_ecu_cache[i].nia1 = zero_addr;
      nia_to_ecu_cache[i].ecu1 = zero_ecu;
   }
}
static inline UInt convert_nia_to_ecu ( Addr nia )
{
   UWord       i;
   UInt        ecu;
   ExeContext* ec;

   tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );

   stats__nia_cache_queries++;
   i = nia % N_NIA_TO_ECU_CACHE;
   tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);

   if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
      return nia_to_ecu_cache[i].ecu0;

   if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
#     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
      SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
      SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
#     undef SWAP
      return nia_to_ecu_cache[i].ecu0;
   }

   stats__nia_cache_misses++;
   ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   tl_assert(ec);
   ecu = VG_(get_ECU_from_ExeContext)(ec);
   tl_assert(VG_(is_plausible_ECU)(ecu));

   nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;

   nia_to_ecu_cache[i].nia0 = nia;
   nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   return ecu;
}
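/* Purely illustrative, standalone sketch (not part of Memcheck) of the
   two-entry, move-to-front scheme used by convert_nia_to_ecu above.
   All names here are made up:

      typedef struct { UWord key0, val0, key1, val1; } ToyEnt;
      static ToyEnt toy[511];

      static UWord toy_lookup ( UWord key, UWord (*compute)(UWord) )
      {
         UWord i = key % 511;
         if (toy[i].key0 == key) return toy[i].val0;      // hit on front entry
         if (toy[i].key1 == key) {                        // hit on back entry:
            UWord tk = toy[i].key0, tv = toy[i].val0;     // swap it to the front
            toy[i].key0 = toy[i].key1;  toy[i].val0 = toy[i].val1;
            toy[i].key1 = tk;           toy[i].val1 = tv;
            return toy[i].val0;
         }
         toy[i].key1 = toy[i].key0;  toy[i].val1 = toy[i].val0;  // evict the older
         toy[i].key0 = key;          toy[i].val0 = compute(key); // install new front
         return toy[i].val0;
      }

   convert_nia_to_ecu is exactly this shape, with 'compute' being the
   VG_(make_depth_1_ExeContext_from_Addr) / VG_(get_ECU_from_ExeContext)
   pair. */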
/* This marks the stack as addressible but undefined, after a call or
   return for a target that has an ABI defined stack redzone.  It
   happens quite a lot and needs to be fast.  This is the version for
   origin tracking.  The non-origin-tracking version is below. */
void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
                  base, len, nia );

   UInt ecu = convert_nia_to_ecu ( nia );
   tl_assert(VG_(is_plausible_ECU)(ecu));

   UInt otag = ecu | MC_OKIND_STACK;

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
      make_aligned_word64_undefined_w_otag(base +   0, otag);
      make_aligned_word64_undefined_w_otag(base +   8, otag);
      make_aligned_word64_undefined_w_otag(base +  16, otag);
      make_aligned_word64_undefined_w_otag(base +  24, otag);

      make_aligned_word64_undefined_w_otag(base +  32, otag);
      make_aligned_word64_undefined_w_otag(base +  40, otag);
      make_aligned_word64_undefined_w_otag(base +  48, otag);
      make_aligned_word64_undefined_w_otag(base +  56, otag);

      make_aligned_word64_undefined_w_otag(base +  64, otag);
      make_aligned_word64_undefined_w_otag(base +  72, otag);
      make_aligned_word64_undefined_w_otag(base +  80, otag);
      make_aligned_word64_undefined_w_otag(base +  88, otag);

      make_aligned_word64_undefined_w_otag(base +  96, otag);
      make_aligned_word64_undefined_w_otag(base + 104, otag);
      make_aligned_word64_undefined_w_otag(base + 112, otag);
      make_aligned_word64_undefined_w_otag(base + 120, otag);
   } else {
      MC_(make_mem_undefined_w_otag)(base, len, otag);
   }
#  endif

   /* Idea is: go fast when
         * 8-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely with a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)
   */
   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
           /* Now we know that the entire address range falls within a
              single secondary map, and that that secondary 'lives' in
              the main primary map. */
            SecMap* sm    = get_secmap_for_writing_low(a_lo);
            UWord   v_off = SM_OFF(a_lo);
            UShort* p     = (UShort*)(&sm->vabits8[v_off]);
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
            return;
         }
      }
   }

   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 288 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
           /* Now we know that the entire address range falls within a
              single secondary map, and that that secondary 'lives' in
              the main primary map. */
            SecMap* sm    = get_secmap_for_writing_low(a_lo);
            UWord   v_off = SM_OFF(a_lo);
            UShort* p     = (UShort*)(&sm->vabits8[v_off]);
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            p[16] = VA_BITS16_UNDEFINED;
            p[17] = VA_BITS16_UNDEFINED;
            p[18] = VA_BITS16_UNDEFINED;
            p[19] = VA_BITS16_UNDEFINED;
            p[20] = VA_BITS16_UNDEFINED;
            p[21] = VA_BITS16_UNDEFINED;
            p[22] = VA_BITS16_UNDEFINED;
            p[23] = VA_BITS16_UNDEFINED;
            p[24] = VA_BITS16_UNDEFINED;
            p[25] = VA_BITS16_UNDEFINED;
            p[26] = VA_BITS16_UNDEFINED;
            p[27] = VA_BITS16_UNDEFINED;
            p[28] = VA_BITS16_UNDEFINED;
            p[29] = VA_BITS16_UNDEFINED;
            p[30] = VA_BITS16_UNDEFINED;
            p[31] = VA_BITS16_UNDEFINED;
            p[32] = VA_BITS16_UNDEFINED;
            p[33] = VA_BITS16_UNDEFINED;
            p[34] = VA_BITS16_UNDEFINED;
            p[35] = VA_BITS16_UNDEFINED;
            set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
            return;
         }
      }
   }

   /* else fall into slow case */
   MC_(make_mem_undefined_w_otag)(base, len, otag);
}
/* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
   specialised for the non-origin-tracking case. */
void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
                  base, len );

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
      make_aligned_word64_undefined(base +   0);
      make_aligned_word64_undefined(base +   8);
      make_aligned_word64_undefined(base +  16);
      make_aligned_word64_undefined(base +  24);

      make_aligned_word64_undefined(base +  32);
      make_aligned_word64_undefined(base +  40);
      make_aligned_word64_undefined(base +  48);
      make_aligned_word64_undefined(base +  56);

      make_aligned_word64_undefined(base +  64);
      make_aligned_word64_undefined(base +  72);
      make_aligned_word64_undefined(base +  80);
      make_aligned_word64_undefined(base +  88);

      make_aligned_word64_undefined(base +  96);
      make_aligned_word64_undefined(base + 104);
      make_aligned_word64_undefined(base + 112);
      make_aligned_word64_undefined(base + 120);
   } else {
      make_mem_undefined(base, len);
   }
#  endif

   /* Idea is: go fast when
         * 8-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely with a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)
   */
   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
           /* Now we know that the entire address range falls within a
              single secondary map, and that that secondary 'lives' in
              the main primary map. */
            SecMap* sm    = get_secmap_for_writing_low(a_lo);
            UWord   v_off = SM_OFF(a_lo);
            UShort* p     = (UShort*)(&sm->vabits8[v_off]);
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            return;
         }
      }
   }

   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 288 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
           /* Now we know that the entire address range falls within a
              single secondary map, and that that secondary 'lives' in
              the main primary map. */
            SecMap* sm    = get_secmap_for_writing_low(a_lo);
            UWord   v_off = SM_OFF(a_lo);
            UShort* p     = (UShort*)(&sm->vabits8[v_off]);
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            p[16] = VA_BITS16_UNDEFINED;
            p[17] = VA_BITS16_UNDEFINED;
            p[18] = VA_BITS16_UNDEFINED;
            p[19] = VA_BITS16_UNDEFINED;
            p[20] = VA_BITS16_UNDEFINED;
            p[21] = VA_BITS16_UNDEFINED;
            p[22] = VA_BITS16_UNDEFINED;
            p[23] = VA_BITS16_UNDEFINED;
            p[24] = VA_BITS16_UNDEFINED;
            p[25] = VA_BITS16_UNDEFINED;
            p[26] = VA_BITS16_UNDEFINED;
            p[27] = VA_BITS16_UNDEFINED;
            p[28] = VA_BITS16_UNDEFINED;
            p[29] = VA_BITS16_UNDEFINED;
            p[30] = VA_BITS16_UNDEFINED;
            p[31] = VA_BITS16_UNDEFINED;
            p[32] = VA_BITS16_UNDEFINED;
            p[33] = VA_BITS16_UNDEFINED;
            p[34] = VA_BITS16_UNDEFINED;
            p[35] = VA_BITS16_UNDEFINED;
            return;
         }
      }
   }

   /* else fall into slow case */
   make_mem_undefined(base, len);
}
/* And this is an even more specialised case, for the case where there
   is no origin tracking, and the length is 128. */
void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) )) {
      make_aligned_word64_undefined(base +   0);
      make_aligned_word64_undefined(base +   8);
      make_aligned_word64_undefined(base +  16);
      make_aligned_word64_undefined(base +  24);

      make_aligned_word64_undefined(base +  32);
      make_aligned_word64_undefined(base +  40);
      make_aligned_word64_undefined(base +  48);
      make_aligned_word64_undefined(base +  56);

      make_aligned_word64_undefined(base +  64);
      make_aligned_word64_undefined(base +  72);
      make_aligned_word64_undefined(base +  80);
      make_aligned_word64_undefined(base +  88);

      make_aligned_word64_undefined(base +  96);
      make_aligned_word64_undefined(base + 104);
      make_aligned_word64_undefined(base + 112);
      make_aligned_word64_undefined(base + 120);
   } else {
      make_mem_undefined(base, 128);
   }
#  endif

   /* Idea is: go fast when
         * 16-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely with a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)

      Typically this applies to amd64 'ret' instructions, since RSP is
      16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
   */
   if (LIKELY( VG_IS_16_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      /* FIXME: come up with a sane story on the wraparound case
         (which of course cnanot happen, but still..) */
      /* tl_assert(a_lo < a_hi); */           // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
           /* Now we know that the entire address range falls within a
              single secondary map, and that that secondary 'lives' in
              the main primary map. */
            PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
            SecMap* sm    = get_secmap_for_writing_low(a_lo);
            UWord   v_off = SM_OFF(a_lo);
            UInt*   w32   = (UInt*)(&sm->vabits8[v_off]);
            w32[ 0] = VA_BITS32_UNDEFINED;
            w32[ 1] = VA_BITS32_UNDEFINED;
            w32[ 2] = VA_BITS32_UNDEFINED;
            w32[ 3] = VA_BITS32_UNDEFINED;
            w32[ 4] = VA_BITS32_UNDEFINED;
            w32[ 5] = VA_BITS32_UNDEFINED;
            w32[ 6] = VA_BITS32_UNDEFINED;
            w32[ 7] = VA_BITS32_UNDEFINED;
            return;
         }
      }
   }

   /* The same, but for when base is 8 % 16, which is the situation
      with RSP for amd64-ELF immediately after call instructions.
   */
   if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      /* FIXME: come up with a sane story on the wraparound case
         (which of course cnanot happen, but still..) */
      /* tl_assert(a_lo < a_hi); */           // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm    = get_secmap_for_writing_low(a_lo);
            UWord   v_off = SM_OFF(a_lo);
            UShort* w16   = (UShort*)(&sm->vabits8[v_off]);
            UInt*   w32   = (UInt*)(&w16[1]);
            /* The following assertion is commented out for obvious
               performance reasons, but was verified as valid when
               running the entire testsuite and also Firefox. */
            /* tl_assert(VG_IS_4_ALIGNED(w32)); */
            w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
            w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
            w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
            w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
            w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
            w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
            w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
            w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
            w16[15] = VA_BITS16_UNDEFINED; // w16[15]
            return;
         }
      }
   }

   /* else fall into slow case */
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
   make_mem_undefined(base, 128);
}
/*------------------------------------------------------------*/
/*--- Checking memory                                      ---*/
/*------------------------------------------------------------*/

/* Check permissions for address range.  If inadequate permissions
   exist, *bad_addr is set to the offending address, so the caller can
   know what it is. */

/* Returns True if [a .. a+len) is not addressible.  Otherwise,
   returns False, and if bad_addr is non-NULL, sets *bad_addr to
   indicate the lowest failing address.  Functions below are
   similar. */
Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_NOACCESS != vabits2) {
         if (bad_addr != NULL) *bad_addr = a;
         return False;
      }
      a++;
   }
   return True;
}

static Bool is_mem_addressable ( Addr a, SizeT len,
                                 /*OUT*/Addr* bad_addr )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_NOACCESS == vabits2) {
         if (bad_addr != NULL) *bad_addr = a;
         return False;
      }
      a++;
   }
   return True;
}

static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
                                      /*OUT*/Addr* bad_addr,
                                      /*OUT*/UInt* otag )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_IS_MEM_DEFINED);
   DEBUG("is_mem_defined\n");

   if (otag)     *otag = 0;
   if (bad_addr) *bad_addr = 0;
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_DEFINED != vabits2) {
         // Error! Nb: Report addressability errors in preference to
         // definedness errors. And don't report definedeness errors unless
         // --undef-value-errors=yes.
         if (bad_addr) {
            *bad_addr = a;
         }
         if (VA_BITS2_NOACCESS == vabits2) {
            return MC_AddrErr;
         }
         if (MC_(clo_mc_level) >= 2) {
            if (otag && MC_(clo_mc_level) == 3) {
               *otag = MC_(helperc_b_load1)( a );
            }
            return MC_ValueErr;
         }
      }
      a++;
   }
   return MC_Ok;
}

/* Like is_mem_defined but doesn't give up at the first uninitialised
   byte -- the entire range is always checked.  This is important for
   detecting errors in the case where a checked range strays into
   invalid memory, but that fact is not detected by the ordinary
   is_mem_defined(), because of an undefined section that precedes the
   out of range section, possibly as a result of an alignment hole in
   the checked data.  This version always checks the entire range and
   can report both a definedness and an accessbility error, if
   necessary. */
static void is_mem_defined_comprehensive (
               Addr a, SizeT len,
               /*OUT*/Bool* errorV,    /* is there a definedness err? */
               /*OUT*/Addr* bad_addrV, /* if so where? */
               /*OUT*/UInt* otagV,     /* and what's its otag? */
               /*OUT*/Bool* errorA,    /* is there an addressability err? */
               /*OUT*/Addr* bad_addrA  /* if so where? */
            )
{
   SizeT i;
   UWord vabits2;
   Bool  already_saw_errV = False;

   PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
   DEBUG("is_mem_defined_comprehensive\n");

   tl_assert(!(*errorV || *errorA));

   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
      vabits2 = get_vabits2(a);
      switch (vabits2) {
         case VA_BITS2_DEFINED:
            a++;
            break;
         case VA_BITS2_UNDEFINED:
         case VA_BITS2_PARTDEFINED:
            if (!already_saw_errV) {
               *errorV    = True;
               *bad_addrV = a;
               if (MC_(clo_mc_level) == 3) {
                  *otagV = MC_(helperc_b_load1)( a );
               } else {
                  *otagV = 0;
               }
               already_saw_errV = True;
            }
            a++; /* keep going */
            break;
         case VA_BITS2_NOACCESS:
            *errorA    = True;
            *bad_addrA = a;
            return; /* give up now. */
         default:
            tl_assert(0);
      }
   }
}

/* Check a zero-terminated ascii string.  Tricky -- don't want to
   examine the actual bytes, to find the end, until we're sure it is
   safe to do so. */
static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
{
   UWord vabits2;

   PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
   DEBUG("mc_is_defined_asciiz\n");

   if (otag)     *otag = 0;
   if (bad_addr) *bad_addr = 0;
   while (True) {
      PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_DEFINED != vabits2) {
         // Error! Nb: Report addressability errors in preference to
         // definedness errors. And don't report definedeness errors unless
         // --undef-value-errors=yes.
         if (bad_addr) {
            *bad_addr = a;
         }
         if (VA_BITS2_NOACCESS == vabits2) {
            return MC_AddrErr;
         }
         if (MC_(clo_mc_level) >= 2) {
            if (otag && MC_(clo_mc_level) == 3) {
               *otag = MC_(helperc_b_load1)( a );
            }
            return MC_ValueErr;
         }
      }
      /* Ok, a is safe to read. */
      if (* ((UChar*)a) == 0) {
         return MC_Ok;
      }
      a++;
   }
}
/*------------------------------------------------------------*/
/*--- Memory event handlers                                ---*/
/*------------------------------------------------------------*/

static
void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
                                Addr base, SizeT size )
{
   Addr bad_addr;
   Bool ok = is_mem_addressable ( base, size, &bad_addr );

   if (!ok) {
      switch (part) {
      case Vg_CoreSysCall:
         MC_(record_memparam_error) ( tid, bad_addr,
                                      /*isAddrErr*/True, s, 0/*otag*/ );
         break;

      case Vg_CoreSignal:
         MC_(record_core_mem_error)( tid, s );
         break;

      default:
         VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
      }
   }
}

static
void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
                            Addr base, SizeT size )
{
   UInt otag = 0;
   Addr bad_addr;
   MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );

   if (MC_Ok != res) {
      Bool isAddrErr = ( MC_AddrErr == res ? True : False );

      switch (part) {
      case Vg_CoreSysCall:
         MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
                                      isAddrErr ? 0 : otag );
         break;

      case Vg_CoreSysCallArgInMem:
         MC_(record_regparam_error) ( tid, s, otag );
         break;

      /* If we're being asked to jump to a silly address, record an error
         message before potentially crashing the entire system. */
      case Vg_CoreTranslate:
         MC_(record_jump_error)( tid, bad_addr );
         break;

      default:
         VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
      }
   }
}

static
void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
                                   const HChar* s, Addr str )
{
   MC_ReadResult res;
   Addr bad_addr = 0;   // shut GCC up
   UInt otag = 0;

   tl_assert(part == Vg_CoreSysCall);
   res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   if (MC_Ok != res) {
      Bool isAddrErr = ( MC_AddrErr == res ? True : False );
      MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
                                   isAddrErr ? 0 : otag );
   }
}

/* Handling of mmap and mprotect is not as simple as it seems.

   The underlying semantics are that memory obtained from mmap is
   always initialised, but may be inaccessible.  And changes to the
   protection of memory do not change its contents and hence not its
   definedness state.  Problem is we can't model
   inaccessible-but-with-some-definedness state; once we mark memory
   as inaccessible we lose all info about definedness, and so can't
   restore that if it is later made accessible again.

   One obvious thing to do is this:

      mmap/mprotect NONE  -> noaccess
      mmap/mprotect other -> defined

   The problem case here is: taking accessible memory, writing
   uninitialised data to it, mprotecting it NONE and later mprotecting
   it back to some accessible state causes the undefinedness to be
   lost.

   A better proposal is:

     (1) mmap NONE       ->  make noaccess
     (2) mmap other      ->  make defined

     (3) mprotect NONE   ->  # no change
     (4) mprotect other  ->  change any "noaccess" to "defined"

   (2) is OK because memory newly obtained from mmap really is defined
       (zeroed out by the kernel -- doing anything else would
       constitute a massive security hole.)

   (1) is OK because the only way to make the memory usable is via
       (4), in which case we also wind up correctly marking it all as
       defined.

   (3) is the weak case.  We choose not to change memory state.
       (presumably the range is in some mixture of "defined" and
       "undefined", viz, accessible but with arbitrary V bits).  Doing
       nothing means we retain the V bits, so that if the memory is
       later mprotected "other", the V bits remain unchanged, so there
       can be no false negatives.  The bad effect is that if there's
       an access in the area, then MC cannot warn; but at least we'll
       get a SEGV to show, so it's better than nothing.

   Consider the sequence (3) followed by (4).  Any memory that was
   "defined" or "undefined" previously retains its state (as
   required).  Any memory that was "noaccess" before can only have
   been made that way by (1), and so it's OK to change it to
   "defined".

   See https://bugs.kde.org/show_bug.cgi?id=205541
   and https://bugs.kde.org/show_bug.cgi?id=210268
*/
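/* A worked example of the policy above (illustrative sketch only, not
   part of Memcheck): suppose the client does

      p = mmap(NULL, len, PROT_READ|PROT_WRITE, ...);  // rule (2): defined
      p[0] = some_uninitialised_value;                 // V bits of the value
                                                       // are copied into p[0]
      mprotect(p, len, PROT_NONE);                     // rule (3): no change
      mprotect(p, len, PROT_READ|PROT_WRITE);          // rule (4): only bytes
                                                       // that were "noaccess"
                                                       // would be changed

   Since nothing in [p, p+len) was "noaccess" at the final step, the V bits
   recorded by the store survive the PROT_NONE round trip, so a later read
   of uninitialised parts of the buffer is still reported.  That is the
   payoff for doing nothing at step (3). */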
static
void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
                       ULong di_handle )
{
   if (rr || ww || xx) {
      /* (2) mmap/mprotect other -> defined */
      MC_(make_mem_defined)(a, len);
   } else {
      /* (1) mmap/mprotect NONE  -> noaccess */
      MC_(make_mem_noaccess)(a, len);
   }
}

static
void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
{
   if (rr || ww || xx) {
      /* (4) mprotect other  ->  change any "noaccess" to "defined" */
      make_mem_defined_if_noaccess(a, len);
   } else {
      /* (3) mprotect NONE   ->  # no change */
      /* do nothing */
   }
}


static
void mc_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   // Because code is defined, initialised variables get put in the data
   // segment and are defined, and uninitialised variables get put in the
   // bss segment and are auto-zeroed (and so defined).
   //
   // It's possible that there will be padding between global variables.
   // This will also be auto-zeroed, and marked as defined by Memcheck.  If
   // a program uses it, Memcheck will not complain.  This is arguably a
   // false negative, but it's a grey area -- the behaviour is defined (the
   // padding is zeroed) but it's probably not what the user intended.  And
   // we can't avoid it.
   //
   // Note: we generally ignore RWX permissions, because we can't track them
   // without requiring more than one A bit which would slow things down a
   // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
   // So we mark any such pages as "unaddressable".
   DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
         a, (ULong)len, rr, ww, xx);
   mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
}

static
void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
{
   MC_(make_mem_defined)(a, len);
}
/*------------------------------------------------------------*/
/*--- Register event handlers                              ---*/
/*------------------------------------------------------------*/

/* Try and get a nonzero origin for the guest state section of thread
   tid characterised by (offset,size).  Return 0 if nothing to show
   for it. */
static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
                                             Int offset, SizeT size )
{
   Int   sh2off;
   UInt  area[3];
   UInt  otag;
   sh2off = MC_(get_otrack_shadow_offset)( offset, size );
   if (sh2off == -1)
      return 0;  /* This piece of guest state is not tracked */
   tl_assert(sh2off >= 0);
   tl_assert(0 == (sh2off % 4));
   area[0] = 0x31313131;
   area[2] = 0x27272727;
   VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
   tl_assert(area[0] == 0x31313131);
   tl_assert(area[2] == 0x27272727);
   otag = area[1];
   return otag;
}


/* When some chunk of guest state is written, mark the corresponding
   shadow area as valid.  This is used to initialise arbitrarily large
   chunks of guest state, hence the _SIZE value, which has to be as
   big as the biggest guest state.
*/
static void mc_post_reg_write ( CorePart part, ThreadId tid,
                                PtrdiffT offset, SizeT size)
{
#  define MAX_REG_WRITE_SIZE 1712
   UChar area[MAX_REG_WRITE_SIZE];
   tl_assert(size <= MAX_REG_WRITE_SIZE);
   VG_(memset)(area, V_BITS8_DEFINED, size);
   VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
#  undef MAX_REG_WRITE_SIZE
}

static
void mc_post_reg_write_clientcall ( ThreadId tid,
                                    PtrdiffT offset, SizeT size, Addr f)
{
   mc_post_reg_write(/*dummy*/0, tid, offset, size);
}

/* Look at the definedness of the guest's shadow state for
   [offset, offset+len).  If any part of that is undefined, record
   a parameter error.
*/
static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
                              PtrdiffT offset, SizeT size)
{
   Int   i;
   Bool  bad;
   UInt  otag;

   UChar area[16];
   tl_assert(size <= 16);

   VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );

   bad = False;
   for (i = 0; i < size; i++) {
      if (area[i] != V_BITS8_DEFINED) {
         bad = True;
         break;
      }
   }

   if (!bad)
      return;

   /* We've found some undefinedness.  See if we can also find an
      origin for it. */
   otag = mb_get_origin_for_guest_offset( tid, offset, size );
   MC_(record_regparam_error) ( tid, s, otag );
}


/*------------------------------------------------------------*/
/*--- Register-memory event handlers                       ---*/
/*------------------------------------------------------------*/

static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
                                 PtrdiffT guest_state_offset, SizeT size )
{
   SizeT i;
   UChar vbits8;
   Int offset;
   UInt d32;

   /* Slow loop. */
   for (i = 0; i < size; i++) {
      get_vbits8( a+i, &vbits8 );
      VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
                                 1, &vbits8 );
   }

   if (MC_(clo_mc_level) != 3)
      return;

   /* Track origins. */
   offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
   if (offset == -1)
      return;

   switch (size) {
   case 1:
      d32 = MC_(helperc_b_load1)( a );
      break;
   case 2:
      d32 = MC_(helperc_b_load2)( a );
      break;
   case 4:
      d32 = MC_(helperc_b_load4)( a );
      break;
   case 8:
      d32 = MC_(helperc_b_load8)( a );
      break;
   case 16:
      d32 = MC_(helperc_b_load16)( a );
      break;
   case 32:
      d32 = MC_(helperc_b_load32)( a );
      break;
   default:
      tl_assert(0);
   }

   VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
}

static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
                                 PtrdiffT guest_state_offset, Addr a,
                                 SizeT size )
{
   SizeT i;
   UChar vbits8;
   Int offset;
   UInt d32;

   /* Slow loop. */
   for (i = 0; i < size; i++) {
      VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
                                 guest_state_offset+i, 1 );
      set_vbits8( a+i, vbits8 );
   }

   if (MC_(clo_mc_level) != 3)
      return;

   /* Track origins. */
   offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
   if (offset == -1)
      return;

   VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
   switch (size) {
   case 1:
      MC_(helperc_b_store1)( a, d32 );
      break;
   case 2:
      MC_(helperc_b_store2)( a, d32 );
      break;
   case 4:
      MC_(helperc_b_store4)( a, d32 );
      break;
   case 8:
      MC_(helperc_b_store8)( a, d32 );
      break;
   case 16:
      MC_(helperc_b_store16)( a, d32 );
      break;
   case 32:
      MC_(helperc_b_store32)( a, d32 );
      break;
   default:
      tl_assert(0);
   }
}
/*------------------------------------------------------------*/
/*--- Some static assertions                               ---*/
/*------------------------------------------------------------*/

/* The handwritten assembly helpers below have baked-in assumptions
   about various constant values.  These assertions attempt to make
   that a bit safer by checking those values and flagging changes that
   would make the assembly invalid.  Not perfect but it's better than
   nothing. */

STATIC_ASSERT(SM_CHUNKS * 4 == 65536);

STATIC_ASSERT(VA_BITS8_DEFINED   == 0xAA);
STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);

STATIC_ASSERT(V_BITS32_DEFINED   == 0x00000000);
STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);

STATIC_ASSERT(VA_BITS4_DEFINED   == 0xA);
STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);

STATIC_ASSERT(V_BITS16_DEFINED   == 0x0000);
STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);

STATIC_ASSERT(VA_BITS2_DEFINED   == 2);
STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);

STATIC_ASSERT(V_BITS8_DEFINED   == 0x00);
STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
/*------------------------------------------------------------*/
/*--- Functions called directly from generated code:       ---*/
/*--- Load/store handlers.                                 ---*/
/*------------------------------------------------------------*/

/* Types: LOADV32, LOADV16, LOADV8 are:
               UWord fn ( Addr a )
   so they return 32-bits on 32-bit machines and 64-bits on
   64-bit machines.  Addr has the same size as a host word.

   LOADV64 is always  ULong fn ( Addr a )

   Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   are a UWord, and for STOREV64 they are a ULong.
*/
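/* For instance, the 64-bit helpers defined later in this file have the
   shapes

      VG_REGPARM(1) ULong MC_(helperc_LOADV64le)  ( Addr a );
      VG_REGPARM(1) void  MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 );

   and, as a sketch of the narrower forms assuming the convention just
   described (see their definitions further down in this file for the
   authoritative signatures):

      VG_REGPARM(1) UWord MC_(helperc_LOADV32le)  ( Addr a );
      VG_REGPARM(2) void  MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 );
*/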
/* If any part of '_a' indicated by the mask is 1, either '_a' is not
   naturally '_sz/8'-aligned, or it exceeds the range covered by the
   primary map.  This is all very tricky (and important!), so let's
   work through the maths by hand (below), *and* assert for these
   values at startup. */
#define MASK(_szInBytes) \
   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )

/* MASK only exists so as to define this macro. */
#define UNALIGNED_OR_HIGH(_a,_szInBits) \
   ((_a) & MASK((_szInBits>>3)))

/* On a 32-bit machine:

   N_PRIMARY_BITS          == 16, so
   N_PRIMARY_MAP           == 0x10000, so
   N_PRIMARY_MAP-1         == 0xFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
           = ~ ( 0xFFFF | 0xFFFF0000 )
           = ~ 0xFFFF'FFFF
           = 0

   MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
           = ~ ( 0xFFFE | 0xFFFF0000 )
           = ~ 0xFFFF'FFFE
           = 1

   MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
           = ~ ( 0xFFFC | 0xFFFF0000 )
           = ~ 0xFFFF'FFFC
           = 3

   MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
           = ~ ( 0xFFF8 | 0xFFFF0000 )
           = ~ 0xFFFF'FFF8
           = 7

   Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   the 1-byte alignment case, it is always a zero value, since MASK(1)
   is zero.  All as expected.

   On a 64-bit machine, it's more complex, since we're testing
   simultaneously for misalignment and for the address being at or
   above the range covered by the primary map:

   N_PRIMARY_BITS          == 20, so
   N_PRIMARY_MAP           == 0x100000, so
   N_PRIMARY_MAP-1         == 0xFFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFF | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFF
           = 0xFFFF'FFF0'0000'0000

   MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFE | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFE
           = 0xFFFF'FFF0'0000'0001

   MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFC | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFC
           = 0xFFFF'FFF0'0000'0003

   MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
           = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFF8
           = 0xFFFF'FFF0'0000'0007
*/
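/* Illustrative sketch only (not part of the build): the macro can be
   checked with plain arithmetic.  Using the 64-bit values derived above
   (so assuming N_PRIMARY_MAP == 0x100000, which gives
   MASK(4) == 0xFFFFFFF000000003):

      UNALIGNED_OR_HIGH(0x10004, 32)
         = 0x10004 & 0xFFFFFFF000000003 = 0          // aligned, low: fast path
      UNALIGNED_OR_HIGH(0x10006, 32)
         = 0x10006 & 0xFFFFFFF000000003 = 2          // misaligned: slow path
      UNALIGNED_OR_HIGH(0x2000000000, 32)
         = 0x2000000000 & 0xFFFFFFF000000003
         = 0x2000000000                              // above the primary map: slow path

   which is exactly the test the LOADV/STOREV fast paths below rely on. */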
/*------------------------------------------------------------*/
/*--- LOADV256 and LOADV128                                ---*/
/*------------------------------------------------------------*/

static INLINE
void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
                           Addr a, SizeT nBits, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV_128_OR_256);

#ifndef PERF_FAST_LOADV
   mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
#else
   {
      UWord   sm_off16, vabits16, j;
      UWord   nBytes  = nBits / 8;
      UWord   nULongs = nBytes / 8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
         PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
         mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
         return;
      }

      /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
         suitably aligned, is mapped, and addressible. */
      for (j = 0; j < nULongs; j++) {
         sm       = get_secmap_for_reading_low(a + 8*j);
         sm_off16 = SM_OFF_16(a + 8*j);
         vabits16 = ((UShort*)(sm->vabits8))[sm_off16];

         // Convert V bits from compact memory form to expanded
         // register form.
         if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
            res[j] = V_BITS64_DEFINED;
         } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
            res[j] = V_BITS64_UNDEFINED;
         } else {
            /* Slow case: some block of 8 bytes are not all-defined or
               all-undefined. */
            PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
            mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
            return;
         }
      }
   }
#endif
}

VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
}
VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
}

VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
}
VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
}
/*------------------------------------------------------------*/
/*--- LOADV64                                              ---*/
/*------------------------------------------------------------*/

static INLINE
ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV64);

#ifndef PERF_FAST_LOADV
   return mc_LOADVn_slow( a, 64, isBigEndian );
#else
   {
      UWord   sm_off16, vabits16;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
         PROF_EVENT(MCPE_LOADV64_SLOW1);
         return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
      }

      sm       = get_secmap_for_reading_low(a);
      sm_off16 = SM_OFF_16(a);
      vabits16 = ((UShort*)(sm->vabits8))[sm_off16];

      // Handle common case quickly: a is suitably aligned, is mapped, and
      // addressible.
      // Convert V bits from compact memory form to expanded register form.
      if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
         return V_BITS64_DEFINED;
      } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
         return V_BITS64_UNDEFINED;
      } else {
         /* Slow case: the 8 bytes are not all-defined or all-undefined. */
         PROF_EVENT(MCPE_LOADV64_SLOW2);
         return mc_LOADVn_slow( a, 64, isBigEndian );
      }
   }
#endif
}

// Generic for all platforms
VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
{
   return mc_LOADV64(a, True);
}

// Non-generic assembly for arm32-linux
4859 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4860 && defined(VGP_arm_linux)
4861 __asm__( /* Derived from the 32 bit assembly helper */
4864 ".global vgMemCheck_helperc_LOADV64le \n"
4865 ".type vgMemCheck_helperc_LOADV64le, %function \n"
4866 "vgMemCheck_helperc_LOADV64le: \n"
4868 " movw r3, #:lower16:primary_map \n"
4869 " bne .LLV64LEc4 \n" // if misaligned
4870 " lsr r2, r0, #16 \n"
4871 " movt r3, #:upper16:primary_map \n"
4872 " ldr r2, [r3, r2, lsl #2] \n"
4873 " uxth r1, r0 \n" // r1 is 0-(16)-0 X-(13)-X 000
4874 " movw r3, #0xAAAA \n"
4875 " lsr r1, r1, #2 \n" // r1 is 0-(16)-0 00 X-(13)-X 0
4876 " ldrh r1, [r2, r1] \n"
4877 " cmp r1, r3 \n" // 0xAAAA == VA_BITS16_DEFINED
4878 " bne .LLV64LEc0 \n" // if !all_defined
4879 " mov r1, #0x0 \n" // 0x0 == V_BITS32_DEFINED
4880 " mov r0, #0x0 \n" // 0x0 == V_BITS32_DEFINED
4883 " movw r3, #0x5555 \n"
4884 " cmp r1, r3 \n" // 0x5555 == VA_BITS16_UNDEFINED
4885 " bne .LLV64LEc4 \n" // if !all_undefined
4886 " mov r1, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4887 " mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4893 " bl mc_LOADVn_slow \n"
4895 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
__asm__(
".global vgMemCheck_helperc_LOADV64le\n"
".type   vgMemCheck_helperc_LOADV64le, @function\n"
"vgMemCheck_helperc_LOADV64le:\n"
"   test $0x7, %eax\n"
"   jne  .LLV64LE2\n"          /* jump if not aligned */
"   movzwl %ax, %edx\n"
"   shr  $0x10, %ecx\n"
"   mov  primary_map(,%ecx,4), %ecx\n"
"   movzwl (%ecx,%edx,2), %edx\n"
"   cmp  $0xaaaa, %edx\n"
"   jne  .LLV64LE1\n"          /* jump if not all defined */
"   xor  %eax, %eax\n"         /* return 0 in edx:eax */
"   cmp  $0x5555, %edx\n"
"   jne  .LLV64LE2\n"          /* jump if not all undefined */
"   or   $0xffffffff, %eax\n"  /* else return all bits set in edx:eax */
"   or   $0xffffffff, %edx\n"
"   xor  %ecx, %ecx\n"         /* tail call to mc_LOADVn_slow(a, 64, 0) */
"   jmp  mc_LOADVn_slow\n"
".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
);
#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
{
   return mc_LOADV64(a, False);
}
#endif
/*------------------------------------------------------------*/
/*--- STOREV64                                             ---*/
/*------------------------------------------------------------*/
void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
{
   PROF_EVENT(MCPE_STOREV64);

#ifndef PERF_FAST_STOREV
   // XXX: this slow case seems to be marginally faster than the fast case!
   // Investigate further.
   mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
#else
   {
      UWord   sm_off16, vabits16;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
         PROF_EVENT(MCPE_STOREV64_SLOW1);
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }

      sm       = get_secmap_for_reading_low(a);
      sm_off16 = SM_OFF_16(a);
      vabits16 = ((UShort*)(sm->vabits8))[sm_off16];

      // To understand the below cleverness, see the extensive comments
      // in MC_(helperc_STOREV8).
      if (LIKELY(V_BITS64_DEFINED == vbits64)) {
         if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV64_SLOW2);
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }
      if (V_BITS64_UNDEFINED == vbits64) {
         if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV64_SLOW3);
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }

      PROF_EVENT(MCPE_STOREV64_SLOW4);
      mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   }
#endif
}
VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
{
   mc_STOREV64(a, vbits64, True);
}
VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
{
   mc_STOREV64(a, vbits64, False);
}
/*------------------------------------------------------------*/
/*--- LOADV32                                              ---*/
/*------------------------------------------------------------*/
UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV32);

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
         PROF_EVENT(MCPE_LOADV32_SLOW1);
         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // Handle common case quickly: a is suitably aligned, is mapped, and the
      // entire word32 it lives in is addressible.
      // Convert V bits from compact memory form to expanded register form.
      // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
      // Almost certainly not necessary, but be paranoid.
      if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      } else {
         /* Slow case: the 4 bytes are not all-defined or all-undefined. */
         PROF_EVENT(MCPE_LOADV32_SLOW2);
         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
      }
   }
#endif
}
// Generic for all platforms
VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
{
   return mc_LOADV32(a, True);
}
// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
__asm__( /* Derived from NCode template */
".global vgMemCheck_helperc_LOADV32le        \n"
".type   vgMemCheck_helperc_LOADV32le, %function \n"
"vgMemCheck_helperc_LOADV32le:               \n"
"      tst    r0, #3                         \n" // 1
"      movw   r3, #:lower16:primary_map      \n" // 1
"      bne    .LLV32LEc4                     \n" // 2  if misaligned
"      lsr    r2, r0, #16                    \n" // 3
"      movt   r3, #:upper16:primary_map      \n" // 3
"      ldr    r2, [r3, r2, lsl #2]           \n" // 4
"      uxth   r1, r0                         \n" // 4
"      ldrb   r1, [r2, r1, lsr #2]           \n" // 5
"      cmp    r1, #0xAA                      \n" // 6  0xAA == VA_BITS8_DEFINED
"      bne    .LLV32LEc0                     \n" // 7  if !all_defined
"      mov    r0, #0x0                       \n" // 8  0x0 == V_BITS32_DEFINED
"      cmp    r1, #0x55                      \n" // 0x55 == VA_BITS8_UNDEFINED
"      bne    .LLV32LEc4                     \n" // if !all_undefined
"      mov    r0, #0xFFFFFFFF                \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
"      bl     mc_LOADVn_slow                 \n"
".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
);
#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
__asm__(
".global vgMemCheck_helperc_LOADV32le\n"
".type   vgMemCheck_helperc_LOADV32le, @function\n"
"vgMemCheck_helperc_LOADV32le:\n"
"   test $0x3, %eax\n"
"   jnz  .LLV32LE2\n"          /* jump if misaligned */
"   mov  primary_map(,%edx,4), %ecx\n"
"   movzwl %ax, %edx\n"
"   movzbl (%ecx,%edx,1), %edx\n"
"   cmp  $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED */
"   jne  .LLV32LE1\n"          /* jump if not completely defined */
"   xor  %eax, %eax\n"         /* else return V_BITS32_DEFINED */
"   cmp  $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
"   jne  .LLV32LE2\n"          /* jump if not completely undefined */
"   or   $0xffffffff, %eax\n"  /* else return V_BITS32_UNDEFINED */
"   xor  %ecx, %ecx\n"         /* tail call mc_LOADVn_slow(a, 32, 0) */
"   jmp  mc_LOADVn_slow\n"
".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
);
#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
{
   return mc_LOADV32(a, False);
}
#endif
/*------------------------------------------------------------*/
/*--- STOREV32                                             ---*/
/*------------------------------------------------------------*/
void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
{
   PROF_EVENT(MCPE_STOREV32);

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
         PROF_EVENT(MCPE_STOREV32_SLOW1);
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // To understand the below cleverness, see the extensive comments
      // in MC_(helperc_STOREV8).
      if (LIKELY(V_BITS32_DEFINED == vbits32)) {
         if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
            sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV32_SLOW2);
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }
      if (V_BITS32_UNDEFINED == vbits32) {
         if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
            sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV32_SLOW3);
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }

      PROF_EVENT(MCPE_STOREV32_SLOW4);
      mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   }
#endif
}
VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
{
   mc_STOREV32(a, vbits32, True);
}
VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
{
   mc_STOREV32(a, vbits32, False);
}
/*------------------------------------------------------------*/
/*--- LOADV16                                              ---*/
/*------------------------------------------------------------*/
UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV16);

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
         PROF_EVENT(MCPE_LOADV16_SLOW1);
         return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];
      // Handle common case quickly: a is suitably aligned, is mapped, and is
      // addressible.
      // Convert V bits from compact memory form to expanded register form
      if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
      else {
         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
         // the two sub-bytes.
         UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
         if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
         else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
         else {
            /* Slow case: the two bytes are not all-defined or all-undefined. */
            PROF_EVENT(MCPE_LOADV16_SLOW2);
            return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
         }
      }
   }
#endif
}
// Generic for all platforms
VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
{
   return mc_LOADV16(a, True);
}
// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
__asm__( /* Derived from NCode template */
".global vgMemCheck_helperc_LOADV16le        \n"
".type   vgMemCheck_helperc_LOADV16le, %function \n"
"vgMemCheck_helperc_LOADV16le:               \n" //
"      bne    .LLV16LEc12                    \n" // if misaligned
"      lsr    r2, r0, #16                    \n" // r2 = pri-map-ix
"      movw   r3, #:lower16:primary_map      \n" //
"      uxth   r1, r0                         \n" // r1 = sec-map-offB
"      movt   r3, #:upper16:primary_map      \n" //
"      ldr    r2, [r3, r2, lsl #2]           \n" // r2 = sec-map
"      ldrb   r1, [r2, r1, lsr #2]           \n" // r1 = sec-map-VABITS8
"      cmp    r1, #0xAA                      \n" // r1 == VA_BITS8_DEFINED?
"      bne    .LLV16LEc0                     \n" // no, goto .LLV16LEc0
"      mov    r0, #0xFFFFFFFF                \n" //
"      lsl    r0, r0, #16                    \n" // V_BITS16_DEFINED | top16safe
"      cmp    r1, #0x55                      \n" // VA_BITS8_UNDEFINED
"      bne    .LLV16LEc4                     \n" //
"      mov    r0, #0xFFFFFFFF                \n" // V_BITS16_UNDEFINED | top16safe
// r1 holds sec-map-VABITS8.  r0 holds the address and is 2-aligned.
// Extract the relevant 4 bits and inspect.
"      and    r2, r0, #2                     \n" // addr & 2
"      add    r2, r2, r2                     \n" // 2 * (addr & 2)
"      lsr    r1, r1, r2                     \n" // sec-map-VABITS8 >> (2 * (addr & 2))
"      and    r1, r1, #15                    \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
"      cmp    r1, #0xA                       \n" // VA_BITS4_DEFINED
"      beq    .LLV16LEh9                     \n" //
"      cmp    r1, #0x5                       \n" // VA_BITS4_UNDEFINED
"      beq    .LLV16LEc2                     \n" //
".LLV16LEc12:                                \n" //
"      push   {r4, lr}                       \n" //
"      mov    r1, #16                        \n" //
"      bl     mc_LOADVn_slow                 \n" //
"      pop    {r4, pc}                       \n" //
".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
);
#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
__asm__(
".global vgMemCheck_helperc_LOADV16le\n"
".type   vgMemCheck_helperc_LOADV16le, @function\n"
"vgMemCheck_helperc_LOADV16le:\n"
"   test $0x1, %eax\n"
"   jne  .LLV16LE5\n"          /* jump if not aligned */
"   shr  $0x10, %edx\n"
"   mov  primary_map(,%edx,4), %ecx\n"
"   movzwl %ax, %edx\n"
"   movzbl (%ecx,%edx,1), %edx\n" /* edx = VA bits for 32bit */
"   cmp  $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED */
"   jne  .LLV16LE2\n"          /* jump if not all 32bits defined */
"   mov  $0xffff0000,%eax\n"   /* V_BITS16_DEFINED | top16safe */
"   cmp  $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
"   jne  .LLV16LE4\n"          /* jump if not all 32bits undefined */
"   or   $0xffffffff,%eax\n"   /* V_BITS16_UNDEFINED | top16safe */
"   je   .LLV16LE1\n"          /* jump if all 16bits are defined */
"   je   .LLV16LE3\n"          /* jump if all 16bits are undefined */
"   xor  %ecx, %ecx\n"         /* tail call mc_LOADVn_slow(a, 16, 0) */
"   jmp  mc_LOADVn_slow\n"
".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
);
#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
{
   return mc_LOADV16(a, False);
}
#endif
/*------------------------------------------------------------*/
/*--- STOREV16                                             ---*/
/*------------------------------------------------------------*/
/* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift = (a & 2) << 1;               // shift by 0 or 4
   vabits8 >>= shift;                  // shift the four bits to the bottom
   // check 2 x vabits2 != VA_BITS2_NOACCESS
   return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
      &&  ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
}
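/* Illustrative worked example (a sketch; it assumes VA_BITS2_NOACCESS == 0,
   which is what the all-defined 0xAA / all-undefined 0x55 patterns used by
   the assembly helpers above imply).  Take vabits8 == 0x2A, i.e. binary
   00 10 10 10: byte 3 of the 4-byte group is not accessible and bytes 0..2
   are defined.  For a 2-aligned address with (a & 2) == 0 the low nibble
   0xA is inspected: both two-bit fields are 10, so the function returns
   True.  For (a & 2) == 2 the high nibble 0x2 is inspected: the field for
   byte a+1 is 00 == VA_BITS2_NOACCESS, so the function returns False and
   the caller must fall back to mc_STOREVn_slow, which reports the
   accessibility error. */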
void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
{
   PROF_EVENT(MCPE_STOREV16);

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
         PROF_EVENT(MCPE_STOREV16_SLOW1);
         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
         return;
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // To understand the below cleverness, see the extensive comments
      // in MC_(helperc_STOREV8).
      if (LIKELY(V_BITS16_DEFINED == vbits16)) {
         if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
            return;
         }
         if (!is_distinguished_sm(sm)
             && accessible_vabits4_in_vabits8(a, vabits8)) {
            insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
                                         &(sm->vabits8[sm_off]) );
            return;
         }
         PROF_EVENT(MCPE_STOREV16_SLOW2);
         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
         return;
      }
      if (V_BITS16_UNDEFINED == vbits16) {
         if (vabits8 == VA_BITS8_UNDEFINED) {
            return;
         }
         if (!is_distinguished_sm(sm)
             && accessible_vabits4_in_vabits8(a, vabits8)) {
            insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
                                         &(sm->vabits8[sm_off]) );
            return;
         }
         PROF_EVENT(MCPE_STOREV16_SLOW3);
         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
         return;
      }

      PROF_EVENT(MCPE_STOREV16_SLOW4);
      mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   }
#endif
}
VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
{
   mc_STOREV16(a, vbits16, True);
}
VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
{
   mc_STOREV16(a, vbits16, False);
}
/*------------------------------------------------------------*/
/*--- LOADV8                                               ---*/
/*------------------------------------------------------------*/

/* Note: endianness is irrelevant for size == 1 */
// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
__asm__( /* Derived from NCode template */
".global vgMemCheck_helperc_LOADV8           \n"
".type   vgMemCheck_helperc_LOADV8, %function \n"
"vgMemCheck_helperc_LOADV8:                  \n" //
"      lsr    r2, r0, #16                    \n" // r2 = pri-map-ix
"      movw   r3, #:lower16:primary_map      \n" //
"      uxth   r1, r0                         \n" // r1 = sec-map-offB
"      movt   r3, #:upper16:primary_map      \n" //
"      ldr    r2, [r3, r2, lsl #2]           \n" // r2 = sec-map
"      ldrb   r1, [r2, r1, lsr #2]           \n" // r1 = sec-map-VABITS8
"      cmp    r1, #0xAA                      \n" // r1 == VA_BITS8_DEFINED?
"      bne    .LLV8c0                        \n" // no, goto .LLV8c0
"      mov    r0, #0xFFFFFF00                \n" // V_BITS8_DEFINED | top24safe
"      cmp    r1, #0x55                      \n" // VA_BITS8_UNDEFINED
"      bne    .LLV8c4                        \n" //
"      mov    r0, #0xFFFFFFFF                \n" // V_BITS8_UNDEFINED | top24safe
// r1 holds sec-map-VABITS8
// r0 holds the address.  Extract the relevant 2 bits and inspect.
"      and    r2, r0, #3                     \n" // addr & 3
"      add    r2, r2, r2                     \n" // 2 * (addr & 3)
"      lsr    r1, r1, r2                     \n" // sec-map-VABITS8 >> (2 * (addr & 3))
"      and    r1, r1, #3                     \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
"      cmp    r1, #2                         \n" // VA_BITS2_DEFINED
"      beq    .LLV8h9                        \n" //
"      cmp    r1, #1                         \n" // VA_BITS2_UNDEFINED
"      beq    .LLV8c2                        \n" //
"      push   {r4, lr}                       \n" //
"      bl     mc_LOADVn_slow                 \n" //
"      pop    {r4, pc}                       \n" //
".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
);
/* Non-generic assembly for x86-linux */
#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
__asm__(
".global vgMemCheck_helperc_LOADV8\n"
".type   vgMemCheck_helperc_LOADV8, @function\n"
"vgMemCheck_helperc_LOADV8:\n"
"   shr  $0x10, %edx\n"
"   mov  primary_map(,%edx,4), %ecx\n"
"   movzwl %ax, %edx\n"
"   movzbl (%ecx,%edx,1), %edx\n" /* edx = VA bits for 32bit */
"   cmp  $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED? */
"   jne  .LLV8LE2\n"           /* jump if not defined */
"   mov  $0xffffff00, %eax\n"  /* V_BITS8_DEFINED | top24safe */
"   cmp  $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
"   jne  .LLV8LE4\n"           /* jump if not all 32bits are undefined */
"   or   $0xffffffff, %eax\n"  /* V_BITS8_UNDEFINED | top24safe */
"   je   .LLV8LE1\n"           /* jump if all 8bits are defined */
"   je   .LLV8LE3\n"           /* jump if all 8bits are undefined */
"   xor  %ecx, %ecx\n"         /* tail call to mc_LOADVn_slow(a, 8, 0) */
"   jmp  mc_LOADVn_slow\n"
".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
);
#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
UWord MC_(helperc_LOADV8) ( Addr a )
{
   PROF_EVENT(MCPE_LOADV8);

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
         PROF_EVENT(MCPE_LOADV8_SLOW1);
         return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];
      // Convert V bits from compact memory form to expanded register form
      // Handle common case quickly: a is mapped, and the entire
      // word32 it lives in is addressible.
      if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
      else {
         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
         // the single byte.
         UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
         if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
         else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
         else {
            /* Slow case: the byte is not all-defined or all-undefined. */
            PROF_EVENT(MCPE_LOADV8_SLOW2);
            return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
         }
      }
   }
#endif
}
#endif
/*------------------------------------------------------------*/
/*--- STOREV8                                              ---*/
/*------------------------------------------------------------*/
void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
{
   PROF_EVENT(MCPE_STOREV8);

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
         PROF_EVENT(MCPE_STOREV8_SLOW1);
         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
         return;
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];
      // Clevernesses to speed up storing V bits.
      // The 64/32/16 bit cases also have similar clevernesses, but it
      // works a little differently to the code below.
      //
      // Cleverness 1: sometimes we don't have to write the shadow memory at
      // all, if we can tell that what we want to write is the same as what is
      // already there. These cases are marked below as "defined on defined"
      // and "undefined on undefined".
      //
      // Cleverness 2:
      // We also avoid calling mc_STOREVn_slow if the V bits can directly
      // be written in the secondary map. V bits can be directly written
      // if 4 conditions are respected:
      //   * The address for which V bits are written is naturally aligned
      //        on 1 byte  for STOREV8 (this is always true)
      //        on 2 bytes for STOREV16
      //        on 4 bytes for STOREV32
      //        on 8 bytes for STOREV64.
      //   * V bits being written are either fully defined or fully undefined.
      //     (for partially defined V bits, V bits cannot be directly written,
      //      as the secondary vbits table must be maintained).
      //   * the secmap is not distinguished (distinguished maps cannot be
      //     modified).
      //   * the memory corresponding to the V bits being written is
      //     accessible (if one or more bytes are not accessible,
      //     we must call mc_STOREVn_slow in order to report accessibility
      //     errors).
      //     Note that for STOREV32 and STOREV64, it is too expensive
      //     to verify the accessibility of each byte for the benefit it
      //     brings. Instead, a quicker check is done by comparing to
      //     VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
      //     but misses some opportunity of direct modifications.
      //     Checking each byte accessibility was measured for
      //     STOREV32+perf tests and was slowing down all perf tests.
      // The cases corresponding to cleverness 2 are marked below as
      // "direct mem update".
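      // Illustrative example of the two clevernesses (a sketch; the
      // concrete values assume the VA_BITS8/VA_BITS2 encodings used by the
      // assembly helpers above):
      //   - storing V_BITS8_DEFINED when vabits8 == VA_BITS8_DEFINED (0xAA)
      //     writes nothing at all ("defined on defined");
      //   - storing V_BITS8_DEFINED to a byte whose two-bit field in
      //     vabits8 == 0x56 is currently 01 (undefined) hits neither
      //     shortcut, but because that field is not 00 (NOACCESS) and the
      //     secmap is not distinguished, insert_vabits2_into_vabits8 can
      //     patch the field to 10 in place ("direct mem update") without
      //     calling mc_STOREVn_slow;
      //   - storing a partially defined vbits8 (e.g. 0x0F) always takes
      //     mc_STOREVn_slow, because the secondary V bit table must be
      //     updated.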
      if (LIKELY(V_BITS8_DEFINED == vbits8)) {
         if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
            return; // defined on defined
         }
         if (!is_distinguished_sm(sm)
             && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
            // direct mem update
            insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
                                         &(sm->vabits8[sm_off]) );
            return;
         }
         PROF_EVENT(MCPE_STOREV8_SLOW2);
         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
         return;
      }
      if (V_BITS8_UNDEFINED == vbits8) {
         if (vabits8 == VA_BITS8_UNDEFINED) {
            return; // undefined on undefined
         }
         if (!is_distinguished_sm(sm)
             && (VA_BITS2_NOACCESS
                 != extract_vabits2_from_vabits8(a, vabits8))) {
            // direct mem update
            insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
                                         &(sm->vabits8[sm_off]) );
            return;
         }
         PROF_EVENT(MCPE_STOREV8_SLOW3);
         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
         return;
      }

      // Partially defined word
      PROF_EVENT(MCPE_STOREV8_SLOW4);
      mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   }
#endif
}
/*------------------------------------------------------------*/
/*--- Functions called directly from generated code:       ---*/
/*--- Value-check failure handlers.                        ---*/
/*------------------------------------------------------------*/
/* Call these ones when an origin is available ... */
void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
   MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
}

void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
}

void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
}

void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
}

void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
}

/* ... and these when an origin isn't available. */

void MC_(helperc_value_check0_fail_no_o) ( void ) {
   MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
}

void MC_(helperc_value_check1_fail_no_o) ( void ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
}

void MC_(helperc_value_check4_fail_no_o) ( void ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
}

void MC_(helperc_value_check8_fail_no_o) ( void ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
}

void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
}
/*------------------------------------------------------------*/
/*--- Metadata get/set functions, for client requests.     ---*/
/*------------------------------------------------------------*/
// Nb: this expands the V+A bits out into register-form V bits, even though
// they're in memory.  This is for backward compatibility, and because it's
// probably what the user wants.

/* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
   error [no longer used], 3 == addressing error. */
/* Nb: We used to issue various definedness/addressability errors from here,
   but we took them out because they ranged from not-very-helpful to
   downright annoying, and they complicated the error data structures. */
static Int mc_get_or_set_vbits_for_client (
   Addr a,
   Addr vbits,
   SizeT szB,
   Bool setting, /* True <=> set vbits,  False <=> get vbits */
   Bool is_client_request /* True <=> real user request
                             False <=> internal call from gdbserver */
)
{
   SizeT i;
   Bool  ok;
   UChar vbits8;

   /* Check that arrays are addressible before doing any getting/setting.
      vbits to be checked only for real user request. */
   for (i = 0; i < szB; i++) {
      if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
          (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
         return 3;
      }
   }

   if (setting) {
      /* Setting */
      for (i = 0; i < szB; i++) {
         ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
         tl_assert(ok);
      }
   } else {
      /* Getting */
      for (i = 0; i < szB; i++) {
         ok = get_vbits8(a + i, &vbits8);
         tl_assert(ok);
         ((UChar*)vbits)[i] = vbits8;
      }
      if (is_client_request)
         // The bytes in vbits[] have now been set, so mark them as such.
         MC_(make_mem_defined)(vbits, szB);
   }

   return 1;
}
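/* Illustrative client-side usage sketch (not code from this file; it assumes
   the VALGRIND_GET_VBITS / VALGRIND_SET_VBITS client requests declared in
   memcheck.h, which end up in the function above with
   is_client_request == True):

      unsigned char buf[8], shadow[8];
      ...
      // Fetch register-form V bits for buf into shadow[]: 0x00 bytes are
      // fully defined, 0xFF bytes are fully undefined.  A result of 3
      // means some byte of buf or shadow is not addressable.
      int r = VALGRIND_GET_VBITS(buf, shadow, sizeof buf);
*/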
/*------------------------------------------------------------*/
/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
/*------------------------------------------------------------*/
/* For the memory leak detector, say whether an entire 64k chunk of
   address space is possibly in use, or not.  If in doubt return
   True.
*/
Bool MC_(is_within_valid_secondary) ( Addr a )
{
   SecMap* sm = maybe_get_secmap_for ( a );
   if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
      /* Definitely not in use. */
      return False;
   } else {
      return True;
   }
}
/* For the memory leak detector, say whether or not a given word
   address is to be regarded as valid. */
Bool MC_(is_valid_aligned_word) ( Addr a )
{
   tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
   tl_assert(VG_IS_WORD_ALIGNED(a));
   if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
      return False;
   if (sizeof(UWord) == 8) {
      if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
         return False;
   }
   if (UNLIKELY(MC_(in_ignored_range)(a)))
      return False;
   else
      return True;
}
/*------------------------------------------------------------*/
/*--- Initialisation                                       ---*/
/*------------------------------------------------------------*/
static void init_shadow_memory ( void )
{
   Int     i;
   SecMap* sm;

   tl_assert(V_BIT_UNDEFINED   == 1);
   tl_assert(V_BIT_DEFINED     == 0);
   tl_assert(V_BITS8_UNDEFINED == 0xFF);
   tl_assert(V_BITS8_DEFINED   == 0);

   /* Build the 3 distinguished secondaries */
   sm = &sm_distinguished[SM_DIST_NOACCESS];
   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;

   sm = &sm_distinguished[SM_DIST_UNDEFINED];
   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;

   sm = &sm_distinguished[SM_DIST_DEFINED];
   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;

   /* Set up the primary map. */
   /* These entries gradually get overwritten as the used address
      space expands. */
   for (i = 0; i < N_PRIMARY_MAP; i++)
      primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];

   /* Auxiliary primary maps */
   init_auxmap_L1_L2();

   /* auxmap_size = auxmap_used = 0;
      no ... these are statically initialised */

   /* Secondary V bit table */
   secVBitTable = createSecVBitTable();
}
/*------------------------------------------------------------*/
/*--- Sanity check machinery (permanently engaged)         ---*/
/*------------------------------------------------------------*/
static Bool mc_cheap_sanity_check ( void )
{
   PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
   /* Check for sane operating level */
   if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
      return False;
   /* nothing else useful we can rapidly check */
   return True;
}
static Bool mc_expensive_sanity_check ( void )
{
   Int     i;
   Word    n_secmaps_found;
   SecMap* sm;
   const HChar* errmsg;
   Bool    bad = False;

   if (0) VG_(printf)("expensive sanity check\n");

   n_sanity_expensive++;
   PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);

   /* Check for sane operating level */
   if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
      return False;

   /* Check that the 3 distinguished SMs are still as they should be. */

   /* Check noaccess DSM. */
   sm = &sm_distinguished[SM_DIST_NOACCESS];
   for (i = 0; i < SM_CHUNKS; i++)
      if (sm->vabits8[i] != VA_BITS8_NOACCESS)
         bad = True;

   /* Check undefined DSM. */
   sm = &sm_distinguished[SM_DIST_UNDEFINED];
   for (i = 0; i < SM_CHUNKS; i++)
      if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
         bad = True;

   /* Check defined DSM. */
   sm = &sm_distinguished[SM_DIST_DEFINED];
   for (i = 0; i < SM_CHUNKS; i++)
      if (sm->vabits8[i] != VA_BITS8_DEFINED)
         bad = True;

   if (bad) {
      VG_(printf)("memcheck expensive sanity: "
                  "distinguished_secondaries have changed\n");
      return False;
   }

   /* If we're not checking for undefined value errors, the secondary V bit
    * table should be empty. */
   if (MC_(clo_mc_level) == 1) {
      if (0 != VG_(OSetGen_Size)(secVBitTable))
         return False;
   }

   /* check the auxiliary maps, very thoroughly */
   n_secmaps_found = 0;
   errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
   if (errmsg) {
      VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
      return False;
   }

   /* n_secmaps_found is now the number referred to by the auxiliary
      primary map.  Now add on the ones referred to by the main
      primary map. */
   for (i = 0; i < N_PRIMARY_MAP; i++) {
      if (primary_map[i] == NULL) {
         bad = True;
      } else {
         if (!is_distinguished_sm(primary_map[i]))
            n_secmaps_found++;
      }
   }

   /* check that the number of secmaps issued matches the number that
      are reachable (iow, no secmap leaks) */
   if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
      bad = True;

   if (bad) {
      VG_(printf)("memcheck expensive sanity: "
                  "apparent secmap leakage\n");
      return False;
   }

   if (bad) {
      VG_(printf)("memcheck expensive sanity: "
                  "auxmap covers wrong address space\n");
      return False;
   }

   /* there is only one pointer to each secmap (expensive) */

   return True;
}
/*------------------------------------------------------------*/
/*--- Command line args                                    ---*/
/*------------------------------------------------------------*/
/* 31 Aug 2015: Vectorised code is now so widespread that
   --partial-loads-ok needs to be enabled by default on all platforms.
   Not doing so causes lots of false errors. */
Bool          MC_(clo_partial_loads_ok)       = True;
Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
Long          MC_(clo_freelist_big_blocks)    = 1*1000*1000LL;
LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
UInt          MC_(clo_show_leak_kinds)        = R2S(Possible) | R2S(Unreached);
UInt          MC_(clo_error_for_leak_kinds)   = R2S(Possible) | R2S(Unreached);
UInt          MC_(clo_leak_check_heuristics)  = H2S(LchStdString)
                                              | H2S(LchLength64)
                                              | H2S(LchNewArray)
                                              | H2S( LchMultipleInheritance);
Bool          MC_(clo_xtree_leak)             = False;
const HChar*  MC_(clo_xtree_leak_file)        = "xtleak.kcg.%p";
Bool          MC_(clo_workaround_gcc296_bugs) = False;
Int           MC_(clo_malloc_fill)            = -1;
Int           MC_(clo_free_fill)              = -1;
KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_and_free;
Int           MC_(clo_mc_level)               = 2;
Bool          MC_(clo_show_mismatched_frees)  = True;
Bool          MC_(clo_expensive_definedness_checks) = False;
Bool          MC_(clo_ignore_range_below_sp)               = False;
UInt          MC_(clo_ignore_range_below_sp__first_offset) = 0;
UInt          MC_(clo_ignore_range_below_sp__last_offset)  = 0;

static const HChar * MC_(parse_leak_heuristics_tokens) =
   "-,stdstring,length64,newarray,multipleinheritance";
/* The first heuristic value (LchNone) has no keyword, as this is
   a fake heuristic used to collect the blocks found without any
   heuristic. */
static Bool mc_process_cmd_line_options(const HChar* arg)
{
   const HChar* tmp_str;
   Bool  tmp_show;

   tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );

   /* Set MC_(clo_mc_level):
         1 = A bit tracking only
         2 = A and V bit tracking, but no V bit origins
         3 = A and V bit tracking, and V bit origins

      Do this by inspecting --undef-value-errors= and
      --track-origins=.  Reject the case --undef-value-errors=no
      --track-origins=yes as meaningless.
   */
   if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
      if (MC_(clo_mc_level) == 3) {
         goto bad_level;
      } else {
         MC_(clo_mc_level) = 1;
         return True;
      }
   }
   if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
      if (MC_(clo_mc_level) == 1)
         MC_(clo_mc_level) = 2;
      return True;
   }
   if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
      if (MC_(clo_mc_level) == 3)
         MC_(clo_mc_level) = 2;
      return True;
   }
   if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
      if (MC_(clo_mc_level) == 1) {
         goto bad_level;
      } else {
         MC_(clo_mc_level) = 3;
         return True;
      }
   }

   if      VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
   else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
                       MC_(parse_leak_kinds_tokens),
                       MC_(clo_error_for_leak_kinds)) {}
   else if VG_USET_CLO(arg, "--show-leak-kinds",
                       MC_(parse_leak_kinds_tokens),
                       MC_(clo_show_leak_kinds)) {}
   else if VG_USET_CLO(arg, "--leak-check-heuristics",
                       MC_(parse_leak_heuristics_tokens),
                       MC_(clo_leak_check_heuristics)) {}
   else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
      if (tmp_show) {
         MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
      } else {
         MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
      }
   }
   else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
      if (tmp_show) {
         MC_(clo_show_leak_kinds) |= R2S(Possible);
      } else {
         MC_(clo_show_leak_kinds) &= ~R2S(Possible);
      }
   }
   else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
                       MC_(clo_workaround_gcc296_bugs)) {}

   else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol),
                       0, 10*1000*1000*1000LL) {}

   else if VG_BINT_CLO(arg, "--freelist-big-blocks",
                       MC_(clo_freelist_big_blocks),
                       0, 10*1000*1000*1000LL) {}

   else if VG_XACT_CLO(arg, "--leak-check=no",
                       MC_(clo_leak_check), LC_Off) {}
   else if VG_XACT_CLO(arg, "--leak-check=summary",
                       MC_(clo_leak_check), LC_Summary) {}
   else if VG_XACT_CLO(arg, "--leak-check=yes",
                       MC_(clo_leak_check), LC_Full) {}
   else if VG_XACT_CLO(arg, "--leak-check=full",
                       MC_(clo_leak_check), LC_Full) {}

   else if VG_XACT_CLO(arg, "--leak-resolution=low",
                       MC_(clo_leak_resolution), Vg_LowRes) {}
   else if VG_XACT_CLO(arg, "--leak-resolution=med",
                       MC_(clo_leak_resolution), Vg_MedRes) {}
   else if VG_XACT_CLO(arg, "--leak-resolution=high",
                       MC_(clo_leak_resolution), Vg_HighRes) {}

   else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
      Bool ok = parse_ignore_ranges(tmp_str);
      if (!ok) {
         VG_(message)(Vg_DebugMsg,
            "ERROR: --ignore-ranges: "
            "invalid syntax, or end <= start in range\n");
         return False;
      }
      if (gIgnoredAddressRanges) {
         UInt i;
         for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
            UWord val     = IAR_INVALID;
            UWord key_min = ~(UWord)0;
            UWord key_max = (UWord)0;
            VG_(indexRangeMap)( &key_min, &key_max, &val,
                                gIgnoredAddressRanges, i );
            tl_assert(key_min <= key_max);
            UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
            if (key_max - key_min > limit && val == IAR_CommandLine) {
               VG_(message)(Vg_DebugMsg,
                  "ERROR: --ignore-ranges: suspiciously large range:\n");
               VG_(message)(Vg_DebugMsg,
                  "       0x%lx-0x%lx (size %lu)\n", key_min, key_max,
                  key_max - key_min + 1);
               return False;
            }
         }
      }
   }
   else if VG_STR_CLO(arg, "--ignore-range-below-sp", tmp_str) {
      /* This seems at first a bit weird, but: in order to imply
         a non-wrapped-around address range, the first offset needs to be
         larger than the second one.  For example
            --ignore-range-below-sp=8192,8189
         would cause accesses to in the range [SP-8192, SP-8189] to be
         ignored. */
      UInt offs1 = 0, offs2 = 0;
      Bool ok = parse_UInt_pair(&tmp_str, &offs1, &offs2);
      // Ensure we used all the text after the '=' sign.
      if (ok && *tmp_str != 0) ok = False;
      if (!ok) {
         VG_(message)(Vg_DebugMsg,
            "ERROR: --ignore-range-below-sp: invalid syntax. "
            " Expected \"...=decimalnumber-decimalnumber\".\n");
         return False;
      }
      if (offs1 > 1000*1000 /*arbitrary*/ || offs2 > 1000*1000 /*ditto*/) {
         VG_(message)(Vg_DebugMsg,
            "ERROR: --ignore-range-below-sp: suspiciously large "
            "offset(s): %u and %u\n", offs1, offs2);
         return False;
      }
      if (offs1 <= offs2) {
         VG_(message)(Vg_DebugMsg,
            "ERROR: --ignore-range-below-sp: invalid offsets "
            "(the first must be larger): %u and %u\n", offs1, offs2);
         return False;
      }
      tl_assert(offs1 > offs2);
      if (offs1 - offs2 > 4096 /*arbitrary*/) {
         VG_(message)(Vg_DebugMsg,
            "ERROR: --ignore-range-below-sp: suspiciously large "
            "range: %u-%u (size %u)\n", offs1, offs2, offs1 - offs2);
         return False;
      }
      MC_(clo_ignore_range_below_sp) = True;
      MC_(clo_ignore_range_below_sp__first_offset) = offs1;
      MC_(clo_ignore_range_below_sp__last_offset)  = offs2;
      return True;
   }
   else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
   else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}

   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
                       MC_(clo_keep_stacktraces), KS_alloc) {}
   else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
                       MC_(clo_keep_stacktraces), KS_free) {}
   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
                       MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
                       MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
   else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
                       MC_(clo_keep_stacktraces), KS_none) {}

   else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
                       MC_(clo_show_mismatched_frees)) {}
   else if VG_BOOL_CLO(arg, "--expensive-definedness-checks",
                       MC_(clo_expensive_definedness_checks)) {}

   else if VG_BOOL_CLO(arg, "--xtree-leak",
                       MC_(clo_xtree_leak)) {}
   else if VG_STR_CLO (arg, "--xtree-leak-file",
                       MC_(clo_xtree_leak_file)) {}

   else
      return VG_(replacement_malloc_process_cmd_line_option)(arg);

   return True;

 bad_level:
   VG_(fmsg_bad_option)(arg,
      "--track-origins=yes has no effect when --undef-value-errors=no.\n");
}
static void mc_print_usage(void)
{
   VG_(printf)(
"    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
"    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
"    --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
"                                     [definite,possible]\n"
"    --errors-for-leak-kinds=kind1,kind2,..  which leak kinds are errors?\n"
"                                     [definite,possible]\n"
"        where kind is one of:\n"
"          definite indirect possible reachable all none\n"
"    --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
"        improving leak search false positive [all]\n"
"        where heur is one of:\n"
"          stdstring length64 newarray multipleinheritance all none\n"
"    --show-reachable=yes             same as --show-leak-kinds=all\n"
"    --show-reachable=no --show-possibly-lost=yes\n"
"                                     same as --show-leak-kinds=definite,possible\n"
"    --show-reachable=no --show-possibly-lost=no\n"
"                                     same as --show-leak-kinds=definite\n"
"    --xtree-leak=no|yes              output leak result in xtree format? [no]\n"
"    --xtree-leak-file=<file>         xtree leak report file [xtleak.kcg.%%p]\n"
"    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
"    --track-origins=no|yes           show origins of undefined values? [no]\n"
"    --partial-loads-ok=no|yes        too hard to explain here; see manual [yes]\n"
"    --expensive-definedness-checks=no|yes\n"
"                                     Use extra-precise definedness tracking [no]\n"
"    --freelist-vol=<number>          volume of freed blocks queue [20000000]\n"
"    --freelist-big-blocks=<number>   releases first blocks with size>= [1000000]\n"
"    --workaround-gcc296-bugs=no|yes  self explanatory [no].  Deprecated.\n"
"                                     Use --ignore-range-below-sp instead.\n"
"    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]  assume given addresses are OK\n"
"    --ignore-range-below-sp=<number>-<number>  do not report errors for\n"
"                                     accesses at the given offsets below SP\n"
"    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
"    --free-fill=<hexnumber>          fill free'd areas with given value\n"
"    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
"        stack trace(s) to keep for malloc'd/free'd areas       [alloc-and-free]\n"
"    --show-mismatched-frees=no|yes   show frees that don't match the allocator? [yes]\n"
   );
}

static void mc_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
/*------------------------------------------------------------*/
/*--- Client blocks                                        ---*/
/*------------------------------------------------------------*/
/* Client block management:

   This is managed as an expanding array of client block descriptors.
   Indices of live descriptors are issued to the client, so it can ask
   to free them later.  Therefore we cannot slide live entries down
   over dead ones.  Instead we must use free/inuse flags and scan for
   an empty slot at allocation time.  This in turn means allocation is
   relatively expensive, so we hope this does not happen too often.

   An unused block has start == size == 0
*/

/* type CGenBlock is defined in mc_include.h */
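/* Illustrative lifecycle sketch (an assumption based on the description
   above, not code from this file; it presumes the VALGRIND_CREATE_BLOCK and
   VALGRIND_DISCARD client requests from memcheck.h): a CREATE_BLOCK request
   reaches alloc_client_block() below and hands the client the index of a
   free cgbs[] slot; a later DISCARD of that index marks the slot unused
   (start == size == 0), so a future allocation scan can re-issue it. */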
/* This subsystem is self-initialising. */
static UWord      cgb_size = 0;
static UWord      cgb_used = 0;
static CGenBlock* cgbs     = NULL;

/* Stats for this subsystem. */
static ULong cgb_used_MAX = 0;   /* Max in use. */
static ULong cgb_allocs   = 0;   /* Number of allocs.   */
static ULong cgb_discards = 0;   /* Number of discards. */
static ULong cgb_search   = 0;   /* Number of searches. */
/* Get access to the client block array. */
void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
                                 /*OUT*/UWord* nBlocks )
{
   *blocks  = cgbs;
   *nBlocks = cgb_used;
}
Int alloc_client_block ( void )
{
   UWord      i, sz_new;
   CGenBlock* cgbs_new;

   cgb_allocs++;

   for (i = 0; i < cgb_used; i++) {
      cgb_search++;
      if (cgbs[i].start == 0 && cgbs[i].size == 0)
         return i;
   }

   /* Not found.  Try to allocate one at the end. */
   if (cgb_used < cgb_size) {
      cgb_used++;
      return cgb_used-1;
   }

   /* Ok, we have to allocate a new one. */
   tl_assert(cgb_used == cgb_size);
   sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);

   cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
   for (i = 0; i < cgb_used; i++)
      cgbs_new[i] = cgbs[i];

   if (cgbs != NULL)
      VG_(free)( cgbs );
   cgbs = cgbs_new;

   cgb_size = sz_new;
   cgb_used++;
   if (cgb_used > cgb_used_MAX)
      cgb_used_MAX = cgb_used;

   return cgb_used-1;
}
static void show_client_block_stats ( void )
{
   VG_(message)(Vg_DebugMsg,
      "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
      cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
   );
}
6375 "memcheck monitor commands:\n"
6376 " xb <addr> [<len>]\n"
6377 " prints validity bits for <len> (or 1) bytes at <addr>\n"
6378 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6379 " Then prints the bytes values below the corresponding validity bits\n"
6380 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6381 " Example: xb 0x8049c78 10\n"
6382 " get_vbits <addr> [<len>]\n"
6383 " Similar to xb, but only prints the validity bytes by group of 4.\n"
6384 " make_memory [noaccess|undefined\n"
6385 " |defined|Definedifaddressable] <addr> [<len>]\n"
6386 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6387 " check_memory [addressable|defined] <addr> [<len>]\n"
6388 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6389 " and outputs a description of <addr>\n"
6390 " leak_check [full*|summary|xtleak]\n"
6391 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6392 " [heuristics heur1,heur2,...]\n"
6393 " [increased*|changed|any]\n"
6394 " [unlimited*|limited <max_loss_records_output>]\n"
6396 " xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
6397 " where kind is one of:\n"
6398 " definite indirect possible reachable all none\n"
6399 " where heur is one of:\n"
6400 " stdstring length64 newarray multipleinheritance all none*\n"
6401 " Examples: leak_check\n"
6402 " leak_check summary any\n"
6403 " leak_check full kinds indirect,possible\n"
6404 " leak_check full reachable any limited 100\n"
6405 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6406 " [unlimited*|limited <max_blocks>]\n"
6407 " [heuristics heur1,heur2,...]\n"
6408 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6409 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6410 " With heuristics, only shows the blocks found via heur1,heur2,...\n"
6412 " who_points_at <addr> [<len>]\n"
6413 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6414 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6415 " with len > 1, will also show \"interior pointers\")\n"
6416 " xtmemory [<filename>]\n"
6417 " dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
/* Print szB bytes at address, with a format similar to the gdb command
   x /<szB>xb address.
   res[i] == 1 indicates the corresponding byte is addressable. */
static void gdb_xb (Addr address, SizeT szB, Int res[])
{
   UInt i;

   for (i = 0; i < szB; i++) {
      UInt bnr = i % 8;
      if (bnr == 0) {
         if (i != 0)
            VG_(printf) ("\n"); // Terminate previous line
         VG_(printf) ("%p:", (void*)(address+i));
      }
      if (res[i] == 1)
         VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
      else
         VG_(printf) ("\t0x??");
   }
   VG_(printf) ("\n"); // Terminate previous line
}
/* Returns the address of the next non space character,
   or address of the string terminator. */
static HChar* next_non_space (HChar *s)
{
   while (*s && *s == ' ')
      s++;
   return s;
}
/* Parse an integer slice, i.e. a single integer or a range of integer.
   Syntax is:
       <integer>[..<integer> ]
   (spaces are allowed before and/or after ..).
   Return True if range correctly parsed, False otherwise. */
static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
                              UInt *from, UInt *to)
{
   HChar* wl;
   HChar* endptr;

   wl = VG_(strtok_r) (s, " ", saveptr);

   /* slice must start with an integer. */
   if (wl == NULL) {
      VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
      return False;
   }
   *from = VG_(strtoull10) (wl, &endptr);
   if (endptr == wl) {
      VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
      return False;
   }

   if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
      /* wl token is an integer terminating the string
         or else next token does not start with .
         In both cases, the slice is a single integer. */
      *to = *from;
      return True;
   }

   if (*endptr == '\0') {
      // iii ..    => get the next token
      wl = VG_(strtok_r) (NULL, " .", saveptr);
   } else {
      // It must be iii..
      if (*endptr != '.' && *(endptr+1) != '.') {
         VG_(gdb_printf) ("expecting slice <from>..<to>\n");
         return False;
      }
      if ( *(endptr+2) == ' ') {
         // It must be iii.. jjj  => get the next token
         wl = VG_(strtok_r) (NULL, " .", saveptr);
      } else {
         // It must be iii..jjj
         wl = endptr + 2;
      }
   }

   *to = VG_(strtoull10) (wl, &endptr);
   if (*endptr != '\0') {
      VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
      return False;
   }

   if (*from > *to) {
      VG_(gdb_printf) ("<from> cannot be bigger than <to> "
                       "in slice <from>..<to>\n");
      return False;
   }

   return True;
}
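/* Illustrative inputs for VG_(parse_slice), given the behaviour implemented
   above: "7" yields from == to == 7; "7..10", "7.. 10" and "7 .. 10" all
   yield from == 7, to == 10; "10..7" is rejected because <from> must not be
   bigger than <to>. */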
6518 /* return True if request recognised, False otherwise */
6519 static Bool
handle_gdb_monitor_command (ThreadId tid
, HChar
*req
)
6522 HChar s
[VG_(strlen
)(req
) + 1]; /* copy for strtok_r */
6525 VG_(strcpy
) (s
, req
);
6527 wcmd
= VG_(strtok_r
) (s
, " ", &ssaveptr
);
6528 /* NB: if possible, avoid introducing a new command below which
6529 starts with the same first letter(s) as an already existing
6530 command. This ensures a shorter abbreviation for the user. */
6531 switch (VG_(keyword_id
)
6532 ("help get_vbits leak_check make_memory check_memory "
6533 "block_list who_points_at xb xtmemory",
6534 wcmd
, kwd_report_duplicated_matches
)) {
6535 case -2: /* multiple matches */
6537 case -1: /* not found */
6540 print_monitor_help();
6542 case 1: { /* get_vbits */
6545 if (VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
)) {
6548 Int unaddressable
= 0;
6549 for (i
= 0; i
< szB
; i
++) {
6550 Int res
= mc_get_or_set_vbits_for_client
6551 (address
+i
, (Addr
) &vbits
, 1,
6552 False
, /* get them */
6553 False
/* is client request */ );
6554 /* we are before the first character on next line, print a \n. */
6555 if ((i
% 32) == 0 && i
!= 0)
6557 /* we are before the next block of 4 starts, print a space. */
6558 else if ((i
% 4) == 0 && i
!= 0)
6561 VG_(printf
) ("%02x", vbits
);
6563 tl_assert(3 == res
);
6569 if (unaddressable
) {
6571 ("Address %p len %lu has %d bytes unaddressable\n",
6572 (void *)address
, szB
, unaddressable
);
6577 case 2: { /* leak_check */
6579 LeakCheckParams lcp
;
6580 HChar
* xt_filename
= NULL
;
6584 lcp
.show_leak_kinds
= R2S(Possible
) | R2S(Unreached
);
6585 lcp
.errors_for_leak_kinds
= 0; // no errors for interactive leak search.
6587 lcp
.deltamode
= LCD_Increased
;
6588 lcp
.max_loss_records_output
= 999999999;
6589 lcp
.requested_by_monitor_command
= True
;
6590 lcp
.xt_filename
= NULL
;
6592 for (kw
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6594 kw
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
)) {
6595 switch (VG_(keyword_id
)
6596 ("full summary xtleak "
6597 "kinds reachable possibleleak definiteleak "
6599 "increased changed any "
6600 "unlimited limited ",
6601 kw
, kwd_report_all
)) {
6602 case -2: err
++; break;
6603 case -1: err
++; break;
6605 lcp
.mode
= LC_Full
; break;
6606 case 1: /* summary */
6607 lcp
.mode
= LC_Summary
; break;
6608 case 2: /* xtleak */
6611 = VG_(expand_file_name
)("--xtleak-mc_main.c",
6612 "xtleak.kcg.%p.%n");
6613 lcp
.xt_filename
= xt_filename
;
6615 case 3: { /* kinds */
6616 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6618 || !VG_(parse_enum_set
)(MC_(parse_leak_kinds_tokens
),
6621 &lcp
.show_leak_kinds
)) {
6622 VG_(gdb_printf
) ("missing or malformed leak kinds set\n");
6627 case 4: /* reachable */
6628 lcp
.show_leak_kinds
= MC_(all_Reachedness
)();
6630 case 5: /* possibleleak */
6632 = R2S(Possible
) | R2S(IndirectLeak
) | R2S(Unreached
);
6634 case 6: /* definiteleak */
6635 lcp
.show_leak_kinds
= R2S(Unreached
);
6637 case 7: { /* heuristics */
6638 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6640 || !VG_(parse_enum_set
)(MC_(parse_leak_heuristics_tokens
),
6644 VG_(gdb_printf
) ("missing or malformed heuristics set\n");
6649 case 8: /* increased */
6650 lcp
.deltamode
= LCD_Increased
; break;
6651 case 9: /* changed */
6652 lcp
.deltamode
= LCD_Changed
; break;
6654 lcp
.deltamode
= LCD_Any
; break;
6655 case 11: /* unlimited */
6656 lcp
.max_loss_records_output
= 999999999; break;
6657 case 12: { /* limited */
6659 const HChar
* endptr
;
6661 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6664 endptr
= "empty"; /* to report an error below */
6667 int_value
= VG_(strtoll10
) (wcmd
, &the_end
);
6670 if (*endptr
!= '\0')
6671 VG_(gdb_printf
) ("missing or malformed integer value\n");
6672 else if (int_value
> 0)
6673 lcp
.max_loss_records_output
= (UInt
) int_value
;
6675 VG_(gdb_printf
) ("max_loss_records_output must be >= 1,"
6676 " got %d\n", int_value
);
6684 MC_(detect_memory_leaks
)(tid
, &lcp
);
6685 if (xt_filename
!= NULL
)
6686 VG_(free
)(xt_filename
);
6690 case 3: { /* make_memory */
6693 Int kwdid
= VG_(keyword_id
)
6694 ("noaccess undefined defined Definedifaddressable",
6695 VG_(strtok_r
) (NULL
, " ", &ssaveptr
), kwd_report_all
);
6696 if (!VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
))
6701 case 0: MC_(make_mem_noaccess
) (address
, szB
); break;
6702 case 1: make_mem_undefined_w_tid_and_okind ( address
, szB
, tid
,
6703 MC_OKIND_USER
); break;
6704 case 2: MC_(make_mem_defined
) ( address
, szB
); break;
6705 case 3: make_mem_defined_if_addressable ( address
, szB
); break;;
      default: tl_assert(0);
      }
      return True;
   }

   case 4: { /* check_memory */
      ExeContext* origin_ec;
      Int kwdid = VG_(keyword_id)
         ("addressable defined",
          VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
      if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
         return True;
      switch (kwdid) {
      case 0: /* addressable */
         if (is_mem_addressable ( address, szB, &bad_addr ))
            VG_(printf) ("Address %p len %lu addressable\n",
                         (void *)address, szB);
         else
            VG_(printf)
               ("Address %p len %lu not addressable:\nbad address %p\n",
                (void *)address, szB, (void *) bad_addr);
         MC_(pp_describe_addr) (address);
         break;
      case 1: /* defined */
         res = is_mem_defined ( address, szB, &bad_addr, &otag );
         if (MC_AddrErr == res)
            VG_(printf)
               ("Address %p len %lu not addressable:\nbad address %p\n",
                (void *)address, szB, (void *) bad_addr);
         else if (MC_ValueErr == res) {
            switch (otag & 3) {
            case MC_OKIND_STACK:
               src = " was created by a stack allocation"; break;
            case MC_OKIND_HEAP:
               src = " was created by a heap allocation"; break;
            case MC_OKIND_USER:
               src = " was created by a client request"; break;
            case MC_OKIND_UNKNOWN:
               src = ""; break;
            default: tl_assert(0);
            }
            VG_(printf)
               ("Address %p len %lu not defined:\n"
                "Uninitialised value at %p%s\n",
                (void *)address, szB, (void *) bad_addr, src);
            ecu = otag & ~3;
            if (VG_(is_plausible_ECU)(ecu)) {
               origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
               VG_(pp_ExeContext)( origin_ec );
            }
         }
         else
            VG_(printf) ("Address %p len %lu defined\n",
                         (void *)address, szB);
         MC_(pp_describe_addr) (address);
         break;
      default: tl_assert(0);
      }
      return True;
   }

   case 5: { /* block_list */
      HChar* wl;
      HChar* the_end;
      UInt lr_nr_from = 0;
      UInt lr_nr_to = 0;

      if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
         UInt limit_blocks = 999999999;
         Int int_value;
         UInt heuristics = 0;

         for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
              wl != NULL;
              wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
            switch (VG_(keyword_id) ("unlimited limited heuristics ",
                                     wl, kwd_report_all)) {
            case -2: return True;
            case -1: return True;
            case 0: /* unlimited */
               limit_blocks = 999999999; break;
            case 1: /* limited */
               wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
               if (wcmd == NULL) {
                  VG_(gdb_printf) ("missing integer value\n");
                  return True;
               }
               int_value = VG_(strtoll10) (wcmd, &the_end);
               if (*the_end != '\0') {
                  VG_(gdb_printf) ("malformed integer value\n");
                  return True;
               }
               if (int_value <= 0) {
                  VG_(gdb_printf) ("max_blocks must be >= 1,"
                                   " got %d\n", int_value);
                  return True;
               }
               limit_blocks = (UInt) int_value;
               break;
            case 2: /* heuristics */
               wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
               if (wcmd == NULL
                   || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
                                           True /*allow_all*/,
                                           wcmd, &heuristics)) {
                  VG_(gdb_printf) ("missing or malformed heuristics set\n");
                  return True;
               }
               break;
            default: tl_assert(0);
            }
         }
         /* subtract 1 from lr_nr_from/lr_nr_to as what is shown to the user
            is 1 more than the index in lr_array. */
         if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
                                                         lr_nr_to-1,
                                                         limit_blocks,
                                                         heuristics))
            VG_(gdb_printf) ("invalid loss record nr\n");
      }
      return True;
   }

   case 6: { /* who_points_at */
      if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
         return True;
      if (address == (Addr) 0) {
         VG_(gdb_printf) ("Cannot search who points at 0x0\n");
         return True;
      }
      MC_(who_points_at) (address, szB);
      return True;
   }

   case 7: { /* xb */
      if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
         UChar vbits[8];
         Int res[8];
         Int i;
         Int unaddressable = 0;
         for (i = 0; i < szB; i++) {
            Int bnr = i % 8;
            res[bnr] = mc_get_or_set_vbits_for_client
               (address+i, (Addr) &vbits[bnr], 1,
                False, /* get them */
                False  /* is client request */ );
            /* We're going to print the first vabits of a new line.
               Terminate the previous line if needed: prints a line with the
               address and the data. */
            if (bnr == 0) {
               if (i != 0)
                  gdb_xb (address + i - 8, 8, res);
               VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
            }
            if (res[bnr] == 1) {
               VG_(printf) ("\t %02x", vbits[bnr]);
            } else {
               tl_assert(3 == res[bnr]);
               unaddressable++;
               VG_(printf) ("\t __");
            }
         }
         if (szB % 8 == 0 && szB > 0)
            gdb_xb (address + szB - 8, 8, res);
         else
            gdb_xb (address + szB - szB % 8, szB % 8, res);
         if (unaddressable) {
            VG_(printf)
               ("Address %p len %lu has %d bytes unaddressable\n",
                (void *)address, szB, unaddressable);
         }
      }
      return True;
   }

   case 8: { /* xtmemory */
      HChar* filename;
      filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
      MC_(xtmemory_report)(filename, False);
      return True;
   }
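
   /* Illustrative only, not part of the handler itself: these cases are
      reached through Valgrind's gdbserver.  Assuming the program was started
      with "valgrind --vgdb=yes --vgdb-error=0 ./prog" and gdb was attached
      via "target remote | vgdb", the commands handled above would be issued
      as, for example:

         (gdb) monitor check_memory defined 0x8049a80 10
         (gdb) monitor block_list 5 limited 20
         (gdb) monitor who_points_at 0x4028028 16
         (gdb) monitor xb 0x4028028 8
         (gdb) monitor xtmemory /tmp/mem.kcg

      The spellings above are a sketch based on the keyword tables in this
      function; the Memcheck manual ("Memcheck Monitor Commands") is the
      authoritative reference. */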
/*------------------------------------------------------------*/
/*--- Client requests                                       ---*/
/*------------------------------------------------------------*/

static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
{
   Int   i;
   Addr  bad_addr;

   if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
       && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
       && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
       && VG_USERREQ__FREELIKE_BLOCK  != arg[0]
       && VG_USERREQ__CREATE_MEMPOOL  != arg[0]
       && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
       && VG_USERREQ__MEMPOOL_ALLOC   != arg[0]
       && VG_USERREQ__MEMPOOL_FREE    != arg[0]
       && VG_USERREQ__MEMPOOL_TRIM    != arg[0]
       && VG_USERREQ__MOVE_MEMPOOL    != arg[0]
       && VG_USERREQ__MEMPOOL_CHANGE  != arg[0]
       && VG_USERREQ__MEMPOOL_EXISTS  != arg[0]
       && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]
       && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
       && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
      return False;

   switch (arg[0]) {
      case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
         Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
         if (!ok)
            MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
         *ret = ok ? (UWord)NULL : bad_addr;
         break;
      }

      case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
         Bool errorV    = False;
         Addr bad_addrV = 0;
         UInt otagV     = 0;
         Bool errorA    = False;
         Addr bad_addrA = 0;
         is_mem_defined_comprehensive(
            arg[1], arg[2],
            &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
         );
         if (errorV) {
            MC_(record_user_error) ( tid, bad_addrV,
                                     /*isAddrErr*/False, otagV );
         }
         if (errorA) {
            MC_(record_user_error) ( tid, bad_addrA,
                                     /*isAddrErr*/True, 0 );
         }
         /* Return the lower of the two erring addresses, if any. */
         *ret = 0;
         if (errorV && !errorA) {
            *ret = bad_addrV;
         }
         if (!errorV && errorA) {
            *ret = bad_addrA;
         }
         if (errorV && errorA) {
            *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
         }
         break;
      }
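
      /* Client-side sketch (not part of this file): the case above is what
         memcheck.h's VALGRIND_CHECK_MEM_IS_DEFINED request turns into.  A
         minimal client, under that assumption, looks like:

            #include <stdlib.h>
            #include "memcheck.h"

            int main(void)
            {
               char* buf = malloc(16);   // heap block, contents undefined
               unsigned long bad = VALGRIND_CHECK_MEM_IS_DEFINED(buf, 16);
               // bad == 0 if all 16 bytes are addressable and defined;
               // otherwise it is the lowest erring address, exactly as
               // computed by the *ret assignments above.
               free(buf);
               return bad != 0;
            }
      */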
      case VG_USERREQ__DO_LEAK_CHECK: {
         LeakCheckParams lcp;

         if (arg[1] == 0)
            lcp.mode = LC_Full;
         else if (arg[1] == 1)
            lcp.mode = LC_Summary;
         else {
            VG_(message)(Vg_UserMsg,
                         "Warning: unknown memcheck leak search mode\n");
            lcp.mode = LC_Full;
         }

         lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
         lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
         lcp.heuristics = MC_(clo_leak_check_heuristics);

         if (arg[2] == 0)
            lcp.deltamode = LCD_Any;
         else if (arg[2] == 1)
            lcp.deltamode = LCD_Increased;
         else if (arg[2] == 2)
            lcp.deltamode = LCD_Changed;
         else {
            VG_(message)(Vg_UserMsg,
                         "Warning: unknown memcheck leak search deltamode\n");
            lcp.deltamode = LCD_Any;
         }
         lcp.max_loss_records_output = 999999999;
         lcp.requested_by_monitor_command = False;
         lcp.xt_filename = NULL;

         MC_(detect_memory_leaks)(tid, &lcp);
         *ret = 0; /* return value is meaningless */
         break;
      }

      case VG_USERREQ__MAKE_MEM_NOACCESS:
         MC_(make_mem_noaccess) ( arg[1], arg[2] );
         break;

      case VG_USERREQ__MAKE_MEM_UNDEFINED:
         make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
                                              MC_OKIND_USER );
         break;

      case VG_USERREQ__MAKE_MEM_DEFINED:
         MC_(make_mem_defined) ( arg[1], arg[2] );
         break;

      case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
         make_mem_defined_if_addressable ( arg[1], arg[2] );
         break;

      case VG_USERREQ__CREATE_BLOCK: /* describe a block */
         if (arg[1] != 0 && arg[2] != 0) {
            i = alloc_client_block();
            /* VG_(printf)("allocated %d %p\n", i, cgbs); */
            cgbs[i].start = arg[1];
            cgbs[i].size  = arg[2];
            cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
            cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
            *ret = i;
         } else
            *ret = -1;
         break;

      case VG_USERREQ__DISCARD: /* discard */
         if (cgbs == NULL
             || arg[2] >= cgb_used ||
             (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
            *ret = 1;
         } else {
            tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
            cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
            VG_(free)(cgbs[arg[2]].desc);
            *ret = 0;
         }
         break;

      case VG_USERREQ__GET_VBITS:
         *ret = mc_get_or_set_vbits_for_client
                   ( arg[1], arg[2], arg[3],
                     False /* get them */,
                     True /* is client request */ );
         break;

      case VG_USERREQ__SET_VBITS:
         *ret = mc_get_or_set_vbits_for_client
                   ( arg[1], arg[2], arg[3],
                     True /* set them */,
                     True /* is client request */ );
         break;

      case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
         UWord** argp = (UWord**)arg;
         // MC_(bytes_leaked) et al were set by the last leak check (or zero
         // if no prior leak checks performed).
         *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
         *argp[2] = MC_(bytes_dubious);
         *argp[3] = MC_(bytes_reachable);
         *argp[4] = MC_(bytes_suppressed);
         // there is no argp[5]
         //*argp[5] = MC_(bytes_indirect);
         // XXX need to make *argp[1-4] defined; currently done in the
         // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
         break;
      }
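
      /* Client-side sketch (not part of this file): the totals stored above
         are what memcheck.h's VALGRIND_COUNT_LEAKS hands back after a leak
         check, roughly:

            #include "memcheck.h"

            unsigned long leaked, dubious, reachable, suppressed;
            VALGRIND_DO_LEAK_CHECK;                       // run a leak check
            VALGRIND_COUNT_LEAKS(leaked, dubious,
                                 reachable, suppressed);  // read the totals

         The macro initialises the four output variables to zero itself,
         which is the behaviour the "XXX need to make *argp[1-4] defined"
         note above relies on. */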
      case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
         UWord** argp = (UWord**)arg;
         // MC_(blocks_leaked) et al were set by the last leak check (or zero
         // if no prior leak checks performed).
         *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
         *argp[2] = MC_(blocks_dubious);
         *argp[3] = MC_(blocks_reachable);
         *argp[4] = MC_(blocks_suppressed);
         // there is no argp[5]
         //*argp[5] = MC_(blocks_indirect);
         // XXX need to make *argp[1-4] defined; currently done in the
         // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
         break;
      }

      case VG_USERREQ__MALLOCLIKE_BLOCK: {
         Addr p         = (Addr)arg[1];
         SizeT sizeB    =       arg[2];
         UInt rzB       =       arg[3];
         Bool is_zeroed = (Bool)arg[4];

         MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
                          MC_AllocCustom, MC_(malloc_list) );
         if (rzB > 0) {
            MC_(make_mem_noaccess) ( p - rzB, rzB );
            MC_(make_mem_noaccess) ( p + sizeB, rzB );
         }
         break;
      }
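
      /* Client-side sketch (not part of this file): a custom allocator
         sitting on top of its own arena would describe blocks to Memcheck
         with the request handled here and with FREELIKE_BLOCK below.  The
         arena_alloc/arena_release helpers and the redzone size are assumed
         for illustration only:

            #include <stddef.h>
            #include "valgrind.h"
            #define RZ 16                            // assumed redzone size

            extern char* arena_alloc(size_t n);      // hypothetical
            extern void  arena_release(void* p);     // hypothetical

            void* my_alloc(size_t n)
            {
               char* p = arena_alloc(n + 2*RZ) + RZ;
               VALGRIND_MALLOCLIKE_BLOCK(p, n, RZ, /*is_zeroed*/0);
               return p;                             // redzones now noaccess
            }

            void my_free(void* p)
            {
               VALGRIND_FREELIKE_BLOCK(p, RZ);       // block becomes noaccess
               arena_release((char*)p - RZ);
            }
      */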
      case VG_USERREQ__RESIZEINPLACE_BLOCK: {
         Addr p         = (Addr)arg[1];
         SizeT oldSizeB =       arg[2];
         SizeT newSizeB =       arg[3];
         UInt rzB       =       arg[4];

         MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
         break;
      }

      case VG_USERREQ__FREELIKE_BLOCK: {
         Addr p   = (Addr)arg[1];
         UInt rzB =       arg[2];

         MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
         break;
      }

      case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
         HChar* s  = (HChar*)arg[1];
         Addr  dst = (Addr) arg[2];
         Addr  src = (Addr) arg[3];
         SizeT len = (SizeT)arg[4];
         MC_(record_overlap_error)(tid, s, src, dst, len);
         break;
      }

      case VG_USERREQ__CREATE_MEMPOOL: {
         Addr pool      = (Addr)arg[1];
         UInt rzB       =       arg[2];
         Bool is_zeroed = (Bool)arg[3];
         UInt flags     =       arg[4];

         // The create_mempool function does not know these mempool flags,
         // pass as booleans.
         MC_(create_mempool) ( pool, rzB, is_zeroed,
                               (flags & VALGRIND_MEMPOOL_AUTO_FREE),
                               (flags & VALGRIND_MEMPOOL_METAPOOL) );
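
         /* Client-side sketch (not part of this file): pool allocators
            describe themselves with the mempool requests dispatched here
            and in the following cases, e.g.

               VALGRIND_CREATE_MEMPOOL(pool, rzB, /*is_zeroed*/0);
               VALGRIND_MEMPOOL_ALLOC(pool, obj, size);
               VALGRIND_MEMPOOL_FREE(pool, obj);
               VALGRIND_DESTROY_MEMPOOL(pool);

            where "pool" is whatever address the client uses to identify the
            pool (often its header).  The AUTO_FREE/METAPOOL bits tested in
            arg[4] above come from the flag-taking form of the create request
            (VALGRIND_CREATE_MEMPOOL_EXT in recent valgrind.h -- an
            assumption here; the plain macro passes no flags). */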
         break;
      }

      case VG_USERREQ__DESTROY_MEMPOOL: {
         Addr pool = (Addr)arg[1];

         MC_(destroy_mempool) ( pool );
         break;
      }

      case VG_USERREQ__MEMPOOL_ALLOC: {
         Addr pool  = (Addr)arg[1];
         Addr addr  = (Addr)arg[2];
         SizeT size =       arg[3];

         MC_(mempool_alloc) ( tid, pool, addr, size );
         break;
      }

      case VG_USERREQ__MEMPOOL_FREE: {
         Addr pool = (Addr)arg[1];
         Addr addr = (Addr)arg[2];

         MC_(mempool_free) ( pool, addr );
         break;
      }

      case VG_USERREQ__MEMPOOL_TRIM: {
         Addr pool  = (Addr)arg[1];
         Addr addr  = (Addr)arg[2];
         SizeT size =       arg[3];

         MC_(mempool_trim) ( pool, addr, size );
         break;
      }

      case VG_USERREQ__MOVE_MEMPOOL: {
         Addr poolA = (Addr)arg[1];
         Addr poolB = (Addr)arg[2];

         MC_(move_mempool) ( poolA, poolB );
         break;
      }

      case VG_USERREQ__MEMPOOL_CHANGE: {
         Addr pool  = (Addr)arg[1];
         Addr addrA = (Addr)arg[2];
         Addr addrB = (Addr)arg[3];
         SizeT size =       arg[4];

         MC_(mempool_change) ( pool, addrA, addrB, size );
         break;
      }

      case VG_USERREQ__MEMPOOL_EXISTS: {
         Addr pool = (Addr)arg[1];

         *ret = (UWord) MC_(mempool_exists) ( pool );
         break;
      }

      case VG_USERREQ__GDB_MONITOR_COMMAND: {
         Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
         if (handled)
            *ret = 1;
         else
            *ret = 0;
         return handled;
      }

      case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
      case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
         Bool addRange
            = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
         Bool ok
            = modify_ignore_ranges(addRange, arg[1], arg[2]);
         tl_assert(ok);
         break;
      }

      default:
         VG_(message)(
            Vg_UserMsg,
            "Warning: unknown memcheck client request code %llx\n",
            (ULong)arg[0]
         );
         return False;
   }
   return True;
}


/*------------------------------------------------------------*/
/*--- Crude profiling machinery.                            ---*/
/*------------------------------------------------------------*/

// We track a number of interesting events (using PROF_EVENT)
// if MC_PROFILE_MEMORY is defined.
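
// Roughly, and only as a sketch (the definitive PROF_EVENT definition lives
// in mc_include.h, not here), the counters below get bumped by something
// equivalent to:
//
//    #ifdef MC_PROFILE_MEMORY
//    #  define PROF_EVENT(ev) MC_(event_ctr)[ev]++
//    #else
//    #  define PROF_EVENT(ev) /* nothing */
//    #endif
//
// e.g. PROF_EVENT(MCPE_LOADV32) on entry to the 32-bit load helper;
// done_prof_mem() below then prints every counter that ended up non-zero.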
#ifdef MC_PROFILE_MEMORY

ULong MC_(event_ctr)[MCPE_LAST];

/* Event counter names. Use the name of the function that increases the
   event counter. Drop any MC_() and mc_ prefixes. */
static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
   [MCPE_LOADVN_SLOW] = "LOADVn_slow",
   [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
   [MCPE_STOREVN_SLOW] = "STOREVn_slow",
   [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
   [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
   [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
        "make_aligned_word32_undefined_slow",
   [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
   [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
        "make_aligned_word64_undefined_slow",
   [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
   [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
        "make_aligned_word32_noaccess_slow",
   [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
   [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
        "make_aligned_word64_noaccess_slow",
   [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
   [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
   [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
   [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
   [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
   [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
   [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
   [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
   [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
   [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
   [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
   [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
   [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
   [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
   [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
   [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
   [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
        "is_mem_defined_comprehensive(loop)",
   [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
   [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
   [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
   [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
   [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
   [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
        "set_address_range_perms(single-secmap)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
        "set_address_range_perms(startof-secmap)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
        "set_address_range_perms(multiple-secmaps)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
        "set_address_range_perms(dist-sm1)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
        "set_address_range_perms(dist-sm2)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
        "set_address_range_perms(dist-sm1-quick)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
        "set_address_range_perms(dist-sm2-quick)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
   [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
        "set_address_range_perms(loop64K-free-dist-sm)",
   [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
   [MCPE_LOADV_128_OR_256] = "LOADV_128_or_256",
   [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
   [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
   [MCPE_LOADV64] = "LOADV64",
   [MCPE_LOADV64_SLOW1] = "LOADV64-slow1",
   [MCPE_LOADV64_SLOW2] = "LOADV64-slow2",
   [MCPE_STOREV64] = "STOREV64",
   [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
   [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
   [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
   [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
   [MCPE_LOADV32] = "LOADV32",
   [MCPE_LOADV32_SLOW1] = "LOADV32-slow1",
   [MCPE_LOADV32_SLOW2] = "LOADV32-slow2",
   [MCPE_STOREV32] = "STOREV32",
   [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
   [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
   [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
   [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
   [MCPE_LOADV16] = "LOADV16",
   [MCPE_LOADV16_SLOW1] = "LOADV16-slow1",
   [MCPE_LOADV16_SLOW2] = "LOADV16-slow2",
   [MCPE_STOREV16] = "STOREV16",
   [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
   [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
   [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
   [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
   [MCPE_LOADV8] = "LOADV8",
   [MCPE_LOADV8_SLOW1] = "LOADV8-slow1",
   [MCPE_LOADV8_SLOW2] = "LOADV8-slow2",
   [MCPE_STOREV8] = "STOREV8",
   [MCPE_STOREV8_SLOW1] = "STOREV8-slow1",
   [MCPE_STOREV8_SLOW2] = "STOREV8-slow2",
   [MCPE_STOREV8_SLOW3] = "STOREV8-slow3",
   [MCPE_STOREV8_SLOW4] = "STOREV8-slow4",
   [MCPE_NEW_MEM_STACK_4] = "new_mem_stack_4",
   [MCPE_NEW_MEM_STACK_8] = "new_mem_stack_8",
   [MCPE_NEW_MEM_STACK_12] = "new_mem_stack_12",
   [MCPE_NEW_MEM_STACK_16] = "new_mem_stack_16",
   [MCPE_NEW_MEM_STACK_32] = "new_mem_stack_32",
   [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
   [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
   [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
   [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
   [MCPE_DIE_MEM_STACK_4] = "die_mem_stack_4",
   [MCPE_DIE_MEM_STACK_8] = "die_mem_stack_8",
   [MCPE_DIE_MEM_STACK_12] = "die_mem_stack_12",
   [MCPE_DIE_MEM_STACK_16] = "die_mem_stack_16",
   [MCPE_DIE_MEM_STACK_32] = "die_mem_stack_32",
   [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
   [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
   [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
   [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
   [MCPE_NEW_MEM_STACK] = "new_mem_stack",
   [MCPE_DIE_MEM_STACK] = "die_mem_stack",
   [MCPE_MAKE_STACK_UNINIT_W_O] = "MAKE_STACK_UNINIT_w_o",
   [MCPE_MAKE_STACK_UNINIT_NO_O] = "MAKE_STACK_UNINIT_no_o",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
      = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
      = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
   [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
      = "MAKE_STACK_UNINIT_128_no_o_slowcase",
};

static void init_prof_mem ( void )
{
   Int i, name_count = 0;

   for (i = 0; i < MCPE_LAST; i++) {
      MC_(event_ctr)[i] = 0;
      if (MC_(event_ctr_name)[i] != NULL)
         ++name_count;
   }

   /* Make sure every profiling event has a name */
   tl_assert(name_count == MCPE_LAST);
}

static void done_prof_mem ( void )
{
   Int i, n;
   Bool spaced = False;
   for (i = n = 0; i < MCPE_LAST; i++) {
      if (!spaced && (n % 10) == 0) {
         VG_(printf)("\n");
         spaced = True;
      }
      if (MC_(event_ctr)[i] > 0) {
         spaced = False;
         n++;
         VG_(printf)( "prof mem event %3d: %11llu %s\n",
                      i, MC_(event_ctr)[i],
                      MC_(event_ctr_name)[i]);
      }
   }
}

#else

static void init_prof_mem ( void ) { }
static void done_prof_mem ( void ) { }

#endif
/*------------------------------------------------------------*/
/*--- Origin tracking stuff                                 ---*/
/*------------------------------------------------------------*/

/*--------------------------------------------*/
/*--- Origin tracking: load handlers       ---*/
/*--------------------------------------------*/

static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
   return or1 > or2 ? or1 : or2;
}

UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
   OCacheLine* line;
   UChar descr;
   UWord lineoff = oc_line_offset(a);
   UWord byteoff = a & 3; /* 0, 1, 2 or 3 */

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   }

   line = find_OCacheLine( a );

   descr = line->descr[lineoff];
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(descr < 0x10);
   }

   if (LIKELY(0 == (descr & (1 << byteoff)))) {
      return 0;
   } else {
      return line->w32[lineoff];
   }
}

UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
   OCacheLine* line;
   UChar descr;
   UWord lineoff, byteoff;

   if (UNLIKELY(a & 1)) {
      /* Handle misaligned case, slowly. */
      UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
      UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
      return merge_origins(oLo, oHi);
   }

   lineoff = oc_line_offset(a);
   byteoff = a & 3; /* 0 or 2 */

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   }

   line = find_OCacheLine( a );

   descr = line->descr[lineoff];
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(descr < 0x10);
   }

   if (LIKELY(0 == (descr & (3 << byteoff)))) {
      return 0;
   } else {
      return line->w32[lineoff];
   }
}

UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
   OCacheLine* line;
   UChar descr;
   UWord lineoff;

   if (UNLIKELY(a & 3)) {
      /* Handle misaligned case, slowly. */
      UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
      UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
      return merge_origins(oLo, oHi);
   }

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   }

   line = find_OCacheLine( a );

   descr = line->descr[lineoff];
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(descr < 0x10);
   }

   if (LIKELY(0 == descr)) {
      return 0;
   } else {
      return line->w32[lineoff];
   }
}

UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
   OCacheLine* line;
   UChar descrLo, descrHi, descr;
   UWord lineoff;

   if (UNLIKELY(a & 7)) {
      /* Handle misaligned case, slowly. */
      UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
      UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
      return merge_origins(oLo, oHi);
   }

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   }

   line = find_OCacheLine( a );

   descrLo = line->descr[lineoff + 0];
   descrHi = line->descr[lineoff + 1];
   descr   = descrLo | descrHi;
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(descr < 0x10);
   }

   if (LIKELY(0 == descr)) {
      return 0; /* both 32-bit chunks are defined */
   } else {
      UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
      UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
      return merge_origins(oLo, oHi);
   }
}

UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
   UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
   UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
   UInt oBoth = merge_origins(oLo, oHi);
   return (UWord)oBoth;
}

UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
   UInt oQ0  = (UInt)MC_(helperc_b_load8)( a + 0 );
   UInt oQ1  = (UInt)MC_(helperc_b_load8)( a + 8 );
   UInt oQ2  = (UInt)MC_(helperc_b_load8)( a + 16 );
   UInt oQ3  = (UInt)MC_(helperc_b_load8)( a + 24 );
   UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
                             merge_origins(oQ2, oQ3));
   return (UWord)oAll;
}


/*--------------------------------------------*/
/*--- Origin tracking: store handlers      ---*/
/*--------------------------------------------*/

void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
   OCacheLine* line;
   UWord lineoff = oc_line_offset(a);
   UWord byteoff = a & 3; /* 0, 1, 2 or 3 */

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   }

   line = find_OCacheLine( a );

   if (d32 == 0) {
      line->descr[lineoff] &= ~(1 << byteoff);
   } else {
      line->descr[lineoff] |= (1 << byteoff);
      line->w32[lineoff] = d32;
   }
}

void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
   OCacheLine* line;
   UWord lineoff, byteoff;

   if (UNLIKELY(a & 1)) {
      /* Handle misaligned case, slowly. */
      MC_(helperc_b_store1)( a + 0, d32 );
      MC_(helperc_b_store1)( a + 1, d32 );
      return;
   }

   lineoff = oc_line_offset(a);
   byteoff = a & 3; /* 0 or 2 */

   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   }

   line = find_OCacheLine( a );

   if (d32 == 0) {
      line->descr[lineoff] &= ~(3 << byteoff);
   } else {
      line->descr[lineoff] |= (3 << byteoff);
      line->w32[lineoff] = d32;
   }
}

void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
   OCacheLine* line;
   UWord lineoff;

   if (UNLIKELY(a & 3)) {
      /* Handle misaligned case, slowly. */
      MC_(helperc_b_store2)( a + 0, d32 );
      MC_(helperc_b_store2)( a + 2, d32 );
      return;
   }

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   }

   line = find_OCacheLine( a );

   if (d32 == 0) {
      line->descr[lineoff] = 0;
   } else {
      line->descr[lineoff] = 0xF;
      line->w32[lineoff] = d32;
   }
}

void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
   OCacheLine* line;
   UWord lineoff;

   if (UNLIKELY(a & 7)) {
      /* Handle misaligned case, slowly. */
      MC_(helperc_b_store4)( a + 0, d32 );
      MC_(helperc_b_store4)( a + 4, d32 );
      return;
   }

   lineoff = oc_line_offset(a);
   if (OC_ENABLE_ASSERTIONS) {
      tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   }

   line = find_OCacheLine( a );

   if (d32 == 0) {
      line->descr[lineoff + 0] = 0;
      line->descr[lineoff + 1] = 0;
   } else {
      line->descr[lineoff + 0] = 0xF;
      line->descr[lineoff + 1] = 0xF;
      line->w32[lineoff + 0] = d32;
      line->w32[lineoff + 1] = d32;
   }
}

void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
   MC_(helperc_b_store8)( a + 0, d32 );
   MC_(helperc_b_store8)( a + 8, d32 );
}

void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
   MC_(helperc_b_store8)( a +  0, d32 );
   MC_(helperc_b_store8)( a +  8, d32 );
   MC_(helperc_b_store8)( a + 16, d32 );
   MC_(helperc_b_store8)( a + 24, d32 );
}


/*--------------------------------------------*/
/*--- Origin tracking: sarp handlers       ---*/
/*--------------------------------------------*/

__attribute__((noinline))
static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
   if ((a & 1) && len >= 1) {
      MC_(helperc_b_store1)( a, otag );
      a++;
      len--;
   }
   if ((a & 2) && len >= 2) {
      MC_(helperc_b_store2)( a, otag );
      a += 2;
      len -= 2;
   }
   if (len >= 4)
      tl_assert(0 == (a & 3));
   while (len >= 4) {
      MC_(helperc_b_store4)( a, otag );
      a += 4;
      len -= 4;
   }
   if (len >= 2) {
      MC_(helperc_b_store2)( a, otag );
      a += 2;
      len -= 2;
   }
   if (len >= 1) {
      MC_(helperc_b_store1)( a, otag );
      a++;
      len--;
   }
   tl_assert(len == 0);
}
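
/* A worked example of the alignment peeling above (informal, illustrative
   only): for a = 0x1001, len = 14 the calls made, each with the same otag,
   are
      helperc_b_store1(0x1001)                leading byte    -> 2-aligned
      helperc_b_store2(0x1002)                leading half    -> 4-aligned
      helperc_b_store4(0x1004), (0x1008)      aligned middle
      helperc_b_store2(0x100C)                trailing half
      helperc_b_store1(0x100E)                trailing byte
   covering 1+2+4+4+2+1 = 14 bytes, with every store hitting the origin
   cache at its natural alignment.  ocache_sarp_Clear_Origins below walks a
   range the same way, just storing otag 0. */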
__attribute__((noinline))
static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
   if ((a & 1) && len >= 1) {
      MC_(helperc_b_store1)( a, 0 );
      a++;
      len--;
   }
   if ((a & 2) && len >= 2) {
      MC_(helperc_b_store2)( a, 0 );
      a += 2;
      len -= 2;
   }
   if (len >= 4)
      tl_assert(0 == (a & 3));
   while (len >= 4) {
      MC_(helperc_b_store4)( a, 0 );
      a += 4;
      len -= 4;
   }
   if (len >= 2) {
      MC_(helperc_b_store2)( a, 0 );
      a += 2;
      len -= 2;
   }
   if (len >= 1) {
      MC_(helperc_b_store1)( a, 0 );
      a++;
      len--;
   }
   tl_assert(len == 0);
}


/*------------------------------------------------------------*/
/*--- Setup and finalisation                                ---*/
/*------------------------------------------------------------*/

static void mc_post_clo_init ( void )
{
   /* If we've been asked to emit XML, mash around various other
      options so as to constrain the output somewhat. */
   if (VG_(clo_xml)) {
      /* Extract as much info as possible from the leak checker. */
      MC_(clo_leak_check) = LC_Full;
   }

   if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)
       && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
      VG_(message)(Vg_UserMsg,
                   "Warning: --freelist-big-blocks value %lld has no effect\n"
                   "as it is >= to --freelist-vol value %lld\n",
                   MC_(clo_freelist_big_blocks),
                   MC_(clo_freelist_vol));
   }

   if (MC_(clo_workaround_gcc296_bugs)
       && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
      VG_(message)(Vg_UserMsg,
         "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
         "Warning: Instead use: --ignore-range-below-sp=1024-1\n");
   }

   tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );

   if (MC_(clo_mc_level) == 3) {
      /* We're doing origin tracking. */
#     ifdef PERF_FAST_STACK
      VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
      VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
      VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
      VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
      VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
      VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
      VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
      VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
      VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
#     endif
      VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
      VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU  );
   } else {
      /* Not doing origin tracking */
#     ifdef PERF_FAST_STACK
      VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
      VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
      VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
      VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
      VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
      VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
      VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
      VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
      VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
#     endif
      VG_(track_new_mem_stack)        ( mc_new_mem_stack        );
      VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
   }

   // We assume that brk()/sbrk() does not initialise new memory.  Is this
   // accurate?  John Reiser says:
   //
   //   0) sbrk() can *decrease* process address space.  No zero fill is done
   //   for a decrease, not even the fragment on the high end of the last page
   //   that is beyond the new highest address.  For maximum safety and
   //   portability, the bytes in the last page that reside above [the
   //   new] sbrk(0) should be considered to be uninitialized, but in practice
   //   it is exceedingly likely that they will retain their previous
   //   contents.
   //
   //   1) If an increase is large enough to require new whole pages, then
   //   those new whole pages (like all new pages) are zero-filled by the
   //   operating system.  So if sbrk(0) already is page aligned, then
   //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
   //
   //   2) Any increase that lies within an existing allocated page is not
   //   changed.  So if (x = sbrk(0)) is not page aligned, then
   //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
   //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
   //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
   //   of them come along for the ride because the operating system deals
   //   only in whole pages.  Again, for maximum safety and portability,
   //   anything that lives above [the new] sbrk(0) should be considered
   //   uninitialized, but in practice will retain previous contents [zero in
   //   this case].
   //
   // A key property of sbrk/brk is that new whole pages that are supplied
   // by the operating system *do* get initialized to zero.
   //
   // As for the portability of all this:
   //
   //   sbrk and brk are not POSIX.  However, any system that is a derivative
   //   of *nix has sbrk and brk because there is too much software (such as
   //   the Bourne shell) which relies on the traditional memory map (.text,
   //   .data+.bss, stack) and the existence of sbrk/brk.
   //
   // So we should arguably observe all this.  However:
   // - The current inaccuracy has caused maybe one complaint in seven years(?)
   // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
   //   doubt most programmers know the above information.
   // So I'm not terribly unhappy with marking it as undefined. --njn.
   //
   // [More:  I think most of what John said only applies to sbrk().  It seems
   // that brk() always deals in whole pages.  And since this event deals
   // directly with brk(), not with sbrk(), perhaps it would be reasonable to
   // just mark all memory it allocates as defined.]
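   //
   // A concrete reading of point (2), with assumed numbers (illustrative
   // only, not tied to any particular platform): take PAGE_SIZE == 0x1000
   // and a current break of x == 0x804a400 (not page aligned).  Then
   //
   //    char* p = sbrk(0x1000);   // p == 0x804a400, new break 0x804b400
   //
   // hands back 0xc00 bytes ( == (PAGE_SIZE-1) & -x ) at 0x804a400..0x804afff
   // that live in the already-existing page and keep their old contents,
   // plus 0x400 bytes at 0x804b000..0x804b3ff in a freshly supplied page
   // that the kernel zero-fills.  Memcheck nevertheless treats the whole
   // range handed out by brk as undefined here, for the reasons above.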
#  if !defined(VGO_solaris)
   if (MC_(clo_mc_level) == 3)
      VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_make_ECU );
   else
      VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_no_ECU );
#  else
   // On Solaris, brk memory has to be marked as defined, otherwise we get
   // many false positives.
   VG_(track_new_mem_brk)         ( make_mem_defined_w_tid );
#  endif

   /* This origin tracking cache is huge (~100M), so only initialise
      if we need to. */
   if (MC_(clo_mc_level) >= 3) {
      init_OCache();
      tl_assert(ocacheL1 != NULL);
      tl_assert(ocacheL2 != NULL);
   } else {
      tl_assert(ocacheL1 == NULL);
      tl_assert(ocacheL2 == NULL);
   }

   MC_(chunk_poolalloc) = VG_(newPA)
      (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
       1000,
       VG_(malloc),
       "mc.cMC.1 (MC_Chunk pools)",
       VG_(free));

   /* Do not check definedness of guest state if --undef-value-errors=no */
   if (MC_(clo_mc_level) >= 2)
      VG_(track_pre_reg_read) ( mc_pre_reg_read );

   if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
      if (MC_(clo_keep_stacktraces) == KS_none
          || MC_(clo_keep_stacktraces) == KS_free)
         VG_(fmsg_bad_option)("--keep-stacktraces",
                              "To use --xtree-memory=full, you must"
                              " keep at least the alloc stacktrace\n");
      // Activate full xtree memory profiling.
      VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main));
   }
}
static void print_SM_info(const HChar* type, Int n_SMs)
{
   VG_(message)(Vg_DebugMsg,
      " memcheck: SMs: %s = %d (%luk, %luM)\n",
      type,
      n_SMs,
      n_SMs * sizeof(SecMap) / 1024UL,
      n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
}

static void mc_print_stats (void)
{
   SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;

   VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
                VG_(free_queue_volume), VG_(free_queue_length));
   VG_(message)(Vg_DebugMsg,
      " memcheck: sanity checks: %d cheap, %d expensive\n",
      n_sanity_cheap, n_sanity_expensive );
   VG_(message)(Vg_DebugMsg,
      " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
      n_auxmap_L2_nodes,
      n_auxmap_L2_nodes * 64,
      n_auxmap_L2_nodes / 16 );
   VG_(message)(Vg_DebugMsg,
      " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
      n_auxmap_L1_searches, n_auxmap_L1_cmps,
      (10ULL * n_auxmap_L1_cmps)
         / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
   );
   VG_(message)(Vg_DebugMsg,
      " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
      n_auxmap_L2_searches, n_auxmap_L2_nodes
   );

   print_SM_info("n_issued     ", n_issued_SMs);
   print_SM_info("n_deissued   ", n_deissued_SMs);
   print_SM_info("max_noaccess ", max_noaccess_SMs);
   print_SM_info("max_undefined", max_undefined_SMs);
   print_SM_info("max_defined  ", max_defined_SMs);
   print_SM_info("max_non_DSM  ", max_non_DSM_SMs);

   // Three DSMs, plus the non-DSM ones
   max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
   // The 3*sizeof(Word) bytes is the AVL node metadata size.
   // The VG_ROUNDUP is because the OSet pool allocator will/must align
   // the elements on pointer size.
   // Note that the pool allocator has some additional small overhead
   // which is not counted in the below.
   // Hardwiring this logic sucks, but I don't see how else to do it.
   max_secVBit_szB = max_secVBit_nodes *
      (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
   max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;

   VG_(message)(Vg_DebugMsg,
      " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
      max_secVBit_nodes, max_secVBit_szB / 1024,
      max_secVBit_szB / (1024 * 1024));
   VG_(message)(Vg_DebugMsg,
      " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
      sec_vbits_new_nodes + sec_vbits_updates,
      sec_vbits_new_nodes, sec_vbits_updates );
   VG_(message)(Vg_DebugMsg,
      " memcheck: max shadow mem size: %luk, %luM\n",
      max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));

   if (MC_(clo_mc_level) >= 3) {
      VG_(message)(Vg_DebugMsg,
                   " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
                   stats_ocacheL1_find,
                   stats_ocacheL1_misses,
                   stats_ocacheL1_lossage );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL1: %'12lu at 0   %'12lu at 1\n",
                   stats_ocacheL1_find
                      - stats_ocacheL1_misses
                      - stats_ocacheL1_found_at_1
                      - stats_ocacheL1_found_at_N,
                   stats_ocacheL1_found_at_1 );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
                   stats_ocacheL1_found_at_N,
                   stats_ocacheL1_movefwds );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL1: %'12lu sizeB  %'12d useful\n",
                   (SizeT)sizeof(OCache),
                   4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL2: %'12lu refs   %'12lu misses\n",
                   stats__ocacheL2_refs,
                   stats__ocacheL2_misses );
      VG_(message)(Vg_DebugMsg,
                   " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
                   stats__ocacheL2_n_nodes_max,
                   stats__ocacheL2_n_nodes );
      VG_(message)(Vg_DebugMsg,
                   " niacache: %'12lu refs   %'12lu misses\n",
                   stats__nia_cache_queries, stats__nia_cache_misses);
   } else {
      tl_assert(ocacheL1 == NULL);
      tl_assert(ocacheL2 == NULL);
   }
}
static void mc_fini ( Int exitcode )
{
   MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True);
   MC_(print_malloc_stats)();

   if (MC_(clo_leak_check) != LC_Off) {
      LeakCheckParams lcp;
      HChar* xt_filename = NULL;
      lcp.mode = MC_(clo_leak_check);
      lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
      lcp.heuristics = MC_(clo_leak_check_heuristics);
      lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
      lcp.deltamode = LCD_Any;
      lcp.max_loss_records_output = 999999999;
      lcp.requested_by_monitor_command = False;
      if (MC_(clo_xtree_leak)) {
         xt_filename = VG_(expand_file_name)("--xtree-leak-file",
                                             MC_(clo_xtree_leak_file));
         lcp.xt_filename = xt_filename;
      } else
         lcp.xt_filename = NULL;
      MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
      if (MC_(clo_xtree_leak))
         VG_(free)(xt_filename);
   } else {
      if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
         VG_(umsg)(
            "For a detailed leak analysis, rerun with: --leak-check=full\n");
      }
   }

   if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
      VG_(message)(Vg_UserMsg,
         "For counts of detected and suppressed errors, rerun with: -v\n");
   }

   if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
       && MC_(clo_mc_level) == 2) {
      VG_(message)(Vg_UserMsg,
                   "Use --track-origins=yes to see where "
                   "uninitialised values come from\n");
   }

   /* Print a warning if any client-request generated ignore-ranges
      still exist.  It would be reasonable to expect that a properly
      written program would remove any such ranges before exiting, and
      since they are a bit on the dangerous side, let's comment.  By
      contrast ranges which are specified on the command line normally
      pertain to hardware mapped into the address space, and so we
      can't expect the client to have got rid of them. */
   if (gIgnoredAddressRanges) {
      UInt i, nBad = 0;
      for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
         UWord val     = IAR_INVALID;
         UWord key_min = ~(UWord)0;
         UWord key_max = (UWord)0;
         VG_(indexRangeMap)( &key_min, &key_max, &val,
                             gIgnoredAddressRanges, i );
         if (val != IAR_ClientReq)
            continue;
         /* Print the offending range.  Also, if it is the first,
            print a banner before it. */
         nBad++;
         if (nBad == 1) {
            VG_(umsg)(
               "WARNING: exiting program has the following client-requested\n"
               "WARNING: address error disablement range(s) still in force,\n"
               "possibly as a result of some mistake in the use of the\n"
               "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n");
         }
         VG_(umsg)("   [%u]  0x%016lx-0x%016lx  %s\n",
                   i, key_min, key_max, showIARKind(val));
      }
   }

   if (0) {
      VG_(message)(Vg_DebugMsg,
         "------ Valgrind's client block stats follow ---------------\n" );
      show_client_block_stats();
   }
}
/* Mark the given addr/len unaddressable for watchpoint implementation.
   The PointKind will be handled at access time. */
static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
                                                  Addr addr, SizeT len)
{
   /* GDBTD this is somewhat fishy. We might rather have to save the previous
      accessibility and definedness in gdbserver so as to allow restoring it
      properly. Currently, we assume that the user only watches things
      which are properly addressable and defined. */
   if (insert)
      MC_(make_mem_noaccess) (addr, len);
   else
      MC_(make_mem_defined)  (addr, len);
   return True;
}
static void mc_pre_clo_init(void)
{
   VG_(details_name)            ("Memcheck");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a memory error detector");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 640 );

   VG_(basic_tool_funcs)          (mc_post_clo_init,
                                   MC_(instrument),
                                   mc_fini);

   VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );

   VG_(needs_core_errors)         ();
   VG_(needs_tool_errors)         (MC_(eq_Error),
                                   MC_(before_pp_Error),
                                   MC_(pp_Error),
                                   True,/*show TIDs for errors*/
                                   MC_(update_Error_extra),
                                   MC_(is_recognised_suppression),
                                   MC_(read_extra_suppression_info),
                                   MC_(error_matches_suppression),
                                   MC_(get_error_name),
                                   MC_(get_extra_suppression_info),
                                   MC_(print_extra_suppression_use),
                                   MC_(update_extra_suppression_use));
   VG_(needs_libc_freeres)        ();
   VG_(needs_cxx_freeres)         ();
   VG_(needs_command_line_options)(mc_process_cmd_line_options,
                                   mc_print_usage,
                                   mc_print_debug_usage);
   VG_(needs_client_requests)     (mc_handle_client_request);
   VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
                                   mc_expensive_sanity_check);
   VG_(needs_print_stats)         (mc_print_stats);
   VG_(needs_info_location)       (MC_(pp_describe_addr));
   VG_(needs_malloc_replacement)  (MC_(malloc),
                                   MC_(__builtin_new),
                                   MC_(__builtin_vec_new),
                                   MC_(memalign),
                                   MC_(calloc),
                                   MC_(free),
                                   MC_(__builtin_delete),
                                   MC_(__builtin_vec_delete),
                                   MC_(realloc),
                                   MC_(malloc_usable_size),
                                   MC_MALLOC_DEFAULT_REDZONE_SZB );
   MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();

   VG_(needs_xml_output)          ();

   VG_(track_new_mem_startup)     ( mc_new_mem_startup );

   // Handling of mmap and mprotect isn't simple (well, it is simple,
   // but the justification isn't.)  See comments above, just prior to
   // mc_new_mem_mmap.
   VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
   VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );

   VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );

   VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
   VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
   VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );

   /* Defer the specification of the new_mem_stack functions to the
      post_clo_init function, since we need to first parse the command
      line before deciding which set to use. */

#  ifdef PERF_FAST_STACK
   VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
   VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
   VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
   VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
   VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
   VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
   VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
   VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
   VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
#  endif
   VG_(track_die_mem_stack)       ( mc_die_mem_stack     );

   VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );

   VG_(track_pre_mem_read)        ( check_mem_is_defined );
   VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   VG_(track_pre_mem_write)       ( check_mem_is_addressable );
   VG_(track_post_mem_write)      ( mc_post_mem_write );

   VG_(track_post_reg_write)                  ( mc_post_reg_write );
   VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );

   if (MC_(clo_mc_level) >= 2) {
      VG_(track_copy_mem_to_reg)  ( mc_copy_mem_to_reg );
      VG_(track_copy_reg_to_mem)  ( mc_copy_reg_to_mem );
   }

   VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );

   init_shadow_memory();
   // MC_(chunk_poolalloc) must be allocated in post_clo_init
   tl_assert(MC_(chunk_poolalloc) == NULL);
   MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
   MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );

   tl_assert( mc_expensive_sanity_check() );

   // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
   tl_assert(sizeof(UWord) == sizeof(Addr));
   // Call me paranoid.  I don't care.
   tl_assert(sizeof(void*) == sizeof(Addr));

   // BYTES_PER_SEC_VBIT_NODE must be a power of two.
   tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));

   /* This is small.  Always initialise it. */
   init_nia_to_ecu_cache();

   /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
      if we need to, since the command line args haven't been
      processed yet.  Hence defer it to mc_post_clo_init. */
   tl_assert(ocacheL1 == NULL);
   tl_assert(ocacheL2 == NULL);

   /* Check some important stuff.  See extensive comments above
      re UNALIGNED_OR_HIGH for background. */
#  if VG_WORDSIZE == 4
   tl_assert(sizeof(void*) == 4);
   tl_assert(sizeof(Addr)  == 4);
   tl_assert(sizeof(UWord) == 4);
   tl_assert(sizeof(Word)  == 4);
   tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
   tl_assert(MASK(1) == 0UL);
   tl_assert(MASK(2) == 1UL);
   tl_assert(MASK(4) == 3UL);
   tl_assert(MASK(8) == 7UL);
#  else
   tl_assert(VG_WORDSIZE == 8);
   tl_assert(sizeof(void*) == 8);
   tl_assert(sizeof(Addr)  == 8);
   tl_assert(sizeof(UWord) == 8);
   tl_assert(sizeof(Word)  == 8);
   tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFFULL);
   tl_assert(MASK(1) == 0xFFFFFFF000000000ULL);
   tl_assert(MASK(2) == 0xFFFFFFF000000001ULL);
   tl_assert(MASK(4) == 0xFFFFFFF000000003ULL);
   tl_assert(MASK(8) == 0xFFFFFFF000000007ULL);
#  endif

   /* Check some assertions to do with the instrumentation machinery. */
   MC_(do_instrumentation_startup_checks)();
}

STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));

VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)

/*--------------------------------------------------------------------*/
/*--- end                                             mc_main.c ---*/
/*--------------------------------------------------------------------*/