/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- MemCheck: Maintain bitmaps of memory, tracking the          ---*/
/*--- accessibility (A) and validity (V) status of each byte.     ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2017 Julian Seward

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_tool_basics.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_gdbserver.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_machine.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_options.h"
#include "pub_tool_oset.h"
#include "pub_tool_rangemap.h"
#include "pub_tool_replacemalloc.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_xarray.h"
#include "pub_tool_xtree.h"
#include "pub_tool_xtmemory.h"

#include "mc_include.h"
#include "memcheck.h"   /* for client requests */
/* Set to 1 to do a little more sanity checking */
#define VG_DEBUG_MEMORY 0

#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)

static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
/*------------------------------------------------------------*/
/*--- Fast-case knobs                                      ---*/
/*------------------------------------------------------------*/

// Comment these out to disable the fast cases (don't just set them to zero).

/* PERF_FAST_LOADV is in mc_include.h */
#define PERF_FAST_STOREV 1

#define PERF_FAST_SARP 1

#define PERF_FAST_STACK 1
#define PERF_FAST_STACK2 1

/* Change this to 1 to enable assertions on origin tracking cache fast
   paths */
#define OC_ENABLE_ASSERTIONS 0
/*------------------------------------------------------------*/
/*--- Comments on the origin tracking implementation       ---*/
/*------------------------------------------------------------*/

/* See detailed comment entitled
   AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   which is contained further on in this file. */


/*------------------------------------------------------------*/
/*--- V bits and A bits                                    ---*/
/*------------------------------------------------------------*/

/* Conceptually, every byte value has 8 V bits, which track whether Memcheck
   thinks the corresponding value bit is defined.  And every memory byte
   has an A bit, which tracks whether Memcheck thinks the program can access
   it safely (ie. it's mapped, and has at least one of the RWX permission bits
   set).  So every N-bit register is shadowed with N V bits, and every memory
   byte is shadowed with 8 V bits and one A bit.

   In the implementation, we use two forms of compression (compressed V bits
   and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
   this scheme would otherwise impose.

   Memcheck also tracks extra information about each heap block that is
   allocated, for detecting memory leaks and other purposes.
*/
/*------------------------------------------------------------*/
/*--- Basic A/V bitmap representation.                     ---*/
/*------------------------------------------------------------*/

/* All reads and writes are checked against a memory map (a.k.a. shadow
   memory), which records the state of all memory in the process.

   On 32-bit machines the memory map is organised as follows.
   The top 16 bits of an address are used to index into a top-level
   map table, containing 65536 entries.  Each entry is a pointer to a
   second-level map, which records the accessibility and validity
   permissions for the 65536 bytes indexed by the lower 16 bits of the
   address.  Each byte is represented by two bits (details are below).  So
   each second-level map contains 16384 bytes.  This two-level arrangement
   conveniently divides the 4G address space into 64k lumps, each size 64k
   bytes.

   All entries in the primary (top-level) map must point to a valid
   secondary (second-level) map.  Since many of the 64kB chunks will
   have the same status for every bit -- ie. noaccess (for unused
   address space) or entirely addressable and defined (for code segments) --
   there are three distinguished secondary maps, which indicate 'noaccess',
   'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
   map entry points to the relevant distinguished map.  In practice,
   typically more than half of the addressable memory is represented with
   the 'undefined' or 'defined' distinguished secondary map, so it gives a
   good saving.  It also lets us set the V+A bits of large address regions
   quickly in set_address_range_perms().

   On 64-bit machines it's more complicated.  If we followed the same basic
   scheme we'd have a four-level table which would require too many memory
   accesses.  So instead the top-level map table has 2^20 entries (indexed
   using bits 16..35 of the address); this covers the bottom 64GB.  Any
   accesses above 64GB are handled with a slow, sparse auxiliary table.
   Valgrind's address space manager tries very hard to keep things below
   this 64GB barrier so that performance doesn't suffer too much.

   Note that this file has a lot of different functions for reading and
   writing shadow memory.  Only a couple are strictly necessary (eg.
   get_vabits2 and set_vabits2), most are just specialised for specific
   common cases to improve performance.

   Aside: the V+A bits are less precise than they could be -- we have no way
   of marking memory as read-only.  It would be great if we could add an
   extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
   which requires 2.3 bits to hold, and there's no way to do that elegantly
   -- we'd have to double up to 4 bits of metadata per byte, which doesn't
   seem worth it.
*/
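
/* A small illustration of the 32-bit two-level lookup described above.  This
   is only a sketch for the reader -- the helper below is hypothetical and
   not part of Memcheck; the real code is get_secmap_low_ptr(), SM_OFF() and
   friends further down this file.

      static inline UChar toy_shadow_lookup ( SecMap** primary, Addr a )
      {
         SecMap* sm = primary[a >> 16];          // top 16 bits: primary index
         return sm->vabits8[(a & 0xFFFF) >> 2];  // low 16 bits: byte within
      }                                          // the 64kB chunk, 4 bytes
                                                 // per shadow byte

   For example, a == 0x08049A3C selects primary entry 0x0804 and shadow byte
   0x9A3C >> 2 == 0x268F within that secondary map. */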
/* --------------- Basic configuration --------------- */

/* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */

#if VG_WORDSIZE == 4

/* cover the entire address space */
#  define N_PRIMARY_BITS  16

#else

/* Just handle the first 128G fast and the rest via auxiliary
   primaries.  If you change this, Memcheck will assert at startup.
   See the definition of UNALIGNED_OR_HIGH for extensive comments. */
#  define N_PRIMARY_BITS  21

#endif


/* Do not change this. */
#define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)

/* Do not change this. */
#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
/* --------------- Secondary maps --------------- */

// Each byte of memory conceptually has an A bit, which indicates its
// addressability, and 8 V bits, which indicate its definedness.
//
// But because very few bytes are partially defined, we can use a nice
// compression scheme to reduce the size of shadow memory.  Each byte of
// memory has 2 bits which indicate its state (ie. V+A bits):
//
//   00:  noaccess    (unaddressable but treated as fully defined)
//   01:  undefined   (addressable and fully undefined)
//   10:  defined     (addressable and fully defined)
//   11:  partdefined (addressable and partially defined)
//
// In the "partdefined" case, we use a secondary table to store the V bits.
// Each entry in the secondary-V-bits table maps a byte address to its 8 V
// bits.
//
// We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
// four bytes (32 bits) of memory are in each chunk.  Hence the name
// "vabits8".  This lets us get the V+A bits for four bytes at a time
// easily (without having to do any shifting and/or masking), and that is a
// very common operation.  (Note that although each vabits8 chunk
// is 8 bits in size, it represents 32 bits of memory.)
//
// The representation is "inverse" little-endian... each 4 bytes of
// memory is represented by a 1 byte value, where:
//
// - the status of byte (a+0) is held in bits [1..0]
// - the status of byte (a+1) is held in bits [3..2]
// - the status of byte (a+2) is held in bits [5..4]
// - the status of byte (a+3) is held in bits [7..6]
//
// It's "inverse" because endianness normally describes a mapping from
// value bits to memory addresses;  in this case the mapping is inverted.
// Ie. instead of particular value bits being held in certain addresses, in
// this case certain addresses are represented by particular value bits.
// See insert_vabits2_into_vabits8() for an example.
//
// But note that we don't compress the V bits stored in registers;  they
// need to be explicit to make the shadow operations possible.  Therefore
// when moving values between registers and memory we need to convert
// between the expanded in-register format and the compressed in-memory
// format.  This isn't so difficult, it just requires careful attention in a
// few places.
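//
// As a worked example of that packing (for illustration, using the
// VA_BITS2_* encodings defined just below): if the four bytes at a 4-aligned
// address a have the states
//   a+0 defined, a+1 undefined, a+2 noaccess, a+3 partdefined
// then the single shadow byte is
//   vabits8 = (VA_BITS2_DEFINED     << 0)   // 0x02
//           | (VA_BITS2_UNDEFINED   << 2)   // 0x04
//           | (VA_BITS2_NOACCESS    << 4)   // 0x00
//           | (VA_BITS2_PARTDEFINED << 6)   // 0xc0
//           = 0xc6;
// and extract_vabits2_from_vabits8(a+1, 0xc6) == VA_BITS2_UNDEFINED.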
// These represent eight bits of memory.
#define VA_BITS2_NOACCESS     0x0      // 00b
#define VA_BITS2_UNDEFINED    0x1      // 01b
#define VA_BITS2_DEFINED      0x2      // 10b
#define VA_BITS2_PARTDEFINED  0x3      // 11b

// These represent 16 bits of memory.
#define VA_BITS4_NOACCESS     0x0      // 00_00b
#define VA_BITS4_UNDEFINED    0x5      // 01_01b
#define VA_BITS4_DEFINED      0xa      // 10_10b

// These represent 32 bits of memory.
#define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
#define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
#define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b

// These represent 64 bits of memory.
#define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
#define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
#define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2

// These represent 128 bits of memory.
#define VA_BITS32_UNDEFINED   0x55555555  // 01_01_01_01b x 4


#define SM_CHUNKS             16384    // Each SM covers 64k of memory.
#define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
#define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
// Paranoia:  it's critical for performance that the requested inlining
// occurs.  So try extra hard.
#define INLINE    inline __attribute__((always_inline))

static INLINE Addr start_of_this_sm ( Addr a ) {
   return (a & (~SM_MASK));
}
static INLINE Bool is_start_of_sm ( Addr a ) {
   return (start_of_this_sm(a) == a);
}

STATIC_ASSERT(SM_CHUNKS % 2 == 0);

typedef
   union {
      UChar  vabits8[SM_CHUNKS];
      UShort vabits16[SM_CHUNKS/2];
   }
   SecMap;
// 3 distinguished secondary maps, one for no-access, one for
// accessible but undefined, and one for accessible and defined.
// Distinguished secondaries may never be modified.
#define SM_DIST_NOACCESS   0
#define SM_DIST_UNDEFINED  1
#define SM_DIST_DEFINED    2

static SecMap sm_distinguished[3];

static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
   return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
}

// Forward declaration
static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
/* dist_sm points to one of our three distinguished secondaries.  Make
   a copy of it so that we can write to it.
*/
static SecMap* copy_for_writing ( SecMap* dist_sm )
{
   SecMap* new_sm;
   tl_assert(dist_sm == &sm_distinguished[0]
          || dist_sm == &sm_distinguished[1]
          || dist_sm == &sm_distinguished[2]);

   new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
   if (new_sm == NULL)
      VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
                                   sizeof(SecMap) );
   VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
   update_SM_counts(dist_sm, new_sm);
   return new_sm;
}
/* --------------- Stats --------------- */

static Int   n_issued_SMs      = 0;
static Int   n_deissued_SMs    = 0;
static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
static Int   n_undefined_SMs   = 0;
static Int   n_defined_SMs     = 0;
static Int   n_non_DSM_SMs     = 0;
static Int   max_noaccess_SMs  = 0;
static Int   max_undefined_SMs = 0;
static Int   max_defined_SMs   = 0;
static Int   max_non_DSM_SMs   = 0;

/* # searches initiated in auxmap_L1, and # base cmps required */
static ULong n_auxmap_L1_searches  = 0;
static ULong n_auxmap_L1_cmps      = 0;
/* # of searches that missed in auxmap_L1 and therefore had to
   be handed to auxmap_L2.  And the number of nodes inserted. */
static ULong n_auxmap_L2_searches  = 0;
static ULong n_auxmap_L2_nodes     = 0;

static Int   n_sanity_cheap     = 0;
static Int   n_sanity_expensive = 0;

static Int   n_secVBit_nodes   = 0;
static Int   max_secVBit_nodes = 0;
static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
{
   if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
   else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
   else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
   else                                                  { n_non_DSM_SMs  --;
                                                            n_deissued_SMs++; }

   if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
   else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
   else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
   else                                                  { n_non_DSM_SMs  ++;
                                                            n_issued_SMs  ++; }

   if (n_noaccess_SMs  > max_noaccess_SMs )  max_noaccess_SMs  = n_noaccess_SMs;
   if (n_undefined_SMs > max_undefined_SMs)  max_undefined_SMs = n_undefined_SMs;
   if (n_defined_SMs   > max_defined_SMs  )  max_defined_SMs   = n_defined_SMs;
   if (n_non_DSM_SMs   > max_non_DSM_SMs  )  max_non_DSM_SMs   = n_non_DSM_SMs;
}
/* --------------- Primary maps --------------- */

/* The main primary map.  This covers some initial part of the address
   space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
   handled using the auxiliary primary map.
*/
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && (defined(VGP_arm_linux) \
        || defined(VGP_x86_linux) || defined(VGP_x86_solaris))
/* mc_main_asm.c needs visibility on a few things declared in this file.
   MC_MAIN_STATIC allows to define them static if ok, i.e. on
   platforms that are not using hand-coded asm statements. */
#define MC_MAIN_STATIC
#else
#define MC_MAIN_STATIC static
#endif
MC_MAIN_STATIC SecMap* primary_map[N_PRIMARY_MAP];
/* An entry in the auxiliary primary map.  base must be a 64k-aligned
   value, and sm points at the relevant secondary map.  As with the
   main primary map, the secondary may be either a real secondary, or
   one of the three distinguished secondaries.  DO NOT CHANGE THIS
   LAYOUT: the first word has to be the key for OSet fast lookups.
*/
typedef
   struct {
      Addr    base;
      SecMap* sm;
   }
   AuxMapEnt;

/* Tunable parameter: How big is the L1 queue? */
#define N_AUXMAP_L1 24

/* Tunable parameter: How far along the L1 queue to insert
   entries resulting from L2 lookups? */
#define AUXMAP_L1_INSERT_IX 12

static struct {
          Addr       base;
          AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
       }
       auxmap_L1[N_AUXMAP_L1];

static OSet* auxmap_L2 = NULL;
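
/* For illustration: auxmap_L1 is a small self-organising cache in front of
   auxmap_L2.  A sketch of the policy (implemented by maybe_find_in_auxmap()
   and insert_into_auxmap_L1_at() below): with N_AUXMAP_L1 == 24, a lookup
   that misses L1 but hits L2 re-inserts the entry at rank
   AUXMAP_L1_INSERT_IX (12); the entries previously at ranks 12..22 slide
   down one place, the old rank-23 entry falls out of L1 (it still lives in
   auxmap_L2), and later hits promote the entry one rank at a time towards
   rank 0. */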
static void init_auxmap_L1_L2 ( void )
{
   Int i;
   for (i = 0; i < N_AUXMAP_L1; i++) {
      auxmap_L1[i].base = 0;
      auxmap_L1[i].ent  = NULL;
   }

   tl_assert(0 == offsetof(AuxMapEnt,base));
   tl_assert(sizeof(Addr) == sizeof(void*));
   auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
                                    /*fastCmp*/ NULL,
                                    VG_(malloc), "mc.iaLL.1", VG_(free) );
}
/* Check representation invariants; if OK return NULL; else a
   descriptive bit of text.  Also return the number of
   non-distinguished secondary maps referred to from the auxiliary
   primary map. */
static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
{
   Word i, j;
   /* On a 32-bit platform, the L2 and L1 tables should
      both remain empty forever.

      On a 64-bit platform:
      In the L2 table:
       all .base & 0xFFFF == 0
       all .base > MAX_PRIMARY_ADDRESS
      In the L1 table:
       all .base & 0xFFFF == 0
       all (.base > MAX_PRIMARY_ADDRESS
            and .ent points to an AuxMapEnt with the same .base)
           or
           (.base == 0 and .ent == NULL)
   */
   *n_secmaps_found = 0;
   if (sizeof(void*) == 4) {
      /* 32-bit platform */
      if (VG_(OSetGen_Size)(auxmap_L2) != 0)
         return "32-bit: auxmap_L2 is non-empty";
      for (i = 0; i < N_AUXMAP_L1; i++)
         if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
            return "32-bit: auxmap_L1 is non-empty";
   } else {
      /* 64-bit platform */
      UWord elems_seen = 0;
      AuxMapEnt *elem, *res;
      AuxMapEnt key;
      /* L2 table */
      VG_(OSetGen_ResetIter)(auxmap_L2);
      while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
         elems_seen++;
         if (0 != (elem->base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
         if (elem->base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
         if (elem->sm == NULL)
            return "64-bit: .sm in _L2 is NULL";
         if (!is_distinguished_sm(elem->sm))
            (*n_secmaps_found)++;
      }
      if (elems_seen != n_auxmap_L2_nodes)
         return "64-bit: disagreement on number of elems in _L2";
      /* Check L1-L2 correspondence */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
            continue;
         if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
         if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
         if (auxmap_L1[i].ent == NULL)
            return "64-bit: .ent is NULL in auxmap_L1";
         if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
            return "64-bit: _L1 and _L2 bases are inconsistent";
         /* Look it up in auxmap_L2. */
         key.base = auxmap_L1[i].base;
         key.sm   = NULL;
         res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
         if (!res)
            return "64-bit: _L1 .base not found in _L2";
         if (res != auxmap_L1[i].ent)
            return "64-bit: _L1 .ent disagrees with _L2 entry";
      }
      /* Check L1 contains no duplicates */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0)
            continue;
         for (j = i+1; j < N_AUXMAP_L1; j++) {
            if (auxmap_L1[j].base == 0)
               continue;
            if (auxmap_L1[j].base == auxmap_L1[i].base)
               return "64-bit: duplicate _L1 .base entries";
         }
      }
   }

   return NULL; /* ok */
}
static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
{
   Word i;
   tl_assert(ent);
   tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
   for (i = N_AUXMAP_L1-1; i > rank; i--)
      auxmap_L1[i] = auxmap_L1[i-1];
   auxmap_L1[rank].base = ent->base;
   auxmap_L1[rank].ent  = ent;
}
static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
{
   AuxMapEnt  key;
   AuxMapEnt* res;
   Word       i;

   tl_assert(a > MAX_PRIMARY_ADDRESS);
   a &= ~(Addr)0xFFFF;

   /* First search the front-cache, which is a self-organising
      list containing the most popular entries. */

   if (LIKELY(auxmap_L1[0].base == a))
      return auxmap_L1[0].ent;
   if (LIKELY(auxmap_L1[1].base == a)) {
      Addr       t_base = auxmap_L1[0].base;
      AuxMapEnt* t_ent  = auxmap_L1[0].ent;
      auxmap_L1[0].base = auxmap_L1[1].base;
      auxmap_L1[0].ent  = auxmap_L1[1].ent;
      auxmap_L1[1].base = t_base;
      auxmap_L1[1].ent  = t_ent;
      return auxmap_L1[0].ent;
   }

   n_auxmap_L1_searches++;

   for (i = 0; i < N_AUXMAP_L1; i++) {
      if (auxmap_L1[i].base == a) {
         break;
      }
   }
   tl_assert(i >= 0 && i <= N_AUXMAP_L1);

   n_auxmap_L1_cmps += (ULong)(i+1);

   if (i < N_AUXMAP_L1) {
      /* Found it; nudge it one step closer to the front. */
      Addr       t_base = auxmap_L1[i-1].base;
      AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
      auxmap_L1[i-1].base = auxmap_L1[i-0].base;
      auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
      auxmap_L1[i-0].base = t_base;
      auxmap_L1[i-0].ent  = t_ent;
      i--;

      return auxmap_L1[i].ent;
   }

   n_auxmap_L2_searches++;

   /* First see if we already have it. */
   key.base = a;
   key.sm   = 0;

   res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
   if (res)
      insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
   return res;
}
static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
{
   AuxMapEnt *nyu, *res;

   /* First see if we already have it. */
   res = maybe_find_in_auxmap( a );
   if (LIKELY(res))
      return res;

   /* Ok, there's no entry in the secondary map, so we'll have
      to allocate one. */
   a &= ~(Addr)0xFFFF;

   nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
   nyu->base = a;
   nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
   VG_(OSetGen_Insert)( auxmap_L2, nyu );
   insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
   n_auxmap_L2_nodes++;
   return nyu;
}
/* --------------- SecMap fundamentals --------------- */

// In all these, 'low' means it's definitely in the main primary map,
// 'high' means it's definitely in the auxiliary table.

static INLINE UWord get_primary_map_low_offset ( Addr a )
{
   UWord pm_off = a >> 16;
   return pm_off;
}

static INLINE SecMap** get_secmap_low_ptr ( Addr a )
{
   UWord pm_off = a >> 16;
#  if VG_DEBUG_MEMORY >= 1
   tl_assert(pm_off < N_PRIMARY_MAP);
#  endif
   return &primary_map[ pm_off ];
}

static INLINE SecMap** get_secmap_high_ptr ( Addr a )
{
   AuxMapEnt* am = find_or_alloc_in_auxmap(a);
   return &am->sm;
}

static INLINE SecMap** get_secmap_ptr ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_low_ptr(a)
          : get_secmap_high_ptr(a));
}

static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
{
   return *get_secmap_low_ptr(a);
}

static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
{
   return *get_secmap_high_ptr(a);
}

static INLINE SecMap* get_secmap_for_writing_low(Addr a)
{
   SecMap** p = get_secmap_low_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
{
   SecMap** p = get_secmap_high_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may be a distinguished one as the caller will only want to
   be able to read it.
*/
static INLINE SecMap* get_secmap_for_reading ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_reading_low (a)
          : get_secmap_for_reading_high(a) );
}

/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may not be a distinguished one, since the caller will want
   to be able to write it.  If it is a distinguished secondary, make a
   writable copy of it, install it, and return the copy instead.  (COW
   semantics).
*/
static INLINE SecMap* get_secmap_for_writing ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_writing_low (a)
          : get_secmap_for_writing_high(a) );
}

/* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
   allocate one if one doesn't already exist.  This is used by the
   leak checker.
*/
static SecMap* maybe_get_secmap_for ( Addr a )
{
   if (a <= MAX_PRIMARY_ADDRESS) {
      return get_secmap_for_reading_low(a);
   } else {
      AuxMapEnt* am = maybe_find_in_auxmap(a);
      return am ? am->sm : NULL;
   }
}
/* --------------- Fundamental functions --------------- */

static INLINE
void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
{
   UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
   *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
   *vabits8  |=  (vabits2 << shift);   // mask in the two new bits
}

static INLINE
void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift     =  (a & 2)   << 1;        // shift by 0 or 4
   *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
   *vabits8 |=  (vabits4  << shift);   // mask in the four new bits
}

static INLINE
UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
   vabits8 >>= shift;                  // shift the two bits to the bottom
   return 0x3 & vabits8;               // mask out the rest
}

static INLINE
UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift = (a & 2) << 1;               // shift by 0 or 4
   vabits8 >>= shift;                  // shift the four bits to the bottom
   return 0xf & vabits8;               // mask out the rest
}

// Note that these four are only used in slow cases.  The fast cases do
// clever things like combine the auxmap check (in
// get_secmap_{read,writ}able) with alignment checks.

// Any time this function is called, if it is possible that vabits2
// is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
// sec-V-bits table must also be set!
static INLINE
void set_vabits2 ( Addr a, UChar vabits2 )
{
   SecMap* sm       = get_secmap_for_writing(a);
   UWord   sm_off   = SM_OFF(a);
   insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
}

static INLINE
UChar get_vabits2 ( Addr a )
{
   SecMap* sm       = get_secmap_for_reading(a);
   UWord   sm_off   = SM_OFF(a);
   UChar   vabits8  = sm->vabits8[sm_off];
   return extract_vabits2_from_vabits8(a, vabits8);
}

// Any time this function is called, if it is possible that any of the
// 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
// corresponding entry(s) in the sec-V-bits table must also be set!
static INLINE
UChar get_vabits8_for_aligned_word32 ( Addr a )
{
   SecMap* sm       = get_secmap_for_reading(a);
   UWord   sm_off   = SM_OFF(a);
   UChar   vabits8  = sm->vabits8[sm_off];
   return vabits8;
}

static INLINE
void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
{
   SecMap* sm       = get_secmap_for_writing(a);
   UWord   sm_off   = SM_OFF(a);
   sm->vabits8[sm_off] = vabits8;
}


// Forward declarations
static UWord get_sec_vbits8(Addr a);
static void  set_sec_vbits8(Addr a, UWord vbits8);

// Returns False if there was an addressability error.
static INLINE
Bool set_vbits8 ( Addr a, UChar vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);
   if ( VA_BITS2_NOACCESS != vabits2 ) {
      // Addressable.  Convert in-register format to in-memory format.
      // Also remove any existing sec V bit entry for the byte if no
      // longer necessary.
      if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
      else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
      else                                    { vabits2 = VA_BITS2_PARTDEFINED;
                                                set_sec_vbits8(a, vbits8);  }
      set_vabits2(a, vabits2);

   } else {
      // Unaddressable!  Do nothing -- when writing to unaddressable
      // memory it acts as a black hole, and the V bits can never be seen
      // again.  So we don't have to write them at all.
      ok = False;
   }
   return ok;
}

// Returns False if there was an addressability error.  In that case, we put
// all defined bits into vbits8.
static INLINE
Bool get_vbits8 ( Addr a, UChar* vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);

   // Convert the in-memory format to in-register format.
   if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
   else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
   else if ( VA_BITS2_NOACCESS  == vabits2 ) {
      *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
      ok = False;
   } else {
      tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
      *vbits8 = get_sec_vbits8(a);
   }
   return ok;
}
/* --------------- Secondary V bit table ------------ */

// This table holds the full V bit pattern for partially-defined bytes
// (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
// memory.
//
// Note: the nodes in this table can become stale.  Eg. if you write a PDB,
// then overwrite the same address with a fully defined byte, the sec-V-bit
// node will not necessarily be removed.  This is because checking for
// whether removal is necessary would slow down the fast paths.
//
// To avoid the stale nodes building up too much, we periodically (once the
// table reaches a certain size) garbage collect (GC) the table by
// traversing it and evicting any nodes not having a PDB.
// If more than a certain proportion of nodes survived, we increase the
// table size so that GCs occur less often.
//
// This policy is designed to avoid bad table bloat in the worst case where
// a program creates huge numbers of stale PDBs -- we would get this bloat
// if we had no GC -- while handling well the case where a node becomes
// stale but shortly afterwards is rewritten with a PDB and so becomes
// non-stale again (which happens quite often, eg. in perf/bz2).  If we just
// remove all stale nodes as soon as possible, we just end up re-adding a
// lot of them later again.  The "sufficiently stale" approach avoids
// this.  (If a program has many live PDBs, performance will just suck,
// there's no way around that.)
//
// Further comments, JRS 14 Feb 2012.  It turns out that the policy of
// holding on to stale entries for 2 GCs before discarding them can lead
// to massive space leaks.  So we're changing to an arrangement where
// lines are evicted as soon as they are observed to be stale during a
// GC.  This also has a side benefit of allowing the sufficiently_stale
// field to be removed from the SecVBitNode struct, reducing its size by
// 8 bytes, which is a substantial space saving considering that the
// struct was previously 32 or so bytes, on a 64 bit target.
//
// In order to try and mitigate the problem that the "sufficiently stale"
// heuristic was designed to avoid, the table size is allowed to drift
// up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
// means that nodes will exist in the table longer on average, and hopefully
// will be deleted and re-added less frequently.
//
// The previous scaling up mechanism (now called STEPUP) is retained:
// if residency exceeds 50%, the table is scaled up, although by a
// factor sqrt(2) rather than 2 as before.  This effectively doubles the
// frequency of GCs when there are many PDBs and reduces the tendency of
// stale PDBs to reside for long periods in the table.
static OSet* secVBitTable;

static ULong sec_vbits_new_nodes = 0;
static ULong sec_vbits_updates   = 0;

// This must be a power of two;  this is checked in mc_pre_clo_init().
// The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
// a larger address range) they take more space but we can get multiple
// partially-defined bytes in one if they are close to each other, reducing
// the number of total nodes.  In practice sometimes they are clustered (eg.
// perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
// row), but often not.  So we choose something intermediate.
#define BYTES_PER_SEC_VBIT_NODE     16

// We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
// more than this many nodes survive a GC.
#define STEPUP_SURVIVOR_PROPORTION  0.5
#define STEPUP_GROWTH_FACTOR        1.414213562

// If the above heuristic doesn't apply, then we may make the table
// slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
// this many nodes survive a GC, _and_ the total table size does
// not exceed a fixed limit.  The numbers are somewhat arbitrary, but
// work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
// effectively, although gradually, reduces residency and increases time
// between GCs for programs with small numbers of PDBs.  The 80000 limit
// effectively limits the table size to around 2MB for programs with
// small numbers of PDBs, whilst giving a reasonably long lifetime to
// entries, to try and reduce the costs resulting from deleting and
// re-adding of entries.
#define DRIFTUP_SURVIVOR_PROPORTION 0.15
#define DRIFTUP_GROWTH_FACTOR       1.015
#define DRIFTUP_MAX_SIZE            80000
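
// For illustration, a worked example of the sizing arithmetic above (the
// numbers are hypothetical): suppose the table limit is 1000 nodes when a GC
// runs.  If more than 500 nodes (50%) survive, STEPUP applies and the limit
// becomes about 1000 * 1.414 = 1414.  If instead, say, 200 nodes (20%)
// survive -- above the 15% DRIFTUP threshold but below 50% -- the limit only
// drifts up to 1000 * 1.015 = 1015, and keeps drifting on later GCs until it
// reaches DRIFTUP_MAX_SIZE (80000); beyond that point only STEPUP can grow
// it further.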
// We GC the table when it gets this many nodes in it, ie. it's effectively
// the table size.  It can change.
static Int  secVBitLimit = 1000;

// The number of GCs done, used to age sec-V-bit nodes for eviction.
// Because it's unsigned, wrapping doesn't matter -- the right answer will
// come out anyway.
static UInt GCs_done = 0;

typedef
   struct {
      Addr  a;
      UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
   }
   SecVBitNode;
static OSet* createSecVBitTable(void)
{
   OSet* newSecVBitTable;
   newSecVBitTable = VG_(OSetGen_Create_With_Pool)
      ( offsetof(SecVBitNode, a),
        NULL, // use fast comparisons
        VG_(malloc), "mc.cSVT.1 (sec VBit table)",
        VG_(free),
        1000,
        sizeof(SecVBitNode));
   return newSecVBitTable;
}
static void gcSecVBitTable(void)
{
   OSet*        secVBitTable2;
   SecVBitNode* n;
   Int          i, n_nodes = 0, n_survivors = 0;

   GCs_done++;

   // Create the new table.
   secVBitTable2 = createSecVBitTable();

   // Traverse the table, moving fresh nodes into the new table.
   VG_(OSetGen_ResetIter)(secVBitTable);
   while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
      // Keep node if any of its bytes are non-stale.  Using
      // get_vabits2() for the lookup is not very efficient, but I don't
      // think it matters.
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
            // Found a non-stale byte, so keep =>
            // Insert a copy of the node into the new table.
            SecVBitNode* n2 =
               VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
            *n2 = *n;
            VG_(OSetGen_Insert)(secVBitTable2, n2);
            break;
         }
      }
   }

   // Get the before and after sizes.
   n_nodes     = VG_(OSetGen_Size)(secVBitTable);
   n_survivors = VG_(OSetGen_Size)(secVBitTable2);

   // Destroy the old table, and put the new one in its place.
   VG_(OSetGen_Destroy)(secVBitTable);
   secVBitTable = secVBitTable2;

   if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
      VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
                   n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
   }

   // Increase table size if necessary.
   if ((Double)n_survivors
       > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
      secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg,
                      "memcheck GC: %d new table size (stepup)\n",
                      secVBitLimit);
   }
   else
   if (secVBitLimit < DRIFTUP_MAX_SIZE
       && (Double)n_survivors
          > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
      secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg,
                      "memcheck GC: %d new table size (driftup)\n",
                      secVBitLimit);
   }
}
static UWord get_sec_vbits8(Addr a)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   UChar        vbits8;
   tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   vbits8 = n->vbits8[amod];
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   return vbits8;
}
static void set_sec_vbits8(Addr a, UWord vbits8)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   if (n) {
      n->vbits8[amod] = vbits8;     // update
      sec_vbits_updates++;
   } else {
      // Do a table GC if necessary.  Nb: do this before creating and
      // inserting the new node, to avoid erroneously GC'ing the new node.
      if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
         gcSecVBitTable();
      }

      // New node:  assign the specific byte, make the rest invalid (they
      // should never be read as-is, but be cautious).
      n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
      n->a = aAligned;
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         n->vbits8[i] = V_BITS8_UNDEFINED;
      }
      n->vbits8[amod] = vbits8;

      // Insert the new node.
      VG_(OSetGen_Insert)(secVBitTable, n);
      sec_vbits_new_nodes++;

      n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
      if (n_secVBit_nodes > max_secVBit_nodes)
         max_secVBit_nodes = n_secVBit_nodes;
   }
}
/* --------------- Endianness helpers --------------- */

/* Returns the offset in memory of the byteno-th most significant byte
   in a wordszB-sized word, given the specified endianness. */
static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
                                    UWord byteno ) {
   return bigendian ? (wordszB-1-byteno) : byteno;
}
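
/* A quick numeric illustration (for the reader, not part of the original
   logic):
      byte_offset_w(4, /*bigendian*/True,  3) == 0
      byte_offset_w(4, /*bigendian*/False, 3) == 3
   so the slow-path loops below, which walk byteno from szB-1 down to 0 and
   accumulate with "vbits64 <<= 8", fetch first the memory byte that is most
   significant for the given endianness. */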
/* --------------- Ignored address ranges --------------- */

/* Denotes the address-error-reportability status for address ranges:
      IAR_NotIgnored:  the usual case -- report errors in this range
      IAR_CommandLine: don't report errors -- from command line setting
      IAR_ClientReq:   don't report errors -- from client request
*/
typedef
   enum { IAR_INVALID=99,
          IAR_NotIgnored,
          IAR_CommandLine,
          IAR_ClientReq }
   IARKind;

static const HChar* showIARKind ( IARKind iark )
{
   switch (iark) {
      case IAR_INVALID:     return "INVALID";
      case IAR_NotIgnored:  return "NotIgnored";
      case IAR_CommandLine: return "CommandLine";
      case IAR_ClientReq:   return "ClientReq";
      default:              return "???";
   }
}

// RangeMap<IARKind>
static RangeMap* gIgnoredAddressRanges = NULL;
static void init_gIgnoredAddressRanges ( void )
{
   if (LIKELY(gIgnoredAddressRanges != NULL))
      return;
   gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
                                             VG_(free), IAR_NotIgnored );
}
Bool MC_(in_ignored_range) ( Addr a )
{
   if (LIKELY(gIgnoredAddressRanges == NULL))
      return False;
   UWord how     = IAR_INVALID;
   UWord key_min = ~(UWord)0;
   UWord key_max = (UWord)0;
   VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
   tl_assert(key_min <= a && a <= key_max);
   switch (how) {
      case IAR_NotIgnored:  return False;
      case IAR_CommandLine: return True;
      case IAR_ClientReq:   return True;
      default: break; /* invalid */
   }
   VG_(tool_panic)("MC_(in_ignore_range)");
}
Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB )
{
   if (LIKELY(!MC_(clo_ignore_range_below_sp)))
      return False;
   tl_assert(szB >= 1 && szB <= 32);
   tl_assert(MC_(clo_ignore_range_below_sp__first_offset)
             > MC_(clo_ignore_range_below_sp__last_offset));
   Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset);
   Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset);
   if (range_lo >= range_hi) {
      /* Bizarre.  We have a wraparound situation.  What should we do? */
      return False; // Play safe
   } else {
      /* This is the expected case. */
      if (range_lo <= a && a + szB - 1 <= range_hi)
         return True;
   }
   return False;
}
/* Parse two Addrs (in hex) separated by a dash, or fail. */

static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 )
{
   Bool ok = VG_(parse_Addr) (ppc, result1);
   if (!ok)
      return False;
   if (**ppc != '-')
      return False;
   (*ppc)++;
   ok = VG_(parse_Addr) (ppc, result2);
   if (!ok)
      return False;
   return True;
}
/* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
   or fail. */

static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 )
{
   Bool ok = VG_(parse_UInt) (ppc, result1);
   if (!ok)
      return False;
   if (**ppc != '-')
      return False;
   (*ppc)++;
   ok = VG_(parse_UInt) (ppc, result2);
   if (!ok)
      return False;
   return True;
}
/* Parse a set of ranges separated by commas into 'ignoreRanges', or
   fail.  If they are valid, add them to the global set of ignored
   ranges. */
static Bool parse_ignore_ranges ( const HChar* str0 )
{
   init_gIgnoredAddressRanges();
   const HChar*  str = str0;
   const HChar** ppc = &str;
   while (1) {
      Addr start = ~(Addr)0;
      Addr end   = (Addr)0;
      Bool ok    = parse_Addr_pair(ppc, &start, &end);
      if (!ok)
         return False;
      if (start > end)
         return False;
      VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
      if (**ppc == 0)
         return True;
      if (**ppc != ',')
         return False;
      (*ppc)++;
   }
   /*NOTREACHED*/
   return False;
}
/* Add or remove [start, +len) from the set of ignored ranges. */
static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
{
   init_gIgnoredAddressRanges();
   const Bool verbose = (VG_(clo_verbosity) > 1);
   if (len == 0) {
      return False;
   }
   if (addRange) {
      VG_(bindRangeMap)(gIgnoredAddressRanges,
                        start, start+len-1, IAR_ClientReq);
      if (verbose)
         VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
                   (void*)start, (void*)(start+len-1));
   } else {
      VG_(bindRangeMap)(gIgnoredAddressRanges,
                        start, start+len-1, IAR_NotIgnored);
      if (verbose)
         VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
                   (void*)start, (void*)(start+len-1));
   }
   if (verbose) {
      VG_(dmsg)("memcheck: now have %u ranges:\n",
                VG_(sizeRangeMap)(gIgnoredAddressRanges));
      UInt i;
      for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
         UWord val     = IAR_INVALID;
         UWord key_min = ~(UWord)0;
         UWord key_max = (UWord)0;
         VG_(indexRangeMap)( &key_min, &key_max, &val,
                             gIgnoredAddressRanges, i );
         VG_(dmsg)("memcheck: [%u] %016lx-%016lx %s\n",
                   i, key_min, key_max, showIARKind(val));
      }
   }
   return True;
}
/* --------------- Load/store slow cases. --------------- */

static
__attribute__((noinline))
void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
                                Addr a, SizeT nBits, Bool bigendian )
{
   ULong  pessim[4];     /* only used when p-l-ok=yes */
   SSizeT szB            = nBits / 8;
   SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
   SSizeT i, j;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   /* Code below assumes load size is a power of two and at least 64
      bits. */
   tl_assert((szB & (szB-1)) == 0 && szL > 0);

   /* If this triggers, you probably just need to increase the size of
      the pessim array. */
   tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));

   for (j = 0; j < szL; j++) {
      pessim[j] = V_BITS64_DEFINED;
      res[j] = V_BITS64_UNDEFINED;
   }

   /* Make up a result V word, which contains the loaded data for
      valid addresses and Defined for invalid addresses.  Iterate over
      the bytes in the word, from the most significant down to the
      least.  The vbits to return are calculated into vbits128.  Also
      compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from the pessim array) but is used as a
      cross-check. */
   for (j = szL-1; j >= 0; j--) {
      ULong vbits64    = V_BITS64_UNDEFINED;
      ULong pessim64   = V_BITS64_DEFINED;
      UWord long_index = byte_offset_w(szL, bigendian, j);
      for (i = 8-1; i >= 0; i--) {
         PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
         ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
         ok = get_vbits8(ai, &vbits8);
         vbits64 <<= 8;
         vbits64 |= vbits8;
         if (!ok) n_addrs_bad++;
         pessim64 <<= 8;
         pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
      }
      res[long_index] = vbits64;
      pessim[long_index] = pessim64;
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault".

      Therefore, a load from a partially-addressable place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's an aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address.
   */

   /* "at least one of the addresses is invalid" */
   ok = False;
   for (j = 0; j < szL; j++)
      ok |= pessim[j] != V_BITS64_DEFINED;
   tl_assert(ok);

#  if defined(VGP_s390x_linux)
   tl_assert(szB == 16); // s390 doesn't have > 128 bit SIMD
   /* OK if all loaded bytes are from the same page. */
   Bool alignedOK = ((a & 0xfff) <= 0x1000 - szB);
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* lxvd2x might generate an unaligned 128 bit vector load. */
   Bool alignedOK = (szB == 16);
#  else
   /* OK if the address is aligned by the load size. */
   Bool alignedOK = (0 == (a & (szB - 1)));
#  endif

   if (alignedOK && n_addrs_bad < szB) {
      /* Exemption applies.  Use the previously computed pessimising
         value and return the combined result, but don't flag an
         addressing error.  The pessimising value is Defined for valid
         addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
      for (j = szL-1; j >= 0; j--)
         res[j] |= pessim[j];
      return;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
}
MC_MAIN_STATIC
__attribute__((noinline))
__attribute__((used))
VG_REGPARM(3)
ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian );

MC_MAIN_STATIC
__attribute__((noinline))
__attribute__((used))
VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
                 this function may get called from hand written assembly. */
ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
{
   PROF_EVENT(MCPE_LOADVN_SLOW);

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  Are merely a speedup hack; can be
      omitted without loss of correctness/functionality.  Note that in
      both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  These are derived
      from LOADV64 and LOADV32.
   */
#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (LIKELY(sizeof(void*) == 4 && nBits == 64 && VG_IS_8_ALIGNED(a)))
#  else
   if (LIKELY(sizeof(void*) == 8 && nBits == 64 && VG_IS_8_ALIGNED(a)))
#  endif
   {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = sm->vabits16[sm_off16];
      if (LIKELY(vabits16 == VA_BITS16_DEFINED))
         return V_BITS64_DEFINED;
      if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
         return V_BITS64_UNDEFINED;
      /* else fall into the slow case */
   }

#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (LIKELY(sizeof(void*) == 4 && nBits == 32 && VG_IS_4_ALIGNED(a)))
#  else
   if (LIKELY(sizeof(void*) == 8 && nBits == 32 && VG_IS_4_ALIGNED(a)))
#  endif
   {
      SecMap* sm      = get_secmap_for_reading(a);
      UWord   sm_off  = SM_OFF(a);
      UWord   vabits8 = sm->vabits8[sm_off];
      if (LIKELY(vabits8 == VA_BITS8_DEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      /* else fall into slow case */
   }

   /* ------------ END semi-fast cases ------------ */

   ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   SSizeT szB         = nBits / 8;
   SSizeT i;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Make up a 64-bit result V word, which contains the loaded data
      for valid addresses and Defined for invalid addresses.  Iterate
      over the bytes in the word, from the most significant down to
      the least.  The vbits to return are calculated into vbits64.
      Also compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from pessim64) but is used as a
      cross-check. */
   for (i = szB-1; i >= 0; i--) {
      PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
      ai = a + byte_offset_w(szB, bigendian, i);
      ok = get_vbits8(ai, &vbits8);
      vbits64 <<= 8;
      vbits64 |= vbits8;
      if (!ok) n_addrs_bad++;
      pessim64 <<= 8;
      pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return vbits64;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return vbits64;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned word-sized chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault".  Despite such behaviour being
      declared undefined by ANSI C/C++.

      Therefore, a load from a partially-addressable place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's a word-sized, word-aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address.
   */

   /* "at least one of the addresses is invalid" */
   tl_assert(pessim64 != V_BITS64_DEFINED);

#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (szB == VG_WORDSIZE * 2 && VG_IS_WORD_ALIGNED(a)
       && n_addrs_bad < VG_WORDSIZE * 2)
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* On power unaligned loads of words are OK. */
   if (szB == VG_WORDSIZE && n_addrs_bad < VG_WORDSIZE)
#  else
   if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
       && n_addrs_bad < VG_WORDSIZE)
#  endif
   {
      /* Exemption applies.  Use the previously computed pessimising
         value for vbits64 and return the combined result, but don't
         flag an addressing error.  The pessimising value is Defined
         for valid addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits64 UifU= pessim64  (is pessimised by it, iow) */
      vbits64 |= pessim64;
      return vbits64;
   }

   /* Also, it appears that gcc generates string-stepping code in
      32-bit chunks on 64 bit platforms.  So, also grant an exception
      for this case.  Note that the first clause of the conditional
      (VG_WORDSIZE == 8) is known at compile time, so the whole clause
      will get folded out in 32 bit builds. */
#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (VG_WORDSIZE == 4
       && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4)
#  else
   if (VG_WORDSIZE == 8
       && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4)
#  endif
   {
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits64 UifU= pessim64  (is pessimised by it, iow) */
      vbits64 |= pessim64;
      /* Mark the upper 32 bits as undefined, just to be on the safe
         side. */
      vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
      return vbits64;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );

   return vbits64;
}
static
__attribute__((noinline))
void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
{
   SizeT szB = nBits / 8;
   SizeT i, n_addrs_bad = 0;
   UChar vbits8;
   Addr  ai;
   Bool  ok;

   PROF_EVENT(MCPE_STOREVN_SLOW);

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  Are merely a speedup hack; can be
      omitted without loss of correctness/functionality.  Note that in
      both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  The logic below
      is somewhat similar to some cases extensively commented in
      MC_(helperc_STOREV8).
   */
#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (LIKELY(sizeof(void*) == 4 && nBits == 64 && VG_IS_8_ALIGNED(a)))
#  else
   if (LIKELY(sizeof(void*) == 8 && nBits == 64 && VG_IS_8_ALIGNED(a)))
#  endif
   {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = sm->vabits16[sm_off16];
      if (LIKELY( !is_distinguished_sm(sm) &&
                  (VA_BITS16_DEFINED   == vabits16 ||
                   VA_BITS16_UNDEFINED == vabits16) )) {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (LIKELY(V_BITS64_DEFINED == vbytes)) {
            sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
            return;
         } else if (V_BITS64_UNDEFINED == vbytes) {
            sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
            return;
         }
         /* else fall into the slow case */
      }
      /* else fall into the slow case */
   }

#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (LIKELY(sizeof(void*) == 4 && nBits == 32 && VG_IS_4_ALIGNED(a)))
#  else
   if (LIKELY(sizeof(void*) == 8 && nBits == 32 && VG_IS_4_ALIGNED(a)))
#  endif
   {
      SecMap* sm      = get_secmap_for_reading(a);
      UWord   sm_off  = SM_OFF(a);
      UWord   vabits8 = sm->vabits8[sm_off];
      if (LIKELY( !is_distinguished_sm(sm) &&
                  (VA_BITS8_DEFINED   == vabits8 ||
                   VA_BITS8_UNDEFINED == vabits8) )) {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
            sm->vabits8[sm_off] = VA_BITS8_DEFINED;
            return;
         } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
            sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
            return;
         }
         /* else fall into the slow case */
      }
      /* else fall into the slow case */
   }
   /* ------------ END semi-fast cases ------------ */

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Dump vbytes in memory, iterating from least to most significant
      byte.  At the same time establish addressability of the location. */
   for (i = 0; i < szB; i++) {
      PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
      ai     = a + byte_offset_w(szB, bigendian, i);
      vbits8 = vbytes & 0xff;
      ok     = set_vbits8(ai, vbits8);
      if (!ok) n_addrs_bad++;
      vbytes >>= 8;
   }

   /* If an address error has happened, report it. */
   if (n_addrs_bad > 0)
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
}
1658 /*------------------------------------------------------------*/
1659 /*--- Setting permissions over address ranges. ---*/
1660 /*------------------------------------------------------------*/
1662 static void set_address_range_perms ( Addr a
, SizeT lenT
, UWord vabits16
,
1665 UWord sm_off
, sm_off16
;
1666 UWord vabits2
= vabits16
& 0x3;
1667 SizeT lenA
, lenB
, len_to_next_secmap
;
1671 SecMap
* example_dsm
;
1673 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS
);
1675 /* Check the V+A bits make sense. */
1676 tl_assert(VA_BITS16_NOACCESS
== vabits16
||
1677 VA_BITS16_UNDEFINED
== vabits16
||
1678 VA_BITS16_DEFINED
== vabits16
);
1680 // This code should never write PDBs; ensure this. (See comment above
1682 tl_assert(VA_BITS2_PARTDEFINED
!= vabits2
);
1687 if (lenT
> 256 * 1024 * 1024) {
1688 if (VG_(clo_verbosity
) > 0 && !VG_(clo_xml
)) {
1689 const HChar
* s
= "unknown???";
1690 if (vabits16
== VA_BITS16_NOACCESS
) s
= "noaccess";
1691 if (vabits16
== VA_BITS16_UNDEFINED
) s
= "undefined";
1692 if (vabits16
== VA_BITS16_DEFINED
) s
= "defined";
1693 VG_(message
)(Vg_UserMsg
, "Warning: set address range perms: "
1694 "large range [0x%lx, 0x%lx) (%s)\n",
1699 #ifndef PERF_FAST_SARP
1700 /*------------------ debug-only case ------------------ */
1702 // Endianness doesn't matter here because all bytes are being set to
1704 // Nb: We don't have to worry about updating the sec-V-bits table
1705 // after these set_vabits2() calls because this code never writes
1706 // VA_BITS2_PARTDEFINED values.
1708 for (i
= 0; i
< lenT
; i
++) {
1709 set_vabits2(a
+ i
, vabits2
);
   /*------------------ standard handling ------------------ */

   /* Get the distinguished secondary that we might want
      to use (part of the space-compression scheme). */
   example_dsm = &sm_distinguished[dsm_num];

   // We have to handle ranges covering various combinations of partial and
   // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
   // Cases marked with a '*' are common.
   //
   // * one partial sec-map                  (p)         1
   // - one whole sec-map                    (P)         2
   //
   // * two partial sec-maps                 (pp)        1,3
   // - one partial, one whole sec-map       (pP)        1,2
   // - one whole, one partial sec-map       (Pp)        2,3
   // - two whole sec-maps                   (PP)        2,2
   //
   // * one partial, one whole, one partial  (pPp)       1,2,3
   // - one partial, two whole               (pPP)       1,2,2
   // - two whole, one partial               (PPp)       2,2,3
   // - three whole                          (PPP)       2,2,2
   //
   // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   // - N whole                              (PP...PP)   2,2...2,2
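   //
   // For example (assuming SM_SIZE == 64KB, as in the 64KB steps of Part 2
   // below): a call covering [0x10030, 0x30010) is the pPp case.  Part 1
   // handles the 0xFFD0 bytes up to the sec-map boundary at 0x20000, Part 2
   // fast-sets the whole sec-map [0x20000, 0x30000), and Part 3 finishes
   // the final 0x10 bytes.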
   // Break up total length (lenT) into two parts:  length in the first
   // sec-map (lenA), and the rest (lenB);  lenT == lenA + lenB.
   aNext = start_of_this_sm(a) + SM_SIZE;
   len_to_next_secmap = aNext - a;
   if ( lenT <= len_to_next_secmap ) {
      // Range entirely within one sec-map.  Covers almost all cases.
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
      lenA = lenT;
      lenB = 0;
   } else if (is_start_of_sm(a)) {
      // Range spans at least one whole sec-map, and starts at the beginning
      // of a sec-map; skip to Part 2.
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
      lenA = 0;
      lenB = lenT;
      goto part2;
   } else {
      // Range spans two or more sec-maps, first one is partial.
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
      lenA = len_to_next_secmap;
      lenB = lenT - lenA;
   }
   //------------------------------------------------------------------------
   // Part 1: Deal with the first sec_map.  Most of the time the range will be
   // entirely within a sec_map and this part alone will suffice.  Also,
   // doing it this way lets us avoid repeatedly testing for the crossing of
   // a sec-map boundary within these loops.
   //------------------------------------------------------------------------

   // If it's distinguished, make it undistinguished if necessary.
   sm_ptr = get_secmap_ptr(a);
   if (is_distinguished_sm(*sm_ptr)) {
      if (*sm_ptr == example_dsm) {
         // Sec-map already has the V+A bits that we want, so skip.
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
         a    = aNext;
         lenA = 0;
      } else {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
         *sm_ptr = copy_for_writing(*sm_ptr);
      }
   }
   sm = *sm_ptr;

   // 1 byte steps
   while (True) {
      if (VG_IS_8_ALIGNED(a)) break;
      if (lenA < 1)           break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenA -= 1;
   }
   // 8-aligned, 8 byte steps
   while (True) {
      if (lenA < 8) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
      sm_off16 = SM_OFF_16(a);
      sm->vabits16[sm_off16] = vabits16;
      a    += 8;
      lenA -= 8;
   }
   // 1 byte steps
   while (True) {
      if (lenA < 1) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenA -= 1;
   }

   // We've finished the first sec-map.  Is that it?
   if (lenB == 0)
      return;

   //------------------------------------------------------------------------
   // Part 2: Fast-set entire sec-maps at a time.
   //------------------------------------------------------------------------
  part2:
   // 64KB-aligned, 64KB steps.
   // Nb: we can reach here with lenB < SM_SIZE
   tl_assert(0 == lenA);
   while (True) {
      if (lenB < SM_SIZE) break;
      tl_assert(is_start_of_sm(a));
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
      sm_ptr = get_secmap_ptr(a);
      if (!is_distinguished_sm(*sm_ptr)) {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
         // Free the non-distinguished sec-map that we're replacing.  This
         // case happens moderately often, enough to be worthwhile.
         SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
         tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
      }
      update_SM_counts(*sm_ptr, example_dsm);
      // Make the sec-map entry point to the example DSM
      *sm_ptr = example_dsm;
      lenB -= SM_SIZE;
      a    += SM_SIZE;
   }

   // We've finished the whole sec-maps.  Is that it?
   if (lenB == 0)
      return;

   //------------------------------------------------------------------------
   // Part 3: Finish off the final partial sec-map, if necessary.
   //------------------------------------------------------------------------

   tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);

   // If it's distinguished, make it undistinguished if necessary.
   sm_ptr = get_secmap_ptr(a);
   if (is_distinguished_sm(*sm_ptr)) {
      if (*sm_ptr == example_dsm) {
         // Sec-map already has the V+A bits that we want, so stop.
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
         return;
      } else {
         PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
         *sm_ptr = copy_for_writing(*sm_ptr);
      }
   }
   sm = *sm_ptr;

   // 8-aligned, 8 byte steps
   while (True) {
      if (lenB < 8) break;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
      sm_off16 = SM_OFF_16(a);
      sm->vabits16[sm_off16] = vabits16;
      a    += 8;
      lenB -= 8;
   }
   // 1 byte steps
   while (True) {
      if (lenB < 1) return;
      PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
      sm_off = SM_OFF(a);
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
      a    += 1;
      lenB -= 1;
   }
}
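/* The 8-byte inner loops above rely on each VA_BITS16_xxx value being the
   corresponding 2-bit code replicated eight times, so one vabits16[] store
   covers 8 bytes at once.  A sketch of that replication (rep8 is a
   hypothetical helper, for exposition only; see the VA_BITS2_/VA_BITS16_
   definitions earlier in this file):

      static UWord rep8 ( UWord vabits2 )
      {
         UWord w = vabits2 & 0x3;
         w |= (w << 2);      // now covers 2 bytes
         w |= (w << 4);      // now covers 4 bytes
         w |= (w << 8);      // now covers 8 bytes, i.e. a vabits16 value
         return w;
      }

   which is also why the function recovers the per-byte code as
   vabits2 = vabits16 & 0x3 at the top. */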
/* --- Set permissions for arbitrary address ranges --- */

void MC_(make_mem_noaccess) ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
   DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Clear_Origins ( a, len );
}

static void make_mem_undefined ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
   DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
}

void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
{
   PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
   DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Set_Origins ( a, len, otag );
}

static
void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
                                          ThreadId tid, UInt okind )
{
   UInt        ecu;
   ExeContext* here;
   /* VG_(record_ExeContext) checks for validity of tid, and asserts
      if it is invalid.  So no need to do it here. */
   tl_assert(okind <= 3);
   here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   tl_assert(here);
   ecu = VG_(get_ECU_from_ExeContext)(here);
   tl_assert(VG_(is_plausible_ECU)(ecu));
   MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
}

static
void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
{
   make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
}

static
void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
{
   MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
}

void MC_(make_mem_defined) ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
   DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
   set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
   if (UNLIKELY( MC_(clo_mc_level) == 3 ))
      ocache_sarp_Clear_Origins ( a, len );
}

__attribute__((unused))
static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
{
   MC_(make_mem_defined)(a, len);
}
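/* These routines are also what the MAKE_MEM_* client requests in memcheck.h
   end up calling.  For instance, a program with its own pool allocator can
   keep Memcheck accurate like this (sketch; Pool, take_from_pool and
   give_back_to_pool are hypothetical):

      #include "memcheck.h"

      void* pool_alloc ( Pool* p, size_t n )
      {
         void* q = take_from_pool(p, n);          // hypothetical
         VALGRIND_MAKE_MEM_UNDEFINED(q, n);       // addressable, uninitialised
         return q;
      }

      void pool_free ( Pool* p, void* q, size_t n )
      {
         give_back_to_pool(p, q, n);              // hypothetical
         VALGRIND_MAKE_MEM_NOACCESS(q, n);        // no longer addressable
      }

   Those requests are serviced by the routines above. */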
/* For each byte in [a,a+len), if the byte is addressable, make it be
   defined, but if it isn't addressible, leave it alone.  In other
   words a version of MC_(make_mem_defined) that doesn't mess with
   addressibility.  Low-performance implementation. */
static void make_mem_defined_if_addressable ( Addr a, SizeT len )
{
   SizeT i;
   UChar vabits2;
   DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
   for (i = 0; i < len; i++) {
      vabits2 = get_vabits2( a+i );
      if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
         set_vabits2(a+i, VA_BITS2_DEFINED);
         if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
            MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
         }
      }
   }
}

/* Similarly (needed for mprotect handling ..) */
static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
{
   SizeT i;
   UChar vabits2;
   DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
   for (i = 0; i < len; i++) {
      vabits2 = get_vabits2( a+i );
      if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
         set_vabits2(a+i, VA_BITS2_DEFINED);
         if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
            MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
         }
      }
   }
}
2001 /* --- Block-copy permissions (needed for implementing realloc() and
2004 void MC_(copy_address_range_state
) ( Addr src
, Addr dst
, SizeT len
)
2007 UChar vabits2
, vabits8
;
2008 Bool aligned
, nooverlap
;
2010 DEBUG("MC_(copy_address_range_state)\n");
2011 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE
);
2013 if (len
== 0 || src
== dst
)
2016 aligned
= VG_IS_4_ALIGNED(src
) && VG_IS_4_ALIGNED(dst
);
2017 nooverlap
= src
+len
<= dst
|| dst
+len
<= src
;
2019 if (nooverlap
&& aligned
) {
2021 /* Vectorised fast case, when no overlap and suitably aligned */
2025 vabits8
= get_vabits8_for_aligned_word32( src
+i
);
2026 set_vabits8_for_aligned_word32( dst
+i
, vabits8
);
2027 if (LIKELY(VA_BITS8_DEFINED
== vabits8
2028 || VA_BITS8_UNDEFINED
== vabits8
2029 || VA_BITS8_NOACCESS
== vabits8
)) {
2032 /* have to copy secondary map info */
2033 if (VA_BITS2_PARTDEFINED
== get_vabits2( src
+i
+0 ))
2034 set_sec_vbits8( dst
+i
+0, get_sec_vbits8( src
+i
+0 ) );
2035 if (VA_BITS2_PARTDEFINED
== get_vabits2( src
+i
+1 ))
2036 set_sec_vbits8( dst
+i
+1, get_sec_vbits8( src
+i
+1 ) );
2037 if (VA_BITS2_PARTDEFINED
== get_vabits2( src
+i
+2 ))
2038 set_sec_vbits8( dst
+i
+2, get_sec_vbits8( src
+i
+2 ) );
2039 if (VA_BITS2_PARTDEFINED
== get_vabits2( src
+i
+3 ))
2040 set_sec_vbits8( dst
+i
+3, get_sec_vbits8( src
+i
+3 ) );
2047 vabits2
= get_vabits2( src
+i
);
2048 set_vabits2( dst
+i
, vabits2
);
2049 if (VA_BITS2_PARTDEFINED
== vabits2
) {
2050 set_sec_vbits8( dst
+i
, get_sec_vbits8( src
+i
) );
2058 /* We have to do things the slow way */
2060 for (i
= 0, j
= len
-1; i
< len
; i
++, j
--) {
2061 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1
);
2062 vabits2
= get_vabits2( src
+j
);
2063 set_vabits2( dst
+j
, vabits2
);
2064 if (VA_BITS2_PARTDEFINED
== vabits2
) {
2065 set_sec_vbits8( dst
+j
, get_sec_vbits8( src
+j
) );
2071 for (i
= 0; i
< len
; i
++) {
2072 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2
);
2073 vabits2
= get_vabits2( src
+i
);
2074 set_vabits2( dst
+i
, vabits2
);
2075 if (VA_BITS2_PARTDEFINED
== vabits2
) {
2076 set_sec_vbits8( dst
+i
, get_sec_vbits8( src
+i
) );
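/* For instance, the realloc machinery, having copied the payload of a
   moved block, copies its shadow state as well, roughly as follows
   (sketch; only MC_(copy_address_range_state) is the real entry point
   here, old_p/new_p/copy_szB are illustrative):

      VG_(memcpy)( (void*)new_p, (void*)old_p, copy_szB );
      MC_(copy_address_range_state)( old_p, new_p, copy_szB );
      // ... then the old block is marked noaccess as usual

   so partially-defined bytes keep their sec-V-bits across the move. */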
2085 /*------------------------------------------------------------*/
2086 /*--- Origin tracking stuff - cache basics ---*/
2087 /*------------------------------------------------------------*/
2089 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
2090 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2092 Note that this implementation draws inspiration from the "origin
2093 tracking by value piggybacking" scheme described in "Tracking Bad
2094 Apples: Reporting the Origin of Null and Undefined Value Errors"
2095 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
2096 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
2097 implemented completely differently.
2099 Origin tags and ECUs -- about the shadow values
2100 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2102 This implementation tracks the defining point of all uninitialised
2103 values using so called "origin tags", which are 32-bit integers,
2104 rather than using the values themselves to encode the origins. The
2105 latter, so-called "value piggybacking", is what the OOPSLA07 paper describes.
2108 Origin tags, as tracked by the machinery below, are 32-bit unsigned
2109 ints (UInts), regardless of the machine's word size. Each tag
2110 comprises an upper 30-bit ECU field and a lower 2-bit
2111 'kind' field. The ECU field is a number given out by m_execontext
2112 and has a 1-1 mapping with ExeContext*s. An ECU can be used
2113 directly as an origin tag (otag), but in fact we also want to use the
2114 additional 'kind' field to indicate roughly where the
2115 tag came from. This helps print more understandable error messages
2116 for the user -- it has no other purpose. In summary:
2118 * Both ECUs and origin tags are represented as 32-bit words
2120 * m_execontext and the core-tool interface deal purely in ECUs.
2121 They have no knowledge of origin tags - that is a purely
2122 Memcheck-internal matter.
2124 * all valid ECUs have the lowest 2 bits zero and at least
2125 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2127 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2128 constants defined in mc_include.h.
2130 * to convert an otag back to an ECU, AND it with ~3
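
   For example (sketch), for a heap-origin tag:

      otag = ecu | MC_OKIND_HEAP;    // ECU -> otag
      ecu  = otag & ~3;              // otag -> ECU again

   where MC_OKIND_HEAP stands in for whichever MC_OKIND_ constant is
   appropriate.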
2132 One important fact is that no valid otag is zero. A zero otag is
2133 used by the implementation to indicate "no origin", which could
2134 mean that either the value is defined, or it is undefined but the
2135 implementation somehow managed to lose the origin.
2137 The ECU used for memory created by malloc etc is derived from the
2138 stack trace at the time the malloc etc happens. This means the
2139 mechanism can show the exact allocation point for heap-created
2140 uninitialised values.
2142 In contrast, it is simply too expensive to create a complete
2143 backtrace for each stack allocation. Therefore we merely use a
2144 depth-1 backtrace for stack allocations, which can be done once at
2145 translation time, rather than N times at run time. The result of
2146 this is that, for stack created uninitialised values, Memcheck can
2147 only show the allocating function, and not what called it.
2148 Furthermore, compilers tend to move the stack pointer just once at
2149 the start of the function, to allocate all locals, and so in fact
2150 the stack origin almost always simply points to the opening brace
2151 of the function. Net result is, for stack origins, the mechanism
2152 can tell you in which function the undefined value was created, but
2153 that's all.  Users will need to carefully check all locals in the function.
2156 Shadowing registers and memory
2157 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2159 Memory is shadowed using a two level cache structure (ocacheL1 and
2160 ocacheL2). Memory references are first directed to ocacheL1. This
2161 is a traditional 2-way set associative cache with 32-byte lines and
2162 approximate LRU replacement within each set.
2164 A naive implementation would require storing one 32 bit otag for
2165 each byte of memory covered, a 4:1 space overhead. Instead, there
2166 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2167 that shows which of the 4 bytes have that shadow value and which
2168 have a shadow value of zero (indicating no origin). Hence a lot of
2169 space is saved, but the cost is that only one different origin per
2170 4 bytes of address space can be represented. This is a source of
2171 imprecision, but how much of a problem it really is remains to be seen.
2174 A cache line that contains all zeroes ("no origins") contains no
2175 useful information, and can be ejected from the L1 cache "for
2176 free", in the sense that a read miss on the L1 causes a line of
2177 zeroes to be installed. However, ejecting a line containing
2178 nonzeroes risks losing origin information permanently. In order to
2179 prevent such lossage, ejected nonzero lines are placed in a
2180 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2181 lines. This can grow arbitrarily large, and so should ensure that
2182 Memcheck runs out of memory in preference to losing useful origin
2183 info due to cache size limitations.
2185 Shadowing registers is a bit tricky, because the shadow values are
2186 32 bits, regardless of the size of the register. That gives a
2187 problem for registers smaller than 32 bits. The solution is to
2188 find spaces in the guest state that are unused, and use those to
2189 shadow guest state fragments smaller than 32 bits. For example, on
2190 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
2191 shadow are allocated for the register's otag, then there are still
2192 12 bytes left over which could be used to shadow 3 other values.
2194 This implies there is some non-obvious mapping from guest state
2195 (start,length) pairs to the relevant shadow offset (for the origin
2196 tags). And it is unfortunately guest-architecture specific. The
2197 mapping is contained in mc_machine.c, which is quite lengthy but straightforward.
2200 Instrumenting the IR
2201 ~~~~~~~~~~~~~~~~~~~~
2203 Instrumentation is largely straightforward, and done by the
2204 functions schemeE and schemeS in mc_translate.c. These generate
2205 code for handling the origin tags of expressions (E) and statements
2206 (S) respectively. The rather strange names are a reference to the
2207 "compilation schemes" shown in Simon Peyton Jones' book "The
2208 Implementation of Functional Programming Languages" (Prentice Hall, 1987; see
2210 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2212 schemeS merely arranges to move shadow values around the guest
2213 state to track the incoming IR. schemeE is largely trivial too.
2214 The only significant point is how to compute the otag corresponding
2215 to binary (or ternary, quaternary, etc) operator applications. The
2216 rule is simple: just take whichever value is larger (32-bit
2217 unsigned max). Constants get the special value zero. Hence this
2218 rule always propagates a nonzero (known) otag in preference to a
2219 zero (unknown, or more likely, value-is-defined) tag, as we want.
2220 If two different undefined values are inputs to a binary operator
2221 application, then which is propagated is arbitrary, but that
2222 doesn't matter, since the program is erroneous in using either of
2223 the values, and so there's no point in attempting to propagate both.
2226 Since constants are abstracted to (otag) zero, much of the
2227 instrumentation code can be folded out without difficulty by the
2228 generic post-instrumentation IR cleanup pass, using these rules:
2229 Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y are
2230 constants is evaluated at JIT time, together with the resulting dead code
2231 removal.  In practice this causes surprisingly few Max32Us to
2232 survive through to backend code generation.
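
   As a concrete sketch: for   t3 = Add32(t1,t2)   schemeE computes the
   shadow   otag(t3) = Max32U(otag(t1), otag(t2));   if t2 is a constant
   its otag expression is the literal 0, so the cleanup pass rewrites this
   to   otag(t3) = otag(t1)   and the Max32U disappears.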
2234 Integration with the V-bits machinery
2235 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2237 This is again largely straightforward. Mostly the otag and V bits
2238 stuff are independent. The only point of interaction is when the V
2239 bits instrumenter creates a call to a helper function to report an
2240 uninitialised value error -- in that case it must first use schemeE
2241 to get hold of the origin tag expression for the value, and pass
2242 that to the helper too.
2244 There is the usual stuff to do with setting address range
2245 permissions. When memory is painted undefined, we must also know
2246 the origin tag to paint with, which involves some tedious plumbing,
2247 particularly to do with the fast case stack handlers. When memory
2248 is painted defined or noaccess then the origin tags must be forced to zero.
2251 One of the goals of the implementation was to ensure that the
2252 non-origin tracking mode isn't slowed down at all. To do this,
2253 various functions to do with memory permissions setting (again,
2254 mostly pertaining to the stack) are duplicated for the with- and without-origin-tracking cases.
2257 Dealing with stack redzones, and the NIA cache
2258 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2260 This is one of the few non-obvious parts of the implementation.
2262 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2263 reserved area below the stack pointer, that can be used as scratch
2264 space by compiler generated code for functions. In the Memcheck
2265 sources this is referred to as the "stack redzone". The important
2266 thing here is that such redzones are considered volatile across
2267 function calls and returns. So Memcheck takes care to mark them as
2268 undefined for each call and return, on the afflicted platforms.
2269 Past experience shows this is essential in order to get reliable
2270 messages about uninitialised values that come from the stack.
2272 So the question is, when we paint a redzone undefined, what origin
2273 tag should we use for it? Consider a function f() calling g(). If
2274 we paint the redzone using an otag derived from the ExeContext of
2275 the CALL/BL instruction in f, then any errors in g causing it to
2276 use uninitialised values that happen to lie in the redzone, will be
2277 reported as having their origin in f. Which is highly confusing.
2279 The same applies for returns: if, on a return, we paint the redzone
2280 using a origin tag derived from the ExeContext of the RET/BLR
2281 instruction in g, then any later errors in f causing it to use
2282 uninitialised values in the redzone, will be reported as having
2283 their origin in g. Which is just as confusing.
2285 To do it right, in both cases we need to use an origin tag which
2286 pertains to the instruction which dynamically follows the CALL/BL
2287 or RET/BLR. In short, one derived from the NIA - the "next
2288 instruction address".
2290 To make this work, Memcheck's redzone-painting helper,
2291 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2292 NIA. It converts the NIA to a 1-element ExeContext, and uses that
2293 ExeContext's ECU as the basis for the otag used to paint the
2294 redzone. The expensive part of this is converting an NIA into an
2295 ECU, since this happens once for every call and every return. So
2296 we use a simple 511-line, 2-way set associative cache
2297 (nia_to_ecu_cache) to cache the mappings, and that knocks most of the cost out.
2300 Further background comments
2301 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2303 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
2304 > it really just the address of the relevant ExeContext?
2306 Well, it's not the address, but a value which has a 1-1 mapping
2307 with ExeContexts, and is guaranteed not to be zero, since zero
2308 denotes (to memcheck) "unknown origin or defined value". So these
2309 UInts are just numbers starting at 4 and incrementing by 4; each
2310 ExeContext is given a number when it is created. (*** NOTE this
2311 confuses otags and ECUs; see comments above ***).
2313 Making these otags 32-bit regardless of the machine's word size
2314 makes the 64-bit implementation easier (next para). And it doesn't
2315 really limit us in any way, since for the tags to overflow would
2316 require that the program somehow caused 2^30-1 different
2317 ExeContexts to be created, in which case it is probably in deep
2318 trouble. Not to mention V will have soaked up many tens of
2319 gigabytes of memory merely to store them all.
2321 So having 64-bit origins doesn't really buy you anything, and has
2322 the following downsides:
2324 Suppose that instead, an otag is a UWord; that would mean that, on a 64-bit target, otags occupy 64 bits each.  Then:
2327 1. It becomes hard to shadow any element of guest state which is
2328 smaller than 8 bytes. To do so means you'd need to find some
2329 8-byte-sized hole in the guest state which you don't want to
2330 shadow, and use that instead to hold the otag. On ppc64, the
2331 condition code register(s) are split into 20 UChar sized pieces,
2332 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2333 and so that would entail finding 160 bytes somewhere else in the
2336 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2337 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2338 same) and so I had to look for 4 untracked otag-sized areas in
2339 the guest state to make that possible.
2341 The same problem exists of course when origin tags are only 32
2342 bits, but it's less extreme.
2344 2. (More compelling) it doubles the size of the origin shadow
2345 memory. Given that the shadow memory is organised as a fixed
2346 size cache, and that accuracy of tracking is limited by origins
2347 falling out the cache due to space conflicts, this isn't good.
2349 > Another question: is the origin tracking perfect, or are there
2350 > cases where it fails to determine an origin?
2352 It is imperfect, for at least the following reasons, and perhaps others:
2355 * Insufficient capacity in the origin cache. When a line is
2356 evicted from the cache it is gone forever, and so subsequent
2357 queries for the line produce zero, indicating no origin
2358 information. Interestingly, a line containing all zeroes can be
2359 evicted "free" from the cache, since it contains no useful
2360 information, so there is scope perhaps for some cleverer cache
2361 management schemes. (*** NOTE, with the introduction of the
2362 second level origin tag cache, ocacheL2, this is no longer a
2365 * The origin cache only stores one otag per 32-bits of address
2366 space, plus 4 bits indicating which of the 4 bytes has that tag
2367 and which are considered defined. The result is that if two
2368 undefined bytes in the same word are stored in memory, the first
2369 stored byte's origin will be lost and replaced by the origin for the second byte.
2372 * Nonzero origin tags for defined values. Consider a binary
2373 operator application op(x,y). Suppose y is undefined (and so has
2374 a valid nonzero origin tag), and x is defined, but erroneously
2375 has a nonzero origin tag (defined values should have tag zero).
2376 If the erroneous tag has a numeric value greater than y's tag,
2377 then the rule for propagating origin tags though binary
2378 operations, which is simply to take the unsigned max of the two
2379 tags, will erroneously propagate x's tag rather than y's.
2381 * Some obscure uses of x86/amd64 byte registers can cause lossage
2382 or confusion of origins. %AH .. %DH are treated as different
2383 from, and unrelated to, their parent registers, %EAX .. %EDX.
2384 So some weird sequences like
2386 movb undefined-value, %AH
2387 movb defined-value, %AL
2388 .. use %AX or %EAX ..
2390 will cause the origin attributed to %AH to be ignored, since %AL,
2391 %AX, %EAX are treated as the same register, and %AH as a
2392 completely separate one.
2394 But having said all that, it actually seems to work fairly well in
     practice.
*/

static UWord stats_ocacheL1_find         = 0;
static UWord stats_ocacheL1_found_at_1   = 0;
static UWord stats_ocacheL1_found_at_N   = 0;
static UWord stats_ocacheL1_misses       = 0;
static UWord stats_ocacheL1_lossage      = 0;
static UWord stats_ocacheL1_movefwds     = 0;

static UWord stats__ocacheL2_refs        = 0;
static UWord stats__ocacheL2_misses      = 0;
static UWord stats__ocacheL2_n_nodes_max = 0;
/* Cache of 32-bit values, one every 32 bits of address space */

#define OC_BITS_PER_LINE 5
#define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))

static INLINE UWord oc_line_offset ( Addr a ) {
   return (a >> 2) & (OC_W32S_PER_LINE - 1);
}

static INLINE Bool is_valid_oc_tag ( Addr tag ) {
   return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
}
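/* For example, with OC_BITS_PER_LINE == 5 a line covers 32 bytes, i.e.
   OC_W32S_PER_LINE == 8 32-bit words.  For an address a == 0x40037A64:

      tag               == a & ~31       == 0x40037A60   // line base
      oc_line_offset(a) == (a >> 2) & 7  == 1            // second w32 slot

   (illustrative address; any address maps the same way). */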
#define OC_LINES_PER_SET 2

#define OC_N_SET_BITS 20
#define OC_N_SETS     (1 << OC_N_SET_BITS)

/* These settings give:
   64 bit host: ocache:  100,663,296 sizeB  67,108,864 useful
   32 bit host: ocache:   92,274,688 sizeB  67,108,864 useful
*/

#define OC_MOVE_FORWARDS_EVERY_BITS 7

typedef
   struct {
      Addr  tag;
      UInt  w32[OC_W32S_PER_LINE];
      UChar descr[OC_W32S_PER_LINE];
   }
   OCacheLine;
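/* So one OCacheLine shadows 32 bytes of address space: w32[i] holds the
   single otag shared by the 4 bytes that slot i covers, and the low 4 bits
   of descr[i] record which of those bytes actually carry it.  For example
   (sketch), after an undefined byte with otag T is written at byte 1 of
   the word covered by slot 0:

      w32[0]   == T
      descr[0] == 0x2     // binary 0010: only byte 1 carries T

   while the other three bytes of that word are regarded as having no
   stored origin (tag zero). */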
/* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
   in use, 'n' (nonzero) if it contains at least one valid origin tag,
   and 'z' if all the represented tags are zero. */
static UChar classify_OCacheLine ( OCacheLine* line )
{
   UWord i;
   if (line->tag == 1/*invalid*/)
      return 'e'; /* EMPTY */
   tl_assert(is_valid_oc_tag(line->tag));
   for (i = 0; i < OC_W32S_PER_LINE; i++) {
      tl_assert(0 == ((~0xF) & line->descr[i]));
      if (line->w32[i] > 0 && line->descr[i] > 0)
         return 'n'; /* NONZERO - contains useful info */
   }
   return 'z'; /* ZERO - no useful info */
}
typedef
   struct {
      OCacheLine line[OC_LINES_PER_SET];
   }
   OCacheSet;

typedef
   struct {
      OCacheSet set[OC_N_SETS];
   }
   OCache;

static OCache* ocacheL1           = NULL;
static UWord   ocacheL1_event_ctr = 0;
2474 static void init_ocacheL2 ( void ); /* fwds */
2475 static void init_OCache ( void )
2478 tl_assert(MC_(clo_mc_level
) >= 3);
2479 tl_assert(ocacheL1
== NULL
);
2480 ocacheL1
= VG_(am_shadow_alloc
)(sizeof(OCache
));
2481 if (ocacheL1
== NULL
) {
2482 VG_(out_of_memory_NORETURN
)( "memcheck:allocating ocacheL1",
2485 tl_assert(ocacheL1
!= NULL
);
2486 for (set
= 0; set
< OC_N_SETS
; set
++) {
2487 for (line
= 0; line
< OC_LINES_PER_SET
; line
++) {
2488 ocacheL1
->set
[set
].line
[line
].tag
= 1/*invalid*/;
2494 static void moveLineForwards ( OCacheSet
* set
, UWord lineno
)
2497 stats_ocacheL1_movefwds
++;
2498 tl_assert(lineno
> 0 && lineno
< OC_LINES_PER_SET
);
2499 tmp
= set
->line
[lineno
-1];
2500 set
->line
[lineno
-1] = set
->line
[lineno
];
2501 set
->line
[lineno
] = tmp
;
2504 static void zeroise_OCacheLine ( OCacheLine
* line
, Addr tag
) {
2506 for (i
= 0; i
< OC_W32S_PER_LINE
; i
++) {
2507 line
->w32
[i
] = 0; /* NO ORIGIN */
2508 line
->descr
[i
] = 0; /* REALLY REALLY NO ORIGIN! */
2513 //////////////////////////////////////////////////////////////
2514 //// OCache backing store
2516 static OSet
* ocacheL2
= NULL
;
2518 static void* ocacheL2_malloc ( const HChar
* cc
, SizeT szB
) {
2519 return VG_(malloc
)(cc
, szB
);
2521 static void ocacheL2_free ( void* v
) {
2525 /* Stats: # nodes currently in tree */
2526 static UWord stats__ocacheL2_n_nodes
= 0;
2528 static void init_ocacheL2 ( void )
2530 tl_assert(!ocacheL2
);
2531 tl_assert(sizeof(Word
) == sizeof(Addr
)); /* since OCacheLine.tag :: Addr */
2532 tl_assert(0 == offsetof(OCacheLine
,tag
));
2534 = VG_(OSetGen_Create
)( offsetof(OCacheLine
,tag
),
2535 NULL
, /* fast cmp */
2536 ocacheL2_malloc
, "mc.ioL2", ocacheL2_free
);
2537 stats__ocacheL2_n_nodes
= 0;
2540 /* Find line with the given tag in the tree, or NULL if not found. */
2541 static OCacheLine
* ocacheL2_find_tag ( Addr tag
)
2544 tl_assert(is_valid_oc_tag(tag
));
2545 stats__ocacheL2_refs
++;
2546 line
= VG_(OSetGen_Lookup
)( ocacheL2
, &tag
);
2550 /* Delete the line with the given tag from the tree, if it is present, and
2551 free up the associated memory. */
2552 static void ocacheL2_del_tag ( Addr tag
)
2555 tl_assert(is_valid_oc_tag(tag
));
2556 stats__ocacheL2_refs
++;
2557 line
= VG_(OSetGen_Remove
)( ocacheL2
, &tag
);
2559 VG_(OSetGen_FreeNode
)(ocacheL2
, line
);
2560 tl_assert(stats__ocacheL2_n_nodes
> 0);
2561 stats__ocacheL2_n_nodes
--;
2565 /* Add a copy of the given line to the tree. It must not already be
2567 static void ocacheL2_add_line ( OCacheLine
* line
)
2570 tl_assert(is_valid_oc_tag(line
->tag
));
2571 copy
= VG_(OSetGen_AllocNode
)( ocacheL2
, sizeof(OCacheLine
) );
2573 stats__ocacheL2_refs
++;
2574 VG_(OSetGen_Insert
)( ocacheL2
, copy
);
2575 stats__ocacheL2_n_nodes
++;
2576 if (stats__ocacheL2_n_nodes
> stats__ocacheL2_n_nodes_max
)
2577 stats__ocacheL2_n_nodes_max
= stats__ocacheL2_n_nodes
;
2581 //////////////////////////////////////////////////////////////
2583 __attribute__((noinline
))
2584 static OCacheLine
* find_OCacheLine_SLOW ( Addr a
)
2586 OCacheLine
*victim
, *inL2
;
2589 UWord setno
= (a
>> OC_BITS_PER_LINE
) & (OC_N_SETS
- 1);
2590 UWord tagmask
= ~((1 << OC_BITS_PER_LINE
) - 1);
2591 UWord tag
= a
& tagmask
;
2592 tl_assert(setno
>= 0 && setno
< OC_N_SETS
);
2594 /* we already tried line == 0; skip therefore. */
2595 for (line
= 1; line
< OC_LINES_PER_SET
; line
++) {
2596 if (ocacheL1
->set
[setno
].line
[line
].tag
== tag
) {
2598 stats_ocacheL1_found_at_1
++;
2600 stats_ocacheL1_found_at_N
++;
2602 if (UNLIKELY(0 == (ocacheL1_event_ctr
++
2603 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS
)-1)))) {
2604 moveLineForwards( &ocacheL1
->set
[setno
], line
);
2607 return &ocacheL1
->set
[setno
].line
[line
];
2611 /* A miss. Use the last slot. Implicitly this means we're
2612 ejecting the line in the last slot. */
2613 stats_ocacheL1_misses
++;
2614 tl_assert(line
== OC_LINES_PER_SET
);
2616 tl_assert(line
> 0);
2618 /* First, move the to-be-ejected line to the L2 cache. */
2619 victim
= &ocacheL1
->set
[setno
].line
[line
];
2620 c
= classify_OCacheLine(victim
);
2623 /* the line is empty (has invalid tag); ignore it. */
2626 /* line contains zeroes. We must ensure the backing store is
2627 updated accordingly, either by copying the line there
2628 verbatim, or by ensuring it isn't present there. We
2629 choose the latter on the basis that it reduces the size of
2630 the backing store. */
2631 ocacheL2_del_tag( victim
->tag
);
2634 /* line contains at least one real, useful origin. Copy it
2635 to the backing store. */
2636 stats_ocacheL1_lossage
++;
2637 inL2
= ocacheL2_find_tag( victim
->tag
);
2641 ocacheL2_add_line( victim
);
2648 /* Now we must reload the L1 cache from the backing tree, if
2650 tl_assert(tag
!= victim
->tag
); /* stay sane */
2651 inL2
= ocacheL2_find_tag( tag
);
2653 /* We're in luck. It's in the L2. */
2654 ocacheL1
->set
[setno
].line
[line
] = *inL2
;
2656 /* Missed at both levels of the cache hierarchy. We have to
2657 declare it as full of zeroes (unknown origins). */
2658 stats__ocacheL2_misses
++;
2659 zeroise_OCacheLine( &ocacheL1
->set
[setno
].line
[line
], tag
);
2662 /* Move it one forwards */
2663 moveLineForwards( &ocacheL1
->set
[setno
], line
);
2666 return &ocacheL1
->set
[setno
].line
[line
];
2669 static INLINE OCacheLine
* find_OCacheLine ( Addr a
)
2671 UWord setno
= (a
>> OC_BITS_PER_LINE
) & (OC_N_SETS
- 1);
2672 UWord tagmask
= ~((1 << OC_BITS_PER_LINE
) - 1);
2673 UWord tag
= a
& tagmask
;
2675 stats_ocacheL1_find
++;
2677 if (OC_ENABLE_ASSERTIONS
) {
2678 tl_assert(setno
>= 0 && setno
< OC_N_SETS
);
2679 tl_assert(0 == (tag
& (4 * OC_W32S_PER_LINE
- 1)));
2682 if (LIKELY(ocacheL1
->set
[setno
].line
[0].tag
== tag
)) {
2683 return &ocacheL1
->set
[setno
].line
[0];
2686 return find_OCacheLine_SLOW( a
);
2689 static INLINE
void set_aligned_word64_Origin_to_undef ( Addr a
, UInt otag
)
2691 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2692 //// Set the origins for a+0 .. a+7
2694 UWord lineoff
= oc_line_offset(a
);
2695 if (OC_ENABLE_ASSERTIONS
) {
2696 tl_assert(lineoff
>= 0
2697 && lineoff
< OC_W32S_PER_LINE
-1/*'cos 8-aligned*/);
2699 line
= find_OCacheLine( a
);
2700 line
->descr
[lineoff
+0] = 0xF;
2701 line
->descr
[lineoff
+1] = 0xF;
2702 line
->w32
[lineoff
+0] = otag
;
2703 line
->w32
[lineoff
+1] = otag
;
2705 //// END inlined, specialised version of MC_(helperc_b_store8)
2709 /*------------------------------------------------------------*/
2710 /*--- Aligned fast case permission setters, ---*/
2711 /*--- for dealing with stacks ---*/
2712 /*------------------------------------------------------------*/
2714 /*--------------------- 32-bit ---------------------*/
2716 /* Nb: by "aligned" here we mean 4-byte aligned */
2718 static INLINE
void make_aligned_word32_undefined ( Addr a
)
2720 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED
);
2722 #ifndef PERF_FAST_STACK2
2723 make_mem_undefined(a
, 4);
2729 if (UNLIKELY(a
> MAX_PRIMARY_ADDRESS
)) {
2730 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW
);
2731 make_mem_undefined(a
, 4);
2735 sm
= get_secmap_for_writing_low(a
);
2737 sm
->vabits8
[sm_off
] = VA_BITS8_UNDEFINED
;
2743 void make_aligned_word32_undefined_w_otag ( Addr a
, UInt otag
)
2745 make_aligned_word32_undefined(a
);
2746 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2747 //// Set the origins for a+0 .. a+3
2749 UWord lineoff
= oc_line_offset(a
);
2750 if (OC_ENABLE_ASSERTIONS
) {
2751 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
2753 line
= find_OCacheLine( a
);
2754 line
->descr
[lineoff
] = 0xF;
2755 line
->w32
[lineoff
] = otag
;
2757 //// END inlined, specialised version of MC_(helperc_b_store4)
2761 void make_aligned_word32_noaccess ( Addr a
)
2763 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS
);
2765 #ifndef PERF_FAST_STACK2
2766 MC_(make_mem_noaccess
)(a
, 4);
2772 if (UNLIKELY(a
> MAX_PRIMARY_ADDRESS
)) {
2773 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW
);
2774 MC_(make_mem_noaccess
)(a
, 4);
2778 sm
= get_secmap_for_writing_low(a
);
2780 sm
->vabits8
[sm_off
] = VA_BITS8_NOACCESS
;
2782 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2783 //// Set the origins for a+0 .. a+3.
2784 if (UNLIKELY( MC_(clo_mc_level
) == 3 )) {
2786 UWord lineoff
= oc_line_offset(a
);
2787 if (OC_ENABLE_ASSERTIONS
) {
2788 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
2790 line
= find_OCacheLine( a
);
2791 line
->descr
[lineoff
] = 0;
2793 //// END inlined, specialised version of MC_(helperc_b_store4)
2798 /*--------------------- 64-bit ---------------------*/
2800 /* Nb: by "aligned" here we mean 8-byte aligned */
2802 static INLINE
void make_aligned_word64_undefined ( Addr a
)
2804 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED
);
2806 #ifndef PERF_FAST_STACK2
2807 make_mem_undefined(a
, 8);
2813 if (UNLIKELY(a
> MAX_PRIMARY_ADDRESS
)) {
2814 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW
);
2815 make_mem_undefined(a
, 8);
2819 sm
= get_secmap_for_writing_low(a
);
2820 sm_off16
= SM_OFF_16(a
);
2821 sm
->vabits16
[sm_off16
] = VA_BITS16_UNDEFINED
;
2827 void make_aligned_word64_undefined_w_otag ( Addr a
, UInt otag
)
2829 make_aligned_word64_undefined(a
);
2830 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2831 //// Set the origins for a+0 .. a+7
2833 UWord lineoff
= oc_line_offset(a
);
2834 tl_assert(lineoff
>= 0
2835 && lineoff
< OC_W32S_PER_LINE
-1/*'cos 8-aligned*/);
2836 line
= find_OCacheLine( a
);
2837 line
->descr
[lineoff
+0] = 0xF;
2838 line
->descr
[lineoff
+1] = 0xF;
2839 line
->w32
[lineoff
+0] = otag
;
2840 line
->w32
[lineoff
+1] = otag
;
2842 //// END inlined, specialised version of MC_(helperc_b_store8)
2846 void make_aligned_word64_noaccess ( Addr a
)
2848 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS
);
2850 #ifndef PERF_FAST_STACK2
2851 MC_(make_mem_noaccess
)(a
, 8);
2857 if (UNLIKELY(a
> MAX_PRIMARY_ADDRESS
)) {
2858 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW
);
2859 MC_(make_mem_noaccess
)(a
, 8);
2863 sm
= get_secmap_for_writing_low(a
);
2864 sm_off16
= SM_OFF_16(a
);
2865 sm
->vabits16
[sm_off16
] = VA_BITS16_NOACCESS
;
2867 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2868 //// Clear the origins for a+0 .. a+7.
2869 if (UNLIKELY( MC_(clo_mc_level
) == 3 )) {
2871 UWord lineoff
= oc_line_offset(a
);
2872 tl_assert(lineoff
>= 0
2873 && lineoff
< OC_W32S_PER_LINE
-1/*'cos 8-aligned*/);
2874 line
= find_OCacheLine( a
);
2875 line
->descr
[lineoff
+0] = 0;
2876 line
->descr
[lineoff
+1] = 0;
2878 //// END inlined, specialised version of MC_(helperc_b_store8)
2884 /*------------------------------------------------------------*/
2885 /*--- Stack pointer adjustment ---*/
2886 /*------------------------------------------------------------*/
2888 #ifdef PERF_FAST_STACK
2891 # define MAYBE_USED __attribute__((unused))
2894 /*--------------- adjustment by 4 bytes ---------------*/
2897 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP
, UInt ecu
)
2899 UInt otag
= ecu
| MC_OKIND_STACK
;
2900 PROF_EVENT(MCPE_NEW_MEM_STACK_4
);
2901 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2902 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
2904 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 4, otag
);
2909 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP
)
2911 PROF_EVENT(MCPE_NEW_MEM_STACK_4
);
2912 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2913 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
2915 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 4 );
2920 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP
)
2922 PROF_EVENT(MCPE_DIE_MEM_STACK_4
);
2923 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2924 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
2926 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-4, 4 );
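/* All the mc_new_mem_stack_N / mc_die_mem_stack_N specialisations below
   follow the same pattern, sketched here for a stack move of N bytes
   (N fixed per specialisation):

      mc_new_mem_stack_N  (SP moved down by N):
         paint [ new_SP - VG_STACK_REDZONE_SZB,
                 new_SP - VG_STACK_REDZONE_SZB + N )  as undefined
         (the _w_ECU variants also attach an MC_OKIND_STACK otag);

      mc_die_mem_stack_N  (SP moved up by N):
         paint [ new_SP - VG_STACK_REDZONE_SZB - N,
                 new_SP - VG_STACK_REDZONE_SZB )      as noaccess;

   using the aligned word32/word64 helpers above when the alignment tests
   allow, and falling back to the generic range routines otherwise. */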
2930 /*--------------- adjustment by 8 bytes ---------------*/
2933 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP
, UInt ecu
)
2935 UInt otag
= ecu
| MC_OKIND_STACK
;
2936 PROF_EVENT(MCPE_NEW_MEM_STACK_8
);
2937 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2938 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
2939 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2940 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
2941 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+4, otag
);
2943 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 8, otag
);
2948 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP
)
2950 PROF_EVENT(MCPE_NEW_MEM_STACK_8
);
2951 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2952 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
2953 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2954 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
2955 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 );
2957 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 8 );
2962 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP
)
2964 PROF_EVENT(MCPE_DIE_MEM_STACK_8
);
2965 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2966 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-8 );
2967 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2968 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-8 );
2969 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
2971 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-8, 8 );
2975 /*--------------- adjustment by 12 bytes ---------------*/
2978 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP
, UInt ecu
)
2980 UInt otag
= ecu
| MC_OKIND_STACK
;
2981 PROF_EVENT(MCPE_NEW_MEM_STACK_12
);
2982 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2983 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
2984 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8, otag
);
2985 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2986 /* from previous test we don't have 8-alignment at offset +0,
2987 hence must have 8 alignment at offsets +4/-4. Hence safe to
2988 do 4 at +0 and then 8 at +4/. */
2989 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
2990 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+4, otag
);
2992 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 12, otag
);
2997 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP
)
2999 PROF_EVENT(MCPE_NEW_MEM_STACK_12
);
3000 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3001 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3002 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3003 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3004 /* from previous test we don't have 8-alignment at offset +0,
3005 hence must have 8 alignment at offsets +4/-4. Hence safe to
3006 do 4 at +0 and then 8 at +4/. */
3007 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3008 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 );
3010 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 12 );
3015 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP
)
3017 PROF_EVENT(MCPE_DIE_MEM_STACK_12
);
3018 /* Note the -12 in the test */
3019 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
-12 )) {
3020 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
3022 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-12 );
3023 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
3024 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3025 /* We have 4-alignment at +0, but we don't have 8-alignment at
3026 -12. So we must have 8-alignment at -8. Hence do 4 at -12
3027 and then 8 at -8. */
3028 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-12 );
3029 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-8 );
3031 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-12, 12 );
3035 /*--------------- adjustment by 16 bytes ---------------*/
3038 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP
, UInt ecu
)
3040 UInt otag
= ecu
| MC_OKIND_STACK
;
3041 PROF_EVENT(MCPE_NEW_MEM_STACK_16
);
3042 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3043 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3044 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3045 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8, otag
);
3046 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3047 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3048 Hence do 4 at +0, 8 at +4, 4 at +12. */
3049 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3050 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 , otag
);
3051 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+12, otag
);
3053 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 16, otag
);
3058 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP
)
3060 PROF_EVENT(MCPE_NEW_MEM_STACK_16
);
3061 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3062 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3063 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3064 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3065 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3066 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3067 Hence do 4 at +0, 8 at +4, 4 at +12. */
3068 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3069 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 );
3070 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+12 );
3072 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 16 );
3077 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP
)
3079 PROF_EVENT(MCPE_DIE_MEM_STACK_16
);
3080 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3081 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
3082 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3083 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-8 );
3084 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3085 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
3086 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3087 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-12 );
3088 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
3090 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-16, 16 );
3094 /*--------------- adjustment by 32 bytes ---------------*/
3097 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP
, UInt ecu
)
3099 UInt otag
= ecu
| MC_OKIND_STACK
;
3100 PROF_EVENT(MCPE_NEW_MEM_STACK_32
);
3101 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3102 /* Straightforward */
3103 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3104 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 , otag
);
3105 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3106 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3107 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3108 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3110 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3111 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 , otag
);
3112 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+12, otag
);
3113 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+20, otag
);
3114 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+28, otag
);
3116 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 32, otag
);
3121 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP
)
3123 PROF_EVENT(MCPE_NEW_MEM_STACK_32
);
3124 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3125 /* Straightforward */
3126 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3127 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3128 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3129 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3130 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3131 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3133 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3134 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 );
3135 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+12 );
3136 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+20 );
3137 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+28 );
3139 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 32 );
3144 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP
)
3146 PROF_EVENT(MCPE_DIE_MEM_STACK_32
);
3147 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3148 /* Straightforward */
3149 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3150 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-24 );
3151 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3152 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
- 8 );
3153 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3154 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
3156 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3157 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-28 );
3158 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-20 );
3159 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-12 );
3160 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
3162 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-32, 32 );
3166 /*--------------- adjustment by 112 bytes ---------------*/
3169 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP
, UInt ecu
)
3171 UInt otag
= ecu
| MC_OKIND_STACK
;
3172 PROF_EVENT(MCPE_NEW_MEM_STACK_112
);
3173 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3174 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3175 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 , otag
);
3176 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3177 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3178 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+32, otag
);
3179 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+40, otag
);
3180 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+48, otag
);
3181 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+56, otag
);
3182 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+64, otag
);
3183 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+72, otag
);
3184 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+80, otag
);
3185 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+88, otag
);
3186 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+96, otag
);
3187 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+104, otag
);
3189 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 112, otag
);
3194 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP
)
3196 PROF_EVENT(MCPE_NEW_MEM_STACK_112
);
3197 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3198 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3199 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3200 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3201 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3202 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+32 );
3203 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+40 );
3204 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+48 );
3205 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+56 );
3206 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+64 );
3207 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+72 );
3208 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+80 );
3209 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+88 );
3210 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+96 );
3211 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+104 );
3213 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 112 );
3218 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP
)
3220 PROF_EVENT(MCPE_DIE_MEM_STACK_112
);
3221 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3222 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-112);
3223 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-104);
3224 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-96 );
3225 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-88 );
3226 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-80 );
3227 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-72 );
3228 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-64 );
3229 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-56 );
3230 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-48 );
3231 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-40 );
3232 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3233 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-24 );
3234 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3235 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
- 8 );
3237 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-112, 112 );
3241 /*--------------- adjustment by 128 bytes ---------------*/
3244 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP
, UInt ecu
)
3246 UInt otag
= ecu
| MC_OKIND_STACK
;
3247 PROF_EVENT(MCPE_NEW_MEM_STACK_128
);
3248 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3249 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3250 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 , otag
);
3251 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3252 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3253 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+32, otag
);
3254 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+40, otag
);
3255 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+48, otag
);
3256 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+56, otag
);
3257 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+64, otag
);
3258 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+72, otag
);
3259 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+80, otag
);
3260 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+88, otag
);
3261 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+96, otag
);
3262 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+104, otag
);
3263 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+112, otag
);
3264 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+120, otag
);
3266 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 128, otag
);
static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_128);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   }
}
static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_128);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   }
}
/*--------------- adjustment by 144 bytes ---------------*/

static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_144);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   }
}
static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_144);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   }
}
static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_144);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   }
}
/*--------------- adjustment by 160 bytes ---------------*/

static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   }
}
static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(MCPE_NEW_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   }
}
static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   }
}
/*--------------- adjustment by N bytes ---------------*/

static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(MCPE_NEW_MEM_STACK);
   MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
}

static void mc_new_mem_stack ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_NEW_MEM_STACK);
   make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
}

static void mc_die_mem_stack ( Addr a, SizeT len )
{
   PROF_EVENT(MCPE_DIE_MEM_STACK);
   MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
}
/* The AMD64 ABI says:

   "The 128-byte area beyond the location pointed to by %rsp is considered
    to be reserved and shall not be modified by signal or interrupt
    handlers.  Therefore, functions may use this area for temporary data
    that is not needed across function calls.  In particular, leaf functions
    may use this area for their entire stack frame, rather than adjusting
    the stack pointer in the prologue and epilogue.  This area is known as
    the red zone [sic]."

   So after any call or return we need to mark this redzone as containing
   undefined values.

   Consider this: we're in function f.  f calls g.  g moves rsp down
   modestly (say 16 bytes) and writes stuff all over the red zone, making it
   defined.  g returns.  f is buggy and reads from parts of the red zone
   that it didn't write on.  But because g filled that area in, f is going
   to be picking up defined V bits and so any errors from reading bits of
   the red zone it didn't write, will be missed.  The only solution I could
   think of was to make the red zone undefined when g returns to f.

   This is in accordance with the ABI, which makes it clear the redzone
   is volatile across function calls.

   The problem occurs the other way round too: f could fill the RZ up
   with defined values and g could mistakenly read them.  So the RZ
   also needs to be nuked on function calls.
*/
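
/* Roughly speaking (sketch only; the argument values here are
   illustrative, not a literal call site): the invalidation is driven by
   the IR's AbiHint annotations on calls and returns, which end up in the
   MC_(helperc_MAKE_STACK_UNINIT_*) helpers further down, conceptually as

      // after a 'ret' back into f, with the new (higher) SP in new_SP:
      MC_(helperc_MAKE_STACK_UNINIT_128_no_o)( new_SP - 128 );

   so that whatever V bits g established below f's stack pointer are
   discarded before f can read them. */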
/* Here's a simple cache to hold nia -> ECU mappings.  It could be
   improved so as to have a lower miss rate. */

static UWord stats__nia_cache_queries = 0;
static UWord stats__nia_cache_misses  = 0;

typedef
   struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
            UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   WCacheEnt;

#define N_NIA_TO_ECU_CACHE 511

static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
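
/* Descriptive note: each slot behaves as a tiny 2-entry, most-recently-used
   set.  A lookup for 'nia' hashes to slot i = nia % N_NIA_TO_ECU_CACHE; a
   hit on (nia0,ecu0) returns directly, a hit on (nia1,ecu1) first swaps that
   pair into the 0 position, and a miss demotes the old 0 entry to the 1
   position and evicts the previous 1 entry.  See convert_nia_to_ecu()
   below. */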
static void init_nia_to_ecu_cache ( void )
{
   UWord       i;
   Addr        zero_addr = 0;
   ExeContext* zero_ec;
   UInt        zero_ecu;
   /* Fill all the slots with an entry for address zero, and the
      relevant otags accordingly.  Hence the cache is initially filled
      with valid data. */
   zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   tl_assert(zero_ec);
   zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
      nia_to_ecu_cache[i].nia0 = zero_addr;
      nia_to_ecu_cache[i].ecu0 = zero_ecu;
      nia_to_ecu_cache[i].nia1 = zero_addr;
      nia_to_ecu_cache[i].ecu1 = zero_ecu;
   }
}
static inline UInt convert_nia_to_ecu ( Addr nia )
{
   UWord       i;
   UInt        ecu;
   ExeContext* ec;

   tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );

   stats__nia_cache_queries++;
   i = nia % N_NIA_TO_ECU_CACHE;
   tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);

   if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
      return nia_to_ecu_cache[i].ecu0;

   if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
#     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
      SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
      SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
#     undef SWAP
      return nia_to_ecu_cache[i].ecu0;
   }

   stats__nia_cache_misses++;
   ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   tl_assert(ec);
   ecu = VG_(get_ECU_from_ExeContext)(ec);
   tl_assert(VG_(is_plausible_ECU)(ecu));

   nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;

   nia_to_ecu_cache[i].nia0 = nia;
   nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   return ecu;
}
/* This marks the stack as addressible but undefined, after a call or
   return for a target that has an ABI defined stack redzone.  It
   happens quite a lot and needs to be fast.  This is the version for
   origin tracking.  The non-origin-tracking version is below. */
VG_REGPARM(3)
void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
                  base, len, nia );

   UInt ecu = convert_nia_to_ecu ( nia );
   tl_assert(VG_(is_plausible_ECU)(ecu));

   UInt otag = ecu | MC_OKIND_STACK;

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
      make_aligned_word64_undefined_w_otag(base +   0, otag);
      make_aligned_word64_undefined_w_otag(base +   8, otag);
      make_aligned_word64_undefined_w_otag(base +  16, otag);
      make_aligned_word64_undefined_w_otag(base +  24, otag);
      make_aligned_word64_undefined_w_otag(base +  32, otag);
      make_aligned_word64_undefined_w_otag(base +  40, otag);
      make_aligned_word64_undefined_w_otag(base +  48, otag);
      make_aligned_word64_undefined_w_otag(base +  56, otag);
      make_aligned_word64_undefined_w_otag(base +  64, otag);
      make_aligned_word64_undefined_w_otag(base +  72, otag);
      make_aligned_word64_undefined_w_otag(base +  80, otag);
      make_aligned_word64_undefined_w_otag(base +  88, otag);
      make_aligned_word64_undefined_w_otag(base +  96, otag);
      make_aligned_word64_undefined_w_otag(base + 104, otag);
      make_aligned_word64_undefined_w_otag(base + 112, otag);
      make_aligned_word64_undefined_w_otag(base + 120, otag);
   } else {
      MC_(make_mem_undefined_w_otag)(base, len, otag);
   }
#  endif
   /* Idea is: go fast when
         * 8-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely with a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)
   */
   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
            return;
         }
      }
   }
   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 288 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            p[16] = VA_BITS16_UNDEFINED;
            p[17] = VA_BITS16_UNDEFINED;
            p[18] = VA_BITS16_UNDEFINED;
            p[19] = VA_BITS16_UNDEFINED;
            p[20] = VA_BITS16_UNDEFINED;
            p[21] = VA_BITS16_UNDEFINED;
            p[22] = VA_BITS16_UNDEFINED;
            p[23] = VA_BITS16_UNDEFINED;
            p[24] = VA_BITS16_UNDEFINED;
            p[25] = VA_BITS16_UNDEFINED;
            p[26] = VA_BITS16_UNDEFINED;
            p[27] = VA_BITS16_UNDEFINED;
            p[28] = VA_BITS16_UNDEFINED;
            p[29] = VA_BITS16_UNDEFINED;
            p[30] = VA_BITS16_UNDEFINED;
            p[31] = VA_BITS16_UNDEFINED;
            p[32] = VA_BITS16_UNDEFINED;
            p[33] = VA_BITS16_UNDEFINED;
            p[34] = VA_BITS16_UNDEFINED;
            p[35] = VA_BITS16_UNDEFINED;
            set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
            return;
         }
      }
   }

   /* else fall into slow case */
   MC_(make_mem_undefined_w_otag)(base, len, otag);
}
/* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
   specialised for the non-origin-tracking case. */
VG_REGPARM(2)
void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
                  base, len );

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
      make_aligned_word64_undefined(base +   0);
      make_aligned_word64_undefined(base +   8);
      make_aligned_word64_undefined(base +  16);
      make_aligned_word64_undefined(base +  24);
      make_aligned_word64_undefined(base +  32);
      make_aligned_word64_undefined(base +  40);
      make_aligned_word64_undefined(base +  48);
      make_aligned_word64_undefined(base +  56);
      make_aligned_word64_undefined(base +  64);
      make_aligned_word64_undefined(base +  72);
      make_aligned_word64_undefined(base +  80);
      make_aligned_word64_undefined(base +  88);
      make_aligned_word64_undefined(base +  96);
      make_aligned_word64_undefined(base + 104);
      make_aligned_word64_undefined(base + 112);
      make_aligned_word64_undefined(base + 120);
   } else {
      make_mem_undefined(base, len);
   }
#  endif
   /* Idea is: go fast when
         * 8-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely with a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)
   */
   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            return;
         }
      }
   }
   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 288 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            p[16] = VA_BITS16_UNDEFINED;
            p[17] = VA_BITS16_UNDEFINED;
            p[18] = VA_BITS16_UNDEFINED;
            p[19] = VA_BITS16_UNDEFINED;
            p[20] = VA_BITS16_UNDEFINED;
            p[21] = VA_BITS16_UNDEFINED;
            p[22] = VA_BITS16_UNDEFINED;
            p[23] = VA_BITS16_UNDEFINED;
            p[24] = VA_BITS16_UNDEFINED;
            p[25] = VA_BITS16_UNDEFINED;
            p[26] = VA_BITS16_UNDEFINED;
            p[27] = VA_BITS16_UNDEFINED;
            p[28] = VA_BITS16_UNDEFINED;
            p[29] = VA_BITS16_UNDEFINED;
            p[30] = VA_BITS16_UNDEFINED;
            p[31] = VA_BITS16_UNDEFINED;
            p[32] = VA_BITS16_UNDEFINED;
            p[33] = VA_BITS16_UNDEFINED;
            p[34] = VA_BITS16_UNDEFINED;
            p[35] = VA_BITS16_UNDEFINED;
            return;
         }
      }
   }

   /* else fall into slow case */
   make_mem_undefined(base, len);
}
/* And this is an even more specialised case, for the case where there
   is no origin tracking, and the length is 128. */
VG_REGPARM(1)
void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) )) {
      make_aligned_word64_undefined(base +   0);
      make_aligned_word64_undefined(base +   8);
      make_aligned_word64_undefined(base +  16);
      make_aligned_word64_undefined(base +  24);
      make_aligned_word64_undefined(base +  32);
      make_aligned_word64_undefined(base +  40);
      make_aligned_word64_undefined(base +  48);
      make_aligned_word64_undefined(base +  56);
      make_aligned_word64_undefined(base +  64);
      make_aligned_word64_undefined(base +  72);
      make_aligned_word64_undefined(base +  80);
      make_aligned_word64_undefined(base +  88);
      make_aligned_word64_undefined(base +  96);
      make_aligned_word64_undefined(base + 104);
      make_aligned_word64_undefined(base + 112);
      make_aligned_word64_undefined(base + 120);
   } else {
      make_mem_undefined(base, 128);
   }
#  endif

   /* Idea is: go fast when
         * 16-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely with a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)

      Typically this applies to amd64 'ret' instructions, since RSP is
      16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
   */
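   /* Illustrative example (assuming the caller keeps RSP 16-aligned at
      call sites, as the amd64 ABI requires): if RSP == 0x1000 at a 'call',
      the pushed return address leaves RSP == 0xFF8 (8 % 16) inside the
      callee, and the matching 'ret' restores RSP == 0x1000 (0 % 16).
      Since 'base' here has the same alignment as RSP, the 16-aligned fast
      path below covers returns and the 8-mod-16 fast path further down
      covers calls. */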
   if (LIKELY( VG_IS_16_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      /* FIXME: come up with a sane story on the wraparound case
         (which of course cannot happen, but still..) */
      /* tl_assert(a_lo < a_hi); */            // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
            SecMap* sm    = get_secmap_for_writing_low(a_lo);
            UWord   v_off = SM_OFF(a_lo);
            UInt*   w32   = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]);
            w32[ 0] = VA_BITS32_UNDEFINED;
            w32[ 1] = VA_BITS32_UNDEFINED;
            w32[ 2] = VA_BITS32_UNDEFINED;
            w32[ 3] = VA_BITS32_UNDEFINED;
            w32[ 4] = VA_BITS32_UNDEFINED;
            w32[ 5] = VA_BITS32_UNDEFINED;
            w32[ 6] = VA_BITS32_UNDEFINED;
            w32[ 7] = VA_BITS32_UNDEFINED;
            return;
         }
      }
   }
   /* The same, but for when base is 8 % 16, which is the situation
      with RSP for amd64-ELF immediately after call instructions.
   */
   if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      /* FIXME: come up with a sane story on the wraparound case
         (which of course cannot happen, but still..) */
      /* tl_assert(a_lo < a_hi); */            // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
            /* Now we know that the entire address range falls within a
               single secondary map, and that that secondary 'lives' in
               the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* w16     = &sm->vabits16[v_off16];
            UInt*   w32     = ASSUME_ALIGNED(UInt*, &w16[1]);
            /* The following assertion is commented out for obvious
               performance reasons, but was verified as valid when
               running the entire testsuite and also Firefox. */
            /* tl_assert(VG_IS_4_ALIGNED(w32)); */
            w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
            w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
            w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
            w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
            w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
            w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
            w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
            w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
            w16[15] = VA_BITS16_UNDEFINED; // w16[15]
            return;
         }
      }
   }

   /* else fall into slow case */
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
   make_mem_undefined(base, 128);
}
/*------------------------------------------------------------*/
/*--- Checking memory                                      ---*/
/*------------------------------------------------------------*/

typedef
   enum {
      MC_Ok = 5,
      MC_AddrErr = 6,
      MC_ValueErr = 7
   }
   MC_ReadResult;


/* Check permissions for address range.  If inadequate permissions
   exist, *bad_addr is set to the offending address, so the caller can
   know what it is. */

/* Returns True if [a .. a+len) is not addressible.  Otherwise,
   returns False, and if bad_addr is non-NULL, sets *bad_addr to
   indicate the lowest failing address.  Functions below are
   similar. */
Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_NOACCESS != vabits2) {
         if (bad_addr != NULL) *bad_addr = a;
         return False;
      }
      a++;
   }
   return True;
}
static Bool is_mem_addressable ( Addr a, SizeT len,
                                 /*OUT*/Addr* bad_addr )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_NOACCESS == vabits2) {
         if (bad_addr != NULL) *bad_addr = a;
         return False;
      }
      a++;
   }
   return True;
}
static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
                                      /*OUT*/Addr* bad_addr,
                                      /*OUT*/UInt* otag )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_IS_MEM_DEFINED);
   DEBUG("is_mem_defined\n");

   if (otag)     *otag = 0;
   if (bad_addr) *bad_addr = 0;
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_DEFINED != vabits2) {
         // Error!  Nb: Report addressability errors in preference to
         // definedness errors.  And don't report definedness errors unless
         // --undef-value-errors=yes.
         if (bad_addr) {
            *bad_addr = a;
         }
         if (VA_BITS2_NOACCESS == vabits2) {
            return MC_AddrErr;
         }
         if (MC_(clo_mc_level) >= 2) {
            if (otag && MC_(clo_mc_level) == 3) {
               *otag = MC_(helperc_b_load1)( a );
            }
            return MC_ValueErr;
         }
      }
      a++;
   }
   return MC_Ok;
}
/* Like is_mem_defined but doesn't give up at the first uninitialised
   byte -- the entire range is always checked.  This is important for
   detecting errors in the case where a checked range strays into
   invalid memory, but that fact is not detected by the ordinary
   is_mem_defined(), because of an undefined section that precedes the
   out of range section, possibly as a result of an alignment hole in
   the checked data.  This version always checks the entire range and
   can report both a definedness and an accessibility error, if
   necessary. */
static void is_mem_defined_comprehensive (
               Addr a, SizeT len,
               /*OUT*/Bool* errorV,    /* is there a definedness err? */
               /*OUT*/Addr* bad_addrV, /* if so where? */
               /*OUT*/UInt* otagV,     /* and what's its otag? */
               /*OUT*/Bool* errorA,    /* is there an addressability err? */
               /*OUT*/Addr* bad_addrA  /* if so where? */
            )
{
   SizeT i;
   UWord vabits2;
   Bool  already_saw_errV = False;

   PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
   DEBUG("is_mem_defined_comprehensive\n");

   tl_assert(!(*errorV || *errorA));

   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
      vabits2 = get_vabits2(a);
      switch (vabits2) {
         case VA_BITS2_DEFINED:
            a++;
            break;
         case VA_BITS2_UNDEFINED:
         case VA_BITS2_PARTDEFINED:
            if (!already_saw_errV) {
               *errorV    = True;
               *bad_addrV = a;
               if (MC_(clo_mc_level) == 3) {
                  *otagV = MC_(helperc_b_load1)( a );
               } else {
                  *otagV = 0;
               }
               already_saw_errV = True;
            }
            a++; /* keep going */
            break;
         case VA_BITS2_NOACCESS:
            *errorA    = True;
            *bad_addrA = a;
            return; /* give up now. */
         default:
            tl_assert(0);
      }
   }
}
/* Check a zero-terminated ascii string.  Tricky -- don't want to
   examine the actual bytes, to find the end, until we're sure it is
   safe to do so. */
static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
{
   UWord vabits2;

   PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
   DEBUG("mc_is_defined_asciiz\n");

   if (otag)     *otag = 0;
   if (bad_addr) *bad_addr = 0;
   while (True) {
      PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_DEFINED != vabits2) {
         // Error!  Nb: Report addressability errors in preference to
         // definedness errors.  And don't report definedness errors unless
         // --undef-value-errors=yes.
         if (bad_addr) {
            *bad_addr = a;
         }
         if (VA_BITS2_NOACCESS == vabits2) {
            return MC_AddrErr;
         }
         if (MC_(clo_mc_level) >= 2) {
            if (otag && MC_(clo_mc_level) == 3) {
               *otag = MC_(helperc_b_load1)( a );
            }
            return MC_ValueErr;
         }
      }
      /* Ok, a is safe to read. */
      if (* ((UChar*)a) == 0) {
         return MC_Ok;
      }
      a++;
   }
}
/*------------------------------------------------------------*/
/*--- Memory event handlers                                ---*/
/*------------------------------------------------------------*/

static
void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
                                Addr base, SizeT size )
{
   Addr bad_addr;
   Bool ok = is_mem_addressable ( base, size, &bad_addr );

   if (!ok) {
      switch (part) {
      case Vg_CoreSysCall:
         MC_(record_memparam_error) ( tid, bad_addr,
                                      /*isAddrErr*/True, s, 0/*otag*/ );
         break;

      case Vg_CoreSignal:
         MC_(record_core_mem_error)( tid, s );
         break;

      default:
         VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
      }
   }
}
static
void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
                            Addr base, SizeT size )
{
   UInt otag = 0;
   Addr bad_addr;
   MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );

   if (MC_Ok != res) {
      Bool isAddrErr = ( MC_AddrErr == res ? True : False );

      switch (part) {
      case Vg_CoreSysCall:
         MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
                                      isAddrErr ? 0 : otag );
         break;

      case Vg_CoreSysCallArgInMem:
         MC_(record_regparam_error) ( tid, s, otag );
         break;

      /* If we're being asked to jump to a silly address, record an error
         message before potentially crashing the entire system. */
      case Vg_CoreTranslate:
         MC_(record_jump_error)( tid, bad_addr );
         break;

      default:
         VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
      }
   }
}
static
void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
                                   const HChar* s, Addr str )
{
   MC_ReadResult res;
   Addr bad_addr = 0;   // shut GCC up
   UInt otag = 0;

   tl_assert(part == Vg_CoreSysCall);
   res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   if (MC_Ok != res) {
      Bool isAddrErr = ( MC_AddrErr == res ? True : False );
      MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
                                   isAddrErr ? 0 : otag );
   }
}
/* Handling of mmap and mprotect is not as simple as it seems.

   The underlying semantics are that memory obtained from mmap is
   always initialised, but may be inaccessible.  And changes to the
   protection of memory do not change its contents and hence not its
   definedness state.  Problem is we can't model
   inaccessible-but-with-some-definedness state; once we mark memory
   as inaccessible we lose all info about definedness, and so can't
   restore that if it is later made accessible again.

   One obvious thing to do is this:

      mmap/mprotect NONE  -> noaccess
      mmap/mprotect other -> defined

   The problem case here is: taking accessible memory, writing
   uninitialised data to it, mprotecting it NONE and later mprotecting
   it back to some accessible state causes the undefinedness to be
   lost.

   A better proposal is:

     (1) mmap NONE       ->  make noaccess
     (2) mmap other      ->  make defined

     (3) mprotect NONE   ->  # no change
     (4) mprotect other  ->  change any "noaccess" to "defined"

   (2) is OK because memory newly obtained from mmap really is defined
       (zeroed out by the kernel -- doing anything else would
       constitute a massive security hole.)

   (1) is OK because the only way to make the memory usable is via
       (4), in which case we also wind up correctly marking it all as
       defined.

   (3) is the weak case.  We choose not to change memory state.
       (presumably the range is in some mixture of "defined" and
       "undefined", viz, accessible but with arbitrary V bits).  Doing
       nothing means we retain the V bits, so that if the memory is
       later mprotected "other", the V bits remain unchanged, so there
       can be no false negatives.  The bad effect is that if there's
       an access in the area, then MC cannot warn; but at least we'll
       get a SEGV to show, so it's better than nothing.

   Consider the sequence (3) followed by (4).  Any memory that was
   "defined" or "undefined" previously retains its state (as
   required).  Any memory that was "noaccess" before can only have
   been made that way by (1), and so it's OK to change it to
   "defined".

   See https://bugs.kde.org/show_bug.cgi?id=205541
   and https://bugs.kde.org/show_bug.cgi?id=210268
*/
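
/* Worked example of the above rules (illustrative only):

      p = mmap(NULL, len, PROT_READ|PROT_WRITE, ...)   // (2): all defined
      ... client stores partly-uninitialised data at p ...
      mprotect(p, len, PROT_NONE)                      // (3): V bits kept
      mprotect(p, len, PROT_READ|PROT_WRITE)           // (4): only bytes
                                                       //      that were
                                                       //      noaccess
                                                       //      become defined

   The undefinedness introduced by the stores survives the PROT_NONE round
   trip, which is exactly the property rules (3)+(4) are meant to
   preserve. */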
static
void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
                       ULong di_handle )
{
   if (rr || ww || xx) {
      /* (2) mmap/mprotect other -> defined */
      MC_(make_mem_defined)(a, len);
   } else {
      /* (1) mmap/mprotect NONE  -> noaccess */
      MC_(make_mem_noaccess)(a, len);
   }
}

static
void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
{
   if (rr || ww || xx) {
      /* (4) mprotect other  ->  change any "noaccess" to "defined" */
      make_mem_defined_if_noaccess(a, len);
   } else {
      /* (3) mprotect NONE   ->  # no change */
      /* do nothing */
   }
}
static
void mc_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   // Because code is defined, initialised variables get put in the data
   // segment and are defined, and uninitialised variables get put in the
   // bss segment and are auto-zeroed (and so defined).
   //
   // It's possible that there will be padding between global variables.
   // This will also be auto-zeroed, and marked as defined by Memcheck.  If
   // a program uses it, Memcheck will not complain.  This is arguably a
   // false negative, but it's a grey area -- the behaviour is defined (the
   // padding is zeroed) but it's probably not what the user intended.  And
   // we can't avoid it.
   //
   // Note: we generally ignore RWX permissions, because we can't track them
   // without requiring more than one A bit which would slow things down a
   // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
   // So we mark any such pages as "unaddressable".
   DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
         a, (ULong)len, rr, ww, xx);
   mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
}

static
void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
{
   MC_(make_mem_defined)(a, len);
}
/*------------------------------------------------------------*/
/*--- Register event handlers                              ---*/
/*------------------------------------------------------------*/

/* Try and get a nonzero origin for the guest state section of thread
   tid characterised by (offset,size).  Return 0 if nothing to show
   for it. */
static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
                                             Int offset, SizeT size )
{
   Int   sh2off;
   UInt  area[3];
   UInt  otag;
   sh2off = MC_(get_otrack_shadow_offset)( offset, size );
   if (sh2off == -1)
      return 0;  /* This piece of guest state is not tracked */
   tl_assert(sh2off >= 0);
   tl_assert(0 == (sh2off % 4));
   area[0] = 0x31313131;
   area[2] = 0x27272727;
   VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
   tl_assert(area[0] == 0x31313131);
   tl_assert(area[2] == 0x27272727);
   otag = area[1];
   return otag;
}
/* When some chunk of guest state is written, mark the corresponding
   shadow area as valid.  This is used to initialise arbitrarily large
   chunks of guest state, hence the _SIZE value, which has to be as
   big as the biggest guest state.
*/
static void mc_post_reg_write ( CorePart part, ThreadId tid,
                                PtrdiffT offset, SizeT size)
{
#  define MAX_REG_WRITE_SIZE 1744
   UChar area[MAX_REG_WRITE_SIZE];
   tl_assert(size <= MAX_REG_WRITE_SIZE);
   VG_(memset)(area, V_BITS8_DEFINED, size);
   VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
#  undef MAX_REG_WRITE_SIZE
}

static
void mc_post_reg_write_clientcall ( ThreadId tid,
                                    PtrdiffT offset, SizeT size, Addr f )
{
   mc_post_reg_write(/*dummy*/0, tid, offset, size);
}
/* Look at the definedness of the guest's shadow state for
   [offset, offset+len).  If any part of that is undefined, record
   a parameter error.
*/
static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
                              PtrdiffT offset, SizeT size)
{
   Int   i;
   Bool  bad;
   UInt  otag;

   UChar area[16];
   tl_assert(size <= 16);

   VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );

   bad = False;
   for (i = 0; i < size; i++) {
      if (area[i] != V_BITS8_DEFINED) {
         bad = True;
         break;
      }
   }

   if (!bad)
      return;

   /* We've found some undefinedness.  See if we can also find an
      origin for it. */
   otag = mb_get_origin_for_guest_offset( tid, offset, size );
   MC_(record_regparam_error) ( tid, s, otag );
}
/*------------------------------------------------------------*/
/*--- Register-memory event handlers                       ---*/
/*------------------------------------------------------------*/

static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
                                 PtrdiffT guest_state_offset, SizeT size )
{
   SizeT i;
   UChar vbits8;
   Int   offset;
   UInt  d32;

   /* Slow loop. */
   for (i = 0; i < size; i++) {
      get_vbits8( a+i, &vbits8 );
      VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
                                 1, &vbits8 );
   }

   if (MC_(clo_mc_level) != 3)
      return;

   /* Track origins. */
   offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
   if (offset == -1)
      return;

   switch (size) {
   case 1:
      d32 = MC_(helperc_b_load1)( a );
      break;
   case 2:
      d32 = MC_(helperc_b_load2)( a );
      break;
   case 4:
      d32 = MC_(helperc_b_load4)( a );
      break;
   case 8:
      d32 = MC_(helperc_b_load8)( a );
      break;
   case 16:
      d32 = MC_(helperc_b_load16)( a );
      break;
   case 32:
      d32 = MC_(helperc_b_load32)( a );
      break;
   default:
      tl_assert(0);
   }

   VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
}
static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
                                 PtrdiffT guest_state_offset, Addr a,
                                 SizeT size )
{
   SizeT i;
   UChar vbits8;
   Int   offset;
   UInt  d32;

   /* Slow loop. */
   for (i = 0; i < size; i++) {
      VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
                                 guest_state_offset+i, 1 );
      set_vbits8( a+i, vbits8 );
   }

   if (MC_(clo_mc_level) != 3)
      return;

   /* Track origins. */
   offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
   if (offset == -1)
      return;

   VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
   switch (size) {
   case 1:
      MC_(helperc_b_store1)( a, d32 );
      break;
   case 2:
      MC_(helperc_b_store2)( a, d32 );
      break;
   case 4:
      MC_(helperc_b_store4)( a, d32 );
      break;
   case 8:
      MC_(helperc_b_store8)( a, d32 );
      break;
   case 16:
      MC_(helperc_b_store16)( a, d32 );
      break;
   case 32:
      MC_(helperc_b_store32)( a, d32 );
      break;
   default:
      tl_assert(0);
   }
}
/*------------------------------------------------------------*/
/*--- Some static assertions                               ---*/
/*------------------------------------------------------------*/

/* The handwritten assembly helpers below have baked-in assumptions
   about various constant values.  These assertions attempt to make
   that a bit safer by checking those values and flagging changes that
   would make the assembly invalid.  Not perfect but it's better than
   nothing. */

STATIC_ASSERT(SM_CHUNKS * 4 == 65536);

STATIC_ASSERT(VA_BITS8_DEFINED   == 0xAA);
STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);

STATIC_ASSERT(V_BITS32_DEFINED   == 0x00000000);
STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);

STATIC_ASSERT(VA_BITS4_DEFINED   == 0xA);
STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);

STATIC_ASSERT(V_BITS16_DEFINED   == 0x0000);
STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);

STATIC_ASSERT(VA_BITS2_DEFINED   == 2);
STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);

STATIC_ASSERT(V_BITS8_DEFINED   == 0x00);
STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
/*------------------------------------------------------------*/
/*--- Functions called directly from generated code:       ---*/
/*--- Load/store handlers.                                 ---*/
/*------------------------------------------------------------*/

/* Types: LOADV32, LOADV16, LOADV8 are:
               UWord fn ( Addr a )
   so they return 32-bits on 32-bit machines and 64-bits on
   64-bit machines.  Addr has the same size as a host word.

   LOADV64 is always  ULong fn ( Addr a )

   Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   are a UWord, and for STOREV64 they are a ULong.
*/

/* If any part of '_a' indicated by the mask is 1, either '_a' is not
   naturally '_sz/8'-aligned, or it exceeds the range covered by the
   primary map.  This is all very tricky (and important!), so let's
   work through the maths by hand (below), *and* assert for these
   values at startup. */
#define MASK(_szInBytes) \
   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )

/* MASK only exists so as to define this macro. */
#define UNALIGNED_OR_HIGH(_a,_szInBits) \
   ((_a) & MASK((_szInBits>>3)))
/* On a 32-bit machine:

   N_PRIMARY_BITS          == 16, so
   N_PRIMARY_MAP           == 0x10000, so
   N_PRIMARY_MAP-1         == 0xFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
           = ~ ( 0xFFFF | 0xFFFF0000 )
           = ~ 0xFFFF'FFFF
           = 0

   MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
           = ~ ( 0xFFFE | 0xFFFF0000 )
           = ~ 0xFFFF'FFFE
           = 1

   MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
           = ~ ( 0xFFFC | 0xFFFF0000 )
           = ~ 0xFFFF'FFFC
           = 3

   MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
           = ~ ( 0xFFF8 | 0xFFFF0000 )
           = ~ 0xFFFF'FFF8
           = 7

   Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   the 1-byte alignment case, it is always a zero value, since MASK(1)
   is zero.  All as expected.

   On a 64-bit machine, it's more complex, since we're testing
   simultaneously for misalignment and for the address being at or
   above the range covered by the primary map:

   N_PRIMARY_BITS          == 20, so
   N_PRIMARY_MAP           == 0x100000, so
   N_PRIMARY_MAP-1         == 0xFFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFF | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFF
           = 0xFFFF'FFF0'0000'0000

   MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFE | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFE
           = 0xFFFF'FFF0'0000'0001

   MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFC | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFC
           = 0xFFFF'FFF0'0000'0003

   MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
           = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFF8
           = 0xFFFF'FFF0'0000'0007

   Hence in the 64-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   precisely when a is not 1/2/4/8-bytes aligned, or lies at or above
   the area covered by the primary map.
*/
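
/* Quick illustrative check of the 64-bit values above, using
   MASK(8) == 0xFFFF'FFF0'0000'0007:

      a == 0x0000'0000'1234'5678  ->  a & MASK(8) == 0   (8-aligned and
                                                          in range: fast path)
      a == 0x0000'0000'1234'567C  ->  a & MASK(8) == 4   (misaligned)
      a == 0x0000'0010'0000'0000  ->  a & MASK(8) != 0   (beyond the range
                                                          of the primary map)

   Any nonzero result routes the access through the slow path. */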
/*------------------------------------------------------------*/
/*--- LOADV256 and LOADV128                                ---*/
/*------------------------------------------------------------*/

static INLINE
void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
                           Addr a, SizeT nBits, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV_128_OR_256);

#ifndef PERF_FAST_LOADV
   mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   return;
#else
   {
      UWord   sm_off16, vabits16, j;
      UWord   nBytes  = nBits / 8;
      UWord   nULongs = nBytes / 8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
         PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
         mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
         return;
      }

      /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
         suitably aligned, is mapped, and addressible. */
      for (j = 0; j < nULongs; j++) {
         sm       = get_secmap_for_reading_low(a + 8*j);
         sm_off16 = SM_OFF_16(a + 8*j);
         vabits16 = sm->vabits16[sm_off16];

         // Convert V bits from compact memory form to expanded
         // register form.
         if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
            res[j] = V_BITS64_DEFINED;
         } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
            res[j] = V_BITS64_UNDEFINED;
         } else {
            /* Slow case: some block of 8 bytes are not all-defined or
               all-undefined. */
            PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
            mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
            return;
         }
      }
      return;
   }
#endif
}

VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
}
VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
}

VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
}
VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
{
   mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
}
/*------------------------------------------------------------*/
/*--- LOADV64                                              ---*/
/*------------------------------------------------------------*/

static INLINE
ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV64);

#ifndef PERF_FAST_LOADV
   return mc_LOADVn_slow( a, 64, isBigEndian );
#else
   {
      UWord   sm_off16, vabits16;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
         PROF_EVENT(MCPE_LOADV64_SLOW1);
         return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
      }

      sm       = get_secmap_for_reading_low(a);
      sm_off16 = SM_OFF_16(a);
      vabits16 = sm->vabits16[sm_off16];

      // Handle common case quickly: a is suitably aligned, is mapped, and
      // addressible.
      // Convert V bits from compact memory form to expanded register form.
      if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
         return V_BITS64_DEFINED;
      } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
         return V_BITS64_UNDEFINED;
      } else {
         /* Slow case: the 8 bytes are not all-defined or all-undefined. */
         PROF_EVENT(MCPE_LOADV64_SLOW2);
         return mc_LOADVn_slow( a, 64, isBigEndian );
      }
   }
#endif
}

// Generic for all platforms
VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
{
   return mc_LOADV64(a, True);
}

// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
/* See mc_main_asm.c */

#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
/* See mc_main_asm.c */

#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
{
   return mc_LOADV64(a, False);
}
#endif
/*------------------------------------------------------------*/
/*--- STOREV64                                             ---*/
/*------------------------------------------------------------*/

static INLINE
void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
{
   PROF_EVENT(MCPE_STOREV64);

#ifndef PERF_FAST_STOREV
   // XXX: this slow case seems to be marginally faster than the fast case!
   // Investigate further.
   mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
#else
   {
      UWord   sm_off16, vabits16;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
         PROF_EVENT(MCPE_STOREV64_SLOW1);
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }

      sm       = get_secmap_for_reading_low(a);
      sm_off16 = SM_OFF_16(a);
      vabits16 = sm->vabits16[sm_off16];

      // To understand the below cleverness, see the extensive comments
      // in MC_(helperc_STOREV8).
      if (LIKELY(V_BITS64_DEFINED == vbits64)) {
         if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
            sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV64_SLOW2);
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }
      if (V_BITS64_UNDEFINED == vbits64) {
         if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
            sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV64_SLOW3);
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }

      PROF_EVENT(MCPE_STOREV64_SLOW4);
      mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   }
#endif
}

VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
{
   mc_STOREV64(a, vbits64, True);
}
VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
{
   mc_STOREV64(a, vbits64, False);
}
/*------------------------------------------------------------*/
/*--- LOADV32                                              ---*/
/*------------------------------------------------------------*/

static INLINE
UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV32);

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
         PROF_EVENT(MCPE_LOADV32_SLOW1);
         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // Handle common case quickly: a is suitably aligned, is mapped, and the
      // entire word32 it lives in is addressible.
      // Convert V bits from compact memory form to expanded register form.
      // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
      // Almost certainly not necessary, but be paranoid.
      if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      } else {
         /* Slow case: the 4 bytes are not all-defined or all-undefined. */
         PROF_EVENT(MCPE_LOADV32_SLOW2);
         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
      }
   }
#endif
}

// Generic for all platforms
VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
{
   return mc_LOADV32(a, True);
}

// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
/* See mc_main_asm.c */

#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
/* See mc_main_asm.c */

#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
{
   return mc_LOADV32(a, False);
}
#endif
/*------------------------------------------------------------*/
/*--- STOREV32                                             ---*/
/*------------------------------------------------------------*/

static INLINE
void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
{
   PROF_EVENT(MCPE_STOREV32);

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
         PROF_EVENT(MCPE_STOREV32_SLOW1);
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // To understand the below cleverness, see the extensive comments
      // in MC_(helperc_STOREV8).
      if (LIKELY(V_BITS32_DEFINED == vbits32)) {
         if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
            sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV32_SLOW2);
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }
      if (V_BITS32_UNDEFINED == vbits32) {
         if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
            return;
         }
         if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
            sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
            return;
         }
         PROF_EVENT(MCPE_STOREV32_SLOW3);
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }

      PROF_EVENT(MCPE_STOREV32_SLOW4);
      mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   }
#endif
}

VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
{
   mc_STOREV32(a, vbits32, True);
}
VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
{
   mc_STOREV32(a, vbits32, False);
}
/*------------------------------------------------------------*/
/*--- LOADV16                                              ---*/
/*------------------------------------------------------------*/

static INLINE
UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(MCPE_LOADV16);

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
         PROF_EVENT(MCPE_LOADV16_SLOW1);
         return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];
      // Handle common case quickly: a is suitably aligned, is mapped, and is
      // addressible.
      // Convert V bits from compact memory form to expanded register form
      if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
      else {
         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
         // the two sub-bytes.
         UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
         if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
         else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
         else {
            /* Slow case: the two bytes are not all-defined or all-undefined. */
            PROF_EVENT(MCPE_LOADV16_SLOW2);
            return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
         }
      }
   }
#endif
}

// Generic for all platforms
VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
{
   return mc_LOADV16(a, True);
}

// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
__asm__( /* Derived from NCode template */
".text                                  \n"
".align 2                               \n"
".global vgMemCheck_helperc_LOADV16le   \n"
".type   vgMemCheck_helperc_LOADV16le, %function \n"
"vgMemCheck_helperc_LOADV16le:          \n" //
"      tst    r0, #1                    \n" //
"      bne    .LLV16LEc12               \n" // if misaligned
"      lsr    r2, r0, #16               \n" // r2 = pri-map-ix
"      movw   r3, #:lower16:primary_map \n" //
"      uxth   r1, r0                    \n" // r1 = sec-map-offB
"      movt   r3, #:upper16:primary_map \n" //
"      ldr    r2, [r3, r2, lsl #2]      \n" // r2 = sec-map
"      ldrb   r1, [r2, r1, lsr #2]      \n" // r1 = sec-map-VABITS8
"      cmp    r1, #0xAA                 \n" // r1 == VA_BITS8_DEFINED?
"      bne    .LLV16LEc0                \n" // no, goto .LLV16LEc0
".LLV16LEh9:                            \n" //
"      mov    r0, #0xFFFFFFFF           \n" //
"      lsl    r0, r0, #16               \n" // V_BITS16_DEFINED | top16safe
"      bx     lr                        \n" //
".LLV16LEc0:                            \n" //
"      cmp    r1, #0x55                 \n" // VA_BITS8_UNDEFINED
"      bne    .LLV16LEc4                \n" //
".LLV16LEc2:                            \n" //
"      mov    r0, #0xFFFFFFFF           \n" // V_BITS16_UNDEFINED | top16safe
"      bx     lr                        \n" //
".LLV16LEc4:                            \n" //
        // r1 holds sec-map-VABITS8.  r0 holds the address and is 2-aligned.
        // Extract the relevant 4 bits and inspect.
"      and    r2, r0, #2       \n" // addr & 2
"      add    r2, r2, r2       \n" // 2 * (addr & 2)
"      lsr    r1, r1, r2       \n" // sec-map-VABITS8 >> (2 * (addr & 2))
"      and    r1, r1, #15      \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
"      cmp    r1, #0xA                  \n" // VA_BITS4_DEFINED
"      beq    .LLV16LEh9                \n" //
"      cmp    r1, #0x5                  \n" // VA_BITS4_UNDEFINED
"      beq    .LLV16LEc2                \n" //
".LLV16LEc12:                           \n" //
"      push   {r4, lr}                  \n" //
"      mov    r2, #0                    \n" //
"      mov    r1, #16                   \n" //
"      bl     mc_LOADVn_slow            \n" //
"      pop    {r4, pc}                  \n" //
".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
".previous\n"
);

#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
__asm__(
".text\n"
".align 16\n"
".global vgMemCheck_helperc_LOADV16le\n"
".type   vgMemCheck_helperc_LOADV16le, @function\n"
"vgMemCheck_helperc_LOADV16le:\n"
"      test   $0x1,  %eax\n"
"      jne    .LLV16LE5\n"          /* jump if not aligned */
"      mov    %eax,  %edx\n"
"      shr    $0x10, %edx\n"
"      mov    primary_map(,%edx,4), %ecx\n"
"      movzwl %ax,   %edx\n"
"      shr    $0x2,  %edx\n"
"      movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
"      cmp    $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED */
"      jne    .LLV16LE2\n"          /* jump if not all 32bits defined */
".LLV16LE1:\n"
"      mov    $0xffff0000,%eax\n"   /* V_BITS16_DEFINED | top16safe */
"      ret\n"
".LLV16LE2:\n"
"      cmp    $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
"      jne    .LLV16LE4\n"          /* jump if not all 32bits undefined */
".LLV16LE3:\n"
"      or     $0xffffffff,%eax\n"   /* V_BITS16_UNDEFINED | top16safe */
"      ret\n"
".LLV16LE4:\n"
"      mov    %eax,  %ecx\n"
"      and    $0x2,  %ecx\n"
"      add    %ecx,  %ecx\n"
"      shr    %cl,   %edx\n"
"      and    $0xf,  %edx\n"
"      cmp    $0xa,  %edx\n"
"      je     .LLV16LE1\n"          /* jump if all 16bits are defined */
"      cmp    $0x5,  %edx\n"
"      je     .LLV16LE3\n"          /* jump if all 16bits are undefined */
".LLV16LE5:\n"
"      xor    %ecx,  %ecx\n"        /* tail call mc_LOADVn_slow(a, 16, 0) */
"      mov    $16,   %edx\n"
"      jmp    mc_LOADVn_slow\n"
".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
".previous\n"
);

#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
{
   return mc_LOADV16(a, False);
}
#endif
/*------------------------------------------------------------*/
/*--- STOREV16                                             ---*/
/*------------------------------------------------------------*/

/* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
static INLINE
Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift = (a & 2) << 1;               // shift by 0 or 4
   vabits8 >>= shift;                  // shift the four bits to the bottom
   // check 2 x vabits2 != VA_BITS2_NOACCESS
   return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
          && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
}
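/* Illustrative sketch (not part of the tool): how the four 2-bit VA
   fields of a vabits8 byte are laid out and what the test above
   computes for a 2-aligned halfword.  Standalone, with local EX_*
   constants mirroring the VA_BITS2_* encodings used in this file; the
   assumed packing (lowest-addressed byte in the least-significant bits)
   follows extract_vabits2_from_vabits8.  Guarded out of the build. */
#if 0
#include <stdint.h>
#include <assert.h>

enum { EX_VA2_NOACCESS = 0x0, EX_VA2_UNDEFINED = 0x1, EX_VA2_DEFINED = 0x2 };

static int ex_accessible_vabits4_in_vabits8 ( uint64_t a, uint8_t vabits8 )
{
   unsigned shift = (a & 2) << 1;               /* 0 or 4 */
   vabits8 >>= shift;                           /* relevant nibble at bottom */
   return ((vabits8 & 0x3) != EX_VA2_NOACCESS)
          && (((vabits8 >> 2) & 0x3) != EX_VA2_NOACCESS);
}

int main ( void )
{
   /* Bytes 0..3 of a 4-byte group: defined, defined, noaccess, undefined,
      packed low-to-high as 0b01'00'10'10 == 0x4A. */
   uint8_t vabits8 = 0x4A;
   assert( ex_accessible_vabits4_in_vabits8(0x1000, vabits8));  /* bytes 0,1 */
   assert(!ex_accessible_vabits4_in_vabits8(0x1002, vabits8));  /* byte 2 is noaccess */
   return 0;
}
#endif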
static INLINE
void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
{
   PROF_EVENT(MCPE_STOREV16);

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
         PROF_EVENT(MCPE_STOREV16_SLOW1);
         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
         return;
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // To understand the below cleverness, see the extensive comments
      // in MC_(helperc_STOREV8).
      if (LIKELY(V_BITS16_DEFINED == vbits16)) {
         if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
            return;
         }
         if (!is_distinguished_sm(sm)
             && accessible_vabits4_in_vabits8(a, vabits8)) {
            insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
                                         &(sm->vabits8[sm_off]) );
            return;
         }
         PROF_EVENT(MCPE_STOREV16_SLOW2);
         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
         return;
      }
      if (V_BITS16_UNDEFINED == vbits16) {
         if (vabits8 == VA_BITS8_UNDEFINED) {
            return;
         }
         if (!is_distinguished_sm(sm)
             && accessible_vabits4_in_vabits8(a, vabits8)) {
            insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
                                         &(sm->vabits8[sm_off]) );
            return;
         }
         PROF_EVENT(MCPE_STOREV16_SLOW3);
         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
         return;
      }

      PROF_EVENT(MCPE_STOREV16_SLOW4);
      mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   }
#endif
}

VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
{
   mc_STOREV16(a, vbits16, True);
}
VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
{
   mc_STOREV16(a, vbits16, False);
}
/*------------------------------------------------------------*/
/*--- LOADV8                                               ---*/
/*------------------------------------------------------------*/

/* Note: endianness is irrelevant for size == 1 */

// Non-generic assembly for arm32-linux
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && defined(VGP_arm_linux)
__asm__( /* Derived from NCode template */
".text                                  \n"
".align 2                               \n"
".global vgMemCheck_helperc_LOADV8      \n"
".type   vgMemCheck_helperc_LOADV8, %function \n"
"vgMemCheck_helperc_LOADV8:             \n" //
"      lsr    r2, r0, #16               \n" // r2 = pri-map-ix
"      movw   r3, #:lower16:primary_map \n" //
"      uxth   r1, r0                    \n" // r1 = sec-map-offB
"      movt   r3, #:upper16:primary_map \n" //
"      ldr    r2, [r3, r2, lsl #2]      \n" // r2 = sec-map
"      ldrb   r1, [r2, r1, lsr #2]      \n" // r1 = sec-map-VABITS8
"      cmp    r1, #0xAA                 \n" // r1 == VA_BITS8_DEFINED?
"      bne    .LLV8c0                   \n" // no, goto .LLV8c0
".LLV8h9:                               \n" //
"      mov    r0, #0xFFFFFF00           \n" // V_BITS8_DEFINED | top24safe
"      bx     lr                        \n" //
".LLV8c0:                               \n" //
"      cmp    r1, #0x55                 \n" // VA_BITS8_UNDEFINED
"      bne    .LLV8c4                   \n" //
".LLV8c2:                               \n" //
"      mov    r0, #0xFFFFFFFF           \n" // V_BITS8_UNDEFINED | top24safe
"      bx     lr                        \n" //
".LLV8c4:                               \n" //
        // r1 holds sec-map-VABITS8
        // r0 holds the address.  Extract the relevant 2 bits and inspect.
"      and    r2, r0, #3       \n" // addr & 3
"      add    r2, r2, r2       \n" // 2 * (addr & 3)
"      lsr    r1, r1, r2       \n" // sec-map-VABITS8 >> (2 * (addr & 3))
"      and    r1, r1, #3       \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
"      cmp    r1, #2                    \n" // VA_BITS2_DEFINED
"      beq    .LLV8h9                   \n" //
"      cmp    r1, #1                    \n" // VA_BITS2_UNDEFINED
"      beq    .LLV8c2                   \n" //
"      push   {r4, lr}                  \n" //
"      mov    r2, #0                    \n" //
"      mov    r1, #8                    \n" //
"      bl     mc_LOADVn_slow            \n" //
"      pop    {r4, pc}                  \n" //
".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
".previous\n"
);

/* Non-generic assembly for x86-linux */
#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
__asm__(
".text\n"
".align 16\n"
".global vgMemCheck_helperc_LOADV8\n"
".type   vgMemCheck_helperc_LOADV8, @function\n"
"vgMemCheck_helperc_LOADV8:\n"
"      mov    %eax,  %edx\n"
"      shr    $0x10, %edx\n"
"      mov    primary_map(,%edx,4), %ecx\n"
"      movzwl %ax,   %edx\n"
"      shr    $0x2,  %edx\n"
"      movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
"      cmp    $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED? */
"      jne    .LLV8LE2\n"           /* jump if not defined */
".LLV8LE1:\n"
"      mov    $0xffffff00, %eax\n"  /* V_BITS8_DEFINED | top24safe */
"      ret\n"
".LLV8LE2:\n"
"      cmp    $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
"      jne    .LLV8LE4\n"           /* jump if not all 32bits are undefined */
".LLV8LE3:\n"
"      or     $0xffffffff, %eax\n"  /* V_BITS8_UNDEFINED | top24safe */
"      ret\n"
".LLV8LE4:\n"
"      mov    %eax,  %ecx\n"
"      and    $0x3,  %ecx\n"
"      add    %ecx,  %ecx\n"
"      shr    %cl,   %edx\n"
"      and    $0x3,  %edx\n"
"      cmp    $0x2,  %edx\n"
"      je     .LLV8LE1\n"           /* jump if all 8bits are defined */
"      cmp    $0x1,  %edx\n"
"      je     .LLV8LE3\n"           /* jump if all 8bits are undefined */
"      xor    %ecx,  %ecx\n"        /* tail call to mc_LOADVn_slow(a, 8, 0) */
"      mov    $8,    %edx\n"
"      jmp    mc_LOADVn_slow\n"
".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
".previous\n"
);

#else
// Generic for all platforms except {arm32,x86}-linux and x86-solaris
VG_REGPARM(1)
UWord MC_(helperc_LOADV8) ( Addr a )
{
   PROF_EVENT(MCPE_LOADV8);

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
         PROF_EVENT(MCPE_LOADV8_SLOW1);
         return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];
      // Convert V bits from compact memory form to expanded register form
      // Handle common case quickly: a is mapped, and the entire
      // word32 it lives in is addressible.
      if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
      else {
         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
         // the single byte.
         UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
         if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
         else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
         else {
            /* Slow case: the byte is not all-defined or all-undefined. */
            PROF_EVENT(MCPE_LOADV8_SLOW2);
            return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
         }
      }
   }
#endif
}
#endif
/*------------------------------------------------------------*/
/*--- STOREV8                                              ---*/
/*------------------------------------------------------------*/

VG_REGPARM(2)
void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
{
   PROF_EVENT(MCPE_STOREV8);

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
         PROF_EVENT(MCPE_STOREV8_SLOW1);
         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
         return;
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // Clevernesses to speed up storing V bits.
      // The 64/32/16 bit cases also have similar clevernesses, but it
      // works a little differently to the code below.
      //
      // Cleverness 1:  sometimes we don't have to write the shadow memory at
      // all, if we can tell that what we want to write is the same as what is
      // already there. These cases are marked below as "defined on defined" and
      // "undefined on undefined".
      //
      // Cleverness 2:
      // We also avoid to call mc_STOREVn_slow if the V bits can directly
      // be written in the secondary map. V bits can be directly written
      // if 4 conditions are respected:
      //   * The address for which V bits are written is naturally aligned
      //        on 1 byte  for STOREV8 (this is always true)
      //        on 2 bytes for STOREV16
      //        on 4 bytes for STOREV32
      //        on 8 bytes for STOREV64.
      //   * V bits being written are either fully defined or fully undefined.
      //     (for partially defined V bits, V bits cannot be directly written,
      //      as the secondary vbits table must be maintained).
      //   * the secmap is not distinguished (distinguished maps cannot be
      //     modified).
      //   * the memory corresponding to the V bits being written is
      //     accessible (if one or more bytes are not accessible,
      //     we must call mc_STOREVn_slow in order to report accessibility
      //     errors).
      //     Note that for STOREV32 and STOREV64, it is too expensive
      //     to verify the accessibility of each byte for the benefit it
      //     brings. Instead, a quicker check is done by comparing to
      //     VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
      //     but misses some opportunity of direct modifications.
      //     Checking each byte accessibility was measured for
      //     STOREV32+perf tests and was slowing down all perf tests.
      // The cases corresponding to cleverness 2 are marked below as
      // "direct mod".
      if (LIKELY(V_BITS8_DEFINED == vbits8)) {
         if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
            return; // defined on defined
         }
         if (!is_distinguished_sm(sm)
             && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
            // direct mod
            insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
                                         &(sm->vabits8[sm_off]) );
            return;
         }
         PROF_EVENT(MCPE_STOREV8_SLOW2);
         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
         return;
      }
      if (V_BITS8_UNDEFINED == vbits8) {
         if (vabits8 == VA_BITS8_UNDEFINED) {
            return; // undefined on undefined
         }
         if (!is_distinguished_sm(sm)
             && (VA_BITS2_NOACCESS
                 != extract_vabits2_from_vabits8(a, vabits8))) {
            // direct mod
            insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
                                         &(sm->vabits8[sm_off]) );
            return;
         }
         PROF_EVENT(MCPE_STOREV8_SLOW3);
         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
         return;
      }

      // Partially defined word
      PROF_EVENT(MCPE_STOREV8_SLOW4);
      mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   }
#endif
}
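/* Illustrative sketch (not part of the tool): the decision structure of
   the byte store above, reduced to a pure function over local values.
   Return 0 means nothing needs writing ("defined on defined" /
   "undefined on undefined"), 1 means the 2-bit VA field can be updated
   in place ("direct mod"), 2 means the slow path must run (copy-on-write
   of a distinguished secmap, accessibility error reporting, or partial
   definedness tracking).  The 0xAA/0x55 group values mirror
   VA_BITS8_DEFINED / VA_BITS8_UNDEFINED; the ex_ names are hypothetical.
   Guarded out of the build. */
#if 0
#include <stdint.h>

static int ex_storev8_path ( int storing_defined,      /* 1 = all-defined V bits */
                             uint8_t vabits8,          /* VA byte for the 4-byte group */
                             unsigned byte_in_group,   /* a & 3 */
                             int sm_is_distinguished )
{
   uint8_t group_all = storing_defined ? 0xAA : 0x55;
   uint8_t vabits2   = (vabits8 >> (byte_in_group * 2)) & 3;

   if (vabits8 == group_all)
      return 0;                  /* cleverness 1: the store would be a no-op */
   if (!sm_is_distinguished && vabits2 != 0 /* NOACCESS */)
      return 1;                  /* cleverness 2: direct in-place update */
   return 2;                     /* fall back to mc_STOREVn_slow */
}
#endif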
/*------------------------------------------------------------*/
/*--- Functions called directly from generated code:       ---*/
/*--- Value-check failure handlers.                        ---*/
/*------------------------------------------------------------*/

/* Call these ones when an origin is available ... */
VG_REGPARM(1)
void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
   MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
}

VG_REGPARM(1)
void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
}

VG_REGPARM(1)
void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
}

VG_REGPARM(1)
void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
}

VG_REGPARM(2)
void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
}

/* ... and these when an origin isn't available. */

VG_REGPARM(0)
void MC_(helperc_value_check0_fail_no_o) ( void ) {
   MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
}

VG_REGPARM(0)
void MC_(helperc_value_check1_fail_no_o) ( void ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
}

VG_REGPARM(0)
void MC_(helperc_value_check4_fail_no_o) ( void ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
}

VG_REGPARM(0)
void MC_(helperc_value_check8_fail_no_o) ( void ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
}

VG_REGPARM(1)
void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
}
/*------------------------------------------------------------*/
/*--- Metadata get/set functions, for client requests.     ---*/
/*------------------------------------------------------------*/

// Nb: this expands the V+A bits out into register-form V bits, even though
// they're in memory.  This is for backward compatibility, and because it's
// probably what the user wants.

/* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
   error [no longer used], 3 == addressing error. */
/* Nb: We used to issue various definedness/addressability errors from here,
   but we took them out because they ranged from not-very-helpful to
   downright annoying, and they complicated the error data structures. */
static Int mc_get_or_set_vbits_for_client (
   Addr a,
   Addr vbits,
   SizeT szB,
   Bool setting, /* True <=> set vbits,  False <=> get vbits */
   Bool is_client_request /* True <=> real user request
                             False <=> internal call from gdbserver */
)
{
   SizeT i;
   Bool  ok;
   UChar vbits8;

   /* Check that arrays are addressible before doing any getting/setting.
      vbits to be checked only for real user request. */
   for (i = 0; i < szB; i++) {
      if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
          (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
         return 3;
      }
   }

   /* Do the copy */
   if (setting) {
      /* Setting */
      for (i = 0; i < szB; i++) {
         ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
         tl_assert(ok);
      }
   } else {
      /* Getting */
      for (i = 0; i < szB; i++) {
         ok = get_vbits8(a + i, &vbits8);
         tl_assert(ok);
         ((UChar*)vbits)[i] = vbits8;
      }
      if (is_client_request)
         // The bytes in vbits[] have now been set, so mark them as such.
         MC_(make_mem_defined)(vbits, szB);
   }

   return 1;
}
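/* Illustrative sketch (not part of the tool): how a program running
   under Memcheck reaches the function above, via the VALGRIND_GET_VBITS
   and VALGRIND_SET_VBITS client requests from memcheck.h.  This is
   client-side code, compiled into the program under test; the expected
   vbits values in the comment assume the usual encoding (0x00 = all
   defined, 0xFF = all undefined) and are only indicative.  Guarded out
   of the build. */
#if 0
#include <stdio.h>
#include "memcheck.h"

int main ( void )
{
   char buf[8];
   char vbits[8];

   buf[0] = 1;   /* byte 0 becomes defined; bytes 1..7 stay undefined */

   /* Returns 1 on success, 3 if some byte was unaddressable (see above). */
   int r = VALGRIND_GET_VBITS(buf, vbits, sizeof buf);
   if (r == 1)
      printf("vbits[0]=0x%02x vbits[1]=0x%02x\n",
             (unsigned char)vbits[0], (unsigned char)vbits[1]);

   /* Put the same definedness state back. */
   (void) VALGRIND_SET_VBITS(buf, vbits, sizeof buf);
   return 0;
}
#endif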
/*------------------------------------------------------------*/
/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
/*------------------------------------------------------------*/

/* For the memory leak detector, say whether an entire 64k chunk of
   address space is possibly in use, or not.  If in doubt return
   True.
*/
Bool MC_(is_within_valid_secondary) ( Addr a )
{
   SecMap* sm = maybe_get_secmap_for ( a );
   if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
      /* Definitely not in use. */
      return False;
   } else {
      return True;
   }
}


/* For the memory leak detector, say whether or not a given word
   address is to be regarded as valid. */
Bool MC_(is_valid_aligned_word) ( Addr a )
{
   tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
   tl_assert(VG_IS_WORD_ALIGNED(a));
   if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
      return False;
   if (sizeof(UWord) == 8) {
      if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
         return False;
   }
   if (UNLIKELY(MC_(in_ignored_range)(a)))
      return False;
   else
      return True;
}
/*------------------------------------------------------------*/
/*--- Initialisation                                       ---*/
/*------------------------------------------------------------*/

static void init_shadow_memory ( void )
{
   Int     i;
   SecMap* sm;

   tl_assert(V_BIT_UNDEFINED   == 1);
   tl_assert(V_BIT_DEFINED     == 0);
   tl_assert(V_BITS8_UNDEFINED == 0xFF);
   tl_assert(V_BITS8_DEFINED   == 0);

   /* Build the 3 distinguished secondaries */
   sm = &sm_distinguished[SM_DIST_NOACCESS];
   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;

   sm = &sm_distinguished[SM_DIST_UNDEFINED];
   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;

   sm = &sm_distinguished[SM_DIST_DEFINED];
   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;

   /* Set up the primary map. */
   /* These entries gradually get overwritten as the used address
      space expands. */
   for (i = 0; i < N_PRIMARY_MAP; i++)
      primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];

   /* Auxiliary primary maps */
   init_auxmap_L1_L2();

   /* auxmap_size = auxmap_used = 0;
      no ... these are statically initialised */

   /* Secondary V bit table */
   secVBitTable = createSecVBitTable();
}
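/* Illustrative sketch (not part of the tool): the "distinguished
   secondary" idea used by init_shadow_memory above, in standalone form.
   Every primary slot initially aliases one shared, never-written
   NOACCESS secondary; a slot only gets a private copy when its VA bits
   first need to change (copy-on-write).  Sizes, types and the ex_ names
   are local to the sketch.  Guarded out of the build. */
#if 0
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

enum { EX_CHUNKS = 16384, EX_N_PRIMARY = 1024 };
typedef struct { uint8_t vabits8[EX_CHUNKS]; } ExSecMap;

static ExSecMap  ex_dist_noaccess;              /* shared, never modified */
static ExSecMap* ex_primary[EX_N_PRIMARY];

static void ex_init ( void )
{
   memset(ex_dist_noaccess.vabits8, 0x00, EX_CHUNKS);  /* all NOACCESS */
   for (int i = 0; i < EX_N_PRIMARY; i++)
      ex_primary[i] = &ex_dist_noaccess;
}

static ExSecMap* ex_get_writable ( int i )
{
   if (ex_primary[i] == &ex_dist_noaccess) {           /* copy on first write */
      ExSecMap* sm = malloc(sizeof *sm);
      memcpy(sm, &ex_dist_noaccess, sizeof *sm);
      ex_primary[i] = sm;
   }
   return ex_primary[i];
}
#endif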
/*------------------------------------------------------------*/
/*--- Sanity check machinery (permanently engaged)         ---*/
/*------------------------------------------------------------*/

static Bool mc_cheap_sanity_check ( void )
{
   n_sanity_cheap++;
   PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
   /* Check for sane operating level */
   if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
      return False;
   /* nothing else useful we can rapidly check */
   return True;
}

static Bool mc_expensive_sanity_check ( void )
{
   Int     i;
   Word    n_secmaps_found;
   SecMap* sm;
   const HChar*  errmsg;
   Bool    bad = False;

   if (0) VG_(printf)("expensive sanity check\n");
   if (0) return True;

   n_sanity_expensive++;
   PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);

   /* Check for sane operating level */
   if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
      return False;

   /* Check that the 3 distinguished SMs are still as they should be. */

   /* Check noaccess DSM. */
   sm = &sm_distinguished[SM_DIST_NOACCESS];
   for (i = 0; i < SM_CHUNKS; i++)
      if (sm->vabits8[i] != VA_BITS8_NOACCESS)
         bad = True;

   /* Check undefined DSM. */
   sm = &sm_distinguished[SM_DIST_UNDEFINED];
   for (i = 0; i < SM_CHUNKS; i++)
      if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
         bad = True;

   /* Check defined DSM. */
   sm = &sm_distinguished[SM_DIST_DEFINED];
   for (i = 0; i < SM_CHUNKS; i++)
      if (sm->vabits8[i] != VA_BITS8_DEFINED)
         bad = True;

   if (bad) {
      VG_(printf)("memcheck expensive sanity: "
                  "distinguished_secondaries have changed\n");
      return False;
   }

   /* If we're not checking for undefined value errors, the secondary V bit
    * table should be empty. */
   if (MC_(clo_mc_level) == 1) {
      if (0 != VG_(OSetGen_Size)(secVBitTable))
         return False;
   }

   /* check the auxiliary maps, very thoroughly */
   n_secmaps_found = 0;
   errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
   if (errmsg) {
      VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
      return False;
   }

   /* n_secmaps_found is now the number referred to by the auxiliary
      primary map.  Now add on the ones referred to by the main
      primary map. */
   for (i = 0; i < N_PRIMARY_MAP; i++) {
      if (primary_map[i] == NULL) {
         bad = True;
      } else {
         if (!is_distinguished_sm(primary_map[i]))
            n_secmaps_found++;
      }
   }

   /* check that the number of secmaps issued matches the number that
      are reachable (iow, no secmap leaks) */
   if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
      bad = True;

   if (bad) {
      VG_(printf)("memcheck expensive sanity: "
                  "apparent secmap leakage\n");
      return False;
   }

   if (bad) {
      VG_(printf)("memcheck expensive sanity: "
                  "auxmap covers wrong address space\n");
      return False;
   }

   /* there is only one pointer to each secmap (expensive) */

   return True;
}
5929 /*--- Command line args ---*/
5930 /*------------------------------------------------------------*/
5932 /* 31 Aug 2015: Vectorised code is now so widespread that
5933 --partial-loads-ok needs to be enabled by default on all platforms.
5934 Not doing so causes lots of false errors. */
5935 Bool
MC_(clo_partial_loads_ok
) = True
;
5936 Long
MC_(clo_freelist_vol
) = 20*1000*1000LL;
5937 Long
MC_(clo_freelist_big_blocks
) = 1*1000*1000LL;
5938 LeakCheckMode
MC_(clo_leak_check
) = LC_Summary
;
5939 VgRes
MC_(clo_leak_resolution
) = Vg_HighRes
;
5940 UInt
MC_(clo_show_leak_kinds
) = R2S(Possible
) | R2S(Unreached
);
5941 UInt
MC_(clo_error_for_leak_kinds
) = R2S(Possible
) | R2S(Unreached
);
5942 UInt
MC_(clo_leak_check_heuristics
) = H2S(LchStdString
)
5945 | H2S( LchMultipleInheritance
);
5946 Bool
MC_(clo_xtree_leak
) = False
;
5947 const HChar
* MC_(clo_xtree_leak_file
) = "xtleak.kcg.%p";
5948 Bool
MC_(clo_workaround_gcc296_bugs
) = False
;
5949 Int
MC_(clo_malloc_fill
) = -1;
5950 Int
MC_(clo_free_fill
) = -1;
5951 KeepStacktraces
MC_(clo_keep_stacktraces
) = KS_alloc_and_free
;
5952 Int
MC_(clo_mc_level
) = 2;
5953 Bool
MC_(clo_show_mismatched_frees
) = True
;
5955 ExpensiveDefinednessChecks
5956 MC_(clo_expensive_definedness_checks
) = EdcAUTO
;
5958 Bool
MC_(clo_ignore_range_below_sp
) = False
;
5959 UInt
MC_(clo_ignore_range_below_sp__first_offset
) = 0;
5960 UInt
MC_(clo_ignore_range_below_sp__last_offset
) = 0;
5962 static const HChar
* MC_(parse_leak_heuristics_tokens
) =
5963 "-,stdstring,length64,newarray,multipleinheritance";
5964 /* The first heuristic value (LchNone) has no keyword, as this is
5965 a fake heuristic used to collect the blocks found without any
static Bool mc_process_cmd_line_options(const HChar* arg)
{
   const HChar* tmp_str;
   Bool  tmp_show;

   tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );

   /* Set MC_(clo_mc_level):
         1 = A bit tracking only
         2 = A and V bit tracking, but no V bit origins
         3 = A and V bit tracking, and V bit origins

      Do this by inspecting --undef-value-errors= and
      --track-origins=.  Reject the case --undef-value-errors=no
      --track-origins=yes as meaningless.
   */
   if VG_BOOL_CLO(arg, "--undef-value-errors", tmp_show) {
      if (tmp_show) {
         if (MC_(clo_mc_level) == 1)
            MC_(clo_mc_level) = 2;
      } else {
         if (MC_(clo_mc_level) == 3) {
            goto bad_level;
         } else {
            MC_(clo_mc_level) = 1;
         }
      }
   }
   else if VG_BOOL_CLO(arg, "--track-origins", tmp_show) {
      if (tmp_show) {
         if (MC_(clo_mc_level) == 1) {
            goto bad_level;
         } else {
            MC_(clo_mc_level) = 3;
         }
      } else {
         if (MC_(clo_mc_level) == 3)
            MC_(clo_mc_level) = 2;
      }
   }
   else if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
   else if VG_USET_CLOM(cloPD, arg, "--errors-for-leak-kinds",
                        MC_(parse_leak_kinds_tokens),
                        MC_(clo_error_for_leak_kinds)) {}
   else if VG_USET_CLOM(cloPD, arg, "--show-leak-kinds",
                        MC_(parse_leak_kinds_tokens),
                        MC_(clo_show_leak_kinds)) {}
   else if VG_USET_CLOM(cloPD, arg, "--leak-check-heuristics",
                        MC_(parse_leak_heuristics_tokens),
                        MC_(clo_leak_check_heuristics)) {}
   else if (VG_BOOL_CLOM(cloPD, arg, "--show-reachable", tmp_show)) {
      if (tmp_show) {
         MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
      } else {
         MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
      }
   }
   else if VG_BOOL_CLOM(cloPD, arg, "--show-possibly-lost", tmp_show) {
      if (tmp_show) {
         MC_(clo_show_leak_kinds) |= R2S(Possible);
      } else {
         MC_(clo_show_leak_kinds) &= ~R2S(Possible);
      }
   }
   else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
                       MC_(clo_workaround_gcc296_bugs)) {}

   else if VG_BINT_CLOM(cloPD, arg, "--freelist-vol",  MC_(clo_freelist_vol),
                        0, 10*1000*1000*1000LL) {}

   else if VG_BINT_CLOM(cloPD, arg, "--freelist-big-blocks",
                        MC_(clo_freelist_big_blocks),
                        0, 10*1000*1000*1000LL) {}

   else if VG_XACT_CLOM(cloPD, arg, "--leak-check=no",
                        MC_(clo_leak_check), LC_Off) {}
   else if VG_XACT_CLOM(cloPD, arg, "--leak-check=summary",
                        MC_(clo_leak_check), LC_Summary) {}
   else if VG_XACT_CLOM(cloPD, arg, "--leak-check=yes",
                        MC_(clo_leak_check), LC_Full) {}
   else if VG_XACT_CLOM(cloPD, arg, "--leak-check=full",
                        MC_(clo_leak_check), LC_Full) {}

   else if VG_XACT_CLO(arg, "--leak-resolution=low",
                       MC_(clo_leak_resolution), Vg_LowRes) {}
   else if VG_XACT_CLO(arg, "--leak-resolution=med",
                       MC_(clo_leak_resolution), Vg_MedRes) {}
   else if VG_XACT_CLO(arg, "--leak-resolution=high",
                       MC_(clo_leak_resolution), Vg_HighRes) {}

   else if VG_STR_CLOM(cloPD, arg, "--ignore-ranges", tmp_str) {
      Bool ok = parse_ignore_ranges(tmp_str);
      if (!ok) {
         VG_(message)(Vg_DebugMsg,
            "ERROR: --ignore-ranges: "
            "invalid syntax, or end <= start in range\n");
         return False;
      }
      if (gIgnoredAddressRanges) {
         UInt i;
         for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
            UWord val     = IAR_INVALID;
            UWord key_min = ~(UWord)0;
            UWord key_max = (UWord)0;
            VG_(indexRangeMap)( &key_min, &key_max, &val,
                                gIgnoredAddressRanges, i );
            tl_assert(key_min <= key_max);
            UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
            if (key_max - key_min > limit && val == IAR_CommandLine) {
               VG_(message)(Vg_DebugMsg,
                  "ERROR: --ignore-ranges: suspiciously large range:\n");
               VG_(message)(Vg_DebugMsg,
                  "       0x%lx-0x%lx (size %lu)\n", key_min, key_max,
                  key_max - key_min + 1);
               return False;
            }
         }
      }
   }

   else if VG_STR_CLOM(cloPD, arg, "--ignore-range-below-sp", tmp_str) {
      /* This seems at first a bit weird, but: in order to imply
         a non-wrapped-around address range, the first offset needs to be
         larger than the second one.  For example
            --ignore-range-below-sp=8192,8189
         would cause accesses to in the range [SP-8192, SP-8189] to be
         ignored. */
      UInt offs1 = 0, offs2 = 0;
      Bool ok = parse_UInt_pair(&tmp_str, &offs1, &offs2);
      // Ensure we used all the text after the '=' sign.
      if (ok && *tmp_str != 0) ok = False;
      if (!ok) {
         VG_(message)(Vg_DebugMsg,
            "ERROR: --ignore-range-below-sp: invalid syntax. "
            " Expected \"...=decimalnumber-decimalnumber\".\n");
         return False;
      }
      if (offs1 > 1000*1000 /*arbitrary*/ || offs2 > 1000*1000 /*ditto*/) {
         VG_(message)(Vg_DebugMsg,
            "ERROR: --ignore-range-below-sp: suspiciously large "
            "offset(s): %u and %u\n", offs1, offs2);
         return False;
      }
      if (offs1 <= offs2) {
         VG_(message)(Vg_DebugMsg,
            "ERROR: --ignore-range-below-sp: invalid offsets "
            "(the first must be larger): %u and %u\n", offs1, offs2);
         return False;
      }
      tl_assert(offs1 > offs2);
      if (offs1 - offs2 > 4096 /*arbitrary*/) {
         VG_(message)(Vg_DebugMsg,
            "ERROR: --ignore-range-below-sp: suspiciously large "
            "range: %u-%u (size %u)\n", offs1, offs2, offs1 - offs2);
         return False;
      }
      MC_(clo_ignore_range_below_sp) = True;
      MC_(clo_ignore_range_below_sp__first_offset) = offs1;
      MC_(clo_ignore_range_below_sp__last_offset)  = offs2;
   }

   else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
   else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}

   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
                       MC_(clo_keep_stacktraces), KS_alloc) {}
   else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
                       MC_(clo_keep_stacktraces), KS_free) {}
   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
                       MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
   else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
                       MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
   else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
                       MC_(clo_keep_stacktraces), KS_none) {}

   else if VG_BOOL_CLOM(cloPD, arg, "--show-mismatched-frees",
                        MC_(clo_show_mismatched_frees)) {}

   else if VG_XACT_CLO(arg, "--expensive-definedness-checks=no",
                       MC_(clo_expensive_definedness_checks), EdcNO) {}
   else if VG_XACT_CLO(arg, "--expensive-definedness-checks=auto",
                       MC_(clo_expensive_definedness_checks), EdcAUTO) {}
   else if VG_XACT_CLO(arg, "--expensive-definedness-checks=yes",
                       MC_(clo_expensive_definedness_checks), EdcYES) {}

   else if VG_BOOL_CLO(arg, "--xtree-leak",
                       MC_(clo_xtree_leak)) {}
   else if VG_STR_CLO (arg, "--xtree-leak-file",
                       MC_(clo_xtree_leak_file)) {}

   else
      return VG_(replacement_malloc_process_cmd_line_option)(arg);

   return True;


 bad_level:
   VG_(fmsg_bad_option)(arg,
      "--track-origins=yes has no effect when --undef-value-errors=no.\n");
}
static void mc_print_usage(void)
{
   VG_(printf)(
"    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
"    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
"    --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
"                                            [definite,possible]\n"
"    --errors-for-leak-kinds=kind1,kind2,..  which leak kinds are errors?\n"
"                                            [definite,possible]\n"
"        where kind is one of:\n"
"          definite indirect possible reachable all none\n"
"    --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
"        improving leak search false positive [all]\n"
"        where heur is one of:\n"
"          stdstring length64 newarray multipleinheritance all none\n"
"    --show-reachable=yes             same as --show-leak-kinds=all\n"
"    --show-reachable=no --show-possibly-lost=yes\n"
"                                     same as --show-leak-kinds=definite,possible\n"
"    --show-reachable=no --show-possibly-lost=no\n"
"                                     same as --show-leak-kinds=definite\n"
"    --xtree-leak=no|yes              output leak result in xtree format? [no]\n"
"    --xtree-leak-file=<file>         xtree leak report file [xtleak.kcg.%%p]\n"
"    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
"    --track-origins=no|yes           show origins of undefined values? [no]\n"
"    --partial-loads-ok=no|yes        too hard to explain here; see manual [yes]\n"
"    --expensive-definedness-checks=no|auto|yes\n"
"                                     Use extra-precise definedness tracking [auto]\n"
"    --freelist-vol=<number>          volume of freed blocks queue      [20000000]\n"
"    --freelist-big-blocks=<number>   releases first blocks with size>= [1000000]\n"
"    --workaround-gcc296-bugs=no|yes  self explanatory [no].  Deprecated.\n"
"                                     Use --ignore-range-below-sp instead.\n"
"    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
"    --ignore-range-below-sp=<number>-<number>  do not report errors for\n"
"                                     accesses at the given offsets below SP\n"
"    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
"    --free-fill=<hexnumber>          fill free'd areas with given value\n"
"    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
"        stack trace(s) to keep for malloc'd/free'd areas       [alloc-and-free]\n"
"    --show-mismatched-frees=no|yes   show frees that don't match the allocator? [yes]\n"
   );
}
static void mc_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
/*------------------------------------------------------------*/
/*--- Client blocks                                        ---*/
/*------------------------------------------------------------*/

/* Client block management:

   This is managed as an expanding array of client block descriptors.
   Indices of live descriptors are issued to the client, so it can ask
   to free them later.  Therefore we cannot slide live entries down
   over dead ones.  Instead we must use free/inuse flags and scan for
   an empty slot at allocation time.  This in turn means allocation is
   relatively expensive, so we hope this does not happen too often.

   An unused block has start == size == 0
*/

/* type CGenBlock is defined in mc_include.h */
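/* Illustrative sketch (not part of the tool): the allocation policy the
   comment above describes, in standalone form.  Indices handed out to
   the client must stay stable, so dead slots are found by linear scan
   and the descriptor array grows by doubling (starting at 10).  Types
   and ex_ names are local to the sketch.  Guarded out of the build. */
#if 0
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef struct { uintptr_t start; size_t size; } ExBlock;

static ExBlock* ex_blocks = NULL;
static size_t   ex_size = 0, ex_used = 0;

static size_t ex_alloc_block ( void )
{
   for (size_t i = 0; i < ex_used; i++)
      if (ex_blocks[i].start == 0 && ex_blocks[i].size == 0)
         return i;                                   /* reuse a dead slot */

   if (ex_used == ex_size) {                         /* grow: 10, then 2x */
      size_t   nsz = ex_size ? 2 * ex_size : 10;
      ExBlock* nb  = calloc(nsz, sizeof *nb);
      if (ex_blocks) {
         memcpy(nb, ex_blocks, ex_used * sizeof *nb);
         free(ex_blocks);
      }
      ex_blocks = nb;
      ex_size   = nsz;
   }
   return ex_used++;
}
#endif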
6239 /* This subsystem is self-initialising. */
6240 static UWord cgb_size
= 0;
6241 static UWord cgb_used
= 0;
6242 static CGenBlock
* cgbs
= NULL
;
6244 /* Stats for this subsystem. */
6245 static ULong cgb_used_MAX
= 0; /* Max in use. */
6246 static ULong cgb_allocs
= 0; /* Number of allocs. */
6247 static ULong cgb_discards
= 0; /* Number of discards. */
6248 static ULong cgb_search
= 0; /* Number of searches. */
6251 /* Get access to the client block array. */
6252 void MC_(get_ClientBlock_array
)( /*OUT*/CGenBlock
** blocks
,
6253 /*OUT*/UWord
* nBlocks
)
6256 *nBlocks
= cgb_used
;
6261 Int
alloc_client_block ( void )
6264 CGenBlock
* cgbs_new
;
6268 for (i
= 0; i
< cgb_used
; i
++) {
6270 if (cgbs
[i
].start
== 0 && cgbs
[i
].size
== 0)
6274 /* Not found. Try to allocate one at the end. */
6275 if (cgb_used
< cgb_size
) {
6280 /* Ok, we have to allocate a new one. */
6281 tl_assert(cgb_used
== cgb_size
);
6282 sz_new
= (cgbs
== NULL
) ? 10 : (2 * cgb_size
);
6284 cgbs_new
= VG_(malloc
)( "mc.acb.1", sz_new
* sizeof(CGenBlock
) );
6285 for (i
= 0; i
< cgb_used
; i
++)
6286 cgbs_new
[i
] = cgbs
[i
];
6294 if (cgb_used
> cgb_used_MAX
)
6295 cgb_used_MAX
= cgb_used
;
6300 static void show_client_block_stats ( void )
6302 VG_(message
)(Vg_DebugMsg
,
6303 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6304 cgb_allocs
, cgb_discards
, cgb_used_MAX
, cgb_search
6307 static void print_monitor_help ( void )
6312 "memcheck monitor commands:\n"
6313 " xb <addr> [<len>]\n"
6314 " prints validity bits for <len> (or 1) bytes at <addr>\n"
6315 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6316 " Then prints the bytes values below the corresponding validity bits\n"
6317 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6318 " Example: xb 0x8049c78 10\n"
6319 " get_vbits <addr> [<len>]\n"
6320 " Similar to xb, but only prints the validity bytes by group of 4.\n"
6321 " make_memory [noaccess|undefined\n"
6322 " |defined|Definedifaddressable] <addr> [<len>]\n"
6323 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6324 " check_memory [addressable|defined] <addr> [<len>]\n"
6325 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6326 " and outputs a description of <addr>\n"
6327 " leak_check [full*|summary|xtleak]\n"
6328 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6329 " [heuristics heur1,heur2,...]\n"
6330 " [increased*|changed|any]\n"
6331 " [unlimited*|limited <max_loss_records_output>]\n"
6333 " xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
6334 " where kind is one of:\n"
6335 " definite indirect possible reachable all none\n"
6336 " where heur is one of:\n"
6337 " stdstring length64 newarray multipleinheritance all none*\n"
6338 " Examples: leak_check\n"
6339 " leak_check summary any\n"
6340 " leak_check full kinds indirect,possible\n"
6341 " leak_check full reachable any limited 100\n"
6342 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6343 " [unlimited*|limited <max_blocks>]\n"
6344 " [heuristics heur1,heur2,...]\n"
6345 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6346 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6347 " With heuristics, only shows the blocks found via heur1,heur2,...\n"
6349 " who_points_at <addr> [<len>]\n"
6350 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6351 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6352 " with len > 1, will also show \"interior pointers\")\n"
6353 " xtmemory [<filename>]\n"
6354 " dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
6358 /* Print szB bytes at address, with a format similar to the gdb command
6360 res[i] == 1 indicates the corresponding byte is addressable. */
6361 static void gdb_xb (Addr address
, SizeT szB
, Int res
[])
6365 for (i
= 0; i
< szB
; i
++) {
6369 VG_(printf
) ("\n"); // Terminate previous line
6370 VG_(printf
) ("%p:", (void*)(address
+i
));
6373 VG_(printf
) ("\t0x%02x", *(UChar
*)(address
+i
));
6375 VG_(printf
) ("\t0x??");
6377 VG_(printf
) ("\n"); // Terminate previous line
6381 /* Returns the address of the next non space character,
6382 or address of the string terminator. */
6383 static HChar
* next_non_space (HChar
*s
)
6385 while (*s
&& *s
== ' ')
6390 /* Parse an integer slice, i.e. a single integer or a range of integer.
6392 <integer>[..<integer> ]
6393 (spaces are allowed before and/or after ..).
6394 Return True if range correctly parsed, False otherwise. */
6395 static Bool
VG_(parse_slice
) (HChar
* s
, HChar
** saveptr
,
6396 UInt
*from
, UInt
*to
)
6401 wl
= VG_(strtok_r
) (s
, " ", saveptr
);
6403 /* slice must start with an integer. */
6405 VG_(gdb_printf
) ("expecting integer or slice <from>..<to>\n");
6408 *from
= VG_(strtoull10
) (wl
, &endptr
);
6410 VG_(gdb_printf
) ("invalid integer or slice <from>..<to>\n");
6414 if (*endptr
== '\0' && *next_non_space(*saveptr
) != '.') {
6415 /* wl token is an integer terminating the string
6416 or else next token does not start with .
6417 In both cases, the slice is a single integer. */
6422 if (*endptr
== '\0') {
6423 // iii .. => get the next token
6424 wl
= VG_(strtok_r
) (NULL
, " .", saveptr
);
6427 if (*endptr
!= '.' && *(endptr
+1) != '.') {
6428 VG_(gdb_printf
) ("expecting slice <from>..<to>\n");
6431 if ( *(endptr
+2) == ' ') {
6432 // It must be iii.. jjj => get the next token
6433 wl
= VG_(strtok_r
) (NULL
, " .", saveptr
);
6435 // It must be iii..jjj
6440 *to
= VG_(strtoull10
) (wl
, &endptr
);
6441 if (*endptr
!= '\0') {
6442 VG_(gdb_printf
) ("missing/wrong 'to' of slice <from>..<to>\n");
6447 VG_(gdb_printf
) ("<from> cannot be bigger than <to> "
6448 "in slice <from>..<to>\n");
6455 /* return True if request recognised, False otherwise */
6456 static Bool
handle_gdb_monitor_command (ThreadId tid
, HChar
*req
)
6459 HChar s
[VG_(strlen
)(req
) + 1]; /* copy for strtok_r */
6462 VG_(strcpy
) (s
, req
);
6464 wcmd
= VG_(strtok_r
) (s
, " ", &ssaveptr
);
6465 /* NB: if possible, avoid introducing a new command below which
6466 starts with the same first letter(s) as an already existing
6467 command. This ensures a shorter abbreviation for the user. */
6468 switch (VG_(keyword_id
)
6469 ("help get_vbits leak_check make_memory check_memory "
6470 "block_list who_points_at xb xtmemory",
6471 wcmd
, kwd_report_duplicated_matches
)) {
6472 case -2: /* multiple matches */
6474 case -1: /* not found */
6477 print_monitor_help();
6479 case 1: { /* get_vbits */
6482 if (VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
)) {
6485 Int unaddressable
= 0;
6486 for (i
= 0; i
< szB
; i
++) {
6487 Int res
= mc_get_or_set_vbits_for_client
6488 (address
+i
, (Addr
) &vbits
, 1,
6489 False
, /* get them */
6490 False
/* is client request */ );
6491 /* we are before the first character on next line, print a \n. */
6492 if ((i
% 32) == 0 && i
!= 0)
6494 /* we are before the next block of 4 starts, print a space. */
6495 else if ((i
% 4) == 0 && i
!= 0)
6498 VG_(printf
) ("%02x", vbits
);
6500 tl_assert(3 == res
);
6506 if (unaddressable
) {
6508 ("Address %p len %lu has %d bytes unaddressable\n",
6509 (void *)address
, szB
, unaddressable
);
6514 case 2: { /* leak_check */
6516 LeakCheckParams lcp
;
6517 HChar
* xt_filename
= NULL
;
6521 lcp
.show_leak_kinds
= R2S(Possible
) | R2S(Unreached
);
6522 lcp
.errors_for_leak_kinds
= 0; // no errors for interactive leak search.
6524 lcp
.deltamode
= LCD_Increased
;
6525 lcp
.max_loss_records_output
= 999999999;
6526 lcp
.requested_by_monitor_command
= True
;
6527 lcp
.xt_filename
= NULL
;
6529 for (kw
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6531 kw
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
)) {
6532 switch (VG_(keyword_id
)
6533 ("full summary xtleak "
6534 "kinds reachable possibleleak definiteleak "
6536 "increased changed any "
6537 "unlimited limited ",
6538 kw
, kwd_report_all
)) {
6539 case -2: err
++; break;
6540 case -1: err
++; break;
6542 lcp
.mode
= LC_Full
; break;
6543 case 1: /* summary */
6544 lcp
.mode
= LC_Summary
; break;
6545 case 2: /* xtleak */
6548 = VG_(expand_file_name
)("--xtleak-mc_main.c",
6549 "xtleak.kcg.%p.%n");
6550 lcp
.xt_filename
= xt_filename
;
6552 case 3: { /* kinds */
6553 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6555 || !VG_(parse_enum_set
)(MC_(parse_leak_kinds_tokens
),
6558 &lcp
.show_leak_kinds
)) {
6559 VG_(gdb_printf
) ("missing or malformed leak kinds set\n");
6564 case 4: /* reachable */
6565 lcp
.show_leak_kinds
= MC_(all_Reachedness
)();
6567 case 5: /* possibleleak */
6569 = R2S(Possible
) | R2S(IndirectLeak
) | R2S(Unreached
);
6571 case 6: /* definiteleak */
6572 lcp
.show_leak_kinds
= R2S(Unreached
);
6574 case 7: { /* heuristics */
6575 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6577 || !VG_(parse_enum_set
)(MC_(parse_leak_heuristics_tokens
),
6581 VG_(gdb_printf
) ("missing or malformed heuristics set\n");
6586 case 8: /* increased */
6587 lcp
.deltamode
= LCD_Increased
; break;
6588 case 9: /* changed */
6589 lcp
.deltamode
= LCD_Changed
; break;
6591 lcp
.deltamode
= LCD_Any
; break;
6592 case 11: /* unlimited */
6593 lcp
.max_loss_records_output
= 999999999; break;
6594 case 12: { /* limited */
6596 const HChar
* endptr
;
6598 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6601 endptr
= "empty"; /* to report an error below */
6604 int_value
= VG_(strtoll10
) (wcmd
, &the_end
);
6607 if (*endptr
!= '\0')
6608 VG_(gdb_printf
) ("missing or malformed integer value\n");
6609 else if (int_value
> 0)
6610 lcp
.max_loss_records_output
= (UInt
) int_value
;
6612 VG_(gdb_printf
) ("max_loss_records_output must be >= 1,"
6613 " got %d\n", int_value
);
6621 MC_(detect_memory_leaks
)(tid
, &lcp
);
6622 if (xt_filename
!= NULL
)
6623 VG_(free
)(xt_filename
);
6627 case 3: { /* make_memory */
6630 Int kwdid
= VG_(keyword_id
)
6631 ("noaccess undefined defined Definedifaddressable",
6632 VG_(strtok_r
) (NULL
, " ", &ssaveptr
), kwd_report_all
);
6633 if (!VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
))
6638 case 0: MC_(make_mem_noaccess
) (address
, szB
); break;
6639 case 1: make_mem_undefined_w_tid_and_okind ( address
, szB
, tid
,
6640 MC_OKIND_USER
); break;
6641 case 2: MC_(make_mem_defined
) ( address
, szB
); break;
6642 case 3: make_mem_defined_if_addressable ( address
, szB
); break;;
6643 default: tl_assert(0);
6648 case 4: { /* check_memory */
6656 ExeContext
* origin_ec
;
6659 Int kwdid
= VG_(keyword_id
)
6660 ("addressable defined",
6661 VG_(strtok_r
) (NULL
, " ", &ssaveptr
), kwd_report_all
);
6662 if (!VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
))
6667 case 0: /* addressable */
6668 if (is_mem_addressable ( address
, szB
, &bad_addr
))
6669 VG_(printf
) ("Address %p len %lu addressable\n",
6670 (void *)address
, szB
);
6673 ("Address %p len %lu not addressable:\nbad address %p\n",
6674 (void *)address
, szB
, (void *) bad_addr
);
6675 // Describe this (probably live) address with current epoch
6676 MC_(pp_describe_addr
) (VG_(current_DiEpoch
)(), address
);
6678 case 1: /* defined */
6679 res
= is_mem_defined ( address
, szB
, &bad_addr
, &otag
);
6680 if (MC_AddrErr
== res
)
6682 ("Address %p len %lu not addressable:\nbad address %p\n",
6683 (void *)address
, szB
, (void *) bad_addr
);
6684 else if (MC_ValueErr
== res
) {
6687 case MC_OKIND_STACK
:
6688 src
= " was created by a stack allocation"; break;
6690 src
= " was created by a heap allocation"; break;
6692 src
= " was created by a client request"; break;
6693 case MC_OKIND_UNKNOWN
:
6695 default: tl_assert(0);
6698 ("Address %p len %lu not defined:\n"
6699 "Uninitialised value at %p%s\n",
6700 (void *)address
, szB
, (void *) bad_addr
, src
);
6702 if (VG_(is_plausible_ECU
)(ecu
)) {
6703 origin_ec
= VG_(get_ExeContext_from_ECU
)( ecu
);
6704 VG_(pp_ExeContext
)( origin_ec
);
6708 VG_(printf
) ("Address %p len %lu defined\n",
6709 (void *)address
, szB
);
6710 // Describe this (probably live) address with current epoch
6711 MC_(pp_describe_addr
) (VG_(current_DiEpoch
)(), address
);
6713 default: tl_assert(0);
6718 case 5: { /* block_list */
6721 UInt lr_nr_from
= 0;
6724 if (VG_(parse_slice
) (NULL
, &ssaveptr
, &lr_nr_from
, &lr_nr_to
)) {
6725 UInt limit_blocks
= 999999999;
6727 UInt heuristics
= 0;
6729 for (wl
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6731 wl
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
)) {
6732 switch (VG_(keyword_id
) ("unlimited limited heuristics ",
6733 wl
, kwd_report_all
)) {
6734 case -2: return True
;
6735 case -1: return True
;
6736 case 0: /* unlimited */
6737 limit_blocks
= 999999999; break;
6738 case 1: /* limited */
6739 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6741 VG_(gdb_printf
) ("missing integer value\n");
6744 int_value
= VG_(strtoll10
) (wcmd
, &the_end
);
6745 if (*the_end
!= '\0') {
6746 VG_(gdb_printf
) ("malformed integer value\n");
6749 if (int_value
<= 0) {
6750 VG_(gdb_printf
) ("max_blocks must be >= 1,"
6751 " got %d\n", int_value
);
6754 limit_blocks
= (UInt
) int_value
;
6756 case 2: /* heuristics */
6757 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6759 || !VG_(parse_enum_set
)(MC_(parse_leak_heuristics_tokens
),
6763 VG_(gdb_printf
) ("missing or malformed heuristics set\n");
6771 /* substract 1 from lr_nr_from/lr_nr_to as what is shown to the user
6772 is 1 more than the index in lr_array. */
6773 if (lr_nr_from
== 0 || ! MC_(print_block_list
) (lr_nr_from
-1,
6777 VG_(gdb_printf
) ("invalid loss record nr\n");
6782 case 6: { /* who_points_at */
6786 if (!VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
))
6788 if (address
== (Addr
) 0) {
6789 VG_(gdb_printf
) ("Cannot search who points at 0x0\n");
6792 MC_(who_points_at
) (address
, szB
);
6799 if (VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
)) {
6803 Int unaddressable
= 0;
6804 for (i
= 0; i
< szB
; i
++) {
6806 /* We going to print the first vabits of a new line.
6807 Terminate the previous line if needed: prints a line with the
6808 address and the data. */
6812 gdb_xb (address
+ i
- 8, 8, res
);
6814 VG_(printf
) ("\t"); // To align VABITS with gdb_xb layout
6816 res
[bnr
] = mc_get_or_set_vbits_for_client
6817 (address
+i
, (Addr
) &vbits
[bnr
], 1,
6818 False
, /* get them */
6819 False
/* is client request */ );
6820 if (res
[bnr
] == 1) {
6821 VG_(printf
) ("\t %02x", vbits
[bnr
]);
6823 tl_assert(3 == res
[bnr
]);
6825 VG_(printf
) ("\t __");
6829 if (szB
% 8 == 0 && szB
> 0)
6830 gdb_xb (address
+ szB
- 8, 8, res
);
6832 gdb_xb (address
+ szB
- szB
% 8, szB
% 8, res
);
6833 if (unaddressable
) {
6835 ("Address %p len %lu has %d bytes unaddressable\n",
6836 (void *)address
, szB
, unaddressable
);
6842 case 8: { /* xtmemory */
6844 filename
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6845 MC_(xtmemory_report
)(filename
, False
);
6855 /*------------------------------------------------------------*/
6856 /*--- Client requests ---*/
6857 /*------------------------------------------------------------*/
6859 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
6864    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
6865        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
6866        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
6867        && VG_USERREQ__FREELIKE_BLOCK != arg[0]
6868        && VG_USERREQ__CREATE_MEMPOOL != arg[0]
6869        && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
6870        && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
6871        && VG_USERREQ__MEMPOOL_FREE != arg[0]
6872        && VG_USERREQ__MEMPOOL_TRIM != arg[0]
6873        && VG_USERREQ__MOVE_MEMPOOL != arg[0]
6874        && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
6875        && VG_USERREQ__MEMPOOL_EXISTS != arg[0]
6876        && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]
6877        && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
6878        && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
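   // Illustrative sketch (added note, not part of the original file): client
   // requests arrive here as a 5-word array, with arg[0] holding the request
   // code and arg[1..4] the parameters, packed by the macros in memcheck.h /
   // valgrind.h.  For example, in a client program:
   //
   //    #include <valgrind/memcheck.h>
   //    char buf[64];
   //    VALGRIND_MAKE_MEM_NOACCESS(buf, sizeof buf);
   //
   // reaches this handler with arg[0] == VG_USERREQ__MAKE_MEM_NOACCESS,
   // arg[1] == (UWord)buf and arg[2] == sizeof buf.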
6882    case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
6883       Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
6885          MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
6886       *ret = ok ? (UWord)NULL : bad_addr;
6890    case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
6891       Bool errorV = False;
6894       Bool errorA = False;
6896       is_mem_defined_comprehensive(
6898          &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
6901          MC_(record_user_error) ( tid, bad_addrV,
6902                                   /*isAddrErr*/False, otagV );
6905          MC_(record_user_error) ( tid, bad_addrA,
6906                                   /*isAddrErr*/True, 0 );
6908       /* Return the lower of the two erring addresses, if any. */
6910       if (errorV && !errorA) {
6913       if (!errorV && errorA) {
6916       if (errorV && errorA) {
6917          *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
6922    case VG_USERREQ__DO_LEAK_CHECK: {
6923       LeakCheckParams lcp;
6927       else if (arg[1] == 1)
6928          lcp.mode = LC_Summary;
6930          VG_(message)(Vg_UserMsg,
6931                       "Warning: unknown memcheck leak search mode\n");
6935       lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
6936       lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
6937       lcp.heuristics = MC_(clo_leak_check_heuristics);
6940          lcp.deltamode = LCD_Any;
6941       else if (arg[2] == 1)
6942          lcp.deltamode = LCD_Increased;
6943       else if (arg[2] == 2)
6944          lcp.deltamode = LCD_Changed;
6948                       "Warning: unknown memcheck leak search deltamode\n");
6949          lcp.deltamode = LCD_Any;
6951       lcp.max_loss_records_output = 999999999;
6952       lcp.requested_by_monitor_command = False;
6953       lcp.xt_filename = NULL;
6955       MC_(detect_memory_leaks)(tid, &lcp);
6956       *ret = 0; /* return value is meaningless */
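   // Illustrative sketch (added note, not part of the original file): this
   // case is what the leak-check macros in memcheck.h expand to; arg[1]
   // selects the mode and arg[2] the deltamode.  A client can trigger a
   // check at any point, e.g.:
   //
   //    #include <valgrind/memcheck.h>
   //    run_phase_one();                 // hypothetical
   //    VALGRIND_DO_LEAK_CHECK;          // full check, all loss records
   //    run_phase_two();                 // hypothetical
   //    VALGRIND_DO_ADDED_LEAK_CHECK;    // report only what increased since
   //                                     // the previous check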
6960    case VG_USERREQ__MAKE_MEM_NOACCESS:
6961       MC_(make_mem_noaccess) ( arg[1], arg[2] );
6965    case VG_USERREQ__MAKE_MEM_UNDEFINED:
6966       make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
6971    case VG_USERREQ__MAKE_MEM_DEFINED:
6972       MC_(make_mem_defined) ( arg[1], arg[2] );
6976    case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
6977       make_mem_defined_if_addressable ( arg[1], arg[2] );
6981    case VG_USERREQ__CREATE_BLOCK: /* describe a block */
6982       if (arg[1] != 0 && arg[2] != 0) {
6983          i = alloc_client_block();
6984          /* VG_(printf)("allocated %d %p\n", i, cgbs); */
6985          cgbs[i].start = arg[1];
6986          cgbs[i].size  = arg[2];
6987          cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
6988          cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
6994    case VG_USERREQ__DISCARD: /* discard */
6996           || arg[2] >= cgb_used ||
6997          (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
7000          tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
7001          cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
7002          VG_(free)(cgbs[arg[2]].desc);
7008    case VG_USERREQ__GET_VBITS:
7009       *ret = mc_get_or_set_vbits_for_client
7010                 ( arg[1], arg[2], arg[3],
7011                   False /* get them */,
7012                   True /* is client request */ );
7015    case VG_USERREQ__SET_VBITS:
7016       *ret = mc_get_or_set_vbits_for_client
7017                 ( arg[1], arg[2], arg[3],
7018                   True /* set them */,
7019                   True /* is client request */ );
7022    case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
7023       UWord** argp = (UWord**)arg;
7024       // MC_(bytes_leaked) et al were set by the last leak check (or zero
7025       // if no prior leak checks performed).
7026       *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
7027       *argp[2] = MC_(bytes_dubious);
7028       *argp[3] = MC_(bytes_reachable);
7029       *argp[4] = MC_(bytes_suppressed);
7030       // there is no argp[5]
7031       //*argp[5] = MC_(bytes_indirect);
7032       // XXX need to make *argp[1-4] defined; currently done in the
7033       // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
7037    case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
7038       UWord** argp = (UWord**)arg;
7039       // MC_(blocks_leaked) et al were set by the last leak check (or zero
7040       // if no prior leak checks performed).
7041       *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
7042       *argp[2] = MC_(blocks_dubious);
7043       *argp[3] = MC_(blocks_reachable);
7044       *argp[4] = MC_(blocks_suppressed);
7045       // there is no argp[5]
7046       //*argp[5] = MC_(blocks_indirect);
7047       // XXX need to make *argp[1-4] defined; currently done in the
7048       // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
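   // Illustrative sketch (added note, not part of the original file): the two
   // cases above back the client-side counting macros, so a test program can
   // assert on the totals gathered by the most recent leak check, e.g.:
   //
   //    #include <assert.h>
   //    #include <valgrind/memcheck.h>
   //    unsigned long leaked = 0, dubious = 0, reachable = 0, suppressed = 0;
   //    VALGRIND_DO_LEAK_CHECK;
   //    VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
   //    assert(leaked == 0);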
7052    case VG_USERREQ__MALLOCLIKE_BLOCK: {
7053       Addr p         = (Addr)arg[1];
7054       SizeT sizeB    =       arg[2];
7056       Bool is_zeroed = (Bool)arg[4];
7058       MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
7059                        MC_AllocCustom, MC_(malloc_list) );
7061          MC_(make_mem_noaccess) ( p - rzB, rzB);
7062          MC_(make_mem_noaccess) ( p + sizeB, rzB);
7066    case VG_USERREQ__RESIZEINPLACE_BLOCK: {
7067       Addr p         = (Addr)arg[1];
7068       SizeT oldSizeB =       arg[2];
7069       SizeT newSizeB =       arg[3];
7072       MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
7075    case VG_USERREQ__FREELIKE_BLOCK: {
7076       Addr p = (Addr)arg[1];
7079       MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
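   // Illustrative sketch (added note, not part of the original file): these
   // cases are how a custom allocator describes its blocks to Memcheck,
   // assuming the standard macros from memcheck.h.  A hypothetical arena
   // allocator might do:
   //
   //    #include <valgrind/memcheck.h>
   //    void* arena_alloc ( Arena* ar, size_t n )      // hypothetical
   //    {
   //       void* p = arena_carve(ar, n);               // hypothetical
   //       VALGRIND_MALLOCLIKE_BLOCK(p, n, /*rzB*/0, /*is_zeroed*/0);
   //       return p;
   //    }
   //    void arena_free ( Arena* ar, void* p )         // hypothetical
   //    {
   //       VALGRIND_FREELIKE_BLOCK(p, /*rzB*/0);
   //       arena_release(ar, p);                       // hypothetical
   //    }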
7083    case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
7084       HChar* s  = (HChar*)arg[1];
7085       Addr  dst = (Addr) arg[2];
7086       Addr  src = (Addr) arg[3];
7087       SizeT len = (SizeT)arg[4];
7088       MC_(record_overlap_error)(tid, s, src, dst, len);
7092    case VG_USERREQ__CREATE_MEMPOOL: {
7093       Addr pool      = (Addr)arg[1];
7095       Bool is_zeroed = (Bool)arg[3];
7096       UInt flags     =       arg[4];
7098       // The create_mempool function does not know these mempool flags,
7099       // pass as booleans.
7100       MC_(create_mempool) ( pool, rzB, is_zeroed,
7101                             (flags & VALGRIND_MEMPOOL_AUTO_FREE),
7102                             (flags & VALGRIND_MEMPOOL_METAPOOL) );
7106    case VG_USERREQ__DESTROY_MEMPOOL: {
7107       Addr pool = (Addr)arg[1];
7109       MC_(destroy_mempool) ( pool );
7113    case VG_USERREQ__MEMPOOL_ALLOC: {
7114       Addr pool = (Addr)arg[1];
7115       Addr addr = (Addr)arg[2];
7118       MC_(mempool_alloc) ( tid, pool, addr, size );
7122    case VG_USERREQ__MEMPOOL_FREE: {
7123       Addr pool = (Addr)arg[1];
7124       Addr addr = (Addr)arg[2];
7126       MC_(mempool_free) ( pool, addr );
7130    case VG_USERREQ__MEMPOOL_TRIM: {
7131       Addr pool = (Addr)arg[1];
7132       Addr addr = (Addr)arg[2];
7135       MC_(mempool_trim) ( pool, addr, size );
7139    case VG_USERREQ__MOVE_MEMPOOL: {
7140       Addr poolA = (Addr)arg[1];
7141       Addr poolB = (Addr)arg[2];
7143       MC_(move_mempool) ( poolA, poolB );
7147    case VG_USERREQ__MEMPOOL_CHANGE: {
7148       Addr pool  = (Addr)arg[1];
7149       Addr addrA = (Addr)arg[2];
7150       Addr addrB = (Addr)arg[3];
7153       MC_(mempool_change) ( pool, addrA, addrB, size );
7157    case VG_USERREQ__MEMPOOL_EXISTS: {
7158       Addr pool = (Addr)arg[1];
7160       *ret = (UWord) MC_(mempool_exists) ( pool );
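   // Illustrative sketch (added note, not part of the original file): the
   // mempool cases above serve the pool-description macros in memcheck.h.
   // A pool allocator that hands out pieces of a larger mapping might use
   // them along these lines:
   //
   //    #include <valgrind/memcheck.h>
   //    VALGRIND_CREATE_MEMPOOL(pool, /*rzB*/0, /*is_zeroed*/0);
   //    void* piece = pool_carve(pool, 128);            // hypothetical
   //    VALGRIND_MEMPOOL_ALLOC(pool, piece, 128);
   //    /* ... use piece ... */
   //    VALGRIND_MEMPOOL_FREE(pool, piece);
   //    VALGRIND_DESTROY_MEMPOOL(pool);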
7164    case VG_USERREQ__GDB_MONITOR_COMMAND: {
7165       Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
7173    case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
7174    case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
7176         = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
7178         = modify_ignore_ranges(addRange, arg[1], arg[2]);
7186                   "Warning: unknown memcheck client request code %llx\n",
7195 /*------------------------------------------------------------*/
7196 /*--- Crude profiling machinery. ---*/
7197 /*------------------------------------------------------------*/
7199 // We track a number of interesting events (using PROF_EVENT)
7200 // if MC_PROFILE_MEMORY is defined.
7202 #ifdef MC_PROFILE_MEMORY
7204 ULong MC_(event_ctr)[MCPE_LAST];
7206 /* Event counter names. Use the name of the function that increases the
7207    event counter. Drop any MC_() and mc_ prefixes. */
7208 static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
7209    [MCPE_LOADVN_SLOW] = "LOADVn_slow",
7210    [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
7211    [MCPE_STOREVN_SLOW] = "STOREVn_slow",
7212    [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
7213    [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
7214    [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
7215       "make_aligned_word32_undefined_slow",
7216    [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
7217    [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
7218       "make_aligned_word64_undefined_slow",
7219    [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
7220    [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
7221       "make_aligned_word32_noaccess_slow",
7222    [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
7223    [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
7224       "make_aligned_word64_noaccess_slow",
7225    [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
7226    [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
7227    [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
7228    [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
7229    [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
7230    [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
7231    [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
7232    [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
7233    [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
7234    [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
7235    [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
7236    [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
7237    [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
7238    [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
7239    [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
7240    [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
7241    [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
7242       "is_mem_defined_comprehensive(loop)",
7243    [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
7244    [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
7245    [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
7246    [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
7247    [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
7248    [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
7249       "set_address_range_perms(single-secmap)",
7250    [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
7251       "set_address_range_perms(startof-secmap)",
7252    [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
7253       "set_address_range_perms(multiple-secmaps)",
7254    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
7255       "set_address_range_perms(dist-sm1)",
7256    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
7257       "set_address_range_perms(dist-sm2)",
7258    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
7259       "set_address_range_perms(dist-sm1-quick)",
7260    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
7261       "set_address_range_perms(dist-sm2-quick)",
7262    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
7263    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
7264    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
7265    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
7266    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
7267    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
7268    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
7269       "set_address_range_perms(loop64K-free-dist-sm)",
7270    [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
7271    [MCPE_LOADV_128_OR_256] = "LOADV_128_or_256",
7272    [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
7273    [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
7274    [MCPE_LOADV64] = "LOADV64",
7275    [MCPE_LOADV64_SLOW1] = "LOADV64-slow1",
7276    [MCPE_LOADV64_SLOW2] = "LOADV64-slow2",
7277    [MCPE_STOREV64] = "STOREV64",
7278    [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
7279    [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
7280    [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
7281    [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
7282    [MCPE_LOADV32] = "LOADV32",
7283    [MCPE_LOADV32_SLOW1] = "LOADV32-slow1",
7284    [MCPE_LOADV32_SLOW2] = "LOADV32-slow2",
7285    [MCPE_STOREV32] = "STOREV32",
7286    [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
7287    [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
7288    [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
7289    [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
7290    [MCPE_LOADV16] = "LOADV16",
7291    [MCPE_LOADV16_SLOW1] = "LOADV16-slow1",
7292    [MCPE_LOADV16_SLOW2] = "LOADV16-slow2",
7293    [MCPE_STOREV16] = "STOREV16",
7294    [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
7295    [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
7296    [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
7297    [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
7298    [MCPE_LOADV8] = "LOADV8",
7299    [MCPE_LOADV8_SLOW1] = "LOADV8-slow1",
7300    [MCPE_LOADV8_SLOW2] = "LOADV8-slow2",
7301    [MCPE_STOREV8] = "STOREV8",
7302    [MCPE_STOREV8_SLOW1] = "STOREV8-slow1",
7303    [MCPE_STOREV8_SLOW2] = "STOREV8-slow2",
7304    [MCPE_STOREV8_SLOW3] = "STOREV8-slow3",
7305    [MCPE_STOREV8_SLOW4] = "STOREV8-slow4",
7306    [MCPE_NEW_MEM_STACK_4] = "new_mem_stack_4",
7307    [MCPE_NEW_MEM_STACK_8] = "new_mem_stack_8",
7308    [MCPE_NEW_MEM_STACK_12] = "new_mem_stack_12",
7309    [MCPE_NEW_MEM_STACK_16] = "new_mem_stack_16",
7310    [MCPE_NEW_MEM_STACK_32] = "new_mem_stack_32",
7311    [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
7312    [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
7313    [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
7314    [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
7315    [MCPE_DIE_MEM_STACK_4] = "die_mem_stack_4",
7316    [MCPE_DIE_MEM_STACK_8] = "die_mem_stack_8",
7317    [MCPE_DIE_MEM_STACK_12] = "die_mem_stack_12",
7318    [MCPE_DIE_MEM_STACK_16] = "die_mem_stack_16",
7319    [MCPE_DIE_MEM_STACK_32] = "die_mem_stack_32",
7320    [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
7321    [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
7322    [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
7323    [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
7324    [MCPE_NEW_MEM_STACK] = "new_mem_stack",
7325    [MCPE_DIE_MEM_STACK] = "die_mem_stack",
7326    [MCPE_MAKE_STACK_UNINIT_W_O] = "MAKE_STACK_UNINIT_w_o",
7327    [MCPE_MAKE_STACK_UNINIT_NO_O] = "MAKE_STACK_UNINIT_no_o",
7328    [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
7329    [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
7330       = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
7331    [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
7332       = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
7333    [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
7334       = "MAKE_STACK_UNINIT_128_no_o_slowcase",
7337 static void init_prof_mem ( void )
7339    Int i, name_count = 0;
7341    for (i = 0; i < MCPE_LAST; i++) {
7342       MC_(event_ctr)[i] = 0;
7343       if (MC_(event_ctr_name)[i] != NULL)
7347    /* Make sure every profiling event has a name */
7348    tl_assert(name_count == MCPE_LAST);
7351 static void done_prof_mem ( void )
7354    Bool spaced = False;
7355    for (i = n = 0; i < MCPE_LAST; i++) {
7356       if (!spaced && (n % 10) == 0) {
7360       if (MC_(event_ctr)[i] > 0) {
7363          VG_(printf)( "prof mem event %3d: %11llu %s\n",
7364                       i, MC_(event_ctr)[i],
7365                       MC_(event_ctr_name)[i]);
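   // Added note (not part of the original file): with MC_PROFILE_MEMORY
   // defined, done_prof_mem() prints one line per non-zero counter using the
   // format above, e.g. (event numbers and counts invented for illustration):
   //
   //    prof mem event  42:     1234567 STOREV32
   //    prof mem event  43:         890 STOREV32-slow1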
7372 static void init_prof_mem ( void ) { }
7373 static void done_prof_mem ( void ) { }
7378 /*------------------------------------------------------------*/
7379 /*--- Origin tracking stuff ---*/
7380 /*------------------------------------------------------------*/
7382 /*--------------------------------------------*/
7383 /*--- Origin tracking: load handlers ---*/
7384 /*--------------------------------------------*/
7386 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
7387    return or1 > or2 ? or1 : or2;
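/* Worked example (added note, not in the original): origins are merged by
   taking the numerically larger tag, so a zero tag ("no origin information")
   never wins over a real one -- merge_origins(0, 0x40001) == 0x40001 -- and
   merging two real tags keeps the larger, which is presumably just a cheap,
   determinate way of picking one of them. */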
7390 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
7393    UWord lineoff = oc_line_offset(a);
7394    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7396    if (OC_ENABLE_ASSERTIONS) {
7397       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7400    line = find_OCacheLine( a );
7402    descr = line->descr[lineoff];
7403    if (OC_ENABLE_ASSERTIONS) {
7404       tl_assert(descr < 0x10);
7407    if (LIKELY(0 == (descr & (1 << byteoff)))) {
7410       return line->w32[lineoff];
7414 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
7417    UWord lineoff, byteoff;
7419    if (UNLIKELY(a & 1)) {
7420       /* Handle misaligned case, slowly. */
7421       UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
7422       UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
7423       return merge_origins(oLo, oHi);
7426    lineoff = oc_line_offset(a);
7427    byteoff = a & 3; /* 0 or 2 */
7429    if (OC_ENABLE_ASSERTIONS) {
7430       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7432    line = find_OCacheLine( a );
7434    descr = line->descr[lineoff];
7435    if (OC_ENABLE_ASSERTIONS) {
7436       tl_assert(descr < 0x10);
7439    if (LIKELY(0 == (descr & (3 << byteoff)))) {
7442       return line->w32[lineoff];
7446 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
7451    if (UNLIKELY(a & 3)) {
7452       /* Handle misaligned case, slowly. */
7453       UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
7454       UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
7455       return merge_origins(oLo, oHi);
7458    lineoff = oc_line_offset(a);
7459    if (OC_ENABLE_ASSERTIONS) {
7460       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7463    line = find_OCacheLine( a );
7465    descr = line->descr[lineoff];
7466    if (OC_ENABLE_ASSERTIONS) {
7467       tl_assert(descr < 0x10);
7470    if (LIKELY(0 == descr)) {
7473       return line->w32[lineoff];
7477 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
7479    UChar descrLo, descrHi, descr;
7482    if (UNLIKELY(a & 7)) {
7483       /* Handle misaligned case, slowly. */
7484       UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
7485       UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
7486       return merge_origins(oLo, oHi);
7489    lineoff = oc_line_offset(a);
7490    if (OC_ENABLE_ASSERTIONS) {
7491       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7494    line = find_OCacheLine( a );
7496    descrLo = line->descr[lineoff + 0];
7497    descrHi = line->descr[lineoff + 1];
7498    descr   = descrLo | descrHi;
7499    if (OC_ENABLE_ASSERTIONS) {
7500       tl_assert(descr < 0x10);
7503    if (LIKELY(0 == descr)) {
7504       return 0; /* both 32-bit chunks are defined */
7506       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
7507       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
7508       return merge_origins(oLo, oHi);
7512 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
7513    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
7514    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
7515    UInt oBoth = merge_origins(oLo, oHi);
7516    return (UWord)oBoth;
7519 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
7520    UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 );
7521    UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 );
7522    UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 );
7523    UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 );
7524    UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
7525                              merge_origins(oQ2, oQ3));
7530 /*--------------------------------------------*/
7531 /*--- Origin tracking: store handlers ---*/
7532 /*--------------------------------------------*/
7534 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
7536    UWord lineoff = oc_line_offset(a);
7537    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7539    if (OC_ENABLE_ASSERTIONS) {
7540       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7543    line = find_OCacheLine( a );
7546       line->descr[lineoff] &= ~(1 << byteoff);
7548       line->descr[lineoff] |= (1 << byteoff);
7549       line->w32[lineoff] = d32;
7553 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
7555    UWord lineoff, byteoff;
7557    if (UNLIKELY(a & 1)) {
7558       /* Handle misaligned case, slowly. */
7559       MC_(helperc_b_store1)( a + 0, d32 );
7560       MC_(helperc_b_store1)( a + 1, d32 );
7564    lineoff = oc_line_offset(a);
7565    byteoff = a & 3; /* 0 or 2 */
7567    if (OC_ENABLE_ASSERTIONS) {
7568       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7571    line = find_OCacheLine( a );
7574       line->descr[lineoff] &= ~(3 << byteoff);
7576       line->descr[lineoff] |= (3 << byteoff);
7577       line->w32[lineoff] = d32;
7581 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
7585    if (UNLIKELY(a & 3)) {
7586       /* Handle misaligned case, slowly. */
7587       MC_(helperc_b_store2)( a + 0, d32 );
7588       MC_(helperc_b_store2)( a + 2, d32 );
7592    lineoff = oc_line_offset(a);
7593    if (OC_ENABLE_ASSERTIONS) {
7594       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7597    line = find_OCacheLine( a );
7600       line->descr[lineoff] = 0;
7602       line->descr[lineoff] = 0xF;
7603       line->w32[lineoff] = d32;
7607 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
7611    if (UNLIKELY(a & 7)) {
7612       /* Handle misaligned case, slowly. */
7613       MC_(helperc_b_store4)( a + 0, d32 );
7614       MC_(helperc_b_store4)( a + 4, d32 );
7618    lineoff = oc_line_offset(a);
7619    if (OC_ENABLE_ASSERTIONS) {
7620       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7623    line = find_OCacheLine( a );
7626       line->descr[lineoff + 0] = 0;
7627       line->descr[lineoff + 1] = 0;
7629       line->descr[lineoff + 0] = 0xF;
7630       line->descr[lineoff + 1] = 0xF;
7631       line->w32[lineoff + 0] = d32;
7632       line->w32[lineoff + 1] = d32;
7636 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
7637    MC_(helperc_b_store8)( a + 0, d32 );
7638    MC_(helperc_b_store8)( a + 8, d32 );
7641 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
7642    MC_(helperc_b_store8)( a + 0, d32 );
7643    MC_(helperc_b_store8)( a + 8, d32 );
7644    MC_(helperc_b_store8)( a + 16, d32 );
7645    MC_(helperc_b_store8)( a + 24, d32 );
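/* Worked example (added note, not in the original): each 32-bit word of an
   OCache line carries a 4-bit descr, one bit per byte, saying whether the tag
   stored in w32[lineoff] applies to that byte.  So a 1-byte store of a real
   tag at byte offset 2 sets bit 2 (descr |= 4), a 2-byte store at offset 0
   sets bits 0..1 (descr |= 3), and a full 4-byte store of a real tag sets
   descr = 0xF; storing a zero ("no origin") tag clears the corresponding
   bits instead, as the handlers above show. */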
7649 /*--------------------------------------------*/
7650 /*--- Origin tracking: sarp handlers ---*/
7651 /*--------------------------------------------*/
7653 __attribute__((noinline))
7654 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
7655    if ((a & 1) && len >= 1) {
7656       MC_(helperc_b_store1)( a, otag );
7660    if ((a & 2) && len >= 2) {
7661       MC_(helperc_b_store2)( a, otag );
7666    tl_assert(0 == (a & 3));
7668       MC_(helperc_b_store4)( a, otag );
7673       MC_(helperc_b_store2)( a, otag );
7678       MC_(helperc_b_store1)( a, otag );
7682    tl_assert(len == 0);
7685 __attribute__((noinline))
7686 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
7687    if ((a & 1) && len >= 1) {
7688       MC_(helperc_b_store1)( a, 0 );
7692    if ((a & 2) && len >= 2) {
7693       MC_(helperc_b_store2)( a, 0 );
7698    tl_assert(0 == (a & 3));
7700       MC_(helperc_b_store4)( a, 0 );
7705       MC_(helperc_b_store2)( a, 0 );
7710       MC_(helperc_b_store1)( a, 0 );
7714    tl_assert(len == 0);
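/* Worked example (added note, not in the original): both sarp handlers peel
   the range into aligned pieces.  For a = 0x1001, len = 11: a 1-byte store
   brings a to 0x1002 (len 10), a 2-byte store brings it to 0x1004 (now
   4-aligned, len 8), two 4-byte stores consume the middle, and no trailing
   2- or 1-byte stores are needed, so the final assertion len == 0 holds. */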
7718 /*------------------------------------------------------------*/
7719 /*--- Setup and finalisation ---*/
7720 /*------------------------------------------------------------*/
7722 static void mc_post_clo_init ( void )
7724 /* If we've been asked to emit XML, mash around various other
7725 options so as to constrain the output somewhat. */
7727 /* Extract as much info as possible from the leak checker. */
7728       MC_(clo_leak_check) = LC_Full;
7731    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)
7732        && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7733       VG_(message)(Vg_UserMsg,
7734                    "Warning: --freelist-big-blocks value %lld has no effect\n"
7735                    "as it is >= to --freelist-vol value %lld\n",
7736                    MC_(clo_freelist_big_blocks),
7737                    MC_(clo_freelist_vol));
7740    if (MC_(clo_workaround_gcc296_bugs)
7741        && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7743          "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
7744          "Warning: Instead use: --ignore-range-below-sp=1024-1\n"
7749    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
7751    if (MC_(clo_mc_level) == 3) {
7752       /* We're doing origin tracking. */
7753 #     ifdef PERF_FAST_STACK
7754       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
7755       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
7756       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
7757       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
7758       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
7759       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
7760       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
7761       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
7762       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
7764       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
7765       VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU  );
7767       /* Not doing origin tracking */
7768 #     ifdef PERF_FAST_STACK
7769       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
7770       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
7771       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
7772       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
7773       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
7774       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
7775       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
7776       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
7777       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
7779       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
7780       VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
7783 // We assume that brk()/sbrk() does not initialise new memory. Is this
7784 // accurate? John Reiser says:
7786 // 0) sbrk() can *decrease* process address space. No zero fill is done
7787 // for a decrease, not even the fragment on the high end of the last page
7788 // that is beyond the new highest address. For maximum safety and
7789 // portability, then the bytes in the last page that reside above [the
7790 // new] sbrk(0) should be considered to be uninitialized, but in practice
7791 // it is exceedingly likely that they will retain their previous
7794 // 1) If an increase is large enough to require new whole pages, then
7795 // those new whole pages (like all new pages) are zero-filled by the
7796 // operating system. So if sbrk(0) already is page aligned, then
7797 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
7799 // 2) Any increase that lies within an existing allocated page is not
7800 // changed. So if (x = sbrk(0)) is not page aligned, then
7801 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
7802 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
7803 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
7804 // of them come along for the ride because the operating system deals
7805 // only in whole pages. Again, for maximum safety and portability, then
7806 // anything that lives above [the new] sbrk(0) should be considered
7807 // uninitialized, but in practice will retain previous contents [zero in
7812 // A key property of sbrk/brk is that new whole pages that are supplied
7813 // by the operating system *do* get initialized to zero.
7815 // As for the portability of all this:
7817 // sbrk and brk are not POSIX. However, any system that is a derivative
7818 // of *nix has sbrk and brk because too much software (such as
7819 // the Bourne shell) relies on the traditional memory map (.text,
7820 // .data+.bss, stack) and the existence of sbrk/brk.
7822 // So we should arguably observe all this. However:
7823 // - The current inaccuracy has caused maybe one complaint in seven years(?)
7824 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
7825 // doubt most programmers know the above information.
7826 // So I'm not terribly unhappy with marking it as undefined. --njn.
7828 // [More: I think most of what John said only applies to sbrk(). It seems
7829 // that brk() always deals in whole pages. And since this event deals
7830 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
7831 // just mark all memory it allocates as defined.]
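// Worked example (added note, not in the original) of the arithmetic in
// point 2 above, with PAGE_SIZE = 0x1000 and x = sbrk(0) = 0x601234:
// sbrk(PAGE_SIZE) leaves the ((PAGE_SIZE-1) & -x) = 0xdcc bytes from x up to
// the end of its page with their existing contents, and maps one fresh,
// zero-filled page after it; ((PAGE_SIZE-1) & x) = 0x234 of those zeroed
// bytes fall below the new break and the remaining 0xdcc lie above it,
// "coming along for the ride".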
7833 # if !defined(VGO_solaris)
7834    if (MC_(clo_mc_level) == 3)
7835       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_make_ECU );
7837       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_no_ECU );
7839    // On Solaris, brk memory has to be marked as defined, otherwise we get
7840    // many false positives.
7841    VG_(track_new_mem_brk)         ( make_mem_defined_w_tid );
7844 /* This origin tracking cache is huge (~100M), so only initialise
7846    if (MC_(clo_mc_level) >= 3) {
7848       tl_assert(ocacheL1 != NULL);
7849       tl_assert(ocacheL2 != NULL);
7851       tl_assert(ocacheL1 == NULL);
7852       tl_assert(ocacheL2 == NULL);
7855    MC_(chunk_poolalloc) = VG_(newPA)
7856       (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
7859        "mc.cMC.1 (MC_Chunk pools)",
7862    /* Do not check definedness of guest state if --undef-value-errors=no */
7863    if (MC_(clo_mc_level) >= 2)
7864       VG_(track_pre_reg_read) ( mc_pre_reg_read );
7866    if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
7867       if (MC_(clo_keep_stacktraces) == KS_none
7868           || MC_(clo_keep_stacktraces) == KS_free)
7869          VG_(fmsg_bad_option)("--keep-stacktraces",
7870             "To use --xtree-memory=full, you must"
7871             " keep at least the alloc stacktrace\n");
7872       // Activate full xtree memory profiling.
7873       VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main));
7878 static void print_SM_info(const HChar* type, Int n_SMs)
7880    VG_(message)(Vg_DebugMsg,
7881       " memcheck: SMs: %s = %d (%luk, %luM)\n",
7884       n_SMs * sizeof(SecMap) / 1024UL,
7885       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
7888 static void mc_print_stats (void)
7890    SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
7892    VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
7893                 VG_(free_queue_volume), VG_(free_queue_length));
7894    VG_(message)(Vg_DebugMsg,
7895       " memcheck: sanity checks: %d cheap, %d expensive\n",
7896       n_sanity_cheap, n_sanity_expensive );
7897    VG_(message)(Vg_DebugMsg,
7898       " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
7900       n_auxmap_L2_nodes * 64,
7901       n_auxmap_L2_nodes / 16 );
7902    VG_(message)(Vg_DebugMsg,
7903       " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
7904       n_auxmap_L1_searches, n_auxmap_L1_cmps,
7905       (10ULL * n_auxmap_L1_cmps)
7906          / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
7908    VG_(message)(Vg_DebugMsg,
7909       " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
7910       n_auxmap_L2_searches, n_auxmap_L2_nodes
7913    print_SM_info("n_issued ", n_issued_SMs);
7914    print_SM_info("n_deissued ", n_deissued_SMs);
7915    print_SM_info("max_noaccess ", max_noaccess_SMs);
7916    print_SM_info("max_undefined", max_undefined_SMs);
7917    print_SM_info("max_defined ", max_defined_SMs);
7918    print_SM_info("max_non_DSM ", max_non_DSM_SMs);
7920    // Three DSMs, plus the non-DSM ones
7921    max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
7922    // The 3*sizeof(Word) bytes is the AVL node metadata size.
7923    // The VG_ROUNDUP is because the OSet pool allocator will/must align
7924    // the elements on pointer size.
7925    // Note that the pool allocator has some additional small overhead
7926    // which is not counted in the below.
7927    // Hardwiring this logic sucks, but I don't see how else to do it.
7928    max_secVBit_szB = max_secVBit_nodes *
7929       (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
7930    max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
7932    VG_(message)(Vg_DebugMsg,
7933       " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
7934       max_secVBit_nodes, max_secVBit_szB / 1024,
7935       max_secVBit_szB / (1024 * 1024));
7936    VG_(message)(Vg_DebugMsg,
7937       " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
7938       sec_vbits_new_nodes + sec_vbits_updates,
7939       sec_vbits_new_nodes, sec_vbits_updates );
7940    VG_(message)(Vg_DebugMsg,
7941       " memcheck: max shadow mem size: %luk, %luM\n",
7942       max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
7944    if (MC_(clo_mc_level) >= 3) {
7945       VG_(message)(Vg_DebugMsg,
7946                    " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
7947                    stats_ocacheL1_find,
7948                    stats_ocacheL1_misses,
7949                    stats_ocacheL1_lossage );
7950       VG_(message)(Vg_DebugMsg,
7951                    " ocacheL1: %'12lu at 0 %'12lu at 1\n",
7952                    stats_ocacheL1_find - stats_ocacheL1_misses
7953                       - stats_ocacheL1_found_at_1
7954                       - stats_ocacheL1_found_at_N,
7955                    stats_ocacheL1_found_at_1 );
7956       VG_(message)(Vg_DebugMsg,
7957                    " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
7958                    stats_ocacheL1_found_at_N,
7959                    stats_ocacheL1_movefwds );
7960       VG_(message)(Vg_DebugMsg,
7961                    " ocacheL1: %'12lu sizeB %'12d useful\n",
7962                    (SizeT)sizeof(OCache),
7963                    4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
7964       VG_(message)(Vg_DebugMsg,
7965                    " ocacheL2: %'12lu refs %'12lu misses\n",
7966                    stats__ocacheL2_refs,
7967                    stats__ocacheL2_misses );
7968       VG_(message)(Vg_DebugMsg,
7969                    " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
7970                    stats__ocacheL2_n_nodes_max,
7971                    stats__ocacheL2_n_nodes );
7972       VG_(message)(Vg_DebugMsg,
7973                    " niacache: %'12lu refs %'12lu misses\n",
7974                    stats__nia_cache_queries, stats__nia_cache_misses);
7976       tl_assert(ocacheL1 == NULL);
7977       tl_assert(ocacheL2 == NULL);
7982 static void mc_fini ( Int exitcode )
7984    MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True);
7985    MC_(print_malloc_stats)();
7987    if (MC_(clo_leak_check) != LC_Off) {
7988       LeakCheckParams lcp;
7989       HChar* xt_filename = NULL;
7990       lcp.mode = MC_(clo_leak_check);
7991       lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
7992       lcp.heuristics = MC_(clo_leak_check_heuristics);
7993       lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
7994       lcp.deltamode = LCD_Any;
7995       lcp.max_loss_records_output = 999999999;
7996       lcp.requested_by_monitor_command = False;
7997       if (MC_(clo_xtree_leak)) {
7998          xt_filename = VG_(expand_file_name)("--xtree-leak-file",
7999                                              MC_(clo_xtree_leak_file));
8000          lcp.xt_filename = xt_filename;
8002          lcp.show_leak_kinds = MC_(all_Reachedness)();
8005          lcp.xt_filename = NULL;
8006       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
8007       if (MC_(clo_xtree_leak))
8008          VG_(free)(xt_filename);
8010       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8012             "For a detailed leak analysis, rerun with: --leak-check=full\n"
8018    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
8019        && MC_(clo_mc_level) == 2) {
8020       VG_(message)(Vg_UserMsg,
8021 "Use --track-origins=yes to see where "
8022 "uninitialised values come from\n");
8025 /* Print a warning if any client-request generated ignore-ranges
8026 still exist. It would be reasonable to expect that a properly
8027 written program would remove any such ranges before exiting, and
8028 since they are a bit on the dangerous side, let's comment. By
8029 contrast ranges which are specified on the command line normally
8030 pertain to hardware mapped into the address space, and so we
8031 can't expect the client to have got rid of them. */
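   // Illustrative sketch (added note, not part of the original file): the
   // client-requested ranges in question are created and removed by the
   // macros that map to the *_ADDR_ERROR_REPORTING_IN_RANGE requests handled
   // earlier, roughly:
   //
   //    #include <valgrind/memcheck.h>
   //    VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(dev_regs, len);
   //    poke_memory_mapped_device(dev_regs);    // hypothetical
   //    VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(dev_regs, len);
   //
   // A program that exits without the re-enable call triggers the warning
   // printed below.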
8032    if (gIgnoredAddressRanges) {
8034       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
8035          UWord val     = IAR_INVALID;
8036          UWord key_min = ~(UWord)0;
8037          UWord key_max = (UWord)0;
8038          VG_(indexRangeMap)( &key_min, &key_max, &val,
8039                              gIgnoredAddressRanges, i );
8040          if (val != IAR_ClientReq)
8042          /* Print the offending range. Also, if it is the first,
8043             print a banner before it. */
8047                "WARNING: exiting program has the following client-requested\n"
8048                "WARNING: address error disablement range(s) still in force,\n"
8050                "possibly as a result of some mistake in the use of the\n"
8052                "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
8055          VG_(umsg)(" [%u] 0x%016lx-0x%016lx %s\n",
8056                    i, key_min, key_max, showIARKind(val));
8066    VG_(message)(Vg_DebugMsg,
8067       "------ Valgrind's client block stats follow ---------------\n" );
8068    show_client_block_stats();
8072 /* mark the given addr/len unaddressable for watchpoint implementation
8073 The PointKind will be handled at access time */
8074 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
8075                                                   Addr addr, SizeT len)
8077    /* GDBTD this is somewhat fishy. We might rather have to save the previous
8078       accessibility and definedness in gdbserver so as to allow restoring it
8079       properly. Currently, we assume that the user only watches things
8080       which are properly addressable and defined */
8082       MC_(make_mem_noaccess) (addr, len);
8084       MC_(make_mem_defined)  (addr, len);
8088 static void mc_pre_clo_init(void)
8090    VG_(details_name)            ("Memcheck");
8091    VG_(details_version)         (NULL);
8092    VG_(details_description)     ("a memory error detector");
8093    VG_(details_copyright_author)(
8094       "Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.");
8095    VG_(details_bug_reports_to)  (VG_BUGS_TO);
8096    VG_(details_avg_translation_sizeB) ( 640 );
8098    VG_(basic_tool_funcs)          (mc_post_clo_init,
8102    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
8105    VG_(needs_core_errors)         ();
8106    VG_(needs_tool_errors)         (MC_(eq_Error),
8107                                    MC_(before_pp_Error),
8109                                    True,/*show TIDs for errors*/
8110                                    MC_(update_Error_extra),
8111                                    MC_(is_recognised_suppression),
8112                                    MC_(read_extra_suppression_info),
8113                                    MC_(error_matches_suppression),
8114                                    MC_(get_error_name),
8115                                    MC_(get_extra_suppression_info),
8116                                    MC_(print_extra_suppression_use),
8117                                    MC_(update_extra_suppression_use));
8118    VG_(needs_libc_freeres)        ();
8119    VG_(needs_cxx_freeres)         ();
8120    VG_(needs_command_line_options)(mc_process_cmd_line_options,
8122                                    mc_print_debug_usage);
8123    VG_(needs_client_requests)     (mc_handle_client_request);
8124    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
8125                                    mc_expensive_sanity_check);
8126    VG_(needs_print_stats)         (mc_print_stats);
8127    VG_(needs_info_location)       (MC_(pp_describe_addr));
8128    VG_(needs_malloc_replacement)  (MC_(malloc),
8130                                    MC_(__builtin_vec_new),
8134                                    MC_(__builtin_delete),
8135                                    MC_(__builtin_vec_delete),
8137                                    MC_(malloc_usable_size),
8138                                    MC_MALLOC_DEFAULT_REDZONE_SZB );
8139    MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
8141    VG_(needs_xml_output)          ();
8143    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
8145    // Handling of mmap and mprotect isn't simple (well, it is simple,
8146    // but the justification isn't.) See comments above, just prior to
8148    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
8149    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
8151    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
8153    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
8154    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
8155    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
8157 /* Defer the specification of the new_mem_stack functions to the
8158 post_clo_init function, since we need to first parse the command
8159 line before deciding which set to use. */
8161 # ifdef PERF_FAST_STACK
8162    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
8163    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
8164    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
8165    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
8166    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
8167    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
8168    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
8169    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
8170    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
8172    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
8174    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
8176    VG_(track_pre_mem_read)        ( check_mem_is_defined );
8177    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
8178    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
8179    VG_(track_post_mem_write)      ( mc_post_mem_write );
8181    VG_(track_post_reg_write)                  ( mc_post_reg_write );
8182    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
8184    if (MC_(clo_mc_level) >= 2) {
8185       VG_(track_copy_mem_to_reg)  ( mc_copy_mem_to_reg );
8186       VG_(track_copy_reg_to_mem)  ( mc_copy_reg_to_mem );
8189    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
8191 init_shadow_memory();
8192 // MC_(chunk_poolalloc) must be allocated in post_clo_init
8193    tl_assert(MC_(chunk_poolalloc) == NULL);
8194    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
8195    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
8198 tl_assert( mc_expensive_sanity_check() );
8200 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
8201    tl_assert(sizeof(UWord) == sizeof(Addr));
8202    // Call me paranoid. I don't care.
8203    tl_assert(sizeof(void*) == sizeof(Addr));
8205    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
8206    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
8208 /* This is small. Always initialise it. */
8209 init_nia_to_ecu_cache();
8211 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
8212 if we need to, since the command line args haven't been
8213 processed yet. Hence defer it to mc_post_clo_init. */
8214    tl_assert(ocacheL1 == NULL);
8215    tl_assert(ocacheL2 == NULL);
8217 /* Check some important stuff. See extensive comments above
8218 re UNALIGNED_OR_HIGH for background. */
8219 # if VG_WORDSIZE == 4
8220 tl_assert(sizeof(void*) == 4);
8221    tl_assert(sizeof(Addr)  == 4);
8222    tl_assert(sizeof(UWord) == 4);
8223    tl_assert(sizeof(Word)  == 4);
8224    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
8225    tl_assert(MASK(1) == 0UL);
8226    tl_assert(MASK(2) == 1UL);
8227    tl_assert(MASK(4) == 3UL);
8228    tl_assert(MASK(8) == 7UL);
8230    tl_assert(VG_WORDSIZE == 8);
8231    tl_assert(sizeof(void*) == 8);
8232    tl_assert(sizeof(Addr)  == 8);
8233    tl_assert(sizeof(UWord) == 8);
8234    tl_assert(sizeof(Word)  == 8);
8235    tl_assert(MAX_PRIMARY_ADDRESS == 0x1FFFFFFFFFULL);
8236    tl_assert(MASK(1) == 0xFFFFFFE000000000ULL);
8237    tl_assert(MASK(2) == 0xFFFFFFE000000001ULL);
8238    tl_assert(MASK(4) == 0xFFFFFFE000000003ULL);
8239    tl_assert(MASK(8) == 0xFFFFFFE000000007ULL);
8242 /* Check some assertions to do with the instrumentation machinery. */
8243    MC_(do_instrumentation_startup_checks)();
8246    STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));
8248 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
8250 /*--------------------------------------------------------------------*/
8251 /*--- end mc_main.c ---*/
8252 /*--------------------------------------------------------------------*/