2 /*--------------------------------------------------------------------*/
3 /*--- Ptrcheck: a pointer-use checker. ---*/
4 /*--- This file checks heap accesses. ---*/
6 /*--------------------------------------------------------------------*/
9 This file is part of Ptrcheck, a Valgrind tool for checking pointer
12 Initial version (Annelid):
14 Copyright (C) 2003-2017 Nicholas Nethercote
19 Copyright (C) 2008-2017 OpenWorks Ltd
22 This program is free software; you can redistribute it and/or
23 modify it under the terms of the GNU General Public License as
24 published by the Free Software Foundation; either version 2 of the
25 License, or (at your option) any later version.
27 This program is distributed in the hope that it will be useful, but
28 WITHOUT ANY WARRANTY; without even the implied warranty of
29 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30 General Public License for more details.
32 You should have received a copy of the GNU General Public License
33 along with this program; if not, write to the Free Software
34 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
37 The GNU General Public License is contained in the file COPYING.
40 #include "pub_tool_basics.h"
41 #include "pub_tool_libcbase.h"
42 #include "pub_tool_libcprint.h"
43 #include "pub_tool_libcassert.h"
44 #include "pub_tool_mallocfree.h"
45 #include "pub_tool_execontext.h"
46 #include "pub_tool_hashtable.h"
47 #include "pub_tool_tooliface.h"
48 #include "pub_tool_replacemalloc.h"
49 #include "pub_tool_options.h"
50 #include "pub_tool_execontext.h"
51 #include "pub_tool_aspacemgr.h" // VG_(am_shadow_malloc)
52 #include "pub_tool_vki.h" // VKI_MAX_PAGE_SIZE
53 #include "pub_tool_machine.h" // VG_({get,set}_shadow_regs_area) et al
54 #include "pub_tool_debuginfo.h" // VG_(get_fnname)
55 #include "pub_tool_threadstate.h" // VG_(get_running_tid)
56 #include "pub_tool_oset.h"
57 #include "pub_tool_vkiscnums.h"
58 #include "pub_tool_machine.h"
59 #include "pub_tool_wordfm.h"
60 #include "pub_tool_xarray.h"
62 #include "pc_common.h"
67 #include "sg_main.h" // sg_instrument_*, and struct _SGEnv
71 /*------------------------------------------------------------*/
72 /*--- Debug/trace options ---*/
73 /*------------------------------------------------------------*/
75 static ULong stats__client_mallocs
= 0;
76 static ULong stats__client_frees
= 0;
77 static ULong stats__segs_allocd
= 0;
78 static ULong stats__segs_recycled
= 0;
81 //////////////////////////////////////////////////////////////
83 // Segments low level storage //
85 //////////////////////////////////////////////////////////////
87 // NONPTR, UNKNOWN, BOTTOM defined in h_main.h since
88 // pc_common.c needs to see them, for error processing
90 // we only start recycling segs when this many exist
91 #define N_FREED_SEGS (1 * 1000 * 1000)
95 SizeT szB
; /* may be zero */
96 ExeContext
* ec
; /* where malloc'd or freed */
97 /* When 1, indicates block is in use. Otherwise, used to form a
98 linked list of freed blocks, running from oldest freed block to
99 the most recently freed block. */
100 struct _Seg
* nextfree
;
103 // Determines if 'a' is before, within, or after seg's range. Sets 'cmp' to
104 // -1/0/1 accordingly. Sets 'n' to the number of bytes before/within/after.
105 void Seg__cmp(Seg
* seg
, Addr a
, Int
* cmp
, UWord
* n
)
110 } else if (a
< seg
->addr
+ seg
->szB
&& seg
->szB
> 0) {
115 *n
= a
- (seg
->addr
+ seg
->szB
);
119 /*inline*/ Bool
Seg__is_freed(Seg
* seg
)
121 if (!is_known_segment(seg
))
124 return seg
->nextfree
!= (Seg
*)1;
127 ExeContext
* Seg__where(Seg
* seg
)
129 tl_assert(is_known_segment(seg
));
133 SizeT
Seg__size(Seg
* seg
)
135 tl_assert(is_known_segment(seg
));
139 Addr
Seg__addr(Seg
* seg
)
141 tl_assert(is_known_segment(seg
));
146 #define N_SEGS_PER_GROUP 10000
150 struct _SegGroup
* admin
;
151 UWord nextfree
; /* 0 .. N_SEGS_PER_GROUP */
152 Seg segs
[N_SEGS_PER_GROUP
];
156 static SegGroup
* group_list
= NULL
;
157 static UWord nFreeSegs
= 0;
158 static Seg
* freesegs_youngest
= NULL
;
159 static Seg
* freesegs_oldest
= NULL
;
162 static SegGroup
* new_SegGroup ( void ) {
163 SegGroup
* g
= VG_(malloc
)("pc.h_main.nTG.1", sizeof(SegGroup
));
164 VG_(memset
)(g
, 0, sizeof(*g
));
168 /* Get a completely new Seg */
169 static Seg
* new_Seg ( void )
173 if (group_list
== NULL
) {
178 tl_assert(group_list
->nextfree
<= N_SEGS_PER_GROUP
);
179 if (group_list
->nextfree
== N_SEGS_PER_GROUP
) {
181 g
->admin
= group_list
;
184 tl_assert(group_list
->nextfree
< N_SEGS_PER_GROUP
);
185 teg
= &group_list
->segs
[ group_list
->nextfree
];
186 group_list
->nextfree
++;
187 stats__segs_allocd
++;
191 static Seg
* get_Seg_for_malloc ( void )
194 if (nFreeSegs
< N_FREED_SEGS
) {
196 seg
->nextfree
= (Seg
*)1;
199 /* else recycle the oldest Seg in the free list */
200 tl_assert(freesegs_youngest
);
201 tl_assert(freesegs_oldest
);
202 tl_assert(freesegs_youngest
!= freesegs_oldest
);
203 seg
= freesegs_oldest
;
204 freesegs_oldest
= seg
->nextfree
;
206 seg
->nextfree
= (Seg
*)1;
207 stats__segs_recycled
++;
211 static void set_Seg_freed ( Seg
* seg
)
214 tl_assert(!Seg__is_freed(seg
));
215 if (nFreeSegs
== 0) {
216 tl_assert(freesegs_oldest
== NULL
);
217 tl_assert(freesegs_youngest
== NULL
);
218 seg
->nextfree
= NULL
;
219 freesegs_youngest
= seg
;
220 freesegs_oldest
= seg
;
223 tl_assert(freesegs_youngest
);
224 tl_assert(freesegs_oldest
);
225 if (nFreeSegs
== 1) {
226 tl_assert(freesegs_youngest
== freesegs_oldest
);
228 tl_assert(freesegs_youngest
!= freesegs_oldest
);
230 tl_assert(freesegs_youngest
->nextfree
== NULL
);
231 tl_assert(seg
!= freesegs_youngest
&& seg
!= freesegs_oldest
);
232 seg
->nextfree
= NULL
;
233 freesegs_youngest
->nextfree
= seg
;
234 freesegs_youngest
= seg
;
239 static WordFM
* addr_to_seg_map
= NULL
; /* GuestAddr -> Seg* */
241 static void addr_to_seg_map_ENSURE_INIT ( void )
243 if (UNLIKELY(addr_to_seg_map
== NULL
)) {
244 addr_to_seg_map
= VG_(newFM
)( VG_(malloc
), "pc.h_main.attmEI.1",
245 VG_(free
), NULL
/*unboxedcmp*/ );
249 static Seg
* find_Seg_by_addr ( Addr ga
)
252 addr_to_seg_map_ENSURE_INIT();
253 if (VG_(lookupFM
)( addr_to_seg_map
, &keyW
, &valW
, (UWord
)ga
)) {
254 tl_assert(keyW
== ga
);
261 static void bind_addr_to_Seg ( Addr ga
, Seg
* seg
)
264 addr_to_seg_map_ENSURE_INIT();
265 b
= VG_(addToFM
)( addr_to_seg_map
, (UWord
)ga
, (UWord
)seg
);
266 tl_assert(!b
); /* else ga is already bound */
269 static void unbind_addr_from_Seg ( Addr ga
)
273 addr_to_seg_map_ENSURE_INIT();
274 b
= VG_(delFromFM
)( addr_to_seg_map
, &keyW
, &valW
, (UWord
)ga
);
275 tl_assert(b
); /* else ga was not already bound */
276 tl_assert(keyW
== ga
);
277 tl_assert(valW
!= 0);
281 //////////////////////////////////////////////////////////////
282 //////////////////////////////////////////////////////////////
283 //////////////////////////////////////////////////////////////
285 // Returns the added heap segment
286 static Seg
* add_new_segment ( ThreadId tid
, Addr p
, SizeT size
)
288 Seg
* seg
= get_Seg_for_malloc();
289 tl_assert(seg
!= (Seg
*)1); /* since we're using 1 as a special value */
292 seg
->ec
= VG_(record_ExeContext
)( tid
, 0/*first_ip_delta*/ );
293 tl_assert(!Seg__is_freed(seg
));
295 bind_addr_to_Seg(p
, seg
);
303 void* alloc_and_new_mem_heap ( ThreadId tid
,
304 SizeT size
, SizeT alignment
, Bool is_zeroed
)
308 if ( ((SSizeT
)size
) < 0) return NULL
;
310 p
= (Addr
)VG_(cli_malloc
)(alignment
, size
);
311 if (is_zeroed
) VG_(memset
)((void*)p
, 0, size
);
313 add_new_segment( tid
, p
, size
);
315 stats__client_mallocs
++;
319 static void die_and_free_mem_heap ( ThreadId tid
, Seg
* seg
)
321 // Empty and free the actual block
322 tl_assert(!Seg__is_freed(seg
));
324 VG_(cli_free
)( (void*)seg
->addr
);
326 // Remember where freed
327 seg
->ec
= VG_(record_ExeContext
)( tid
, 0/*first_ip_delta*/ );
330 unbind_addr_from_Seg( seg
->addr
);
332 stats__client_frees
++;
335 static void handle_free_heap( ThreadId tid
, void* p
)
337 Seg
* seg
= find_Seg_by_addr( (Addr
)p
);
339 /* freeing a block that wasn't malloc'd. Ignore. */
342 die_and_free_mem_heap( tid
, seg
);
346 /*------------------------------------------------------------*/
347 /*--- malloc() et al replacements ---*/
348 /*------------------------------------------------------------*/
350 void* h_replace_malloc ( ThreadId tid
, SizeT n
)
352 return alloc_and_new_mem_heap ( tid
, n
, VG_(clo_alignment
),
353 /*is_zeroed*/False
);
356 void* h_replace___builtin_new ( ThreadId tid
, SizeT n
)
358 return alloc_and_new_mem_heap ( tid
, n
, VG_(clo_alignment
),
359 /*is_zeroed*/False
);
362 void* h_replace___builtin_vec_new ( ThreadId tid
, SizeT n
)
364 return alloc_and_new_mem_heap ( tid
, n
, VG_(clo_alignment
),
365 /*is_zeroed*/False
);
368 void* h_replace_memalign ( ThreadId tid
, SizeT align
, SizeT n
)
370 return alloc_and_new_mem_heap ( tid
, n
, align
,
371 /*is_zeroed*/False
);
374 void* h_replace_calloc ( ThreadId tid
, SizeT nmemb
, SizeT size1
)
376 return alloc_and_new_mem_heap ( tid
, nmemb
*size1
, VG_(clo_alignment
),
380 void h_replace_free ( ThreadId tid
, void* p
)
382 // Should arguably check here if p.vseg matches the segID of the
383 // pointed-to block... unfortunately, by this stage, we don't know what
384 // p.vseg is, because we don't know the address of p (the p here is a
385 // copy, and we've lost the address of its source). To do so would
386 // require passing &p in, which would require rewriting part of
387 // vg_replace_malloc.c... argh.
389 // However, Memcheck does free checking, and will catch almost all
390 // violations this checking would have caught. (Would only miss if we
391 // unluckily passed an unrelated pointer to the very start of a heap
392 // block that was unrelated to that block. This is very unlikely!) So
393 // we haven't lost much.
395 handle_free_heap(tid
, p
);
398 void h_replace___builtin_delete ( ThreadId tid
, void* p
)
400 handle_free_heap(tid
, p
);
403 void h_replace___builtin_vec_delete ( ThreadId tid
, void* p
)
405 handle_free_heap(tid
, p
);
408 void* h_replace_realloc ( ThreadId tid
, void* p_old
, SizeT new_size
)
412 /* First try and find the block. */
413 seg
= find_Seg_by_addr( (Addr
)p_old
);
417 tl_assert(seg
->addr
== (Addr
)p_old
);
419 if (new_size
<= seg
->szB
) {
420 /* new size is smaller: allocate, copy from old to new */
421 Addr p_new
= (Addr
)VG_(cli_malloc
)(VG_(clo_alignment
), new_size
);
422 VG_(memcpy
)((void*)p_new
, p_old
, new_size
);
424 /* Free old memory */
425 die_and_free_mem_heap( tid
, seg
);
427 /* This has to be after die_and_free_mem_heap, otherwise the
428 former succeeds in shorting out the new block, not the
429 old, in the case when both are on the same list. */
430 add_new_segment ( tid
, p_new
, new_size
);
434 /* new size is bigger: allocate, copy from old to new */
435 Addr p_new
= (Addr
)VG_(cli_malloc
)(VG_(clo_alignment
), new_size
);
436 VG_(memcpy
)((void*)p_new
, p_old
, seg
->szB
);
438 /* Free old memory */
439 die_and_free_mem_heap( tid
, seg
);
441 /* This has to be after die_and_free_mem_heap, otherwise the
442 former succeeds in shorting out the new block, not the old,
443 in the case when both are on the same list. NB jrs
444 2008-Sept-11: not sure if this comment is valid/correct any
445 more -- I suspect not. */
446 add_new_segment ( tid
, p_new
, new_size
);
452 SizeT
h_replace_malloc_usable_size ( ThreadId tid
, void* p
)
454 Seg
* seg
= find_Seg_by_addr( (Addr
)p
);
456 // There may be slop, but pretend there isn't because only the asked-for
457 // area will have been shadowed properly.
458 return ( seg
? seg
->szB
: 0 );
462 /*--------------------------------------------------------------------*/
463 /*--- Instrumentation ---*/
464 /*--------------------------------------------------------------------*/
466 /* The h_ instrumenter that follows is complex, since it deals with
467 shadow value computation.
469 It also needs to generate instrumentation for the sg_ side of
470 things. That's relatively straightforward. However, rather than
471 confuse the code herein any further, we simply delegate the problem
472 to sg_main.c, by using the four functions
473 sg_instrument_{init,fini,IRStmt,final_jump}. These four completely
474 abstractify the sg_ instrumentation. See comments in sg_main.c's
475 instrumentation section for further details. */
/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the qmpMap
   in PCEnv.  The tmp's type is also not recorded, as this is present
   in PCEnv.sb->tyenv.

   When .kind is NonShad, .shadow may give the identity of the temp
   currently holding the associated shadow value, or it may be
   IRTemp_INVALID if code to compute the shadow has not yet been
   generated.

   When .kind is Shad, the tmp holds a shadow value, and so .shadow must
   be IRTemp_INVALID, since it is illogical for a shadow tmp itself to be
   shadowed. */
493 enum { NonShad
=1, Shad
=2 }
505 /* Carries around state during Ptrcheck instrumentation. */
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added to it. */
      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         qmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         the total number of NonShad and Shad temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(pce->sb->tyenv, ...)" at various places in the
         instrumentation process.

         Note that only integer temps of the guest word size are
         shadowed, since it is impossible (or meaningless) to hold a
         pointer in any other type of temp. */
531 XArray
* /* of TempMapEnt */ qmpMap
;
533 /* READONLY: the host word type. Needed for constructing
534 arguments of type 'HWord' to be passed to helper functions.
535 Ity_I32 or Ity_I64 only. */
538 /* READONLY: the guest word type, Ity_I32 or Ity_I64 only. */
541 /* READONLY: the guest state size, so we can generate shadow
542 offsets correctly. */
543 Int guest_state_sizeB
;
/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in tables
   tracking them.

   Shadow IRTemps are therefore allocated on demand.  pce.qmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or IRTemp_INVALID if none is so far assigned.

   (2) (more important reason): It is necessary to support making
   multiple assignments to a shadow -- specifically, after testing a
   shadow for definedness, it needs to be made defined.  But IR's SSA
   property disallows this.  Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the qmpMap is updated to reflect the new binding.

   A corollary is that if the qmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/
571 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
572 both the table in pce->sb and to our auxiliary mapping. Note that
573 newTemp may cause pce->tmpMap to resize, hence previous results
574 from VG_(indexXA)(pce->tmpMap) are invalidated. */
575 static IRTemp
newTemp ( PCEnv
* pce
, IRType ty
, TempKind kind
)
579 IRTemp tmp
= newIRTemp(pce
->sb
->tyenv
, ty
);
581 ent
.shadow
= IRTemp_INVALID
;
582 newIx
= VG_(addToXA
)( pce
->qmpMap
, &ent
);
583 tl_assert(newIx
== (Word
)tmp
);
587 /*------------------------------------------------------------*/
588 /*--- Constructing IR fragments ---*/
589 /*------------------------------------------------------------*/
591 /* add stmt to a bb */
592 static /*inline*/ void stmt ( HChar cat
, PCEnv
* pce
, IRStmt
* st
) {
594 VG_(printf
)(" %c: ", cat
);
598 addStmtToIRSB(pce
->sb
, st
);
601 static IRTemp
for_sg__newIRTemp_cb ( IRType ty
, void* opaque
)
603 PCEnv
* pce
= (PCEnv
*)opaque
;
604 return newTemp( pce
, ty
, NonShad
);
608 IRSB
* h_instrument ( VgCallbackClosure
* closure
,
610 const VexGuestLayout
* layout
,
611 const VexGuestExtents
* vge
,
612 const VexArchInfo
* archinfo_host
,
613 IRType gWordTy
, IRType hWordTy
)
615 Bool verboze
= 0||False
;
618 struct _SGEnv
* sgenv
;
620 if (gWordTy
!= hWordTy
) {
621 /* We don't currently support this case. */
622 VG_(tool_panic
)("host/guest word size mismatch");
625 /* Check we're not completely nuts */
626 tl_assert(sizeof(UWord
) == sizeof(void*));
627 tl_assert(sizeof(Word
) == sizeof(void*));
628 tl_assert(sizeof(Addr
) == sizeof(void*));
629 tl_assert(sizeof(ULong
) == 8);
630 tl_assert(sizeof(Long
) == 8);
631 tl_assert(sizeof(Addr
) == sizeof(void*));
632 tl_assert(sizeof(UInt
) == 4);
633 tl_assert(sizeof(Int
) == 4);
635 /* Set up the running environment. Both .sb and .tmpMap are
636 modified as we go along. Note that tmps are added to both
637 .sb->tyenv and .tmpMap together, so the valid index-set for
638 those two arrays should always be identical. */
639 VG_(memset
)(&pce
, 0, sizeof(pce
));
640 pce
.sb
= deepCopyIRSBExceptStmts(sbIn
);
642 pce
.hWordTy
= hWordTy
;
643 pce
.gWordTy
= gWordTy
;
644 pce
.guest_state_sizeB
= layout
->total_sizeB
;
646 pce
.qmpMap
= VG_(newXA
)( VG_(malloc
), "pc.h_instrument.1", VG_(free
),
648 for (i
= 0; i
< sbIn
->tyenv
->types_used
; i
++) {
651 ent
.shadow
= IRTemp_INVALID
;
652 VG_(addToXA
)( pce
.qmpMap
, &ent
);
654 tl_assert( VG_(sizeXA
)( pce
.qmpMap
) == sbIn
->tyenv
->types_used
);
656 /* Also set up for the sg_ instrumenter. See comments at the top
657 of this instrumentation section for details. The two parameters
658 constitute a closure, which sg_ can use to correctly generate
659 new IRTemps as needed. */
660 sgenv
= sg_instrument_init( for_sg__newIRTemp_cb
,
663 /* Copy verbatim any IR preamble preceding the first IMark */
666 while (i
< sbIn
->stmts_used
&& sbIn
->stmts
[i
]->tag
!= Ist_IMark
) {
667 IRStmt
* st
= sbIn
->stmts
[i
];
669 tl_assert(isFlatIRStmt(st
));
670 stmt( 'C', &pce
, sbIn
->stmts
[i
] );
674 /* Iterate over the remaining stmts to generate instrumentation. */
676 tl_assert(sbIn
->stmts_used
> 0);
678 tl_assert(i
< sbIn
->stmts_used
);
679 tl_assert(sbIn
->stmts
[i
]->tag
== Ist_IMark
);
681 for (/*use current i*/; i
< sbIn
->stmts_used
; i
++) {
682 /* generate sg_ instrumentation for this stmt */
683 sg_instrument_IRStmt( sgenv
, pce
.sb
, sbIn
->stmts
[i
],
684 layout
, gWordTy
, hWordTy
);
686 stmt( 'C', &pce
, sbIn
->stmts
[i
] );
689 /* generate sg_ instrumentation for the final jump */
690 sg_instrument_final_jump( sgenv
, pce
.sb
, sbIn
->next
, sbIn
->jumpkind
,
691 layout
, gWordTy
, hWordTy
);
693 /* and finalise .. */
694 sg_instrument_fini( sgenv
);
696 /* If this fails, there's been some serious snafu with tmp management,
697 that should be investigated. */
698 tl_assert( VG_(sizeXA
)( pce
.qmpMap
) == pce
.sb
->tyenv
->types_used
);
699 VG_(deleteXA
)( pce
.qmpMap
);
705 /*--------------------------------------------------------------------*/
706 /*--- Finalisation ---*/
707 /*--------------------------------------------------------------------*/
709 void h_fini ( Int exitcode
)
711 if (VG_(clo_verbosity
) == 1 && !VG_(clo_xml
)) {
712 VG_(message
)(Vg_UserMsg
,
713 "For counts of detected and suppressed errors, "
717 if (VG_(clo_stats
)) {
718 VG_(message
)(Vg_DebugMsg
,
719 " h_: %'10llu client allocs, %'10llu client frees\n",
720 stats__client_mallocs
, stats__client_frees
);
721 VG_(message
)(Vg_DebugMsg
,
722 " h_: %'10llu Segs allocd, %'10llu Segs recycled\n",
723 stats__segs_allocd
, stats__segs_recycled
);
728 /*--------------------------------------------------------------------*/
729 /*--- end h_main.c ---*/
730 /*--------------------------------------------------------------------*/