2 /*--------------------------------------------------------------------*/
3 /*--- Ptrcheck: a pointer-use checker. ---*/
4 /*--- This file checks stack and global array accesses. ---*/
6 /*--------------------------------------------------------------------*/
9 This file is part of Ptrcheck, a Valgrind tool for checking pointer
12 Copyright (C) 2008-2017 OpenWorks Ltd
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 The GNU General Public License is contained in the file COPYING.
32 Neither the names of the U.S. Department of Energy nor the
33 University of California nor the names of its contributors may be
34 used to endorse or promote products derived from this software
35 without prior written permission.
38 #include "pub_tool_basics.h"
39 #include "pub_tool_libcbase.h"
40 #include "pub_tool_libcassert.h"
41 #include "pub_tool_libcprint.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
44 #include "pub_tool_xarray.h"
45 #include "pub_tool_threadstate.h"
46 #include "pub_tool_mallocfree.h"
47 #include "pub_tool_machine.h"
48 #include "pub_tool_debuginfo.h"
49 #include "pub_tool_options.h"
51 #include "pc_common.h"
53 #include "sg_main.h" // self
57 void preen_global_Invars ( Addr a
, SizeT len
); /*fwds*/
60 //////////////////////////////////////////////////////////////
64 //////////////////////////////////////////////////////////////
66 static inline Bool
is_sane_TId ( ThreadId tid
)
68 return tid
>= 0 && tid
< VG_N_THREADS
69 && tid
!= VG_INVALID_THREADID
;
72 static void* sg_malloc ( const HChar
* cc
, SizeT n
) {
75 p
= VG_(malloc
)( cc
, n
);
79 static void sg_free ( void* p
) {
85 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
86 first interval is lower, 1 if the first interval is higher, and 0
87 if there is any overlap. Redundant paranoia with casting is there
88 following what looked distinctly like a bug in gcc-4.1.2, in which
89 some of the comparisons were done signedly instead of
92 static Word
cmp_nonempty_intervals ( Addr a1
, SizeT n1
,
94 UWord a1w
= (UWord
)a1
;
95 UWord n1w
= (UWord
)n1
;
96 UWord a2w
= (UWord
)a2
;
97 UWord n2w
= (UWord
)n2
;
98 tl_assert(n1w
> 0 && n2w
> 0);
99 if (a1w
+ n1w
<= a2w
) return -1L;
100 if (a2w
+ n2w
<= a1w
) return 1L;
104 /* Return true iff [aSmall,aSmall+nSmall) is entirely contained
105 within [aBig,aBig+nBig). */
107 static Bool
is_subinterval_of ( Addr aBig
, SizeT nBig
,
108 Addr aSmall
, SizeT nSmall
) {
109 tl_assert(nBig
> 0 && nSmall
> 0);
110 return aBig
<= aSmall
&& aSmall
+ nSmall
<= aBig
+ nBig
;
114 static Addr
Addr__min ( Addr a1
, Addr a2
) {
115 return a1
< a2
? a1
: a2
;
119 static Addr
Addr__max ( Addr a1
, Addr a2
) {
120 return a1
< a2
? a2
: a1
;
124 //////////////////////////////////////////////////////////////
126 // StackBlocks Persistent Cache //
128 //////////////////////////////////////////////////////////////
130 /* We maintain a set of XArray* of StackBlocks. These are never
131 freed. When a new StackBlock vector is acquired from
132 VG_(di_get_local_blocks_at_ip), we compare it to the existing set.
133 If not present, it is added. If present, the just-acquired one is
134 freed and the copy used.
136 This simplifies storage management elsewhere. It allows us to
137 assume that a pointer to an XArray* of StackBlock is valid forever.
138 It also means there are no duplicates anywhere, which could be
139 important from a space point of view for programs that generate a
140 lot of translations, or where translations are frequently discarded
143 Note that we normalise the arrays by sorting the elements according
144 to an arbitrary total order, so as to avoid the situation that two
145 vectors describe the same set of variables but are not structurally
148 static inline Bool
StackBlock__sane ( const StackBlock
* fb
)
150 if (fb
->name
[ sizeof(fb
->name
)-1 ] != 0)
152 if (fb
->spRel
!= False
&& fb
->spRel
!= True
)
154 if (fb
->isVec
!= False
&& fb
->isVec
!= True
)
159 /* Generate an arbitrary total ordering on StackBlocks. */
160 static Word
StackBlock__cmp ( const StackBlock
* fb1
, const StackBlock
* fb2
)
163 tl_assert(StackBlock__sane(fb1
));
164 tl_assert(StackBlock__sane(fb2
));
165 /* Hopefully the .base test hits most of the time. For the blocks
166 associated with any particular instruction, if the .base values
167 are the same then probably it doesn't make sense for the other
168 fields to be different. But this is supposed to be a completely
169 general structural total order, so we have to compare everything
171 if (fb1
->base
< fb2
->base
) return -1;
172 if (fb1
->base
> fb2
->base
) return 1;
174 if (fb1
->szB
< fb2
->szB
) return -1;
175 if (fb1
->szB
> fb2
->szB
) return 1;
176 /* compare sp/fp flag */
177 if (fb1
->spRel
< fb2
->spRel
) return -1;
178 if (fb1
->spRel
> fb2
->spRel
) return 1;
179 /* compare is/is-not array-typed flag */
180 if (fb1
->isVec
< fb2
->isVec
) return -1;
181 if (fb1
->isVec
> fb2
->isVec
) return 1;
182 /* compare the name */
183 r
= (Word
)VG_(strcmp
)(fb1
->name
, fb2
->name
);
187 /* Returns True if all fields except .szB are the same. szBs may or
188 may not be the same; they are simply not consulted. */
189 static Bool
StackBlock__all_fields_except_szB_are_equal (
194 tl_assert(StackBlock__sane(fb1
));
195 tl_assert(StackBlock__sane(fb2
));
196 return fb1
->base
== fb2
->base
197 && fb1
->spRel
== fb2
->spRel
198 && fb1
->isVec
== fb2
->isVec
199 && 0 == VG_(strcmp
)(fb1
->name
, fb2
->name
);
203 /* Generate an arbitrary total ordering on vectors of StackBlocks. */
204 static Word
StackBlocks__cmp ( XArray
* fb1s
, XArray
* fb2s
)
207 n1
= VG_(sizeXA
)( fb1s
);
208 n2
= VG_(sizeXA
)( fb2s
);
209 if (n1
< n2
) return -1;
210 if (n1
> n2
) return 1;
211 for (i
= 0; i
< n1
; i
++) {
212 StackBlock
*fb1
, *fb2
;
213 fb1
= VG_(indexXA
)( fb1s
, i
);
214 fb2
= VG_(indexXA
)( fb2s
, i
);
215 r
= StackBlock__cmp( fb1
, fb2
);
216 if (r
!= 0) return r
;
218 tl_assert(i
== n1
&& i
== n2
);
222 static void pp_StackBlocks ( XArray
* sbs
)
224 Word i
, n
= VG_(sizeXA
)( sbs
);
225 VG_(message
)(Vg_DebugMsg
, "<<< STACKBLOCKS\n" );
226 for (i
= 0; i
< n
; i
++) {
227 StackBlock
* sb
= (StackBlock
*)VG_(indexXA
)( sbs
, i
);
228 VG_(message
)(Vg_DebugMsg
,
229 " StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
230 sb
->base
, sb
->szB
, sb
->spRel
? 'Y' : 'N',
231 sb
->isVec
? 'Y' : 'N', &sb
->name
[0]
234 VG_(message
)(Vg_DebugMsg
, ">>> STACKBLOCKS\n" );
238 /* ---------- The StackBlock vector cache ---------- */
240 static WordFM
* /* XArray* of StackBlock -> nothing */
241 frameBlocks_set
= NULL
;
243 static void init_StackBlocks_set ( void )
245 tl_assert(!frameBlocks_set
);
247 = VG_(newFM
)( sg_malloc
, "di.sg_main.iSBs.1", sg_free
,
248 (Word(*)(UWord
,UWord
))StackBlocks__cmp
);
249 tl_assert(frameBlocks_set
);
252 /* Find the given StackBlock-vector in our collection thereof. If
253 found, deallocate the supplied one, and return the address of the
254 copy. If not found, add the supplied one to our collection and
255 return its address. */
256 static XArray
* /* of StackBlock */
257 StackBlocks__find_and_dealloc__or_add
258 ( XArray
* /* of StackBlock */ orig
)
262 /* First, normalise, as per comments above. */
263 VG_(setCmpFnXA
)( orig
, (XACmpFn_t
)StackBlock__cmp
);
266 /* Now get rid of any exact duplicates. */
268 { Word r
, w
, nEQ
, n
= VG_(sizeXA
)( orig
);
272 for (r
= 0; r
< n
; r
++) {
274 StackBlock
* pR0
= VG_(indexXA
)( orig
, r
+0 );
275 StackBlock
* pR1
= VG_(indexXA
)( orig
, r
+1 );
276 Word c
= StackBlock__cmp(pR0
,pR1
);
277 tl_assert(c
== -1 || c
== 0);
278 if (c
== 0) { nEQ
++; continue; }
281 StackBlock
* pW
= VG_(indexXA
)( orig
, w
);
282 StackBlock
* pR
= VG_(indexXA
)( orig
, r
);
288 tl_assert(w
+ nEQ
== n
);
290 VG_(dropTailXA
)( orig
, n
-w
);
292 if (0) VG_(printf
)("delta %ld\n", n
-w
);
296 /* Deal with the following strangeness, where two otherwise
297 identical blocks are claimed to have different sizes. In which
298 case we use the larger size. */
299 /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
300 StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
301 StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
303 { Word i
, n
= VG_(sizeXA
)( orig
);
305 for (i
= 0; i
< n
-1; i
++) {
306 StackBlock
* sb0
= VG_(indexXA
)( orig
, i
+0 );
307 StackBlock
* sb1
= VG_(indexXA
)( orig
, i
+1 );
308 if (StackBlock__all_fields_except_szB_are_equal(sb0
, sb1
)) {
309 /* They can't be identical because the previous tidying
310 pass would have removed the duplicates. And they
311 can't be > because the earlier sorting pass would
312 have ordered otherwise-identical descriptors
313 according to < on .szB fields. Hence: */
314 tl_assert(sb0
->szB
< sb1
->szB
);
316 /* This makes the blocks identical, at the size of the
317 larger one. Rather than go to all the hassle of
318 sliding the rest down, simply go back to the
319 remove-duplicates stage. The assertion guarantees
320 that we eventually make progress, since the rm-dups
321 stage will get rid of one of the blocks. This is
322 expected to happen only exceedingly rarely. */
323 tl_assert(StackBlock__cmp(sb0
,sb1
) == 0);
330 /* If there are any blocks which overlap and have the same
331 fpRel-ness, junk the whole descriptor; it's obviously bogus.
332 Icc11 certainly generates bogus info from time to time.
334 This check is pretty weak; really we ought to have a stronger
336 { Word i
, n
= VG_(sizeXA
)( orig
);
337 static Int moans
= 3;
338 for (i
= 0; i
< n
-1; i
++) {
339 StackBlock
* sb1
= (StackBlock
*)VG_(indexXA
)( orig
, i
);
340 StackBlock
* sb2
= (StackBlock
*)VG_(indexXA
)( orig
, i
+1 );
341 if (sb1
->spRel
== sb2
->spRel
342 && (sb1
->base
>= sb2
->base
343 || sb1
->base
+ sb1
->szB
> sb2
->base
)) {
344 if (moans
> 0 && !VG_(clo_xml
)) {
346 VG_(message
)(Vg_UserMsg
, "Warning: bogus DWARF3 info: "
347 "overlapping stack blocks\n");
348 if (VG_(clo_verbosity
) >= 2)
349 pp_StackBlocks(orig
);
351 VG_(message
)(Vg_UserMsg
, "Further instances of this "
352 "message will not be shown\n" );
354 VG_(dropTailXA
)( orig
, VG_(sizeXA
)( orig
));
360 /* Now, do we have it already? */
361 if (VG_(lookupFM
)( frameBlocks_set
, &key
, &val
, (UWord
)orig
)) {
365 tl_assert(key
!= (UWord
)orig
);
371 VG_(addToFM
)( frameBlocks_set
, (UWord
)orig
, 0 );
376 /* Top level function for getting the StackBlock vector for a given
377 instruction. It is guaranteed that the returned pointer will be
378 valid for the entire rest of the run, and also that the addresses
379 of the individual elements of the array will not change. */
381 static XArray
* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip
)
383 XArray
* blocks
= VG_(di_get_stack_blocks_at_ip
)( ip
, True
/*arrays only*/ );
385 return StackBlocks__find_and_dealloc__or_add( blocks
);
389 //////////////////////////////////////////////////////////////
391 // GlobalBlocks Persistent Cache //
393 //////////////////////////////////////////////////////////////
395 /* Generate an arbitrary total ordering on GlobalBlocks. */
396 static Word
GlobalBlock__cmp ( GlobalBlock
* gb1
, GlobalBlock
* gb2
)
400 if (gb1
->addr
< gb2
->addr
) return -1;
401 if (gb1
->addr
> gb2
->addr
) return 1;
403 if (gb1
->szB
< gb2
->szB
) return -1;
404 if (gb1
->szB
> gb2
->szB
) return 1;
405 /* compare is/is-not array-typed flag */
406 if (gb1
->isVec
< gb2
->isVec
) return -1;
407 if (gb1
->isVec
> gb2
->isVec
) return 1;
408 /* compare the name */
409 r
= (Word
)VG_(strcmp
)(gb1
->name
, gb2
->name
);
410 if (r
!= 0) return r
;
411 /* compare the soname */
412 r
= (Word
)VG_(strcmp
)(gb1
->soname
, gb2
->soname
);
416 static WordFM
* /* GlobalBlock* -> nothing */
417 globalBlock_set
= NULL
;
419 static void init_GlobalBlock_set ( void )
421 tl_assert(!globalBlock_set
);
423 = VG_(newFM
)( sg_malloc
, "di.sg_main.iGBs.1", sg_free
,
424 (Word(*)(UWord
,UWord
))GlobalBlock__cmp
);
425 tl_assert(globalBlock_set
);
429 /* Top level function for making GlobalBlocks persistent. Call here
430 with a non-persistent version, and the returned one is guaranteed
431 to be valid for the entire rest of the run. The supplied one is
432 copied, not stored, so can be freed after the call. */
434 static GlobalBlock
* get_persistent_GlobalBlock ( GlobalBlock
* orig
)
437 /* Now, do we have it already? */
438 if (VG_(lookupFM
)( globalBlock_set
, &key
, &val
, (UWord
)orig
)) {
439 /* yes, return the copy */
442 res
= (GlobalBlock
*)key
;
443 tl_assert(res
!= orig
);
446 /* no. clone it, store the clone and return the clone's
448 GlobalBlock
* clone
= sg_malloc( "di.sg_main.gpGB.1",
449 sizeof(GlobalBlock
) );
452 VG_(addToFM
)( globalBlock_set
, (UWord
)clone
, 0 );
458 //////////////////////////////////////////////////////////////
460 // Interval tree of StackTreeBlock //
462 //////////////////////////////////////////////////////////////
464 /* A node in a stack interval tree. Zero length intervals (.szB == 0)
467 A stack interval tree is a (WordFM StackTreeNode* void). There is
468 one stack interval tree for each thread.
473 SizeT szB
; /* copied from .descr->szB */
474 StackBlock
* descr
; /* it's an instance of this block */
475 UWord depth
; /* depth of stack at time block was pushed */
479 static void pp_StackTree ( WordFM
* sitree
, const HChar
* who
)
482 VG_(printf
)("<<< BEGIN pp_StackTree %s\n", who
);
483 VG_(initIterFM
)( sitree
);
484 while (VG_(nextIterFM
)( sitree
, &keyW
, &valW
)) {
485 StackTreeNode
* nd
= (StackTreeNode
*)keyW
;
486 VG_(printf
)(" [%#lx,+%lu) descr=%p %s %lu\n", nd
->addr
, nd
->szB
,
487 nd
->descr
, nd
->descr
->name
, nd
->descr
->szB
);
489 VG_(printf
)(">>> END pp_StackTree %s\n", who
);
492 /* Interval comparison function for StackTreeNode */
493 static Word
cmp_intervals_StackTreeNode ( StackTreeNode
* sn1
,
496 return cmp_nonempty_intervals(sn1
->addr
, sn1
->szB
,
497 sn2
->addr
, sn2
->szB
);
500 /* Find the node holding 'a', if any. */
501 static StackTreeNode
* find_StackTreeNode ( WordFM
* sitree
, Addr a
)
508 if (VG_(lookupFM
)( sitree
, &keyW
, &valW
, (UWord
)&key
)) {
509 StackTreeNode
* res
= (StackTreeNode
*)keyW
;
510 tl_assert(valW
== 0);
511 tl_assert(res
!= &key
);
518 /* Note that the supplied XArray of FrameBlock must have been
519 made persistent already. */
520 __attribute__((noinline
))
521 static void add_blocks_to_StackTree (
522 /*MOD*/WordFM
* sitree
,
523 XArray
* /* FrameBlock */ descrs
,
524 XArray
* /* Addr */ bases
,
528 Bool debug
= (Bool
)0;
529 Word i
, nDescrs
, nBases
;
531 nDescrs
= VG_(sizeXA
)( descrs
),
532 nBases
= VG_(sizeXA
)( bases
);
533 tl_assert(nDescrs
== nBases
);
535 if (nDescrs
== 0) return;
539 VG_(printf
)("\ndepth = %lu\n", depth
);
540 pp_StackTree( sitree
, "add_blocks_to_StackTree-pre" );
541 pp_StackBlocks(descrs
);
544 for (i
= 0; i
< nDescrs
; i
++) {
545 Bool already_present
;
547 Addr addr
= *(Addr
*)VG_(indexXA
)( bases
, i
);
548 StackBlock
* descr
= (StackBlock
*)VG_(indexXA
)( descrs
, i
);
549 tl_assert(descr
->szB
> 0);
550 nyu
= sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode
) );
552 nyu
->szB
= descr
->szB
;
555 if (debug
) VG_(printf
)("ADD %#lx %lu\n", addr
, descr
->szB
);
556 already_present
= VG_(addToFM
)( sitree
, (UWord
)nyu
, 0 );
557 /* The interval can't already be there; else we have
558 overlapping stack blocks. */
559 tl_assert(!already_present
);
561 pp_StackTree( sitree
, "add_blocks_to_StackTree-step" );
565 pp_StackTree( sitree
, "add_blocks_to_StackTree-post" );
570 static void del_blocks_from_StackTree ( /*MOD*/WordFM
* sitree
,
571 XArray
* /* Addr */ bases
)
574 Word i
, nBases
= VG_(sizeXA
)( bases
);
575 for (i
= 0; i
< nBases
; i
++) {
577 Addr addr
= *(Addr
*)VG_(indexXA
)( bases
, i
);
578 StackTreeNode
* nd
= find_StackTreeNode(sitree
, addr
);
579 /* The interval must be there; we added it earlier when
580 the associated frame was created. */
582 b
= VG_(delFromFM
)( sitree
, &oldK
, &oldV
, (UWord
)nd
);
583 /* we just found the block! */
585 tl_assert(oldV
== 0);
586 tl_assert(nd
== (StackTreeNode
*)oldK
);
592 static void delete_StackTree__kFin ( UWord keyW
) {
593 StackTreeNode
* nd
= (StackTreeNode
*)keyW
;
597 static void delete_StackTree__vFin ( UWord valW
) {
598 tl_assert(valW
== 0);
600 static void delete_StackTree ( WordFM
* sitree
)
602 VG_(deleteFM
)( sitree
,
603 delete_StackTree__kFin
, delete_StackTree__vFin
);
606 static WordFM
* new_StackTree ( void ) {
607 return VG_(newFM
)( sg_malloc
, "di.sg_main.nST.1", sg_free
,
608 (Word(*)(UWord
,UWord
))cmp_intervals_StackTreeNode
);
612 //////////////////////////////////////////////////////////////
614 // Interval tree of GlobalTreeBlock //
616 //////////////////////////////////////////////////////////////
618 /* A node in a global interval tree. Zero length intervals
619 (.szB == 0) are not allowed.
621 A global interval tree is a (WordFM GlobalTreeNode* void). There
622 is one global interval tree for the entire process.
626 Addr addr
; /* copied from .descr->addr */
627 SizeT szB
; /* copied from .descr->szB */
628 GlobalBlock
* descr
; /* it's this block */
632 static void GlobalTreeNode__pp ( GlobalTreeNode
* nd
) {
633 tl_assert(nd
->descr
);
634 VG_(printf
)("GTNode [%#lx,+%lu) %s",
635 nd
->addr
, nd
->szB
, nd
->descr
->name
);
638 static void GlobalTree__pp ( WordFM
* /* of (GlobalTreeNode,void) */ gitree
,
643 VG_(printf
)("<<< GlobalBlockTree (%s)\n", who
);
644 VG_(initIterFM
)( gitree
);
645 while (VG_(nextIterFM
)( gitree
, &keyW
, &valW
)) {
646 tl_assert(valW
== 0);
647 nd
= (GlobalTreeNode
*)keyW
;
649 GlobalTreeNode__pp(nd
);
652 VG_(doneIterFM
)( gitree
);
653 VG_(printf
)(">>>\n");
656 /* Interval comparison function for GlobalTreeNode */
657 static Word
cmp_intervals_GlobalTreeNode ( GlobalTreeNode
* gn1
,
658 GlobalTreeNode
* gn2
)
660 return cmp_nonempty_intervals( gn1
->addr
, gn1
->szB
,
661 gn2
->addr
, gn2
->szB
);
664 /* Find the node holding 'a', if any. */
665 static GlobalTreeNode
* find_GlobalTreeNode ( WordFM
* gitree
, Addr a
)
671 if (VG_(lookupFM
)( gitree
, &keyW
, &valW
, (UWord
)&key
)) {
672 GlobalTreeNode
* res
= (GlobalTreeNode
*)keyW
;
673 tl_assert(valW
== 0);
674 tl_assert(res
!= &key
);
681 /* Note that the supplied GlobalBlock must have been made persistent
683 static void add_block_to_GlobalTree (
684 /*MOD*/WordFM
* gitree
,
688 Bool already_present
;
689 GlobalTreeNode
*nyu
, *nd
;
691 static Int moans
= 3;
693 tl_assert(descr
->szB
> 0);
694 nyu
= sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode
) );
695 nyu
->addr
= descr
->addr
;
696 nyu
->szB
= descr
->szB
;
699 /* Basically it's an error to add a global block to the tree that
700 is already in the tree. However, detect and ignore attempts to
701 insert exact duplicates; they do appear for some reason
702 (possible a bug in m_debuginfo?) */
703 already_present
= VG_(lookupFM
)( gitree
, &keyW
, &valW
, (UWord
)nyu
);
704 if (already_present
) {
705 tl_assert(valW
== 0);
706 nd
= (GlobalTreeNode
*)keyW
;
708 tl_assert(nd
!= nyu
);
709 tl_assert(nd
->descr
);
710 tl_assert(nyu
->descr
);
711 if (nd
->addr
== nyu
->addr
&& nd
->szB
== nyu
->szB
712 /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
713 /* Although it seems reasonable to demand that duplicate
714 blocks have identical names, that is too strict. For
715 example, reading debuginfo from glibc produces two
716 otherwise identical blocks with names "tzname" and
717 "__tzname". A constraint of the form "must be identical,
718 or one must be a substring of the other" would fix that.
719 However, such trickery is scuppered by the fact that we
720 truncate all variable names to 15 characters to make
721 storage management simpler, hence giving pairs like
722 "__EI___pthread_" (truncated) vs "__pthread_keys". So
723 it's simplest just to skip the name comparison
725 && 0 == VG_(strcmp
)(nd
->descr
->soname
, nyu
->descr
->soname
)) {
726 /* exact duplicate; ignore it */
730 /* else fall through; the assertion below will catch it */
733 already_present
= VG_(addToFM
)( gitree
, (UWord
)nyu
, 0 );
734 /* The interval can't already be there; else we have
735 overlapping global blocks. */
736 /* Unfortunately (25 Jan 09) at least icc11 has been seen to
737 generate overlapping block descriptions in the Dwarf3; clearly
739 if (already_present
&& moans
> 0 && !VG_(clo_xml
)) {
741 VG_(message
)(Vg_UserMsg
, "Warning: bogus DWARF3 info: "
742 "overlapping global blocks\n");
743 if (VG_(clo_verbosity
) >= 2) {
744 GlobalTree__pp( gitree
,
745 "add_block_to_GlobalTree: non-exact duplicate" );
746 VG_(printf
)("Overlapping block: ");
747 GlobalTreeNode__pp(nyu
);
751 VG_(message
)(Vg_UserMsg
, "Further instances of this "
752 "message will not be shown\n" );
754 /* tl_assert(!already_present); */
757 static Bool
del_GlobalTree_range ( /*MOD*/WordFM
* gitree
,
760 /* One easy way to do this: look up [a,a+szB) in the tree. That
761 will either succeed, producing a block which intersects that
762 range, in which case we delete it and repeat; or it will fail,
763 in which case there are no blocks intersecting the range, and we
764 can bring the process to a halt. */
765 UWord keyW
, valW
, oldK
, oldV
;
766 GlobalTreeNode key
, *nd
;
776 while (VG_(lookupFM
)( gitree
, &keyW
, &valW
, (UWord
)&key
)) {
778 nd
= (GlobalTreeNode
*)keyW
;
779 tl_assert(valW
== 0);
780 tl_assert(nd
!= &key
);
781 tl_assert(cmp_nonempty_intervals(a
, szB
, nd
->addr
, nd
->szB
) == 0);
783 b
= VG_(delFromFM
)( gitree
, &oldK
, &oldV
, (UWord
)&key
);
785 tl_assert(oldV
== 0);
786 tl_assert(oldK
== keyW
); /* check we deleted the node we just found */
793 //////////////////////////////////////////////////////////////
797 //////////////////////////////////////////////////////////////
799 /* An invariant, as resulting from watching the destination of a
800 memory referencing instruction. Initially is Inv_Unset until the
801 instruction makes a first access. */
805 Inv_Unset
=1, /* not established yet */
806 Inv_Unknown
, /* unknown location */
807 Inv_Stack0
, /* array-typed stack block in innermost frame */
808 Inv_StackN
, /* array-typed stack block in non-innermost frame */
809 Inv_Global
, /* array-typed global block */
825 } Stack0
; /* innermost stack frame */
827 /* Pointer to a node in the interval tree for
830 } StackN
; /* non-innermost stack frame */
832 /* Pointer to a GlobalBlock in the interval tree of
841 /* Partial debugging printing for an Invar. */
842 static void pp_Invar ( Invar
* i
)
846 VG_(printf
)("Unset");
849 VG_(printf
)("Unknown");
852 VG_(printf
)("Stack0 [%#lx,+%lu)",
853 i
->Inv
.Stack0
.addr
, i
->Inv
.Stack0
.szB
);
856 VG_(printf
)("StackN [%#lx,+%lu)",
857 i
->Inv
.StackN
.nd
->addr
, i
->Inv
.StackN
.nd
->szB
);
860 VG_(printf
)("Global [%#lx,+%lu)",
861 i
->Inv
.Global
.nd
->addr
, i
->Inv
.Global
.nd
->szB
);
868 /* Compare two Invars for equality. */
869 static Bool
eq_Invar ( Invar
* i1
, Invar
* i2
)
871 if (i1
->tag
!= i2
->tag
)
879 return i1
->Inv
.Stack0
.addr
== i2
->Inv
.Stack0
.addr
880 && i1
->Inv
.Stack0
.szB
== i2
->Inv
.Stack0
.szB
;
882 return i1
->Inv
.StackN
.nd
== i2
->Inv
.StackN
.nd
;
884 return i1
->Inv
.Global
.nd
== i2
->Inv
.Global
.nd
;
892 /* Generate a piece of text showing 'ea' is relative to 'invar', if
893 known. If unknown, generate an empty string. 'buf' must be at
894 least 32 bytes in size. Also return the absolute value of the
895 delta, if known, or zero if not known.
897 static void gen_delta_str ( /*OUT*/HChar
* buf
,
898 /*OUT*/UWord
* absDelta
,
899 Invar
* inv
, Addr ea
)
910 return; /* unknown */
912 block
= inv
->Inv
.Stack0
.addr
;
913 szB
= inv
->Inv
.Stack0
.szB
;
916 block
= inv
->Inv
.StackN
.nd
->addr
;
917 szB
= inv
->Inv
.StackN
.nd
->szB
;
920 block
= inv
->Inv
.Global
.nd
->addr
;
921 szB
= inv
->Inv
.Global
.nd
->szB
;
928 *absDelta
= block
- ea
;
929 VG_(sprintf
)(buf
, "%'lu before", *absDelta
);
931 else if (ea
>= block
+ szB
) {
932 *absDelta
= ea
- (block
+ szB
);
933 VG_(sprintf
)(buf
, "%'lu after", *absDelta
);
936 // Leave *absDelta at zero.
937 VG_(sprintf
)(buf
, "%'lu inside", ea
- block
);
942 /* Print selected parts of an Invar, suitable for use in error
944 static void show_Invar( HChar
* buf
, Word nBuf
, Invar
* inv
, Word depth
)
947 tl_assert(nBuf
>= 128);
951 VG_(sprintf
)(buf
, "%s", "unknown");
955 VG_(sprintf
)(buf
, "stack %s \"%s\" of size %'lu in this frame",
956 str
, inv
->Inv
.Stack0
.descr
->name
,
957 inv
->Inv
.Stack0
.szB
);
961 VG_(sprintf
)(buf
, "stack %s \"%s\" of size %'lu in frame %lu back from here",
962 str
, inv
->Inv
.StackN
.nd
->descr
->name
,
963 inv
->Inv
.StackN
.nd
->descr
->szB
,
964 depth
- inv
->Inv
.StackN
.nd
->depth
);
968 VG_(sprintf
)(buf
, "global %s \"%s\" of size %'lu in object with soname \"%s\"",
969 str
, inv
->Inv
.Global
.nd
->descr
->name
,
970 inv
->Inv
.Global
.nd
->descr
->szB
,
971 inv
->Inv
.Global
.nd
->descr
->soname
);
974 VG_(sprintf
)(buf
, "%s", "Unset!");
982 //////////////////////////////////////////////////////////////
986 //////////////////////////////////////////////////////////////
988 //////////////////////////////////////////////////////////////
993 /* Powers of two only, else the result will be chaos */
994 #define QCACHE_ADVANCE_EVERY 16
996 /* Per-thread query cache. Note that the invar can only be Inv_StackN
997 (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
1009 QCElem elems
[N_QCACHE
];
1013 static void QCache__invalidate ( QCache
* qc
) {
1014 tl_assert(qc
->nInUse
>= 0);
1018 static void QCache__pp ( QCache
* qc
, const HChar
* who
)
1021 VG_(printf
)("<<< QCache with %ld elements (%s)\n", qc
->nInUse
, who
);
1022 for (i
= 0; i
< qc
->nInUse
; i
++) {
1023 VG_(printf
)(" [%#lx,+%#lx) ", qc
->elems
[i
].addr
, qc
->elems
[i
].szB
);
1024 pp_Invar(&qc
->elems
[i
].inv
);
1027 VG_(printf
)(">>>\n");
1030 static ULong stats__qcache_queries
= 0;
1031 static ULong stats__qcache_misses
= 0;
1032 static ULong stats__qcache_probes
= 0;
1035 //////////////////////////////////////////////////////////////
1038 * a shadow stack of StackFrames, which is a double-linked list
1039 * an stack block interval tree
1041 static struct _StackFrame
** shadowStacks
;
1043 static WordFM
** /* StackTreeNode */ siTrees
;
1045 static QCache
* qcaches
;
1048 /* Additionally, there is one global variable interval tree
1049 for the entire process.
1051 static WordFM
* /* GlobalTreeNode */ giTree
;
1054 static void invalidate_all_QCaches ( void )
1057 for (i
= 0; i
< VG_N_THREADS
; i
++) {
1058 QCache__invalidate( &qcaches
[i
] );
1062 static void ourGlobals_init ( void )
1066 shadowStacks
= sg_malloc( "di.sg_main.oGi.2",
1067 VG_N_THREADS
* sizeof shadowStacks
[0] );
1068 siTrees
= sg_malloc( "di.sg_main.oGi.3", VG_N_THREADS
* sizeof siTrees
[0] );
1069 qcaches
= sg_malloc( "di.sg_main.oGi.4", VG_N_THREADS
* sizeof qcaches
[0] );
1071 for (i
= 0; i
< VG_N_THREADS
; i
++) {
1072 shadowStacks
[i
] = NULL
;
1074 qcaches
[i
] = (QCache
){};
1076 invalidate_all_QCaches();
1077 giTree
= VG_(newFM
)( sg_malloc
, "di.sg_main.oGi.1", sg_free
,
1078 (Word(*)(UWord
,UWord
))cmp_intervals_GlobalTreeNode
);
1082 //////////////////////////////////////////////////////////////
1084 // Handle global variable load/unload events //
1086 //////////////////////////////////////////////////////////////
1088 static void acquire_globals ( ULong di_handle
)
1091 XArray
* /* of GlobalBlock */ gbs
;
1092 if (0) VG_(printf
)("ACQUIRE GLOBALS %llu\n", di_handle
);
1093 gbs
= VG_(di_get_global_blocks_from_dihandle
)
1094 (di_handle
, True
/*arrays only*/);
1095 if (0) VG_(printf
)(" GOT %ld globals\n", VG_(sizeXA
)( gbs
));
1097 n
= VG_(sizeXA
)( gbs
);
1098 for (i
= 0; i
< n
; i
++) {
1100 GlobalBlock
* gb
= VG_(indexXA
)( gbs
, i
);
1101 if (0) VG_(printf
)(" new Global size %2lu at %#lx: %s %s\n",
1102 gb
->szB
, gb
->addr
, gb
->soname
, gb
->name
);
1103 tl_assert(gb
->szB
> 0);
1104 /* Make a persistent copy of each GlobalBlock, and add it
1106 gbp
= get_persistent_GlobalBlock( gb
);
1107 add_block_to_GlobalTree( giTree
, gbp
);
1110 VG_(deleteXA
)( gbs
);
1114 /* We only intercept these two because we need to see any di_handles
1115 that might arise from the mappings/allocations. */
1116 void sg_new_mem_mmap( Addr a
, SizeT len
,
1117 Bool rr
, Bool ww
, Bool xx
, ULong di_handle
)
1120 acquire_globals(di_handle
);
1122 void sg_new_mem_startup( Addr a
, SizeT len
,
1123 Bool rr
, Bool ww
, Bool xx
, ULong di_handle
)
1126 acquire_globals(di_handle
);
1128 void sg_die_mem_munmap ( Addr a
, SizeT len
)
1130 Bool debug
= (Bool
)0;
1131 Bool overlap
= False
;
1133 if (debug
) VG_(printf
)("MUNMAP %#lx %lu\n", a
, len
);
1138 overlap
= del_GlobalTree_range(giTree
, a
, len
);
1140 { /* redundant sanity check */
1142 VG_(initIterFM
)( giTree
);
1143 while (VG_(nextIterFM
)( giTree
, &keyW
, &valW
)) {
1144 GlobalTreeNode
* nd
= (GlobalTreeNode
*)keyW
;
1145 tl_assert(valW
== 0);
1146 tl_assert(nd
->szB
> 0);
1147 tl_assert(nd
->addr
+ nd
->szB
<= a
1148 || a
+ len
<= nd
->addr
);
1150 VG_(doneIterFM
)( giTree
);
1156 /* Ok, the range contained some blocks. Therefore we'll need to
1157 visit all the Invars in all the thread shadow stacks, and
1158 convert all Inv_Global entries that intersect [a,a+len) to
1161 preen_global_Invars( a
, len
);
1162 invalidate_all_QCaches();
1166 //////////////////////////////////////////////////////////////
1170 //////////////////////////////////////////////////////////////
1172 static ULong stats__total_accesses
= 0;
1173 static ULong stats__classify_Stack0
= 0;
1174 static ULong stats__classify_StackN
= 0;
1175 static ULong stats__classify_Global
= 0;
1176 static ULong stats__classify_Unknown
= 0;
1177 static ULong stats__Invars_preened
= 0;
1178 static ULong stats__Invars_changed
= 0;
1179 static ULong stats__t_i_b_empty
= 0;
1180 static ULong stats__htab_fast
= 0;
1181 static ULong stats__htab_searches
= 0;
1182 static ULong stats__htab_probes
= 0;
1183 static ULong stats__htab_resizes
= 0;
1186 /* A dynamic instance of an instruction */
1190 Addr insn_addr
; /* NB! zero means 'not in use' */
1191 XArray
* blocks
; /* XArray* of StackBlock, or NULL if none */
1198 #define N_HTAB_FIXED 64
1201 struct _StackFrame
{
1202 /* The sp when the frame was created, so we know when to get rid
1205 /* The stack frames for a thread are arranged as a doubly linked
1206 list. Obviously the outermost frame in the stack has .outer
1207 as NULL and the innermost in theory has .inner as NULL.
1208 However, when a function returns, we don't delete the
1209 just-vacated StackFrame. Instead, it is retained in the list
1210 and will be re-used when the next call happens. This is so
1211 as to avoid constantly having to dynamically allocate and
1212 deallocate frames. */
1213 struct _StackFrame
* inner
;
1214 struct _StackFrame
* outer
;
1215 Word depth
; /* 0 for outermost; increases inwards */
1216 /* Information for each memory referencing instruction, for this
1217 instantiation of the function. The iinstances array is
1218 operated as a simple linear-probe hash table, which is
1219 dynamically expanded as necessary. Once critical thing is
1220 that an IInstance with a .insn_addr of zero is interpreted to
1221 mean that hash table slot is unused. This means we can't
1222 store an IInstance for address zero. */
1223 /* Note that htab initially points to htab_fixed. If htab_fixed
1224 turns out not to be big enough then htab is made to point to
1225 dynamically allocated memory. But it's often the case that
1226 htab_fixed is big enough, so this optimisation saves a huge
1227 number of sg_malloc/sg_free call pairs. */
1229 UWord htab_size
; /* size of hash table, MAY ONLY BE A POWER OF 2 */
1230 UWord htab_used
; /* number of hash table slots currently in use */
1231 /* If this frame is currently making a call, then the following
1235 XArray
* /* of Addr */ blocks_added_by_call
;
1236 /* See comment just above */
1237 IInstance htab_fixed
[N_HTAB_FIXED
];
1245 /* Move this somewhere else? */
1246 /* Visit all Invars in the entire system. If 'isHeap' is True, change
1247 all Inv_Heap Invars that intersect [a,a+len) to Inv_Unknown. If
1248 'isHeap' is False, do the same but to the Inv_Global{S,V} Invars
1251 __attribute__((noinline
))
1252 static void preen_global_Invar ( Invar
* inv
, Addr a
, SizeT len
)
1254 stats__Invars_preened
++;
1259 tl_assert(inv
->Inv
.Global
.nd
);
1260 tl_assert(inv
->Inv
.Global
.nd
->szB
> 0);
1261 if (0) VG_(printf
)("preen_Invar Global %#lx %lu\n",
1262 inv
->Inv
.Global
.nd
->addr
,
1263 inv
->Inv
.Global
.nd
->szB
);
1264 if (0 == cmp_nonempty_intervals(a
, len
, inv
->Inv
.Global
.nd
->addr
,
1265 inv
->Inv
.Global
.nd
->szB
)) {
1266 inv
->tag
= Inv_Unknown
;
1267 stats__Invars_changed
++;
1274 case Inv_Unset
: /* this should never happen */
1281 __attribute__((noinline
))
1282 static void preen_global_Invars ( Addr a
, SizeT len
)
1288 for (i
= 0; i
< VG_N_THREADS
; i
++) {
1289 frame
= shadowStacks
[i
];
1291 continue; /* no frames for this thread */
1292 /* start from the innermost frame */
1293 while (frame
->inner
)
1294 frame
= frame
->inner
;
1295 tl_assert(frame
->outer
);
1296 /* work through the frames from innermost to outermost. The
1297 order isn't important; we just need to ensure we visit each
1298 frame once (including those which are not actually active,
1299 more 'inner' than the 'innermost active frame', viz, just
1300 hanging around waiting to be used, when the current innermost
1301 active frame makes more calls. See comments on definition of
1302 struct _StackFrame. */
1303 for (; frame
; frame
= frame
->outer
) {
1304 UWord xx
= 0; /* sanity check only; count of used htab entries */
1306 continue; /* frame not in use. See shadowStack_unwind(). */
1307 for (u
= 0; u
< frame
->htab_size
; u
++) {
1308 IInstance
* ii
= &frame
->htab
[u
];
1309 if (ii
->insn_addr
== 0)
1310 continue; /* not in use */
1311 if (0) { pp_Invar(&ii
->invar
); VG_(printf
)(" x\n"); }
1312 preen_global_Invar( &ii
->invar
, a
, len
);
1315 tl_assert(xx
== frame
->htab_used
);
1321 /* XXX this should be >> 2 on ppc32/64 since the bottom two bits
1322 of the ip are guaranteed to be zero */
1323 inline static UWord
compute_II_hash ( Addr ip
, UWord htab_size
) {
1324 return (ip
>> 0) & (htab_size
- 1);
1327 __attribute__((noinline
))
1328 static void initialise_II_hash_table ( StackFrame
* sf
)
1331 sf
->htab_size
= N_HTAB_FIXED
; /* initial hash table size */
1332 sf
->htab
= &sf
->htab_fixed
[0];
1333 tl_assert(sf
->htab
);
1335 for (i
= 0; i
< sf
->htab_size
; i
++)
1336 sf
->htab
[i
].insn_addr
= 0; /* NOT IN USE */
1340 __attribute__((noinline
))
1341 static void resize_II_hash_table ( StackFrame
* sf
)
1343 UWord i
, j
, ix
, old_size
, new_size
;
1344 IInstance
*old_htab
, *new_htab
, *old
;
1346 tl_assert(sf
&& sf
->htab
);
1347 old_size
= sf
->htab_size
;
1348 new_size
= 2 * old_size
;
1349 old_htab
= sf
->htab
;
1350 new_htab
= sg_malloc( "di.sg_main.rIht.1",
1351 new_size
* sizeof(IInstance
) );
1352 for (i
= 0; i
< new_size
; i
++) {
1353 new_htab
[i
].insn_addr
= 0; /* NOT IN USE */
1355 for (i
= 0; i
< old_size
; i
++) {
1357 if (old
->insn_addr
== 0 /* NOT IN USE */)
1359 ix
= compute_II_hash(old
->insn_addr
, new_size
);
1360 /* find out where to put this, in the new table */
1363 if (new_htab
[ix
].insn_addr
== 0)
1365 /* This can't ever happen, because it would mean the new
1366 table is full; that isn't allowed -- even the old table is
1367 only allowed to become half full. */
1370 ix
++; if (ix
== new_size
) ix
= 0;
1372 /* copy the old entry to this location */
1373 tl_assert(ix
< new_size
);
1374 tl_assert(new_htab
[ix
].insn_addr
== 0);
1375 new_htab
[ix
] = *old
;
1376 tl_assert(new_htab
[ix
].insn_addr
!= 0);
1378 /* all entries copied; free old table. */
1379 if (old_htab
!= &sf
->htab_fixed
[0])
1381 sf
->htab
= new_htab
;
1382 sf
->htab_size
= new_size
;
1383 /* check sf->htab_used is correct. Optional and a bit expensive
1386 for (i
= 0; i
< new_size
; i
++) {
1387 if (new_htab
[i
].insn_addr
!= 0) {
1391 tl_assert(j
== sf
->htab_used
);
1392 if (0) VG_(printf
)("resized tab for SF %p to %lu\n", sf
, new_size
);
1396 __attribute__((noinline
))
1397 static IInstance
* find_or_create_IInstance_SLOW (
1400 XArray
* /* StackBlock */ ip_frameblocks
1405 stats__htab_searches
++;
1408 tl_assert(sf
->htab
);
1410 /* Make sure the table loading doesn't get too high. */
1411 if (UNLIKELY(2 * sf
->htab_used
>= 1 * sf
->htab_size
)) {
1412 stats__htab_resizes
++;
1413 resize_II_hash_table(sf
);
1415 tl_assert(2 * sf
->htab_used
<= sf
->htab_size
);
1417 ix
= compute_II_hash(ip
, sf
->htab_size
);
1420 stats__htab_probes
++;
1421 /* Note that because of the way the fast-case handler works,
1422 these two tests are actually redundant in the first iteration
1423 of this loop. (Except they aren't redundant if the code just
1424 above resized the table first. :-) */
1425 if (sf
->htab
[ix
].insn_addr
== ip
)
1426 return &sf
->htab
[ix
];
1427 if (sf
->htab
[ix
].insn_addr
== 0)
1429 /* If i ever gets to zero and we have found neither what we're
1430 looking for nor an empty slot, the table must be full. Which
1431 isn't possible -- we monitor the load factor to ensure it
1432 doesn't get above say 50%; if that ever does happen the table
1437 if (ix
== sf
->htab_size
) ix
= 0;
1440 /* So now we've found a free slot at ix, and we can use that. */
1441 tl_assert(sf
->htab
[ix
].insn_addr
== 0);
1443 /* Add a new record in this slot. */
1444 tl_assert(ip
!= 0); /* CAN'T REPRESENT THIS */
1445 sf
->htab
[ix
].insn_addr
= ip
;
1446 sf
->htab
[ix
].blocks
= ip_frameblocks
;
1447 sf
->htab
[ix
].invar
.tag
= Inv_Unset
;
1449 return &sf
->htab
[ix
];
1454 static IInstance
* find_or_create_IInstance (
1457 XArray
* /* StackBlock */ ip_frameblocks
1460 UWord ix
= compute_II_hash(ip
, sf
->htab_size
);
1461 /* Is it in the first slot we come to? */
1462 if (LIKELY(sf
->htab
[ix
].insn_addr
== ip
)) {
1464 return &sf
->htab
[ix
];
1466 /* If the first slot we come to is empty, bag it. */
1467 if (LIKELY(sf
->htab
[ix
].insn_addr
== 0)) {
1470 sf
->htab
[ix
].insn_addr
= ip
;
1471 sf
->htab
[ix
].blocks
= ip_frameblocks
;
1472 sf
->htab
[ix
].invar
.tag
= Inv_Unset
;
1474 return &sf
->htab
[ix
];
1476 /* Otherwise we hand off to the slow case, which searches other
1477 slots, and optionally resizes the table if necessary. */
1478 return find_or_create_IInstance_SLOW( sf
, ip
, ip_frameblocks
);
1482 __attribute__((noinline
))
1483 static Addr
calculate_StackBlock_EA ( StackBlock
* descr
,
1484 Addr sp
, Addr fp
) {
1485 UWord w1
= (UWord
)descr
->base
;
1486 UWord w2
= (UWord
)(descr
->spRel
? sp
: fp
);
1491 /* Given an array of StackBlocks, return an array of Addrs, holding
1492 their effective addresses. Caller deallocates result array. */
1493 __attribute__((noinline
))
1494 static XArray
* /* Addr */ calculate_StackBlock_EAs (
1495 XArray
* /* StackBlock */ blocks
,
1500 Word i
, n
= VG_(sizeXA
)( blocks
);
1502 res
= VG_(newXA
)( sg_malloc
, "di.sg_main.cSBE.1", sg_free
, sizeof(Addr
) );
1503 for (i
= 0; i
< n
; i
++) {
1504 StackBlock
* blk
= VG_(indexXA
)( blocks
, i
);
1505 Addr ea
= calculate_StackBlock_EA( blk
, sp
, fp
);
1506 VG_(addToXA
)( res
, &ea
);
1512 /* Try to classify the block into which a memory access falls, and
1513 write the result in 'inv'. This writes all relevant fields of
1515 __attribute__((noinline
))
1516 static void classify_address ( /*OUT*/Invar
* inv
,
1518 Addr ea
, Addr sp
, Addr fp
,
1520 XArray
* /* of StackBlock */ thisInstrBlocks
)
1523 /* First, look in the stack blocks accessible in this instruction's
1526 Word i
, nBlocks
= VG_(sizeXA
)( thisInstrBlocks
);
1527 if (nBlocks
== 0) stats__t_i_b_empty
++;
1528 for (i
= 0; i
< nBlocks
; i
++) {
1529 StackBlock
* descr
= VG_(indexXA
)( thisInstrBlocks
, i
);
1530 Addr bea
= calculate_StackBlock_EA( descr
, sp
, fp
);
1531 if (bea
<= ea
&& ea
+ szB
<= bea
+ descr
->szB
) {
1533 inv
->tag
= Inv_Stack0
;
1534 inv
->Inv
.Stack0
.addr
= bea
;
1535 inv
->Inv
.Stack0
.szB
= descr
->szB
;
1536 inv
->Inv
.Stack0
.descr
= descr
;
1537 stats__classify_Stack0
++;
1542 /* Look in this thread's query cache */
1544 QCache
* cache
= &qcaches
[tid
];
1545 static UWord ctr
= 0;
1546 stats__qcache_queries
++;
1547 for (i
= 0; i
< cache
->nInUse
; i
++) {
1548 if (0) /* expensive in a loop like this */
1549 tl_assert(cache
->elems
[i
].addr
+ cache
->elems
[i
].szB
!= 0);
1550 stats__qcache_probes
++;
1551 if (is_subinterval_of(cache
->elems
[i
].addr
,
1552 cache
->elems
[i
].szB
, ea
, szB
)) {
1554 && (ctr
++ & (QCACHE_ADVANCE_EVERY
-1)) == 0) {
1556 tmp
= cache
->elems
[i
-1];
1557 cache
->elems
[i
-1] = cache
->elems
[i
];
1558 cache
->elems
[i
] = tmp
;
1561 *inv
= cache
->elems
[i
].inv
;
1565 stats__qcache_misses
++;
1567 /* Ok, so it's not a block in the top frame. Perhaps it's a block
1568 in some calling frame? Consult this thread's stack-block
1569 interval tree to find out. */
1570 { StackTreeNode
* nd
= find_StackTreeNode( siTrees
[tid
], ea
);
1571 /* We know that [ea,ea+1) is in the block, but we need to
1572 restrict to the case where the whole access falls within
1574 if (nd
&& !is_subinterval_of(nd
->addr
, nd
->szB
, ea
, szB
)) {
1579 inv
->tag
= Inv_StackN
;
1580 inv
->Inv
.StackN
.nd
= nd
;
1581 stats__classify_StackN
++;
1585 /* Not in a stack block. Try the global pool. */
1586 { GlobalTreeNode
* nd
= find_GlobalTreeNode(giTree
, ea
);
1587 /* We know that [ea,ea+1) is in the block, but we need to
1588 restrict to the case where the whole access falls within
1590 if (nd
&& !is_subinterval_of(nd
->addr
, nd
->szB
, ea
, szB
)) {
1595 inv
->tag
= Inv_Global
;
1596 inv
->Inv
.Global
.nd
= nd
;
1597 stats__classify_Global
++;
1601 /* No idea - give up. */
1602 inv
->tag
= Inv_Unknown
;
1603 stats__classify_Unknown
++;
1605 /* Update the cache */
1607 { Addr toadd_addr
= 0;
1608 SizeT toadd_szB
= 0;
1609 QCache
* cache
= &qcaches
[tid
];
1611 static UWord ctr
= 0;
1613 if (0 && 0 == (ctr
++ & 0x1FFFFF)) show
= True
;
1615 if (show
) QCache__pp(cache
, "before upd");
1619 toadd_addr
= inv
->Inv
.Global
.nd
->addr
;
1620 toadd_szB
= inv
->Inv
.Global
.nd
->szB
;
1623 toadd_addr
= inv
->Inv
.StackN
.nd
->addr
;
1624 toadd_szB
= inv
->Inv
.StackN
.nd
->szB
;
1627 /* This is more complex. We need to figure out the
1628 intersection of the "holes" in the global and stack
1629 interval trees into which [ea,ea+szB) falls. This is
1630 further complicated by the fact that [ea,ea+szB) might
1631 not fall cleanly into a hole; it may instead fall across
1632 the boundary of a stack or global block. In that case
1633 we just ignore it and don't update the cache, since we
1634 have no way to represent this situation precisely. */
1635 StackTreeNode sNegInf
, sPosInf
, sKey
, *sLB
, *sUB
;
1636 GlobalTreeNode gNegInf
, gPosInf
, gKey
, *gLB
, *gUB
;
1637 Addr gMin
, gMax
, sMin
, sMax
, uMin
, uMax
;
1641 sPosInf
.addr
= ~(UWord
)0;
1645 gPosInf
.addr
= ~(UWord
)0;
1651 if (0) VG_(printf
)("Tree sizes %lu %lu\n",
1652 VG_(sizeFM
)(siTrees
[tid
]), VG_(sizeFM
)(giTree
));
1653 sOK
= VG_(findBoundsFM
)( siTrees
[tid
],
1654 (UWord
*)&sLB
, NULL
/*unused*/,
1655 (UWord
*)&sUB
, NULL
/*unused*/,
1656 (UWord
)&sNegInf
, 0/*unused*/,
1657 (UWord
)&sPosInf
, 0/*unused*/,
1659 gOK
= VG_(findBoundsFM
)( giTree
,
1660 (UWord
*)&gLB
, NULL
/*unused*/,
1661 (UWord
*)&gUB
, NULL
/*unused*/,
1662 (UWord
)&gNegInf
, 0/*unused*/,
1663 (UWord
)&gPosInf
, 0/*unused*/,
1665 if (!(sOK
&& gOK
)) {
1666 /* If this happens, then [ea,ea+szB) partially overlaps
1667 a heap or stack block. We can't represent that, so
1668 just forget it (should be very rare). However, do
1669 maximum sanity checks first. In such a
1670 partial overlap case, it can't be the case that both
1671 [ea] and [ea+szB-1] overlap the same block, since if
1672 that were indeed the case then it wouldn't be a
1673 partial overlap; rather it would simply fall inside
1674 that block entirely and we shouldn't be inside this
1675 conditional at all. */
1677 StackTreeNode
*ndFirst
, *ndLast
;
1678 ndFirst
= find_StackTreeNode( siTrees
[tid
], ea
);
1679 ndLast
= find_StackTreeNode( siTrees
[tid
], ea
+szB
-1 );
1680 /* if both ends of the range fall inside a block,
1681 they can't be in the same block. */
1682 if (ndFirst
&& ndLast
)
1683 tl_assert(ndFirst
!= ndLast
);
1684 /* for each end of the range, if it is in a block,
1685 the range as a whole can't be entirely within the
1688 tl_assert(!is_subinterval_of(ndFirst
->addr
,
1689 ndFirst
->szB
, ea
, szB
));
1691 tl_assert(!is_subinterval_of(ndLast
->addr
,
1692 ndLast
->szB
, ea
, szB
));
1695 GlobalTreeNode
*ndFirst
, *ndLast
;
1696 ndFirst
= find_GlobalTreeNode( giTree
, ea
);
1697 ndLast
= find_GlobalTreeNode( giTree
, ea
+szB
-1 );
1698 /* if both ends of the range fall inside a block,
1699 they can't be in the same block. */
1700 if (ndFirst
&& ndLast
)
1701 tl_assert(ndFirst
!= ndLast
);
1702 /* for each end of the range, if it is in a block,
1703 the range as a whole can't be entirely within the
1706 tl_assert(!is_subinterval_of(ndFirst
->addr
,
1707 ndFirst
->szB
, ea
, szB
));
1709 tl_assert(!is_subinterval_of(ndLast
->addr
,
1710 ndLast
->szB
, ea
, szB
));
1712 if (0) VG_(printf
)("overlapping blocks in cache\n");
1715 sMin
= sLB
== &sNegInf
? 0 : (sLB
->addr
+ sLB
->szB
);
1716 sMax
= sUB
== &sPosInf
? ~(UWord
)0 : (sUB
->addr
- 1);
1717 gMin
= gLB
== &gNegInf
? 0 : (gLB
->addr
+ gLB
->szB
);
1718 gMax
= gUB
== &gPosInf
? ~(UWord
)0 : (gUB
->addr
- 1);
1719 if (0) VG_(printf
)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
1720 sMin
, sMax
, gMin
, gMax
);
1721 /* [sMin,sMax] and [gMin,gMax] must both contain
1722 [ea,ea+szB) (right?) That implies they must overlap at
1723 at least over [ea,ea+szB). */
1724 tl_assert(sMin
<= ea
&& ea
+szB
-1 <= sMax
);
1725 tl_assert(gMin
<= ea
&& ea
+szB
-1 <= gMax
);
1726 /* So now compute their intersection. */
1727 uMin
= Addr__max( sMin
, gMin
);
1728 uMax
= Addr__min( sMax
, gMax
);
1729 if (0) VG_(printf
)("uMin %lx uMax %lx\n", uMin
, uMax
);
1730 tl_assert(uMin
<= uMax
);
1731 tl_assert(uMin
<= ea
&& ea
+szB
-1 <= uMax
);
1732 /* Finally, we can park [uMin,uMax] in the cache. However,
1733 if uMax is ~0, we can't represent the difference; hence
1735 if (uMin
< uMax
&& uMax
== ~(UWord
)0)
1738 toadd_szB
= uMax
- uMin
+ 1;
1742 /* We should only be caching info for the above 3 cases */
1744 } /* switch (inv->tag) */
1746 { /* and actually add this to the cache, finally */
1748 Word ip
= cache
->nInUse
/ 2; /* doesn't seem critical */
1750 if (cache
->nInUse
< N_QCACHE
)
1752 for (i
= cache
->nInUse
-1; i
> ip
; i
--) {
1753 cache
->elems
[i
] = cache
->elems
[i
-1];
1756 tl_assert(toadd_szB
> 0);
1757 cache
->elems
[ip
].addr
= toadd_addr
;
1758 cache
->elems
[ip
].szB
= toadd_szB
;
1759 cache
->elems
[ip
].inv
= *inv
;
1762 if (show
) QCache__pp(cache
, "after upd");
1768 /* CALLED FROM GENERATED CODE */
1771 void helperc__mem_access ( /* Known only at run time: */
1772 Addr ea
, Addr sp
, Addr fp
,
1773 /* Known at translation time: */
1774 Word sszB
, Addr ip
, XArray
* ip_frameBlocks
)
1777 IInstance
* iinstance
;
1780 ThreadId tid
= VG_(get_running_tid
)();
1782 HChar bufE
[160], bufA
[160], bufD
[32];
1784 stats__total_accesses
++;
1786 tl_assert(is_sane_TId(tid
));
1787 frame
= shadowStacks
[tid
];
1790 /* Find the instance info for this instruction. */
1791 tl_assert(ip_frameBlocks
);
1792 iinstance
= find_or_create_IInstance( frame
, ip
, ip_frameBlocks
);
1793 tl_assert(iinstance
);
1794 tl_assert(iinstance
->blocks
== ip_frameBlocks
);
1796 szB
= (sszB
< 0) ? (-sszB
) : sszB
;
1799 inv
= &iinstance
->invar
;
1801 /* Deal with first uses of instruction instances. */
1802 if (inv
->tag
== Inv_Unset
) {
1803 /* This is the first use of this instance of the instruction, so
1804 we can't make any check; we merely record what we saw, so we
1805 can compare it against what happens for 2nd and subsequent
1807 classify_address( inv
,
1808 tid
, ea
, sp
, fp
, szB
, iinstance
->blocks
);
1809 tl_assert(inv
->tag
!= Inv_Unset
);
1813 /* So generate an Invar and see if it's different from what
1815 classify_address( &new_inv
,
1816 tid
, ea
, sp
, fp
, szB
, iinstance
->blocks
);
1817 tl_assert(new_inv
.tag
!= Inv_Unset
);
1819 /* Did we see something different from before? If no, then there's
1821 tl_assert(inv
->tag
!= Inv_Unset
);
1823 if (LIKELY(eq_Invar(&new_inv
, inv
)))
1826 VG_(memset
)(bufE
, 0, sizeof(bufE
));
1827 show_Invar( bufE
, sizeof(bufE
)-1, inv
, frame
->depth
);
1829 VG_(memset
)(bufA
, 0, sizeof(bufA
));
1830 show_Invar( bufA
, sizeof(bufA
)-1, &new_inv
, frame
->depth
);
1832 VG_(memset
)(bufD
, 0, sizeof(bufD
));
1834 gen_delta_str( bufD
, &absDelta
, inv
, ea
);
1836 if (absDelta
< 1024)
1837 sg_record_error_SorG( tid
, ea
, sszB
, bufE
, bufA
, bufD
);
1839 /* And now install the new observation as "standard", so as to
1840 make future error messages make more sense. */
1845 ////////////////////////////////////////
1846 /* Primary push-a-new-frame routine. Called indirectly from
1849 static UWord stats__max_sitree_size
= 0;
1850 static UWord stats__max_gitree_size
= 0;
1853 void shadowStack_new_frame ( ThreadId tid
,
1854 Addr sp_at_call_insn
,
1855 Addr sp_post_call_insn
,
1856 Addr fp_at_call_insn
,
1857 Addr ip_post_call_insn
,
1858 XArray
* descrs_at_call_insn
)
1860 StackFrame
*callee
, *caller
;
1861 tl_assert(is_sane_TId(tid
));
1863 caller
= shadowStacks
[tid
];
1866 if (caller
->outer
) { /* "this is not the outermost frame" */
1867 tl_assert(caller
->outer
->inner
== caller
);
1868 tl_assert(caller
->outer
->depth
>= 0);
1869 tl_assert(1 + caller
->outer
->depth
== caller
->depth
);
1871 tl_assert(caller
->depth
== 0);
1874 caller
->sp_at_call
= sp_at_call_insn
;
1875 caller
->fp_at_call
= fp_at_call_insn
;
1877 if (descrs_at_call_insn
) {
1878 tl_assert( VG_(sizeXA
)(descrs_at_call_insn
) > 0 );
1879 caller
->blocks_added_by_call
1880 = calculate_StackBlock_EAs( descrs_at_call_insn
,
1881 sp_at_call_insn
, fp_at_call_insn
);
1882 if (caller
->blocks_added_by_call
)
1883 add_blocks_to_StackTree( siTrees
[tid
],
1884 descrs_at_call_insn
,
1885 caller
->blocks_added_by_call
,
1886 caller
->depth
/* stack depth at which
1888 considered to exist*/ );
1890 UWord s
= VG_(sizeFM
)( siTrees
[tid
] );
1891 UWord g
= VG_(sizeFM
)( giTree
);
1892 Bool sb
= s
> stats__max_sitree_size
;
1893 Bool gb
= g
> stats__max_gitree_size
;
1894 if (sb
) stats__max_sitree_size
= s
;
1895 if (gb
) stats__max_gitree_size
= g
;
1896 if (0 && (sb
|| gb
))
1897 VG_(message
)(Vg_DebugMsg
,
1898 "exp-sgcheck: new max tree sizes: "
1899 "StackTree %lu, GlobalTree %lu\n",
1900 stats__max_sitree_size
, stats__max_gitree_size
);
1903 caller
->blocks_added_by_call
= NULL
;
1906 /* caller->blocks_added_by_call is used again (and then freed) when
1907 this frame is removed from the stack. */
1909 if (caller
->inner
) {
1910 callee
= caller
->inner
;
1912 callee
= sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame
));
1913 VG_(memset
)(callee
, 0, sizeof(StackFrame
));
1914 callee
->outer
= caller
;
1915 caller
->inner
= callee
;
1916 callee
->depth
= 1 + caller
->depth
;
1917 tl_assert(callee
->inner
== NULL
);
1920 /* This sets up .htab, .htab_size and .htab_used */
1921 initialise_II_hash_table( callee
);
1923 callee
->creation_sp
= sp_post_call_insn
;
1924 callee
->sp_at_call
= 0; // not actually required ..
1925 callee
->fp_at_call
= 0; // .. these 3 initialisations are ..
1926 callee
->blocks_added_by_call
= NULL
; // .. just for cleanness
1928 /* record the new running stack frame */
1929 shadowStacks
[tid
] = callee
;
1931 /* and this thread's query cache is now invalid */
1932 QCache__invalidate( &qcaches
[tid
] );
1935 { Word d
= callee
->depth
;
1936 const HChar
*fnname
;
1938 Addr ip
= ip_post_call_insn
;
1939 ok
= VG_(get_fnname_w_offset
)( ip
, &fnname
);
1944 VG_(printf
)("> %s %#lx\n", ok
? fnname
: "???", ip
);
1948 /* CALLED FROM GENERATED CODE */
1951 void helperc__new_frame ( Addr sp_post_call_insn
,
1952 Addr fp_at_call_insn
,
1953 Addr ip_post_call_insn
,
1954 XArray
* blocks_at_call_insn
,
1957 ThreadId tid
= VG_(get_running_tid
)();
1958 Addr sp_at_call_insn
= sp_post_call_insn
+ sp_adjust
;
1959 shadowStack_new_frame( tid
,
1964 blocks_at_call_insn
);
1968 ////////////////////////////////////////
1969 /* Primary remove-frame(s) routine. Called indirectly from
1972 __attribute__((noinline
))
1973 static void shadowStack_unwind ( ThreadId tid
, Addr sp_now
)
1975 StackFrame
*innermost
, *innermostOrig
;
1976 tl_assert(is_sane_TId(tid
));
1977 innermost
= shadowStacks
[tid
];
1978 tl_assert(innermost
);
1979 innermostOrig
= innermost
;
1980 //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
1982 if (!innermost
->outer
)
1984 if (innermost
->inner
)
1985 tl_assert(innermost
->inner
->outer
== innermost
);
1986 tl_assert(innermost
->outer
->inner
== innermost
);
1987 tl_assert(innermost
->blocks_added_by_call
== NULL
);
1988 if (sp_now
<= innermost
->creation_sp
) break;
1989 //VG_(printf)("UNWIND dump %p\n", innermost->creation_sp);
1990 tl_assert(innermost
->htab
);
1991 if (innermost
->htab
!= &innermost
->htab_fixed
[0])
1992 sg_free(innermost
->htab
);
1993 /* be on the safe side */
1994 innermost
->creation_sp
= 0;
1995 innermost
->htab
= NULL
;
1996 innermost
->htab_size
= 0;
1997 innermost
->htab_used
= 0;
1998 innermost
->sp_at_call
= 0;
1999 innermost
->fp_at_call
= 0;
2000 innermost
->blocks_added_by_call
= NULL
;
2001 innermost
= innermost
->outer
;
2003 /* So now we're "back" in the calling frame. Remove from this
2004 thread's stack-interval-tree, the blocks added at the time of
2007 if (innermost
->outer
) { /* not at the outermost frame */
2008 if (innermost
->blocks_added_by_call
== NULL
) {
2010 del_blocks_from_StackTree( siTrees
[tid
],
2011 innermost
->blocks_added_by_call
);
2012 VG_(deleteXA
)( innermost
->blocks_added_by_call
);
2013 innermost
->blocks_added_by_call
= NULL
;
2016 /* That completes the required tidying of the interval tree
2017 associated with the frame we just removed. */
2020 Word d
= innermost
->depth
;
2030 tl_assert(innermost
);
2032 if (innermost
!= innermostOrig
) {
2033 shadowStacks
[tid
] = innermost
;
2034 /* this thread's query cache is now invalid */
2035 QCache__invalidate( &qcaches
[tid
] );
2041 //////////////////////////////////////////////////////////////
2043 // Instrumentation //
2045 //////////////////////////////////////////////////////////////
2047 /* What does instrumentation need to do?
2049 - at each Call transfer, generate a call to shadowStack_new_frame
2050 do this by manually inspecting the IR
2052 - at each sp change, if the sp change is negative,
2053 call shadowStack_unwind
2054 do this by asking for SP-change analysis
2056 - for each memory referencing instruction,
2057 call helperc__mem_access
2060 /* A complication: sg_ instrumentation and h_ instrumentation need to
2061 be interleaved. Since the latter is a lot more complex than the
2062 former, we split the sg_ instrumentation here into four functions
2063 and let the h_ instrumenter call the four functions as it goes.
2064 Hence the h_ instrumenter drives the sg_ instrumenter.
2066 To make this viable, the sg_ instrumenter carries what running
2067 state it needs in 'struct _SGEnv'. This is exported only
2068 abstractly from this file.
2072 /* the current insn's IP */
2074 /* whether the above is actually known */
2076 /* if we find a mem ref, is it the first for this insn? Used for
2077 detecting insns which make more than one memory ref, a situation
2078 we basically can't really handle properly; and so we ignore all
2079 but the first ref. */
2082 IRTemp (*newIRTemp_cb
)(IRType
,void*);
2083 void* newIRTemp_opaque
;
2087 /* --- Helper fns for instrumentation --- */
2089 static IRTemp
gen_Get_SP ( struct _SGEnv
* sge
,
2091 const VexGuestLayout
* layout
,
2097 /* This in effect forces the host and guest word sizes to be the
2099 tl_assert(hWordTy_szB
== layout
->sizeof_SP
);
2100 sp_type
= layout
->sizeof_SP
== 8 ? Ity_I64
: Ity_I32
;
2101 sp_expr
= IRExpr_Get( layout
->offset_SP
, sp_type
);
2102 sp_temp
= sge
->newIRTemp_cb( sp_type
, sge
->newIRTemp_opaque
);
2103 addStmtToIRSB( bbOut
, IRStmt_WrTmp( sp_temp
, sp_expr
) );
2107 static IRTemp
gen_Get_FP ( struct _SGEnv
* sge
,
2109 const VexGuestLayout
* layout
,
2115 /* This in effect forces the host and guest word sizes to be the
2117 tl_assert(hWordTy_szB
== layout
->sizeof_SP
);
2118 fp_type
= layout
->sizeof_FP
== 8 ? Ity_I64
: Ity_I32
;
2119 fp_expr
= IRExpr_Get( layout
->offset_FP
, fp_type
);
2120 fp_temp
= sge
->newIRTemp_cb( fp_type
, sge
->newIRTemp_opaque
);
2121 addStmtToIRSB( bbOut
, IRStmt_WrTmp( fp_temp
, fp_expr
) );
2125 static void instrument_mem_access ( struct _SGEnv
* sge
,
2132 const VexGuestLayout
* layout
)
2134 IRType tyAddr
= Ity_INVALID
;
2135 XArray
* frameBlocks
= NULL
;
2137 tl_assert(isIRAtom(addr
));
2138 tl_assert(hWordTy_szB
== 4 || hWordTy_szB
== 8);
2140 tyAddr
= typeOfIRExpr( bbOut
->tyenv
, addr
);
2141 tl_assert(tyAddr
== Ity_I32
|| tyAddr
== Ity_I64
);
2143 #if defined(VGA_x86)
2144 { UChar
* p
= (UChar
*)curr_IP
;
2146 if (p
[0] == 0xc3 && p
[-1] == 0x5d) return;
2147 // pop %ebp; RET $imm16
2148 if (p
[0] == 0xc2 && p
[-1] == 0x5d) return;
2149 // PUSH %EBP; mov %esp,%ebp
2150 if (p
[0] == 0x55 && p
[1] == 0x89 && p
[2] == 0xe5) return;
2154 /* First off, find or create the StackBlocks for this instruction. */
2155 frameBlocks
= get_StackBlocks_for_IP( curr_IP
);
2156 tl_assert(frameBlocks
);
2157 //if (VG_(sizeXA)( frameBlocks ) == 0)
2158 // frameBlocks = NULL;
2160 /* Generate a call to "helperc__mem_access", passing:
2161 addr current_SP current_FP szB curr_IP frameBlocks
2163 { IRTemp t_SP
= gen_Get_SP( sge
, bbOut
, layout
, hWordTy_szB
);
2164 IRTemp t_FP
= gen_Get_FP( sge
, bbOut
, layout
, hWordTy_szB
);
2166 = mkIRExprVec_6( addr
,
2169 mkIRExpr_HWord( isStore
? (-szB
) : szB
),
2170 mkIRExpr_HWord( curr_IP
),
2171 mkIRExpr_HWord( (HWord
)frameBlocks
) );
2173 = unsafeIRDirty_0_N( 3/*regparms*/,
2174 "helperc__mem_access",
2175 VG_(fnptr_to_fnentry
)( &helperc__mem_access
),
2178 addStmtToIRSB( bbOut
, IRStmt_Dirty(di
) );
2183 /* --- Instrumentation main (4 fns) --- */
2185 struct _SGEnv
* sg_instrument_init ( IRTemp (*newIRTemp_cb
)(IRType
,void*),
2186 void* newIRTemp_opaque
)
2188 struct _SGEnv
* env
= sg_malloc("di.sg_main.sii.1",
2189 sizeof(struct _SGEnv
));
2192 env
->curr_IP_known
= False
;
2193 env
->firstRef
= True
;
2194 env
->newIRTemp_cb
= newIRTemp_cb
;
2195 env
->newIRTemp_opaque
= newIRTemp_opaque
;
2199 void sg_instrument_fini ( struct _SGEnv
* env
)
2204 /* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
2205 as required. This must be called before 'st' itself is added to
2207 void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv
* env
,
2210 const VexGuestLayout
* layout
,
2211 IRType gWordTy
, IRType hWordTy
)
2213 if (!sg_clo_enable_sg_checks
)
2217 tl_assert(isFlatIRStmt(st
));
2224 /* None of these can contain any memory references. */
2228 tl_assert(st
->Ist
.Exit
.jk
!= Ijk_Call
);
2229 /* else we must deal with a conditional call */
2233 env
->curr_IP_known
= True
;
2234 env
->curr_IP
= st
->Ist
.IMark
.addr
;
2235 env
->firstRef
= True
;
2239 tl_assert(env
->curr_IP_known
);
2240 if (env
->firstRef
) {
2241 instrument_mem_access(
2244 sizeofIRType(typeOfIRExpr(sbOut
->tyenv
, st
->Ist
.Store
.data
)),
2246 sizeofIRType(hWordTy
),
2247 env
->curr_IP
, layout
2249 env
->firstRef
= False
;
2254 IRExpr
* data
= st
->Ist
.WrTmp
.data
;
2255 if (data
->tag
== Iex_Load
) {
2256 tl_assert(env
->curr_IP_known
);
2257 if (env
->firstRef
) {
2258 instrument_mem_access(
2260 data
->Iex
.Load
.addr
,
2261 sizeofIRType(data
->Iex
.Load
.ty
),
2263 sizeofIRType(hWordTy
),
2264 env
->curr_IP
, layout
2266 env
->firstRef
= False
;
2274 IRDirty
* d
= st
->Ist
.Dirty
.details
;
2275 if (d
->mFx
!= Ifx_None
) {
2276 /* This dirty helper accesses memory. Collect the
2278 tl_assert(env
->curr_IP_known
);
2279 if (env
->firstRef
) {
2280 tl_assert(d
->mAddr
!= NULL
);
2281 tl_assert(d
->mSize
!= 0);
2282 dataSize
= d
->mSize
;
2283 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
) {
2284 instrument_mem_access(
2285 env
, sbOut
, d
->mAddr
, dataSize
, False
/*!isStore*/,
2286 sizeofIRType(hWordTy
), env
->curr_IP
, layout
2289 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
) {
2290 instrument_mem_access(
2291 env
, sbOut
, d
->mAddr
, dataSize
, True
/*isStore*/,
2292 sizeofIRType(hWordTy
), env
->curr_IP
, layout
2295 env
->firstRef
= False
;
2298 tl_assert(d
->mAddr
== NULL
);
2299 tl_assert(d
->mSize
== 0);
2305 /* We treat it as a read and a write of the location. I
2306 think that is the same behaviour as it was before IRCAS
2307 was introduced, since prior to that point, the Vex front
2308 ends would translate a lock-prefixed instruction into a
2309 (normal) read followed by a (normal) write. */
2310 if (env
->firstRef
) {
2312 IRCAS
* cas
= st
->Ist
.CAS
.details
;
2313 tl_assert(cas
->addr
!= NULL
);
2314 tl_assert(cas
->dataLo
!= NULL
);
2315 dataSize
= sizeofIRType(typeOfIRExpr(sbOut
->tyenv
, cas
->dataLo
));
2316 if (cas
->dataHi
!= NULL
)
2317 dataSize
*= 2; /* since it's a doubleword-CAS */
2318 instrument_mem_access(
2319 env
, sbOut
, cas
->addr
, dataSize
, False
/*!isStore*/,
2320 sizeofIRType(hWordTy
), env
->curr_IP
, layout
2322 instrument_mem_access(
2323 env
, sbOut
, cas
->addr
, dataSize
, True
/*isStore*/,
2324 sizeofIRType(hWordTy
), env
->curr_IP
, layout
2326 env
->firstRef
= False
;
2334 } /* switch (st->tag) */
2338 /* Add instrumentation for the final jump of an IRSB 'sbOut', and
2339 possibly modify 'env' as required. This must be the last
2340 instrumentation statement in the block. */
2341 void sg_instrument_final_jump ( /*MOD*/struct _SGEnv
* env
,
2344 IRJumpKind jumpkind
,
2345 const VexGuestLayout
* layout
,
2346 IRType gWordTy
, IRType hWordTy
)
2348 if (!sg_clo_enable_sg_checks
)
2351 if (jumpkind
== Ijk_Call
) {
2352 // Assumes x86 or amd64
2353 IRTemp sp_post_call_insn
, fp_post_call_insn
;
2354 XArray
* frameBlocks
;
2358 = gen_Get_SP( env
, sbOut
, layout
, sizeofIRType(hWordTy
) );
2360 = gen_Get_FP( env
, sbOut
, layout
, sizeofIRType(hWordTy
) );
2361 tl_assert(env
->curr_IP_known
);
2362 frameBlocks
= get_StackBlocks_for_IP( env
->curr_IP
);
2363 tl_assert(frameBlocks
);
2364 if (VG_(sizeXA
)(frameBlocks
) == 0)
2368 IRExpr_RdTmp(sp_post_call_insn
),
2369 IRExpr_RdTmp(fp_post_call_insn
),
2370 /* assume the call doesn't change FP */
2372 mkIRExpr_HWord( (HWord
)frameBlocks
),
2373 mkIRExpr_HWord( sizeofIRType(gWordTy
) )
2375 di
= unsafeIRDirty_0_N(
2377 "helperc__new_frame",
2378 VG_(fnptr_to_fnentry
)( &helperc__new_frame
),
2380 addStmtToIRSB( sbOut
, IRStmt_Dirty(di
) );
2385 //////////////////////////////////////////////////////////////
2387 // end Instrumentation //
2389 //////////////////////////////////////////////////////////////
2392 //////////////////////////////////////////////////////////////
2396 //////////////////////////////////////////////////////////////
2398 /* Make a new empty stack frame that is suitable for being the
2399 outermost frame in a stack. It has a creation_sp of effectively
2400 infinity, so it can never be removed. */
2401 static StackFrame
* new_root_StackFrame ( void )
2403 StackFrame
* sframe
= sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame
));
2404 VG_(memset
)( sframe
, 0, sizeof(*sframe
) );
2405 sframe
->creation_sp
= ~0UL;
2407 /* This sets up .htab, .htab_size and .htab_used */
2408 initialise_II_hash_table( sframe
);
2410 /* ->depth, ->outer, ->inner are 0, NULL, NULL */
2415 /* Primary routine for setting up the shadow stack for a new thread.
2416 Note that this is used to create not only child thread stacks, but
2417 the root thread's stack too. We create a new stack with
2418 .creation_sp set to infinity, so that the outermost frame can never
2419 be removed (by shadowStack_unwind). The core calls this function
2420 as soon as a thread is created. We cannot yet get its SP value,
2421 since that may not yet be set. */
2422 static void shadowStack_thread_create ( ThreadId parent
, ThreadId child
)
2424 tl_assert(is_sane_TId(child
));
2425 if (parent
== VG_INVALID_THREADID
) {
2426 /* creating the main thread's stack */
2428 tl_assert(is_sane_TId(parent
));
2429 tl_assert(parent
!= child
);
2430 tl_assert(shadowStacks
[parent
] != NULL
);
2431 tl_assert(siTrees
[parent
] != NULL
);
2434 /* Create the child's stack. Bear in mind we may be re-using
2436 if (shadowStacks
[child
] == NULL
) {
2437 /* First use of this stack. Just allocate an initial frame. */
2438 tl_assert(siTrees
[child
] == NULL
);
2440 StackFrame
*frame
, *frame2
;
2441 /* re-using a stack. */
2442 /* get rid of the interval tree */
2443 tl_assert(siTrees
[child
] != NULL
);
2444 delete_StackTree( siTrees
[child
] );
2445 siTrees
[child
] = NULL
;
2446 /* Throw away all existing frames. */
2447 frame
= shadowStacks
[child
];
2448 while (frame
->outer
)
2449 frame
= frame
->outer
;
2450 tl_assert(frame
->depth
== 0);
2452 frame2
= frame
->inner
;
2453 if (frame2
) tl_assert(1 + frame
->depth
== frame2
->depth
);
2457 shadowStacks
[child
] = NULL
;
2460 tl_assert(shadowStacks
[child
] == NULL
);
2461 tl_assert(siTrees
[child
] == NULL
);
2463 /* Set up the initial stack frame. */
2464 shadowStacks
[child
] = new_root_StackFrame();
2466 /* and set up the child's stack block interval tree. */
2467 siTrees
[child
] = new_StackTree();
2470 /* Once a thread is ready to go, the core calls here. We take the
2471 opportunity to push a second frame on its stack, with the
2472 presumably valid SP value that is going to be used for the thread's
2473 startup. Hence we should always wind up with a valid outermost
2474 frame for the thread. */
2475 static void shadowStack_set_initial_SP ( ThreadId tid
)
2478 tl_assert(is_sane_TId(tid
));
2479 sf
= shadowStacks
[tid
];
2480 tl_assert(sf
!= NULL
);
2481 tl_assert(sf
->outer
== NULL
);
2482 tl_assert(sf
->inner
== NULL
);
2483 tl_assert(sf
->creation_sp
== ~0UL);
2484 shadowStack_new_frame( tid
, 0, VG_(get_SP
)(tid
),
2485 0, VG_(get_IP
)(tid
), NULL
);
2489 //////////////////////////////////////////////////////////////
2493 //////////////////////////////////////////////////////////////
2495 /* CALLED indirectly FROM GENERATED CODE. Calls here are created by
2496 sp-change analysis, as requested in pc_pre_clo_int(). */
2497 void sg_die_mem_stack ( Addr old_SP
, SizeT len
) {
2498 ThreadId tid
= VG_(get_running_tid
)();
2499 shadowStack_unwind( tid
, old_SP
+len
);
2502 void sg_pre_clo_init ( void ) {
2504 init_StackBlocks_set();
2505 init_GlobalBlock_set();
2508 void sg_post_clo_init ( void ) {
2511 void sg_pre_thread_ll_create ( ThreadId parent
, ThreadId child
) {
2512 shadowStack_thread_create(parent
, child
);
2515 void sg_pre_thread_first_insn ( ThreadId tid
) {
2516 shadowStack_set_initial_SP(tid
);
2519 void sg_fini(Int exitcode
)
2521 if (VG_(clo_stats
)) {
2522 VG_(message
)(Vg_DebugMsg
,
2523 " sg_: %'llu total accesses, of which:\n", stats__total_accesses
);
2524 VG_(message
)(Vg_DebugMsg
,
2525 " sg_: stack0: %'12llu classify\n",
2526 stats__classify_Stack0
);
2527 VG_(message
)(Vg_DebugMsg
,
2528 " sg_: stackN: %'12llu classify\n",
2529 stats__classify_StackN
);
2530 VG_(message
)(Vg_DebugMsg
,
2531 " sg_: global: %'12llu classify\n",
2532 stats__classify_Global
);
2533 VG_(message
)(Vg_DebugMsg
,
2534 " sg_: unknown: %'12llu classify\n",
2535 stats__classify_Unknown
);
2536 VG_(message
)(Vg_DebugMsg
,
2537 " sg_: %'llu Invars preened, of which %'llu changed\n",
2538 stats__Invars_preened
, stats__Invars_changed
);
2539 VG_(message
)(Vg_DebugMsg
,
2540 " sg_: t_i_b_MT: %'12llu\n", stats__t_i_b_empty
);
2541 VG_(message
)(Vg_DebugMsg
,
2542 " sg_: qcache: %'llu searches, %'llu probes, %'llu misses\n",
2543 stats__qcache_queries
, stats__qcache_probes
, stats__qcache_misses
);
2544 VG_(message
)(Vg_DebugMsg
,
2545 " sg_: htab-fast: %'llu hits\n",
2547 VG_(message
)(Vg_DebugMsg
,
2548 " sg_: htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
2549 stats__htab_searches
, stats__htab_probes
, stats__htab_resizes
);
2554 /*--------------------------------------------------------------------*/
2555 /*--- end sg_main.c ---*/
2556 /*--------------------------------------------------------------------*/