2 /*--------------------------------------------------------------------*/
3 /*--- Ptrcheck: a pointer-use checker. ---*/
4 /*--- This file checks stack and global array accesses. ---*/
6 /*--------------------------------------------------------------------*/
9 This file is part of Ptrcheck, a Valgrind tool for checking pointer
12 Copyright (C) 2008-2011 OpenWorks Ltd
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 The GNU General Public License is contained in the file COPYING.
32 Neither the names of the U.S. Department of Energy nor the
33 University of California nor the names of its contributors may be
34 used to endorse or promote products derived from this software
35 without prior written permission.
38 #include "pub_tool_basics.h"
39 #include "pub_tool_libcbase.h"
40 #include "pub_tool_libcassert.h"
41 #include "pub_tool_libcprint.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
44 #include "pub_tool_xarray.h"
45 #include "pub_tool_threadstate.h"
46 #include "pub_tool_mallocfree.h"
47 #include "pub_tool_machine.h"
48 #include "pub_tool_debuginfo.h"
49 #include "pub_tool_options.h"
51 #include "pc_common.h"
53 #include "sg_main.h" // self
57 void preen_global_Invars ( Addr a
, SizeT len
); /*fwds*/
60 //////////////////////////////////////////////////////////////
64 //////////////////////////////////////////////////////////////
66 static inline Bool
is_sane_TId ( ThreadId tid
)
68 return tid
>= 0 && tid
< VG_N_THREADS
69 && tid
!= VG_INVALID_THREADID
;
72 static void* sg_malloc ( HChar
* cc
, SizeT n
) {
75 p
= VG_(malloc
)( cc
, n
);
80 static void sg_free ( void* p
) {
86 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
87 first interval is lower, 1 if the first interval is higher, and 0
88 if there is any overlap. Redundant paranoia with casting is there
89 following what looked distinctly like a bug in gcc-4.1.2, in which
90 some of the comparisons were done signedly instead of
93 static Word
cmp_nonempty_intervals ( Addr a1
, SizeT n1
,
95 UWord a1w
= (UWord
)a1
;
96 UWord n1w
= (UWord
)n1
;
97 UWord a2w
= (UWord
)a2
;
98 UWord n2w
= (UWord
)n2
;
99 tl_assert(n1w
> 0 && n2w
> 0);
100 if (a1w
+ n1w
<= a2w
) return -1L;
101 if (a2w
+ n2w
<= a1w
) return 1L;
105 /* Return true iff [aSmall,aSmall+nSmall) is entirely contained
106 within [aBig,aBig+nBig). */
108 static Bool
is_subinterval_of ( Addr aBig
, SizeT nBig
,
109 Addr aSmall
, SizeT nSmall
) {
110 tl_assert(nBig
> 0 && nSmall
> 0);
111 return aBig
<= aSmall
&& aSmall
+ nSmall
<= aBig
+ nBig
;
115 static Addr
Addr__min ( Addr a1
, Addr a2
) {
116 return a1
< a2
? a1
: a2
;
120 static Addr
Addr__max ( Addr a1
, Addr a2
) {
121 return a1
< a2
? a2
: a1
;
125 //////////////////////////////////////////////////////////////
127 // StackBlocks Persistent Cache //
129 //////////////////////////////////////////////////////////////
131 /* We maintain a set of XArray* of StackBlocks. These are never
132 freed. When a new StackBlock vector is acquired from
133 VG_(di_get_local_blocks_at_ip), we compare it to the existing set.
134 If not present, it is added. If present, the just-acquired one is
135 freed and the copy used.
137 This simplifies storage management elsewhere. It allows us to
138 assume that a pointer to an XArray* of StackBlock is valid forever.
139 It also means there are no duplicates anywhere, which could be
140 important from a space point of view for programs that generate a
141 lot of translations, or where translations are frequently discarded
144 Note that we normalise the arrays by sorting the elements according
145 to an arbitrary total order, so as to avoid the situation that two
146 vectors describe the same set of variables but are not structurally
149 static inline Bool
StackBlock__sane ( StackBlock
* fb
)
151 if (fb
->name
[ sizeof(fb
->name
)-1 ] != 0)
153 if (fb
->spRel
!= False
&& fb
->spRel
!= True
)
155 if (fb
->isVec
!= False
&& fb
->isVec
!= True
)
160 /* Generate an arbitrary total ordering on StackBlocks. */
161 static Word
StackBlock__cmp ( StackBlock
* fb1
, StackBlock
* fb2
)
164 tl_assert(StackBlock__sane(fb1
));
165 tl_assert(StackBlock__sane(fb2
));
166 /* Hopefully the .base test hits most of the time. For the blocks
167 associated with any particular instruction, if the .base values
168 are the same then probably it doesn't make sense for the other
169 fields to be different. But this is supposed to be a completely
170 general structural total order, so we have to compare everything
172 if (fb1
->base
< fb2
->base
) return -1;
173 if (fb1
->base
> fb2
->base
) return 1;
175 if (fb1
->szB
< fb2
->szB
) return -1;
176 if (fb1
->szB
> fb2
->szB
) return 1;
177 /* compare sp/fp flag */
178 if (fb1
->spRel
< fb2
->spRel
) return -1;
179 if (fb1
->spRel
> fb2
->spRel
) return 1;
180 /* compare is/is-not array-typed flag */
181 if (fb1
->isVec
< fb2
->isVec
) return -1;
182 if (fb1
->isVec
> fb2
->isVec
) return 1;
183 /* compare the name */
184 r
= (Word
)VG_(strcmp
)(fb1
->name
, fb2
->name
);
188 /* Returns True if all fields except .szB are the same. szBs may or
189 may not be the same; they are simply not consulted. */
190 static Bool
StackBlock__all_fields_except_szB_are_equal (
195 tl_assert(StackBlock__sane(fb1
));
196 tl_assert(StackBlock__sane(fb2
));
197 return fb1
->base
== fb2
->base
198 && fb1
->spRel
== fb2
->spRel
199 && fb1
->isVec
== fb2
->isVec
200 && 0 == VG_(strcmp
)(fb1
->name
, fb2
->name
);
204 /* Generate an arbitrary total ordering on vectors of StackBlocks. */
205 static Word
StackBlocks__cmp ( XArray
* fb1s
, XArray
* fb2s
)
208 n1
= VG_(sizeXA
)( fb1s
);
209 n2
= VG_(sizeXA
)( fb2s
);
210 if (n1
< n2
) return -1;
211 if (n1
> n2
) return 1;
212 for (i
= 0; i
< n1
; i
++) {
213 StackBlock
*fb1
, *fb2
;
214 fb1
= VG_(indexXA
)( fb1s
, i
);
215 fb2
= VG_(indexXA
)( fb2s
, i
);
216 r
= StackBlock__cmp( fb1
, fb2
);
217 if (r
!= 0) return r
;
219 tl_assert(i
== n1
&& i
== n2
);
223 static void pp_StackBlocks ( XArray
* sbs
)
225 Word i
, n
= VG_(sizeXA
)( sbs
);
226 VG_(message
)(Vg_DebugMsg
, "<<< STACKBLOCKS\n" );
227 for (i
= 0; i
< n
; i
++) {
228 StackBlock
* sb
= (StackBlock
*)VG_(indexXA
)( sbs
, i
);
229 VG_(message
)(Vg_DebugMsg
,
230 " StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
231 sb
->base
, sb
->szB
, sb
->spRel
? 'Y' : 'N',
232 sb
->isVec
? 'Y' : 'N', &sb
->name
[0]
235 VG_(message
)(Vg_DebugMsg
, ">>> STACKBLOCKS\n" );
239 /* ---------- The StackBlock vector cache ---------- */
241 static WordFM
* /* XArray* of StackBlock -> nothing */
242 frameBlocks_set
= NULL
;
244 static void init_StackBlocks_set ( void )
246 tl_assert(!frameBlocks_set
);
248 = VG_(newFM
)( sg_malloc
, "di.sg_main.iSBs.1", sg_free
,
249 (Word(*)(UWord
,UWord
))StackBlocks__cmp
);
250 tl_assert(frameBlocks_set
);
253 /* Find the given StackBlock-vector in our collection thereof. If
254 found, deallocate the supplied one, and return the address of the
255 copy. If not found, add the supplied one to our collection and
256 return its address. */
257 static XArray
* /* of StackBlock */
258 StackBlocks__find_and_dealloc__or_add
259 ( XArray
* /* of StackBlock */ orig
)
263 /* First, normalise, as per comments above. */
264 VG_(setCmpFnXA
)( orig
, (Int(*)(void*,void*))StackBlock__cmp
);
267 /* Now get rid of any exact duplicates. */
269 { Word r
, w
, nEQ
, n
= VG_(sizeXA
)( orig
);
273 for (r
= 0; r
< n
; r
++) {
275 StackBlock
* pR0
= VG_(indexXA
)( orig
, r
+0 );
276 StackBlock
* pR1
= VG_(indexXA
)( orig
, r
+1 );
277 Word c
= StackBlock__cmp(pR0
,pR1
);
278 tl_assert(c
== -1 || c
== 0);
279 if (c
== 0) { nEQ
++; continue; }
282 StackBlock
* pW
= VG_(indexXA
)( orig
, w
);
283 StackBlock
* pR
= VG_(indexXA
)( orig
, r
);
289 tl_assert(w
+ nEQ
== n
);
291 VG_(dropTailXA
)( orig
, n
-w
);
293 if (0) VG_(printf
)("delta %ld\n", n
-w
);
297 /* Deal with the following strangeness, where two otherwise
298 identical blocks are claimed to have different sizes. In which
299 case we use the larger size. */
300 /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
301 StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
302 StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
304 { Word i
, n
= VG_(sizeXA
)( orig
);
306 for (i
= 0; i
< n
-1; i
++) {
307 StackBlock
* sb0
= VG_(indexXA
)( orig
, i
+0 );
308 StackBlock
* sb1
= VG_(indexXA
)( orig
, i
+1 );
309 if (StackBlock__all_fields_except_szB_are_equal(sb0
, sb1
)) {
310 /* They can't be identical because the previous tidying
311 pass would have removed the duplicates. And they
312 can't be > because the earlier sorting pass would
313 have ordered otherwise-identical descriptors
314 according to < on .szB fields. Hence: */
315 tl_assert(sb0
->szB
< sb1
->szB
);
317 /* This makes the blocks identical, at the size of the
318 larger one. Rather than go to all the hassle of
319 sliding the rest down, simply go back to the
320 remove-duplicates stage. The assertion guarantees
321 that we eventually make progress, since the rm-dups
322 stage will get rid of one of the blocks. This is
323 expected to happen only exceedingly rarely. */
324 tl_assert(StackBlock__cmp(sb0
,sb1
) == 0);
331 /* If there are any blocks which overlap and have the same
332 fpRel-ness, junk the whole descriptor; it's obviously bogus.
333 Icc11 certainly generates bogus info from time to time.
335 This check is pretty weak; really we ought to have a stronger
337 { Word i
, n
= VG_(sizeXA
)( orig
);
338 static Int moans
= 3;
339 for (i
= 0; i
< n
-1; i
++) {
340 StackBlock
* sb1
= (StackBlock
*)VG_(indexXA
)( orig
, i
);
341 StackBlock
* sb2
= (StackBlock
*)VG_(indexXA
)( orig
, i
+1 );
342 if (sb1
->spRel
== sb2
->spRel
343 && (sb1
->base
>= sb2
->base
344 || sb1
->base
+ sb1
->szB
> sb2
->base
)) {
345 if (moans
> 0 && !VG_(clo_xml
)) {
347 VG_(message
)(Vg_UserMsg
, "Warning: bogus DWARF3 info: "
348 "overlapping stack blocks\n");
349 if (VG_(clo_verbosity
) >= 2)
350 pp_StackBlocks(orig
);
352 VG_(message
)(Vg_UserMsg
, "Further instances of this "
353 "message will not be shown\n" );
355 VG_(dropTailXA
)( orig
, VG_(sizeXA
)( orig
));
361 /* Now, do we have it already? */
362 if (VG_(lookupFM
)( frameBlocks_set
, &key
, &val
, (UWord
)orig
)) {
366 tl_assert(key
!= (UWord
)orig
);
372 VG_(addToFM
)( frameBlocks_set
, (UWord
)orig
, 0 );
377 /* Top level function for getting the StackBlock vector for a given
378 instruction. It is guaranteed that the returned pointer will be
379 valid for the entire rest of the run, and also that the addresses
380 of the individual elements of the array will not change. */
382 static XArray
* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip
)
384 XArray
* blocks
= VG_(di_get_stack_blocks_at_ip
)( ip
, True
/*arrays only*/ );
386 return StackBlocks__find_and_dealloc__or_add( blocks
);
390 //////////////////////////////////////////////////////////////
392 // GlobalBlocks Persistent Cache //
394 //////////////////////////////////////////////////////////////
396 /* Generate an arbitrary total ordering on GlobalBlocks. */
397 static Word
GlobalBlock__cmp ( GlobalBlock
* gb1
, GlobalBlock
* gb2
)
401 if (gb1
->addr
< gb2
->addr
) return -1;
402 if (gb1
->addr
> gb2
->addr
) return 1;
404 if (gb1
->szB
< gb2
->szB
) return -1;
405 if (gb1
->szB
> gb2
->szB
) return 1;
406 /* compare is/is-not array-typed flag */
407 if (gb1
->isVec
< gb2
->isVec
) return -1;
408 if (gb1
->isVec
> gb2
->isVec
) return 1;
409 /* compare the name */
410 r
= (Word
)VG_(strcmp
)(gb1
->name
, gb2
->name
);
411 if (r
!= 0) return r
;
412 /* compare the soname */
413 r
= (Word
)VG_(strcmp
)(gb1
->soname
, gb2
->soname
);
417 static WordFM
* /* GlobalBlock* -> nothing */
418 globalBlock_set
= NULL
;
420 static void init_GlobalBlock_set ( void )
422 tl_assert(!globalBlock_set
);
424 = VG_(newFM
)( sg_malloc
, "di.sg_main.iGBs.1", sg_free
,
425 (Word(*)(UWord
,UWord
))GlobalBlock__cmp
);
426 tl_assert(globalBlock_set
);
430 /* Top level function for making GlobalBlocks persistent. Call here
431 with a non-persistent version, and the returned one is guaranteed
432 to be valid for the entire rest of the run. The supplied one is
433 copied, not stored, so can be freed after the call. */
435 static GlobalBlock
* get_persistent_GlobalBlock ( GlobalBlock
* orig
)
438 /* Now, do we have it already? */
439 if (VG_(lookupFM
)( globalBlock_set
, &key
, &val
, (UWord
)orig
)) {
440 /* yes, return the copy */
443 res
= (GlobalBlock
*)key
;
444 tl_assert(res
!= orig
);
447 /* no. clone it, store the clone and return the clone's
449 GlobalBlock
* clone
= sg_malloc( "di.sg_main.gpGB.1",
450 sizeof(GlobalBlock
) );
453 VG_(addToFM
)( globalBlock_set
, (UWord
)clone
, 0 );
459 //////////////////////////////////////////////////////////////
461 // Interval tree of StackTreeBlock //
463 //////////////////////////////////////////////////////////////
465 /* A node in a stack interval tree. Zero length intervals (.szB == 0)
468 A stack interval tree is a (WordFM StackTreeNode* void). There is
469 one stack interval tree for each thread.
474 SizeT szB
; /* copied from .descr->szB */
475 StackBlock
* descr
; /* it's an instance of this block */
476 UWord depth
; /* depth of stack at time block was pushed */
480 static void pp_StackTree ( WordFM
* sitree
, HChar
* who
)
483 VG_(printf
)("<<< BEGIN pp_StackTree %s\n", who
);
484 VG_(initIterFM
)( sitree
);
485 while (VG_(nextIterFM
)( sitree
, &keyW
, &valW
)) {
486 StackTreeNode
* nd
= (StackTreeNode
*)keyW
;
487 VG_(printf
)(" [%#lx,+%lu) descr=%p %s %lu\n", nd
->addr
, nd
->szB
,
488 nd
->descr
, nd
->descr
->name
, nd
->descr
->szB
);
490 VG_(printf
)(">>> END pp_StackTree %s\n", who
);
493 /* Interval comparison function for StackTreeNode */
494 static Word
cmp_intervals_StackTreeNode ( StackTreeNode
* sn1
,
497 return cmp_nonempty_intervals(sn1
->addr
, sn1
->szB
,
498 sn2
->addr
, sn2
->szB
);
501 /* Find the node holding 'a', if any. */
502 static StackTreeNode
* find_StackTreeNode ( WordFM
* sitree
, Addr a
)
509 if (VG_(lookupFM
)( sitree
, &keyW
, &valW
, (UWord
)&key
)) {
510 StackTreeNode
* res
= (StackTreeNode
*)keyW
;
511 tl_assert(valW
== 0);
512 tl_assert(res
!= &key
);
519 /* Note that the supplied XArray of FrameBlock must have been
520 made persistent already. */
521 __attribute__((noinline
))
522 static void add_blocks_to_StackTree (
523 /*MOD*/WordFM
* sitree
,
524 XArray
* /* FrameBlock */ descrs
,
525 XArray
* /* Addr */ bases
,
529 Bool debug
= (Bool
)0;
530 Word i
, nDescrs
, nBases
;
532 nDescrs
= VG_(sizeXA
)( descrs
),
533 nBases
= VG_(sizeXA
)( bases
);
534 tl_assert(nDescrs
== nBases
);
536 if (nDescrs
== 0) return;
540 VG_(printf
)("\ndepth = %lu\n", depth
);
541 pp_StackTree( sitree
, "add_blocks_to_StackTree-pre" );
542 pp_StackBlocks(descrs
);
545 for (i
= 0; i
< nDescrs
; i
++) {
546 Bool already_present
;
548 Addr addr
= *(Addr
*)VG_(indexXA
)( bases
, i
);
549 StackBlock
* descr
= (StackBlock
*)VG_(indexXA
)( descrs
, i
);
550 tl_assert(descr
->szB
> 0);
551 nyu
= sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode
) );
553 nyu
->szB
= descr
->szB
;
556 if (debug
) VG_(printf
)("ADD %#lx %lu\n", addr
, descr
->szB
);
557 already_present
= VG_(addToFM
)( sitree
, (UWord
)nyu
, 0 );
558 /* The interval can't already be there; else we have
559 overlapping stack blocks. */
560 tl_assert(!already_present
);
562 pp_StackTree( sitree
, "add_blocks_to_StackTree-step" );
566 pp_StackTree( sitree
, "add_blocks_to_StackTree-post" );
571 static void del_blocks_from_StackTree ( /*MOD*/WordFM
* sitree
,
572 XArray
* /* Addr */ bases
)
575 Word i
, nBases
= VG_(sizeXA
)( bases
);
576 for (i
= 0; i
< nBases
; i
++) {
578 Addr addr
= *(Addr
*)VG_(indexXA
)( bases
, i
);
579 StackTreeNode
* nd
= find_StackTreeNode(sitree
, addr
);
580 /* The interval must be there; we added it earlier when
581 the associated frame was created. */
583 b
= VG_(delFromFM
)( sitree
, &oldK
, &oldV
, (UWord
)nd
);
584 /* we just found the block! */
586 tl_assert(oldV
== 0);
587 tl_assert(nd
== (StackTreeNode
*)oldK
);
593 static void delete_StackTree__kFin ( UWord keyW
) {
594 StackTreeNode
* nd
= (StackTreeNode
*)keyW
;
598 static void delete_StackTree__vFin ( UWord valW
) {
599 tl_assert(valW
== 0);
601 static void delete_StackTree ( WordFM
* sitree
)
603 VG_(deleteFM
)( sitree
,
604 delete_StackTree__kFin
, delete_StackTree__vFin
);
607 static WordFM
* new_StackTree ( void ) {
608 return VG_(newFM
)( sg_malloc
, "di.sg_main.nST.1", sg_free
,
609 (Word(*)(UWord
,UWord
))cmp_intervals_StackTreeNode
);
613 //////////////////////////////////////////////////////////////
615 // Interval tree of GlobalTreeBlock //
617 //////////////////////////////////////////////////////////////
619 /* A node in a global interval tree. Zero length intervals
620 (.szB == 0) are not allowed.
622 A global interval tree is a (WordFM GlobalTreeNode* void). There
623 is one global interval tree for the entire process.
627 Addr addr
; /* copied from .descr->addr */
628 SizeT szB
; /* copied from .descr->szB */
629 GlobalBlock
* descr
; /* it's this block */
633 static void GlobalTreeNode__pp ( GlobalTreeNode
* nd
) {
634 tl_assert(nd
->descr
);
635 VG_(printf
)("GTNode [%#lx,+%ld) %s",
636 nd
->addr
, nd
->szB
, nd
->descr
->name
);
639 static void GlobalTree__pp ( WordFM
* /* of (GlobalTreeNode,void) */ gitree
,
644 VG_(printf
)("<<< GlobalBlockTree (%s)\n", who
);
645 VG_(initIterFM
)( gitree
);
646 while (VG_(nextIterFM
)( gitree
, &keyW
, &valW
)) {
647 tl_assert(valW
== 0);
648 nd
= (GlobalTreeNode
*)keyW
;
650 GlobalTreeNode__pp(nd
);
653 VG_(doneIterFM
)( gitree
);
654 VG_(printf
)(">>>\n");
657 /* Interval comparison function for GlobalTreeNode */
658 static Word
cmp_intervals_GlobalTreeNode ( GlobalTreeNode
* gn1
,
659 GlobalTreeNode
* gn2
)
661 return cmp_nonempty_intervals( gn1
->addr
, gn1
->szB
,
662 gn2
->addr
, gn2
->szB
);
665 /* Find the node holding 'a', if any. */
666 static GlobalTreeNode
* find_GlobalTreeNode ( WordFM
* gitree
, Addr a
)
672 if (VG_(lookupFM
)( gitree
, &keyW
, &valW
, (UWord
)&key
)) {
673 GlobalTreeNode
* res
= (GlobalTreeNode
*)keyW
;
674 tl_assert(valW
== 0);
675 tl_assert(res
!= &key
);
682 /* Note that the supplied GlobalBlock must have been made persistent
684 static void add_block_to_GlobalTree (
685 /*MOD*/WordFM
* gitree
,
689 Bool already_present
;
690 GlobalTreeNode
*nyu
, *nd
;
692 static Int moans
= 3;
694 tl_assert(descr
->szB
> 0);
695 nyu
= sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode
) );
696 nyu
->addr
= descr
->addr
;
697 nyu
->szB
= descr
->szB
;
700 /* Basically it's an error to add a global block to the tree that
701 is already in the tree. However, detect and ignore attempts to
702 insert exact duplicates; they do appear for some reason
703 (possible a bug in m_debuginfo?) */
704 already_present
= VG_(lookupFM
)( gitree
, &keyW
, &valW
, (UWord
)nyu
);
705 if (already_present
) {
706 tl_assert(valW
== 0);
707 nd
= (GlobalTreeNode
*)keyW
;
709 tl_assert(nd
!= nyu
);
710 tl_assert(nd
->descr
);
711 tl_assert(nyu
->descr
);
712 if (nd
->addr
== nyu
->addr
&& nd
->szB
== nyu
->szB
713 /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
714 /* Although it seems reasonable to demand that duplicate
715 blocks have identical names, that is too strict. For
716 example, reading debuginfo from glibc produces two
717 otherwise identical blocks with names "tzname" and
718 "__tzname". A constraint of the form "must be identical,
719 or one must be a substring of the other" would fix that.
720 However, such trickery is scuppered by the fact that we
721 truncate all variable names to 15 characters to make
722 storage management simpler, hence giving pairs like
723 "__EI___pthread_" (truncated) vs "__pthread_keys". So
724 it's simplest just to skip the name comparison
726 && 0 == VG_(strcmp
)(nd
->descr
->soname
, nyu
->descr
->soname
)) {
727 /* exact duplicate; ignore it */
731 /* else fall through; the assertion below will catch it */
734 already_present
= VG_(addToFM
)( gitree
, (UWord
)nyu
, 0 );
735 /* The interval can't already be there; else we have
736 overlapping global blocks. */
737 /* Unfortunately (25 Jan 09) at least icc11 has been seen to
738 generate overlapping block descriptions in the Dwarf3; clearly
740 if (already_present
&& moans
> 0 && !VG_(clo_xml
)) {
742 VG_(message
)(Vg_UserMsg
, "Warning: bogus DWARF3 info: "
743 "overlapping global blocks\n");
744 if (VG_(clo_verbosity
) >= 2) {
745 GlobalTree__pp( gitree
,
746 "add_block_to_GlobalTree: non-exact duplicate" );
747 VG_(printf
)("Overlapping block: ");
748 GlobalTreeNode__pp(nyu
);
752 VG_(message
)(Vg_UserMsg
, "Further instances of this "
753 "message will not be shown\n" );
755 /* tl_assert(!already_present); */
758 static Bool
del_GlobalTree_range ( /*MOD*/WordFM
* gitree
,
761 /* One easy way to do this: look up [a,a+szB) in the tree. That
762 will either succeed, producing a block which intersects that
763 range, in which case we delete it and repeat; or it will fail,
764 in which case there are no blocks intersecting the range, and we
765 can bring the process to a halt. */
766 UWord keyW
, valW
, oldK
, oldV
;
767 GlobalTreeNode key
, *nd
;
777 while (VG_(lookupFM
)( gitree
, &keyW
, &valW
, (UWord
)&key
)) {
779 nd
= (GlobalTreeNode
*)keyW
;
780 tl_assert(valW
== 0);
781 tl_assert(nd
!= &key
);
782 tl_assert(cmp_nonempty_intervals(a
, szB
, nd
->addr
, nd
->szB
) == 0);
784 b
= VG_(delFromFM
)( gitree
, &oldK
, &oldV
, (UWord
)&key
);
786 tl_assert(oldV
== 0);
787 tl_assert(oldK
== keyW
); /* check we deleted the node we just found */
794 //////////////////////////////////////////////////////////////
798 //////////////////////////////////////////////////////////////
800 /* An invariant, as resulting from watching the destination of a
801 memory referencing instruction. Initially is Inv_Unset until the
802 instruction makes a first access. */
806 Inv_Unset
=1, /* not established yet */
807 Inv_Unknown
, /* unknown location */
808 Inv_Stack0
, /* array-typed stack block in innermost frame */
809 Inv_StackN
, /* array-typed stack block in non-innermost frame */
810 Inv_Global
, /* array-typed global block */
826 } Stack0
; /* innermost stack frame */
828 /* Pointer to a node in the interval tree for
831 } StackN
; /* non-innermost stack frame */
833 /* Pointer to a GlobalBlock in the interval tree of
842 /* Partial debugging printing for an Invar. */
843 static void pp_Invar ( Invar
* i
)
847 VG_(printf
)("Unset");
850 VG_(printf
)("Unknown");
853 VG_(printf
)("Stack0 [%#lx,+%lu)",
854 i
->Inv
.Stack0
.addr
, i
->Inv
.Stack0
.szB
);
857 VG_(printf
)("StackN [%#lx,+%lu)",
858 i
->Inv
.StackN
.nd
->addr
, i
->Inv
.StackN
.nd
->szB
);
861 VG_(printf
)("Global [%#lx,+%lu)",
862 i
->Inv
.Global
.nd
->addr
, i
->Inv
.Global
.nd
->szB
);
869 /* Compare two Invars for equality. */
870 static Bool
eq_Invar ( Invar
* i1
, Invar
* i2
)
872 tl_assert(i1
->tag
!= Inv_Unset
);
873 tl_assert(i2
->tag
!= Inv_Unset
);
874 if (i1
->tag
!= i2
->tag
)
880 return i1
->Inv
.Stack0
.addr
== i2
->Inv
.Stack0
.addr
881 && i1
->Inv
.Stack0
.szB
== i2
->Inv
.Stack0
.szB
;
883 return i1
->Inv
.StackN
.nd
== i2
->Inv
.StackN
.nd
;
885 return i1
->Inv
.Global
.nd
== i2
->Inv
.Global
.nd
;
893 /* Generate a piece of text showing 'ea' is relative to 'invar', if
894 known. If unknown, generate an empty string. 'buf' must be at
895 least 32 bytes in size. Also return the absolute value of the
896 delta, if known, or zero if not known.
898 static void gen_delta_str ( /*OUT*/HChar
* buf
,
899 /*OUT*/UWord
* absDelta
,
900 Invar
* inv
, Addr ea
)
911 return; /* unknown */
913 block
= inv
->Inv
.Stack0
.addr
;
914 szB
= inv
->Inv
.Stack0
.szB
;
917 block
= inv
->Inv
.StackN
.nd
->addr
;
918 szB
= inv
->Inv
.StackN
.nd
->szB
;
921 block
= inv
->Inv
.Global
.nd
->addr
;
922 szB
= inv
->Inv
.Global
.nd
->szB
;
929 *absDelta
= block
- ea
;
930 VG_(sprintf
)(buf
, "%'lu before", *absDelta
);
932 else if (ea
>= block
+ szB
) {
933 *absDelta
= ea
- (block
+ szB
);
934 VG_(sprintf
)(buf
, "%'lu after", *absDelta
);
937 // Leave *absDelta at zero.
938 VG_(sprintf
)(buf
, "%'lu inside", ea
- block
);
943 /* Print selected parts of an Invar, suitable for use in error
945 static void show_Invar( HChar
* buf
, Word nBuf
, Invar
* inv
, Word depth
)
948 tl_assert(nBuf
>= 128);
952 VG_(sprintf
)(buf
, "%s", "unknown");
956 VG_(sprintf
)(buf
, "stack %s \"%s\" of size %'lu in this frame",
957 str
, inv
->Inv
.Stack0
.descr
->name
,
958 inv
->Inv
.Stack0
.szB
);
962 VG_(sprintf
)(buf
, "stack %s \"%s\" of size %'lu in frame %lu back from here",
963 str
, inv
->Inv
.StackN
.nd
->descr
->name
,
964 inv
->Inv
.StackN
.nd
->descr
->szB
,
965 depth
- inv
->Inv
.StackN
.nd
->depth
);
969 VG_(sprintf
)(buf
, "global %s \"%s\" of size %'lu in object with soname \"%s\"",
970 str
, inv
->Inv
.Global
.nd
->descr
->name
,
971 inv
->Inv
.Global
.nd
->descr
->szB
,
972 inv
->Inv
.Global
.nd
->descr
->soname
);
975 VG_(sprintf
)(buf
, "%s", "Unset!");
983 //////////////////////////////////////////////////////////////
987 //////////////////////////////////////////////////////////////
989 //////////////////////////////////////////////////////////////
994 /* Powers of two only, else the result will be chaos */
995 #define QCACHE_ADVANCE_EVERY 16
997 /* Per-thread query cache. Note that the invar can only be Inv_StackN
998 (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
1010 QCElem elems
[N_QCACHE
];
1014 static void QCache__invalidate ( QCache
* qc
) {
1015 tl_assert(qc
->nInUse
>= 0);
1019 static void QCache__pp ( QCache
* qc
, HChar
* who
)
1022 VG_(printf
)("<<< QCache with %ld elements (%s)\n", qc
->nInUse
, who
);
1023 for (i
= 0; i
< qc
->nInUse
; i
++) {
1024 VG_(printf
)(" [%#lx,+%#lx) ", qc
->elems
[i
].addr
, qc
->elems
[i
].szB
);
1025 pp_Invar(&qc
->elems
[i
].inv
);
1028 VG_(printf
)(">>>\n");
1031 static ULong stats__qcache_queries
= 0;
1032 static ULong stats__qcache_misses
= 0;
1033 static ULong stats__qcache_probes
= 0;
1036 //////////////////////////////////////////////////////////////
1039 * a shadow stack of StackFrames, which is a double-linked list
1040 * an stack block interval tree
1042 static struct _StackFrame
* shadowStacks
[VG_N_THREADS
];
1044 static WordFM
* /* StackTreeNode */ siTrees
[VG_N_THREADS
];
1046 static QCache qcaches
[VG_N_THREADS
];
1049 /* Additionally, there is one global variable interval tree
1050 for the entire process.
1052 static WordFM
* /* GlobalTreeNode */ giTree
;
1055 static void invalidate_all_QCaches ( void )
1058 for (i
= 0; i
< VG_N_THREADS
; i
++) {
1059 QCache__invalidate( &qcaches
[i
] );
1063 static void ourGlobals_init ( void )
1066 for (i
= 0; i
< VG_N_THREADS
; i
++) {
1067 shadowStacks
[i
] = NULL
;
1070 invalidate_all_QCaches();
1071 giTree
= VG_(newFM
)( sg_malloc
, "di.sg_main.oGi.1", sg_free
,
1072 (Word(*)(UWord
,UWord
))cmp_intervals_GlobalTreeNode
);
1076 //////////////////////////////////////////////////////////////
1078 // Handle global variable load/unload events //
1080 //////////////////////////////////////////////////////////////
1082 static void acquire_globals ( ULong di_handle
)
1085 XArray
* /* of GlobalBlock */ gbs
;
1086 if (0) VG_(printf
)("ACQUIRE GLOBALS %llu\n", di_handle
);
1087 gbs
= VG_(di_get_global_blocks_from_dihandle
)
1088 (di_handle
, True
/*arrays only*/);
1089 if (0) VG_(printf
)(" GOT %ld globals\n", VG_(sizeXA
)( gbs
));
1091 n
= VG_(sizeXA
)( gbs
);
1092 for (i
= 0; i
< n
; i
++) {
1094 GlobalBlock
* gb
= VG_(indexXA
)( gbs
, i
);
1095 if (0) VG_(printf
)(" new Global size %2lu at %#lx: %s %s\n",
1096 gb
->szB
, gb
->addr
, gb
->soname
, gb
->name
);
1097 tl_assert(gb
->szB
> 0);
1098 /* Make a persistent copy of each GlobalBlock, and add it
1100 gbp
= get_persistent_GlobalBlock( gb
);
1101 add_block_to_GlobalTree( giTree
, gbp
);
1104 VG_(deleteXA
)( gbs
);
1108 /* We only intercept these two because we need to see any di_handles
1109 that might arise from the mappings/allocations. */
1110 void sg_new_mem_mmap( Addr a
, SizeT len
,
1111 Bool rr
, Bool ww
, Bool xx
, ULong di_handle
)
1114 acquire_globals(di_handle
);
1116 void sg_new_mem_startup( Addr a
, SizeT len
,
1117 Bool rr
, Bool ww
, Bool xx
, ULong di_handle
)
1120 acquire_globals(di_handle
);
1122 void sg_die_mem_munmap ( Addr a
, SizeT len
)
1124 Bool debug
= (Bool
)0;
1125 Bool overlap
= False
;
1127 if (debug
) VG_(printf
)("MUNMAP %#lx %lu\n", a
, len
);
1132 overlap
= del_GlobalTree_range(giTree
, a
, len
);
1134 { /* redundant sanity check */
1136 VG_(initIterFM
)( giTree
);
1137 while (VG_(nextIterFM
)( giTree
, &keyW
, &valW
)) {
1138 GlobalTreeNode
* nd
= (GlobalTreeNode
*)keyW
;
1139 tl_assert(valW
== 0);
1140 tl_assert(nd
->szB
> 0);
1141 tl_assert(nd
->addr
+ nd
->szB
<= a
1142 || a
+ len
<= nd
->addr
);
1144 VG_(doneIterFM
)( giTree
);
1150 /* Ok, the range contained some blocks. Therefore we'll need to
1151 visit all the Invars in all the thread shadow stacks, and
1152 convert all Inv_Global entries that intersect [a,a+len) to
1155 preen_global_Invars( a
, len
);
1156 invalidate_all_QCaches();
1160 //////////////////////////////////////////////////////////////
1164 //////////////////////////////////////////////////////////////
1166 static ULong stats__total_accesses
= 0;
1167 static ULong stats__classify_Stack0
= 0;
1168 static ULong stats__classify_StackN
= 0;
1169 static ULong stats__classify_Global
= 0;
1170 static ULong stats__classify_Unknown
= 0;
1171 static ULong stats__Invars_preened
= 0;
1172 static ULong stats__Invars_changed
= 0;
1173 static ULong stats__t_i_b_empty
= 0;
1174 static ULong stats__htab_fast
= 0;
1175 static ULong stats__htab_searches
= 0;
1176 static ULong stats__htab_probes
= 0;
1177 static ULong stats__htab_resizes
= 0;
1180 /* A dynamic instance of an instruction */
1184 Addr insn_addr
; /* NB! zero means 'not in use' */
1185 XArray
* blocks
; /* XArray* of StackBlock, or NULL if none */
1192 #define N_HTAB_FIXED 64
1195 struct _StackFrame
{
1196 /* The sp when the frame was created, so we know when to get rid
1199 /* The stack frames for a thread are arranged as a doubly linked
1200 list. Obviously the outermost frame in the stack has .outer
1201 as NULL and the innermost in theory has .inner as NULL.
1202 However, when a function returns, we don't delete the
1203 just-vacated StackFrame. Instead, it is retained in the list
1204 and will be re-used when the next call happens. This is so
1205 as to avoid constantly having to dynamically allocate and
1206 deallocate frames. */
1207 struct _StackFrame
* inner
;
1208 struct _StackFrame
* outer
;
1209 Word depth
; /* 0 for outermost; increases inwards */
1210 /* Information for each memory referencing instruction, for this
1211 instantiation of the function. The iinstances array is
1212 operated as a simple linear-probe hash table, which is
1213 dynamically expanded as necessary. Once critical thing is
1214 that an IInstance with a .insn_addr of zero is interpreted to
1215 mean that hash table slot is unused. This means we can't
1216 store an IInstance for address zero. */
1217 /* Note that htab initially points to htab_fixed. If htab_fixed
1218 turns out not to be big enough then htab is made to point to
1219 dynamically allocated memory. But it's often the case that
1220 htab_fixed is big enough, so this optimisation saves a huge
1221 number of sg_malloc/sg_free call pairs. */
1223 UWord htab_size
; /* size of hash table, MAY ONLY BE A POWER OF 2 */
1224 UWord htab_used
; /* number of hash table slots currently in use */
1225 /* If this frame is currently making a call, then the following
1229 XArray
* /* of Addr */ blocks_added_by_call
;
1230 /* See comment just above */
1231 IInstance htab_fixed
[N_HTAB_FIXED
];
1239 /* Move this somewhere else? */
1240 /* Visit all Invars in the entire system. If 'isHeap' is True, change
1241 all Inv_Heap Invars that intersect [a,a+len) to Inv_Unknown. If
1242 'isHeap' is False, do the same but to the Inv_Global{S,V} Invars
1245 __attribute__((noinline
))
1246 static void preen_global_Invar ( Invar
* inv
, Addr a
, SizeT len
)
1248 stats__Invars_preened
++;
1253 tl_assert(inv
->Inv
.Global
.nd
);
1254 tl_assert(inv
->Inv
.Global
.nd
->szB
> 0);
1255 if (0) VG_(printf
)("preen_Invar Global %#lx %lu\n",
1256 inv
->Inv
.Global
.nd
->addr
,
1257 inv
->Inv
.Global
.nd
->szB
);
1258 if (0 == cmp_nonempty_intervals(a
, len
, inv
->Inv
.Global
.nd
->addr
,
1259 inv
->Inv
.Global
.nd
->szB
)) {
1260 inv
->tag
= Inv_Unknown
;
1261 stats__Invars_changed
++;
1273 __attribute__((noinline
))
1274 static void preen_global_Invars ( Addr a
, SizeT len
)
1280 for (i
= 0; i
< VG_N_THREADS
; i
++) {
1281 frame
= shadowStacks
[i
];
1283 continue; /* no frames for this thread */
1284 /* start from the innermost frame */
1285 while (frame
->inner
)
1286 frame
= frame
->inner
;
1287 tl_assert(frame
->outer
);
1288 /* work through the frames from innermost to outermost. The
1289 order isn't important; we just need to ensure we visit each
1290 frame once (including those which are not actually active,
1291 more 'inner' than the 'innermost active frame', viz, just
1292 hanging around waiting to be used, when the current innermost
1293 active frame makes more calls. See comments on definition of
1294 struct _StackFrame. */
1295 for (; frame
; frame
= frame
->outer
) {
1296 UWord xx
= 0; /* sanity check only; count of used htab entries */
1298 continue; /* frame not in use. See shadowStack_unwind(). */
1299 for (u
= 0; u
< frame
->htab_size
; u
++) {
1300 IInstance
* ii
= &frame
->htab
[u
];
1301 if (ii
->insn_addr
== 0)
1302 continue; /* not in use */
1303 if (0) { pp_Invar(&ii
->invar
); VG_(printf
)(" x\n"); }
1304 preen_global_Invar( &ii
->invar
, a
, len
);
1307 tl_assert(xx
== frame
->htab_used
);
1313 /* XXX this should be >> 2 on ppc32/64 since the bottom two bits
1314 of the ip are guaranteed to be zero */
1315 inline static UWord
compute_II_hash ( Addr ip
, UWord htab_size
) {
1316 return (ip
>> 0) & (htab_size
- 1);
1319 __attribute__((noinline
))
1320 static void initialise_II_hash_table ( StackFrame
* sf
)
1323 sf
->htab_size
= N_HTAB_FIXED
; /* initial hash table size */
1324 sf
->htab
= &sf
->htab_fixed
[0];
1325 tl_assert(sf
->htab
);
1327 for (i
= 0; i
< sf
->htab_size
; i
++)
1328 sf
->htab
[i
].insn_addr
= 0; /* NOT IN USE */
1332 __attribute__((noinline
))
1333 static void resize_II_hash_table ( StackFrame
* sf
)
1335 UWord i
, j
, ix
, old_size
, new_size
;
1336 IInstance
*old_htab
, *new_htab
, *old
;
1338 tl_assert(sf
&& sf
->htab
);
1339 old_size
= sf
->htab_size
;
1340 new_size
= 2 * old_size
;
1341 old_htab
= sf
->htab
;
1342 new_htab
= sg_malloc( "di.sg_main.rIht.1",
1343 new_size
* sizeof(IInstance
) );
1344 for (i
= 0; i
< new_size
; i
++) {
1345 new_htab
[i
].insn_addr
= 0; /* NOT IN USE */
1347 for (i
= 0; i
< old_size
; i
++) {
1349 if (old
->insn_addr
== 0 /* NOT IN USE */)
1351 ix
= compute_II_hash(old
->insn_addr
, new_size
);
1352 /* find out where to put this, in the new table */
1355 if (new_htab
[ix
].insn_addr
== 0)
1357 /* This can't ever happen, because it would mean the new
1358 table is full; that isn't allowed -- even the old table is
1359 only allowed to become half full. */
1362 ix
++; if (ix
== new_size
) ix
= 0;
1364 /* copy the old entry to this location */
1365 tl_assert(ix
< new_size
);
1366 tl_assert(new_htab
[ix
].insn_addr
== 0);
1367 new_htab
[ix
] = *old
;
1368 tl_assert(new_htab
[ix
].insn_addr
!= 0);
1370 /* all entries copied; free old table. */
1371 if (old_htab
!= &sf
->htab_fixed
[0])
1373 sf
->htab
= new_htab
;
1374 sf
->htab_size
= new_size
;
1375 /* check sf->htab_used is correct. Optional and a bit expensive
1378 for (i
= 0; i
< new_size
; i
++) {
1379 if (new_htab
[i
].insn_addr
!= 0) {
1383 tl_assert(j
== sf
->htab_used
);
1384 if (0) VG_(printf
)("resized tab for SF %p to %lu\n", sf
, new_size
);
1388 __attribute__((noinline
))
1389 static IInstance
* find_or_create_IInstance_SLOW (
1392 XArray
* /* StackBlock */ ip_frameblocks
1397 stats__htab_searches
++;
1400 tl_assert(sf
->htab
);
1402 /* Make sure the table loading doesn't get too high. */
1403 if (UNLIKELY(2 * sf
->htab_used
>= 1 * sf
->htab_size
)) {
1404 stats__htab_resizes
++;
1405 resize_II_hash_table(sf
);
1407 tl_assert(2 * sf
->htab_used
<= sf
->htab_size
);
1409 ix
= compute_II_hash(ip
, sf
->htab_size
);
1412 stats__htab_probes
++;
1413 /* Note that because of the way the fast-case handler works,
1414 these two tests are actually redundant in the first iteration
1415 of this loop. (Except they aren't redundant if the code just
1416 above resized the table first. :-) */
1417 if (sf
->htab
[ix
].insn_addr
== ip
)
1418 return &sf
->htab
[ix
];
1419 if (sf
->htab
[ix
].insn_addr
== 0)
1421 /* If i ever gets to zero and we have found neither what we're
1422 looking for nor an empty slot, the table must be full. Which
1423 isn't possible -- we monitor the load factor to ensure it
1424 doesn't get above say 50%; if that ever does happen the table
1429 if (ix
== sf
->htab_size
) ix
= 0;
1432 /* So now we've found a free slot at ix, and we can use that. */
1433 tl_assert(sf
->htab
[ix
].insn_addr
== 0);
1435 /* Add a new record in this slot. */
1436 tl_assert(ip
!= 0); /* CAN'T REPRESENT THIS */
1437 sf
->htab
[ix
].insn_addr
= ip
;
1438 sf
->htab
[ix
].blocks
= ip_frameblocks
;
1439 sf
->htab
[ix
].invar
.tag
= Inv_Unset
;
1441 return &sf
->htab
[ix
];
1446 static IInstance
* find_or_create_IInstance (
1449 XArray
* /* StackBlock */ ip_frameblocks
1452 UWord ix
= compute_II_hash(ip
, sf
->htab_size
);
1453 /* Is it in the first slot we come to? */
1454 if (LIKELY(sf
->htab
[ix
].insn_addr
== ip
)) {
1456 return &sf
->htab
[ix
];
1458 /* If the first slot we come to is empty, bag it. */
1459 if (LIKELY(sf
->htab
[ix
].insn_addr
== 0)) {
1462 sf
->htab
[ix
].insn_addr
= ip
;
1463 sf
->htab
[ix
].blocks
= ip_frameblocks
;
1464 sf
->htab
[ix
].invar
.tag
= Inv_Unset
;
1466 return &sf
->htab
[ix
];
1468 /* Otherwise we hand off to the slow case, which searches other
1469 slots, and optionally resizes the table if necessary. */
1470 return find_or_create_IInstance_SLOW( sf
, ip
, ip_frameblocks
);
1474 __attribute__((noinline
))
1475 static Addr
calculate_StackBlock_EA ( StackBlock
* descr
,
1476 Addr sp
, Addr fp
) {
1477 UWord w1
= (UWord
)descr
->base
;
1478 UWord w2
= (UWord
)(descr
->spRel
? sp
: fp
);
1483 /* Given an array of StackBlocks, return an array of Addrs, holding
1484 their effective addresses. Caller deallocates result array. */
1485 __attribute__((noinline
))
1486 static XArray
* /* Addr */ calculate_StackBlock_EAs (
1487 XArray
* /* StackBlock */ blocks
,
1492 Word i
, n
= VG_(sizeXA
)( blocks
);
1494 res
= VG_(newXA
)( sg_malloc
, "di.sg_main.cSBE.1", sg_free
, sizeof(Addr
) );
1495 for (i
= 0; i
< n
; i
++) {
1496 StackBlock
* blk
= VG_(indexXA
)( blocks
, i
);
1497 Addr ea
= calculate_StackBlock_EA( blk
, sp
, fp
);
1498 VG_(addToXA
)( res
, &ea
);
1504 /* Try to classify the block into which a memory access falls, and
1505 write the result in 'inv'. This writes all relevant fields of
1507 __attribute__((noinline
))
1508 static void classify_address ( /*OUT*/Invar
* inv
,
1510 Addr ea
, Addr sp
, Addr fp
,
1512 XArray
* /* of StackBlock */ thisInstrBlocks
)
1515 /* First, look in the stack blocks accessible in this instruction's
1518 Word i
, nBlocks
= VG_(sizeXA
)( thisInstrBlocks
);
1519 if (nBlocks
== 0) stats__t_i_b_empty
++;
1520 for (i
= 0; i
< nBlocks
; i
++) {
1521 StackBlock
* descr
= VG_(indexXA
)( thisInstrBlocks
, i
);
1522 Addr bea
= calculate_StackBlock_EA( descr
, sp
, fp
);
1523 if (bea
<= ea
&& ea
+ szB
<= bea
+ descr
->szB
) {
1525 inv
->tag
= Inv_Stack0
;
1526 inv
->Inv
.Stack0
.addr
= bea
;
1527 inv
->Inv
.Stack0
.szB
= descr
->szB
;
1528 inv
->Inv
.Stack0
.descr
= descr
;
1529 stats__classify_Stack0
++;
1534 /* Look in this thread's query cache */
1536 QCache
* cache
= &qcaches
[tid
];
1537 static UWord ctr
= 0;
1538 stats__qcache_queries
++;
1539 for (i
= 0; i
< cache
->nInUse
; i
++) {
1540 if (0) /* expensive in a loop like this */
1541 tl_assert(cache
->elems
[i
].addr
+ cache
->elems
[i
].szB
!= 0);
1542 stats__qcache_probes
++;
1543 if (is_subinterval_of(cache
->elems
[i
].addr
,
1544 cache
->elems
[i
].szB
, ea
, szB
)) {
1546 && (ctr
++ & (QCACHE_ADVANCE_EVERY
-1)) == 0) {
1548 tmp
= cache
->elems
[i
-1];
1549 cache
->elems
[i
-1] = cache
->elems
[i
];
1550 cache
->elems
[i
] = tmp
;
1553 *inv
= cache
->elems
[i
].inv
;
1557 stats__qcache_misses
++;
1559 /* Ok, so it's not a block in the top frame. Perhaps it's a block
1560 in some calling frame? Consult this thread's stack-block
1561 interval tree to find out. */
1562 { StackTreeNode
* nd
= find_StackTreeNode( siTrees
[tid
], ea
);
1563 /* We know that [ea,ea+1) is in the block, but we need to
1564 restrict to the case where the whole access falls within
1566 if (nd
&& !is_subinterval_of(nd
->addr
, nd
->szB
, ea
, szB
)) {
1571 inv
->tag
= Inv_StackN
;
1572 inv
->Inv
.StackN
.nd
= nd
;
1573 stats__classify_StackN
++;
1577 /* Not in a stack block. Try the global pool. */
1578 { GlobalTreeNode
* nd
= find_GlobalTreeNode(giTree
, ea
);
1579 /* We know that [ea,ea+1) is in the block, but we need to
1580 restrict to the case where the whole access falls within
1582 if (nd
&& !is_subinterval_of(nd
->addr
, nd
->szB
, ea
, szB
)) {
1587 inv
->tag
= Inv_Global
;
1588 inv
->Inv
.Global
.nd
= nd
;
1589 stats__classify_Global
++;
1593 /* No idea - give up. */
1594 inv
->tag
= Inv_Unknown
;
1595 stats__classify_Unknown
++;
1597 /* Update the cache */
1599 { Addr toadd_addr
= 0;
1600 SizeT toadd_szB
= 0;
1601 QCache
* cache
= &qcaches
[tid
];
1603 static UWord ctr
= 0;
1605 if (0 && 0 == (ctr
++ & 0x1FFFFF)) show
= True
;
1607 if (show
) QCache__pp(cache
, "before upd");
1611 toadd_addr
= inv
->Inv
.Global
.nd
->addr
;
1612 toadd_szB
= inv
->Inv
.Global
.nd
->szB
;
1615 toadd_addr
= inv
->Inv
.StackN
.nd
->addr
;
1616 toadd_szB
= inv
->Inv
.StackN
.nd
->szB
;
1619 /* This is more complex. We need to figure out the
1620 intersection of the "holes" in the global and stack
1621 interval trees into which [ea,ea+szB) falls. This is
1622 further complicated by the fact that [ea,ea+szB) might
1623 not fall cleanly into a hole; it may instead fall across
1624 the boundary of a stack or global block. In that case
1625 we just ignore it and don't update the cache, since we
1626 have no way to represent this situation precisely. */
1627 StackTreeNode sNegInf
, sPosInf
, sKey
, *sLB
, *sUB
;
1628 GlobalTreeNode gNegInf
, gPosInf
, gKey
, *gLB
, *gUB
;
1629 Addr gMin
, gMax
, sMin
, sMax
, uMin
, uMax
;
1633 sPosInf
.addr
= ~(UWord
)0;
1637 gPosInf
.addr
= ~(UWord
)0;
1643 if (0) VG_(printf
)("Tree sizes %ld %ld\n",
1644 VG_(sizeFM
)(siTrees
[tid
]), VG_(sizeFM
)(giTree
));
1645 sOK
= VG_(findBoundsFM
)( siTrees
[tid
],
1646 (UWord
*)&sLB
, NULL
/*unused*/,
1647 (UWord
*)&sUB
, NULL
/*unused*/,
1648 (UWord
)&sNegInf
, 0/*unused*/,
1649 (UWord
)&sPosInf
, 0/*unused*/,
1651 gOK
= VG_(findBoundsFM
)( giTree
,
1652 (UWord
*)&gLB
, NULL
/*unused*/,
1653 (UWord
*)&gUB
, NULL
/*unused*/,
1654 (UWord
)&gNegInf
, 0/*unused*/,
1655 (UWord
)&gPosInf
, 0/*unused*/,
1657 if (!(sOK
&& gOK
)) {
1658 /* If this happens, then [ea,ea+szB) partially overlaps
1659 a heap or stack block. We can't represent that, so
1660 just forget it (should be very rare). However, do
1661 maximum sanity checks first. In such a
1662 partial overlap case, it can't be the case that both
1663 [ea] and [ea+szB-1] overlap the same block, since if
1664 that were indeed the case then it wouldn't be a
1665 partial overlap; rather it would simply fall inside
1666 that block entirely and we shouldn't be inside this
1667 conditional at all. */
1669 StackTreeNode
*ndFirst
, *ndLast
;
1670 ndFirst
= find_StackTreeNode( siTrees
[tid
], ea
);
1671 ndLast
= find_StackTreeNode( siTrees
[tid
], ea
+szB
-1 );
1672 /* if both ends of the range fall inside a block,
1673 they can't be in the same block. */
1674 if (ndFirst
&& ndLast
)
1675 tl_assert(ndFirst
!= ndLast
);
1676 /* for each end of the range, if it is in a block,
1677 the range as a whole can't be entirely within the
1680 tl_assert(!is_subinterval_of(ndFirst
->addr
,
1681 ndFirst
->szB
, ea
, szB
));
1683 tl_assert(!is_subinterval_of(ndLast
->addr
,
1684 ndLast
->szB
, ea
, szB
));
1687 GlobalTreeNode
*ndFirst
, *ndLast
;
1688 ndFirst
= find_GlobalTreeNode( giTree
, ea
);
1689 ndLast
= find_GlobalTreeNode( giTree
, ea
+szB
-1 );
1690 /* if both ends of the range fall inside a block,
1691 they can't be in the same block. */
1692 if (ndFirst
&& ndLast
)
1693 tl_assert(ndFirst
!= ndLast
);
1694 /* for each end of the range, if it is in a block,
1695 the range as a whole can't be entirely within the
1698 tl_assert(!is_subinterval_of(ndFirst
->addr
,
1699 ndFirst
->szB
, ea
, szB
));
1701 tl_assert(!is_subinterval_of(ndLast
->addr
,
1702 ndLast
->szB
, ea
, szB
));
1704 if (0) VG_(printf
)("overlapping blocks in cache\n");
1707 sMin
= sLB
== &sNegInf
? 0 : (sLB
->addr
+ sLB
->szB
);
1708 sMax
= sUB
== &sPosInf
? ~(UWord
)0 : (sUB
->addr
- 1);
1709 gMin
= gLB
== &gNegInf
? 0 : (gLB
->addr
+ gLB
->szB
);
1710 gMax
= gUB
== &gPosInf
? ~(UWord
)0 : (gUB
->addr
- 1);
1711 if (0) VG_(printf
)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
1712 sMin
, sMax
, gMin
, gMax
);
1713 /* [sMin,sMax] and [gMin,gMax] must both contain
1714 [ea,ea+szB) (right?) That implies they must overlap at
1715 at least over [ea,ea+szB). */
1716 tl_assert(sMin
<= ea
&& ea
+szB
-1 <= sMax
);
1717 tl_assert(gMin
<= ea
&& ea
+szB
-1 <= gMax
);
1718 /* So now compute their intersection. */
1719 uMin
= Addr__max( sMin
, gMin
);
1720 uMax
= Addr__min( sMax
, gMax
);
1721 if (0) VG_(printf
)("uMin %lx uMax %lx\n", uMin
, uMax
);
1722 tl_assert(uMin
<= uMax
);
1723 tl_assert(uMin
<= ea
&& ea
+szB
-1 <= uMax
);
1724 /* Finally, we can park [uMin,uMax] in the cache. However,
1725 if uMax is ~0, we can't represent the difference; hence
1727 if (uMin
< uMax
&& uMax
== ~(UWord
)0)
1730 toadd_szB
= uMax
- uMin
+ 1;
1734 /* We should only be caching info for the above 3 cases */
1736 } /* switch (inv->tag) */
1738 { /* and actually add this to the cache, finally */
1740 Word ip
= cache
->nInUse
/ 2; /* doesn't seem critical */
1742 if (cache
->nInUse
< N_QCACHE
)
1744 for (i
= cache
->nInUse
-1; i
> ip
; i
--) {
1745 cache
->elems
[i
] = cache
->elems
[i
-1];
1748 tl_assert(toadd_szB
> 0);
1749 cache
->elems
[ip
].addr
= toadd_addr
;
1750 cache
->elems
[ip
].szB
= toadd_szB
;
1751 cache
->elems
[ip
].inv
= *inv
;
1754 if (show
) QCache__pp(cache
, "after upd");
1760 /* CALLED FROM GENERATED CODE */
1763 void helperc__mem_access ( /* Known only at run time: */
1764 Addr ea
, Addr sp
, Addr fp
,
1765 /* Known at translation time: */
1766 Word sszB
, Addr ip
, XArray
* ip_frameBlocks
)
1769 IInstance
* iinstance
;
1772 ThreadId tid
= VG_(get_running_tid
)();
1774 HChar bufE
[160], bufA
[160], bufD
[32];
1776 stats__total_accesses
++;
1778 tl_assert(is_sane_TId(tid
));
1779 frame
= shadowStacks
[tid
];
1782 /* Find the instance info for this instruction. */
1783 tl_assert(ip_frameBlocks
);
1784 iinstance
= find_or_create_IInstance( frame
, ip
, ip_frameBlocks
);
1785 tl_assert(iinstance
);
1786 tl_assert(iinstance
->blocks
== ip_frameBlocks
);
1788 szB
= (sszB
< 0) ? (-sszB
) : sszB
;
1791 inv
= &iinstance
->invar
;
1793 /* Deal with first uses of instruction instances. */
1794 if (inv
->tag
== Inv_Unset
) {
1795 /* This is the first use of this instance of the instruction, so
1796 we can't make any check; we merely record what we saw, so we
1797 can compare it against what happens for 2nd and subsequent
1799 classify_address( inv
,
1800 tid
, ea
, sp
, fp
, szB
, iinstance
->blocks
);
1801 tl_assert(inv
->tag
!= Inv_Unset
);
1805 /* So generate an Invar and see if it's different from what
1807 classify_address( &new_inv
,
1808 tid
, ea
, sp
, fp
, szB
, iinstance
->blocks
);
1809 tl_assert(new_inv
.tag
!= Inv_Unset
);
1811 /* Did we see something different from before? If no, then there's
1813 if (LIKELY(eq_Invar(&new_inv
, inv
)))
1816 tl_assert(inv
->tag
!= Inv_Unset
);
1818 VG_(memset
)(bufE
, 0, sizeof(bufE
));
1819 show_Invar( bufE
, sizeof(bufE
)-1, inv
, frame
->depth
);
1821 VG_(memset
)(bufA
, 0, sizeof(bufA
));
1822 show_Invar( bufA
, sizeof(bufA
)-1, &new_inv
, frame
->depth
);
1824 VG_(memset
)(bufD
, 0, sizeof(bufD
));
1826 gen_delta_str( bufD
, &absDelta
, inv
, ea
);
1828 if (absDelta
< 1024)
1829 sg_record_error_SorG( tid
, ea
, sszB
, bufE
, bufA
, bufD
);
1831 /* And now install the new observation as "standard", so as to
1832 make future error messages make more sense. */
1837 ////////////////////////////////////////
1838 /* Primary push-a-new-frame routine. Called indirectly from
1841 static UWord stats__max_sitree_size
= 0;
1842 static UWord stats__max_gitree_size
= 0;
1845 void shadowStack_new_frame ( ThreadId tid
,
1846 Addr sp_at_call_insn
,
1847 Addr sp_post_call_insn
,
1848 Addr fp_at_call_insn
,
1849 Addr ip_post_call_insn
,
1850 XArray
* descrs_at_call_insn
)
1852 StackFrame
*callee
, *caller
;
1853 tl_assert(is_sane_TId(tid
));
1855 caller
= shadowStacks
[tid
];
1858 if (caller
->outer
) { /* "this is not the outermost frame" */
1859 tl_assert(caller
->outer
->inner
== caller
);
1860 tl_assert(caller
->outer
->depth
>= 0);
1861 tl_assert(1 + caller
->outer
->depth
== caller
->depth
);
1863 tl_assert(caller
->depth
== 0);
1866 caller
->sp_at_call
= sp_at_call_insn
;
1867 caller
->fp_at_call
= fp_at_call_insn
;
1869 if (descrs_at_call_insn
) {
1870 tl_assert( VG_(sizeXA
)(descrs_at_call_insn
) > 0 );
1871 caller
->blocks_added_by_call
1872 = calculate_StackBlock_EAs( descrs_at_call_insn
,
1873 sp_at_call_insn
, fp_at_call_insn
);
1874 if (caller
->blocks_added_by_call
)
1875 add_blocks_to_StackTree( siTrees
[tid
],
1876 descrs_at_call_insn
,
1877 caller
->blocks_added_by_call
,
1878 caller
->depth
/* stack depth at which
1880 considered to exist*/ );
1882 UWord s
= VG_(sizeFM
)( siTrees
[tid
] );
1883 UWord g
= VG_(sizeFM
)( giTree
);
1884 Bool sb
= s
> stats__max_sitree_size
;
1885 Bool gb
= g
> stats__max_gitree_size
;
1886 if (sb
) stats__max_sitree_size
= s
;
1887 if (gb
) stats__max_gitree_size
= g
;
1888 if (0 && (sb
|| gb
))
1889 VG_(message
)(Vg_DebugMsg
,
1890 "exp-sgcheck: new max tree sizes: "
1891 "StackTree %ld, GlobalTree %ld\n",
1892 stats__max_sitree_size
, stats__max_gitree_size
);
1895 caller
->blocks_added_by_call
= NULL
;
1898 /* caller->blocks_added_by_call is used again (and then freed) when
1899 this frame is removed from the stack. */
1901 if (caller
->inner
) {
1902 callee
= caller
->inner
;
1904 callee
= sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame
));
1905 VG_(memset
)(callee
, 0, sizeof(StackFrame
));
1906 callee
->outer
= caller
;
1907 caller
->inner
= callee
;
1908 callee
->depth
= 1 + caller
->depth
;
1909 tl_assert(callee
->inner
== NULL
);
1912 /* This sets up .htab, .htab_size and .htab_used */
1913 initialise_II_hash_table( callee
);
1915 callee
->creation_sp
= sp_post_call_insn
;
1916 callee
->sp_at_call
= 0; // not actually required ..
1917 callee
->fp_at_call
= 0; // .. these 3 initialisations are ..
1918 callee
->blocks_added_by_call
= NULL
; // .. just for cleanness
1920 /* record the new running stack frame */
1921 shadowStacks
[tid
] = callee
;
1923 /* and this thread's query cache is now invalid */
1924 QCache__invalidate( &qcaches
[tid
] );
1927 { Word d
= callee
->depth
;
1930 Addr ip
= ip_post_call_insn
;
1931 ok
= VG_(get_fnname_w_offset
)( ip
, fnname
, sizeof(fnname
) );
1936 VG_(printf
)("> %s %#lx\n", ok
? fnname
: "???", ip
);
1940 /* CALLED FROM GENERATED CODE */
1943 void helperc__new_frame ( Addr sp_post_call_insn
,
1944 Addr fp_at_call_insn
,
1945 Addr ip_post_call_insn
,
1946 XArray
* blocks_at_call_insn
,
1949 ThreadId tid
= VG_(get_running_tid
)();
1950 Addr sp_at_call_insn
= sp_post_call_insn
+ sp_adjust
;
1951 shadowStack_new_frame( tid
,
1956 blocks_at_call_insn
);
1960 ////////////////////////////////////////
1961 /* Primary remove-frame(s) routine. Called indirectly from
1964 __attribute__((noinline
))
1965 static void shadowStack_unwind ( ThreadId tid
, Addr sp_now
)
1967 StackFrame
*innermost
, *innermostOrig
;
1968 tl_assert(is_sane_TId(tid
));
1969 innermost
= shadowStacks
[tid
];
1970 tl_assert(innermost
);
1971 innermostOrig
= innermost
;
1972 //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
1974 if (!innermost
->outer
)
1976 if (innermost
->inner
)
1977 tl_assert(innermost
->inner
->outer
== innermost
);
1978 tl_assert(innermost
->outer
->inner
== innermost
);
1979 tl_assert(innermost
->blocks_added_by_call
== NULL
);
1980 if (sp_now
<= innermost
->creation_sp
) break;
1981 //VG_(printf)("UNWIND dump %p\n", innermost->creation_sp);
1982 tl_assert(innermost
->htab
);
1983 if (innermost
->htab
!= &innermost
->htab_fixed
[0])
1984 sg_free(innermost
->htab
);
1985 /* be on the safe side */
1986 innermost
->creation_sp
= 0;
1987 innermost
->htab
= NULL
;
1988 innermost
->htab_size
= 0;
1989 innermost
->htab_used
= 0;
1990 innermost
->sp_at_call
= 0;
1991 innermost
->fp_at_call
= 0;
1992 innermost
->blocks_added_by_call
= NULL
;
1993 innermost
= innermost
->outer
;
1995 /* So now we're "back" in the calling frame. Remove from this
1996 thread's stack-interval-tree, the blocks added at the time of
1999 if (innermost
->outer
) { /* not at the outermost frame */
2000 if (innermost
->blocks_added_by_call
== NULL
) {
2002 del_blocks_from_StackTree( siTrees
[tid
],
2003 innermost
->blocks_added_by_call
);
2004 VG_(deleteXA
)( innermost
->blocks_added_by_call
);
2005 innermost
->blocks_added_by_call
= NULL
;
2008 /* That completes the required tidying of the interval tree
2009 associated with the frame we just removed. */
2012 Word d
= innermost
->depth
;
2022 tl_assert(innermost
);
2024 if (innermost
!= innermostOrig
) {
2025 shadowStacks
[tid
] = innermost
;
2026 /* this thread's query cache is now invalid */
2027 QCache__invalidate( &qcaches
[tid
] );
2033 //////////////////////////////////////////////////////////////
2035 // Instrumentation //
2037 //////////////////////////////////////////////////////////////
2039 /* What does instrumentation need to do?
2041 - at each Call transfer, generate a call to shadowStack_new_frame
2042 do this by manually inspecting the IR
2044 - at each sp change, if the sp change is negative,
2045 call shadowStack_unwind
2046 do this by asking for SP-change analysis
2048 - for each memory referencing instruction,
2049 call helperc__mem_access
2052 /* A complication: sg_ instrumentation and h_ instrumentation need to
2053 be interleaved. Since the latter is a lot more complex than the
2054 former, we split the sg_ instrumentation here into four functions
2055 and let the h_ instrumenter call the four functions as it goes.
2056 Hence the h_ instrumenter drives the sg_ instrumenter.
2058 To make this viable, the sg_ instrumenter carries what running
2059 state it needs in 'struct _SGEnv'. This is exported only
2060 abstractly from this file.
2064 /* the current insn's IP */
2066 /* whether the above is actually known */
2068 /* if we find a mem ref, is it the first for this insn? Used for
2069 detecting insns which make more than one memory ref, a situation
2070 we basically can't really handle properly; and so we ignore all
2071 but the first ref. */
2074 IRTemp (*newIRTemp_cb
)(IRType
,void*);
2075 void* newIRTemp_opaque
;
2079 /* --- Helper fns for instrumentation --- */
2081 static IRTemp
gen_Get_SP ( struct _SGEnv
* sge
,
2083 VexGuestLayout
* layout
,
2089 /* This in effect forces the host and guest word sizes to be the
2091 tl_assert(hWordTy_szB
== layout
->sizeof_SP
);
2092 sp_type
= layout
->sizeof_SP
== 8 ? Ity_I64
: Ity_I32
;
2093 sp_expr
= IRExpr_Get( layout
->offset_SP
, sp_type
);
2094 sp_temp
= sge
->newIRTemp_cb( sp_type
, sge
->newIRTemp_opaque
);
2095 addStmtToIRSB( bbOut
, IRStmt_WrTmp( sp_temp
, sp_expr
) );
2099 static IRTemp
gen_Get_FP ( struct _SGEnv
* sge
,
2101 VexGuestLayout
* layout
,
2107 /* This in effect forces the host and guest word sizes to be the
2109 tl_assert(hWordTy_szB
== layout
->sizeof_SP
);
2110 fp_type
= layout
->sizeof_FP
== 8 ? Ity_I64
: Ity_I32
;
2111 fp_expr
= IRExpr_Get( layout
->offset_FP
, fp_type
);
2112 fp_temp
= sge
->newIRTemp_cb( fp_type
, sge
->newIRTemp_opaque
);
2113 addStmtToIRSB( bbOut
, IRStmt_WrTmp( fp_temp
, fp_expr
) );
2117 static void instrument_mem_access ( struct _SGEnv
* sge
,
2124 VexGuestLayout
* layout
)
2126 IRType tyAddr
= Ity_INVALID
;
2127 XArray
* frameBlocks
= NULL
;
2129 tl_assert(isIRAtom(addr
));
2130 tl_assert(hWordTy_szB
== 4 || hWordTy_szB
== 8);
2132 tyAddr
= typeOfIRExpr( bbOut
->tyenv
, addr
);
2133 tl_assert(tyAddr
== Ity_I32
|| tyAddr
== Ity_I64
);
2135 #if defined(VGA_x86)
2136 { UChar
* p
= (UChar
*)curr_IP
;
2138 if (p
[0] == 0xc3 && p
[-1] == 0x5d) return;
2139 // pop %ebp; RET $imm16
2140 if (p
[0] == 0xc2 && p
[-1] == 0x5d) return;
2141 // PUSH %EBP; mov %esp,%ebp
2142 if (p
[0] == 0x55 && p
[1] == 0x89 && p
[2] == 0xe5) return;
2146 /* First off, find or create the StackBlocks for this instruction. */
2147 frameBlocks
= get_StackBlocks_for_IP( curr_IP
);
2148 tl_assert(frameBlocks
);
2149 //if (VG_(sizeXA)( frameBlocks ) == 0)
2150 // frameBlocks = NULL;
2152 /* Generate a call to "helperc__mem_access", passing:
2153 addr current_SP current_FP szB curr_IP frameBlocks
2155 { IRTemp t_SP
= gen_Get_SP( sge
, bbOut
, layout
, hWordTy_szB
);
2156 IRTemp t_FP
= gen_Get_FP( sge
, bbOut
, layout
, hWordTy_szB
);
2158 = mkIRExprVec_6( addr
,
2161 mkIRExpr_HWord( isStore
? (-szB
) : szB
),
2162 mkIRExpr_HWord( curr_IP
),
2163 mkIRExpr_HWord( (HWord
)frameBlocks
) );
2165 = unsafeIRDirty_0_N( 3/*regparms*/,
2166 "helperc__mem_access",
2167 VG_(fnptr_to_fnentry
)( &helperc__mem_access
),
2170 addStmtToIRSB( bbOut
, IRStmt_Dirty(di
) );
2175 /* --- Instrumentation main (4 fns) --- */
2177 struct _SGEnv
* sg_instrument_init ( IRTemp (*newIRTemp_cb
)(IRType
,void*),
2178 void* newIRTemp_opaque
)
2180 struct _SGEnv
* env
= sg_malloc("di.sg_main.sii.1",
2181 sizeof(struct _SGEnv
));
2184 env
->curr_IP_known
= False
;
2185 env
->firstRef
= True
;
2186 env
->newIRTemp_cb
= newIRTemp_cb
;
2187 env
->newIRTemp_opaque
= newIRTemp_opaque
;
/* Release state allocated by sg_instrument_init (it was obtained via
   sg_malloc, so it is returned via sg_free). */
void sg_instrument_fini ( struct _SGEnv * env )
{
   sg_free(env);
}
2196 /* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
2197 as required. This must be called before 'st' itself is added to
2199 void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv
* env
,
2202 VexGuestLayout
* layout
,
2203 IRType gWordTy
, IRType hWordTy
)
2205 if (!sg_clo_enable_sg_checks
)
2209 tl_assert(isFlatIRStmt(st
));
2216 /* None of these can contain any memory references. */
2220 tl_assert(st
->Ist
.Exit
.jk
!= Ijk_Call
);
2221 /* else we must deal with a conditional call */
2225 env
->curr_IP_known
= True
;
2226 env
->curr_IP
= (Addr
)st
->Ist
.IMark
.addr
;
2227 env
->firstRef
= True
;
2231 tl_assert(env
->curr_IP_known
);
2232 if (env
->firstRef
) {
2233 instrument_mem_access(
2236 sizeofIRType(typeOfIRExpr(sbOut
->tyenv
, st
->Ist
.Store
.data
)),
2238 sizeofIRType(hWordTy
),
2239 env
->curr_IP
, layout
2241 env
->firstRef
= False
;
2246 IRExpr
* data
= st
->Ist
.WrTmp
.data
;
2247 if (data
->tag
== Iex_Load
) {
2248 tl_assert(env
->curr_IP_known
);
2249 if (env
->firstRef
) {
2250 instrument_mem_access(
2252 data
->Iex
.Load
.addr
,
2253 sizeofIRType(data
->Iex
.Load
.ty
),
2255 sizeofIRType(hWordTy
),
2256 env
->curr_IP
, layout
2258 env
->firstRef
= False
;
2266 IRDirty
* d
= st
->Ist
.Dirty
.details
;
2267 if (d
->mFx
!= Ifx_None
) {
2268 /* This dirty helper accesses memory. Collect the
2270 tl_assert(env
->curr_IP_known
);
2271 if (env
->firstRef
) {
2272 tl_assert(d
->mAddr
!= NULL
);
2273 tl_assert(d
->mSize
!= 0);
2274 dataSize
= d
->mSize
;
2275 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
) {
2276 instrument_mem_access(
2277 env
, sbOut
, d
->mAddr
, dataSize
, False
/*!isStore*/,
2278 sizeofIRType(hWordTy
), env
->curr_IP
, layout
2281 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
) {
2282 instrument_mem_access(
2283 env
, sbOut
, d
->mAddr
, dataSize
, True
/*isStore*/,
2284 sizeofIRType(hWordTy
), env
->curr_IP
, layout
2287 env
->firstRef
= False
;
2290 tl_assert(d
->mAddr
== NULL
);
2291 tl_assert(d
->mSize
== 0);
2297 /* We treat it as a read and a write of the location. I
2298 think that is the same behaviour as it was before IRCAS
2299 was introduced, since prior to that point, the Vex front
2300 ends would translate a lock-prefixed instruction into a
2301 (normal) read followed by a (normal) write. */
2302 if (env
->firstRef
) {
2304 IRCAS
* cas
= st
->Ist
.CAS
.details
;
2305 tl_assert(cas
->addr
!= NULL
);
2306 tl_assert(cas
->dataLo
!= NULL
);
2307 dataSize
= sizeofIRType(typeOfIRExpr(sbOut
->tyenv
, cas
->dataLo
));
2308 if (cas
->dataHi
!= NULL
)
2309 dataSize
*= 2; /* since it's a doubleword-CAS */
2310 instrument_mem_access(
2311 env
, sbOut
, cas
->addr
, dataSize
, False
/*!isStore*/,
2312 sizeofIRType(hWordTy
), env
->curr_IP
, layout
2314 instrument_mem_access(
2315 env
, sbOut
, cas
->addr
, dataSize
, True
/*isStore*/,
2316 sizeofIRType(hWordTy
), env
->curr_IP
, layout
2318 env
->firstRef
= False
;
2326 } /* switch (st->tag) */
2330 /* Add instrumentation for the final jump of an IRSB 'sbOut', and
2331 possibly modify 'env' as required. This must be the last
2332 instrumentation statement in the block. */
2333 void sg_instrument_final_jump ( /*MOD*/struct _SGEnv
* env
,
2336 IRJumpKind jumpkind
,
2337 VexGuestLayout
* layout
,
2338 IRType gWordTy
, IRType hWordTy
)
2340 if (!sg_clo_enable_sg_checks
)
2343 if (jumpkind
== Ijk_Call
) {
2344 // Assumes x86 or amd64
2345 IRTemp sp_post_call_insn
, fp_post_call_insn
;
2346 XArray
* frameBlocks
;
2350 = gen_Get_SP( env
, sbOut
, layout
, sizeofIRType(hWordTy
) );
2352 = gen_Get_FP( env
, sbOut
, layout
, sizeofIRType(hWordTy
) );
2353 tl_assert(env
->curr_IP_known
);
2354 frameBlocks
= get_StackBlocks_for_IP( env
->curr_IP
);
2355 tl_assert(frameBlocks
);
2356 if (VG_(sizeXA
)(frameBlocks
) == 0)
2360 IRExpr_RdTmp(sp_post_call_insn
),
2361 IRExpr_RdTmp(fp_post_call_insn
),
2362 /* assume the call doesn't change FP */
2364 mkIRExpr_HWord( (HWord
)frameBlocks
),
2365 mkIRExpr_HWord( sizeofIRType(gWordTy
) )
2367 di
= unsafeIRDirty_0_N(
2369 "helperc__new_frame",
2370 VG_(fnptr_to_fnentry
)( &helperc__new_frame
),
2372 addStmtToIRSB( sbOut
, IRStmt_Dirty(di
) );
2377 //////////////////////////////////////////////////////////////
2379 // end Instrumentation //
2381 //////////////////////////////////////////////////////////////
2384 //////////////////////////////////////////////////////////////
2388 //////////////////////////////////////////////////////////////
2390 /* Make a new empty stack frame that is suitable for being the
2391 outermost frame in a stack. It has a creation_sp of effectively
2392 infinity, so it can never be removed. */
2393 static StackFrame
* new_root_StackFrame ( void )
2395 StackFrame
* sframe
= sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame
));
2396 VG_(memset
)( sframe
, 0, sizeof(*sframe
) );
2397 sframe
->creation_sp
= ~0UL;
2399 /* This sets up .htab, .htab_size and .htab_used */
2400 initialise_II_hash_table( sframe
);
2402 /* ->depth, ->outer, ->inner are 0, NULL, NULL */
2407 /* Primary routine for setting up the shadow stack for a new thread.
2408 Note that this is used to create not only child thread stacks, but
2409 the root thread's stack too. We create a new stack with
2410 .creation_sp set to infinity, so that the outermost frame can never
2411 be removed (by shadowStack_unwind). The core calls this function
2412 as soon as a thread is created. We cannot yet get its SP value,
2413 since that may not yet be set. */
2414 static void shadowStack_thread_create ( ThreadId parent
, ThreadId child
)
2416 tl_assert(is_sane_TId(child
));
2417 if (parent
== VG_INVALID_THREADID
) {
2418 /* creating the main thread's stack */
2420 tl_assert(is_sane_TId(parent
));
2421 tl_assert(parent
!= child
);
2422 tl_assert(shadowStacks
[parent
] != NULL
);
2423 tl_assert(siTrees
[parent
] != NULL
);
2426 /* Create the child's stack. Bear in mind we may be re-using
2428 if (shadowStacks
[child
] == NULL
) {
2429 /* First use of this stack. Just allocate an initial frame. */
2430 tl_assert(siTrees
[child
] == NULL
);
2432 StackFrame
*frame
, *frame2
;
2433 /* re-using a stack. */
2434 /* get rid of the interval tree */
2435 tl_assert(siTrees
[child
] != NULL
);
2436 delete_StackTree( siTrees
[child
] );
2437 siTrees
[child
] = NULL
;
2438 /* Throw away all existing frames. */
2439 frame
= shadowStacks
[child
];
2440 while (frame
->outer
)
2441 frame
= frame
->outer
;
2442 tl_assert(frame
->depth
== 0);
2444 frame2
= frame
->inner
;
2445 if (frame2
) tl_assert(1 + frame
->depth
== frame2
->depth
);
2449 shadowStacks
[child
] = NULL
;
2452 tl_assert(shadowStacks
[child
] == NULL
);
2453 tl_assert(siTrees
[child
] == NULL
);
2455 /* Set up the initial stack frame. */
2456 shadowStacks
[child
] = new_root_StackFrame();
2458 /* and set up the child's stack block interval tree. */
2459 siTrees
[child
] = new_StackTree();
2462 /* Once a thread is ready to go, the core calls here. We take the
2463 opportunity to push a second frame on its stack, with the
2464 presumably valid SP value that is going to be used for the thread's
2465 startup. Hence we should always wind up with a valid outermost
2466 frame for the thread. */
2467 static void shadowStack_set_initial_SP ( ThreadId tid
)
2470 tl_assert(is_sane_TId(tid
));
2471 sf
= shadowStacks
[tid
];
2472 tl_assert(sf
!= NULL
);
2473 tl_assert(sf
->outer
== NULL
);
2474 tl_assert(sf
->inner
== NULL
);
2475 tl_assert(sf
->creation_sp
== ~0UL);
2476 shadowStack_new_frame( tid
, 0, VG_(get_SP
)(tid
),
2477 0, VG_(get_IP
)(tid
), NULL
);
2481 //////////////////////////////////////////////////////////////
2485 //////////////////////////////////////////////////////////////
2487 /* CALLED indirectly FROM GENERATED CODE. Calls here are created by
2488 sp-change analysis, as requested in pc_pre_clo_int(). */
2489 void sg_die_mem_stack ( Addr old_SP
, SizeT len
) {
2490 ThreadId tid
= VG_(get_running_tid
)();
2491 shadowStack_unwind( tid
, old_SP
+len
);
/* Tool startup, before command-line option processing: set up the
   interned stack-block and global-block sets. */
void sg_pre_clo_init ( void ) {
   init_StackBlocks_set();
   init_GlobalBlock_set();
}
/* Tool startup, after command-line option processing.  Nothing to do
   here. */
void sg_post_clo_init ( void ) {
}
2503 void sg_pre_thread_ll_create ( ThreadId parent
, ThreadId child
) {
2504 shadowStack_thread_create(parent
, child
);
2507 void sg_pre_thread_first_insn ( ThreadId tid
) {
2508 shadowStack_set_initial_SP(tid
);
2511 void sg_fini(Int exitcode
)
2513 if (VG_(clo_stats
)) {
2514 VG_(message
)(Vg_DebugMsg
,
2515 " sg_: %'llu total accesses, of which:\n", stats__total_accesses
);
2516 VG_(message
)(Vg_DebugMsg
,
2517 " sg_: stack0: %'12llu classify\n",
2518 stats__classify_Stack0
);
2519 VG_(message
)(Vg_DebugMsg
,
2520 " sg_: stackN: %'12llu classify\n",
2521 stats__classify_StackN
);
2522 VG_(message
)(Vg_DebugMsg
,
2523 " sg_: global: %'12llu classify\n",
2524 stats__classify_Global
);
2525 VG_(message
)(Vg_DebugMsg
,
2526 " sg_: unknown: %'12llu classify\n",
2527 stats__classify_Unknown
);
2528 VG_(message
)(Vg_DebugMsg
,
2529 " sg_: %'llu Invars preened, of which %'llu changed\n",
2530 stats__Invars_preened
, stats__Invars_changed
);
2531 VG_(message
)(Vg_DebugMsg
,
2532 " sg_: t_i_b_MT: %'12llu\n", stats__t_i_b_empty
);
2533 VG_(message
)(Vg_DebugMsg
,
2534 " sg_: qcache: %'llu searches, %'llu probes, %'llu misses\n",
2535 stats__qcache_queries
, stats__qcache_probes
, stats__qcache_misses
);
2536 VG_(message
)(Vg_DebugMsg
,
2537 " sg_: htab-fast: %'llu hits\n",
2539 VG_(message
)(Vg_DebugMsg
,
2540 " sg_: htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
2541 stats__htab_searches
, stats__htab_probes
, stats__htab_resizes
);
2546 /*--------------------------------------------------------------------*/
2547 /*--- end sg_main.c ---*/
2548 /*--------------------------------------------------------------------*/