Add missing zstd.h to coregrind Makefile.am noinst_HEADERS
[valgrind.git] / coregrind / m_stacktrace.c
blob630b5b87524c236106b805be643d1a65a5f70a11
2 /*--------------------------------------------------------------------*/
3 /*--- Take snapshots of client stacks. m_stacktrace.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2000-2017 Julian Seward
11 jseward@acm.org
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
29 #include "pub_core_basics.h"
30 #include "pub_core_vki.h"
31 #include "pub_core_threadstate.h"
32 #include "pub_core_debuginfo.h" // XXX: circular dependency
33 #include "pub_core_aspacemgr.h" // For VG_(is_addressable)()
34 #include "pub_core_libcbase.h"
35 #include "pub_core_libcassert.h"
36 #include "pub_core_libcprint.h"
37 #include "pub_core_machine.h"
38 #include "pub_core_options.h"
39 #include "pub_core_stacks.h" // VG_(stack_limits)
40 #include "pub_core_stacktrace.h"
41 #include "pub_core_syswrap.h" // VG_(is_in_syscall)
42 #include "pub_core_xarray.h"
43 #include "pub_core_clientstate.h" // VG_(client__dl_sysinfo_int80)
44 #include "pub_core_trampoline.h"
45 #include "config.h"
48 /*------------------------------------------------------------*/
49 /*--- ---*/
50 /*--- BEGIN platform-dependent unwinder worker functions ---*/
51 /*--- ---*/
52 /*------------------------------------------------------------*/
/* Take a snapshot of the client's stack, putting up to 'max_n_ips'
   IPs into 'ips'.  In order to be thread-safe, we pass in the
   thread's IP, SP, FP if that's meaningful, and LR if that's
   meaningful.  Returns the number of IPs put in 'ips'.

   If you know what the thread ID for this stack is, send that as the
   first parameter, else send zero.  This helps generate better stack
   traces on ppc64-linux.  The thread ID is also consulted by the
   "in syscall" adjustment compiled in for Darwin and older FreeBSD;
   elsewhere it has no effect.
*/
/* Do frame merging in the _i frames in _ips array of recursive cycles
   of up to _nframes.  The merge is done during stack unwinding
   (i.e. in platform specific unwinders) to collect as many
   "interesting" stack traces as possible.

   _nframes : maximum cycle length to look for; <= 0 disables merging.
   _ips     : array of recorded IPs; entries [0 .. _i-1] are valid.
   _i       : lvalue holding the number of valid entries.  If the most
              recently recorded IP, _ips[_i-1], equals an entry 'dist'
              slots earlier (1 <= dist <= _nframes, dist < _i), _i is
              reduced by dist, using the smallest such dist, so the
              repeated cycle will be overwritten by subsequent frames.

   The whole expansion is a single do { } while (0) statement, so the
   macro can safely be used as the body of an if/else.  Arguments are
   evaluated more than once: do not pass expressions with side
   effects. */
#define RECURSIVE_MERGE(_nframes,_ips,_i)                               \
   do {                                                                 \
      if (UNLIKELY((_nframes) > 0)) {                                   \
         Int rm_dist;                                                   \
         for (rm_dist = 1;                                              \
              rm_dist <= (_nframes) && rm_dist < (Int)(_i);             \
              rm_dist++) {                                              \
            if ((_ips)[(_i)-1] == (_ips)[(_i)-1-rm_dist]) {             \
               (_i) = (_i) - rm_dist;                                   \
               break; /* leaves the for loop only */                    \
            }                                                           \
         }                                                              \
      }                                                                 \
   } while (0)
79 /* Note about calculation of fp_min : fp_min is the lowest address
80 which can be accessed during unwinding. This is SP - VG_STACK_REDZONE_SZB.
81 On most platforms, this will be equal to SP (as VG_STACK_REDZONE_SZB
82 is 0). However, on some platforms (e.g. amd64), there is an accessible
83 redzone below the SP. Some CFI unwind info are generated, taking this
84 into account. As an example, the following is a CFI unwind info on
85 amd64 found for a 'retq' instruction:
86 [0x400f7e .. 0x400f7e]: let cfa=oldSP+8 in RA=*(cfa+-8) SP=cfa+0 BP=*(cfa+-16)
87 0x400f7e: retq
88 As you can see, the previous BP is found 16 bytes below the cfa, which
89 is the oldSP+8. So, effectively, the BP is found 8 bytes below the SP.
90 The fp_min must take this into account, otherwise, VG_(use_CF_info) will
91 not unwind the BP. */
93 /* ------------------------ x86 ------------------------- */
95 #if defined(VGP_x86_linux) || defined(VGP_x86_darwin) \
96 || defined(VGP_x86_solaris) || defined(VGP_x86_freebsd)
98 #define N_FP_CF_VERIF 1021
99 // prime number so that size of fp_CF_verif is just below 4K or 8K
100 // Note that this prime nr differs from the one chosen in
101 // m_debuginfo/debuginfo.c for the cfsi cache : in case we have
102 // a collision here between two IPs, we expect to not (often) have the
103 // same collision in the cfsi cache (and vice-versa).
105 // unwinding with fp chain is ok:
106 #define FPUNWIND 0
107 // there is no CFI info for this IP:
108 #define NOINFO 1
109 // Unwind with FP is not ok, must use CF unwind:
110 #define CFUNWIND 2
112 static Addr fp_CF_verif_cache [N_FP_CF_VERIF];
114 /* An unwind done by following the fp chain technique can be incorrect
115 as not all frames are respecting the standard bp/sp ABI.
116 The CF information is now generated by default by gcc
117 (as part of the dwarf info). However, unwinding using CF information
118 is significantly slower : a slowdown of 20% has been observed
119 on an helgrind test case.
120 So, by default, the unwinding will be done using the fp chain.
121 But before accepting to unwind an IP with fp_chain, the result
122 of the unwind will be checked with the CF information.
123 This check can give 3 results:
124 FPUNWIND (0): there is CF info, and it gives the same result as fp unwind.
125 => it is assumed that future unwind for this IP can be done
126 with the fast fp chain, without further CF checking
127 NOINFO (1): there is no CF info (so, fp unwind is the only do-able thing)
128 CFUNWIND (2): there is CF info, but unwind result differs.
129 => it is assumed that future unwind for this IP must be done
130 with the CF info.
131 Of course, if each fp unwind implies a check done with a CF unwind,
132 it would just be slower => we cache the check result in an
133 array of checked Addr.
134 The check for an IP will be stored at
135 fp_CF_verif_cache[IP % N_FP_CF_VERIF] as one of:
136 IP ^ FPUNWIND
137 IP ^ NOINFO
138 IP ^ CFUNWIND
140 Note: we can re-use the last (ROUNDDOWN (log (N_FP_CF_VERIF))) bits
141 to store the check result, as they are guaranteed to be non significant
142 in the comparison between 2 IPs stored in fp_CF_verif_cache).
143 In other words, if two IPs are only differing on the last 2 bits,
144 then they will not land in the same cache bucket.
147 /* cached result of VG_(FPO_info_present)(). Refreshed each time
148 the fp_CF_verif_generation is different of the current debuginfo
149 generation. */
150 static Bool FPO_info_present = False;
152 static UInt fp_CF_verif_generation = 0;
153 // Our cache has to be maintained in sync with the CFI cache.
154 // Each time the debuginfo is changed, its generation will be incremented.
155 // We will clear our cache when our saved generation differs from
156 // the debuginfo generation.
158 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
159 /*OUT*/Addr* ips, UInt max_n_ips,
160 /*OUT*/Addr* sps, /*OUT*/Addr* fps,
161 const UnwindStartRegs* startRegs,
162 Addr fp_max_orig )
164 const Bool do_stats = False; // compute and output some stats regularly.
165 static struct {
166 UInt nr; // nr of stacktraces computed
167 UInt nf; // nr of frames computed
168 UInt Ca; // unwind for which cache indicates CFUnwind must be used.
169 UInt FF; // unwind for which cache indicates FPUnwind can be used.
170 UInt Cf; // unwind at end of stack+store CFUNWIND (xip not end of stack).
171 UInt Fw; // unwind at end of stack+store FPUNWIND
172 UInt FO; // unwind + store FPUNWIND
173 UInt CF; // unwind + store CFUNWIND. Details below.
174 UInt xi; UInt xs; UInt xb; // register(s) which caused a 'store CFUNWIND'.
175 UInt Ck; // unwind fp invalid+store FPUNWIND
176 UInt MS; // microsoft unwind
177 } stats;
179 const Bool debug = False;
180 // = VG_(debugLog_getLevel) () > 3;
181 // = True;
182 // = stats.nr >= 123456;
183 const HChar* unwind_case; // used when debug is True.
184 // Debugging this function is not straightforward.
185 // Here is the easiest way I have found:
186 // 1. Change the above to True.
187 // 2. Start your program under Valgrind with --tool=none --vgdb-error=0
188 // 3. Use GDB/vgdb to put a breakpoint where you want to debug the stacktrace
189 // 4. Continue till breakpoint is encountered
190 // 5. From GDB, use 'monitor v.info scheduler' and examine the unwind traces.
191 // You might have to do twice 'monitor v.info scheduler' to see
192 // the effect of caching the results of the verification.
193 // You can also modify the debug dynamically using by using
194 // 'monitor v.set debuglog 4.
196 Int i;
197 Addr fp_max;
198 UInt n_found = 0;
199 const Int cmrf = VG_(clo_merge_recursive_frames);
201 vg_assert(sizeof(Addr) == sizeof(UWord));
202 vg_assert(sizeof(Addr) == sizeof(void*));
204 D3UnwindRegs fpverif_uregs; // result of CF unwind for a check reason.
205 Addr xip_verified = 0; // xip for which we have calculated fpverif_uregs
206 // 0 assigned to silence false positive -Wuninitialized warning
207 // This is a false positive as xip_verified is assigned when
208 // xip_verif > CFUNWIND and only used if xip_verif > CFUNWIND.
210 D3UnwindRegs uregs;
211 uregs.xip = (Addr)startRegs->r_pc;
212 uregs.xsp = (Addr)startRegs->r_sp;
213 uregs.xbp = startRegs->misc.X86.r_ebp;
214 Addr fp_min = uregs.xsp - VG_STACK_REDZONE_SZB;
216 /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
217 stopping when the trail goes cold, which we guess to be
218 when FP is not a reasonable stack location. */
220 // JRS 2002-sep-17: hack, to round up fp_max to the end of the
221 // current page, at least. Dunno if it helps.
222 // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
223 fp_max = VG_PGROUNDUP(fp_max_orig);
224 if (fp_max >= sizeof(Addr))
225 fp_max -= sizeof(Addr);
227 if (debug)
228 VG_(printf)("max_n_ips=%u fp_min=0x%08lx fp_max_orig=0x08%lx, "
229 "fp_max=0x%08lx ip=0x%08lx fp=0x%08lx\n",
230 max_n_ips, fp_min, fp_max_orig, fp_max,
231 uregs.xip, uregs.xbp);
233 /* Assertion broken before main() is reached in pthreaded programs; the
234 * offending stack traces only have one item. --njn, 2002-aug-16 */
235 /* vg_assert(fp_min <= fp_max);*/
236 // On Darwin, this kicks in for pthread-related stack traces, so they're
237 // only 1 entry long which is wrong.
238 # if defined(VGO_linux)
239 if (fp_min + 512 >= fp_max) {
240 /* If the stack limits look bogus, don't poke around ... but
241 don't bomb out either. */
242 # elif defined(VGO_solaris) || defined(VGO_freebsd)
243 if (fp_max == 0) {
244 /* VG_(get_StackTrace)() can be called by tools very early when
245 various tracing options are enabled. Don't proceed further
246 if the stack limits look bogus.
248 # endif
249 # if defined(VGO_linux) || defined(VGO_solaris) || defined(VGO_freebsd)
250 if (sps) sps[0] = uregs.xsp;
251 if (fps) fps[0] = uregs.xbp;
252 ips[0] = uregs.xip;
253 return 1;
255 # endif
257 if (UNLIKELY (fp_CF_verif_generation != VG_(debuginfo_generation)())) {
258 fp_CF_verif_generation = VG_(debuginfo_generation)();
259 VG_(memset)(&fp_CF_verif_cache, 0, sizeof(fp_CF_verif_cache));
260 FPO_info_present = VG_(FPO_info_present)();
264 /* Loop unwinding the stack. Note that the IP value we get on
265 * each pass (whether from CFI info or a stack frame) is a
266 * return address so is actually after the calling instruction
267 * in the calling function.
269 * Because of this we subtract one from the IP after each pass
270 * of the loop so that we find the right CFI block on the next
271 * pass - otherwise we can find the wrong CFI info if it happens
272 * to change after the calling instruction and that will mean
273 * that we will fail to unwind the next step.
275 * This most frequently happens at the end of a function when
276 * a tail call occurs and we wind up using the CFI info for the
277 * next function which is completely wrong.
279 if (sps) sps[0] = uregs.xsp;
280 if (fps) fps[0] = uregs.xbp;
281 ips[0] = uregs.xip;
282 i = 1;
283 if (do_stats) stats.nr++;
285 // Does this apply to macOS 10.14 and earlier?
286 # if defined(VGO_freebsd) && (__FreeBSD_version < 1300000)
287 if (VG_(is_valid_tid)(tid_if_known) &&
288 VG_(is_in_syscall)(tid_if_known) &&
289 i < max_n_ips) {
290 /* On FreeBSD, all the system call stubs have no function
291 * prolog. So instead of top of the stack being a new
292 * frame comprising a saved BP and a return address, we
293 * just have the return address in the caller's frame.
294 * Adjust for this by recording the return address.
296 if (debug)
297 VG_(printf)(" in syscall, use XSP-1\n");
298 ips[i] = *(Addr *)uregs.xsp - 1;
299 if (sps) sps[i] = uregs.xsp;
300 if (fps) fps[i] = uregs.xbp;
301 i++;
303 # endif
305 while (True) {
307 if (i >= max_n_ips)
308 break;
310 UWord hash = uregs.xip % N_FP_CF_VERIF;
311 Addr xip_verif = uregs.xip ^ fp_CF_verif_cache [hash];
312 if (debug)
313 VG_(printf)(" uregs.xip 0x%08lx xip_verif[0x%08lx]"
314 " xbp 0x%08lx xsp 0x%08lx\n",
315 uregs.xip, xip_verif,
316 uregs.xbp, uregs.xsp);
317 // If xip is in cache, then xip_verif will be <= CFUNWIND.
318 // Otherwise, if not in cache, xip_verif will be > CFUNWIND.
320 /* Try to derive a new (ip,sp,fp) triple from the current set. */
322 /* Do we have to do CFI unwinding ?
323 We do CFI unwinding if one of the following condition holds:
324 a. fp_CF_verif_cache contains xip but indicates CFUNWIND must
325 be done (i.e. fp unwind check failed when we did the first
326 unwind for this IP).
327 b. fp_CF_verif_cache does not contain xip.
328 We will try CFI unwinding in fpverif_uregs and compare with
329 FP unwind result to insert xip in the cache with the correct
330 indicator. */
331 if (UNLIKELY(xip_verif >= CFUNWIND)) {
332 if (xip_verif == CFUNWIND) {
333 /* case a : do "real" cfi unwind */
334 if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
335 if (debug) unwind_case = "Ca";
336 if (do_stats) stats.Ca++;
337 goto unwind_done;
339 /* ??? cache indicates we have to do CFI unwind (so, we
340 previously found CFI info, and failed the fp unwind
341 check). Now, we just failed with CFI. So, once we
342 succeed, once we fail. No idea what is going on =>
343 cleanup the cache entry and fallover to fp unwind (this
344 time). */
345 fp_CF_verif_cache [hash] = 0;
346 if (debug) VG_(printf)(" cache reset as CFI ok then nok\n");
347 //??? stats
348 xip_verif = NOINFO;
349 } else {
350 /* case b : do "verif" cfi unwind in fpverif_uregs */
351 fpverif_uregs = uregs;
352 xip_verified = uregs.xip;
353 if ( !VG_(use_CF_info)( &fpverif_uregs, fp_min, fp_max ) ) {
354 fp_CF_verif_cache [hash] = uregs.xip ^ NOINFO;
355 if (debug) VG_(printf)(" cache NOINFO fpverif_uregs\n");
356 xip_verif = NOINFO;
361 /* On x86, try the old-fashioned method of following the
362 %ebp-chain. This can be done if the fp_CF_verif_cache for xip
363 indicate fp unwind is ok. This must be done if the cache indicates
364 there is no info. This is also done to confirm what to put in the cache
365 if xip was not in the cache. */
366 /* This deals with frames resulting from functions which begin "pushl%
367 ebp ; movl %esp, %ebp" which is the ABI-mandated preamble. */
368 if (fp_min <= uregs.xbp &&
369 uregs.xbp <= fp_max - 1 * sizeof(UWord)/*see comment below*/ &&
370 VG_IS_4_ALIGNED(uregs.xbp))
372 Addr old_xsp;
374 /* fp looks sane, so use it. */
375 uregs.xip = (((UWord*)uregs.xbp)[1]);
376 // We stop if we hit a zero (the traditional end-of-stack
377 // marker) or a one -- these correspond to recorded IPs of 0 or -1.
378 // The latter because r8818 (in this file) changes the meaning of
379 // entries [1] and above in a stack trace, by subtracting 1 from
380 // them. Hence stacks that used to end with a zero value now end in
381 // -1 and so we must detect that too.
382 if (0 == uregs.xip || 1 == uregs.xip) {
383 if (xip_verif > CFUNWIND) {
384 // Check if we obtain the same result with fp unwind.
385 // If same result, then mark xip as fp unwindable
386 if (uregs.xip == fpverif_uregs.xip) {
387 fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
388 if (debug) VG_(printf)(" cache FPUNWIND 0\n");
389 unwind_case = "Fw";
390 if (do_stats) stats.Fw++;
391 break;
392 } else {
393 fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
394 uregs = fpverif_uregs;
395 if (debug) VG_(printf)(" cache CFUNWIND 0\n");
396 unwind_case = "Cf";
397 if (do_stats) stats.Cf++;
398 goto unwind_done;
400 } else {
401 // end of stack => out of the loop.
402 break;
406 old_xsp = uregs.xsp;
407 uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %ebp*/
408 + sizeof(Addr) /*ra*/;
409 uregs.xbp = (((UWord*)uregs.xbp)[0]);
410 if (xip_verif > CFUNWIND) {
411 if (uregs.xip == fpverif_uregs.xip
412 && uregs.xsp == fpverif_uregs.xsp
413 && uregs.xbp == fpverif_uregs.xbp) {
414 fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
415 if (debug) VG_(printf)(" cache FPUNWIND >2\n");
416 if (debug) unwind_case = "FO";
417 if (do_stats) stats.FO++;
418 if (old_xsp >= uregs.xsp) {
419 if (debug)
420 VG_(printf) (" FO end of stack old_xsp %p >= xsp %p\n",
421 (void*)old_xsp, (void*)uregs.xsp);
422 break;
424 } else {
425 fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
426 if (debug) VG_(printf)(" cache CFUNWIND >2\n");
427 if (do_stats && uregs.xip != fpverif_uregs.xip) stats.xi++;
428 if (do_stats && uregs.xsp != fpverif_uregs.xsp) stats.xs++;
429 if (do_stats && uregs.xbp != fpverif_uregs.xbp) stats.xb++;
430 uregs = fpverif_uregs;
431 if (debug) unwind_case = "CF";
432 if (do_stats) stats.CF++;
434 } else {
435 if (debug) unwind_case = "FF";
436 if (do_stats) stats.FF++;
437 if (old_xsp >= uregs.xsp) {
438 if (debug)
439 VG_(printf) (" FF end of stack old_xsp %p >= xsp %p\n",
440 (void*)old_xsp, (void*)uregs.xsp);
441 break;
444 goto unwind_done;
445 } else {
446 // fp unwind has failed.
447 // If we were checking the validity of the cfi unwinding,
448 // we mark in the cache that the fp unwind cannot be done, and that
449 // cfi unwind is desired.
450 if (xip_verif > CFUNWIND) {
451 // We know that fpverif_uregs contains valid information,
452 // as a failed cf unwind would have put NOINFO in xip_verif.
453 fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
454 if (debug) VG_(printf)(" cache CFUNWIND as fp failed\n");
455 uregs = fpverif_uregs;
456 if (debug) unwind_case = "Ck";
457 if (do_stats) stats.Ck++;
458 goto unwind_done;
460 // xip_verif is FPUNWIND or NOINFO.
461 // We failed the cfi unwind and/or the fp unwind.
462 // => fallback to FPO info.
465 /* And, similarly, try for MSVC FPO unwind info. */
466 if (FPO_info_present
467 && VG_(use_FPO_info)( &uregs.xip, &uregs.xsp, &uregs.xbp,
468 VG_(current_DiEpoch)(),
469 fp_min, fp_max ) ) {
470 if (debug) unwind_case = "MS";
471 if (do_stats) stats.MS++;
472 goto unwind_done;
475 /* No luck. We have to give up. */
476 break;
478 unwind_done:
479 /* Add a frame in ips/sps/fps */
480 /* fp is %ebp. sp is %esp. ip is %eip. */
481 if (0 == uregs.xip || 1 == uregs.xip) break;
482 if (sps) sps[i] = uregs.xsp;
483 if (fps) fps[i] = uregs.xbp;
484 ips[i++] = uregs.xip - 1;
485 /* -1: refer to calling insn, not the RA */
486 if (debug)
487 VG_(printf)(" ips%s[%d]=0x%08lx\n", unwind_case, i-1, ips[i-1]);
488 uregs.xip = uregs.xip - 1;
489 /* as per comment at the head of this loop */
490 RECURSIVE_MERGE(cmrf,ips,i);
493 if (do_stats) stats.nf += i;
494 if (do_stats && stats.nr % 10000 == 0) {
495 VG_(printf)("nr %u nf %u "
496 "Ca %u FF %u "
497 "Cf %u "
498 "Fw %u FO %u "
499 "CF %u (xi %u xs %u xb %u) "
500 "Ck %u MS %u\n",
501 stats.nr, stats.nf,
502 stats.Ca, stats.FF,
503 stats.Cf,
504 stats.Fw, stats.FO,
505 stats.CF, stats.xi, stats.xs, stats.xb,
506 stats.Ck, stats.MS);
508 n_found = i;
509 return n_found;
512 #undef N_FP_CF_VERIF
513 #undef FPUNWIND
514 #undef NOINFO
515 #undef CFUNWIND
517 #endif
519 /* ----------------------- amd64 ------------------------ */
521 #if defined(VGP_amd64_linux) || defined(VGP_amd64_darwin) \
522 || defined(VGP_amd64_solaris) || defined(VGP_amd64_freebsd)
525 * Concerning the comment in the function about syscalls, I'm not sure
526 * what changed or when with FreeBSD. The situation going at least
527 * as far back as FreeBSD 12.1 (so Nov 2019) is that system calls are
528 * implemented with generated wrappers that call through an interposing
529 * table of function pointers. The result when built with clang is that
530 * code for the frame pointer prolog is generated but then an optimized
531 * sibling call is made. That means the frame pointer is popped off
532 * the stack and a jmp is made to the function in the table rather than
533 * a call.
535 * The end result is that, when we are in a syscall it is as though there were
536 * no prolog but a copy of the frame pointer is stored one 64bit word below the
537 * stack pointer. If more recent FreeBSD uses the hack that sets
538 * ips[i] = *(Addr *)uregs.xsp - 1;
539 * then the caller of the syscall gets added twice.
542 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
543 /*OUT*/Addr* ips, UInt max_n_ips,
544 /*OUT*/Addr* sps, /*OUT*/Addr* fps,
545 const UnwindStartRegs* startRegs,
546 Addr fp_max_orig )
548 const Bool debug = False;
549 Int i;
550 Addr fp_max;
551 UInt n_found = 0;
552 const Int cmrf = VG_(clo_merge_recursive_frames);
554 vg_assert(sizeof(Addr) == sizeof(UWord));
555 vg_assert(sizeof(Addr) == sizeof(void*));
557 D3UnwindRegs uregs;
558 uregs.xip = startRegs->r_pc;
559 uregs.xsp = startRegs->r_sp;
560 uregs.xbp = startRegs->misc.AMD64.r_rbp;
561 Addr fp_min = uregs.xsp - VG_STACK_REDZONE_SZB;
563 /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
564 stopping when the trail goes cold, which we guess to be
565 when FP is not a reasonable stack location. */
567 // JRS 2002-sep-17: hack, to round up fp_max to the end of the
568 // current page, at least. Dunno if it helps.
569 // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
570 fp_max = VG_PGROUNDUP(fp_max_orig);
571 if (fp_max >= sizeof(Addr))
572 fp_max -= sizeof(Addr);
574 if (debug)
575 VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
576 "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
577 max_n_ips, fp_min, fp_max_orig, fp_max,
578 uregs.xip, uregs.xbp);
580 /* Assertion broken before main() is reached in pthreaded programs; the
581 * offending stack traces only have one item. --njn, 2002-aug-16 */
582 /* vg_assert(fp_min <= fp_max);*/
583 // On Darwin, this kicks in for pthread-related stack traces, so they're
584 // only 1 entry long which is wrong.
585 # if defined(VGO_linux)
586 if (fp_min + 256 >= fp_max) {
587 /* If the stack limits look bogus, don't poke around ... but
588 don't bomb out either. */
589 # elif defined(VGO_solaris)
590 if (fp_max == 0) {
591 /* VG_(get_StackTrace)() can be called by tools very early when
592 various tracing options are enabled. Don't proceed further
593 if the stack limits look bogus.
595 # endif
596 # if defined(VGO_linux) || defined(VGO_solaris)
598 if (sps) sps[0] = uregs.xsp;
599 if (fps) fps[0] = uregs.xbp;
600 ips[0] = uregs.xip;
601 return 1;
603 # endif
605 /* fp is %rbp. sp is %rsp. ip is %rip. */
607 ips[0] = uregs.xip;
608 if (sps) sps[0] = uregs.xsp;
609 if (fps) fps[0] = uregs.xbp;
610 i = 1;
611 if (debug)
612 VG_(printf)(" ipsS[%d]=%#08lx rbp %#08lx rsp %#08lx\n",
613 i-1, ips[i-1], uregs.xbp, uregs.xsp);
615 # if defined(VGO_darwin) || (defined(VGO_freebsd) && __FreeBSD_version < 1300000)
616 if (VG_(is_valid_tid)(tid_if_known) &&
617 VG_(is_in_syscall)(tid_if_known) &&
618 i < max_n_ips) {
619 /* On Darwin, all the system call stubs have no function
620 * prolog. So instead of top of the stack being a new
621 * frame comprising a saved BP and a return address, we
622 * just have the return address in the caller's frame.
623 * Adjust for this by recording the return address.
625 if (debug)
626 VG_(printf)(" in syscall, use XSP-1\n");
627 ips[i] = *(Addr *)uregs.xsp - 1;
628 if (sps) sps[i] = uregs.xsp;
629 if (fps) fps[i] = uregs.xbp;
630 i++;
632 # endif
634 /* Loop unwinding the stack. Note that the IP value we get on
635 * each pass (whether from CFI info or a stack frame) is a
636 * return address so is actually after the calling instruction
637 * in the calling function.
639 * Because of this we subtract one from the IP after each pass
640 * of the loop so that we find the right CFI block on the next
641 * pass - otherwise we can find the wrong CFI info if it happens
642 * to change after the calling instruction and that will mean
643 * that we will fail to unwind the next step.
645 * This most frequently happens at the end of a function when
646 * a tail call occurs and we wind up using the CFI info for the
647 * next function which is completely wrong.
649 while (True) {
650 Addr old_xsp;
652 if (i >= max_n_ips)
653 break;
655 old_xsp = uregs.xsp;
657 /* Try to derive a new (ip,sp,fp) triple from the current set. */
659 /* First off, see if there is any CFI info to hand which can
660 be used. */
661 if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
662 if (0 == uregs.xip || 1 == uregs.xip) break;
663 if (old_xsp >= uregs.xsp) {
664 if (debug)
665 VG_(printf) (" CF end of stack old_xsp %p >= xsp %p\n",
666 (void*)old_xsp, (void*)uregs.xsp);
667 break;
669 if (sps) sps[i] = uregs.xsp;
670 if (fps) fps[i] = uregs.xbp;
671 ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
672 if (debug)
673 VG_(printf)(" ipsC[%d]=%#08lx rbp %#08lx rsp %#08lx\n",
674 i-1, ips[i-1], uregs.xbp, uregs.xsp);
675 uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
676 RECURSIVE_MERGE(cmrf,ips,i);
677 continue;
680 /* If VG_(use_CF_info) fails, it won't modify ip/sp/fp, so
681 we can safely try the old-fashioned method. */
682 /* This bit is supposed to deal with frames resulting from
683 functions which begin "pushq %rbp ; movq %rsp, %rbp".
684 Unfortunately, since we can't (easily) look at the insns at
685 the start of the fn, like GDB does, there's no reliable way
686 to tell. Hence the hack of first trying out CFI, and if that
687 fails, then use this as a fallback. */
688 /* Note: re "- 1 * sizeof(UWord)", need to take account of the
689 fact that we are prodding at & ((UWord*)fp)[1] and so need to
690 adjust the limit check accordingly. Omitting this has been
691 observed to cause segfaults on rare occasions. */
692 if (fp_min <= uregs.xbp && uregs.xbp <= fp_max - 1 * sizeof(UWord)) {
693 /* fp looks sane, so use it. */
694 uregs.xip = (((UWord*)uregs.xbp)[1]);
695 if (0 == uregs.xip || 1 == uregs.xip) break;
696 uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %rbp*/
697 + sizeof(Addr) /*ra*/;
698 if (old_xsp >= uregs.xsp) {
699 if (debug)
700 VG_(printf) (" FF end of stack old_xsp %p >= xsp %p\n",
701 (void*)old_xsp, (void*)uregs.xsp);
702 break;
704 uregs.xbp = (((UWord*)uregs.xbp)[0]);
705 if (sps) sps[i] = uregs.xsp;
706 if (fps) fps[i] = uregs.xbp;
707 ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
708 if (debug)
709 VG_(printf)(" ipsF[%d]=%#08lx rbp %#08lx rsp %#08lx\n",
710 i-1, ips[i-1], uregs.xbp, uregs.xsp);
711 uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
712 RECURSIVE_MERGE(cmrf,ips,i);
713 continue;
716 /* Last-ditch hack (evidently GDB does something similar). We
717 are in the middle of nowhere and we have a nonsense value for
718 the frame pointer. If the stack pointer is still valid,
719 assume that what it points at is a return address. Yes,
720 desperate measures. Could do better here:
721 - check that the supposed return address is in
722 an executable page
723 - check that the supposed return address is just after a call insn
724 - given those two checks, don't just consider *sp as the return
725 address; instead scan a likely section of stack (eg sp .. sp+256)
726 and use suitable values found there.
728 if (fp_min <= uregs.xsp && uregs.xsp < fp_max) {
729 uregs.xip = ((UWord*)uregs.xsp)[0];
730 if (0 == uregs.xip || 1 == uregs.xip) break;
731 if (sps) sps[i] = uregs.xsp;
732 if (fps) fps[i] = uregs.xbp;
733 ips[i++] = uregs.xip == 0
734 ? 0 /* sp[0] == 0 ==> stuck at the bottom of a
735 thread stack */
736 : uregs.xip - 1;
737 /* -1: refer to calling insn, not the RA */
738 if (debug)
739 VG_(printf)(" ipsH[%d]=%#08lx\n", i-1, ips[i-1]);
740 uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
741 uregs.xsp += 8;
742 RECURSIVE_MERGE(cmrf,ips,i);
743 continue;
746 /* No luck at all. We have to give up. */
747 break;
750 n_found = i;
751 return n_found;
754 #endif
756 /* -----------------------ppc32/64 ---------------------- */
758 #if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux) \
759 || defined(VGP_ppc64le_linux)
761 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
762 /*OUT*/Addr* ips, UInt max_n_ips,
763 /*OUT*/Addr* sps, /*OUT*/Addr* fps,
764 const UnwindStartRegs* startRegs,
765 Addr fp_max_orig )
767 Bool lr_is_first_RA = False;
768 # if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
769 Word redir_stack_size = 0;
770 Word redirs_used = 0;
771 # endif
772 const Int cmrf = VG_(clo_merge_recursive_frames);
773 const DiEpoch cur_ep = VG_(current_DiEpoch)();
775 Bool debug = False;
776 Int i;
777 Addr fp_max;
778 UInt n_found = 0;
780 vg_assert(sizeof(Addr) == sizeof(UWord));
781 vg_assert(sizeof(Addr) == sizeof(void*));
783 Addr ip = (Addr)startRegs->r_pc;
784 Addr sp = (Addr)startRegs->r_sp;
785 Addr fp = sp;
786 # if defined(VGP_ppc32_linux)
787 Addr lr = startRegs->misc.PPC32.r_lr;
788 # elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
789 Addr lr = startRegs->misc.PPC64.r_lr;
790 # endif
791 Addr fp_min = sp - VG_STACK_REDZONE_SZB;
793 VG_(addr_load_di)(ip);
795 /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
796 stopping when the trail goes cold, which we guess to be
797 when FP is not a reasonable stack location. */
799 // JRS 2002-sep-17: hack, to round up fp_max to the end of the
800 // current page, at least. Dunno if it helps.
801 // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
802 fp_max = VG_PGROUNDUP(fp_max_orig);
803 if (fp_max >= sizeof(Addr))
804 fp_max -= sizeof(Addr);
806 if (debug)
807 VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
808 "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
809 max_n_ips, fp_min, fp_max_orig, fp_max, ip, fp);
811 /* Assertion broken before main() is reached in pthreaded programs; the
812 * offending stack traces only have one item. --njn, 2002-aug-16 */
813 /* vg_assert(fp_min <= fp_max);*/
814 if (fp_min + 512 >= fp_max) {
815 /* If the stack limits look bogus, don't poke around ... but
816 don't bomb out either. */
817 if (sps) sps[0] = sp;
818 if (fps) fps[0] = fp;
819 ips[0] = ip;
820 return 1;
823 /* fp is %r1. ip is %cia. Note, ppc uses r1 as both the stack and
824 frame pointers. */
826 # if defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
827 redir_stack_size = VEX_GUEST_PPC64_REDIR_STACK_SIZE;
828 redirs_used = 0;
829 # endif
831 # if defined(VG_PLAT_USES_PPCTOC) || defined (VGP_ppc64le_linux)
832 /* Deal with bogus LR values caused by function
833 interception/wrapping on ppc-TOC platforms; see comment on
834 similar code a few lines further down. */
835 if (lr == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
836 && VG_(is_valid_tid)(tid_if_known)) {
837 Word hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
838 redirs_used++;
839 if (hsp >= 1 && hsp < redir_stack_size)
840 lr = VG_(threads)[tid_if_known]
841 .arch.vex.guest_REDIR_STACK[hsp-1];
843 # endif
845 /* We have to determine whether or not LR currently holds this fn
846 (call it F)'s return address. It might not if F has previously
847 called some other function, hence overwriting LR with a pointer
848 to some part of F. Hence if LR and IP point to the same
849 function then we conclude LR does not hold this function's
850 return address; instead the LR at entry must have been saved in
851 the stack by F's prologue and so we must get it from there
852 instead. Note all this guff only applies to the innermost
853 frame. */
854 lr_is_first_RA = False;
856 const HChar *buf_lr, *buf_ip;
857 /* The following conditional looks grossly inefficient and
858 surely could be majorly improved, with not much effort. */
859 if (VG_(get_fnname_raw) (cur_ep, lr, &buf_lr)) {
860 HChar buf_lr_copy[VG_(strlen)(buf_lr) + 1];
861 VG_(strcpy)(buf_lr_copy, buf_lr);
862 if (VG_(get_fnname_raw) (cur_ep, ip, &buf_ip))
863 if (VG_(strcmp)(buf_lr_copy, buf_ip))
864 lr_is_first_RA = True;
868 if (sps) sps[0] = fp; /* NB. not sp */
869 if (fps) fps[0] = fp;
870 ips[0] = ip;
871 i = 1;
873 if (fp_min <= fp && fp < fp_max-VG_WORDSIZE+1) {
875 /* initial FP is sane; keep going */
876 fp = (((UWord*)fp)[0]);
878 while (True) {
880 /* On ppc64-linux (ppc64-elf, really), the lr save
881 slot is 2 words back from sp, whereas on ppc32-elf(?) it's
882 only one word back. */
883 # if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
884 const Int lr_offset = 2;
885 # else
886 const Int lr_offset = 1;
887 # endif
889 if (i >= max_n_ips)
890 break;
892 /* Try to derive a new (ip,fp) pair from the current set. */
894 if (fp_min <= fp && fp <= fp_max - lr_offset * sizeof(UWord)) {
895 /* fp looks sane, so use it. */
897 if (i == 1 && lr_is_first_RA)
898 ip = lr;
899 else
900 ip = (((UWord*)fp)[lr_offset]);
902 # if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
903 /* Nasty hack to do with function replacement/wrapping on
904 ppc64-linux. If LR points to our magic return stub,
905 then we are in a wrapped or intercepted function, in
906 which LR has been messed with. The original LR will
907 have been pushed onto the thread's hidden REDIR stack
908 one down from the top (top element is the saved R2) and
909 so we should restore the value from there instead.
910 Since nested redirections can and do happen, we keep
911 track of the number of nested LRs used by the unwinding
912 so far with 'redirs_used'. */
913 if (ip == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
914 && VG_(is_valid_tid)(tid_if_known)) {
915 Word hsp = VG_(threads)[tid_if_known]
916 .arch.vex.guest_REDIR_SP;
917 hsp -= 2 * redirs_used;
918 redirs_used ++;
919 if (hsp >= 1 && hsp < redir_stack_size)
920 ip = VG_(threads)[tid_if_known]
921 .arch.vex.guest_REDIR_STACK[hsp-1];
923 # endif
925 if (0 == ip || 1 == ip) break;
926 if (sps) sps[i] = fp; /* NB. not sp */
927 if (fps) fps[i] = fp;
928 fp = (((UWord*)fp)[0]);
929 ips[i++] = ip - 1; /* -1: refer to calling insn, not the RA */
930 if (debug)
931 VG_(printf)(" ipsF[%d]=%#08lx\n", i-1, ips[i-1]);
932 ip = ip - 1; /* ip is probably dead at this point, but
933 play safe, a la x86/amd64 above. See
934 extensive comments above. */
935 RECURSIVE_MERGE(cmrf,ips,i);
936 VG_(addr_load_di)(ip);
937 continue;
940 /* No luck there. We have to give up. */
941 break;
945 n_found = i;
946 return n_found;
949 #endif
951 /* ------------------------ arm ------------------------- */
953 #if defined(VGP_arm_linux)
955 static Bool in_same_fn ( Addr a1, Addr a2 )
957 const HChar *buf_a1, *buf_a2;
958 /* The following conditional looks grossly inefficient and
959 surely could be majorly improved, with not much effort. */
960 const DiEpoch cur_ep = VG_(current_DiEpoch)();
961 if (VG_(get_fnname_raw) (cur_ep, a1, &buf_a1)) {
962 HChar buf_a1_copy[VG_(strlen)(buf_a1) + 1];
963 VG_(strcpy)(buf_a1_copy, buf_a1);
964 if (VG_(get_fnname_raw) (cur_ep, a2, &buf_a2))
965 if (VG_(strcmp)(buf_a1_copy, buf_a2))
966 return True;
968 return False;
971 static Bool in_same_page ( Addr a1, Addr a2 ) {
972 return (a1 & ~0xFFF) == (a2 & ~0xFFF);
975 static Addr abs_diff ( Addr a1, Addr a2 ) {
976 return (Addr)(a1 > a2 ? a1 - a2 : a2 - a1);
979 static Bool has_XT_perms ( Addr a )
981 NSegment const* seg = VG_(am_find_nsegment)(a);
982 return seg && seg->hasX && seg->hasT;
985 static Bool looks_like_Thumb_call32 ( UShort w0, UShort w1 )
987 if (0)
988 VG_(printf)("isT32call %04x %04x\n", (UInt)w0, (UInt)w1);
989 // BL simm26
990 if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) return True;
991 // BLX simm26
992 if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) return True;
993 return False;
996 static Bool looks_like_Thumb_call16 ( UShort w0 )
998 return False;
1001 static Bool looks_like_ARM_call ( UInt a0 )
1003 if (0)
1004 VG_(printf)("isA32call %08x\n", a0);
1005 // Leading E forces unconditional only -- fix
1006 if ((a0 & 0xFF000000) == 0xEB000000) return True;
1007 return False;
1010 static Bool looks_like_RA ( Addr ra )
1012 /* 'ra' is a plausible return address if it points to
1013 an instruction after a call insn. */
1014 Bool isT = (ra & 1);
1015 if (isT) {
1016 // returning to Thumb code
1017 ra &= ~1;
1018 ra -= 4;
1019 if (has_XT_perms(ra)) {
1020 UShort w0 = *(UShort*)ra;
1021 UShort w1 = in_same_page(ra, ra+2) ? *(UShort*)(ra+2) : 0;
1022 if (looks_like_Thumb_call16(w1) || looks_like_Thumb_call32(w0,w1))
1023 return True;
1025 } else {
1026 // ARM
1027 ra &= ~3;
1028 ra -= 4;
1029 if (has_XT_perms(ra)) {
1030 UInt a0 = *(UInt*)ra;
1031 if (looks_like_ARM_call(a0))
1032 return True;
1035 return False;
1038 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
1039 /*OUT*/Addr* ips, UInt max_n_ips,
1040 /*OUT*/Addr* sps, /*OUT*/Addr* fps,
1041 const UnwindStartRegs* startRegs,
1042 Addr fp_max_orig )
1044 Bool debug = False;
1045 Int i;
1046 Addr fp_max;
1047 UInt n_found = 0;
1048 const Int cmrf = VG_(clo_merge_recursive_frames);
1050 vg_assert(sizeof(Addr) == sizeof(UWord));
1051 vg_assert(sizeof(Addr) == sizeof(void*));
1053 D3UnwindRegs uregs;
1054 uregs.r15 = startRegs->r_pc & 0xFFFFFFFE;
1055 uregs.r14 = startRegs->misc.ARM.r14;
1056 uregs.r13 = startRegs->r_sp;
1057 uregs.r12 = startRegs->misc.ARM.r12;
1058 uregs.r11 = startRegs->misc.ARM.r11;
1059 uregs.r7 = startRegs->misc.ARM.r7;
1060 Addr fp_min = uregs.r13 - VG_STACK_REDZONE_SZB;
1062 /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
1063 stopping when the trail goes cold, which we guess to be
1064 when FP is not a reasonable stack location. */
1066 // JRS 2002-sep-17: hack, to round up fp_max to the end of the
1067 // current page, at least. Dunno if it helps.
1068 // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
1069 fp_max = VG_PGROUNDUP(fp_max_orig);
1070 if (fp_max >= sizeof(Addr))
1071 fp_max -= sizeof(Addr);
1073 if (debug)
1074 VG_(printf)("\nmax_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
1075 "fp_max=0x%lx r15=0x%lx r13=0x%lx\n",
1076 max_n_ips, fp_min, fp_max_orig, fp_max,
1077 uregs.r15, uregs.r13);
1079 /* Assertion broken before main() is reached in pthreaded programs; the
1080 * offending stack traces only have one item. --njn, 2002-aug-16 */
1081 /* vg_assert(fp_min <= fp_max);*/
1082 // On Darwin, this kicks in for pthread-related stack traces, so they're
1083 // only 1 entry long which is wrong.
1084 if (fp_min + 512 >= fp_max) {
1085 /* If the stack limits look bogus, don't poke around ... but
1086 don't bomb out either. */
1087 if (sps) sps[0] = uregs.r13;
1088 if (fps) fps[0] = 0;
1089 ips[0] = uregs.r15;
1090 return 1;
1093 /* */
1095 if (sps) sps[0] = uregs.r13;
1096 if (fps) fps[0] = 0;
1097 ips[0] = uregs.r15;
1098 i = 1;
1100 /* Loop unwinding the stack. */
1101 Bool do_stack_scan = False;
1103 /* First try the Official Way, using Dwarf CFI. */
1104 while (True) {
1105 if (debug) {
1106 VG_(printf)("i: %d, r15: 0x%lx, r13: 0x%lx\n",
1107 i, uregs.r15, uregs.r13);
1110 if (i >= max_n_ips)
1111 break;
1113 if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
1114 if (sps) sps[i] = uregs.r13;
1115 if (fps) fps[i] = 0;
1116 ips[i++] = (uregs.r15 & 0xFFFFFFFE) - 1;
1117 if (debug)
1118 VG_(printf)("USING CFI: r15: 0x%lx, r13: 0x%lx\n",
1119 uregs.r15, uregs.r13);
1120 uregs.r15 = (uregs.r15 & 0xFFFFFFFE) - 1;
1121 RECURSIVE_MERGE(cmrf,ips,i);
1122 continue;
1125 /* No luck. We have to give up. */
1126 do_stack_scan = True;
1127 break;
1130 /* Now try Plan B (maybe) -- stack scanning. This often gives
1131 pretty bad results, so this has to be enabled explicitly by the
1132 user. */
1133 if (do_stack_scan
1134 && i < max_n_ips && i < (Int)VG_(clo_unw_stack_scan_thresh)) {
1135 Int nByStackScan = 0;
1136 Addr lr = uregs.r14;
1137 Addr sp = uregs.r13 & ~3;
1138 Addr pc = uregs.r15;
1139 // First see if LR contains
1140 // something that could be a valid return address.
1141 if (!in_same_fn(lr, pc) && looks_like_RA(lr)) {
1142 // take it only if 'cand' isn't obviously a duplicate
1143 // of the last found IP value
1144 Addr cand = (lr & 0xFFFFFFFE) - 1;
1145 if (abs_diff(cand, ips[i-1]) > 1) {
1146 if (sps) sps[i] = 0;
1147 if (fps) fps[i] = 0;
1148 ips[i++] = cand;
1149 RECURSIVE_MERGE(cmrf,ips,i);
1150 nByStackScan++;
1153 while (in_same_page(sp, uregs.r13)) {
1154 if (i >= max_n_ips)
1155 break;
1156 // we're in the same page; fairly safe to keep going
1157 UWord w = *(UWord*)(sp & ~0x3);
1158 if (looks_like_RA(w)) {
1159 Addr cand = (w & 0xFFFFFFFE) - 1;
1160 // take it only if 'cand' isn't obviously a duplicate
1161 // of the last found IP value
1162 if (abs_diff(cand, ips[i-1]) > 1) {
1163 if (sps) sps[i] = 0;
1164 if (fps) fps[i] = 0;
1165 ips[i++] = cand;
1166 RECURSIVE_MERGE(cmrf,ips,i);
1167 if (++nByStackScan >= VG_(clo_unw_stack_scan_frames)) break;
1170 sp += 4;
1174 n_found = i;
1175 return n_found;
1178 #endif
1180 /* ------------------------ arm64 ------------------------- */
1182 #if defined(VGP_arm64_linux) || defined(VGP_arm64_freebsd)
1184 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
1185 /*OUT*/Addr* ips, UInt max_n_ips,
1186 /*OUT*/Addr* sps, /*OUT*/Addr* fps,
1187 const UnwindStartRegs* startRegs,
1188 Addr fp_max_orig )
1190 Bool debug = False;
1191 Int i;
1192 Addr fp_max;
1193 UInt n_found = 0;
1194 const Int cmrf = VG_(clo_merge_recursive_frames);
1196 vg_assert(sizeof(Addr) == sizeof(UWord));
1197 vg_assert(sizeof(Addr) == sizeof(void*));
1199 D3UnwindRegs uregs;
1200 uregs.pc = startRegs->r_pc;
1201 uregs.sp = startRegs->r_sp;
1202 uregs.x30 = startRegs->misc.ARM64.x30;
1203 uregs.x29 = startRegs->misc.ARM64.x29;
1204 Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;
1206 /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
1207 stopping when the trail goes cold, which we guess to be
1208 when FP is not a reasonable stack location. */
1210 // JRS 2002-sep-17: hack, to round up fp_max to the end of the
1211 // current page, at least. Dunno if it helps.
1212 // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
1213 fp_max = VG_PGROUNDUP(fp_max_orig);
1214 if (fp_max >= sizeof(Addr))
1215 fp_max -= sizeof(Addr);
1217 if (debug)
1218 VG_(printf)("\nmax_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
1219 "fp_max=0x%lx PC=0x%lx SP=0x%lx\n",
1220 max_n_ips, fp_min, fp_max_orig, fp_max,
1221 uregs.pc, uregs.sp);
1223 /* Assertion broken before main() is reached in pthreaded programs; the
1224 * offending stack traces only have one item. --njn, 2002-aug-16 */
1225 /* vg_assert(fp_min <= fp_max);*/
1226 // On Darwin, this kicks in for pthread-related stack traces, so they're
1227 // only 1 entry long which is wrong.
1228 # if defined(VGO_linux)
1229 if (fp_min + 512 >= fp_max) {
1230 # elif defined(VGO_freebsd)
1231 if (fp_max == 0) {
1232 #endif
1233 # if defined(VGO_linux) || defined(VGO_freebsd)
1234 /* If the stack limits look bogus, don't poke around ... but
1235 don't bomb out either. */
1236 if (sps) sps[0] = uregs.sp;
1237 if (fps) fps[0] = uregs.x29;
1238 ips[0] = uregs.pc;
1239 return 1;
1241 #endif
1243 /* */
1245 if (sps) sps[0] = uregs.sp;
1246 if (fps) fps[0] = uregs.x29;
1247 ips[0] = uregs.pc;
1248 i = 1;
1250 /* Loop unwinding the stack, using CFI. */
1251 while (True) {
1252 if (debug) {
1253 VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx\n",
1254 i, uregs.pc, uregs.sp);
1257 if (i >= max_n_ips)
1258 break;
1260 if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
1261 if (sps) sps[i] = uregs.sp;
1262 if (fps) fps[i] = uregs.x29;
1263 ips[i++] = uregs.pc - 1;
1264 if (debug)
1265 VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx\n",
1266 uregs.pc, uregs.sp);
1267 uregs.pc = uregs.pc - 1;
1268 RECURSIVE_MERGE(cmrf,ips,i);
1269 continue;
1272 /* No luck. We have to give up. */
1273 break;
1276 n_found = i;
1277 return n_found;
1280 #endif
1282 /* ------------------------ s390x ------------------------- */
1284 #if defined(VGP_s390x_linux)
1286 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
1287 /*OUT*/Addr* ips, UInt max_n_ips,
1288 /*OUT*/Addr* sps, /*OUT*/Addr* fps,
1289 const UnwindStartRegs* startRegs,
1290 Addr fp_max_orig )
1292 Bool debug = False;
1293 Int i;
1294 Addr fp_max;
1295 UInt n_found = 0;
1296 const Int cmrf = VG_(clo_merge_recursive_frames);
1298 vg_assert(sizeof(Addr) == sizeof(UWord));
1299 vg_assert(sizeof(Addr) == sizeof(void*));
1301 D3UnwindRegs uregs;
1302 uregs.ia = startRegs->r_pc;
1303 uregs.sp = startRegs->r_sp;
1304 Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;
1305 uregs.fp = startRegs->misc.S390X.r_fp;
1306 uregs.lr = startRegs->misc.S390X.r_lr;
1307 uregs.f0 = startRegs->misc.S390X.r_f0;
1308 uregs.f1 = startRegs->misc.S390X.r_f1;
1309 uregs.f2 = startRegs->misc.S390X.r_f2;
1310 uregs.f3 = startRegs->misc.S390X.r_f3;
1311 uregs.f4 = startRegs->misc.S390X.r_f4;
1312 uregs.f5 = startRegs->misc.S390X.r_f5;
1313 uregs.f6 = startRegs->misc.S390X.r_f6;
1314 uregs.f7 = startRegs->misc.S390X.r_f7;
1316 fp_max = VG_PGROUNDUP(fp_max_orig);
1317 if (fp_max >= sizeof(Addr))
1318 fp_max -= sizeof(Addr);
1320 if (debug)
1321 VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
1322 "fp_max=0x%lx IA=0x%lx SP=0x%lx FP=0x%lx\n",
1323 max_n_ips, fp_min, fp_max_orig, fp_max,
1324 uregs.ia, uregs.sp,uregs.fp);
1326 /* The first frame is pretty obvious */
1327 ips[0] = uregs.ia;
1328 if (sps) sps[0] = uregs.sp;
1329 if (fps) fps[0] = uregs.fp;
1330 i = 1;
1332 /* for everything else we have to rely on the eh_frame. gcc defaults to
1333 not create a backchain and all the other tools (like gdb) also have
1334 to use the CFI. */
1335 while (True) {
1336 if (i >= max_n_ips)
1337 break;
1339 if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
1340 if (sps) sps[i] = uregs.sp;
1341 if (fps) fps[i] = uregs.fp;
1342 ips[i++] = uregs.ia - 1;
1343 uregs.ia = uregs.ia - 1;
1344 RECURSIVE_MERGE(cmrf,ips,i);
1345 continue;
1347 /* A problem on the first frame? Lets assume it was a bad jump.
1348 We will use the link register and the current stack and frame
1349 pointers and see if we can use the CFI in the next round. */
1350 if (i == 1) {
1351 if (sps) {
1352 sps[i] = sps[0];
1353 uregs.sp = sps[0];
1355 if (fps) {
1356 fps[i] = fps[0];
1357 uregs.fp = fps[0];
1359 uregs.ia = uregs.lr - 1;
1360 ips[i++] = uregs.lr - 1;
1361 RECURSIVE_MERGE(cmrf,ips,i);
1362 continue;
1365 /* No luck. We have to give up. */
1366 break;
1369 n_found = i;
1370 return n_found;
1373 #endif
1375 /* ------------------------ mips 32/64 ------------------------- */
1376 #if defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
1377 || defined(VGP_nanomips_linux)
1378 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
1379 /*OUT*/Addr* ips, UInt max_n_ips,
1380 /*OUT*/Addr* sps, /*OUT*/Addr* fps,
1381 const UnwindStartRegs* startRegs,
1382 Addr fp_max_orig )
1384 Bool debug = False;
1385 Int i;
1386 Addr fp_max;
1387 UInt n_found = 0;
1388 const Int cmrf = VG_(clo_merge_recursive_frames);
1390 vg_assert(sizeof(Addr) == sizeof(UWord));
1391 vg_assert(sizeof(Addr) == sizeof(void*));
1393 D3UnwindRegs uregs;
1394 uregs.pc = startRegs->r_pc;
1395 uregs.sp = startRegs->r_sp;
1396 Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;
1398 #if defined(VGP_mips32_linux) || defined(VGP_nanomips_linux)
1399 uregs.fp = startRegs->misc.MIPS32.r30;
1400 uregs.ra = startRegs->misc.MIPS32.r31;
1401 #elif defined(VGP_mips64_linux)
1402 uregs.fp = startRegs->misc.MIPS64.r30;
1403 uregs.ra = startRegs->misc.MIPS64.r31;
1404 #endif
1406 /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
1407 stopping when the trail goes cold, which we guess to be
1408 when FP is not a reasonable stack location. */
1410 fp_max = VG_PGROUNDUP(fp_max_orig);
1411 if (fp_max >= sizeof(Addr))
1412 fp_max -= sizeof(Addr);
1414 if (debug)
1415 VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
1416 "fp_max=0x%lx pc=0x%lx sp=0x%lx fp=0x%lx\n",
1417 max_n_ips, fp_min, fp_max_orig, fp_max,
1418 uregs.pc, uregs.sp, uregs.fp);
1420 if (sps) sps[0] = uregs.sp;
1421 if (fps) fps[0] = uregs.fp;
1422 ips[0] = uregs.pc;
1423 i = 1;
1425 /* Loop unwinding the stack. */
1427 while (True) {
1428 if (debug) {
1429 VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx, ra: 0x%lx\n",
1430 i, uregs.pc, uregs.sp, uregs.ra);
1432 if (i >= max_n_ips)
1433 break;
1435 D3UnwindRegs uregs_copy = uregs;
1436 if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
1437 if (debug)
1438 VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx, ra: 0x%lx\n",
1439 uregs.pc, uregs.sp, uregs.ra);
1440 if (0 != uregs.pc && 1 != uregs.pc) {
1441 if (sps) sps[i] = uregs.sp;
1442 if (fps) fps[i] = uregs.fp;
1443 ips[i++] = uregs.pc - 4;
1444 uregs.pc = uregs.pc - 4;
1445 RECURSIVE_MERGE(cmrf,ips,i);
1446 continue;
1447 } else
1448 uregs = uregs_copy;
1451 int seen_sp_adjust = 0;
1452 long frame_offset = 0;
1453 PtrdiffT offset;
1454 const DiEpoch cur_ep = VG_(current_DiEpoch)();
1455 if (VG_(get_inst_offset_in_function)(cur_ep, uregs.pc, &offset)) {
1456 Addr start_pc = uregs.pc - offset;
1457 Addr limit_pc = uregs.pc;
1458 Addr cur_pc;
1459 for (cur_pc = start_pc; cur_pc < limit_pc; cur_pc += 4) {
1460 unsigned long inst, high_word, low_word;
1461 unsigned long * cur_inst;
1462 /* Fetch the instruction. */
1463 cur_inst = (unsigned long *)cur_pc;
1464 inst = *((UInt *) cur_inst);
1465 if(debug)
1466 VG_(printf)("cur_pc: 0x%lx, inst: 0x%lx\n", cur_pc, inst);
1468 /* Save some code by pre-extracting some useful fields. */
1469 high_word = (inst >> 16) & 0xffff;
1470 low_word = inst & 0xffff;
1472 if (high_word == 0x27bd /* addiu $sp,$sp,-i */
1473 || high_word == 0x23bd /* addi $sp,$sp,-i */
1474 || high_word == 0x67bd) { /* daddiu $sp,$sp,-i */
1475 if (low_word & 0x8000) /* negative stack adjustment? */
1476 frame_offset += 0x10000 - low_word;
1477 else
1478 /* Exit loop if a positive stack adjustment is found, which
1479 usually means that the stack cleanup code in the function
1480 epilogue is reached. */
1481 break;
1482 seen_sp_adjust = 1;
1485 if(debug)
1486 VG_(printf)("offset: 0x%ld\n", frame_offset);
1488 if (seen_sp_adjust) {
1489 if (0 == uregs.pc || 1 == uregs.pc) break;
1490 if (uregs.pc == uregs.ra - 8) break;
1491 if (sps) {
1492 sps[i] = uregs.sp + frame_offset;
1494 uregs.sp = uregs.sp + frame_offset;
1496 if (fps) {
1497 fps[i] = fps[0];
1498 uregs.fp = fps[0];
1500 if (0 == uregs.ra || 1 == uregs.ra) break;
1501 uregs.pc = uregs.ra - 8;
1502 ips[i++] = uregs.ra - 8;
1503 RECURSIVE_MERGE(cmrf,ips,i);
1504 continue;
1507 if (i == 1) {
1508 if (sps) {
1509 sps[i] = sps[0];
1510 uregs.sp = sps[0];
1512 if (fps) {
1513 fps[i] = fps[0];
1514 uregs.fp = fps[0];
1516 if (0 == uregs.ra || 1 == uregs.ra) break;
1517 uregs.pc = uregs.ra - 8;
1518 ips[i++] = uregs.ra - 8;
1519 RECURSIVE_MERGE(cmrf,ips,i);
1520 continue;
1522 /* No luck. We have to give up. */
1523 break;
1526 n_found = i;
1527 return n_found;
1530 #endif
1532 /*------------------------------------------------------------*/
1533 /*--- ---*/
1534 /*--- END platform-dependent unwinder worker functions ---*/
1535 /*--- ---*/
1536 /*------------------------------------------------------------*/
1538 /*------------------------------------------------------------*/
1539 /*--- Exported functions. ---*/
1540 /*------------------------------------------------------------*/
1542 UInt VG_(get_StackTrace_with_deltas)(
1543 ThreadId tid,
1544 /*OUT*/StackTrace ips, UInt n_ips,
1545 /*OUT*/StackTrace sps,
1546 /*OUT*/StackTrace fps,
1547 Word first_ip_delta,
1548 Word first_sp_delta
1551 /* Get the register values with which to start the unwind. */
1552 UnwindStartRegs startRegs;
1553 VG_(memset)( &startRegs, 0, sizeof(startRegs) );
1554 VG_(get_UnwindStartRegs)( &startRegs, tid );
1556 Addr stack_highest_byte = VG_(threads)[tid].client_stack_highest_byte;
1557 Addr stack_lowest_byte = 0;
1559 # if defined(VGP_x86_linux)
1560 /* Nasty little hack to deal with syscalls - if libc is using its
1561 _dl_sysinfo_int80 function for syscalls (the TLS version does),
1562 then ip will always appear to be in that function when doing a
1563 syscall, not the actual libc function doing the syscall. This
1564 check sees if IP is within that function, and pops the return
1565 address off the stack so that ip is placed within the library
1566 function calling the syscall. This makes stack backtraces much
1567 more useful.
1569 The function is assumed to look like this (from glibc-2.3.6 sources):
1570 _dl_sysinfo_int80:
1571 int $0x80
1573 That is 3 (2+1) bytes long. We could be more thorough and check
1574 the 3 bytes of the function are as expected, but I can't be
1575 bothered.
1577 if (VG_(client__dl_sysinfo_int80) != 0 /* we know its address */
1578 && startRegs.r_pc >= VG_(client__dl_sysinfo_int80)
1579 && startRegs.r_pc < VG_(client__dl_sysinfo_int80)+3
1580 && VG_(am_is_valid_for_client)(startRegs.r_pc, sizeof(Addr),
1581 VKI_PROT_READ)) {
1582 startRegs.r_pc = (ULong) *(Addr*)(UWord)startRegs.r_sp;
1583 startRegs.r_sp += (ULong) sizeof(Addr);
1585 # endif
1587 /* See if we can get a better idea of the stack limits */
1588 VG_(stack_limits)( (Addr)startRegs.r_sp,
1589 &stack_lowest_byte, &stack_highest_byte );
1591 /* Take into account the first_ip_delta and first_sp_delta. */
1592 startRegs.r_pc += (Long)first_ip_delta;
1593 startRegs.r_sp += (Long)first_sp_delta;
1595 if (0)
1596 VG_(printf)("tid %u: stack_highest=0x%08lx ip=0x%010llx "
1597 "sp=0x%010llx\n",
1598 tid, stack_highest_byte,
1599 startRegs.r_pc, startRegs.r_sp);
1601 return VG_(get_StackTrace_wrk)(tid, ips, n_ips,
1602 sps, fps,
1603 &startRegs,
1604 stack_highest_byte);
1607 UInt VG_(get_StackTrace) ( ThreadId tid,
1608 /*OUT*/StackTrace ips, UInt max_n_ips,
1609 /*OUT*/StackTrace sps,
1610 /*OUT*/StackTrace fps,
1611 Word first_ip_delta )
1613 return VG_(get_StackTrace_with_deltas) (tid,
1614 ips, max_n_ips,
1615 sps,
1616 fps,
1617 first_ip_delta,
1618 0 /* first_sp_delta */
1622 static void printIpDesc(UInt n, DiEpoch ep, Addr ip, void* uu_opaque)
1624 InlIPCursor *iipc = VG_(new_IIPC)(ep, ip);
1626 do {
1627 const HChar *buf = VG_(describe_IP)(ep, ip, iipc);
1628 if (VG_(clo_xml)) {
1629 VG_(printf_xml)(" %s\n", buf);
1630 } else {
1631 VG_(message)(Vg_UserMsg, " %s %s\n",
1632 ( n == 0 ? "at" : "by" ), buf);
1634 n++;
1635 // Increase n to show "at" for only one level.
1636 } while (VG_(next_IIPC)(iipc));
1637 VG_(delete_IIPC)(iipc);
1640 /* Print a StackTrace. */
1641 void VG_(pp_StackTrace) ( DiEpoch ep, StackTrace ips, UInt n_ips )
1643 vg_assert( n_ips > 0 );
1645 if (VG_(clo_xml))
1646 VG_(printf_xml)(" <stack>\n");
1648 VG_(apply_StackTrace)( printIpDesc, NULL, ep, ips, n_ips );
1650 if (VG_(clo_xml))
1651 VG_(printf_xml)(" </stack>\n");
1654 /* Get and immediately print a StackTrace. */
1655 void VG_(get_and_pp_StackTrace) ( ThreadId tid, UInt max_n_ips )
1657 Addr ips[max_n_ips];
1658 UInt n_ips
1659 = VG_(get_StackTrace)(tid, ips, max_n_ips,
1660 NULL/*array to dump SP values in*/,
1661 NULL/*array to dump FP values in*/,
1662 0/*first_ip_delta*/);
1663 VG_(pp_StackTrace)(VG_(current_DiEpoch)(), ips, n_ips);
1666 void VG_(apply_StackTrace)(
1667 void(*action)(UInt n, DiEpoch ep, Addr ip, void* opaque),
1668 void* opaque,
1669 DiEpoch ep, StackTrace ips, UInt n_ips
1672 Int i;
1674 vg_assert(n_ips > 0);
1675 if ( ! VG_(clo_show_below_main) ) {
1676 // Search (from the outer frame onwards) the appearance of "main"
1677 // or the last appearance of a below main function.
1678 // Then decrease n_ips so as to not call action for the below main
1679 for (i = n_ips - 1; i >= 0; i--) {
1680 Vg_FnNameKind kind = VG_(get_fnname_kind_from_IP)(ep, ips[i]);
1681 if (Vg_FnNameMain == kind || Vg_FnNameBelowMain == kind)
1682 n_ips = i + 1;
1683 if (Vg_FnNameMain == kind)
1684 break;
1688 for (i = 0; i < n_ips; i++)
1689 // Act on the ip
1690 action(i, ep, ips[i], opaque);
1694 /*--------------------------------------------------------------------*/
1695 /*--- end ---*/
1696 /*--------------------------------------------------------------------*/