Backed out changeset b71c8c052463 (bug 1943846) for causing mass failures. CLOSED...
[gecko.git] / mozglue / baseprofiler / lul / LulMain.cpp
bloba9bdaa52462539bd886c4eb69a70488fbe4f5735
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "LulMain.h"
9 #include <string.h>
10 #include <stdlib.h>
11 #include <stdio.h>
12 #include <unistd.h> // write(), only for testing LUL
14 #include <algorithm> // std::sort
15 #include <string>
16 #include <utility>
18 #include "mozilla/Assertions.h"
19 #include "mozilla/ArrayUtils.h"
20 #include "mozilla/CheckedInt.h"
21 #include "mozilla/DebugOnly.h"
22 #include "mozilla/MemoryChecking.h"
23 #include "mozilla/Sprintf.h"
24 #include "mozilla/UniquePtr.h"
25 #include "mozilla/Unused.h"
27 #include "BaseProfiler.h"
28 #include "LulCommonExt.h"
29 #include "LulElfExt.h"
30 #include "LulMainInt.h"
32 using mozilla::baseprofiler::profiler_current_process_id;
33 using mozilla::baseprofiler::profiler_current_thread_id;
35 // Set this to 1 for verbose logging
36 #define DEBUG_MAIN 0
38 namespace lul {
40 using mozilla::CheckedInt;
41 using mozilla::DebugOnly;
42 using mozilla::MallocSizeOf;
43 using mozilla::Unused;
44 using std::pair;
45 using std::string;
46 using std::vector;
48 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
50 // Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
51 // Any such function -- and, hence, the transitive closure of those
52 // reachable from it -- must not do any dynamic memory allocation.
53 // Doing so risks deadlock. There is exactly one root function for
54 // the transitive closure: Lul::Unwind.
56 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
58 ////////////////////////////////////////////////////////////////
59 // RuleSet //
60 ////////////////////////////////////////////////////////////////
62 static const char* NameOf_DW_REG(int16_t aReg) {
63 switch (aReg) {
64 case DW_REG_CFA:
65 return "cfa";
66 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
67 case DW_REG_INTEL_XBP:
68 return "xbp";
69 case DW_REG_INTEL_XSP:
70 return "xsp";
71 case DW_REG_INTEL_XIP:
72 return "xip";
73 #elif defined(GP_ARCH_arm)
74 case DW_REG_ARM_R7:
75 return "r7";
76 case DW_REG_ARM_R11:
77 return "r11";
78 case DW_REG_ARM_R12:
79 return "r12";
80 case DW_REG_ARM_R13:
81 return "r13";
82 case DW_REG_ARM_R14:
83 return "r14";
84 case DW_REG_ARM_R15:
85 return "r15";
86 #elif defined(GP_ARCH_arm64)
87 case DW_REG_AARCH64_X29:
88 return "x29";
89 case DW_REG_AARCH64_X30:
90 return "x30";
91 case DW_REG_AARCH64_SP:
92 return "sp";
93 #elif defined(GP_ARCH_mips64)
94 case DW_REG_MIPS_SP:
95 return "sp";
96 case DW_REG_MIPS_FP:
97 return "fp";
98 case DW_REG_MIPS_PC:
99 return "pc";
100 #else
101 # error "Unsupported arch"
102 #endif
103 default:
104 return "???";
108 string LExpr::ShowRule(const char* aNewReg) const {
109 char buf[64];
110 string res = string(aNewReg) + "=";
111 switch (mHow) {
112 case UNKNOWN:
113 res += "Unknown";
114 break;
115 case NODEREF:
116 SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
117 res += buf;
118 break;
119 case DEREF:
120 SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
121 res += buf;
122 break;
123 case PFXEXPR:
124 SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset);
125 res += buf;
126 break;
127 default:
128 res += "???";
129 break;
131 return res;
134 void RuleSet::Print(void (*aLog)(const char*)) const {
135 char buf[96];
136 SprintfLiteral(buf, "[%llx .. %llx]: let ", (unsigned long long int)mAddr,
137 (unsigned long long int)(mAddr + mLen - 1));
138 string res = string(buf);
139 res += mCfaExpr.ShowRule("cfa");
140 res += " in";
141 // For each reg we care about, print the recovery expression.
142 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
143 res += mXipExpr.ShowRule(" RA");
144 res += mXspExpr.ShowRule(" SP");
145 res += mXbpExpr.ShowRule(" BP");
146 #elif defined(GP_ARCH_arm)
147 res += mR15expr.ShowRule(" R15");
148 res += mR7expr.ShowRule(" R7");
149 res += mR11expr.ShowRule(" R11");
150 res += mR12expr.ShowRule(" R12");
151 res += mR13expr.ShowRule(" R13");
152 res += mR14expr.ShowRule(" R14");
153 #elif defined(GP_ARCH_arm64)
154 res += mX29expr.ShowRule(" X29");
155 res += mX30expr.ShowRule(" X30");
156 res += mSPexpr.ShowRule(" SP");
157 #elif defined(GP_ARCH_mips64)
158 res += mPCexpr.ShowRule(" PC");
159 res += mSPexpr.ShowRule(" SP");
160 res += mFPexpr.ShowRule(" FP");
161 #else
162 # error "Unsupported arch"
163 #endif
164 aLog(res.c_str());
167 LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
168 switch (aRegno) {
169 case DW_REG_CFA:
170 return &mCfaExpr;
171 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
172 case DW_REG_INTEL_XIP:
173 return &mXipExpr;
174 case DW_REG_INTEL_XSP:
175 return &mXspExpr;
176 case DW_REG_INTEL_XBP:
177 return &mXbpExpr;
178 #elif defined(GP_ARCH_arm)
179 case DW_REG_ARM_R15:
180 return &mR15expr;
181 case DW_REG_ARM_R14:
182 return &mR14expr;
183 case DW_REG_ARM_R13:
184 return &mR13expr;
185 case DW_REG_ARM_R12:
186 return &mR12expr;
187 case DW_REG_ARM_R11:
188 return &mR11expr;
189 case DW_REG_ARM_R7:
190 return &mR7expr;
191 #elif defined(GP_ARCH_arm64)
192 case DW_REG_AARCH64_X29:
193 return &mX29expr;
194 case DW_REG_AARCH64_X30:
195 return &mX30expr;
196 case DW_REG_AARCH64_SP:
197 return &mSPexpr;
198 #elif defined(GP_ARCH_mips64)
199 case DW_REG_MIPS_SP:
200 return &mSPexpr;
201 case DW_REG_MIPS_FP:
202 return &mFPexpr;
203 case DW_REG_MIPS_PC:
204 return &mPCexpr;
205 #else
206 # error "Unknown arch"
207 #endif
208 default:
209 return nullptr;
213 RuleSet::RuleSet() {
214 mAddr = 0;
215 mLen = 0;
216 // The only other fields are of type LExpr and those are initialised
217 // by LExpr::LExpr().
220 ////////////////////////////////////////////////////////////////
221 // SecMap //
222 ////////////////////////////////////////////////////////////////
224 // See header file LulMainInt.h for comments about invariants.
// A freshly-made SecMap has an empty summary range (min=1 > max=0, the
// canonical "contains nothing" values per LulMainInt.h) and is marked
// usable, since an empty map is trivially searchable.
SecMap::SecMap(void (*aLog)(const char*))
    : mSummaryMinAddr(1), mSummaryMaxAddr(0), mUsable(true), mLog(aLog) {}

SecMap::~SecMap() { mRuleSets.clear(); }
231 // RUNS IN NO-MALLOC CONTEXT
232 RuleSet* SecMap::FindRuleSet(uintptr_t ia) {
233 // Binary search mRuleSets to find one that brackets |ia|.
234 // lo and hi need to be signed, else the loop termination tests
235 // don't work properly. Note that this works correctly even when
236 // mRuleSets.size() == 0.
238 // Can't do this until the array has been sorted and preened.
239 MOZ_ASSERT(mUsable);
241 long int lo = 0;
242 long int hi = (long int)mRuleSets.size() - 1;
243 while (true) {
244 // current unsearched space is from lo to hi, inclusive.
245 if (lo > hi) {
246 // not found
247 return nullptr;
249 long int mid = lo + ((hi - lo) / 2);
250 RuleSet* mid_ruleSet = &mRuleSets[mid];
251 uintptr_t mid_minAddr = mid_ruleSet->mAddr;
252 uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1;
253 if (ia < mid_minAddr) {
254 hi = mid - 1;
255 continue;
257 if (ia > mid_maxAddr) {
258 lo = mid + 1;
259 continue;
261 MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
262 return mid_ruleSet;
264 // NOTREACHED
267 // Add a RuleSet to the collection. The rule is copied in. Calling
268 // this makes the map non-searchable.
269 void SecMap::AddRuleSet(const RuleSet* rs) {
270 mUsable = false;
271 mRuleSets.push_back(*rs);
274 // Add a PfxInstr to the vector of such instrs, and return the index
275 // in the vector. Calling this makes the map non-searchable.
276 uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {
277 mUsable = false;
278 mPfxInstrs.push_back(pfxi);
279 return mPfxInstrs.size() - 1;
// Strict-weak-ordering comparator for std::sort: orders RuleSets by
// ascending start address.
static bool CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
  return rs1.mAddr < rs2.mAddr;
}
// Prepare the map for searching: sort the RuleSets, drop any that are
// not completely contained within [aStart, aStart + aLen), truncate
// overlaps, and compute the summary min/max addresses.  On return the
// map is marked usable for binary search.
void SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen) {
  if (mRuleSets.empty()) {
    return;
  }

  MOZ_ASSERT(aLen > 0);
  if (aLen == 0) {
    // This should never happen.
    mRuleSets.clear();
    return;
  }

  // Sort by start addresses.
  std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);

  // Detect any entry not completely contained within [start, +len).
  // Set its length to zero, so that the next pass will remove it.
  for (size_t i = 0; i < mRuleSets.size(); ++i) {
    RuleSet* rs = &mRuleSets[i];
    if (rs->mLen > 0 &&
        (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) {
      rs->mLen = 0;
    }
  }

  // Iteratively truncate any overlaps and remove any zero length
  // entries that might result, or that may have been present
  // initially.  Unless the input is seriously screwy, this is
  // expected to iterate only once.
  while (true) {
    size_t i;
    size_t n = mRuleSets.size();
    size_t nZeroLen = 0;

    if (n == 0) {
      break;
    }

    // Truncate each entry so it does not overlap its successor, and
    // count how many entries end up with zero length.
    for (i = 1; i < n; ++i) {
      RuleSet* prev = &mRuleSets[i - 1];
      RuleSet* here = &mRuleSets[i];
      MOZ_ASSERT(prev->mAddr <= here->mAddr);
      if (prev->mAddr + prev->mLen > here->mAddr) {
        prev->mLen = here->mAddr - prev->mAddr;
      }
      if (prev->mLen == 0) nZeroLen++;
    }

    // The loop above never inspects the last entry's length.
    if (mRuleSets[n - 1].mLen == 0) {
      nZeroLen++;
    }

    // At this point, the entries are in-order and non-overlapping.
    // If none of them are zero-length, we are done.
    if (nZeroLen == 0) {
      break;
    }

    // Slide back the entries to remove the zero length ones.
    size_t j = 0;  // The write-point.
    for (i = 0; i < n; ++i) {
      if (mRuleSets[i].mLen == 0) {
        continue;
      }
      if (j != i) mRuleSets[j] = mRuleSets[i];
      ++j;
    }
    MOZ_ASSERT(i == n);
    MOZ_ASSERT(nZeroLen <= n);
    MOZ_ASSERT(j == n - nZeroLen);
    // Pop the now-duplicated tail entries.
    while (nZeroLen > 0) {
      mRuleSets.pop_back();
      nZeroLen--;
    }

    MOZ_ASSERT(mRuleSets.size() == j);
  }

  size_t n = mRuleSets.size();

#ifdef DEBUG
  // Do a final check on the rules: their address ranges must be
  // ascending, non overlapping, non zero sized.
  if (n > 0) {
    MOZ_ASSERT(mRuleSets[0].mLen > 0);
    for (size_t i = 1; i < n; ++i) {
      RuleSet* prev = &mRuleSets[i - 1];
      RuleSet* here = &mRuleSets[i];
      MOZ_ASSERT(prev->mAddr < here->mAddr);
      MOZ_ASSERT(here->mLen > 0);
      MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr);
    }
  }
#endif

  // Set the summary min and max address values.
  if (n == 0) {
    // Use the values defined in comments in the class declaration.
    mSummaryMinAddr = 1;
    mSummaryMaxAddr = 0;
  } else {
    mSummaryMinAddr = mRuleSets[0].mAddr;
    mSummaryMaxAddr = mRuleSets[n - 1].mAddr + mRuleSets[n - 1].mLen - 1;
  }
  char buf[150];
  SprintfLiteral(buf, "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
                 (int)n, (unsigned long long int)mSummaryMinAddr,
                 (unsigned long long int)mSummaryMaxAddr);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Is now usable for binary search.
  mUsable = true;

#if 0
  mLog("\nRulesets after preening\n");
  for (size_t i = 0; i < mRuleSets.size(); ++i) {
    mRuleSets[i].Print(mLog);
    mLog("\n");
  }
  mLog("\n");
#endif
}
// True iff this map holds no RuleSets at all (and hence can never
// resolve a lookup).
bool SecMap::IsEmpty() { return mRuleSets.empty(); }
414 size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
415 size_t n = aMallocSizeOf(this);
417 // It's conceivable that these calls would be unsafe with some
418 // implementations of std::vector, but it seems to be working for now...
419 n += aMallocSizeOf(mRuleSets.data());
420 n += aMallocSizeOf(mPfxInstrs.data());
422 return n;
425 ////////////////////////////////////////////////////////////////
426 // SegArray //
427 ////////////////////////////////////////////////////////////////
429 // A SegArray holds a set of address ranges that together exactly
430 // cover an address range, with no overlaps or holes. Each range has
431 // an associated value, which in this case has been specialised to be
432 // a simple boolean. The representation is kept to minimal canonical
433 // form in which adjacent ranges with the same associated value are
434 // merged together. Each range is represented by a |struct Seg|.
436 // SegArrays are used to keep track of which parts of the address
437 // space are known to contain instructions.
class SegArray {
 public:
  // Set the value for the inclusive range [lo, hi] to |val|, splitting
  // and re-merging existing segments as needed.  Ranges with lo > hi
  // are ignored.  Preserves the no-holes/no-overlaps invariant.
  void add(uintptr_t lo, uintptr_t hi, bool val) {
    if (lo > hi) {
      return;
    }
    split_at(lo);
    if (hi < UINTPTR_MAX) {
      split_at(hi + 1);
    }
    std::vector<Seg>::size_type iLo, iHi, i;
    iLo = find(lo);
    iHi = find(hi);
    for (i = iLo; i <= iHi; ++i) {
      mSegs[i].val = val;
    }
    preen();
  }

  // RUNS IN NO-MALLOC CONTEXT
  // If |addr| falls in a segment whose value is true (known code),
  // return true and set *rx_min/*rx_max to that segment's inclusive
  // bounds.  Otherwise return false and leave the outputs untouched.
  bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,
                              /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {
    std::vector<Seg>::size_type i = find(addr);
    if (!mSegs[i].val) {
      return false;
    }
    *rx_min = mSegs[i].lo;
    *rx_max = mSegs[i].hi;
    return true;
  }

  // Initially the whole address space is one non-code segment, which
  // establishes the invariant that every address is covered.
  SegArray() {
    Seg s(0, UINTPTR_MAX, false);
    mSegs.push_back(s);
  }

 private:
  // One segment: the inclusive address range [lo, hi] plus its value.
  struct Seg {
    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
    uintptr_t lo;
    uintptr_t hi;
    bool val;
  };

  // Merge adjacent segments carrying the same value, restoring minimal
  // canonical form after add() has rewritten values.
  void preen() {
    for (std::vector<Seg>::iterator iter = mSegs.begin();
         iter < mSegs.end() - 1; ++iter) {
      if (iter[0].val != iter[1].val) {
        continue;
      }
      iter[0].hi = iter[1].hi;
      mSegs.erase(iter + 1);
      // Back up one, so as not to miss an opportunity to merge
      // with the entry after this one.
      --iter;
    }
  }

  // RUNS IN NO-MALLOC CONTEXT
  // Binary-search for the index of the segment containing |a|.  Since
  // the segments exactly cover the address space this always succeeds;
  // the (size_type)(-1) return is unreachable in practice.
  std::vector<Seg>::size_type find(uintptr_t a) {
    long int lo = 0;
    // The unsearched space is lo .. hi *inclusive*, so hi starts at
    // size() - 1 (not size()), consistent with the other binary
    // searches in this file and never indexing past the end.
    long int hi = (long int)mSegs.size() - 1;
    while (true) {
      // The unsearched space is lo .. hi inclusive.
      if (lo > hi) {
        // Not found.  This can't happen.
        return (std::vector<Seg>::size_type)(-1);
      }
      long int mid = lo + ((hi - lo) / 2);
      uintptr_t mid_lo = mSegs[mid].lo;
      uintptr_t mid_hi = mSegs[mid].hi;
      if (a < mid_lo) {
        hi = mid - 1;
        continue;
      }
      if (a > mid_hi) {
        lo = mid + 1;
        continue;
      }
      return (std::vector<Seg>::size_type)mid;
    }
  }

  // Ensure a segment boundary exists at |a|: if |a| falls strictly
  // inside a segment, split that segment into [.., a-1] and [a, ..].
  void split_at(uintptr_t a) {
    std::vector<Seg>::size_type i = find(a);
    if (mSegs[i].lo == a) {
      return;
    }
    mSegs.insert(mSegs.begin() + i + 1, mSegs[i]);
    mSegs[i].hi = a - 1;
    mSegs[i + 1].lo = a;
  }

  // Debug-only: dump all segments to stdout.
  void show() {
    printf("<< %d entries:\n", (int)mSegs.size());
    for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end();
         ++iter) {
      printf(" %016llx %016llx %s\n", (unsigned long long int)(*iter).lo,
             (unsigned long long int)(*iter).hi,
             (*iter).val ? "true" : "false");
    }
    printf(">>\n");
  }

  // Sorted, non-overlapping, hole-free cover of [0, UINTPTR_MAX].
  std::vector<Seg> mSegs;
};
545 ////////////////////////////////////////////////////////////////
546 // PriMap //
547 ////////////////////////////////////////////////////////////////
// The top-level (primary) map: a sorted, non-overlapping collection of
// SecMaps, one per mapped object.
class PriMap {
 public:
  explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}

  // RUNS IN NO-MALLOC CONTEXT
  // Find the RuleSet for address |ia|, together with the PfxInstr
  // vector of the SecMap that owns it.  Either or both components of
  // the returned pair are nullptr when no SecMap/RuleSet contains |ia|.
  pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
    SecMap* sm = FindSecMap(ia);
    return pair<const RuleSet*, const vector<PfxInstr>*>(
        sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr);
  }

  // Add a secondary map.  No overlaps allowed w.r.t. existing
  // secondary maps.  Takes ownership of |aSecMap|.
  void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {
    // We can't add an empty SecMap to the PriMap.  But that's OK
    // since we'd never be able to find anything in it anyway.
    if (aSecMap->IsEmpty()) {
      return;
    }

    // Iterate through the SecMaps and find the right place for this
    // one.  At the same time, ensure that the in-order
    // non-overlapping invariant is preserved (and, generally, holds).
    // FIXME: this gives a cost that is O(N^2) in the total number of
    // shared objects in the system.  ToDo: better.
    MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);

    size_t num_secMaps = mSecMaps.size();
    uintptr_t i;
    for (i = 0; i < num_secMaps; ++i) {
      mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
      MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr);
      if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) {
        // |aSecMap| needs to be inserted immediately before mSecMaps[i].
        break;
      }
    }
    MOZ_ASSERT(i <= num_secMaps);
    if (i == num_secMaps) {
      // It goes at the end.
      mSecMaps.push_back(std::move(aSecMap));
    } else {
      std::vector<mozilla::UniquePtr<SecMap>>::iterator iter =
          mSecMaps.begin() + i;
      mSecMaps.insert(iter, std::move(aSecMap));
    }
    char buf[100];
    SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",
                   (int)mSecMaps.size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);
  }

  // Remove and delete any SecMaps in the mapping, that intersect
  // with the specified (inclusive) address range.
  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
    MOZ_ASSERT(avma_min <= avma_max);
    size_t num_secMaps = mSecMaps.size();
    if (num_secMaps > 0) {
      intptr_t i;
      // Iterate from end to start over the vector, so as to ensure
      // that the special case where |avma_min| and |avma_max| denote
      // the entire address space, can be completed in time proportional
      // to the number of elements in the map.
      for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) {
        mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
        if (sm_i->mSummaryMaxAddr < avma_min ||
            avma_max < sm_i->mSummaryMinAddr) {
          // There's no overlap.  Move on.
          continue;
        }
        // We need to remove mSecMaps[i] and slide all those above it
        // downwards to cover the hole.  (erase() also deletes the
        // SecMap, since the vector owns it via UniquePtr.)
        mSecMaps.erase(mSecMaps.begin() + i);
      }
    }
  }

  // Return the number of currently contained SecMaps.
  size_t CountSecMaps() { return mSecMaps.size(); }

  // Measure this PriMap plus all the SecMaps it owns.
  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
    size_t n = aMallocSizeOf(this);

    // It's conceivable that this call would be unsafe with some
    // implementations of std::vector, but it seems to be working for now...
    n += aMallocSizeOf(mSecMaps.data());

    for (size_t i = 0; i < mSecMaps.size(); i++) {
      n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf);
    }

    return n;
  }

 private:
  // RUNS IN NO-MALLOC CONTEXT
  // Return the SecMap whose summary range brackets |ia|, or nullptr.
  SecMap* FindSecMap(uintptr_t ia) {
    // Binary search mSecMaps to find one that brackets |ia|.
    // lo and hi need to be signed, else the loop termination tests
    // don't work properly.
    long int lo = 0;
    long int hi = (long int)mSecMaps.size() - 1;
    while (true) {
      // current unsearched space is from lo to hi, inclusive.
      if (lo > hi) {
        // not found
        return nullptr;
      }
      long int mid = lo + ((hi - lo) / 2);
      mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid];
      uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr;
      uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr;
      if (ia < mid_minAddr) {
        hi = mid - 1;
        continue;
      }
      if (ia > mid_maxAddr) {
        lo = mid + 1;
        continue;
      }
      MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
      return mid_secMap.get();
    }
    // NOTREACHED
  }

 private:
  // sorted array of per-object ranges, non overlapping, non empty
  std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;

  // a logging sink, for debugging.
  void (*mLog)(const char*);
};
684 ////////////////////////////////////////////////////////////////
685 // LUL //
686 ////////////////////////////////////////////////////////////////
// Log a one-line message through mLog, prefixed with the current
// process id, thread id, and the address of the owning LUL object.
// Usable only inside LUL methods, since it references |this| and
// |mLog|.
#define LUL_LOG(_str)                                                          \
  do {                                                                         \
    char buf[200];                                                             \
    SprintfLiteral(buf, "LUL: pid %" PRIu64 " tid %" PRIu64 " lul-obj %p: %s", \
                   uint64_t(profiler_current_process_id().ToNumber()),         \
                   uint64_t(profiler_current_thread_id().ToNumber()), this,    \
                   (_str));                                                    \
    buf[sizeof(buf) - 1] = 0;                                                  \
    mLog(buf);                                                                 \
  } while (0)
// Construct a LUL in admin mode.  The creating thread becomes the admin
// thread: only it may register/unregister mappings or later flip the
// object into unwinding mode via EnableUnwinding().
LUL::LUL(void (*aLog)(const char*))
    : mLog(aLog),
      mAdminMode(true),
      mAdminThreadId(profiler_current_thread_id()),
      mPriMap(new PriMap(aLog)),
      mSegArray(new SegArray()),
      mUSU(new UniqueStringUniverse()) {
  LUL_LOG("LUL::LUL: Created object");
}

LUL::~LUL() {
  LUL_LOG("LUL::~LUL: Destroyed object");
  delete mPriMap;
  delete mSegArray;
  // Null mLog before deleting mUSU, so nothing logs through it after
  // teardown has begun.
  mLog = nullptr;
  delete mUSU;
}
717 void LUL::MaybeShowStats() {
718 // This is racey in the sense that it can't guarantee that
719 // n_new == n_new_Context + n_new_CFI + n_new_Scanned
720 // if it should happen that mStats is updated by some other thread
721 // in between computation of n_new and n_new_{Context,CFI,FP}.
722 // But it's just stats printing, so we don't really care.
723 uint32_t n_new = mStats - mStatsPrevious;
724 if (n_new >= 5000) {
725 uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;
726 uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI;
727 uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP;
728 mStatsPrevious = mStats;
729 char buf[200];
730 SprintfLiteral(buf,
731 "LUL frame stats: TOTAL %5u"
732 " CTX %4u CFI %4u FP %4u",
733 n_new, n_new_Context, n_new_CFI, n_new_FP);
734 buf[sizeof(buf) - 1] = 0;
735 mLog(buf);
739 size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
740 size_t n = aMallocSizeOf(this);
741 n += mPriMap->SizeOfIncludingThis(aMallocSizeOf);
743 // Measurement of the following members may be added later if DMD finds it
744 // is worthwhile:
745 // - mSegArray
746 // - mUSU
748 return n;
// Leave admin mode: after this, mappings may no longer be changed, and
// Unwind() becomes legal to call.  Must be called by the admin thread.
void LUL::EnableUnwinding() {
  LUL_LOG("LUL::EnableUnwinding");
  // Don't assert for Admin mode here.  That is, tolerate a call here
  // if we are already in Unwinding mode.
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mAdminMode = false;
}
// Register a new code mapping at [aRXavma, +aSize): read its CFI/EXIDX
// unwind data into a fresh SecMap, add that to the primary map, and
// mark the range as containing valid code.  If |aMappedImage| is null
// the object file is (re)read from |aFileName|; otherwise the unwind
// data is taken from the supplied in-memory image.  Admin-thread only.
void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
                         const void* aMappedImage) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[200];
  SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",
                 (unsigned long long int)aRXavma, (unsigned long long int)aSize,
                 aFileName);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Ignore obviously-stupid notifications.
  if (aSize > 0) {
    // Here's a new mapping, for this object.
    mozilla::UniquePtr<SecMap> smap = mozilla::MakeUnique<SecMap>(mLog);

    // Read CFI or EXIDX unwind data into |smap|.
    if (!aMappedImage) {
      (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),
                                smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
    } else {
      (void)lul::ReadSymbolDataInternal(
          (const uint8_t*)aMappedImage, string(aFileName),
          std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
    }

    mLog("NotifyMap .. preparing entries\n");

    // Sort/preen the entries so the SecMap becomes searchable.
    smap->PrepareRuleSets(aRXavma, aSize);

    SprintfLiteral(buf, "NotifyMap got %lld entries\n",
                   (long long int)smap->Size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);

    // Add it to the primary map (the top level set of mapped objects).
    mPriMap->AddSecMap(std::move(smap));

    // Tell the segment array about the mapping, so that the stack
    // scan and __kernel_syscall mechanisms know where valid code is.
    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
  }
}
806 void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {
807 MOZ_RELEASE_ASSERT(mAdminMode);
808 MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
810 mLog(":\n");
811 char buf[200];
812 SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n",
813 (unsigned long long int)aRXavma,
814 (unsigned long long int)aSize);
815 buf[sizeof(buf) - 1] = 0;
816 mLog(buf);
818 // Ignore obviously-stupid notifications.
819 if (aSize > 0) {
820 // Tell the segment array about the mapping, so that the stack
821 // scan and __kernel_syscall mechanisms know where valid code is.
822 mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
// Notify that [aRXavmaMin, aRXavmaMax] (inclusive) is being unmapped:
// drop every SecMap that intersects the range and mark the range as
// non-code in the segment array.  Admin-thread only.
void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[100];
  SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n",
                 (unsigned long long int)aRXavmaMin,
                 (unsigned long long int)aRXavmaMax);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);

  // Remove from the primary map, any secondary maps that intersect
  // with the address range.  Also delete the secondary maps.
  mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);

  // Tell the segment array that the address range no longer
  // contains valid code.
  mSegArray->add(aRXavmaMin, aRXavmaMax, false);

  SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n",
                 (int)mPriMap->CountSecMaps());
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);
}
// Return the number of registered objects (SecMaps) currently held.
// Admin-thread only.
size_t LUL::CountMappings() {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  return mPriMap->CountSecMaps();
}
// RUNS IN NO-MALLOC CONTEXT
// Read one word at address |aAddr| out of the captured stack image.
// Returns an invalid TaggedUWord if the address itself is invalid or
// the whole word does not lie inside the image.
static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {
  if (!aAddr.Valid()) {
    return TaggedUWord();
  }

  // Lower limit check.  |aAddr.Value()| is the lowest requested address
  // and |aStackImg->mStartAvma| is the lowest address we actually have,
  // so the comparison is straightforward.
  if (aAddr.Value() < aStackImg->mStartAvma) {
    return TaggedUWord();
  }

  // Upper limit check.  We must compute the highest requested address
  // and the highest address we actually have, but being careful to
  // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the
  // 3/7 values below that, then we will get overflow.  See bug #1245477.
  typedef CheckedInt<uintptr_t> CheckedUWord;
  CheckedUWord highest_requested_plus_one =
      CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
  CheckedUWord highest_available_plus_one =
      CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
  if (!highest_requested_plus_one.isValid()     // overflow?
      || !highest_available_plus_one.isValid()  // overflow?
      || (highest_requested_plus_one.value() >
          highest_available_plus_one.value())) {  // in range?
    return TaggedUWord();
  }

  // Safe: the word [aAddr, aAddr + sizeof(uintptr_t)) lies wholly
  // within the image, so index off mStartAvma into mContents.
  return TaggedUWord(
      *(uintptr_t*)(&aStackImg
                         ->mContents[aAddr.Value() - aStackImg->mStartAvma]));
}
// RUNS IN NO-MALLOC CONTEXT
// Return the value of |aReg| taken from the pre-unwind register state
// |aOldRegs|, or |aCFA| for the pseudo-register DW_REG_CFA.  Any
// register not tracked on this architecture asserts and yields an
// invalid word.
static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
                               TaggedUWord aCFA) {
  switch (aReg) {
    case DW_REG_CFA:
      return aCFA;
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      return aOldRegs->xbp;
    case DW_REG_INTEL_XSP:
      return aOldRegs->xsp;
    case DW_REG_INTEL_XIP:
      return aOldRegs->xip;
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      return aOldRegs->r7;
    case DW_REG_ARM_R11:
      return aOldRegs->r11;
    case DW_REG_ARM_R12:
      return aOldRegs->r12;
    case DW_REG_ARM_R13:
      return aOldRegs->r13;
    case DW_REG_ARM_R14:
      return aOldRegs->r14;
    case DW_REG_ARM_R15:
      return aOldRegs->r15;
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return aOldRegs->x29;
    case DW_REG_AARCH64_X30:
      return aOldRegs->x30;
    case DW_REG_AARCH64_SP:
      return aOldRegs->sp;
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return aOldRegs->sp;
    case DW_REG_MIPS_FP:
      return aOldRegs->fp;
    case DW_REG_MIPS_PC:
      return aOldRegs->pc;
#else
# error "Unsupported arch"
#endif
    default:
      MOZ_ASSERT(0);
      return TaggedUWord();
  }
}
// RUNS IN NO-MALLOC CONTEXT
// See prototype for comment.
// Interpret the PfxInstr sequence starting at |aPfxInstrs[start]| on a
// small fixed-size stack machine.  The sequence must begin with
// PX_Start and end with PX_End; any malformed input (bad start index,
// stack over/underflow, unknown opcode, running off the end) bails out
// to |fail| and yields an invalid word.
TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
                            TaggedUWord aCFA, const StackImage* aStackImg,
                            const vector<PfxInstr>& aPfxInstrs) {
  // A small evaluation stack, and a stack pointer, which points to
  // the highest numbered in-use element.
  const int N_STACK = 10;
  TaggedUWord stack[N_STACK];
  int stackPointer = -1;
  for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord();

#define PUSH(_tuw)                                             \
  do {                                                         \
    if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \
    stack[++stackPointer] = (_tuw);                            \
  } while (0)

#define POP(_lval)                                   \
  do {                                               \
    if (stackPointer < 0) goto fail; /* underflow */ \
    _lval = stack[stackPointer--];                   \
  } while (0)

  // Cursor in the instruction sequence.
  size_t curr = start + 1;

  // Check the start point is sane.
  size_t nInstrs = aPfxInstrs.size();
  if (start < 0 || (size_t)start >= nInstrs) goto fail;

  {
    // The instruction sequence must start with PX_Start.  If not,
    // something is seriously wrong.
    PfxInstr first = aPfxInstrs[start];
    if (first.mOpcode != PX_Start) goto fail;

    // Push the CFA on the stack to start with (or not), as required by
    // the original DW_OP_*expression* CFI.
    if (first.mOperand != 0) PUSH(aCFA);
  }

  while (true) {
    if (curr >= nInstrs) goto fail;  // ran off the end of the sequence

    PfxInstr pfxi = aPfxInstrs[curr++];
    if (pfxi.mOpcode == PX_End) break;  // we're done

    switch (pfxi.mOpcode) {
      case PX_Start:
        // This should appear only at the start of the sequence.
        goto fail;
      case PX_End:
        // We just took care of that, so we shouldn't see it again.
        MOZ_ASSERT(0);
        goto fail;
      case PX_SImm32:
        // Sign-extend the 32-bit operand onto the stack.
        PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
        break;
      case PX_DwReg: {
        // Push the pre-unwind value of a DWARF register.
        DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
        MOZ_ASSERT(reg != DW_REG_CFA);
        PUSH(EvaluateReg(reg, aOldRegs, aCFA));
        break;
      }
      case PX_Deref: {
        // Replace top-of-stack with the word it points at in the
        // captured stack image.
        TaggedUWord addr;
        POP(addr);
        PUSH(DerefTUW(addr, aStackImg));
        break;
      }
      case PX_Add: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y + x);
        break;
      }
      case PX_Sub: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y - x);
        break;
      }
      case PX_And: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y & x);
        break;
      }
      case PX_Or: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y | x);
        break;
      }
      case PX_CmpGES: {
        // Signed >= comparison; result is a tagged boolean word.
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y.CmpGEs(x));
        break;
      }
      case PX_Shl: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y << x);
        break;
      }
      default:
        MOZ_ASSERT(0);
        goto fail;
    }
  }  // while (true)

  // Evaluation finished.  The top value on the stack is the result.
  if (stackPointer >= 0) {
    return stack[stackPointer];
  }
  // Else fall through

fail:
  return TaggedUWord();

#undef PUSH
#undef POP
}
1076 // RUNS IN NO-MALLOC CONTEXT
1077 TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
1078 const StackImage* aStackImg,
1079 const vector<PfxInstr>* aPfxInstrs) const {
1080 switch (mHow) {
1081 case UNKNOWN:
1082 return TaggedUWord();
1083 case NODEREF: {
1084 TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1085 tuw = tuw + TaggedUWord((intptr_t)mOffset);
1086 return tuw;
1088 case DEREF: {
1089 TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1090 tuw = tuw + TaggedUWord((intptr_t)mOffset);
1091 return DerefTUW(tuw, aStackImg);
1093 case PFXEXPR: {
1094 MOZ_ASSERT(aPfxInstrs);
1095 if (!aPfxInstrs) {
1096 return TaggedUWord();
1098 return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs);
1100 default:
1101 MOZ_ASSERT(0);
1102 return TaggedUWord();
1106 // RUNS IN NO-MALLOC CONTEXT
1107 static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,
1108 const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {
1109 // Take a copy of regs, since we'll need to refer to the old values
1110 // whilst computing the new ones.
1111 UnwindRegs old_regs = *aRegs;
1113 // Mark all the current register values as invalid, so that the
1114 // caller can see, on our return, which ones have been computed
1115 // anew. If we don't even manage to compute a new PC value, then
1116 // the caller will have to abandon the unwind.
1117 // FIXME: Create and use instead: aRegs->SetAllInvalid();
1118 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
1119 aRegs->xbp = TaggedUWord();
1120 aRegs->xsp = TaggedUWord();
1121 aRegs->xip = TaggedUWord();
1122 #elif defined(GP_ARCH_arm)
1123 aRegs->r7 = TaggedUWord();
1124 aRegs->r11 = TaggedUWord();
1125 aRegs->r12 = TaggedUWord();
1126 aRegs->r13 = TaggedUWord();
1127 aRegs->r14 = TaggedUWord();
1128 aRegs->r15 = TaggedUWord();
1129 #elif defined(GP_ARCH_arm64)
1130 aRegs->x29 = TaggedUWord();
1131 aRegs->x30 = TaggedUWord();
1132 aRegs->sp = TaggedUWord();
1133 aRegs->pc = TaggedUWord();
1134 #elif defined(GP_ARCH_mips64)
1135 aRegs->sp = TaggedUWord();
1136 aRegs->fp = TaggedUWord();
1137 aRegs->pc = TaggedUWord();
1138 #else
1139 # error "Unsupported arch"
1140 #endif
1142 // This is generally useful.
1143 const TaggedUWord inval = TaggedUWord();
1145 // First, compute the CFA.
1146 TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/,
1147 aStackImg, aPfxInstrs);
1149 // If we didn't manage to compute the CFA, well .. that's ungood,
1150 // but keep going anyway. It'll be OK provided none of the register
1151 // value rules mention the CFA. In any case, compute the new values
1152 // for each register that we're tracking.
1154 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
1155 aRegs->xbp =
1156 aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1157 aRegs->xsp =
1158 aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1159 aRegs->xip =
1160 aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1161 #elif defined(GP_ARCH_arm)
1162 aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1163 aRegs->r11 =
1164 aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1165 aRegs->r12 =
1166 aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1167 aRegs->r13 =
1168 aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1169 aRegs->r14 =
1170 aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1171 aRegs->r15 =
1172 aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1173 #elif defined(GP_ARCH_arm64)
1174 aRegs->x29 =
1175 aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1176 aRegs->x30 =
1177 aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1178 aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1179 #elif defined(GP_ARCH_mips64)
1180 aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1181 aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1182 aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1183 #else
1184 # error "Unsupported arch"
1185 #endif
1187 // We're done. Any regs for which we didn't manage to compute a
1188 // new value will now be marked as invalid.
1191 // RUNS IN NO-MALLOC CONTEXT
1192 void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,
1193 /*OUT*/ uintptr_t* aFrameSPs,
1194 /*OUT*/ size_t* aFramesUsed,
1195 /*OUT*/ size_t* aFramePointerFramesAcquired,
1196 size_t aFramesAvail, UnwindRegs* aStartRegs,
1197 StackImage* aStackImg) {
1198 MOZ_RELEASE_ASSERT(!mAdminMode);
1200 /////////////////////////////////////////////////////////
1201 // BEGIN UNWIND
1203 *aFramesUsed = 0;
1205 UnwindRegs regs = *aStartRegs;
1206 TaggedUWord last_valid_sp = TaggedUWord();
1208 while (true) {
1209 if (DEBUG_MAIN) {
1210 char buf[300];
1211 mLog("\n");
1212 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
1213 SprintfLiteral(
1214 buf, "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n",
1215 (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
1216 (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
1217 (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
1218 buf[sizeof(buf) - 1] = 0;
1219 mLog(buf);
1220 #elif defined(GP_ARCH_arm)
1221 SprintfLiteral(
1222 buf,
1223 "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx"
1224 " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n",
1225 (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
1226 (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
1227 (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
1228 (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
1229 (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
1230 (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
1231 buf[sizeof(buf) - 1] = 0;
1232 mLog(buf);
1233 #elif defined(GP_ARCH_arm64)
1234 SprintfLiteral(
1235 buf,
1236 "LoopTop: pc %d/%llx x29 %d/%llx x30 %d/%llx"
1237 " sp %d/%llx\n",
1238 (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
1239 (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
1240 (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
1241 (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
1242 buf[sizeof(buf) - 1] = 0;
1243 mLog(buf);
1244 #elif defined(GP_ARCH_mips64)
1245 SprintfLiteral(
1246 buf, "LoopTop: pc %d/%llx sp %d/%llx fp %d/%llx\n",
1247 (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
1248 (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
1249 (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
1250 buf[sizeof(buf) - 1] = 0;
1251 mLog(buf);
1252 #else
1253 # error "Unsupported arch"
1254 #endif
1257 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
1258 TaggedUWord ia = regs.xip;
1259 TaggedUWord sp = regs.xsp;
1260 #elif defined(GP_ARCH_arm)
1261 TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
1262 TaggedUWord sp = regs.r13;
1263 #elif defined(GP_ARCH_arm64)
1264 TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
1265 TaggedUWord sp = regs.sp;
1266 #elif defined(GP_ARCH_mips64)
1267 TaggedUWord ia = regs.pc;
1268 TaggedUWord sp = regs.sp;
1269 #else
1270 # error "Unsupported arch"
1271 #endif
1273 if (*aFramesUsed >= aFramesAvail) {
1274 break;
1277 // If we don't have a valid value for the PC, give up.
1278 if (!ia.Valid()) {
1279 break;
1282 // If this is the innermost frame, record the SP value, which
1283 // presumably is valid. If this isn't the innermost frame, and we
1284 // have a valid SP value, check that its SP value isn't less that
1285 // the one we've seen so far, so as to catch potential SP value
1286 // cycles.
1287 if (*aFramesUsed == 0) {
1288 last_valid_sp = sp;
1289 } else {
1290 MOZ_ASSERT(last_valid_sp.Valid());
1291 if (sp.Valid()) {
1292 if (sp.Value() < last_valid_sp.Value()) {
1293 // Hmm, SP going in the wrong direction. Let's stop.
1294 break;
1296 // Remember where we got to.
1297 last_valid_sp = sp;
1301 // For the innermost frame, the IA value is what we need. For all
1302 // other frames, it's actually the return address, so back up one
1303 // byte so as to get it into the calling instruction.
1304 aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
1305 aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
1306 (*aFramesUsed)++;
1308 // Find the RuleSet for the current IA, if any. This will also
1309 // query the backing (secondary) maps if it isn't found in the
1310 // thread-local cache.
1312 // If this isn't the innermost frame, back up into the calling insn.
1313 if (*aFramesUsed > 1) {
1314 ia = ia + TaggedUWord((uintptr_t)(-1));
1317 pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =
1318 mPriMap->Lookup(ia.Value());
1319 const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
1320 const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;
1322 if (DEBUG_MAIN) {
1323 char buf[100];
1324 SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
1325 (unsigned long long int)ia.Value(), ruleset);
1326 buf[sizeof(buf) - 1] = 0;
1327 mLog(buf);
1330 #if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
1331 /////////////////////////////////////////////
1332 ////
1333 // On 32 bit x86-linux, syscalls are often done via the VDSO
1334 // function __kernel_vsyscall, which doesn't have a corresponding
1335 // object that we can read debuginfo from. That effectively kills
1336 // off all stack traces for threads blocked in syscalls. Hence
1337 // special-case by looking at the code surrounding the program
1338 // counter.
1340 // 0xf7757420 <__kernel_vsyscall+0>: push %ecx
1341 // 0xf7757421 <__kernel_vsyscall+1>: push %edx
1342 // 0xf7757422 <__kernel_vsyscall+2>: push %ebp
1343 // 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp
1344 // 0xf7757425 <__kernel_vsyscall+5>: sysenter
1345 // 0xf7757427 <__kernel_vsyscall+7>: nop
1346 // 0xf7757428 <__kernel_vsyscall+8>: nop
1347 // 0xf7757429 <__kernel_vsyscall+9>: nop
1348 // 0xf775742a <__kernel_vsyscall+10>: nop
1349 // 0xf775742b <__kernel_vsyscall+11>: nop
1350 // 0xf775742c <__kernel_vsyscall+12>: nop
1351 // 0xf775742d <__kernel_vsyscall+13>: nop
1352 // 0xf775742e <__kernel_vsyscall+14>: int $0x80
1353 // 0xf7757430 <__kernel_vsyscall+16>: pop %ebp
1354 // 0xf7757431 <__kernel_vsyscall+17>: pop %edx
1355 // 0xf7757432 <__kernel_vsyscall+18>: pop %ecx
1356 // 0xf7757433 <__kernel_vsyscall+19>: ret
1358 // In cases where the sampled thread is blocked in a syscall, its
1359 // program counter will point at "pop %ebp". Hence we look for
1360 // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
1361 // the corresponding register-recovery actions are:
1362 // new_ebp = *(old_esp + 0)
1363 // new eip = *(old_esp + 12)
1364 // new_esp = old_esp + 16
1366 // It may also be the case that the program counter points two
1367 // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
1368 // the case where the syscall has been restarted but the thread
1369 // hasn't been rescheduled. The code below doesn't handle that;
1370 // it could easily be made to.
1372 if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
1373 uintptr_t insns_min, insns_max;
1374 uintptr_t eip = ia.Value();
1375 bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
1376 if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
1377 uint8_t* eipC = (uint8_t*)eip;
1378 if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
1379 eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
1380 TaggedUWord sp_plus_0 = sp;
1381 TaggedUWord sp_plus_12 = sp;
1382 TaggedUWord sp_plus_16 = sp;
1383 sp_plus_12 = sp_plus_12 + TaggedUWord(12);
1384 sp_plus_16 = sp_plus_16 + TaggedUWord(16);
1385 TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
1386 TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
1387 TaggedUWord new_esp = sp_plus_16;
1388 if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
1389 regs.xbp = new_ebp;
1390 regs.xip = new_eip;
1391 regs.xsp = new_esp;
1392 continue;
1397 ////
1398 /////////////////////////////////////////////
1399 #endif // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
1401 // So, do we have a ruleset for this address? If so, use it now.
1402 if (ruleset) {
1403 if (DEBUG_MAIN) {
1404 ruleset->Print(mLog);
1405 mLog("\n");
1407 // Use the RuleSet to compute the registers for the previous
1408 // frame. |regs| is modified in-place.
1409 UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
1410 continue;
1413 #if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \
1414 defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
1415 defined(GP_PLAT_amd64_freebsd)
1416 // There's no RuleSet for the specified address. On amd64/x86_linux, see if
1417 // it's possible to recover the caller's frame by using the frame pointer.
1419 // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
1420 // and assume the following layout:
1422 // <--- new_SP
1423 // +----------+
1424 // | new_IP | (return address)
1425 // +----------+
1426 // | new_BP | <--- old_BP
1427 // +----------+
1428 // | .... |
1429 // | .... |
1430 // | .... |
1431 // +----------+ <---- old_SP (arbitrary, but must be <= old_BP)
1433 const size_t wordSzB = sizeof(uintptr_t);
1434 TaggedUWord old_xsp = regs.xsp;
1436 // points at new_BP ?
1437 TaggedUWord old_xbp = regs.xbp;
1438 // points at new_IP ?
1439 TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
1440 // is the new_SP ?
1441 TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);
1443 if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
1444 old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
1445 // We don't need to do any range, alignment or validity checks for
1446 // addresses passed to DerefTUW, since that performs them itself, and
1447 // returns an invalid value on failure. Any such value will poison
1448 // subsequent uses, and we do a final check for validity before putting
1449 // the computed values into |regs|.
1450 TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
1451 if (new_xbp.Valid() && new_xbp.IsAligned() &&
1452 old_xbp.Value() < new_xbp.Value()) {
1453 TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
1454 TaggedUWord new_xsp = old_xbp_plus2;
1455 if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
1456 regs.xbp = new_xbp;
1457 regs.xip = new_xip;
1458 regs.xsp = new_xsp;
1459 (*aFramePointerFramesAcquired)++;
1460 continue;
1464 #elif defined(GP_ARCH_arm64)
1465 // Here is an example of generated code for prologue and epilogue..
1467 // stp x29, x30, [sp, #-16]!
1468 // mov x29, sp
1469 // ...
1470 // ldp x29, x30, [sp], #16
1471 // ret
1473 // Next is another example of generated code.
1475 // stp x20, x19, [sp, #-32]!
1476 // stp x29, x30, [sp, #16]
1477 // add x29, sp, #0x10
1478 // ...
1479 // ldp x29, x30, [sp, #16]
1480 // ldp x20, x19, [sp], #32
1481 // ret
1483 // Previous x29 and x30 register are stored in the address of x29 register.
1484 // But since sp register value depends on local variables, we cannot compute
1485 // previous sp register from current sp/fp/lr register and there is no
1486 // regular rule for sp register in prologue. But since return address is lr
1487 // register, if x29 is valid, we will get return address without sp
1488 // register.
1490 // So we assume the following layout that if no rule set. x29 is frame
1491 // pointer, so we will be able to compute x29 and x30 .
1493 // +----------+ <--- new_sp (cannot compute)
1494 // | .... |
1495 // +----------+
1496 // | new_lr | (return address)
1497 // +----------+
1498 // | new_fp | <--- old_fp
1499 // +----------+
1500 // | .... |
1501 // | .... |
1502 // +----------+ <---- old_sp (arbitrary, but unused)
1504 TaggedUWord old_fp = regs.x29;
1505 if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
1506 last_valid_sp.Value() <= old_fp.Value()) {
1507 TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
1508 if (new_fp.Valid() && new_fp.IsAligned() &&
1509 old_fp.Value() < new_fp.Value()) {
1510 TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
1511 TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
1512 if (new_lr.Valid()) {
1513 regs.x29 = new_fp;
1514 regs.x30 = new_lr;
1515 // When using frame pointer to walk stack, we cannot compute sp
1516 // register since we cannot compute sp register from fp/lr/sp
1517 // register, and there is no regular rule to compute previous sp
1518 // register. So mark as invalid.
1519 regs.sp = TaggedUWord();
1520 (*aFramePointerFramesAcquired)++;
1521 continue;
1525 #endif // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||
1526 // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android)
1528 // We failed to recover a frame either using CFI or FP chasing, and we
1529 // have no other ways to recover the frame. So we have to give up.
1530 break;
1532 } // top level unwind loop
1534 // END UNWIND
1535 /////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
// LUL Unit Testing                                            //
////////////////////////////////////////////////////////////////

// Number of innermost stack bytes presented to LUL by the unit tests.
// Must be small enough that copying it is cheap, but large enough to
// cover the frames created by the recursive TestFn1..TestFn8 group.
static const int LUL_UNIT_TEST_STACK_SIZE = 32768;
#if defined(GP_ARCH_mips64)
// Returns the address of the call to this function, i.e. the value of
// the return-address register $31 ($ra) on entry.  Used because mips64
// has no direct way to read the PC from C code.
static __attribute__((noinline)) unsigned long __getpc(void) {
  unsigned long rtaddr;
  __asm__ volatile("move %0, $31" : "=r"(rtaddr));
  return rtaddr;
}
#endif
1552 // This function is innermost in the test call sequence. It uses LUL
1553 // to unwind, and compares the result with the sequence specified in
1554 // the director string. These need to agree in order for the test to
1555 // pass. In order not to screw up the results, this function needs
1556 // to have a not-very big stack frame, since we're only presenting
1557 // the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
1558 // that chunk unavoidably includes the frame for this function.
1560 // This function must not be inlined into its callers. Doing so will
1561 // cause the expected-vs-actual backtrace consistency checking to
1562 // fail. Prints summary results to |aLUL|'s logging sink and also
1563 // returns a boolean indicating whether or not the test failed.
1564 static __attribute__((noinline)) bool GetAndCheckStackTrace(
1565 LUL* aLUL, const char* dstring) {
1566 // Get hold of the current unwind-start registers.
1567 UnwindRegs startRegs;
1568 memset(&startRegs, 0, sizeof(startRegs));
1569 #if defined(GP_ARCH_amd64)
1570 volatile uintptr_t block[3];
1571 MOZ_ASSERT(sizeof(block) == 24);
1572 __asm__ __volatile__(
1573 "leaq 0(%%rip), %%r15"
1574 "\n\t"
1575 "movq %%r15, 0(%0)"
1576 "\n\t"
1577 "movq %%rsp, 8(%0)"
1578 "\n\t"
1579 "movq %%rbp, 16(%0)"
1580 "\n"
1582 : "r"(&block[0])
1583 : "memory", "r15");
1584 startRegs.xip = TaggedUWord(block[0]);
1585 startRegs.xsp = TaggedUWord(block[1]);
1586 startRegs.xbp = TaggedUWord(block[2]);
1587 const uintptr_t REDZONE_SIZE = 128;
1588 uintptr_t start = block[1] - REDZONE_SIZE;
1589 #elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
1590 volatile uintptr_t block[3];
1591 MOZ_ASSERT(sizeof(block) == 12);
1592 __asm__ __volatile__(
1593 ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/
1594 "\n\t"
1595 "popl %%edi"
1596 "\n\t"
1597 "movl %%edi, 0(%0)"
1598 "\n\t"
1599 "movl %%esp, 4(%0)"
1600 "\n\t"
1601 "movl %%ebp, 8(%0)"
1602 "\n"
1604 : "r"(&block[0])
1605 : "memory", "edi");
1606 startRegs.xip = TaggedUWord(block[0]);
1607 startRegs.xsp = TaggedUWord(block[1]);
1608 startRegs.xbp = TaggedUWord(block[2]);
1609 const uintptr_t REDZONE_SIZE = 0;
1610 uintptr_t start = block[1] - REDZONE_SIZE;
1611 #elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
1612 volatile uintptr_t block[6];
1613 MOZ_ASSERT(sizeof(block) == 24);
1614 __asm__ __volatile__(
1615 "mov r0, r15"
1616 "\n\t"
1617 "str r0, [%0, #0]"
1618 "\n\t"
1619 "str r14, [%0, #4]"
1620 "\n\t"
1621 "str r13, [%0, #8]"
1622 "\n\t"
1623 "str r12, [%0, #12]"
1624 "\n\t"
1625 "str r11, [%0, #16]"
1626 "\n\t"
1627 "str r7, [%0, #20]"
1628 "\n"
1630 : "r"(&block[0])
1631 : "memory", "r0");
1632 startRegs.r15 = TaggedUWord(block[0]);
1633 startRegs.r14 = TaggedUWord(block[1]);
1634 startRegs.r13 = TaggedUWord(block[2]);
1635 startRegs.r12 = TaggedUWord(block[3]);
1636 startRegs.r11 = TaggedUWord(block[4]);
1637 startRegs.r7 = TaggedUWord(block[5]);
1638 const uintptr_t REDZONE_SIZE = 0;
1639 uintptr_t start = block[1] - REDZONE_SIZE;
1640 #elif defined(GP_ARCH_arm64)
1641 volatile uintptr_t block[4];
1642 MOZ_ASSERT(sizeof(block) == 32);
1643 __asm__ __volatile__(
1644 "adr x0, . \n\t"
1645 "str x0, [%0, #0] \n\t"
1646 "str x29, [%0, #8] \n\t"
1647 "str x30, [%0, #16] \n\t"
1648 "mov x0, sp \n\t"
1649 "str x0, [%0, #24] \n\t"
1651 : "r"(&block[0])
1652 : "memory", "x0");
1653 startRegs.pc = TaggedUWord(block[0]);
1654 startRegs.x29 = TaggedUWord(block[1]);
1655 startRegs.x30 = TaggedUWord(block[2]);
1656 startRegs.sp = TaggedUWord(block[3]);
1657 const uintptr_t REDZONE_SIZE = 0;
1658 uintptr_t start = block[1] - REDZONE_SIZE;
1659 #elif defined(GP_ARCH_mips64)
1660 volatile uintptr_t block[3];
1661 MOZ_ASSERT(sizeof(block) == 24);
1662 __asm__ __volatile__(
1663 "sd $29, 8(%0) \n"
1664 "sd $30, 16(%0) \n"
1666 : "r"(block)
1667 : "memory");
1668 block[0] = __getpc();
1669 startRegs.pc = TaggedUWord(block[0]);
1670 startRegs.sp = TaggedUWord(block[1]);
1671 startRegs.fp = TaggedUWord(block[2]);
1672 const uintptr_t REDZONE_SIZE = 0;
1673 uintptr_t start = block[1] - REDZONE_SIZE;
1674 #else
1675 # error "Unsupported platform"
1676 #endif
1678 // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
1679 // stack.
1680 uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
1681 uintptr_t ws = sizeof(void*);
1682 start &= ~(ws - 1);
1683 end &= ~(ws - 1);
1684 uintptr_t nToCopy = end - start;
1685 if (nToCopy > lul::N_STACK_BYTES) {
1686 nToCopy = lul::N_STACK_BYTES;
1688 MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
1689 StackImage* stackImg = new StackImage();
1690 stackImg->mLen = nToCopy;
1691 stackImg->mStartAvma = start;
1692 if (nToCopy > 0) {
1693 MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
1694 memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
1697 // Unwind it.
1698 const int MAX_TEST_FRAMES = 64;
1699 uintptr_t framePCs[MAX_TEST_FRAMES];
1700 uintptr_t frameSPs[MAX_TEST_FRAMES];
1701 size_t framesAvail = std::size(framePCs);
1702 size_t framesUsed = 0;
1703 size_t framePointerFramesAcquired = 0;
1704 aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed,
1705 &framePointerFramesAcquired, framesAvail, &startRegs, stackImg);
1707 delete stackImg;
1709 // if (0) {
1710 // // Show what we have.
1711 // fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
1712 // for (size_t i = 0; i < framesUsed; i++) {
1713 // fprintf(stderr, " [%2d] SP %p PC %p\n",
1714 // (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
1715 // }
1716 // fprintf(stderr, "\n");
1719 // Check to see if there's a consistent binding between digits in
1720 // the director string ('1' .. '8') and the PC values acquired by
1721 // the unwind. If there isn't, the unwinding has failed somehow.
1722 uintptr_t binding[8]; // binding for '1' .. binding for '8'
1723 memset((void*)binding, 0, sizeof(binding));
1725 // The general plan is to work backwards along the director string
1726 // and forwards along the framePCs array. Doing so corresponds to
1727 // working outwards from the innermost frame of the recursive test set.
1728 const char* cursor = dstring;
1730 // Find the end. This leaves |cursor| two bytes past the first
1731 // character we want to look at -- see comment below.
1732 while (*cursor) cursor++;
1734 // Counts the number of consistent frames.
1735 size_t nConsistent = 0;
1737 // Iterate back to the start of the director string. The starting
1738 // points are a bit complex. We can't use framePCs[0] because that
1739 // contains the PC in this frame (above). We can't use framePCs[1]
1740 // because that will contain the PC at return point in the recursive
1741 // test group (TestFn[1-8]) for their call "out" to this function,
1742 // GetAndCheckStackTrace. Although LUL will compute a correct
1743 // return address, that will not be the same return address as for a
1744 // recursive call out of the the function to another function in the
1745 // group. Hence we can only start consistency checking at
1746 // framePCs[2].
1748 // To be consistent, then, we must ignore the last element in the
1749 // director string as that corresponds to framePCs[1]. Hence the
1750 // start points are: framePCs[2] and the director string 2 bytes
1751 // before the terminating zero.
1753 // Also as a result of this, the number of consistent frames counted
1754 // will always be one less than the length of the director string
1755 // (not including its terminating zero).
1756 size_t frameIx;
1757 for (cursor = cursor - 2, frameIx = 2;
1758 cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) {
1759 char c = *cursor;
1760 uintptr_t pc = framePCs[frameIx];
1761 // If this doesn't hold, the director string is ill-formed.
1762 MOZ_ASSERT(c >= '1' && c <= '8');
1763 int n = ((int)c) - ((int)'1');
1764 if (binding[n] == 0) {
1765 // There's no binding for |c| yet, so install |pc| and carry on.
1766 binding[n] = pc;
1767 nConsistent++;
1768 continue;
1770 // There's a pre-existing binding for |c|. Check it's consistent.
1771 if (binding[n] != pc) {
1772 // Not consistent. Give up now.
1773 break;
1775 // Consistent. Keep going.
1776 nConsistent++;
1779 // So, did we succeed?
1780 bool passed = nConsistent + 1 == strlen(dstring);
1782 // Show the results.
1783 char buf[200];
1784 SprintfLiteral(buf, "LULUnitTest: dstring = %s\n", dstring);
1785 buf[sizeof(buf) - 1] = 0;
1786 aLUL->mLog(buf);
1787 SprintfLiteral(buf, "LULUnitTest: %d consistent, %d in dstring: %s\n",
1788 (int)nConsistent, (int)strlen(dstring),
1789 passed ? "PASS" : "FAIL");
1790 buf[sizeof(buf) - 1] = 0;
1791 aLUL->mLog(buf);
1793 return !passed;
// Macro magic to create a set of 8 mutually recursive functions with
// varying frame sizes.  These will recurse amongst themselves as
// specified by |strP|, the directory string, and call
// GetAndCheckStackTrace when the string becomes empty, passing it the
// original value of the string.  This checks the result, printing
// results on |aLUL|'s logging sink, and also returns a boolean
// indicating whether or not the results are acceptable (correct).

#define DECL_TEST_FN(NAME) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);

#define GEN_TEST_FN(NAME, FRAMESIZE) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \
    /* Create a frame of size (at least) FRAMESIZE, so that the */ \
    /* 8 functions created by this macro offer some variation in frame */ \
    /* sizes.  This isn't as simple as it might seem, since a clever */ \
    /* optimizing compiler (eg, clang-5) detects that the array is unused */ \
    /* and removes it.  We try to defeat this by passing it to a function */ \
    /* in a different compilation unit, and hoping that clang does not */ \
    /* notice that the call is a no-op. */ \
    char space[FRAMESIZE]; \
    Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \
                                                                            \
    if (*strP == '\0') { \
      /* We've come to the end of the director string. */ \
      /* Take a stack snapshot. */ \
      /* We purposefully use a negation to avoid tail-call optimization */ \
      return !GetAndCheckStackTrace(aLUL, strPorig); \
    } else { \
      /* Recurse onwards.  This is a bit subtle.  The obvious */ \
      /* thing to do here is call onwards directly, from within the */ \
      /* arms of the case statement.  That gives a problem in that */ \
      /* there will be multiple return points inside each function when */ \
      /* unwinding, so it will be difficult to check for consistency */ \
      /* against the director string.  Instead, we make an indirect */ \
      /* call, so as to guarantee that there is only one call site */ \
      /* within each function.  This does assume that the compiler */ \
      /* won't transform it back to the simple direct-call form. */ \
      /* To discourage it from doing so, the call is bracketed with */ \
      /* __asm__ __volatile__ sections so as to make it not-movable. */ \
      bool (*nextFn)(LUL*, const char*, const char*) = NULL; \
      switch (*strP) { \
        case '1': \
          nextFn = TestFn1; \
          break; \
        case '2': \
          nextFn = TestFn2; \
          break; \
        case '3': \
          nextFn = TestFn3; \
          break; \
        case '4': \
          nextFn = TestFn4; \
          break; \
        case '5': \
          nextFn = TestFn5; \
          break; \
        case '6': \
          nextFn = TestFn6; \
          break; \
        case '7': \
          nextFn = TestFn7; \
          break; \
        case '8': \
          nextFn = TestFn8; \
          break; \
        default: \
          nextFn = TestFn8; \
          break; \
      } \
      /* "use" |space| immediately after the recursive call, */ \
      /* so as to dissuade clang from deallocating the space while */ \
      /* the call is active, or otherwise messing with the stack frame. */ \
      __asm__ __volatile__("" ::: "cc", "memory"); \
      bool passed = nextFn(aLUL, strPorig, strP + 1); \
      Unused << write(1, space, 0); \
      __asm__ __volatile__("" ::: "cc", "memory"); \
      return passed; \
    } \
  }
1877 // The test functions are mutually recursive, so it is necessary to
1878 // declare them before defining them.
1879 DECL_TEST_FN(TestFn1)
1880 DECL_TEST_FN(TestFn2)
1881 DECL_TEST_FN(TestFn3)
1882 DECL_TEST_FN(TestFn4)
1883 DECL_TEST_FN(TestFn5)
1884 DECL_TEST_FN(TestFn6)
1885 DECL_TEST_FN(TestFn7)
1886 DECL_TEST_FN(TestFn8)
1888 GEN_TEST_FN(TestFn1, 123)
1889 GEN_TEST_FN(TestFn2, 456)
1890 GEN_TEST_FN(TestFn3, 789)
1891 GEN_TEST_FN(TestFn4, 23)
1892 GEN_TEST_FN(TestFn5, 47)
1893 GEN_TEST_FN(TestFn6, 117)
1894 GEN_TEST_FN(TestFn7, 1)
1895 GEN_TEST_FN(TestFn8, 99)
1897 // This starts the test sequence going. Call here to generate a
1898 // sequence of calls as directed by the string |dstring|. The call
1899 // sequence will, from its innermost frame, finish by calling
1900 // GetAndCheckStackTrace() and passing it |dstring|.
1901 // GetAndCheckStackTrace() will unwind the stack, check consistency
1902 // of those results against |dstring|, and print a pass/fail message
1903 // to aLUL's logging sink. It also updates the counters in *aNTests
1904 // and aNTestsPassed.
1905 __attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,
1906 /*OUT*/ int* aNTestsPassed, LUL* aLUL,
1907 const char* dstring) {
1908 // Ensure that the stack has at least this much space on it. This
1909 // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
1910 // and hand it to LUL. Safe in the sense that no segfault can
1911 // happen because the stack is at least this big. This is all
1912 // somewhat dubious in the sense that a sufficiently clever compiler
1913 // (clang, for one) can figure out that space[] is unused and delete
1914 // it from the frame. Hence the somewhat elaborate hoop jumping to
1915 // fill it up before the call and to at least appear to use the
1916 // value afterwards.
1917 int i;
1918 volatile char space[LUL_UNIT_TEST_STACK_SIZE];
1919 for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
1920 space[i] = (char)(i & 0x7F);
1923 // Really run the test.
1924 bool passed = TestFn1(aLUL, dstring, dstring);
1926 // Appear to use space[], by visiting the value to compute some kind
1927 // of checksum, and then (apparently) using the checksum.
1928 int sum = 0;
1929 for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
1930 // If this doesn't fool LLVM, I don't know what will.
1931 sum += space[i] - 3 * i;
1933 __asm__ __volatile__("" : : "r"(sum));
1935 // Update the counters.
1936 (*aNTests)++;
1937 if (passed) {
1938 (*aNTestsPassed)++;
1942 void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
1943 LUL* aLUL) {
1944 aLUL->mLog(":\n");
1945 aLUL->mLog("LULUnitTest: BEGIN\n");
1946 *aNTests = *aNTestsPassed = 0;
1947 TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
1948 TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
1949 TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
1950 TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
1951 TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
1952 TestUnw(aNTests, aNTestsPassed, aLUL,
1953 "123456781122334455667788777777777777777777777");
1954 aLUL->mLog("LULUnitTest: END\n");
1955 aLUL->mLog(":\n");
1958 } // namespace lul