From e14827f0828d14ef17ab76316e8449d1b76e2617 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Wed, 20 Nov 2024 10:08:58 -0800 Subject: [PATCH] [MemProf] Templatize CallStackRadixTreeBuilder (NFC) (#117014) Prepare for usage in the bitcode reader/writer where we already have a LinearFrameId: - templatize input frame id type in CallStackRadixTreeBuilder - templatize input frame id type in computeFrameHistogram - make the map from FrameId to LinearFrameId optional We plan to use the same radix format in the ThinLTO summary records, where we already have a LinearFrameId. --- llvm/include/llvm/ProfileData/MemProf.h | 25 +++++++++-------- llvm/lib/ProfileData/InstrProfWriter.cpp | 2 +- llvm/lib/ProfileData/MemProf.cpp | 44 ++++++++++++++++++++---------- llvm/unittests/ProfileData/MemProfTest.cpp | 16 +++++------ 4 files changed, 52 insertions(+), 35 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 9415e554bcc0..f97fbd4bd644 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -1050,8 +1050,9 @@ struct FrameStat { }; // Compute a histogram of Frames in call stacks. -llvm::DenseMap -computeFrameHistogram(llvm::MapVector> +template +llvm::DenseMap +computeFrameHistogram(llvm::MapVector> &MemProfCallStackData); // Construct a radix tree of call stacks. @@ -1109,7 +1110,7 @@ computeFrameHistogram(llvm::MapVector> // On-disk IndexedMemProfRecord will refer to call stacks by their indexes into // the radix tree array, so we do not explicitly encode mappings like: // "CallStackId 1 -> 11". -class CallStackRadixTreeBuilder { +template class CallStackRadixTreeBuilder { // The radix tree array. std::vector RadixArray; @@ -1136,23 +1137,25 @@ class CallStackRadixTreeBuilder { // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix. std::vector Indexes; - using CSIdPair = std::pair>; + using CSIdPair = std::pair>; // Encode a call stack into RadixArray. Return the starting index within // RadixArray. - LinearCallStackId encodeCallStack( - const llvm::SmallVector *CallStack, - const llvm::SmallVector *Prev, - const llvm::DenseMap &MemProfFrameIndexes); + LinearCallStackId + encodeCallStack(const llvm::SmallVector *CallStack, + const llvm::SmallVector *Prev, + std::optional> + MemProfFrameIndexes); public: CallStackRadixTreeBuilder() = default; // Build a radix tree array. - void build(llvm::MapVector> + void build(llvm::MapVector> &&MemProfCallStackData, - const llvm::DenseMap &MemProfFrameIndexes, - llvm::DenseMap &FrameHistogram); + std::optional> + MemProfFrameIndexes, + llvm::DenseMap &FrameHistogram); ArrayRef getRadixArray() const { return RadixArray; } diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 5580fe04ffe7..d90629ad57f5 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -635,7 +635,7 @@ writeMemProfCallStackArray( llvm::DenseMap MemProfCallStackIndexes; - memprof::CallStackRadixTreeBuilder Builder; + memprof::CallStackRadixTreeBuilder Builder; Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, FrameHistogram); for (auto I : Builder.getRadixArray()) diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 6c4bda2d9264..9d5ac748d797 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -436,10 +436,12 @@ CallStackId hashCallStack(ArrayRef CS) { // To quickly determine the location of the common prefix within RadixArray, // Indexes caches the indexes of the previous call stack's frames within // RadixArray. -LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack( - const llvm::SmallVector *CallStack, - const llvm::SmallVector *Prev, - const llvm::DenseMap &MemProfFrameIndexes) { +template +LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack( + const llvm::SmallVector *CallStack, + const llvm::SmallVector *Prev, + std::optional> + MemProfFrameIndexes) { // Compute the length of the common root prefix between Prev and CallStack. uint32_t CommonLen = 0; if (Prev) { @@ -464,10 +466,11 @@ LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack( // Copy the part of the call stack beyond the common prefix to RadixArray. assert(CommonLen <= CallStack->size()); - for (FrameId F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) { + for (FrameIdTy F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) { // Remember the index of F in RadixArray. Indexes.push_back(RadixArray.size()); - RadixArray.push_back(MemProfFrameIndexes.find(F)->second); + RadixArray.push_back( + MemProfFrameIndexes ? MemProfFrameIndexes->find(F)->second : F); } assert(CallStack->size() == Indexes.size()); @@ -479,11 +482,13 @@ LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack( return RadixArray.size() - 1; } -void CallStackRadixTreeBuilder::build( - llvm::MapVector> +template +void CallStackRadixTreeBuilder::build( + llvm::MapVector> &&MemProfCallStackData, - const llvm::DenseMap &MemProfFrameIndexes, - llvm::DenseMap &FrameHistogram) { + std::optional> + MemProfFrameIndexes, + llvm::DenseMap &FrameHistogram) { // Take the vector portion of MemProfCallStackData. The vector is exactly // what we need to sort. Also, we no longer need its lookup capability. llvm::SmallVector CallStacks = MemProfCallStackData.takeVector(); @@ -535,7 +540,7 @@ void CallStackRadixTreeBuilder::build( // root. return std::lexicographical_compare( L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(), - [&](FrameId F1, FrameId F2) { + [&](FrameIdTy F1, FrameIdTy F2) { uint64_t H1 = FrameHistogram[F1].Count; uint64_t H2 = FrameHistogram[F2].Count; // Popular frames should come later because we encode call stacks from @@ -585,7 +590,7 @@ void CallStackRadixTreeBuilder::build( // traverse CallStacks in the reverse order, then Call Stack 3 has the // complete call stack encoded without any pointers. Call Stack 1 and 2 point // to appropriate prefixes of Call Stack 3. - const llvm::SmallVector *Prev = nullptr; + const llvm::SmallVector *Prev = nullptr; for (const auto &[CSId, CallStack] : llvm::reverse(CallStacks)) { LinearCallStackId Pos = encodeCallStack(&CallStack, Prev, MemProfFrameIndexes); @@ -608,10 +613,14 @@ void CallStackRadixTreeBuilder::build( V = RadixArray.size() - 1 - V; } -llvm::DenseMap -computeFrameHistogram(llvm::MapVector> +// Explicitly instantiate class with the utilized FrameIdTy. +template class CallStackRadixTreeBuilder; + +template +llvm::DenseMap +computeFrameHistogram(llvm::MapVector> &MemProfCallStackData) { - llvm::DenseMap Histogram; + llvm::DenseMap Histogram; for (const auto &KV : MemProfCallStackData) { const auto &CS = KV.second; @@ -624,6 +633,11 @@ computeFrameHistogram(llvm::MapVector> return Histogram; } +// Explicitly instantiate function with the utilized FrameIdTy. +template llvm::DenseMap computeFrameHistogram( + llvm::MapVector> + &MemProfCallStackData); + void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) { for (const auto &AS : Record.AllocSites) { assert(AS.CSId == hashCallStack(AS.CallStack)); diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index e5d41db06dcc..e68cc0943387 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -659,8 +659,8 @@ TEST(MemProf, RadixTreeBuilderEmpty) { llvm::MapVector> MemProfCallStackData; llvm::DenseMap FrameHistogram = - llvm::memprof::computeFrameHistogram(MemProfCallStackData); - llvm::memprof::CallStackRadixTreeBuilder Builder; + llvm::memprof::computeFrameHistogram(MemProfCallStackData); + llvm::memprof::CallStackRadixTreeBuilder Builder; Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, FrameHistogram); ASSERT_THAT(Builder.getRadixArray(), testing::IsEmpty()); @@ -677,8 +677,8 @@ TEST(MemProf, RadixTreeBuilderOne) { MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS1), CS1}); llvm::DenseMap FrameHistogram = - llvm::memprof::computeFrameHistogram(MemProfCallStackData); - llvm::memprof::CallStackRadixTreeBuilder Builder; + llvm::memprof::computeFrameHistogram(MemProfCallStackData); + llvm::memprof::CallStackRadixTreeBuilder Builder; Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, FrameHistogram); EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({ @@ -704,8 +704,8 @@ TEST(MemProf, RadixTreeBuilderTwo) { MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS2), CS2}); llvm::DenseMap FrameHistogram = - llvm::memprof::computeFrameHistogram(MemProfCallStackData); - llvm::memprof::CallStackRadixTreeBuilder Builder; + llvm::memprof::computeFrameHistogram(MemProfCallStackData); + llvm::memprof::CallStackRadixTreeBuilder Builder; Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, FrameHistogram); EXPECT_THAT(Builder.getRadixArray(), @@ -742,8 +742,8 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) { MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS4), CS4}); llvm::DenseMap FrameHistogram = - llvm::memprof::computeFrameHistogram(MemProfCallStackData); - llvm::memprof::CallStackRadixTreeBuilder Builder; + llvm::memprof::computeFrameHistogram(MemProfCallStackData); + llvm::memprof::CallStackRadixTreeBuilder Builder; Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, FrameHistogram); EXPECT_THAT(Builder.getRadixArray(), -- 2.11.4.GIT