1 /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
3 |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 |* See https://llvm.org/LICENSE.txt for license information.
5 |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 \*===----------------------------------------------------------------------===*/
15 #include "InstrProfiling.h"
16 #include "InstrProfilingInternal.h"
17 #include "InstrProfilingUtil.h"
19 #define INSTR_PROF_VALUE_PROF_DATA
20 #define INSTR_PROF_COMMON_API_IMPL
21 #define INSTR_PROF_VALUE_PROF_MEMOP_API
22 #include "profile/InstrProfData.inc"
24 static int hasStaticCounters
= 1;
25 static int OutOfNodesWarnings
= 0;
26 static int hasNonDefaultValsPerSite
= 0;
27 #define INSTR_PROF_MAX_VP_WARNS 10
28 #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
29 #define INSTR_PROF_VNODE_POOL_SIZE 1024
32 /* A shared static pool in addition to the vnodes statically
33 * allocated by the compiler. */
34 COMPILER_RT_VISIBILITY ValueProfNode
35 lprofValueProfNodes
[INSTR_PROF_VNODE_POOL_SIZE
] COMPILER_RT_SECTION(
36 COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME
);
39 COMPILER_RT_VISIBILITY
uint32_t VPMaxNumValsPerSite
=
40 INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE
;
42 COMPILER_RT_VISIBILITY
void lprofSetupValueProfiler(void) {
44 Str
= getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
46 VPMaxNumValsPerSite
= atoi(Str
);
47 hasNonDefaultValsPerSite
= 1;
49 if (VPMaxNumValsPerSite
> INSTR_PROF_MAX_NUM_VAL_PER_SITE
)
50 VPMaxNumValsPerSite
= INSTR_PROF_MAX_NUM_VAL_PER_SITE
;
53 COMPILER_RT_VISIBILITY
void lprofSetMaxValsPerSite(uint32_t MaxVals
) {
54 VPMaxNumValsPerSite
= MaxVals
;
55 hasNonDefaultValsPerSite
= 1;
58 /* This method is only used in value profiler mock testing. */
59 COMPILER_RT_VISIBILITY
void
60 __llvm_profile_set_num_value_sites(__llvm_profile_data
*Data
,
61 uint32_t ValueKind
, uint16_t NumValueSites
) {
63 #pragma GCC diagnostic push
64 #pragma GCC diagnostic ignored "-Wcast-qual"
65 #elif defined(__clang__)
66 #pragma clang diagnostic push
67 #pragma clang diagnostic ignored "-Wcast-qual"
69 *((uint16_t *)&Data
->NumValueSites
[ValueKind
]) = NumValueSites
;
71 #pragma GCC diagnostic pop
72 #elif defined(__clang__)
73 #pragma clang diagnostic pop
77 /* This method is only used in value profiler mock testing. */
78 COMPILER_RT_VISIBILITY
const __llvm_profile_data
*
79 __llvm_profile_iterate_data(const __llvm_profile_data
*Data
) {
83 /* This method is only used in value profiler mock testing. */
84 COMPILER_RT_VISIBILITY
void *
85 __llvm_get_function_addr(const __llvm_profile_data
*Data
) {
86 return Data
->FunctionPointer
;
89 /* Allocate an array that holds the pointers to the linked lists of
90 * value profile counter nodes. The number of element of the array
91 * is the total number of value profile sites instrumented. Returns
92 * 0 if allocation fails.
95 static int allocateValueProfileCounters(__llvm_profile_data
*Data
) {
96 uint64_t NumVSites
= 0;
99 /* This function will never be called when value site array is allocated
100 statically at compile time. */
101 hasStaticCounters
= 0;
102 /* When dynamic allocation is enabled, allow tracking the max number of
104 if (!hasNonDefaultValsPerSite
)
105 VPMaxNumValsPerSite
= INSTR_PROF_MAX_NUM_VAL_PER_SITE
;
107 for (VKI
= IPVK_First
; VKI
<= IPVK_Last
; ++VKI
)
108 NumVSites
+= Data
->NumValueSites
[VKI
];
110 // If NumVSites = 0, calloc is allowed to return a non-null pointer.
111 assert(NumVSites
> 0 && "NumVSites can't be zero");
112 ValueProfNode
**Mem
=
113 (ValueProfNode
**)calloc(NumVSites
, sizeof(ValueProfNode
*));
116 if (!COMPILER_RT_BOOL_CMPXCHG(&Data
->Values
, 0, Mem
)) {
123 static ValueProfNode
*allocateOneNode(void) {
126 if (!hasStaticCounters
)
127 return (ValueProfNode
*)calloc(1, sizeof(ValueProfNode
));
129 /* Early check to avoid value wrapping around. */
130 if (CurrentVNode
+ 1 > EndVNode
) {
131 if (OutOfNodesWarnings
++ < INSTR_PROF_MAX_VP_WARNS
) {
132 PROF_WARN("Unable to track new values: %s. "
133 " Consider using option -mllvm -vp-counters-per-site=<n> to "
135 " value profile counters at compile time. \n",
136 "Running out of static counters");
140 Node
= COMPILER_RT_PTR_FETCH_ADD(ValueProfNode
, CurrentVNode
, 1);
141 /* Due to section padding, EndVNode point to a byte which is one pass
142 * an incomplete VNode, so we need to skip the last incomplete node. */
143 if (Node
+ 1 > EndVNode
)
149 static COMPILER_RT_ALWAYS_INLINE
void
150 instrumentTargetValueImpl(uint64_t TargetValue
, void *Data
,
151 uint32_t CounterIndex
, uint64_t CountValue
) {
152 __llvm_profile_data
*PData
= (__llvm_profile_data
*)Data
;
157 if (!PData
->Values
) {
158 if (!allocateValueProfileCounters(PData
))
162 ValueProfNode
**ValueCounters
= (ValueProfNode
**)PData
->Values
;
163 ValueProfNode
*PrevVNode
= NULL
;
164 ValueProfNode
*MinCountVNode
= NULL
;
165 ValueProfNode
*CurVNode
= ValueCounters
[CounterIndex
];
166 uint64_t MinCount
= UINT64_MAX
;
168 uint8_t VDataCount
= 0;
170 if (TargetValue
== CurVNode
->Value
) {
171 CurVNode
->Count
+= CountValue
;
174 if (CurVNode
->Count
< MinCount
) {
175 MinCount
= CurVNode
->Count
;
176 MinCountVNode
= CurVNode
;
178 PrevVNode
= CurVNode
;
179 CurVNode
= CurVNode
->Next
;
183 if (VDataCount
>= VPMaxNumValsPerSite
) {
184 /* Bump down the min count node's count. If it reaches 0,
185 * evict it. This eviction/replacement policy makes hot
186 * targets more sticky while cold targets less so. In other
187 * words, it makes it less likely for the hot targets to be
188 * prematurally evicted during warmup/establishment period,
189 * when their counts are still low. In a special case when
190 * the number of values tracked is reduced to only one, this
191 * policy will guarantee that the dominating target with >50%
192 * total count will survive in the end. Note that this scheme
193 * allows the runtime to track the min count node in an adaptive
194 * manner. It can correct previous mistakes and eventually
195 * lock on a cold target that is alread in stable state.
197 * In very rare cases, this replacement scheme may still lead
198 * to target loss. For instance, out of \c N value slots, \c N-1
199 * slots are occupied by luke warm targets during the warmup
200 * period and the remaining one slot is competed by two or more
201 * very hot targets. If those hot targets occur in an interleaved
202 * way, none of them will survive (gain enough weight to throw out
203 * other established entries) due to the ping-pong effect.
204 * To handle this situation, user can choose to increase the max
205 * number of tracked values per value site. Alternatively, a more
206 * expensive eviction mechanism can be implemented. It requires
207 * the runtime to track the total number of evictions per-site.
208 * When the total number of evictions reaches certain threshold,
209 * the runtime can wipe out more than one lowest count entries
210 * to give space for hot targets.
212 if (MinCountVNode
->Count
<= CountValue
) {
213 CurVNode
= MinCountVNode
;
214 CurVNode
->Value
= TargetValue
;
215 CurVNode
->Count
= CountValue
;
217 MinCountVNode
->Count
-= CountValue
;
222 CurVNode
= allocateOneNode();
225 CurVNode
->Value
= TargetValue
;
226 CurVNode
->Count
+= CountValue
;
228 uint32_t Success
= 0;
229 if (!ValueCounters
[CounterIndex
])
231 COMPILER_RT_BOOL_CMPXCHG(&ValueCounters
[CounterIndex
], 0, CurVNode
);
232 else if (PrevVNode
&& !PrevVNode
->Next
)
233 Success
= COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode
->Next
), 0, CurVNode
);
235 if (!Success
&& !hasStaticCounters
) {
241 COMPILER_RT_VISIBILITY
void
242 __llvm_profile_instrument_target(uint64_t TargetValue
, void *Data
,
243 uint32_t CounterIndex
) {
244 instrumentTargetValueImpl(TargetValue
, Data
, CounterIndex
, 1);
246 COMPILER_RT_VISIBILITY
void
247 __llvm_profile_instrument_target_value(uint64_t TargetValue
, void *Data
,
248 uint32_t CounterIndex
,
249 uint64_t CountValue
) {
250 instrumentTargetValueImpl(TargetValue
, Data
, CounterIndex
, CountValue
);
254 * The target values are partitioned into multiple ranges. The range spec is
255 * defined in InstrProfData.inc.
257 COMPILER_RT_VISIBILITY
void
258 __llvm_profile_instrument_memop(uint64_t TargetValue
, void *Data
,
259 uint32_t CounterIndex
) {
260 // Map the target value to the representative value of its range.
261 uint64_t RepValue
= InstrProfGetRangeRepValue(TargetValue
);
262 __llvm_profile_instrument_target(RepValue
, Data
, CounterIndex
);
266 * A wrapper struct that represents value profile runtime data.
267 * Like InstrProfRecord class which is used by profiling host tools,
268 * ValueProfRuntimeRecord also implements the abstract interfaces defined in
269 * ValueProfRecordClosure so that the runtime data can be serialized using
270 * shared C implementation.
272 typedef struct ValueProfRuntimeRecord
{
273 const __llvm_profile_data
*Data
;
274 ValueProfNode
**NodesKind
[IPVK_Last
+ 1];
275 uint8_t **SiteCountArray
;
276 } ValueProfRuntimeRecord
;
278 /* ValueProfRecordClosure Interface implementation. */
280 static uint32_t getNumValueSitesRT(const void *R
, uint32_t VK
) {
281 return ((const ValueProfRuntimeRecord
*)R
)->Data
->NumValueSites
[VK
];
284 static uint32_t getNumValueDataRT(const void *R
, uint32_t VK
) {
286 const ValueProfRuntimeRecord
*Record
= (const ValueProfRuntimeRecord
*)R
;
287 if (Record
->SiteCountArray
[VK
] == INSTR_PROF_NULLPTR
)
289 for (I
= 0; I
< Record
->Data
->NumValueSites
[VK
]; I
++)
290 S
+= Record
->SiteCountArray
[VK
][I
];
294 static uint32_t getNumValueDataForSiteRT(const void *R
, uint32_t VK
,
296 const ValueProfRuntimeRecord
*Record
= (const ValueProfRuntimeRecord
*)R
;
297 return Record
->SiteCountArray
[VK
][S
];
300 static ValueProfRuntimeRecord RTRecord
;
301 static ValueProfRecordClosure RTRecordClosure
= {
302 &RTRecord
, INSTR_PROF_NULLPTR
, /* GetNumValueKinds */
303 getNumValueSitesRT
, getNumValueDataRT
, getNumValueDataForSiteRT
,
304 INSTR_PROF_NULLPTR
, /* RemapValueData */
305 INSTR_PROF_NULLPTR
, /* GetValueForSite, */
306 INSTR_PROF_NULLPTR
/* AllocValueProfData */
310 initializeValueProfRuntimeRecord(const __llvm_profile_data
*Data
,
311 uint8_t *SiteCountArray
[]) {
312 unsigned I
, J
, S
= 0, NumValueKinds
= 0;
313 ValueProfNode
**Nodes
= (ValueProfNode
**)Data
->Values
;
314 RTRecord
.Data
= Data
;
315 RTRecord
.SiteCountArray
= SiteCountArray
;
316 for (I
= 0; I
<= IPVK_Last
; I
++) {
317 uint16_t N
= Data
->NumValueSites
[I
];
323 RTRecord
.NodesKind
[I
] = Nodes
? &Nodes
[S
] : INSTR_PROF_NULLPTR
;
324 for (J
= 0; J
< N
; J
++) {
325 /* Compute value count for each site. */
327 ValueProfNode
*Site
=
328 Nodes
? RTRecord
.NodesKind
[I
][J
] : INSTR_PROF_NULLPTR
;
335 RTRecord
.SiteCountArray
[I
][J
] = C
;
339 return NumValueKinds
;
342 static ValueProfNode
*getNextNValueData(uint32_t VK
, uint32_t Site
,
343 InstrProfValueData
*Dst
,
344 ValueProfNode
*StartNode
, uint32_t N
) {
346 ValueProfNode
*VNode
= StartNode
? StartNode
: RTRecord
.NodesKind
[VK
][Site
];
347 for (I
= 0; I
< N
; I
++) {
348 Dst
[I
].Value
= VNode
->Value
;
349 Dst
[I
].Count
= VNode
->Count
;
355 static uint32_t getValueProfDataSizeWrapper(void) {
356 return getValueProfDataSize(&RTRecordClosure
);
359 static uint32_t getNumValueDataForSiteWrapper(uint32_t VK
, uint32_t S
) {
360 return getNumValueDataForSiteRT(&RTRecord
, VK
, S
);
363 static VPDataReaderType TheVPDataReader
= {
364 initializeValueProfRuntimeRecord
, getValueProfRecordHeaderSize
,
365 getFirstValueProfRecord
, getNumValueDataForSiteWrapper
,
366 getValueProfDataSizeWrapper
, getNextNValueData
};
368 COMPILER_RT_VISIBILITY VPDataReaderType
*lprofGetVPDataReader(void) {
369 return &TheVPDataReader
;