1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 2019
4 * Author: Daniel Gröber
6 * Generalised profiling heap traversal.
8 * ---------------------------------------------------------------------------*/
12 #if defined(PROFILING)
14 #include <rts/Types.h>
15 #include "RetainerSet.h"
17 #include "BeginPrivate.h"
20 // Object with fixed layout. Keeps an information about that
21 // element was processed. (stackPos.next.step)
23 // Description of the pointers-first heap object. Keeps information
24 // about layout. (stackPos.next.ptrs)
26 // Keeps SRT bitmap (stackPos.next.srt)
28 // Keeps a new object that was not inspected yet. Keeps a parent
29 // element (stackPos.next.parent)
31 // This stackElement is empty
36 // fixed layout or layout specified by a field in the closure
41 // See StgClosureInfo in InfoTables.h
52 // parent of the current closure, used only when posTypeFresh is set
57 * Position pointer into a closure. Determines what the next element to return
58 * for a stackElement is.
60 typedef struct stackPos_
{
65 typedef union stackData_
{
67 * Most recent retainer for the corresponding closure on the stack.
72 extern const stackData nullStackData
;
74 typedef union stackAccum_
{
75 StgWord subtree_sizeW
;
79 * An element of the traversal work-stack. Besides the closure itself this also
80 * stores it's parent, associated data and an accumulator.
82 * When 'info.type == posTypeFresh' a 'stackElement' represents just one
83 * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an
84 * offset into the children of 'c'. This is to support returning a closure's
85 * children one-by-one without pushing one element per child onto the stack. See
86 * traverseGetChildren() and traversePop().
89 typedef struct stackElement_
{
92 struct stackElement_
*sep
; // stackElement of parent closure
97 typedef struct traverseState_
{
98 /** Note [Profiling heap traversal visited bit]
99 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
100 * If the RTS is compiled with profiling enabled StgProfHeader can be used
101 * by profiling code to store per-heap object information. Specifically the
102 * 'hp_hdr' field is used to store heap profiling information.
104 * The generic heap traversal code reserves the least significant bit of the
105 * heap profiling word to decide whether we've already visited a given
106 * closure in the current pass or not. The rest of the field is free to be
107 * used by the calling profiler.
109 * By doing things this way we implicitly assume that the LSB is not used by
110 * the user. This is true at least for the word aligned pointers which the
111 * retainer profiler currently stores there and should be maintained by new
112 * users for example by shifting the real data up by one bit.
114 * Since we don't want to have to scan the entire heap a second time just to
115 * reset the per-object visited bit before/after the real traversal we make
116 * the interpretation of this bit dependent on the value of a global
117 * variable, 'flip' and "flip" this variable when we want to invalidate all
120 * When the visited bit is equal to the value of 'flip' the closure data is
121 * valid otherwise not (see isTravDataValid). Both the value of the closure
122 * and global 'flip' value start out as zero, so all closures are considered
123 * valid. Before every traversal we invert the value of 'flip' (see
124 * traverseInvalidateClosureData) invalidating all closures.
126 * There are some complications with this approach, namely: static objects
127 * and mutable data. There we do just go over all existing objects to reset
128 * the bit manually. See 'resetStaticObjectForProfiling' and
129 * 'resetMutableObjects'.
136 * firstStack points to the first block group.
138 * currentStack points to the block group currently being used.
140 * currentStack->free == stackLimit.
142 * stackTop points to the topmost byte in the stack of currentStack.
144 * Unless the whole stack is empty, stackTop must point to the topmost
145 * object (or byte) in the whole stack. Thus, it is only when the whole
146 * stack is empty that stackTop == stackLimit (not during the execution
147 * of pushStackElement() and popStackElement()).
149 * stackBottom == currentStack->start.
152 * == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
156 * When a current stack becomes empty, stackTop is set to point to
157 * the topmost element on the previous block group so as to satisfy
158 * the invariants described above.
161 bdescr
*currentStack
;
162 stackElement
*stackBottom
, *stackTop
, *stackLimit
;
165 * stackSize: records the current size of the stack.
166 * maxStackSize: records its high water mark.
170 * stackSize <= maxStackSize
174 * When return_cb == NULL stackSize is just an estimate measure of the
175 * depth of the graph. The reason is that some heap objects have only a
176 * single child and may not result in a new element being pushed onto the
177 * stack. Therefore, at the end of retainer profiling, maxStackSize is
178 * some value no greater than the actual depth of the graph.
180 int stackSize
, maxStackSize
;
183 * Callback called when processing of a closure 'c' is complete, i.e. when
184 * all it's children have been processed. Note: This includes leaf nodes
187 * @param c The closure who's processing just completed.
188 * @param acc The current value of the accumulator for 'c' on the
189 * stack. It's about to be removed, hence the 'const'
190 * qualifier. This is the same accumulator 'visit_cb' got
191 * passed when 'c' was visited.
193 * @param c_parent The parent closure of 'c'
194 * @param acc_parent The accumulator associated with 'c_parent', currently
197 void (*return_cb
)(StgClosure
*c
, const stackAccum acc
,
198 StgClosure
*c_parent
, stackAccum
*acc_parent
);
202 * Callback called when heap traversal visits a closure.
204 * The callback can assume that the closure's profiling data has been
205 * initialized to zero if this is the first visit during a pass.
207 * See Note [Profiling heap traversal visited bit].
209 * Returning 'false' will instruct the heap traversal code to skip processing
210 * this closure's children. If you don't need to traverse any closure more than
211 * once you can simply return 'first_visit'.
213 typedef bool (*visitClosure_cb
) (
215 const StgClosure
*cp
,
216 const stackData data
,
217 const bool first_visit
,
219 stackData
*child_data
);
221 StgWord
getTravData(const StgClosure
*c
);
222 void setTravData(const traverseState
*ts
, StgClosure
*c
, StgWord w
);
223 bool isTravDataValid(const traverseState
*ts
, const StgClosure
*c
);
225 void traverseWorkStack(traverseState
*ts
, visitClosure_cb visit_cb
);
226 void traversePushRoot(traverseState
*ts
, StgClosure
*c
, StgClosure
*cp
, stackData data
);
227 void traversePushClosure(traverseState
*ts
, StgClosure
*c
, StgClosure
*cp
, stackElement
*sep
, stackData data
);
228 bool traverseMaybeInitClosureData(const traverseState
* ts
, StgClosure
*c
);
229 void traverseInvalidateClosureData(traverseState
* ts
);
231 void initializeTraverseStack(traverseState
*ts
);
232 void closeTraverseStack(traverseState
*ts
);
233 int getTraverseStackMaxSize(traverseState
*ts
);
236 W_
traverseWorkStackBlocks(traverseState
*ts
);
237 void resetStaticObjectForProfiling(const traverseState
*ts
, StgClosure
*static_objects
);
239 #include "EndPrivate.h"
241 #endif /* PROFILING */