1 /** The DOM stack interface.
5 * The DOM stack interface is used by the parser when building a DOM
6 * tree and by the various DOM tree traversers. It allows for several
7 * stack contexts to be registered, each defining their own type
8 * specific node handlers or callbacks (#dom_stack_callback_T) which
9 * will be called upon a node being pushed onto or popped from the
10 * stack. As an example a example, by defining DOM_STACK_TRACE it is be
11 * possible to add a DOM stack tracer (using #add_dom_stack_tracer) to
12 * debug all push and pop operations.
14 * The stack interface provides a method for automatically having
15 * objects allocated when a new node is pushed onto the stack. This
16 * object can then be used for storing private state information
17 * belonging to the individual contexts. This is done by setting the
18 * #dom_stack_context_info.object_size member to a non-zero value,
19 * usually using sizeof().
21 * States on the stack can be marked immutable meaning that they will
22 * be "locked" down so that any operations to pop them will fail. This
23 * can be used when parsing a "subdocument", e.g. output from
24 * ECMAScripts "document.write" function, where badly formatted SGML
25 * should not be allowed to change the root document.
27 * In some situations, it may be desired to avoid building a complete
28 * DOM tree in memory when only one document traversal is required, for
29 * example when highlighting SGML code. This can be done by passing the
30 * #DOM_STACK_FLAG_FREE_NODES flag to #init_dom_stack. Nodes that are
31 * popped from the stack will immediately be deallocated. A pop node
32 * callback can also request that a node is deallocated and removed from
33 * the DOM tree by returning the #DOM_CODE_FREE_NODE return code. An
34 * example of this can be seen in the DOM configuration module where
35 * comment nodes may be pruned from the tree.
38 * - How to access stack states and context state objects.
39 * - Using search_dom_stack() and walk_dom_nodes().
42 #ifndef EL_DOM_STACK_H
43 #define EL_DOM_STACK_H
47 #include "util/error.h"
48 #include "util/hash.h"
52 /** DOM stack callback
54 * Used by contexts, for 'hooking' into the node traversing.
56 * This callback must not call done_dom_node() to free the node it
57 * gets as a parameter, because call_dom_node_callbacks() may be
58 * intending to call more callbacks for the same node. Instead, the
59 * callback can return ::DOM_CODE_FREE_NODE, and the node will then
60 * be freed after the callbacks of all contexts have been called. */
62 (*dom_stack_callback_T
)(struct dom_stack
*, struct dom_node
*, void *);
64 #define DOM_STACK_MAX_DEPTH 4096
68 * This state records what node and where it is placed. */
69 struct dom_stack_state
{
70 /** The node assiciated with the state */
71 struct dom_node
*node
;
73 * The depth of the state in the stack. This is amongst other things
74 * used to get the state object data. */
76 /** Whether this stack state can be popped with pop_dom_node(),
77 * pop_dom_nodes(), or pop_dom_state(). */
78 unsigned int immutable
:1;
81 /** DOM stack context info
83 * To add a DOM stack context define this struct either statically or on the
85 struct dom_stack_context_info
{
87 * The number of bytes to allocate on the stack for each state's
88 * data member. Zero means no state data should be allocated.
92 /** Callbacks to be called when pushing nodes. */
93 dom_stack_callback_T push
[DOM_NODES
];
94 /** Callbacks to be called when popping nodes. */
95 dom_stack_callback_T pop
[DOM_NODES
];
100 * This holds 'runtime' data for the stack context. */
101 struct dom_stack_context
{
102 /** Data specific to the context. */
106 * This is one big array of context specific objects. For the SGML
107 * parser this holds DTD-oriented info about the node (recorded in
108 * struct #sgml_node_info). E.g. whether an element node is optional.
110 unsigned char *state_objects
;
112 /** Info about node callbacks and such. */
113 struct dom_stack_context_info
*info
;
116 /** Flags for controlling the DOM stack */
117 enum dom_stack_flag
{
118 /** No flag needed. */
119 DOM_STACK_FLAG_NONE
= 0,
121 /** Free nodes when popping by calling done_dom_node(). */
122 DOM_STACK_FLAG_FREE_NODES
= 1,
127 * The DOM stack is a convenient way to traverse DOM trees. Also it
128 * maintains needed state info and is therefore also a holder of the current
129 * context since the stack is used to when the DOM tree is manipulated. */
131 /** The states currently on the stack. */
132 struct dom_stack_state
*states
;
133 /** The depth of the stack. */
136 /** Flags given to ref:[init_dom_stack]. */
137 enum dom_stack_flag flags
;
139 /** Contexts for the pushed and popped nodes. */
140 struct dom_stack_context
**contexts
;
141 /** The number of active contexts. */
142 size_t contexts_size
;
145 * The current context. Only meaningful within
146 * #dom_stack_callback_T functions. */
147 struct dom_stack_context
*current
;
150 /** Check whether stack is empty or not
152 * @param stack The stack to check.
154 * @returns Non-zero if stack is empty. */
155 #define dom_stack_is_empty(stack) \
156 (!(stack)->states || (stack)->depth == 0)
158 /** Access state by offset from top
160 * @param stack The stack to fetch the state from.
161 * @param top_offset The offset from the stack top, zero is the top.
163 * @returns The requested state. */
164 static inline struct dom_stack_state
*
165 get_dom_stack_state(struct dom_stack
*stack
, int top_offset
)
167 assertm(stack
->depth
- 1 - top_offset
>= 0,
168 "Attempting to access invalid state");
169 return &stack
->states
[stack
->depth
- 1 - top_offset
];
172 /** Access the stack top
174 * @param stack The stack to get the top state from.
176 * @returns The top state. */
177 #define get_dom_stack_top(stack) \
178 get_dom_stack_state(stack, 0)
180 /** Access context specific state data
182 * Similar to ref:[get_dom_stack_state], this will fetch the data
183 * associated with the state for the given context.
185 * @param context The context to get data from.
186 * @param state The stack state to get data from.
188 * @returns The state data or NULL if
189 * #dom_stack_context_info.object_size was zero. */
191 get_dom_stack_state_data(struct dom_stack_context
*context
,
192 struct dom_stack_state
*state
)
194 size_t object_size
= context
->info
->object_size
;
196 if (!object_size
) return NULL
;
198 assert(context
->state_objects
);
200 return (void *) &context
->state_objects
[state
->depth
* object_size
];
203 /*#define DOM_STACK_TRACE*/
205 /** Get debug info from the DOM stack
207 * Define `DOM_STACK_TRACE` to have debug info about the nodes added printed to
208 * the log. It will define add_dom_stack_tracer() to not be a no-op.
212 * ELINKS_LOG=/tmp/dom-dump.txt ./elinks -no-connect URL
214 * to have the debug dumped into a file. */
215 #ifdef DOM_STACK_TRACE
216 #define add_dom_stack_tracer(stack, name) \
217 add_dom_stack_context(stack, name, &dom_stack_trace_context_info)
218 extern struct dom_stack_context_info dom_stack_trace_context_info
;
220 #define add_dom_stack_tracer(stack, name) /* Nada */
223 /** The state iterators
225 * To safely iterate through the stack state iterators. */
227 /** Iterate the stack from bottom to top. */
228 #define foreach_dom_stack_state(stack, state, pos) \
229 for ((pos) = 0; (pos) < (stack)->depth; (pos)++) \
230 if (((state) = &(stack)->states[(pos)]))
232 /** Iterate the stack from top to bottom. */
233 #define foreachback_dom_stack_state(stack, state, pos) \
234 for ((pos) = (stack)->depth - 1; (pos) >= 0; (pos)--) \
235 if (((state) = &(stack)->states[(pos)]))
238 /* Life cycle functions. */
240 /** Initialise a DOM stack
242 * @param stack Pointer to a (preallocated) stack.
243 * @param flags Any flags needed for controlling the behaviour of the stack.
245 void init_dom_stack(struct dom_stack
*stack
, enum dom_stack_flag flags
);
247 /** Release a DOM stack
249 * Free all resources collected by the stack.
251 * @param stack The stack to release. */
252 void done_dom_stack(struct dom_stack
*stack
);
254 /** Add a context to the stack
256 * This is needed if either you want to have the stack allocated objects for
257 * created states and/or if you want to install callbacks for pushing or
260 * @param stack The stack where the context should be created.
261 * @param data Private data to be stored in ref:[dom_stack_context.data].
262 * @param context_info Information about state objects and node callbacks.
264 * @returns A pointer to the newly created context or NULL. */
265 struct dom_stack_context
*
266 add_dom_stack_context(struct dom_stack
*stack
, void *data
,
267 struct dom_stack_context_info
*context_info
);
269 /** Unregister a stack context
271 * This should be done especially for temporary stack contexts (without any
272 * callbacks) so that they do not increasing the memory usage. */
273 void done_dom_stack_context(struct dom_stack
*stack
, struct dom_stack_context
*context
);
275 /** Push a node onto the stack
277 * Makes the pushed node the new top of the stack.
279 * @param stack The stack to push onto.
280 * @param node The node to push onto the stack.
282 * @returns If an error occurs the node is released with
283 * #done_dom_node and NULL is returned. Else the pushed
284 * node is returned. */
285 enum dom_code
push_dom_node(struct dom_stack
*stack
, struct dom_node
*node
);
287 /** Pop the top stack state
289 * @param stack The stack to pop from. */
290 void pop_dom_node(struct dom_stack
*stack
);
292 /** Conditionally pop the stack states
294 * Searches the stack (using ref:[search_dom_stack]) for a specific node and
295 * pops all states until that particular state is met.
297 * @note The popping is stopped if an immutable state is
299 void pop_dom_nodes(struct dom_stack
*stack
, enum dom_node_type type
,
300 struct dom_string
*string
);
302 /** Pop all states until target state
304 * Pop all stack states until a specific state is reached. The target state
307 * @param stack The stack to pop from.
308 * @param target The state to pop until and including. */
309 void pop_dom_state(struct dom_stack
*stack
, struct dom_stack_state
*target
);
311 /** Search the stack states
313 * The string comparison is done against the ref:[dom_node.string] member of
314 * the of the state nodes.
316 * @param stack The stack to search in.
317 * @param type The type of node to match against.
318 * @param string The string to match against.
320 * @returns A state that matched the type and string or NULL. */
321 struct dom_stack_state
*
322 search_dom_stack(struct dom_stack
*stack
, enum dom_node_type type
,
323 struct dom_string
*string
);
325 /** Walk all nodes reachable from a given node
327 * Visits each node in the DOM tree rooted at a given node, pre-order style.
329 * @param stack The stack to use for walking the nodes.
330 * @param root The root node to start from.
332 * It is assummed that the given stack has been initialised with
333 * #init_dom_stack and that the caller already added one or more context
335 void walk_dom_nodes(struct dom_stack
*stack
, struct dom_node
*root
);