5 #include "dom/string.h"
11 DOM_NODE_UNKNOWN
= 0, /* for internal purpose only */
14 DOM_NODE_ATTRIBUTE
= 2,
16 DOM_NODE_CDATA_SECTION
= 4,
17 DOM_NODE_ENTITY_REFERENCE
= 5,
19 DOM_NODE_PROCESSING_INSTRUCTION
= 7,
21 DOM_NODE_DOCUMENT
= 9,
22 DOM_NODE_DOCUMENT_TYPE
= 10,
23 DOM_NODE_DOCUMENT_FRAGMENT
= 11,
24 DOM_NODE_NOTATION
= 12,
/* The following are the node-specific data structures. Each may contain no
 * more than three pointers or something equivalent in size. */
/* Associates an id attribute with the node that carries it. */
struct dom_node_id_item {
	/* The attribute node containing the id value */
	struct dom_node *id_attribute;

	/* The node with the @id attribute */
	struct dom_node *node;
};
/* Data specific to document nodes. */
struct dom_document_node {
	/* The document URI is stored in the string / length members. */

	/* An id to node hash for fast lookup. */
	struct hash *element_ids; /* -> {struct dom_node_id_item} */

	/* Any meta data the root node carries such as document type nodes,
	 * entity and notation map nodes and maybe some internal CSS stylesheet
	 * nodes. */
	struct dom_node_list *meta_nodes;

	/* The child nodes. May be NULL. Ordered like they were inserted. */
	struct dom_node_list *children;
};
55 struct dom_string public_id
;
56 struct dom_string system_id
;
59 struct dom_doctype_subset_info
{
60 struct dom_string internal
;
61 struct dom_id external
;
/* Data specific to document type nodes. */
struct dom_document_type_node {
	/* These are really maps and should be sorted alphabetically. */
	struct dom_node_list *entities;
	struct dom_node_list *notations;

	/* The string/length members of dom_node hold the name of the document
	 * type "<!DOCTYPE {name} ...>". This holds the ids for the external
	 * subset and the string of the internal subset. */
	struct dom_doctype_subset_info *subset;
};
75 /* Element nodes are indexed nodes stored in node lists of either
76 * other child nodes or the root node. */
77 struct dom_element_node
{
78 /* The child nodes. May be NULL. Ordered like they where inserted. */
79 struct dom_node_list
*children
;
81 /* Only element nodes can have attributes and element nodes can only be
82 * child nodes so the map is put here.
84 * The @map may be NULL if there are none. The @map nodes are sorted
85 * alphabetically according to the attributes name so it has fast
87 struct dom_node_list
*map
;
89 /* For <xsl:stylesheet ...> elements this holds the offset of
91 uint16_t namespace_offset
;
93 /* Special implementation dependent type specifier for example
94 * containing an enum value representing the element to reduce string
95 * comparing and only do one fast find mapping. */
99 /* Attribute nodes are named nodes stored in a node map of an element node. */
100 struct dom_attribute_node
{
101 /* The string that hold the attribute value. The @string / @length
102 * members of {struct dom_node} holds the name that identifies the node
104 struct dom_string value
;
106 /* For xml:lang="en" attributes this holds the offset of 'lang' */
107 uint16_t namespace_offset
;
109 /* Special implementation dependent type specifier. For HTML it (will)
110 * contain an enum value representing the attribute HTML_CLASS, HTML_ID etc.
111 * to reduce string comparing and only do one fast find mapping. */
114 /* Was the attribute specified in the DTD as a default attribute or was
115 * it added from the document source. */
116 unsigned int specified
:1;
118 /* Was the node->string allocated */
119 unsigned int allocated
:1;
121 /* Has the node->string been converted to internal charset. */
122 unsigned int converted
:1;
124 /* Is the attribute a unique identifier meaning the owner (element)
125 * should be added to the document nodes @element_id hash. */
128 /* The attribute value references some other resource */
129 unsigned int reference
:1;
131 /* The attribute value is delimited by quotes */
132 unsigned int quoted
:1;
/* Data specific to text nodes. */
struct dom_text_node {
	/* The number of newlines the text string contains */
	unsigned int newlines;

	/* We will need to add text nodes even if they contain only whitespace.
	 * In order to quickly identify such nodes this member is used. */
	unsigned int only_space:1;

	/* Was the node->string allocated */
	unsigned int allocated:1;

	/* Has the node->string been converted to internal charset. */
	unsigned int converted:1;
};
/* Processing instruction target types; stored in the type member of
 * struct dom_proc_instruction_node. */
enum dom_proc_instruction_type {
	DOM_PROC_INSTRUCTION,

	/* Keep this group sorted */
	DOM_PROC_INSTRUCTION_DBHTML,	/* DocBook toolchain instruction */
	DOM_PROC_INSTRUCTION_ELINKS,	/* Internal instruction hook */
	DOM_PROC_INSTRUCTION_XML,	/* XML instructions */

	/* Last enumerator: its value equals the number of entries above. */
	DOM_PROC_INSTRUCTION_TYPES
};
161 struct dom_proc_instruction_node
{
162 /* The target of the processing instruction (xml for '<?xml ... ?>')
163 * is in the @string / @length members. */
164 /* This holds the value to be processed */
165 struct dom_string instruction
;
167 /* For fast checking of the target type */
168 uint16_t type
; /* enum dom_proc_instruction_type */
170 /* For some processing instructions like xml the instructions contain
171 * attributes and those attribute can be collected in this @map. */
172 struct dom_node_list
*map
;
175 union dom_node_data
{
176 struct dom_document_node document
;
177 struct dom_document_type_node document_type
;
178 struct dom_element_node element
;
179 struct dom_attribute_node attribute
;
180 struct dom_text_node text
;
181 struct dom_id notation
;
182 /* For entities string/length hold the notation name */
183 struct dom_id entity
;
184 struct dom_proc_instruction_node proc_instruction
;
186 /* Node types without a union member yet
188 * DOM_NODE_CDATA_SECTION,
190 * DOM_NODE_DOCUMENT_FRAGMENT,
191 * DOM_NODE_ENTITY_REFERENCE,
195 /* This structure is size critical so keep ordering to make it easier to pack
196 * and avoid unneeded members. */
198 /* The type of the node */
199 uint16_t type
; /* -> enum dom_node_type */
201 /* Can contain either stuff like element name or for attributes the
203 struct dom_string string
;
205 struct dom_node
*parent
;
207 /* Various info depending on the type of the node. */
208 union dom_node_data data
;
211 /* A node list can be used for storing indexed nodes */
212 struct dom_node_list
{
214 struct dom_node
*entries
[1];
/* Walks @list from index 0 upwards. For every slot that holds a non-NULL
 * entry, @node is set to that entry and @i to its index before the statement
 * following the macro is run. NULL slots are skipped. */
#define foreach_dom_node(list, node, i)				\
	for ((i) = 0; (list)->size > (i); ++(i))		\
		if (((node) = (list)->entries[(i)]) != 0)
/* Walks @list from the last index down to and including index 0. For every
 * slot that holds a non-NULL entry, @node is set to that entry and @i to its
 * index before the statement following the macro is run.
 *
 * The index is decremented as part of the loop test so that index 0 is
 * visited and so that an empty list (or an unsigned @i) cannot make the loop
 * run away. After a complete traversal @i has been decremented past zero. */
#define foreachback_dom_node(list, node, i)			\
	for ((i) = (list)->size; (i)-- > 0; )			\
		if (((node) = (list)->entries[(i)]))
/* Non-zero when @list is not NULL and @member is a valid index into it,
 * i.e. 0 <= @member < @list->size. */
#define is_dom_node_list_member(list, member)			\
	((list) && (member) >= 0 && (list)->size > (member))
/* Adds @node to the list pointed to by @list_ptr at the given @position. If
 * @position is -1 the node is added at the end.
 * NOTE(review): what the returned list pointer is on success and on failure
 * is not stated in this header -- confirm against the implementation. */
struct dom_node_list *
add_to_dom_node_list(struct dom_node_list **list_ptr,
		     struct dom_node *node, int position);
/* NOTE(review): presumably releases @list; whether the nodes it references
 * are released as well is not visible in this header -- confirm. */
void done_dom_node_list(struct dom_node_list *list);
/* Returns the position or index where the @node has been inserted into the
 * 'default' list of the @parent node. (Default means that get_dom_node_list()
 * is used to acquire the list to search in.) Returns -1 if the node is not
 * found. */
int get_dom_node_list_index(struct dom_node *parent, struct dom_node *node);
/* Returns the position or index where the @node should be inserted into the
 * node @list in order for the list to stay alphabetically sorted. Assumes
 * that @list is already sorted properly. */
int get_dom_node_map_index(struct dom_node_list *list, struct dom_node *node);
/* Looks up the @node_map for a node matching the requested type and name.
 * The @subtype may be 0, indicating an unknown subtype, in which case only
 * the name is tested; otherwise it indicates either the element or attribute
 * private subtype.
 * NOTE(review): the struct dom_node * return type is inferred from the
 * description (a matching node) -- confirm against the implementation. */
struct dom_node *
get_dom_node_map_entry(struct dom_node_list *node_map,
		       enum dom_node_type type, uint16_t subtype,
		       struct dom_string *name);
/* Backend of the init_dom_node() and add_dom_node() macros, which pass the
 * call site's __FILE__ and __LINE__ as @file and @line. @parent is NULL when
 * called through init_dom_node(). The result may be NULL; the inline helpers
 * in this header check for that before using the node.
 * NOTE(review): presumably allocates the node and, for a non-NULL @parent,
 * links it into the parent -- confirm against the implementation. */
struct dom_node *
init_dom_node_(unsigned char *file, int line,
	       struct dom_node *parent, enum dom_node_type type,
	       struct dom_string *string);
/* Calls init_dom_node_() with a NULL parent, recording the call site. */
#define init_dom_node(type, string) \
	init_dom_node_(__FILE__, __LINE__, NULL, type, string)

/* Calls init_dom_node_() with the given @parent, recording the call site. */
#define add_dom_node(parent, type, string) \
	init_dom_node_(__FILE__, __LINE__, parent, type, string)

/* Shorthand for add_dom_node() with the DOM_NODE_ELEMENT type. */
#define add_dom_element(parent, string) \
	add_dom_node(parent, DOM_NODE_ELEMENT, string)
265 static inline struct dom_node
*
266 add_dom_attribute(struct dom_node
*parent
, struct dom_string
*name
,
267 struct dom_string
*value
)
269 struct dom_node
*node
= add_dom_node(parent
, DOM_NODE_ATTRIBUTE
, name
);
272 copy_dom_string(&node
->data
.attribute
.value
, value
);
278 static inline struct dom_node
*
279 add_dom_proc_instruction(struct dom_node
*parent
, struct dom_string
*string
,
280 struct dom_string
*instruction
)
282 struct dom_node
*node
= add_dom_node(parent
, DOM_NODE_PROCESSING_INSTRUCTION
, string
);
284 if (node
&& instruction
) {
285 copy_dom_string(&node
->data
.proc_instruction
.instruction
, instruction
);
/* Removes @node and all its children, then free()s @node itself. */
void done_dom_node(struct dom_node *node);
/* Compares two nodes, returning non-zero if they differ.
 * NOTE(review): the name suggests a case-insensitive comparison; which node
 * properties are compared is not visible in this header -- confirm. */
int dom_node_casecmp(struct dom_node *node1, struct dom_node *node2);
/* Returns the name of @node in an allocated string.
 * NOTE(review): presumably the caller owns the returned string -- confirm. */
struct dom_string *get_dom_node_name(struct dom_node *node);
/* Returns the value of @node, or NULL if no value is defined for the node. */
struct dom_string *get_dom_node_value(struct dom_node *node);
/* Returns the name used for identifying the node @type. */
struct dom_string *get_dom_node_type_name(enum dom_node_type type);
307 /* Based on the type of the parent and the node return a proper list
308 * or NULL. This is useful when adding a node to a parent node. */
309 static inline struct dom_node_list
**
310 get_dom_node_list(struct dom_node
*parent
, struct dom_node
*node
)
312 switch (parent
->type
) {
313 case DOM_NODE_DOCUMENT
:
314 return &parent
->data
.document
.children
;
316 case DOM_NODE_ELEMENT
:
317 switch (node
->type
) {
318 case DOM_NODE_ATTRIBUTE
:
319 return &parent
->data
.element
.map
;
322 return &parent
->data
.element
.children
;
325 case DOM_NODE_DOCUMENT_TYPE
:
326 switch (node
->type
) {
327 case DOM_NODE_ENTITY
:
328 return &parent
->data
.document_type
.entities
;
330 case DOM_NODE_NOTATION
:
331 return &parent
->data
.document_type
.notations
;
337 case DOM_NODE_PROCESSING_INSTRUCTION
:
338 switch (node
->type
) {
339 case DOM_NODE_ATTRIBUTE
:
340 return &parent
->data
.proc_instruction
.map
;