grafthistory: support curl
[elinks/elinks-j605.git] / src / dom / node.h
blob542b8930119a2eb4084bce61bfdb34f07428c14b
2 #ifndef EL_DOM_NODE_H
3 #define EL_DOM_NODE_H
5 #include "dom/string.h"
6 #include "util/hash.h"
8 struct dom_node_list;
/* The kinds of nodes a DOM tree can hold. DOM_NODE_UNKNOWN is reserved for
 * internal use. DOM_NODES is the terminating entry and therefore equals the
 * number of enumerators listed before it. */
enum dom_node_type {
	DOM_NODE_UNKNOWN		=  0, /* for internal purpose only */

	DOM_NODE_ELEMENT		=  1,
	DOM_NODE_ATTRIBUTE		=  2,
	DOM_NODE_TEXT			=  3,
	DOM_NODE_CDATA_SECTION		=  4,
	DOM_NODE_ENTITY_REFERENCE	=  5,
	DOM_NODE_ENTITY			=  6,
	DOM_NODE_PROCESSING_INSTRUCTION	=  7,
	DOM_NODE_COMMENT		=  8,
	DOM_NODE_DOCUMENT		=  9,
	DOM_NODE_DOCUMENT_TYPE		= 10,
	DOM_NODE_DOCUMENT_FRAGMENT	= 11,
	DOM_NODE_NOTATION		= 12,

	DOM_NODES
};
/* The following are the node specific data structures. They may contain no
 * more than 3 pointers or something equivalent. */
/* Pairs an id attribute with the node that carries it. */
struct dom_node_id_item {
	/* The attribute node that contains the id value. */
	struct dom_node *id_attribute;

	/* The node on which the @id attribute is set. */
	struct dom_node *node;
};
/* Data specific to document nodes. The document URI is kept in the
 * string / length members. */
struct dom_document_node {
	/* Hash mapping an id to its node, for fast lookup.
	 * The hash items are {struct dom_node_id_item}. */
	struct hash *element_ids;

	/* Meta data carried by the root node, such as document type nodes,
	 * entity and notation map nodes and maybe some internal CSS
	 * stylesheet node. */
	struct dom_node_list *meta_nodes;

	/* The child nodes, in the order they were inserted. May be NULL. */
	struct dom_node_list *children;
};
54 struct dom_id {
55 struct dom_string public_id;
56 struct dom_string system_id;
59 struct dom_doctype_subset_info {
60 struct dom_string internal;
61 struct dom_id external;
/* Data specific to document type nodes. */
struct dom_document_type_node {
	/* These are really maps and should be sorted alphabetically. */
	struct dom_node_list *entities;
	struct dom_node_list *notations;

	/* The string/length members of dom_node hold the name of the document
	 * type "<!DOCTYPE {name} ...>". This holds the ids for the external
	 * subset and the string of the internal subset.
	 *
	 * The tag must be spelled exactly like the struct it refers to; a
	 * misspelt tag would silently declare a new, unrelated incomplete
	 * type. */
	struct dom_doctype_subset_info *subset;
};
/* Element nodes are indexed nodes. They are stored in node lists, either the
 * list of some other node's children or that of the root node. */
struct dom_element_node {
	/* The child nodes, in the order they were inserted. May be NULL. */
	struct dom_node_list *children;

	/* The attribute map lives here because only element nodes can have
	 * attributes and element nodes can only be child nodes.
	 *
	 * The @map may be NULL if there are no attributes. Its nodes are
	 * sorted alphabetically by attribute name so lookup is fast. */
	struct dom_node_list *map;

	/* For <xsl:stylesheet ...> elements this holds the offset of
	 * 'stylesheet'. */
	uint16_t namespace_offset;

	/* Special implementation dependent type specifier, for example an
	 * enum value representing the element, so string comparing is
	 * reduced to a single fast find mapping. */
	uint16_t type;
};
99 /* Attribute nodes are named nodes stored in a node map of an element node. */
100 struct dom_attribute_node {
101 /* The string that hold the attribute value. The @string / @length
102 * members of {struct dom_node} holds the name that identifies the node
103 * in the map. */
104 struct dom_string value;
106 /* For xml:lang="en" attributes this holds the offset of 'lang' */
107 uint16_t namespace_offset;
109 /* Special implementation dependent type specifier. For HTML it (will)
110 * contain an enum value representing the attribute HTML_CLASS, HTML_ID etc.
111 * to reduce string comparing and only do one fast find mapping. */
112 uint16_t type;
114 /* Was the attribute specified in the DTD as a default attribute or was
115 * it added from the document source. */
116 unsigned int specified:1;
118 /* Was the node->string allocated */
119 unsigned int allocated:1;
121 /* Has the node->string been converted to internal charset. */
122 unsigned int converted:1;
124 /* Is the attribute a unique identifier meaning the owner (element)
125 * should be added to the document nodes @element_id hash. */
126 unsigned int id:1;
128 /* The attribute value references some other resource */
129 unsigned int reference:1;
131 /* The attribute value is delimited by quotes */
132 unsigned int quoted:1;
/* Data specific to text nodes. */
struct dom_text_node {
	/* How many newlines the text string contains. */
	unsigned int newlines;

	/* Text nodes have to be added even when they contain nothing but
	 * whitespace. This flag makes such nodes quick to identify. */
	unsigned int only_space:1;

	/* Was the node->string allocated. */
	unsigned int allocated:1;

	/* Has the node->string been converted to the internal charset. */
	unsigned int converted:1;
};
/* Known processing instruction targets. DOM_PROC_INSTRUCTION_TYPES is the
 * terminating entry. */
enum dom_proc_instruction_type {
	DOM_PROC_INSTRUCTION,

	/* Keep this group sorted */
	DOM_PROC_INSTRUCTION_DBHTML,	/* DocBook toolchain instruction */
	DOM_PROC_INSTRUCTION_ELINKS,	/* Internal instruction hook */
	DOM_PROC_INSTRUCTION_XML,	/* XML instructions */

	DOM_PROC_INSTRUCTION_TYPES
};
161 struct dom_proc_instruction_node {
162 /* The target of the processing instruction (xml for '<?xml ... ?>')
163 * is in the @string / @length members. */
164 /* This holds the value to be processed */
165 struct dom_string instruction;
167 /* For fast checking of the target type */
168 uint16_t type; /* enum dom_proc_instruction_type */
170 /* For some processing instructions like xml the instructions contain
171 * attributes and those attribute can be collected in this @map. */
172 struct dom_node_list *map;
175 union dom_node_data {
176 struct dom_document_node document;
177 struct dom_document_type_node document_type;
178 struct dom_element_node element;
179 struct dom_attribute_node attribute;
180 struct dom_text_node text;
181 struct dom_id notation;
182 /* For entities string/length hold the notation name */
183 struct dom_id entity;
184 struct dom_proc_instruction_node proc_instruction;
186 /* Node types without a union member yet
188 * DOM_NODE_CDATA_SECTION,
189 * DOM_NODE_COMMENT,
190 * DOM_NODE_DOCUMENT_FRAGMENT,
191 * DOM_NODE_ENTITY_REFERENCE,
195 /* This structure is size critical so keep ordering to make it easier to pack
196 * and avoid unneeded members. */
197 struct dom_node {
198 /* The type of the node */
199 uint16_t type; /* -> enum dom_node_type */
201 /* Can contain either stuff like element name or for attributes the
202 * attribute name. */
203 struct dom_string string;
205 struct dom_node *parent;
207 /* Various info depending on the type of the node. */
208 union dom_node_data data;
/* A node list stores indexed nodes. @size is the number of slots in
 * @entries that the iteration macros walk over. */
struct dom_node_list {
	size_t size;
	struct dom_node *entries[1];
};
/* Walks @list from the first entry to the last. @i receives the index and
 * @node the entry; the statement that follows the macro runs only for
 * entries that are not NULL. */
#define foreach_dom_node(list, node, i)				\
	for ((i) = 0; (i) < (list)->size; (i)++)		\
		if (((node) = (list)->entries[(i)]))
/* Walks @list from the last entry down to and including the first one. @i
 * receives the index and @node the entry; the statement that follows the
 * macro runs only for entries that are not NULL.
 *
 * The index is tested before it is decremented. That way entry 0 is visited
 * and an empty list is never indexed, no matter whether @i is a signed or an
 * unsigned variable. */
#define foreachback_dom_node(list, node, i)			\
	for ((i) = (list)->size; (i)-- > 0; )			\
		if (((node) = (list)->entries[(i)]))
/* Non-zero when @list is not NULL and @member is a valid index into it.
 * Note that @list and @member are each evaluated more than once. */
#define is_dom_node_list_member(list, member)			\
	((list) && (member) >= 0 && (member) < (list)->size)
/* Inserts @node at the given @position of the list that @list_ptr points
 * to. A @position of -1 appends the node to the end of the list. */
struct dom_node_list *
add_to_dom_node_list(struct dom_node_list **list_ptr,
		     struct dom_node *node, int position);
/* Counterpart of add_to_dom_node_list() for disposing of @list.
 * NOTE(review): exactly what is released is not visible from this header. */
void done_dom_node_list(struct dom_node_list *list);
/* Returns the position or index at which @node has been inserted into the
 * 'default' list of the @parent node. (Default means that
 * get_dom_node_list() is used to acquire the list to search in.) Returns -1
 * if the node is not found. */
int get_dom_node_list_index(struct dom_node *parent, struct dom_node *node);
/* Returns the position or index at which @node should be inserted into the
 * node @list for the list to stay alphabetically sorted. The @list is
 * assumed to be properly sorted already. */
int get_dom_node_map_index(struct dom_node_list *list, struct dom_node *node);
/* Searches @node_map for a node matching the requested @type and @name.
 * A @subtype of 0 indicates an unknown subtype, in which case only the name
 * should be tested. Otherwise it indicates either the element or the
 * attribute private subtype. */
struct dom_node *
get_dom_node_map_entry(struct dom_node_list *node_map,
		       enum dom_node_type type, uint16_t subtype,
		       struct dom_string *name);
/* Node constructor. Use it through the wrapper macros below, which fill in
 * @file and @line with the __FILE__ and __LINE__ of the call site. */
struct dom_node *
init_dom_node_(unsigned char *file, int line,
	       struct dom_node *parent, enum dom_node_type type,
	       struct dom_string *string);

/* Calls init_dom_node_() with a NULL parent. */
#define init_dom_node(type, string) \
	init_dom_node_(__FILE__, __LINE__, NULL, type, string)

/* Calls init_dom_node_() with the given @parent. */
#define add_dom_node(parent, type, string) \
	init_dom_node_(__FILE__, __LINE__, parent, type, string)

/* Shorthand for add_dom_node() with the DOM_NODE_ELEMENT type. */
#define add_dom_element(parent, string) \
	add_dom_node(parent, DOM_NODE_ELEMENT, string)
265 static inline struct dom_node *
266 add_dom_attribute(struct dom_node *parent, struct dom_string *name,
267 struct dom_string *value)
269 struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, name);
271 if (node && value) {
272 copy_dom_string(&node->data.attribute.value, value);
275 return node;
278 static inline struct dom_node *
279 add_dom_proc_instruction(struct dom_node *parent, struct dom_string *string,
280 struct dom_string *instruction)
282 struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string);
284 if (node && instruction) {
285 copy_dom_string(&node->data.proc_instruction.instruction, instruction);
288 return node;
/* Removes @node together with all of its children; the node itself is
 * free()d as well. */
void done_dom_node(struct dom_node *node);
/* Compares @node1 with @node2. The result is non-zero if they differ. */
int dom_node_casecmp(struct dom_node *node1, struct dom_node *node2);
/* Returns the name of @node in an allocated string. */
struct dom_string *get_dom_node_name(struct dom_node *node);
/* Returns the value of @node. The result is NULL if no value is defined for
 * the node's type. */
struct dom_string *get_dom_node_value(struct dom_node *node);
/* Returns the name that is used for identifying the node @type. */
struct dom_string *get_dom_node_type_name(enum dom_node_type type);
307 /* Based on the type of the parent and the node return a proper list
308 * or NULL. This is useful when adding a node to a parent node. */
309 static inline struct dom_node_list **
310 get_dom_node_list(struct dom_node *parent, struct dom_node *node)
312 switch (parent->type) {
313 case DOM_NODE_DOCUMENT:
314 return &parent->data.document.children;
316 case DOM_NODE_ELEMENT:
317 switch (node->type) {
318 case DOM_NODE_ATTRIBUTE:
319 return &parent->data.element.map;
321 default:
322 return &parent->data.element.children;
325 case DOM_NODE_DOCUMENT_TYPE:
326 switch (node->type) {
327 case DOM_NODE_ENTITY:
328 return &parent->data.document_type.entities;
330 case DOM_NODE_NOTATION:
331 return &parent->data.document_type.notations;
333 default:
334 return NULL;
337 case DOM_NODE_PROCESSING_INSTRUCTION:
338 switch (node->type) {
339 case DOM_NODE_ATTRIBUTE:
340 return &parent->data.proc_instruction.map;
342 default:
343 return NULL;
346 default:
347 return NULL;
351 #endif