iconv: Bail out of the loop when an illegal sequence of bytes occurs.
[elinks/elinks-j605.git] / src / document / html / parser.h
blobe0e74dbbabee33ffff43a117b7f1a9db94b79b2d
2 #ifndef EL__DOCUMENT_HTML_PARSER_H
3 #define EL__DOCUMENT_HTML_PARSER_H
5 #include "document/format.h"
6 #include "document/forms.h"
7 #include "document/html/renderer.h" /* enum html_special_type */
8 #include "intl/charsets.h" /* unicode_val_T */
9 #include "util/align.h"
10 #include "util/color.h"
11 #include "util/lists.h"
13 struct document_options;
14 struct form_control;
15 struct frameset_desc;
16 struct html_context;
17 struct memory_list;
18 struct menu_item;
19 struct part;
20 struct string;
21 struct uri;
23 /* XXX: This is just terrible - about 75% of this interface exists only for
24 * the other HTML files - there is no well-defined interface and it's all
25 * randomly mixed up :/. */
/* Colors for the various kinds of links. struct text_attrib embeds one of
 * these as its `color' member. bookmark_link is only present when
 * CONFIG_BOOKMARKS is defined. */
26 struct text_attrib_color {
27 color_T clink;
28 color_T vlink;
29 #ifdef CONFIG_BOOKMARKS
30 color_T bookmark_link;
31 #endif
32 color_T image_link;
/* Text-level attributes of an element. struct html_element embeds one of
 * these as its `attr' member. */
35 struct text_attrib {
36 struct text_style style;
38 int fontsize;
39 unsigned char *link;
40 unsigned char *target;
41 unsigned char *image;
43 /* Any entities in the title have already been decoded. */
44 unsigned char *title;
46 struct form_control *form;
/* Link colors; see struct text_attrib_color. */
48 struct text_attrib_color color;
50 #ifdef CONFIG_CSS
51 /* Bug 766: CSS speedup. 56% of CPU time was going to
52 * get_attr_value(). Of those calls, 97% were asking for "id"
53 * or "class". So cache the results. start_element() sets up
54 * these pointers if html_context->options->css_enable;
55 * otherwise they remain NULL. */
56 unsigned char *id;
57 unsigned char *class;
58 #endif
60 unsigned char *select;
61 enum form_mode select_disabled;
62 unsigned int tabindex;
63 unicode_val_T accesskey;
/* NOTE(review): presumably the values of the like-named HTML event-handler
 * attributes -- confirm against the code that fills these in. */
65 unsigned char *onclick;
66 unsigned char *ondblclick;
67 unsigned char *onmouseover;
68 unsigned char *onhover;
69 unsigned char *onfocus;
70 unsigned char *onmouseout;
71 unsigned char *onblur;
74 /* This enum is pretty ugly, yes ;).
 * The enumerators form two groups whose values overlap: P_NUMBER and P_STAR
 * are both 1, P_alpha and P_O are both 2, P_ALPHA and P_PLUS are both 3. A
 * value alone therefore does not tell which group is meant.
 * P_LISTMASK (7) covers the three low bits, which is enough for every value
 * of both groups; P_COMPACT (8) is a separate bit above that mask. */
75 enum format_list_flag {
76 P_NONE = 0,
/* First group: values 1 through 6. */
78 P_NUMBER = 1,
79 P_alpha = 2,
80 P_ALPHA = 3,
81 P_roman = 4,
82 P_ROMAN = 5,
83 P_NO_BULLET = 6,
/* Second group: reuses values 1 through 3. */
85 P_STAR = 1,
86 P_O = 2,
87 P_PLUS = 3,
/* NOTE(review): presumably ANDed with a flags value to extract one of the
 * enumerators above -- confirm at the use sites. */
89 P_LISTMASK = 7,
91 P_COMPACT = 8,
/* Paragraph-level attributes of an element. struct html_element embeds one
 * of these as its `parattr' member. */
94 struct par_attrib {
95 enum format_align align;
96 int leftmargin;
97 int rightmargin;
98 int width;
99 int list_level;
100 unsigned list_number;
101 int dd_margin;
/* List style and options; see enum format_list_flag. */
102 enum format_list_flag flags;
103 struct {
104 color_T background;
105 } color;
108 /* HTML parser stack mortality info.
 * No explicit values are given, so the enumerators are numbered 0 through 3
 * in declaration order. */
109 enum html_element_mortality_type {
110 /* Elements of this type can not be removed from the stack. This type
111 * is created by the renderer when formatting an HTML part. */
112 ELEMENT_IMMORTAL,
113 /* Elements of this type can only be removed by elements of the start
114 * type. This type is created whenever an HTML state is created using
115 * init_html_parser_state(). */
116 /* NOTE(review): the original comment here read "The element has been
 * created by" and stopped mid-sentence -- TODO complete. */
117 ELEMENT_DONT_KILL,
118 /* These elements can safely be removed from the stack.
 * NOTE(review): the original sentence ended with "by both" without saying
 * both of what -- TODO complete. */
119 ELEMENT_KILLABLE,
120 /* These elements not only cannot bear any other elements inside but
121 * any attempt to do so will cause them to terminate. This is so deadly
122 * that it affects even invisible elements. Ie. <title>foo<body>. */
123 ELEMENT_WEAK,
/* One element tracked by the HTML parser.
 * NOTE(review): presumably an entry of the parser stack that the comments on
 * enum html_element_mortality_type refer to -- confirm. */
126 struct html_element {
127 LIST_HEAD(struct html_element);
/* How resistant the element is to removal from the stack; see
 * enum html_element_mortality_type. */
129 enum html_element_mortality_type type;
/* Text-level and paragraph-level attributes of the element. */
131 struct text_attrib attr;
132 struct par_attrib parattr;
134 /* invisible is a flag using which element handlers can control
135 * processing in start_element. 0 indicates that start_element should
136 * process tags, 1 indicates that it should not, and 2 or greater
137 * indicates that it should process only script tags. */
138 int invisible;
140 /* The name of the element without NUL termination. name is a pointer
141 * into the actual document source. */
142 unsigned char *name;
/* Length of name; needed because name is not NUL-terminated. */
143 int namelen;
145 unsigned char *options;
146 /* See document/html/parser/parse.c's element_info.linebreak
147 * description. The is_inline_element() and is_block_element() macros
 * test this field. */
148 int linebreak;
149 struct frameset_desc *frameset;
151 /* For the needs of CSS engine. A wannabe bitmask: the two enumerators
 * are distinct bits (1 and 2). */
152 enum html_element_pseudo_class {
153 ELEMENT_LINK = 1,
154 ELEMENT_VISITED = 2,
155 } pseudo_class;
/* Classify an element by its linebreak field: exactly 0 means inline, any
 * positive value means block. A negative linebreak satisfies neither macro.
 * The argument e is evaluated once and must be a pointer to something with
 * a linebreak member, such as struct html_element. */
158 #define is_inline_element(e) ((e)->linebreak == 0)
159 #define is_block_element(e) ((e)->linebreak > 0)
161 /* Interface for the renderer */
/* Create an HTML parser context.
 * The last three arguments are callbacks, each of which is handed the
 * html_context: put_chars() additionally receives a byte pointer and an int,
 * line_break() receives nothing more, and special() receives an
 * enum html_special_type followed by variadic arguments and returns a void
 * pointer.
 * NOTE(review): start and end presumably delimit the HTML source, and the
 * returned context is presumably released with done_html_parser() --
 * confirm against the implementation. */
163 struct html_context *
164 init_html_parser(struct uri *uri, struct document_options *options,
165 unsigned char *start, unsigned char *end,
166 struct string *head, struct string *title,
167 void (*put_chars)(struct html_context *, unsigned char *, int),
168 void (*line_break)(struct html_context *),
169 void *(*special)(struct html_context *, enum html_special_type,
170 ...));
171 void done_html_parser(struct html_context *html_context);
/* Per the comment on ELEMENT_DONT_KILL, an element of that type is created
 * whenever an HTML state is created using init_html_parser_state().
 * NOTE(review): the opaque pointer returned by init_html_parser_state() is
 * presumably the `state' to hand to done_html_parser_state() -- confirm. */
173 void *init_html_parser_state(struct html_context *html_context, enum html_element_mortality_type type, int align, int margin, int width);
174 void done_html_parser_state(struct html_context *html_context, void *state);
176 /* Interface for the table handling */
/* NOTE(review): presumably looks up a background color for `a' and stores it
 * through rgb; the meaning of the int result is not visible in this header
 * -- confirm against the implementation. */
178 int get_bgcolor(struct html_context *html_context, unsigned char *a, color_T *rgb);
/* Fragment identifier helpers. set_fragment_identifier() takes an attribute
 * name plus a second byte string, while add_fragment_identifier() takes a
 * struct part and a byte string.
 * NOTE(review): what exactly `attr' holds in each case is not visible in
 * this header -- confirm against the implementation. */
179 void set_fragment_identifier(struct html_context *html_context,
180 unsigned char *attr_name, unsigned char *attr);
181 void add_fragment_identifier(struct html_context *html_context,
182 struct part *, unsigned char *attr);
184 /* Interface for the viewer */
/* NOTE(review): the line carrying this function's return type is missing
 * from this copy of the header; restore it from the upstream file before
 * compiling. */
187 get_image_map(unsigned char *head, unsigned char *pos, unsigned char *eof,
188 struct menu_item **menu, struct memory_list **ml, struct uri *uri,
189 struct document_options *options, unsigned char *target_base,
190 int to, int def, int hdef);
192 /* For html/parser/forms.c,general.c,link.c,parse.c,stack.c */
194 /* Ensure that there are at least n successive line-breaks at the current
195 * position, but don't add more than necessary to bring the current number
196 * of successive line-breaks above n.
 *
198 * For example, there should be two line-breaks after a <br>, but multiple
199 * successive <br>'s warrant still only two line-breaks. ln_break will be
200 * called with n = 2 for each of multiple successive <br>'s, but ln_break
201 * will only add two line-breaks for the entire run of <br>'s.
 *
 * n is the number of successive line-breaks wanted at the current
 * position. */
202 void ln_break(struct html_context *html_context, int n);
/* NOTE(review): has the same shape as get_bgcolor() (context, byte string,
 * color_T output pointer) plus an extra byte string c. The meaning of a, c
 * and the int result is not visible in this header -- confirm against the
 * implementation. */
204 int get_color(struct html_context *html_context, unsigned char *a, unsigned char *c, color_T *rgb);
206 #endif