4 #define _GNU_SOURCE /* XXX: we _WANT_ strcasestr() ! */
19 #include "bfu/listmenu.h"
21 #include "document/css/apply.h"
22 #include "document/css/css.h"
23 #include "document/css/stylesheet.h"
24 #include "document/html/frames.h"
25 #include "document/html/parse-meta-refresh.h"
26 #include "document/html/parser/link.h"
27 #include "document/html/parser/stack.h"
28 #include "document/html/parser/parse.h"
29 #include "document/html/parser.h"
30 #include "document/html/renderer.h"
31 #include "document/options.h"
32 #include "document/renderer.h"
33 #include "intl/charsets.h"
34 #include "protocol/date.h"
35 #include "protocol/header.h"
36 #include "protocol/uri.h"
37 #include "session/task.h"
38 #include "terminal/draw.h"
39 #include "util/align.h"
41 #include "util/color.h"
42 #include "util/conv.h"
43 #include "util/error.h"
44 #include "util/memdebug.h"
45 #include "util/memlist.h"
46 #include "util/memory.h"
47 #include "util/string.h"
50 #include "document/html/internal.h"
52 /* TODO: This needs a rewrite. Yes, no kidding. */
55 extract_color(struct html_context
*html_context
, unsigned char *a
,
56 unsigned char *attribute
, color_T
*rgb
)
61 value
= get_attr_val(a
, attribute
, html_context
->doc_cp
);
62 if (!value
) return -1;
64 retval
= decode_color(value
, strlen(value
), rgb
);
71 get_color(struct html_context
*html_context
, unsigned char *a
,
72 unsigned char *attribute
, color_T
*rgb
)
74 if (!use_document_fg_colors(html_context
->options
))
77 return extract_color(html_context
, a
, attribute
, rgb
);
81 get_bgcolor(struct html_context
*html_context
, unsigned char *a
, color_T
*rgb
)
83 if (!use_document_bg_colors(html_context
->options
))
86 return extract_color(html_context
, a
, "bgcolor", rgb
);
90 get_target(struct document_options
*options
, unsigned char *a
)
92 /* FIXME (bug 784): options->cp is the terminal charset;
93 * should use the document charset instead. */
94 unsigned char *v
= get_attr_val(a
, "target", options
->cp
);
98 if (!*v
|| !c_strcasecmp(v
, "_self")) {
99 mem_free_set(&v
, stracpy(options
->framename
));
107 ln_break(struct html_context
*html_context
, int n
)
109 if (!n
|| html_top
->invisible
) return;
110 while (n
> html_context
->line_breax
) {
111 html_context
->line_breax
++;
112 html_context
->line_break_f(html_context
);
114 html_context
->position
= 0;
115 html_context
->putsp
= HTML_SPACE_SUPPRESS
;
119 put_chrs(struct html_context
*html_context
, unsigned char *start
, int len
)
121 if (html_is_preformatted())
122 html_context
->putsp
= HTML_SPACE_NORMAL
;
124 if (!len
|| html_top
->invisible
)
127 switch (html_context
->putsp
) {
128 case HTML_SPACE_NORMAL
:
132 html_context
->put_chars_f(html_context
, " ", 1);
133 html_context
->position
++;
134 html_context
->putsp
= HTML_SPACE_SUPPRESS
;
138 case HTML_SPACE_SUPPRESS
:
139 html_context
->putsp
= HTML_SPACE_NORMAL
;
140 if (isspace(start
[0])) {
144 html_context
->putsp
= HTML_SPACE_SUPPRESS
;
152 if (isspace(start
[len
- 1]) && !html_is_preformatted())
153 html_context
->putsp
= HTML_SPACE_SUPPRESS
;
154 html_context
->was_br
= 0;
156 html_context
->put_chars_f(html_context
, start
, len
);
158 html_context
->position
+= len
;
159 html_context
->line_breax
= 0;
160 if (html_context
->was_li
> 0)
161 html_context
->was_li
--;
165 set_fragment_identifier(struct html_context
*html_context
,
166 unsigned char *attr_name
, unsigned char *attr
)
168 unsigned char *id_attr
;
170 id_attr
= get_attr_val(attr_name
, attr
, html_context
->doc_cp
);
173 html_context
->special_f(html_context
, SP_TAG
, id_attr
);
179 add_fragment_identifier(struct html_context
*html_context
,
180 struct part
*part
, unsigned char *attr
)
182 struct part
*saved_part
= html_context
->part
;
184 html_context
->part
= part
;
185 html_context
->special_f(html_context
, SP_TAG
, attr
);
186 html_context
->part
= saved_part
;
191 import_css_stylesheet(struct css_stylesheet
*css
, struct uri
*base_uri
,
192 const unsigned char *unterminated_url
, int len
)
194 struct html_context
*html_context
= css
->import_data
;
196 unsigned char *import_url
;
199 assert(html_context
);
202 if (!html_context
->options
->css_enable
203 || !html_context
->options
->css_import
)
206 /* unterminated_url might not end with '\0', but join_urls
207 * requires that, so make a copy. */
208 url
= memacpy(unterminated_url
, len
);
211 /* HTML <head> urls should already be fine but we can't detect them. */
212 import_url
= join_urls(base_uri
, url
);
215 if (!import_url
) return;
217 uri
= get_uri(import_url
, URI_BASE
);
218 mem_free(import_url
);
222 /* Request the imported stylesheet as part of the document ... */
223 html_context
->special_f(html_context
, SP_STYLESHEET
, uri
);
225 /* ... and then attempt to import from the cache. */
226 import_css(css
, uri
);
232 /* Extract the extra information that is available for elements which can
233 * receive focus. Call this from each element which supports tabindex or
235 /* Note that in ELinks, we support those attributes (I mean, we call this
236 * function) while processing any focusable element (otherwise it'd have zero
237 * tabindex, thus messing up navigation between links), thus we support these
238 * attributes even near tags where we're not supposed to (like IFRAME, FRAME or
239 * LINK). I think this doesn't do any harm ;). --pasky */
241 html_focusable(struct html_context
*html_context
, unsigned char *a
)
243 unsigned char *accesskey
;
247 format
.accesskey
= 0;
248 format
.tabindex
= 0x80000000;
252 cp
= html_context
->doc_cp
;
254 accesskey
= get_attr_val(a
, "accesskey", cp
);
256 format
.accesskey
= accesskey_string_to_unicode(accesskey
);
260 tabindex
= get_num(a
, "tabindex", cp
);
261 if (0 < tabindex
&& tabindex
< 32767) {
262 format
.tabindex
= (tabindex
& 0x7fff) << 16;
265 mem_free_set(&format
.onclick
, get_attr_val(a
, "onclick", cp
));
266 mem_free_set(&format
.ondblclick
, get_attr_val(a
, "ondblclick", cp
));
267 mem_free_set(&format
.onmouseover
, get_attr_val(a
, "onmouseover", cp
));
268 mem_free_set(&format
.onhover
, get_attr_val(a
, "onhover", cp
));
269 mem_free_set(&format
.onfocus
, get_attr_val(a
, "onfocus", cp
));
270 mem_free_set(&format
.onmouseout
, get_attr_val(a
, "onmouseout", cp
));
271 mem_free_set(&format
.onblur
, get_attr_val(a
, "onblur", cp
));
275 html_skip(struct html_context
*html_context
, unsigned char *a
)
277 html_top
->invisible
= 1;
278 html_top
->type
= ELEMENT_DONT_KILL
;
282 check_head_for_refresh(struct html_context
*html_context
, unsigned char *head
)
284 unsigned char *refresh
;
285 unsigned char *url
= NULL
;
286 unsigned char *joined_url
= NULL
;
287 unsigned long seconds
;
289 refresh
= parse_header(head
, "Refresh", NULL
);
290 if (!refresh
) return;
292 if (html_parse_meta_refresh(refresh
, &seconds
, &url
) == 0) {
294 /* If the URL parameter is missing assume that the
295 * document being processed should be refreshed. */
296 url
= get_uri_string(html_context
->base_href
,
302 joined_url
= join_urls(html_context
->base_href
, url
);
305 if (seconds
> HTTP_REFRESH_MAX_DELAY
)
306 seconds
= HTTP_REFRESH_MAX_DELAY
;
308 html_focusable(html_context
, NULL
);
310 put_link_line("Refresh: ", url
, joined_url
,
311 html_context
->options
->framename
, html_context
);
312 html_context
->special_f(html_context
, SP_REFRESH
, seconds
, joined_url
);
315 mem_free_if(joined_url
);
321 check_head_for_cache_control(struct html_context
*html_context
,
328 if (get_opt_bool("document.cache.ignore_cache_control", NULL
))
331 /* XXX: Code duplication with HTTP protocol backend. */
332 /* I am not entirely sure in what order we should process these
333 * headers and if we should still process Cache-Control max-age
334 * if we already set max age to date mentioned in Expires.
336 if ((d
= parse_header(head
, "Pragma", NULL
))) {
337 if (strstr(d
, "no-cache")) {
343 if (!no_cache
&& (d
= parse_header(head
, "Cache-Control", NULL
))) {
344 if (strstr(d
, "no-cache") || strstr(d
, "must-revalidate")) {
348 unsigned char *pos
= strstr(d
, "max-age=");
353 /* Grab the number of seconds. */
354 timeval_T max_age
, seconds
;
356 timeval_from_seconds(&seconds
, atol(pos
+ 8));
357 timeval_now(&max_age
);
358 timeval_add_interval(&max_age
, &seconds
);
360 expires
= timeval_to_seconds(&max_age
);
367 if (!no_cache
&& (d
= parse_header(head
, "Expires", NULL
))) {
368 /* Convert date to seconds. */
369 if (strstr(d
, "now")) {
373 expires
= timeval_to_seconds(&now
);
375 expires
= parse_date(&d
, NULL
, 0, 1);
382 html_context
->special_f(html_context
, SP_CACHE_CONTROL
);
384 html_context
->special_f(html_context
,
385 SP_CACHE_EXPIRES
, expires
);
389 process_head(struct html_context
*html_context
, unsigned char *head
)
391 check_head_for_refresh(html_context
, head
);
393 check_head_for_cache_control(html_context
, head
);
400 look_for_map(unsigned char **pos
, unsigned char *eof
, struct uri
*uri
,
401 struct document_options
*options
)
403 unsigned char *al
, *attr
, *name
;
406 while (*pos
< eof
&& **pos
!= '<') {
410 if (*pos
>= eof
) return 0;
412 if (*pos
+ 2 <= eof
&& ((*pos
)[1] == '!' || (*pos
)[1] == '?')) {
413 *pos
= skip_comment(*pos
, eof
);
417 if (parse_element(*pos
, eof
, &name
, &namelen
, &attr
, pos
)) {
422 if (c_strlcasecmp(name
, namelen
, "MAP", 3)) return 1;
424 if (uri
&& uri
->fragment
) {
425 /* FIXME (bug 784): options->cp is the terminal charset;
426 * should use the document charset instead. */
427 al
= get_attr_val(attr
, "name", options
->cp
);
430 if (c_strlcasecmp(al
, -1, uri
->fragment
, uri
->fragmentlen
)) {
442 look_for_tag(unsigned char **pos
, unsigned char *eof
,
443 unsigned char *name
, int namelen
, unsigned char **label
)
448 if (!init_string(&str
)) {
449 /* Is this the right way to bail out? --jonas */
455 while (pos2
< eof
&& *pos2
!= '<') {
465 add_bytes_to_string(&str
, *pos
, pos2
- *pos
);
470 if (*pos
+ 2 <= eof
&& ((*pos
)[1] == '!' || (*pos
)[1] == '?')) {
471 *pos
= skip_comment(*pos
, eof
);
475 if (parse_element(*pos
, eof
, NULL
, NULL
, NULL
, &pos2
)) return 1;
477 if (c_strlcasecmp(name
, namelen
, "A", 1)
478 && c_strlcasecmp(name
, namelen
, "/A", 2)
479 && c_strlcasecmp(name
, namelen
, "MAP", 3)
480 && c_strlcasecmp(name
, namelen
, "/MAP", 4)
481 && c_strlcasecmp(name
, namelen
, "AREA", 4)
482 && c_strlcasecmp(name
, namelen
, "/AREA", 5)) {
490 /** @return -1 if EOF is hit without the closing tag; 0 if the closing
491 * tag is found (in which case this also adds *@a menu to *@a ml); or
492 * 1 if this should be called again. */
494 look_for_link(unsigned char **pos
, unsigned char *eof
, struct menu_item
**menu
,
495 struct memory_list
**ml
, struct uri
*href_base
,
496 unsigned char *target_base
, struct conv_table
*ct
,
497 struct document_options
*options
)
499 unsigned char *attr
, *href
, *name
, *target
;
500 unsigned char *label
= NULL
; /* shut up warning */
502 struct menu_item
*nm
;
506 while (*pos
< eof
&& **pos
!= '<') {
510 if (*pos
>= eof
) return -1;
512 if (*pos
+ 2 <= eof
&& ((*pos
)[1] == '!' || (*pos
)[1] == '?')) {
513 *pos
= skip_comment(*pos
, eof
);
517 if (parse_element(*pos
, eof
, &name
, &namelen
, &attr
, pos
)) {
522 if (!c_strlcasecmp(name
, namelen
, "A", 1)) {
523 while (look_for_tag(pos
, eof
, name
, namelen
, &label
));
525 if (*pos
>= eof
) return -1;
527 } else if (!c_strlcasecmp(name
, namelen
, "AREA", 4)) {
528 /* FIXME (bug 784): options->cp is the terminal charset;
529 * should use the document charset instead. */
530 unsigned char *alt
= get_attr_val(attr
, "alt", options
->cp
);
533 /* CSM_NONE because get_attr_val() already
534 * decoded entities. */
535 label
= convert_string(ct
, alt
, strlen(alt
),
536 options
->cp
, CSM_NONE
,
543 } else if (!c_strlcasecmp(name
, namelen
, "/MAP", 4)) {
544 /* This is the only successful return from here! */
545 add_to_ml(ml
, (void *) *menu
, (void *) NULL
);
552 target
= get_target(options
, attr
);
553 if (!target
) target
= stracpy(empty_string_or_(target_base
));
559 ld
= mem_alloc(sizeof(*ld
));
566 /* FIXME (bug 784): options->cp is the terminal charset;
567 * should use the document charset instead. */
568 href
= get_url_val(attr
, "href", options
->cp
);
577 ld
->link
= join_urls(href_base
, href
);
588 for (nmenu
= 0; !mi_is_end_of_menu(&(*menu
)[nmenu
]); nmenu
++) {
589 struct link_def
*ll
= (*menu
)[nmenu
].data
;
591 if (!strcmp(ll
->link
, ld
->link
) &&
592 !strcmp(ll
->target
, ld
->target
)) {
594 mem_free(ld
->target
);
611 label
= stracpy(ld
->link
);
620 nm
= mem_realloc(*menu
, (nmenu
+ 2) * sizeof(*nm
));
623 memset(&nm
[nmenu
], 0, 2 * sizeof(*nm
));
624 nm
[nmenu
].text
= label
;
625 nm
[nmenu
].func
= map_selected
;
627 nm
[nmenu
].flags
= NO_INTL
;
630 add_to_ml(ml
, (void *) ld
, (void *) ld
->link
, (void *) ld
->target
,
631 (void *) label
, (void *) NULL
);
638 get_image_map(unsigned char *head
, unsigned char *pos
, unsigned char *eof
,
639 struct menu_item
**menu
, struct memory_list
**ml
, struct uri
*uri
,
640 struct document_options
*options
, unsigned char *target_base
,
641 int to
, int def
, int hdef
)
643 struct conv_table
*ct
;
647 if (!init_string(&hd
)) return -1;
649 if (head
) add_to_string(&hd
, head
);
650 /* FIXME (bug 784): cp is the terminal charset;
651 * should use the document charset instead. */
652 scan_http_equiv(pos
, eof
, &hd
, NULL
, options
->cp
);
653 ct
= get_convert_table(hd
.source
, to
, def
, NULL
, NULL
, hdef
);
656 *menu
= mem_calloc(1, sizeof(**menu
));
657 if (!*menu
) return -1;
659 while (look_for_map(&pos
, eof
, uri
, options
));
669 /* This call can modify both *ml and *menu. */
670 look_result
= look_for_link(&pos
, eof
, menu
, ml
, uri
,
671 target_base
, ct
, options
);
672 } while (look_result
> 0);
674 if (look_result
< 0) {
687 init_html_parser_state(struct html_context
*html_context
,
688 enum html_element_mortality_type type
,
689 int align
, int margin
, int width
)
691 html_stack_dup(html_context
, type
);
693 par_format
.align
= align
;
695 if (type
<= ELEMENT_IMMORTAL
) {
696 par_format
.leftmargin
= margin
;
697 par_format
.rightmargin
= margin
;
698 par_format
.width
= width
;
699 par_format
.list_level
= 0;
700 par_format
.list_number
= 0;
701 par_format
.dd_margin
= 0;
702 html_top
->namelen
= 0;
711 done_html_parser_state(struct html_context
*html_context
,
714 struct html_element
*element
= state
;
716 html_context
->line_breax
= 1;
718 while (html_top
!= element
) {
719 pop_html_element(html_context
);
721 /* I've preserved this bit to show an example of the Old Code
722 * of the Mikulas days (I _HOPE_ it's by Mikulas, at least ;-).
723 * I think this assert() can never fail, for one. --pasky */
724 assertm(html_top
&& (void *) html_top
!= (void *) &html_stack
,
725 "html stack trashed");
726 if_assert_failed
break;
730 html_top
->type
= ELEMENT_KILLABLE
;
731 pop_html_element(html_context
);
735 /* This function does not set html_context.doc_cp = document.cp,
736 * because it does not know the document, and because the codepage has
737 * not even been decided when it is called.
740 * The title of the document. This is in the document charset,
741 * and entities have not been decoded. */
742 struct html_context
*
743 init_html_parser(struct uri
*uri
, struct document_options
*options
,
744 unsigned char *start
, unsigned char *end
,
745 struct string
*head
, struct string
*title
,
746 void (*put_chars
)(struct html_context
*, unsigned char *, int),
747 void (*line_break
)(struct html_context
*),
748 void *(*special
)(struct html_context
*, enum html_special_type
, ...))
750 struct html_context
*html_context
;
751 struct html_element
*e
;
753 assert(uri
&& options
);
754 if_assert_failed
return NULL
;
756 html_context
= mem_calloc(1, sizeof(*html_context
));
757 if (!html_context
) return NULL
;
760 html_context
->css_styles
.import
= import_css_stylesheet
;
761 init_css_selector_set(&html_context
->css_styles
.selectors
);
764 init_list(html_context
->stack
);
766 html_context
->startf
= start
;
767 html_context
->put_chars_f
= put_chars
;
768 html_context
->line_break_f
= line_break
;
769 html_context
->special_f
= special
;
771 html_context
->base_href
= get_uri_reference(uri
);
772 html_context
->base_target
= null_or_stracpy(options
->framename
);
774 html_context
->options
= options
;
776 /* FIXME (bug 784): cp is the terminal charset;
777 * should use the document charset instead. */
778 scan_http_equiv(start
, end
, head
, title
, options
->cp
);
780 e
= mem_calloc(1, sizeof(*e
));
782 add_to_list(html_context
->stack
, e
);
784 format
.style
.attr
= 0;
786 format
.link
= format
.target
= format
.image
= NULL
;
787 format
.onclick
= format
.ondblclick
= format
.onmouseover
= format
.onhover
788 = format
.onfocus
= format
.onmouseout
= format
.onblur
= NULL
;
789 format
.select
= NULL
;
793 format
.style
= options
->default_style
;
794 format
.color
.clink
= options
->default_color
.link
;
795 format
.color
.vlink
= options
->default_color
.vlink
;
796 #ifdef CONFIG_BOOKMARKS
797 format
.color
.bookmark_link
= options
->default_color
.bookmark_link
;
799 format
.color
.image_link
= options
->default_color
.image_link
;
801 par_format
.align
= ALIGN_LEFT
;
802 par_format
.leftmargin
= options
->margin
;
803 par_format
.rightmargin
= options
->margin
;
805 par_format
.width
= options
->box
.width
;
806 par_format
.list_level
= par_format
.list_number
= 0;
807 par_format
.dd_margin
= options
->margin
;
808 par_format
.flags
= P_NONE
;
810 par_format
.color
.background
= options
->default_style
.color
.background
;
812 html_top
->invisible
= 0;
813 html_top
->name
= NULL
;
814 html_top
->namelen
= 0;
815 html_top
->options
= NULL
;
816 html_top
->linebreak
= 1;
817 html_top
->type
= ELEMENT_DONT_KILL
;
819 html_context
->has_link_lines
= 0;
820 html_context
->table_level
= 0;
823 html_context
->css_styles
.import_data
= html_context
;
825 if (options
->css_enable
)
826 mirror_css_stylesheet(&default_stylesheet
,
827 &html_context
->css_styles
);
834 done_html_parser(struct html_context
*html_context
)
837 if (html_context
->options
->css_enable
)
838 done_css_stylesheet(&html_context
->css_styles
);
841 mem_free(html_context
->base_target
);
842 done_uri(html_context
->base_href
);
844 kill_html_stack_item(html_context
, html_context
->stack
.next
);
846 assertm(list_empty(html_context
->stack
),
847 "html stack not empty after operation");
848 if_assert_failed
init_list(html_context
->stack
);
850 mem_free(html_context
);