iconv: Bail out of the loop when an illegal sequence of bytes occurs.
[elinks/elinks-j605.git] / src / document / plain / renderer.c
blob062df22b4d916a65df4f24b857b62092452e0339
1 /* Plain text document renderer */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <ctype.h>
8 #include <stdlib.h>
9 #include <string.h>
11 #include "elinks.h"
13 #include "bookmarks/bookmarks.h"
14 #include "cache/cache.h"
15 #include "config/options.h"
16 #include "document/docdata.h"
17 #include "document/document.h"
18 #include "document/format.h"
19 #include "document/options.h"
20 #include "document/plain/renderer.h"
21 #include "document/renderer.h"
22 #include "globhist/globhist.h"
23 #include "intl/charsets.h"
24 #include "protocol/protocol.h"
25 #include "protocol/uri.h"
26 #include "terminal/color.h"
27 #include "terminal/draw.h"
28 #include "util/color.h"
29 #include "util/error.h"
30 #include "util/memory.h"
31 #include "util/string.h"
34 struct plain_renderer {
35 /* The document being renderered */
36 struct document *document;
38 /* The data and data length of the defragmented cache entry */
39 unsigned char *source;
40 int length;
42 /* The convert table that should be used for converting line strings to
43 * the rendered strings. */
44 struct conv_table *convert_table;
46 /* The default template char data for text */
47 struct screen_char template;
49 /* The maximum width any line can have (used for wrapping text) */
50 int max_width;
52 /* The current line number */
53 int lineno;
55 /* Are we doing line compression */
56 unsigned int compress:1;
/* Resize the link array of @doc through ALIGN_LINK() so that it can be
 * indexed up to @size entries.  Callers treat a false result as an
 * allocation failure. */
#define realloc_document_links(doc, size) \
	ALIGN_LINK(&(doc)->links, (doc)->nlinks, size)
62 static struct screen_char *
63 realloc_line(struct document *document, int x, int y)
65 struct line *line = realloc_lines(document, y);
67 if (!line) return NULL;
69 if (x != line->length) {
70 if (!ALIGN_LINE(&line->chars, line->length, x))
71 return NULL;
73 line->length = x;
76 return line->chars;
79 static inline struct link *
80 add_document_link(struct document *document, unsigned char *uri, int length,
81 int x, int y)
83 struct link *link;
84 struct point *point;
86 if (!realloc_document_links(document, document->nlinks + 1))
87 return NULL;
89 link = &document->links[document->nlinks];
91 if (!realloc_points(link, length))
92 return NULL;
94 link->npoints = length;
95 link->type = LINK_HYPERTEXT;
96 link->where = uri;
97 link->color.background = document->options.default_style.color.background;
98 link->color.foreground = document->options.default_color.link;
99 link->number = document->nlinks;
101 for (point = link->points; length > 0; length--, point++, x++) {
102 point->x = x;
103 point->y = y;
106 document->nlinks++;
107 document->links_sorted = 0;
108 return link;
111 /* Searches a word to find an email adress or an URI to add as a link. */
112 static inline struct link *
113 check_link_word(struct document *document, unsigned char *uri, int length,
114 int x, int y)
116 struct uri test;
117 unsigned char *where = NULL;
118 unsigned char *mailto = memchr(uri, '@', length);
119 int keep = uri[length];
120 struct link *new_link;
122 assert(document);
123 if_assert_failed return NULL;
125 uri[length] = 0;
127 if (mailto && mailto > uri && mailto - uri < length - 1) {
128 where = straconcat("mailto:", uri, (unsigned char *) NULL);
130 } else if (parse_uri(&test, uri) == URI_ERRNO_OK
131 && test.protocol != PROTOCOL_UNKNOWN
132 && (test.datalen || test.hostlen)) {
133 where = memacpy(uri, length);
136 uri[length] = keep;
138 if (!where) return NULL;
140 /* We need to reparse the URI and normalize it so that the protocol and
141 * host part are converted to lowercase. */
142 normalize_uri(NULL, where);
144 new_link = add_document_link(document, where, length, x, y);
146 if (!new_link) mem_free(where);
148 return new_link;
/* True if @c may appear inside a URI: any character above the space
 * character that is not an angle bracket, a parenthesis or a quote.
 * @c is evaluated more than once, so it must be free of side effects. */
#define url_char(c) ( \
	(c) > ' ' \
	&& !((c) == '<' || (c) == '>' \
	     || (c) == '(' || (c) == ')' \
	     || isquote(c)))
/* Returns how many of the first @length bytes of @line form a URI
 * candidate: the leading run of url_char() bytes, minus any dots and commas
 * at its end (those are taken to be punctuation of the surrounding text). */
static inline int
get_uri_length(unsigned char *line, int length)
{
	int end;

	for (end = 0; end < length; end++) {
		if (!url_char(line[end]))
			break;
	}

	while (end > 0
	       && (line[end - 1] == '.' || line[end - 1] == ','))
		end--;

	return end;
}
177 static int
178 print_document_link(struct plain_renderer *renderer, int lineno,
179 unsigned char *line, int line_pos, int width,
180 int expanded, struct screen_char *pos, int cells)
182 struct document *document = renderer->document;
183 unsigned char *start = &line[line_pos];
184 int len = get_uri_length(start, width - line_pos);
185 int screen_column = cells + expanded;
186 struct link *new_link;
187 int link_end = line_pos + len;
188 unsigned char saved_char;
189 struct document_options *doc_opts = &document->options;
190 struct screen_char template = renderer->template;
191 int i;
193 if (!len) return 0;
195 new_link = check_link_word(document, start, len, screen_column,
196 lineno);
198 if (!new_link) return 0;
200 saved_char = line[link_end];
201 line[link_end] = '\0';
203 if (0)
204 ; /* Shut up compiler */
205 #ifdef CONFIG_GLOBHIST
206 else if (get_global_history_item(start))
207 new_link->color.foreground = doc_opts->default_color.vlink;
208 #endif
209 #ifdef CONFIG_BOOKMARKS
210 else if (get_bookmark(start))
211 new_link->color.foreground = doc_opts->default_color.bookmark_link;
212 #endif
213 else
214 new_link->color.foreground = doc_opts->default_color.link;
216 line[link_end] = saved_char;
218 new_link->color.background = doc_opts->default_style.color.background;
220 set_term_color(&template, &new_link->color,
221 doc_opts->color_flags, doc_opts->color_mode);
223 for (i = len; i; i--) {
224 template.data = line[line_pos++];
225 copy_screen_chars(pos++, &template, 1);
228 return len;
231 static void
232 decode_esc_color(unsigned char *text, int *line_pos, int width,
233 struct screen_char *template, enum color_mode mode,
234 int *was_reversed)
236 struct screen_char ch;
237 struct color_pair color;
238 char *buf, *tail, *begin, *end;
239 int k, foreground, background, f1, b1; /* , intensity; */
241 ++(*line_pos);
242 buf = (char *)&text[*line_pos];
244 if (*buf != '[') return;
245 ++buf;
246 ++(*line_pos);
248 k = strspn(buf, "0123456789;");
249 *line_pos += k;
250 if (!k || buf[k] != 'm') return;
252 end = buf + k;
253 begin = tail = buf;
255 get_screen_char_color(template, &color, 0, mode);
256 set_term_color(&ch, &color, 0, COLOR_MODE_16);
257 b1 = background = (ch.c.color[0] >> 4) & 7;
258 f1 = foreground = ch.c.color[0] & 15;
260 while (tail < end) {
261 unsigned char kod = (unsigned char)strtol(begin, &tail, 10);
263 begin = tail + 1;
264 switch (kod) {
265 case 0:
266 background = 0;
267 foreground = 7;
268 break;
269 case 7:
270 if (*was_reversed == 0) {
271 background = f1 & 7;
272 foreground = b1;
273 *was_reversed = 1;
275 break;
276 case 27:
277 if (*was_reversed == 1) {
278 background = f1 & 7;
279 foreground = b1;
280 *was_reversed = 0;
282 break;
283 case 30:
284 case 31:
285 case 32:
286 case 33:
287 case 34:
288 case 35:
289 case 36:
290 case 37:
291 foreground = kod - 30;
292 break;
293 case 40:
294 case 41:
295 case 42:
296 case 43:
297 case 44:
298 case 45:
299 case 46:
300 case 47:
301 background = kod - 40;
302 break;
303 default:
304 break;
307 color.background = get_term_color16(background);
308 color.foreground = get_term_color16(foreground);
309 set_term_color(template, &color, 0, mode);
312 static inline int
313 add_document_line(struct plain_renderer *renderer,
314 unsigned char *line, int line_width)
316 struct document *document = renderer->document;
317 struct screen_char *template = &renderer->template;
318 struct screen_char saved_renderer_template = *template;
319 struct screen_char *pos, *startpos;
320 struct document_options *doc_opts = &document->options;
321 int was_reversed = 0;
323 #ifdef CONFIG_UTF8
324 int utf8 = doc_opts->utf8;
325 #endif /* CONFIG_UTF8 */
326 int cells = 0;
327 int lineno = renderer->lineno;
328 int expanded = 0;
329 int width = line_width;
330 int line_pos;
332 line = convert_string(renderer->convert_table, line, width,
333 document->options.cp, CSM_NONE, &width,
334 NULL, NULL);
335 if (!line) return 0;
337 /* Now expand tabs */
338 for (line_pos = 0; line_pos < width;) {
339 unsigned char line_char = line[line_pos];
340 int charlen = 1;
341 int cell = 1;
342 #ifdef CONFIG_UTF8
343 unicode_val_T data;
345 if (utf8) {
346 unsigned char *line_char2 = &line[line_pos];
347 charlen = utf8charlen(&line_char);
348 data = utf8_to_unicode(&line_char2, &line[width]);
350 if (data == UCS_NO_CHAR) {
351 line_pos += charlen;
352 continue;
355 cell = unicode_to_cell(data);
357 #endif /* CONFIG_UTF8 */
359 if (line_char == ASCII_TAB
360 && (line_pos + charlen == width
361 || line[line_pos + charlen] != ASCII_BS)) {
362 int tab_width = 7 - ((cells + expanded) & 7);
364 expanded += tab_width;
365 } else if (line_char == ASCII_BS) {
366 #if 0
367 This does not work: Suppose we have seventeen spaces
368 followed by a back-space; that will call for sixteen
369 bytes of memory, but we will print seventeen spaces
370 before we hit the back-space -- overflow!
372 /* Don't count the character
373 * that the back-space character will delete */
374 if (expanded + line_pos)
375 expanded--;
376 #endif
377 #if 0
378 /* Don't count the back-space character */
379 if (expanded > 0)
380 expanded--;
381 #endif
383 line_pos += charlen;
384 cells += cell;
387 assert(expanded >= 0);
389 startpos = pos = realloc_line(document, width + expanded, lineno);
390 if (!pos) {
391 mem_free(line);
392 return 0;
395 cells = 0;
396 expanded = 0;
397 for (line_pos = 0; line_pos < width;) {
398 unsigned char line_char = line[line_pos];
399 unsigned char next_char, prev_char;
400 int charlen = 1;
401 int cell = 1;
402 #ifdef CONFIG_UTF8
403 unicode_val_T data = UCS_NO_CHAR;
405 if (utf8) {
406 unsigned char *line_char2 = &line[line_pos];
407 charlen = utf8charlen(&line_char);
408 data = utf8_to_unicode(&line_char2, &line[width]);
410 if (data == UCS_NO_CHAR) {
411 line_pos += charlen;
412 continue;
415 cell = unicode_to_cell(data);
417 #endif /* CONFIG_UTF8 */
419 prev_char = line_pos > 0 ? line[line_pos - 1] : '\0';
420 next_char = (line_pos + charlen < width) ?
421 line[line_pos + charlen] : '\0';
423 /* Do not expand tabs that precede back-spaces; this saves the
424 * back-space code some trouble. */
425 if (line_char == ASCII_TAB && next_char != ASCII_BS) {
426 int tab_width = 7 - ((cells + expanded) & 7);
428 expanded += tab_width;
430 template->data = ' ';
432 copy_screen_chars(pos++, template, 1);
433 while (tab_width--);
435 *template = saved_renderer_template;
437 } else if (line_char == ASCII_BS) {
438 if (!(expanded + cells)) {
439 /* We've backspaced to the start of the line */
440 goto next;
442 if (pos > startpos)
443 pos--; /* Backspace */
445 /* Handle x^H_ as _^Hx, but prevent an infinite loop
446 * swapping two underscores. */
447 if (next_char == '_' && prev_char != '_') {
448 /* x^H_ becomes _^Hx */
449 if (line_pos - 1 >= 0)
450 line[line_pos - 1] = next_char;
451 if (line_pos + charlen < width)
452 line[line_pos + charlen] = prev_char;
454 /* Go back and reparse the swapped characters */
455 if (line_pos - 2 >= 0) {
456 cells--;
457 line_pos--;
459 continue;
462 if ((expanded + line_pos) - 2 >= 0) {
463 /* Don't count the backspace character or the
464 * deleted character when returning the line's
465 * width or when expanding tabs. */
466 expanded -= 2;
469 if (pos->data == '_' && next_char == '_') {
470 /* Is _^H_ an underlined underscore
471 * or an emboldened underscore? */
473 if (expanded + line_pos >= 0
474 && pos - 1 >= startpos
475 && (pos - 1)->attr) {
476 /* There is some preceding text,
477 * and it has an attribute; copy it */
478 template->attr |= (pos - 1)->attr;
479 } else {
480 /* Default to bold; seems more useful
481 * than underlining the underscore */
482 template->attr |= SCREEN_ATTR_BOLD;
485 } else if (pos->data == '_') {
486 /* Underline _^Hx */
488 template->attr |= SCREEN_ATTR_UNDERLINE;
490 } else if (pos->data == next_char) {
491 /* Embolden x^Hx */
493 template->attr |= SCREEN_ATTR_BOLD;
496 /* Handle _^Hx^Hx as both bold and underlined */
497 if (template->attr)
498 template->attr |= pos->attr;
499 } else if (line_char == 27) {
500 decode_esc_color(line, &line_pos, width,
501 &saved_renderer_template,
502 doc_opts->color_mode, &was_reversed);
503 *template = saved_renderer_template;
504 } else {
505 int added_chars = 0;
507 if (document->options.plain_display_links
508 && isalpha(line_char) && isalpha(next_char)) {
509 /* We only want to check for a URI if there are
510 * at least two consecutive alphabetic
511 * characters, or if we are at the very start of
512 * the line. It improves performance a bit.
513 * --Zas */
514 added_chars = print_document_link(renderer,
515 lineno, line,
516 line_pos,
517 width,
518 expanded,
519 pos, cells);
522 if (added_chars) {
523 line_pos += added_chars - 1;
524 cells += added_chars - 1;
525 pos += added_chars;
526 } else {
527 #ifdef CONFIG_UTF8
528 if (utf8) {
529 if (data == UCS_NO_CHAR) {
530 line_pos += charlen;
531 continue;
534 template->data = (unicode_val_T)data;
535 copy_screen_chars(pos++, template, 1);
537 if (cell == 2) {
538 template->data = UCS_NO_CHAR;
539 copy_screen_chars(pos++,
540 template, 1);
542 } else
543 #endif /* CONFIG_UTF8 */
545 if (!isscreensafe(line_char))
546 line_char = '.';
547 template->data = line_char;
548 copy_screen_chars(pos++, template, 1);
550 /* Detect copy of nul chars to screen,
551 * this should not occur. --Zas */
552 assert(line_char);
556 *template = saved_renderer_template;
558 next:
559 line_pos += charlen;
560 cells += cell;
562 mem_free(line);
564 realloc_line(document, pos - startpos, lineno);
566 return width + expanded;
569 static void
570 init_template(struct screen_char *template, struct document_options *options)
572 get_screen_char_template(template, options, options->default_style);
575 static struct node *
576 add_node(struct plain_renderer *renderer, int x, int width, int height)
578 struct node *node = mem_alloc(sizeof(*node));
580 if (node) {
581 struct document *document = renderer->document;
583 set_box(&node->box, x, renderer->lineno, width, height);
585 int_lower_bound(&document->width, width);
586 int_lower_bound(&document->height, height);
588 add_to_list(document->nodes, node);
591 return node;
594 static void
595 add_document_lines(struct plain_renderer *renderer)
597 unsigned char *source = renderer->source;
598 int length = renderer->length;
599 int was_empty_line = 0;
600 int was_wrapped = 0;
601 #ifdef CONFIG_UTF8
602 int utf8 = is_cp_utf8(renderer->document->cp);
603 #endif
604 for (; length > 0; renderer->lineno++) {
605 unsigned char *xsource;
606 int width, added, only_spaces = 1, spaces = 0, was_spaces = 0;
607 int last_space = 0;
608 int tab_spaces = 0;
609 int step = 0;
610 int cells = 0;
612 /* End of line detection: We handle \r, \r\n and \n types. */
613 for (width = 0; (width < length) &&
614 (cells < renderer->max_width);) {
615 if (source[width] == ASCII_CR)
616 step++;
617 if (source[width + step] == ASCII_LF)
618 step++;
619 if (step) break;
621 if (isspace(source[width])) {
622 last_space = width;
623 if (only_spaces)
624 spaces++;
625 else
626 was_spaces++;
627 if (source[width] == '\t')
628 tab_spaces += 7 - ((width + tab_spaces) % 8);
629 } else {
630 only_spaces = 0;
631 was_spaces = 0;
633 #ifdef CONFIG_UTF8
634 if (utf8) {
635 unsigned char *text = &source[width];
636 unicode_val_T data = utf8_to_unicode(&text,
637 &source[length]);
639 if (data == UCS_NO_CHAR) return;
641 cells += unicode_to_cell(data);
642 width += utf8charlen(&source[width]);
643 } else
644 #endif /* CONFIG_UTF8 */
646 cells++;
647 width++;
651 if (only_spaces && step) {
652 if (was_wrapped || (renderer->compress && was_empty_line)) {
653 /* Successive empty lines will appear as one. */
654 length -= step + spaces;
655 source += step + spaces;
656 renderer->lineno--;
657 assert(renderer->lineno >= 0);
658 continue;
660 was_empty_line = 1;
662 /* No need to keep whitespaces on an empty line. */
663 source += spaces;
664 length -= spaces;
665 width -= spaces;
667 } else {
668 was_empty_line = 0;
669 was_wrapped = !step;
671 if (was_spaces && step) {
672 /* Drop trailing whitespaces. */
673 width -= was_spaces;
674 step += was_spaces;
677 if (!step && (width < length) && last_space) {
678 width = last_space;
679 step = 1;
683 assert(width >= 0);
685 /* We will touch the supplied source, so better replicate it. */
686 xsource = memacpy(source, width);
687 if (!xsource) continue;
689 added = add_document_line(renderer, xsource, width);
690 mem_free(xsource);
692 if (added) {
693 /* Add (search) nodes on a line by line basis */
694 add_node(renderer, 0, added, 1);
697 /* Skip end of line chars too. */
698 width += step;
699 length -= width;
700 source += width;
703 assert(!length);
706 void
707 render_plain_document(struct cache_entry *cached, struct document *document,
708 struct string *buffer)
710 struct conv_table *convert_table;
711 unsigned char *head = empty_string_or_(cached->head);
712 struct plain_renderer renderer;
714 convert_table = get_convert_table(head, document->options.cp,
715 document->options.assume_cp,
716 &document->cp,
717 &document->cp_status,
718 document->options.hard_assume);
720 renderer.source = buffer->source;
721 renderer.length = buffer->length;
723 renderer.document = document;
724 renderer.lineno = 0;
725 renderer.convert_table = convert_table;
726 renderer.compress = document->options.plain_compress_empty_lines;
727 renderer.max_width = document->options.wrap ? document->options.box.width
728 : INT_MAX;
730 document->color.background = document->options.default_style.color.background;
731 document->width = 0;
732 #ifdef CONFIG_UTF8
733 document->options.utf8 = is_cp_utf8(document->options.cp);
734 #endif /* CONFIG_UTF8 */
736 /* Setup the style */
737 init_template(&renderer.template, &document->options);
739 add_document_lines(&renderer);