turns printfs back on
[freebsd-src/fkvm-freebsd.git] / contrib / texinfo / makeinfo / html.c
blobbe2c136d41ff17dde581fc8a30bd59fcff399a70
1 /* html.c -- html-related utilities.
2 $Id: html.c,v 1.28 2004/12/06 01:13:06 karl Exp $
4 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 Free Software
5 Foundation, Inc.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
21 #include "system.h"
22 #include "cmds.h"
23 #include "files.h"
24 #include "html.h"
25 #include "lang.h"
26 #include "makeinfo.h"
27 #include "node.h"
28 #include "sectioning.h"
/* A growable character buffer, filled one character at a time by
   append_char, which (re)allocates as necessary and keeps the
   contents null-terminated.  We don't handle null characters.  */

typedef struct
{
  unsigned size;    /* allocated */
  unsigned length;  /* used, not counting the terminating null */
  char *buffer;     /* the text itself; NULL until the first append */
} buffer_type;
41 static buffer_type *
42 init_buffer (void)
44 buffer_type *buf = xmalloc (sizeof (buffer_type));
45 buf->length = 0;
46 buf->size = 0;
47 buf->buffer = NULL;
49 return buf;
52 static void
53 append_char (buffer_type *buf, int c)
55 buf->length++;
56 if (buf->length >= buf->size)
58 buf->size += 100;
59 buf->buffer = xrealloc (buf->buffer, buf->size);
61 buf->buffer[buf->length - 1] = c;
62 buf->buffer[buf->length] = 0;
65 /* Read the cascading style-sheet file FILENAME. Write out any @import
66 commands, which must come first, by the definition of css. If the
67 file contains any actual css code following the @imports, return it;
68 else return NULL. */
69 static char *
70 process_css_file (char *filename)
72 int c;
73 int lastchar = 0;
74 FILE *f;
75 buffer_type *import_text = init_buffer ();
76 buffer_type *inline_text = init_buffer ();
77 unsigned lineno = 1;
78 enum { null_state, comment_state, import_state, inline_state } state
79 = null_state, prev_state;
81 prev_state = null_state;
83 /* read from stdin if `-' is the filename. */
84 f = STREQ (filename, "-") ? stdin : fopen (filename, "r");
85 if (!f)
87 error (_("%s: could not open --css-file: %s"), progname, filename);
88 return NULL;
91 /* Read the file. The @import statements must come at the beginning,
92 with only whitespace and comments allowed before any inline css code. */
93 while ((c = getc (f)) >= 0)
95 if (c == '\n')
96 lineno++;
98 switch (state)
100 case null_state: /* between things */
101 if (c == '@')
102 { /* Only @import and @charset should switch into
103 import_state, other @-commands, such as @media, should
104 put us into inline_state. I don't think any other css
105 @-commands start with `i' or `c', although of course
106 this will break when such a command is defined. */
107 int nextchar = getc (f);
108 if (nextchar == 'i' || nextchar == 'c')
110 append_char (import_text, c);
111 state = import_state;
113 else
115 ungetc (nextchar, f); /* wasn't an @import */
116 state = inline_state;
119 else if (c == '/')
120 { /* possible start of a comment */
121 int nextchar = getc (f);
122 if (nextchar == '*')
123 state = comment_state;
124 else
126 ungetc (nextchar, f); /* wasn't a comment */
127 state = inline_state;
130 else if (isspace (c))
131 ; /* skip whitespace; maybe should use c_isspace? */
133 else
134 /* not an @import, not a comment, not whitespace: we must
135 have started the inline text. */
136 state = inline_state;
138 if (state == inline_state)
139 append_char (inline_text, c);
141 if (state != null_state)
142 prev_state = null_state;
143 break;
145 case comment_state:
146 if (c == '/' && lastchar == '*')
147 state = prev_state; /* end of comment */
148 break; /* else ignore this comment char */
150 case import_state:
151 append_char (import_text, c); /* include this import char */
152 if (c == ';')
153 { /* done with @import */
154 append_char (import_text, '\n'); /* make the output nice */
155 state = null_state;
156 prev_state = import_state;
158 break;
160 case inline_state:
161 /* No harm in writing out comments, so don't bother parsing
162 them out, just append everything. */
163 append_char (inline_text, c);
164 break;
167 lastchar = c;
170 /* Reached the end of the file. We should not be still in a comment. */
171 if (state == comment_state)
172 warning (_("%s:%d: --css-file ended in comment"), filename, lineno);
174 /* Write the @import text, if any. */
175 if (import_text->buffer)
177 add_word (import_text->buffer);
178 free (import_text->buffer);
179 free (import_text);
182 /* We're wasting the buffer struct memory, but so what. */
183 return inline_text->buffer;
/* Top of the stack of saved HTML tags; entries are added by push_tag
   and removed by pop_tag.  */
HSTACK *htmlstack = NULL;

/* See html.h.  */
/* Set to 1 when html_output_head runs, so that later calls return
   immediately.  */
int html_output_head_p = 0;
/* Set to 1 once the <h1 class="settitle"> heading has been written.  */
int html_title_written = 0;
192 void
193 html_output_head (void)
195 static const char *html_title = NULL;
196 char *encoding;
198 if (html_output_head_p)
199 return;
200 html_output_head_p = 1;
202 encoding = current_document_encoding ();
204 /* The <title> should not have markup, so use text_expansion. */
205 if (!html_title)
206 html_title = escape_string (title ?
207 text_expansion (title) : (char *) _("Untitled"));
209 /* Make sure this is the very first string of the output document. */
210 output_paragraph_offset = 0;
212 add_html_block_elt_args ("<html lang=\"%s\">\n<head>\n",
213 language_table[language_code].abbrev);
215 /* When splitting, add current node's name to title if it's available and not
216 Top. */
217 if (splitting && current_node && !STREQ (current_node, "Top"))
218 add_word_args ("<title>%s - %s</title>\n",
219 escape_string (xstrdup (current_node)), html_title);
220 else
221 add_word_args ("<title>%s</title>\n", html_title);
223 add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html");
224 if (encoding && *encoding)
225 add_word_args ("; charset=%s", encoding);
227 add_word ("\">\n");
229 if (!document_description)
230 document_description = html_title;
232 add_word_args ("<meta name=\"description\" content=\"%s\">\n",
233 document_description);
234 add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n",
235 VERSION);
237 /* Navigation bar links. */
238 if (!splitting)
239 add_word ("<link title=\"Top\" rel=\"top\" href=\"#Top\">\n");
240 else if (tag_table)
242 /* Always put a top link. */
243 add_word ("<link title=\"Top\" rel=\"start\" href=\"index.html#Top\">\n");
245 /* We already have a top link, avoid duplication. */
246 if (tag_table->up && !STREQ (tag_table->up, "Top"))
247 add_link (tag_table->up, "rel=\"up\"");
249 if (tag_table->prev)
250 add_link (tag_table->prev, "rel=\"prev\"");
252 if (tag_table->next)
253 add_link (tag_table->next, "rel=\"next\"");
255 /* fixxme: Look for a way to put links to various indices in the
256 document. Also possible candidates to be added here are First and
257 Last links. */
259 else
261 /* We are splitting, but we neither have a tag_table. So this must be
262 index.html. So put a link to Top. */
263 add_word ("<link title=\"Top\" rel=\"start\" href=\"#Top\">\n");
266 add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \
267 rel=\"generator-home\" title=\"Texinfo Homepage\">\n");
269 if (copying_text)
270 { /* It is not ideal that we include the html markup here within
271 <head>, so we use text_expansion. */
272 insert_string ("<!--\n");
273 insert_string (text_expansion (copying_text));
274 insert_string ("-->\n");
277 /* Put the style definitions in a comment for the sake of browsers
278 that don't support <style>. */
279 add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n");
280 add_word ("<style type=\"text/css\"><!--\n");
283 char *css_inline = NULL;
285 if (css_include)
286 /* This writes out any @import commands from the --css-file,
287 and returns any actual css code following the imports. */
288 css_inline = process_css_file (css_include);
290 /* This seems cleaner than adding <br>'s at the end of each line for
291 these "roman" displays. It's hardly the end of the world if the
292 browser doesn't do <style>s, in any case; they'll just come out in
293 typewriter. */
294 #define CSS_FONT_INHERIT "font-family:inherit"
295 add_word_args (" pre.display { %s }\n", CSS_FONT_INHERIT);
296 add_word_args (" pre.format { %s }\n", CSS_FONT_INHERIT);
298 /* Alternatively, we could do <font size=-1> in insertion.c, but this
299 way makes it easier to override. */
300 #define CSS_FONT_SMALLER "font-size:smaller"
301 add_word_args (" pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT,
302 CSS_FONT_SMALLER);
303 add_word_args (" pre.smallformat { %s; %s }\n", CSS_FONT_INHERIT,
304 CSS_FONT_SMALLER);
305 add_word_args (" pre.smallexample { %s }\n", CSS_FONT_SMALLER);
306 add_word_args (" pre.smalllisp { %s }\n", CSS_FONT_SMALLER);
308 /* Since HTML doesn't have a sc element, we use span with a bit of
309 CSS spice instead. */
310 #define CSS_FONT_SMALL_CAPS "font-variant:small-caps"
311 add_word_args (" span.sc { %s }\n", CSS_FONT_SMALL_CAPS);
313 /* Roman (default) font class, closest we can come. */
314 #define CSS_FONT_ROMAN "font-family:serif; font-weight:normal;"
315 add_word_args (" span.roman { %s } \n", CSS_FONT_ROMAN);
317 /* Sans serif font class. */
318 #define CSS_FONT_SANSSERIF "font-family:sans-serif; font-weight:normal;"
319 add_word_args (" span.sansserif { %s } \n", CSS_FONT_SANSSERIF);
321 /* Write out any css code from the user's --css-file. */
322 if (css_inline)
323 insert_string (css_inline);
325 add_word ("--></style>\n");
328 add_word ("</head>\n<body>\n");
330 if (title && !html_title_written && titlepage_cmd_present)
332 add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title);
333 html_title_written = 1;
336 free (encoding);
/* Return STRING with the HTML special characters `"', `&', `<' and `>'
   replaced by their character entities.  If STRING contains none of
   them it is returned as is; otherwise the result is newly allocated
   and STRING itself is freed.  */
char *
escape_string (char *string)
{
  const char *src;
  char *escaped, *out;
  int scanned = 0, needed = 0;

  /* First pass: how much to allocate.  Both counters include the
     terminating null.  */
  for (src = string; ; src++)
    {
      scanned++;

      if (*src == '"')
        needed += 6;  /* `&quot;' */
      else if (*src == '&')
        needed += 5;  /* `&amp;' */
      else if (*src == '<' || *src == '>')
        needed += 4;  /* `&lt;', `&gt;' */
      else
        needed++;

      if (*src == '\0')
        break;
    }

  /* Nothing grew, so the string is already OK.  */
  if (needed == scanned)
    return string;

  escaped = xmalloc (needed);
  out = escaped;

  /* Second pass: copy, substituting entities.  The terminating null is
     copied by the ordinary-character branch.  */
  for (src = string; ; src++)
    {
      const char *entity = NULL;

      if (*src == '"')
        entity = "&quot;";
      else if (*src == '&')
        entity = "&amp;";
      else if (*src == '<')
        entity = "&lt;";
      else if (*src == '>')
        entity = "&gt;";

      if (entity)
        {
          strcpy (out, entity);
          out += strlen (entity);
        }
      else
        *out++ = *src;

      if (*src == '\0')
        break;
    }

  free (string);
  return escaped;
}
402 /* Save current tag. */
403 static void
404 push_tag (char *tag, char *attribs)
406 HSTACK *newstack = xmalloc (sizeof (HSTACK));
408 newstack->tag = tag;
409 newstack->attribs = xstrdup (attribs);
410 newstack->next = htmlstack;
411 htmlstack = newstack;
414 /* Get last tag. */
415 static void
416 pop_tag (void)
418 HSTACK *tos = htmlstack;
420 if (!tos)
422 line_error (_("[unexpected] no html tag to pop"));
423 return;
426 free (htmlstack->attribs);
428 htmlstack = htmlstack->next;
429 free (tos);
432 /* Check if tag is an empty or a whitespace only element.
433 If so, remove it, keeping whitespace intact. */
435 rollback_empty_tag (char *tag)
437 int check_position = output_paragraph_offset;
438 int taglen = strlen (tag);
439 int rollback_happened = 0;
440 char *contents = "";
441 char *contents_canon_white = "";
443 /* If output_paragraph is empty, we cannot rollback :-\ */
444 if (output_paragraph_offset <= 0)
445 return 0;
447 /* Find the end of the previous tag. */
448 while (output_paragraph[check_position-1] != '>' && check_position > 0)
449 check_position--;
451 /* Save stuff between tag's end to output_paragraph's end. */
452 if (check_position != output_paragraph_offset)
454 contents = xmalloc (output_paragraph_offset - check_position + 1);
455 memcpy (contents, output_paragraph + check_position,
456 output_paragraph_offset - check_position);
458 contents[output_paragraph_offset - check_position] = '\0';
460 contents_canon_white = xstrdup (contents);
461 canon_white (contents_canon_white);
464 /* Find the start of the previous tag. */
465 while (output_paragraph[check_position-1] != '<' && check_position > 0)
466 check_position--;
468 /* Check to see if this is the tag. */
469 if (strncmp ((char *) output_paragraph + check_position, tag, taglen) == 0
470 && (whitespace (output_paragraph[check_position + taglen])
471 || output_paragraph[check_position + taglen] == '>'))
473 if (!contents_canon_white || !*contents_canon_white)
475 /* Empty content after whitespace removal, so roll it back. */
476 output_paragraph_offset = check_position - 1;
477 rollback_happened = 1;
479 /* Original contents may not be empty (whitespace.) */
480 if (contents && *contents)
482 insert_string (contents);
483 free (contents);
488 return rollback_happened;
491 /* Open or close TAG according to START_OR_END. */
492 void
493 #if defined (VA_FPRINTF) && __STDC__
494 insert_html_tag_with_attribute (int start_or_end, char *tag, char *format, ...)
495 #else
496 insert_html_tag_with_attribute (start_or_end, tag, format, va_alist)
497 int start_or_end;
498 char *tag;
499 char *format;
500 va_dcl
501 #endif
503 char *old_tag = NULL;
504 char *old_attribs = NULL;
505 char formatted_attribs[2000]; /* xx no fixed limits */
506 int do_return = 0;
507 extern int in_html_elt;
509 if (start_or_end != START)
510 pop_tag ();
512 if (htmlstack)
514 old_tag = htmlstack->tag;
515 old_attribs = htmlstack->attribs;
518 if (format)
520 #ifdef VA_SPRINTF
521 va_list ap;
522 #endif
524 VA_START (ap, format);
525 #ifdef VA_SPRINTF
526 VA_SPRINTF (formatted_attribs, format, ap);
527 #else
528 sprintf (formatted_attribs, format, a1, a2, a3, a4, a5, a6, a7, a8);
529 #endif
530 va_end (ap);
532 else
533 formatted_attribs[0] = '\0';
535 /* Exception: can nest multiple spans. */
536 if (htmlstack
537 && STREQ (htmlstack->tag, tag)
538 && !(STREQ (tag, "span") && STREQ (old_attribs, formatted_attribs)))
539 do_return = 1;
541 if (start_or_end == START)
542 push_tag (tag, formatted_attribs);
544 if (do_return)
545 return;
547 in_html_elt++;
549 /* texinfo.tex doesn't support more than one font attribute
550 at the same time. */
551 if ((start_or_end == START) && old_tag && *old_tag
552 && !rollback_empty_tag (old_tag))
553 add_word_args ("</%s>", old_tag);
555 if (*tag)
557 if (start_or_end == START)
558 add_word_args (format ? "<%s %s>" : "<%s>", tag, formatted_attribs);
559 else if (!rollback_empty_tag (tag))
560 /* Insert close tag only if we didn't rollback,
561 in which case the opening tag is removed. */
562 add_word_args ("</%s>", tag);
565 if ((start_or_end != START) && old_tag && *old_tag)
566 add_word_args (strlen (old_attribs) > 0 ? "<%s %s>" : "<%s>",
567 old_tag, old_attribs);
569 in_html_elt--;
/* Open or close TAG according to START_OR_END, without any
   attributes: this just calls insert_html_tag_with_attribute with a
   null format.  */
void
insert_html_tag (int start_or_end, char *tag)
{
  insert_html_tag_with_attribute (start_or_end, tag, NULL);
}
/* Output an HTML <link> to the filename for NODE, including the
   other string as extra attributes.  Nothing is output if NODENAME is
   null.  The node name is also used as the link's title, with HTML
   special characters escaped so that a name containing `"' or `&'
   cannot break the attribute.  */
void
add_link (char *nodename, char *attributes)
{
  if (nodename)
    {
      char *title_text;

      add_html_elt ("<link ");
      add_word_args ("%s", attributes);
      add_word_args (" href=\"");
      add_anchor_name (nodename, 1);

      /* Escape a private copy (escape_string may free its argument),
         taken only now so the title shows the name exactly as it is
         after the call above.  */
      title_text = escape_string (xstrdup (nodename));
      add_word_args ("\" title=\"%s\">\n", title_text);
      free (title_text);
    }
}
/* Output NAME with characters escaped as appropriate for an anchor
   name, i.e., escape URL special characters with our _00hh convention
   if OLD is zero.  (See the manual for details on the new scheme.)

   If OLD is nonzero, generate the node name with the 4.6-and-earlier
   convention of %hh (and more special characters output as-is, notably
   - and *).  This is only so that external references to old names can
   still work with HTML generated by the new makeinfo; the gcc folks
   needed this.  Our own HTML does not refer to these names.

   NAME is modified in place by canon_white.  */

void
add_escaped_anchor_name (char *name, int old)
{
  canon_white (name);

  if (!old && !strchr ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
                       *name))
    { /* XHTML does not allow anything but an ASCII letter to start an
         identifier.  Therefore kludge in this constant string if we
         have a nonletter.  (strchr also matches the terminating null,
         so an empty NAME gets no prefix.)  */
      add_word ("g_t");
    }

  for (; *name; name++)
    {
      if (cr_or_whitespace (*name))
        add_char ('-');

      else if (!old && !URL_SAFE_CHAR (*name))
        /* Cast so characters with the high bit set are treated as >128,
           for example o-umlaut should be 246, not -10.  Pad to two
           digits so that the escape is always `_' plus four hex
           digits, even for character codes below 0x10.  */
        add_word_args ("_00%02x", (unsigned char) *name);

      else if (old && !URL_SAFE_CHAR (*name) && !OLD_URL_SAFE_CHAR (*name))
        /* Different output convention, but still cast as above.  Left
           unpadded on purpose: these names exist only to match what
           older versions generated.  */
        add_word_args ("%%%x", (unsigned char) *name);

      else
        add_char (*name);
    }
}
635 /* Insert the text for the name of a reference in an HTML anchor
636 appropriate for NODENAME.
638 If HREF is zero, generate text for name= in the new node name
639 conversion convention.
640 If HREF is negative, generate text for name= in the old convention.
641 If HREF is positive, generate the name for an href= attribute, i.e.,
642 including the `#' if it's an internal reference. */
643 void
644 add_anchor_name (char *nodename, int href)
646 if (href > 0)
648 if (splitting)
649 add_url_name (nodename, href);
650 add_char ('#');
652 /* Always add NODENAME, so that the reference would pinpoint the
653 exact node on its file. This is so several nodes could share the
654 same file, in case of file-name clashes, but also for more
655 accurate browser positioning. */
656 if (strcasecmp (nodename, "(dir)") == 0)
657 /* Strip the parens, but keep the original letter-case. */
658 add_word_args ("%.3s", nodename + 1);
659 else if (strcasecmp (nodename, "top") == 0)
660 add_word ("Top");
661 else
662 add_escaped_anchor_name (nodename, href < 0);
/* Insert the text for the name of a reference in an HTML url,
   appropriate for NODENAME.  This simply passes NODENAME and HREF on
   to add_nodename_to_filename.  */
void
add_url_name (char *nodename, int href)
{
  add_nodename_to_filename (nodename, href);
}
673 /* Convert non [A-Za-z0-9] to _00xx, where xx means the hexadecimal
674 representation of the ASCII character. Also convert spaces and
675 newlines to dashes. */
676 static void
677 fix_filename (char *filename)
679 int i;
680 int len = strlen (filename);
681 char *oldname = xstrdup (filename);
683 *filename = '\0';
685 for (i = 0; i < len; i++)
687 if (cr_or_whitespace (oldname[i]))
688 strcat (filename, "-");
689 else if (URL_SAFE_CHAR (oldname[i]))
690 strncat (filename, (char *) oldname + i, 1);
691 else
693 char *hexchar = xmalloc (6 * sizeof (char));
694 sprintf (hexchar, "_00%x", (unsigned char) oldname[i]);
695 strcat (filename, hexchar);
696 free (hexchar);
699 /* Check if we are nearing boundaries. */
700 if (strlen (filename) >= PATH_MAX - 20)
701 break;
704 free (oldname);
707 /* As we can't look-up a (forward-referenced) nodes' html filename
708 from the tentry, we take the easy way out. We assume that
709 nodenames are unique, and generate the html filename from the
710 nodename, that's always known. */
711 static char *
712 nodename_to_filename_1 (char *nodename, int href)
714 char *p;
715 char *filename;
716 char dirname[PATH_MAX];
718 if (strcasecmp (nodename, "Top") == 0)
720 /* We want to convert references to the Top node into
721 "index.html#Top". */
722 if (href)
723 filename = xstrdup ("index.html"); /* "#Top" is added by our callers */
724 else
725 filename = xstrdup ("Top");
727 else if (strcasecmp (nodename, "(dir)") == 0)
728 /* We want to convert references to the (dir) node into
729 "../index.html". */
730 filename = xstrdup ("../index.html");
731 else
733 filename = xmalloc (PATH_MAX);
734 dirname[0] = '\0';
735 *filename = '\0';
737 /* Check for external reference: ``(info-document)node-name''
738 Assume this node lives at: ``../info-document/node-name.html''
740 We need to handle the special case (sigh): ``(info-document)'',
741 ie, an external top-node, which should translate to:
742 ``../info-document/info-document.html'' */
744 p = nodename;
745 if (*nodename == '(')
747 int length;
749 p = strchr (nodename, ')');
750 if (p == NULL)
752 line_error (_("[unexpected] invalid node name: `%s'"), nodename);
753 xexit (1);
756 length = p - nodename - 1;
757 if (length > 5 &&
758 FILENAME_CMPN (p - 5, ".info", 5) == 0)
759 length -= 5;
760 /* This is for DOS, and also for Windows and GNU/Linux
761 systems that might have Info files copied from a DOS 8+3
762 filesystem. */
763 if (length > 4 &&
764 FILENAME_CMPN (p - 4, ".inf", 4) == 0)
765 length -= 4;
766 strcpy (filename, "../");
767 strncpy (dirname, nodename + 1, length);
768 *(dirname + length) = '\0';
769 fix_filename (dirname);
770 strcat (filename, dirname);
771 strcat (filename, "/");
772 p++;
775 /* In the case of just (info-document), there will be nothing
776 remaining, and we will refer to ../info-document/, which will
777 work fine. */
778 strcat (filename, p);
779 if (*p)
781 /* Hmm */
782 fix_filename (filename + strlen (filename) - strlen (p));
783 strcat (filename, ".html");
787 /* Produce a file name suitable for the underlying filesystem. */
788 normalize_filename (filename);
790 #if 0
791 /* We add ``#Nodified-filename'' anchor to external references to be
792 prepared for non-split HTML support. Maybe drop this. */
793 if (href && *dirname)
795 strcat (filename, "#");
796 strcat (filename, p);
797 /* Hmm, again */
798 fix_filename (filename + strlen (filename) - strlen (p));
800 #endif
802 return filename;
/* If necessary, ie, if current filename != filename of node, output
   the node name.  (That check is not implemented: the file name that
   nodename_to_filename_1 gives for NODENAME and HREF is always
   output.)  */
void
add_nodename_to_filename (char *nodename, int href)
{
  /* for now, don't check: always output filename */
  char *filename = nodename_to_filename_1 (nodename, href);
  add_word (filename);
  /* The name was allocated for us by nodename_to_filename_1.  */
  free (filename);
}
/* Return the html file name for NODENAME, as produced by
   nodename_to_filename_1 with a nonzero HREF (so the Top node maps to
   "index.html").  */
char *
nodename_to_filename (char *nodename)
{
  /* The callers of nodename_to_filename use the result to produce
     <a href=, so call nodename_to_filename_1 with last arg non-zero.  */
  return nodename_to_filename_1 (nodename, 1);
}