Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / texinfo / makeinfo / html.c
blob9dbd0b6544bf41be7024423be4e151a9d102db5c
1 /* $NetBSD$ */
3 /* html.c -- html-related utilities.
4 Id: html.c,v 1.28 2004/12/06 01:13:06 karl Exp
6 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 Free Software
7 Foundation, Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 #include "system.h"
24 #include "cmds.h"
25 #include "files.h"
26 #include "html.h"
27 #include "lang.h"
28 #include "makeinfo.h"
29 #include "node.h"
30 #include "sectioning.h"
/* A growable, NUL-terminated character buffer.  It is filled one
   character at a time by append_char, which (re)allocates as
   necessary.  Embedded null characters are not handled.  */
typedef struct
{
  unsigned size;    /* bytes currently allocated for BUFFER */
  unsigned length;  /* bytes in use, not counting the terminating NUL */
  char *buffer;     /* the text itself; NULL until the first append */
} buffer_type;
43 static buffer_type *
44 init_buffer (void)
46 buffer_type *buf = xmalloc (sizeof (buffer_type));
47 buf->length = 0;
48 buf->size = 0;
49 buf->buffer = NULL;
51 return buf;
54 static void
55 append_char (buffer_type *buf, int c)
57 buf->length++;
58 if (buf->length >= buf->size)
60 buf->size += 100;
61 buf->buffer = xrealloc (buf->buffer, buf->size);
63 buf->buffer[buf->length - 1] = c;
64 buf->buffer[buf->length] = 0;
67 /* Read the cascading style-sheet file FILENAME. Write out any @import
68 commands, which must come first, by the definition of css. If the
69 file contains any actual css code following the @imports, return it;
70 else return NULL. */
71 static char *
72 process_css_file (char *filename)
74 int c;
75 int lastchar = 0;
76 FILE *f;
77 buffer_type *import_text = init_buffer ();
78 buffer_type *inline_text = init_buffer ();
79 unsigned lineno = 1;
80 enum { null_state, comment_state, import_state, inline_state } state
81 = null_state, prev_state;
83 prev_state = null_state;
85 /* read from stdin if `-' is the filename. */
86 f = STREQ (filename, "-") ? stdin : fopen (filename, "r");
87 if (!f)
89 error (_("%s: could not open --css-file: %s"), progname, filename);
90 return NULL;
93 /* Read the file. The @import statements must come at the beginning,
94 with only whitespace and comments allowed before any inline css code. */
95 while ((c = getc (f)) >= 0)
97 if (c == '\n')
98 lineno++;
100 switch (state)
102 case null_state: /* between things */
103 if (c == '@')
104 { /* Only @import and @charset should switch into
105 import_state, other @-commands, such as @media, should
106 put us into inline_state. I don't think any other css
107 @-commands start with `i' or `c', although of course
108 this will break when such a command is defined. */
109 int nextchar = getc (f);
110 if (nextchar == 'i' || nextchar == 'c')
112 append_char (import_text, c);
113 state = import_state;
115 else
117 ungetc (nextchar, f); /* wasn't an @import */
118 state = inline_state;
121 else if (c == '/')
122 { /* possible start of a comment */
123 int nextchar = getc (f);
124 if (nextchar == '*')
125 state = comment_state;
126 else
128 ungetc (nextchar, f); /* wasn't a comment */
129 state = inline_state;
132 else if (isspace (c))
133 ; /* skip whitespace; maybe should use c_isspace? */
135 else
136 /* not an @import, not a comment, not whitespace: we must
137 have started the inline text. */
138 state = inline_state;
140 if (state == inline_state)
141 append_char (inline_text, c);
143 if (state != null_state)
144 prev_state = null_state;
145 break;
147 case comment_state:
148 if (c == '/' && lastchar == '*')
149 state = prev_state; /* end of comment */
150 break; /* else ignore this comment char */
152 case import_state:
153 append_char (import_text, c); /* include this import char */
154 if (c == ';')
155 { /* done with @import */
156 append_char (import_text, '\n'); /* make the output nice */
157 state = null_state;
158 prev_state = import_state;
160 break;
162 case inline_state:
163 /* No harm in writing out comments, so don't bother parsing
164 them out, just append everything. */
165 append_char (inline_text, c);
166 break;
169 lastchar = c;
172 /* Reached the end of the file. We should not be still in a comment. */
173 if (state == comment_state)
174 warning (_("%s:%d: --css-file ended in comment"), filename, lineno);
176 /* Write the @import text, if any. */
177 if (import_text->buffer)
179 add_word (import_text->buffer);
180 free (import_text->buffer);
181 free (import_text);
184 /* We're wasting the buffer struct memory, but so what. */
185 return inline_text->buffer;
188 HSTACK *htmlstack = NULL;
190 /* See html.h. */
191 int html_output_head_p = 0;
192 int html_title_written = 0;
194 void
195 html_output_head (void)
197 static const char *html_title = NULL;
198 char *encoding;
200 if (html_output_head_p)
201 return;
202 html_output_head_p = 1;
204 encoding = current_document_encoding ();
206 /* The <title> should not have markup, so use text_expansion. */
207 if (!html_title)
208 html_title = escape_string (title ?
209 text_expansion (title) : (char *) _("Untitled"));
211 /* Make sure this is the very first string of the output document. */
212 output_paragraph_offset = 0;
214 add_html_block_elt_args ("<html lang=\"%s\">\n<head>\n",
215 language_table[language_code].abbrev);
217 /* When splitting, add current node's name to title if it's available and not
218 Top. */
219 if (splitting && current_node && !STREQ (current_node, "Top"))
220 add_word_args ("<title>%s - %s</title>\n",
221 escape_string (xstrdup (current_node)), html_title);
222 else
223 add_word_args ("<title>%s</title>\n", html_title);
225 add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html");
226 if (encoding && *encoding)
227 add_word_args ("; charset=%s", encoding);
229 add_word ("\">\n");
231 if (!document_description)
232 document_description = html_title;
234 add_word_args ("<meta name=\"description\" content=\"%s\">\n",
235 document_description);
236 add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n",
237 VERSION);
239 /* Navigation bar links. */
240 if (!splitting)
241 add_word ("<link title=\"Top\" rel=\"top\" href=\"#Top\">\n");
242 else if (tag_table)
244 /* Always put a top link. */
245 add_word ("<link title=\"Top\" rel=\"start\" href=\"index.html#Top\">\n");
247 /* We already have a top link, avoid duplication. */
248 if (tag_table->up && !STREQ (tag_table->up, "Top"))
249 add_link (tag_table->up, "rel=\"up\"");
251 if (tag_table->prev)
252 add_link (tag_table->prev, "rel=\"prev\"");
254 if (tag_table->next)
255 add_link (tag_table->next, "rel=\"next\"");
257 /* fixxme: Look for a way to put links to various indices in the
258 document. Also possible candidates to be added here are First and
259 Last links. */
261 else
263 /* We are splitting, but we neither have a tag_table. So this must be
264 index.html. So put a link to Top. */
265 add_word ("<link title=\"Top\" rel=\"start\" href=\"#Top\">\n");
268 add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \
269 rel=\"generator-home\" title=\"Texinfo Homepage\">\n");
271 if (copying_text)
272 { /* It is not ideal that we include the html markup here within
273 <head>, so we use text_expansion. */
274 insert_string ("<!--\n");
275 insert_string (text_expansion (copying_text));
276 insert_string ("-->\n");
279 /* Put the style definitions in a comment for the sake of browsers
280 that don't support <style>. */
281 add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n");
282 add_word ("<style type=\"text/css\"><!--\n");
285 char *css_inline = NULL;
287 if (css_include)
288 /* This writes out any @import commands from the --css-file,
289 and returns any actual css code following the imports. */
290 css_inline = process_css_file (css_include);
292 /* This seems cleaner than adding <br>'s at the end of each line for
293 these "roman" displays. It's hardly the end of the world if the
294 browser doesn't do <style>s, in any case; they'll just come out in
295 typewriter. */
296 #define CSS_FONT_INHERIT "font-family:inherit"
297 add_word_args (" pre.display { %s }\n", CSS_FONT_INHERIT);
298 add_word_args (" pre.format { %s }\n", CSS_FONT_INHERIT);
300 /* Alternatively, we could do <font size=-1> in insertion.c, but this
301 way makes it easier to override. */
302 #define CSS_FONT_SMALLER "font-size:smaller"
303 add_word_args (" pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT,
304 CSS_FONT_SMALLER);
305 add_word_args (" pre.smallformat { %s; %s }\n", CSS_FONT_INHERIT,
306 CSS_FONT_SMALLER);
307 add_word_args (" pre.smallexample { %s }\n", CSS_FONT_SMALLER);
308 add_word_args (" pre.smalllisp { %s }\n", CSS_FONT_SMALLER);
310 /* Since HTML doesn't have a sc element, we use span with a bit of
311 CSS spice instead. */
312 #define CSS_FONT_SMALL_CAPS "font-variant:small-caps"
313 add_word_args (" span.sc { %s }\n", CSS_FONT_SMALL_CAPS);
315 /* Roman (default) font class, closest we can come. */
316 #define CSS_FONT_ROMAN "font-family:serif; font-weight:normal;"
317 add_word_args (" span.roman { %s } \n", CSS_FONT_ROMAN);
319 /* Sans serif font class. */
320 #define CSS_FONT_SANSSERIF "font-family:sans-serif; font-weight:normal;"
321 add_word_args (" span.sansserif { %s } \n", CSS_FONT_SANSSERIF);
323 /* Write out any css code from the user's --css-file. */
324 if (css_inline)
325 insert_string (css_inline);
327 add_word ("--></style>\n");
330 add_word ("</head>\n<body>\n");
332 if (title && !html_title_written && titlepage_cmd_present)
334 add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title);
335 html_title_written = 1;
338 free (encoding);
/* Escape the HTML special characters `"', `&', `<' and `>' in STRING.
   If STRING contains none of them it is returned unchanged.
   Otherwise a newly allocated, escaped copy is returned and STRING
   itself is freed, so STRING must then be heap-allocated.  */
char *
escape_string (char *string)
{
  const char *src;
  char *escaped;
  char *dst;
  int plain_len = 0;    /* characters in STRING */
  int escaped_len = 0;  /* characters needed after escaping */

  /* First pass: work out how much room the escaped text needs.  */
  for (src = string; *src; src++)
    {
      plain_len++;
      switch (*src)
        {
        case '"':
          escaped_len += 6;  /* `&quot;' */
          break;
        case '&':
          escaped_len += 5;  /* `&amp;' */
          break;
        case '<':
        case '>':
          escaped_len += 4;  /* `&lt;', `&gt;' */
          break;
        default:
          escaped_len++;
          break;
        }
    }

  /* Same length means nothing needed escaping.  */
  if (escaped_len == plain_len)
    return string;

  /* Second pass: build the escaped copy (plus one for the NUL).  */
  escaped = xmalloc (escaped_len + 1);
  dst = escaped;
  for (src = string; *src; src++)
    {
      switch (*src)
        {
        case '"':
          memcpy (dst, "&quot;", 6);
          dst += 6;
          break;
        case '&':
          memcpy (dst, "&amp;", 5);
          dst += 5;
          break;
        case '<':
          memcpy (dst, "&lt;", 4);
          dst += 4;
          break;
        case '>':
          memcpy (dst, "&gt;", 4);
          dst += 4;
          break;
        default:
          *dst++ = *src;
          break;
        }
    }
  *dst = '\0';

  free (string);
  return escaped;
}
404 /* Save current tag. */
405 static void
406 push_tag (char *tag, char *attribs)
408 HSTACK *newstack = xmalloc (sizeof (HSTACK));
410 newstack->tag = tag;
411 newstack->attribs = xstrdup (attribs);
412 newstack->next = htmlstack;
413 htmlstack = newstack;
416 /* Get last tag. */
417 static void
418 pop_tag (void)
420 HSTACK *tos = htmlstack;
422 if (!tos)
424 line_error (_("[unexpected] no html tag to pop"));
425 return;
428 free (htmlstack->attribs);
430 htmlstack = htmlstack->next;
431 free (tos);
434 /* Check if tag is an empty or a whitespace only element.
435 If so, remove it, keeping whitespace intact. */
437 rollback_empty_tag (char *tag)
439 int check_position = output_paragraph_offset;
440 int taglen = strlen (tag);
441 int rollback_happened = 0;
442 char *contents = "";
443 char *contents_canon_white = "";
445 /* If output_paragraph is empty, we cannot rollback :-\ */
446 if (output_paragraph_offset <= 0)
447 return 0;
449 /* Find the end of the previous tag. */
450 while (output_paragraph[check_position-1] != '>' && check_position > 0)
451 check_position--;
453 /* Save stuff between tag's end to output_paragraph's end. */
454 if (check_position != output_paragraph_offset)
456 contents = xmalloc (output_paragraph_offset - check_position + 1);
457 memcpy (contents, output_paragraph + check_position,
458 output_paragraph_offset - check_position);
460 contents[output_paragraph_offset - check_position] = '\0';
462 contents_canon_white = xstrdup (contents);
463 canon_white (contents_canon_white);
466 /* Find the start of the previous tag. */
467 while (output_paragraph[check_position-1] != '<' && check_position > 0)
468 check_position--;
470 /* Check to see if this is the tag. */
471 if (strncmp ((char *) output_paragraph + check_position, tag, taglen) == 0
472 && (whitespace (output_paragraph[check_position + taglen])
473 || output_paragraph[check_position + taglen] == '>'))
475 if (!contents_canon_white || !*contents_canon_white)
477 /* Empty content after whitespace removal, so roll it back. */
478 output_paragraph_offset = check_position - 1;
479 rollback_happened = 1;
481 /* Original contents may not be empty (whitespace.) */
482 if (contents && *contents)
484 insert_string (contents);
485 free (contents);
490 return rollback_happened;
493 /* Open or close TAG according to START_OR_END. */
494 void
495 #if defined (VA_FPRINTF) && __STDC__
496 insert_html_tag_with_attribute (int start_or_end, char *tag, char *format, ...)
497 #else
498 insert_html_tag_with_attribute (start_or_end, tag, format, va_alist)
499 int start_or_end;
500 char *tag;
501 char *format;
502 va_dcl
503 #endif
505 char *old_tag = NULL;
506 char *old_attribs = NULL;
507 char formatted_attribs[2000]; /* xx no fixed limits */
508 int do_return = 0;
509 extern int in_html_elt;
511 if (start_or_end != START)
512 pop_tag ();
514 if (htmlstack)
516 old_tag = htmlstack->tag;
517 old_attribs = htmlstack->attribs;
520 if (format)
522 #ifdef VA_SPRINTF
523 va_list ap;
524 #endif
526 VA_START (ap, format);
527 #ifdef VA_SPRINTF
528 VA_SPRINTF (formatted_attribs, format, ap);
529 #else
530 sprintf (formatted_attribs, format, a1, a2, a3, a4, a5, a6, a7, a8);
531 #endif
532 va_end (ap);
534 else
535 formatted_attribs[0] = '\0';
537 /* Exception: can nest multiple spans. */
538 if (htmlstack
539 && STREQ (htmlstack->tag, tag)
540 && !(STREQ (tag, "span") && STREQ (old_attribs, formatted_attribs)))
541 do_return = 1;
543 if (start_or_end == START)
544 push_tag (tag, formatted_attribs);
546 if (do_return)
547 return;
549 in_html_elt++;
551 /* texinfo.tex doesn't support more than one font attribute
552 at the same time. */
553 if ((start_or_end == START) && old_tag && *old_tag
554 && !rollback_empty_tag (old_tag))
555 add_word_args ("</%s>", old_tag);
557 if (*tag)
559 if (start_or_end == START)
560 add_word_args (format ? "<%s %s>" : "<%s>", tag, formatted_attribs);
561 else if (!rollback_empty_tag (tag))
562 /* Insert close tag only if we didn't rollback,
563 in which case the opening tag is removed. */
564 add_word_args ("</%s>", tag);
567 if ((start_or_end != START) && old_tag && *old_tag)
568 add_word_args (strlen (old_attribs) > 0 ? "<%s %s>" : "<%s>",
569 old_tag, old_attribs);
571 in_html_elt--;
/* Open or close TAG according to START_OR_END, with no attributes.
   Shorthand for insert_html_tag_with_attribute with a NULL format.  */
void
insert_html_tag (int start_or_end, char *tag)
{
  char *no_attributes = NULL;

  insert_html_tag_with_attribute (start_or_end, tag, no_attributes);
}
/* Output an HTML <link> to the filename for NODENAME, including
   ATTRIBUTES verbatim as extra attributes.  The node name is also
   emitted as the link's title, with HTML special characters escaped.
   Does nothing if NODENAME is NULL.  */
void
add_link (char *nodename, char *attributes)
{
  char *escaped_title;

  if (!nodename)
    return;

  add_html_elt ("<link ");
  add_word_args ("%s", attributes);
  add_word_args (" href=\"");
  add_anchor_name (nodename, 1);

  /* A node name may contain `"', `&', `<' or `>', which would corrupt
     the attribute value if written raw.  Escape a private copy, taken
     after add_anchor_name so the title shows the same NODENAME text as
     before, and release it once written.  */
  escaped_title = escape_string (xstrdup (nodename));
  add_word_args ("\" title=\"%s\">\n", escaped_title);
  free (escaped_title);
}
/* Output NAME with characters escaped as appropriate for an anchor
   name, i.e., escape URL special characters with our _00hh convention
   if OLD is zero.  (See the manual for details on the new scheme.)

   If OLD is nonzero, generate the node name with the 4.6-and-earlier
   convention of %hh (and more special characters output as-is, notably
   - and *).  This is only so that external references to old names can
   still work with HTML generated by the new makeinfo; the gcc folks
   needed this.  Our own HTML does not refer to these names.

   NAME is passed through canon_white first, which works on NAME in
   place.  */
void
add_escaped_anchor_name (char *name, int old)
{
  char *cursor;

  canon_white (name);

  /* XHTML does not allow anything but an ASCII letter to start an
     identifier.  Therefore kludge in this constant string if we
     have a nonletter.  */
  if (!old && !strchr ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
                       *name))
    add_word ("g_t");

  for (cursor = name; *cursor; cursor++)
    {
      char ch = *cursor;

      if (cr_or_whitespace (ch))
        add_char ('-');
      else if (URL_SAFE_CHAR (ch))
        add_char (ch);
      else if (!old)
        /* Cast so characters with the high bit set are treated as >128,
           for example o-umlaut should be 246, not -10.  */
        add_word_args ("_00%x", (unsigned char) ch);
      else if (!OLD_URL_SAFE_CHAR (ch))
        /* Different output convention, but still cast as above.  */
        add_word_args ("%%%x", (unsigned char) ch);
      else
        add_char (ch);
    }
}
637 /* Insert the text for the name of a reference in an HTML anchor
638 appropriate for NODENAME.
640 If HREF is zero, generate text for name= in the new node name
641 conversion convention.
642 If HREF is negative, generate text for name= in the old convention.
643 If HREF is positive, generate the name for an href= attribute, i.e.,
644 including the `#' if it's an internal reference. */
645 void
646 add_anchor_name (char *nodename, int href)
648 if (href > 0)
650 if (splitting)
651 add_url_name (nodename, href);
652 add_char ('#');
654 /* Always add NODENAME, so that the reference would pinpoint the
655 exact node on its file. This is so several nodes could share the
656 same file, in case of file-name clashes, but also for more
657 accurate browser positioning. */
658 if (strcasecmp (nodename, "(dir)") == 0)
659 /* Strip the parens, but keep the original letter-case. */
660 add_word_args ("%.3s", nodename + 1);
661 else if (strcasecmp (nodename, "top") == 0)
662 add_word ("Top");
663 else
664 add_escaped_anchor_name (nodename, href < 0);
/* Insert the text for the name of a reference in an HTML url,
   appropriate for NODENAME.  HREF is passed through unchanged to
   add_nodename_to_filename, which does the work.  */
void
add_url_name (char *nodename, int href)
{
  add_nodename_to_filename (nodename, href);
}
675 /* Convert non [A-Za-z0-9] to _00xx, where xx means the hexadecimal
676 representation of the ASCII character. Also convert spaces and
677 newlines to dashes. */
678 static void
679 fix_filename (char *filename)
681 int i;
682 int len = strlen (filename);
683 char *oldname = xstrdup (filename);
685 *filename = '\0';
687 for (i = 0; i < len; i++)
689 if (cr_or_whitespace (oldname[i]))
690 strcat (filename, "-");
691 else if (URL_SAFE_CHAR (oldname[i]))
692 strncat (filename, (char *) oldname + i, 1);
693 else
695 char *hexchar = xmalloc (6 * sizeof (char));
696 sprintf (hexchar, "_00%x", (unsigned char) oldname[i]);
697 strcat (filename, hexchar);
698 free (hexchar);
701 /* Check if we are nearing boundaries. */
702 if (strlen (filename) >= PATH_MAX - 20)
703 break;
706 free (oldname);
709 /* As we can't look-up a (forward-referenced) nodes' html filename
710 from the tentry, we take the easy way out. We assume that
711 nodenames are unique, and generate the html filename from the
712 nodename, that's always known. */
713 static char *
714 nodename_to_filename_1 (char *nodename, int href)
716 char *p;
717 char *filename;
718 char dirname[PATH_MAX];
720 if (strcasecmp (nodename, "Top") == 0)
722 /* We want to convert references to the Top node into
723 "index.html#Top". */
724 if (href)
725 filename = xstrdup ("index.html"); /* "#Top" is added by our callers */
726 else
727 filename = xstrdup ("Top");
729 else if (strcasecmp (nodename, "(dir)") == 0)
730 /* We want to convert references to the (dir) node into
731 "../index.html". */
732 filename = xstrdup ("../index.html");
733 else
735 filename = xmalloc (PATH_MAX);
736 dirname[0] = '\0';
737 *filename = '\0';
739 /* Check for external reference: ``(info-document)node-name''
740 Assume this node lives at: ``../info-document/node-name.html''
742 We need to handle the special case (sigh): ``(info-document)'',
743 ie, an external top-node, which should translate to:
744 ``../info-document/info-document.html'' */
746 p = nodename;
747 if (*nodename == '(')
749 int length;
751 p = strchr (nodename, ')');
752 if (p == NULL)
754 line_error (_("[unexpected] invalid node name: `%s'"), nodename);
755 xexit (1);
758 length = p - nodename - 1;
759 if (length > 5 &&
760 FILENAME_CMPN (p - 5, ".info", 5) == 0)
761 length -= 5;
762 /* This is for DOS, and also for Windows and GNU/Linux
763 systems that might have Info files copied from a DOS 8+3
764 filesystem. */
765 if (length > 4 &&
766 FILENAME_CMPN (p - 4, ".inf", 4) == 0)
767 length -= 4;
768 strcpy (filename, "../");
769 strncpy (dirname, nodename + 1, length);
770 *(dirname + length) = '\0';
771 fix_filename (dirname);
772 strcat (filename, dirname);
773 strcat (filename, "/");
774 p++;
777 /* In the case of just (info-document), there will be nothing
778 remaining, and we will refer to ../info-document/, which will
779 work fine. */
780 strcat (filename, p);
781 if (*p)
783 /* Hmm */
784 fix_filename (filename + strlen (filename) - strlen (p));
785 strcat (filename, ".html");
789 /* Produce a file name suitable for the underlying filesystem. */
790 normalize_filename (filename);
792 #if 0
793 /* We add ``#Nodified-filename'' anchor to external references to be
794 prepared for non-split HTML support. Maybe drop this. */
795 if (href && *dirname)
797 strcat (filename, "#");
798 strcat (filename, p);
799 /* Hmm, again */
800 fix_filename (filename + strlen (filename) - strlen (p));
802 #endif
804 return filename;
/* Output the html file name derived from NODENAME.  Intended to do so
   only if necessary, ie, if current filename != filename of node, but
   for now there is no such check: the file name is always output.  */
void
add_nodename_to_filename (char *nodename, int href)
{
  char *node_file = nodename_to_filename_1 (nodename, href);

  add_word (node_file);
  free (node_file);  /* we own the result of nodename_to_filename_1 */
}
/* Return the newly allocated html file name for NODENAME.  The
   callers of nodename_to_filename use the result to produce <a href=,
   so nodename_to_filename_1 is called with its last arg non-zero.  */
char *
nodename_to_filename (char *nodename)
{
  const int for_href = 1;

  return nodename_to_filename_1 (nodename, for_href);
}