1 /* html.c -- html-related utilities.
2 $Id: html.c,v 1.28 2004/12/06 01:13:06 karl Exp $
4 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 Free Software
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
28 #include "sectioning.h"
31 /* Append CHAR to BUFFER, (re)allocating as necessary. We don't handle
36 unsigned size
; /* allocated */
37 unsigned length
; /* used */
44 buffer_type
*buf
= xmalloc (sizeof (buffer_type
));
53 append_char (buffer_type
*buf
, int c
)
56 if (buf
->length
>= buf
->size
)
59 buf
->buffer
= xrealloc (buf
->buffer
, buf
->size
);
61 buf
->buffer
[buf
->length
- 1] = c
;
62 buf
->buffer
[buf
->length
] = 0;
65 /* Read the cascading style-sheet file FILENAME. Write out any @import
66 commands, which must come first, by the definition of css. If the
67 file contains any actual css code following the @imports, return it;
70 process_css_file (char *filename
)
75 buffer_type
*import_text
= init_buffer ();
76 buffer_type
*inline_text
= init_buffer ();
78 enum { null_state
, comment_state
, import_state
, inline_state
} state
79 = null_state
, prev_state
;
81 prev_state
= null_state
;
83 /* read from stdin if `-' is the filename. */
84 f
= STREQ (filename
, "-") ? stdin
: fopen (filename
, "r");
87 error (_("%s: could not open --css-file: %s"), progname
, filename
);
91 /* Read the file. The @import statements must come at the beginning,
92 with only whitespace and comments allowed before any inline css code. */
93 while ((c
= getc (f
)) >= 0)
100 case null_state
: /* between things */
102 { /* Only @import and @charset should switch into
103 import_state, other @-commands, such as @media, should
104 put us into inline_state. I don't think any other css
105 @-commands start with `i' or `c', although of course
106 this will break when such a command is defined. */
107 int nextchar
= getc (f
);
108 if (nextchar
== 'i' || nextchar
== 'c')
110 append_char (import_text
, c
);
111 state
= import_state
;
115 ungetc (nextchar
, f
); /* wasn't an @import */
116 state
= inline_state
;
120 { /* possible start of a comment */
121 int nextchar
= getc (f
);
123 state
= comment_state
;
126 ungetc (nextchar
, f
); /* wasn't a comment */
127 state
= inline_state
;
130 else if (isspace (c
))
131 ; /* skip whitespace; maybe should use c_isspace? */
134 /* not an @import, not a comment, not whitespace: we must
135 have started the inline text. */
136 state
= inline_state
;
138 if (state
== inline_state
)
139 append_char (inline_text
, c
);
141 if (state
!= null_state
)
142 prev_state
= null_state
;
146 if (c
== '/' && lastchar
== '*')
147 state
= prev_state
; /* end of comment */
148 break; /* else ignore this comment char */
151 append_char (import_text
, c
); /* include this import char */
153 { /* done with @import */
154 append_char (import_text
, '\n'); /* make the output nice */
156 prev_state
= import_state
;
161 /* No harm in writing out comments, so don't bother parsing
162 them out, just append everything. */
163 append_char (inline_text
, c
);
170 /* Reached the end of the file. We should not be still in a comment. */
171 if (state
== comment_state
)
172 warning (_("%s:%d: --css-file ended in comment"), filename
, lineno
);
174 /* Write the @import text, if any. */
175 if (import_text
->buffer
)
177 add_word (import_text
->buffer
);
178 free (import_text
->buffer
);
182 /* We're wasting the buffer struct memory, but so what. */
183 return inline_text
->buffer
;
186 HSTACK
*htmlstack
= NULL
;
189 int html_output_head_p
= 0;
190 int html_title_written
= 0;
193 html_output_head (void)
195 static const char *html_title
= NULL
;
198 if (html_output_head_p
)
200 html_output_head_p
= 1;
202 encoding
= current_document_encoding ();
204 /* The <title> should not have markup, so use text_expansion. */
206 html_title
= escape_string (title
?
207 text_expansion (title
) : (char *) _("Untitled"));
209 /* Make sure this is the very first string of the output document. */
210 output_paragraph_offset
= 0;
212 add_html_block_elt_args ("<html lang=\"%s\">\n<head>\n",
213 language_table
[language_code
].abbrev
);
215 /* When splitting, add current node's name to title if it's available and not
217 if (splitting
&& current_node
&& !STREQ (current_node
, "Top"))
218 add_word_args ("<title>%s - %s</title>\n",
219 escape_string (xstrdup (current_node
)), html_title
);
221 add_word_args ("<title>%s</title>\n", html_title
);
223 add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html");
224 if (encoding
&& *encoding
)
225 add_word_args ("; charset=%s", encoding
);
229 if (!document_description
)
230 document_description
= html_title
;
232 add_word_args ("<meta name=\"description\" content=\"%s\">\n",
233 document_description
);
234 add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n",
237 /* Navigation bar links. */
239 add_word ("<link title=\"Top\" rel=\"top\" href=\"#Top\">\n");
242 /* Always put a top link. */
243 add_word ("<link title=\"Top\" rel=\"start\" href=\"index.html#Top\">\n");
245 /* We already have a top link, avoid duplication. */
246 if (tag_table
->up
&& !STREQ (tag_table
->up
, "Top"))
247 add_link (tag_table
->up
, "rel=\"up\"");
250 add_link (tag_table
->prev
, "rel=\"prev\"");
253 add_link (tag_table
->next
, "rel=\"next\"");
255 /* fixxme: Look for a way to put links to various indices in the
256 document. Also possible candidates to be added here are First and
261 /* We are splitting, but we neither have a tag_table. So this must be
262 index.html. So put a link to Top. */
263 add_word ("<link title=\"Top\" rel=\"start\" href=\"#Top\">\n");
266 add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \
267 rel=\"generator-home\" title=\"Texinfo Homepage\">\n");
270 { /* It is not ideal that we include the html markup here within
271 <head>, so we use text_expansion. */
272 insert_string ("<!--\n");
273 insert_string (text_expansion (copying_text
));
274 insert_string ("-->\n");
277 /* Put the style definitions in a comment for the sake of browsers
278 that don't support <style>. */
279 add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n");
280 add_word ("<style type=\"text/css\"><!--\n");
283 char *css_inline
= NULL
;
286 /* This writes out any @import commands from the --css-file,
287 and returns any actual css code following the imports. */
288 css_inline
= process_css_file (css_include
);
290 /* This seems cleaner than adding <br>'s at the end of each line for
291 these "roman" displays. It's hardly the end of the world if the
292 browser doesn't do <style>s, in any case; they'll just come out in
294 #define CSS_FONT_INHERIT "font-family:inherit"
295 add_word_args (" pre.display { %s }\n", CSS_FONT_INHERIT
);
296 add_word_args (" pre.format { %s }\n", CSS_FONT_INHERIT
);
298 /* Alternatively, we could do <font size=-1> in insertion.c, but this
299 way makes it easier to override. */
300 #define CSS_FONT_SMALLER "font-size:smaller"
301 add_word_args (" pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT
,
303 add_word_args (" pre.smallformat { %s; %s }\n", CSS_FONT_INHERIT
,
305 add_word_args (" pre.smallexample { %s }\n", CSS_FONT_SMALLER
);
306 add_word_args (" pre.smalllisp { %s }\n", CSS_FONT_SMALLER
);
308 /* Since HTML doesn't have a sc element, we use span with a bit of
309 CSS spice instead. */
310 #define CSS_FONT_SMALL_CAPS "font-variant:small-caps"
311 add_word_args (" span.sc { %s }\n", CSS_FONT_SMALL_CAPS
);
313 /* Roman (default) font class, closest we can come. */
314 #define CSS_FONT_ROMAN "font-family:serif; font-weight:normal;"
315 add_word_args (" span.roman { %s } \n", CSS_FONT_ROMAN
);
317 /* Sans serif font class. */
318 #define CSS_FONT_SANSSERIF "font-family:sans-serif; font-weight:normal;"
319 add_word_args (" span.sansserif { %s } \n", CSS_FONT_SANSSERIF
);
321 /* Write out any css code from the user's --css-file. */
323 insert_string (css_inline
);
325 add_word ("--></style>\n");
328 add_word ("</head>\n<body>\n");
330 if (title
&& !html_title_written
&& titlepage_cmd_present
)
332 add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title
);
333 html_title_written
= 1;
339 /* Escape HTML special characters in the string if necessary,
340 returning a pointer to a possibly newly-allocated one. */
342 escape_string (char *string
)
345 int i
= 0, newlen
= 0;
349 /* Find how much to allocate. */
353 newlen
+= 6; /* `"' */
356 newlen
+= 5; /* `&' */
360 newlen
+= 4; /* `<', `>' */
368 if (newlen
== i
) return string
; /* Already OK. */
370 newstring
= xmalloc (newlen
);
377 strcpy (newstring
, """);
381 strcpy (newstring
, "&");
385 strcpy (newstring
, "<");
389 strcpy (newstring
, ">");
393 newstring
[0] = string
[i
];
399 return newstring
- newlen
;
402 /* Save current tag. */
404 push_tag (char *tag
, char *attribs
)
406 HSTACK
*newstack
= xmalloc (sizeof (HSTACK
));
409 newstack
->attribs
= xstrdup (attribs
);
410 newstack
->next
= htmlstack
;
411 htmlstack
= newstack
;
418 HSTACK
*tos
= htmlstack
;
422 line_error (_("[unexpected] no html tag to pop"));
426 free (htmlstack
->attribs
);
428 htmlstack
= htmlstack
->next
;
432 /* Check if tag is an empty or whitespace-only element.
433 If so, remove it, keeping whitespace intact. */
435 rollback_empty_tag (char *tag
)
437 int check_position
= output_paragraph_offset
;
438 int taglen
= strlen (tag
);
439 int rollback_happened
= 0;
441 char *contents_canon_white
= "";
443 /* If output_paragraph is empty, we cannot rollback :-\ */
444 if (output_paragraph_offset
<= 0)
447 /* Find the end of the previous tag. */
448 while (output_paragraph
[check_position
-1] != '>' && check_position
> 0)
451 /* Save stuff between tag's end to output_paragraph's end. */
452 if (check_position
!= output_paragraph_offset
)
454 contents
= xmalloc (output_paragraph_offset
- check_position
+ 1);
455 memcpy (contents
, output_paragraph
+ check_position
,
456 output_paragraph_offset
- check_position
);
458 contents
[output_paragraph_offset
- check_position
] = '\0';
460 contents_canon_white
= xstrdup (contents
);
461 canon_white (contents_canon_white
);
464 /* Find the start of the previous tag. */
465 while (output_paragraph
[check_position
-1] != '<' && check_position
> 0)
468 /* Check to see if this is the tag. */
469 if (strncmp ((char *) output_paragraph
+ check_position
, tag
, taglen
) == 0
470 && (whitespace (output_paragraph
[check_position
+ taglen
])
471 || output_paragraph
[check_position
+ taglen
] == '>'))
473 if (!contents_canon_white
|| !*contents_canon_white
)
475 /* Empty content after whitespace removal, so roll it back. */
476 output_paragraph_offset
= check_position
- 1;
477 rollback_happened
= 1;
479 /* Original contents may not be empty (whitespace.) */
480 if (contents
&& *contents
)
482 insert_string (contents
);
488 return rollback_happened
;
491 /* Open or close TAG according to START_OR_END. */
493 #if defined (VA_FPRINTF) && __STDC__
494 insert_html_tag_with_attribute (int start_or_end
, char *tag
, char *format
, ...)
496 insert_html_tag_with_attribute (start_or_end
, tag
, format
, va_alist
)
503 char *old_tag
= NULL
;
504 char *old_attribs
= NULL
;
505 char formatted_attribs
[2000]; /* xx no fixed limits */
507 extern int in_html_elt
;
509 if (start_or_end
!= START
)
514 old_tag
= htmlstack
->tag
;
515 old_attribs
= htmlstack
->attribs
;
524 VA_START (ap
, format
);
526 VA_SPRINTF (formatted_attribs
, format
, ap
);
528 sprintf (formatted_attribs
, format
, a1
, a2
, a3
, a4
, a5
, a6
, a7
, a8
);
533 formatted_attribs
[0] = '\0';
535 /* Exception: can nest multiple spans. */
537 && STREQ (htmlstack
->tag
, tag
)
538 && !(STREQ (tag
, "span") && STREQ (old_attribs
, formatted_attribs
)))
541 if (start_or_end
== START
)
542 push_tag (tag
, formatted_attribs
);
549 /* texinfo.tex doesn't support more than one font attribute
551 if ((start_or_end
== START
) && old_tag
&& *old_tag
552 && !rollback_empty_tag (old_tag
))
553 add_word_args ("</%s>", old_tag
);
557 if (start_or_end
== START
)
558 add_word_args (format
? "<%s %s>" : "<%s>", tag
, formatted_attribs
);
559 else if (!rollback_empty_tag (tag
))
560 /* Insert close tag only if we didn't rollback,
561 in which case the opening tag is removed. */
562 add_word_args ("</%s>", tag
);
565 if ((start_or_end
!= START
) && old_tag
&& *old_tag
)
566 add_word_args (strlen (old_attribs
) > 0 ? "<%s %s>" : "<%s>",
567 old_tag
, old_attribs
);
573 insert_html_tag (int start_or_end
, char *tag
)
575 insert_html_tag_with_attribute (start_or_end
, tag
, NULL
);
578 /* Output an HTML <link> to the filename for NODE, including the
579 other string as extra attributes. */
581 add_link (char *nodename
, char *attributes
)
585 add_html_elt ("<link ");
586 add_word_args ("%s", attributes
);
587 add_word_args (" href=\"");
588 add_anchor_name (nodename
, 1);
589 add_word_args ("\" title=\"%s\">\n", nodename
);
593 /* Output NAME with characters escaped as appropriate for an anchor
594 name, i.e., escape URL special characters with our _00hh convention
595 if OLD is zero. (See the manual for details on the new scheme.)
597 If OLD is nonzero, generate the node name with the 4.6-and-earlier
598 convention of %hh (and more special characters output as-is, notably
599 - and *). This is only so that external references to old names can
600 still work with HTML generated by the new makeinfo; the gcc folks
601 needed this. Our own HTML does not refer to these names. */
604 add_escaped_anchor_name (char *name
, int old
)
608 if (!old
&& !strchr ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
610 { /* XHTML does not allow anything but an ASCII letter to start an
611 identifier. Therefore kludge in this constant string if we
616 for (; *name
; name
++)
618 if (cr_or_whitespace (*name
))
621 else if (!old
&& !URL_SAFE_CHAR (*name
))
622 /* Cast so characters with the high bit set are treated as >128,
623 for example o-umlaut should be 246, not -10. */
624 add_word_args ("_00%x", (unsigned char) *name
);
626 else if (old
&& !URL_SAFE_CHAR (*name
) && !OLD_URL_SAFE_CHAR (*name
))
627 /* Different output convention, but still cast as above. */
628 add_word_args ("%%%x", (unsigned char) *name
);
635 /* Insert the text for the name of a reference in an HTML anchor
636 appropriate for NODENAME.
638 If HREF is zero, generate text for name= in the new node name
639 conversion convention.
640 If HREF is negative, generate text for name= in the old convention.
641 If HREF is positive, generate the name for an href= attribute, i.e.,
642 including the `#' if it's an internal reference. */
644 add_anchor_name (char *nodename
, int href
)
649 add_url_name (nodename
, href
);
652 /* Always add NODENAME, so that the reference would pinpoint the
653 exact node on its file. This is so several nodes could share the
654 same file, in case of file-name clashes, but also for more
655 accurate browser positioning. */
656 if (strcasecmp (nodename
, "(dir)") == 0)
657 /* Strip the parens, but keep the original letter-case. */
658 add_word_args ("%.3s", nodename
+ 1);
659 else if (strcasecmp (nodename
, "top") == 0)
662 add_escaped_anchor_name (nodename
, href
< 0);
665 /* Insert the text for the name of a reference in an HTML url, appropriate
668 add_url_name (char *nodename
, int href
)
670 add_nodename_to_filename (nodename
, href
);
673 /* Convert non [A-Za-z0-9] to _00xx, where xx means the hexadecimal
674 representation of the ASCII character. Also convert spaces and
675 newlines to dashes. */
677 fix_filename (char *filename
)
680 int len
= strlen (filename
);
681 char *oldname
= xstrdup (filename
);
685 for (i
= 0; i
< len
; i
++)
687 if (cr_or_whitespace (oldname
[i
]))
688 strcat (filename
, "-");
689 else if (URL_SAFE_CHAR (oldname
[i
]))
690 strncat (filename
, (char *) oldname
+ i
, 1);
693 char *hexchar
= xmalloc (6 * sizeof (char));
694 sprintf (hexchar
, "_00%x", (unsigned char) oldname
[i
]);
695 strcat (filename
, hexchar
);
699 /* Check if we are nearing boundaries. */
700 if (strlen (filename
) >= PATH_MAX
- 20)
707 /* As we can't look up a (forward-referenced) node's html filename
708 from the tentry, we take the easy way out. We assume that
709 nodenames are unique, and generate the html filename from the
710 nodename, that's always known. */
712 nodename_to_filename_1 (char *nodename
, int href
)
716 char dirname
[PATH_MAX
];
718 if (strcasecmp (nodename
, "Top") == 0)
720 /* We want to convert references to the Top node into
723 filename
= xstrdup ("index.html"); /* "#Top" is added by our callers */
725 filename
= xstrdup ("Top");
727 else if (strcasecmp (nodename
, "(dir)") == 0)
728 /* We want to convert references to the (dir) node into
730 filename
= xstrdup ("../index.html");
733 filename
= xmalloc (PATH_MAX
);
737 /* Check for external reference: ``(info-document)node-name''
738 Assume this node lives at: ``../info-document/node-name.html''
740 We need to handle the special case (sigh): ``(info-document)'',
741 ie, an external top-node, which should translate to:
742 ``../info-document/info-document.html'' */
745 if (*nodename
== '(')
749 p
= strchr (nodename
, ')');
752 line_error (_("[unexpected] invalid node name: `%s'"), nodename
);
756 length
= p
- nodename
- 1;
758 FILENAME_CMPN (p
- 5, ".info", 5) == 0)
760 /* This is for DOS, and also for Windows and GNU/Linux
761 systems that might have Info files copied from a DOS 8+3
764 FILENAME_CMPN (p
- 4, ".inf", 4) == 0)
766 strcpy (filename
, "../");
767 strncpy (dirname
, nodename
+ 1, length
);
768 *(dirname
+ length
) = '\0';
769 fix_filename (dirname
);
770 strcat (filename
, dirname
);
771 strcat (filename
, "/");
775 /* In the case of just (info-document), there will be nothing
776 remaining, and we will refer to ../info-document/, which will
778 strcat (filename
, p
);
782 fix_filename (filename
+ strlen (filename
) - strlen (p
));
783 strcat (filename
, ".html");
787 /* Produce a file name suitable for the underlying filesystem. */
788 normalize_filename (filename
);
791 /* We add ``#Nodified-filename'' anchor to external references to be
792 prepared for non-split HTML support. Maybe drop this. */
793 if (href
&& *dirname
)
795 strcat (filename
, "#");
796 strcat (filename
, p
);
798 fix_filename (filename
+ strlen (filename
) - strlen (p
));
805 /* If necessary, ie, if current filename != filename of node, output
808 add_nodename_to_filename (char *nodename
, int href
)
810 /* for now, don't check: always output filename */
811 char *filename
= nodename_to_filename_1 (nodename
, href
);
817 nodename_to_filename (char *nodename
)
819 /* The callers of nodename_to_filename use the result to produce
820 <a href=, so call nodename_to_filename_1 with last arg non-zero. */
821 return nodename_to_filename_1 (nodename
, 1);