Ticket #1781: array index is out of bounds.
[kaloumi3.git] / src / man2hlp.c
blobaa95df6cc56df66b6fcfd57cb6691c6f5dcdb3e7
/* Man page to help file converter
   Copyright (C) 1994, 1995, 1998, 2000, 2001, 2002, 2003, 2004, 2005,
   2007 Free Software Foundation, Inc.
   2002 Andrew V. Samoilov
   2002 Pavel Roskin

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/** \file man2hlp.c
 *  \brief Source: man page to help file converter
 */
25 #include <config.h>
27 #include <stdarg.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
32 #include <glib.h>
34 #include "help.h"
/* Size of the fixed line and message buffers used throughout this file */
#define BUFFER_SIZE 256

static int col = 0;             /* Current output column */
static int out_row = 1;         /* Current output row */
static int in_row = 0;          /* Current input row */
static int no_split_flag = 0;   /* Flag: Don't split section on next ".SH" */
static int skip_flag = 0;       /* Flag: Skip this section.
                                   0 = don't skip,
                                   1 = skipping title,
                                   2 = title skipped, skipping text */
static int link_flag = 0;       /* Flag: Next line is a link.
                                   0 = not inside a link,
                                   2 = next line is the first part of a link,
                                   3 = next line is the second part */
static int verbatim_flag = 0;   /* Flag: Copy input to output verbatim */
static int node = 0;            /* Flag: the last section command handled
                                   was .\"NODE rather than .SH */

static const char *c_out;       /* Output filename */
static FILE *f_out;             /* Output file */

static const char *c_in;        /* Current input filename */

static int indentation;         /* Indentation level, n spaces */
static int tp_flag;             /* Flag: .TP paragraph
                                   1 = this line is .TP label,
                                   2 = first line of label description. */
static char *topics = NULL;     /* Text of the .\"TOPICS command, if any */
/* One help section; sections are kept in a singly linked list */
struct node {
    char *node;                 /* Section name */
    char *lname;                /* Translated .SH, NULL if not translated */
    struct node *next;          /* Next section, NULL at the end of the list */
    int heading_level;          /* Number of leading spaces in the title */
};

static struct node nodes;       /* First list element, statically allocated */
static struct node *cnode;      /* Current node */
/* Largest chunk handed to a single fread()/fwrite() call */
#define MAX_STREAM_BLOCK 8192

/*
 * Read in blocks of reasonable size and make sure we read everything.
 * Failure to read everything is an error, indicated by returning 0.
 *
 * data   - destination buffer, at least len bytes long
 * len    - number of bytes to read; 0 yields 0 without touching the stream
 * stream - stream to read from
 *
 * Returns len on success, 0 if fewer than len bytes could be read.
 */
static size_t
persistent_fread (void *data, size_t len, FILE *stream)
{
    size_t count;
    size_t bytes_done = 0;
    char *ptr = (char *) data;

    if (len == 0)
        return 0;

    while (bytes_done < len) {
        /* Never ask for more than one block at a time */
        count = len - bytes_done;
        if (count > MAX_STREAM_BLOCK)
            count = MAX_STREAM_BLOCK;

        count = fread (ptr, 1, count, stream);

        /* A zero-byte read means EOF or error before len was reached */
        if (count == 0)
            return 0;

        bytes_done += count;
        ptr += count;
    }

    return bytes_done;
}
105 * Write in blocks of reasonable size and make sure we write everything.
106 * Failure to write everything is an error, indicated by returning 0.
108 static size_t
109 persistent_fwrite (const void *data, size_t len, FILE *stream)
111 size_t count;
112 size_t bytes_done = 0;
113 const char *ptr = (const char *) data;
115 if (len <= 0)
116 return 0;
118 while (bytes_done < len) {
119 count = len - bytes_done;
120 if (count > MAX_STREAM_BLOCK)
121 count = MAX_STREAM_BLOCK;
123 count = fwrite (ptr, 1, count, stream);
125 if (count <= 0)
126 return 0;
128 bytes_done += count;
129 ptr += count;
132 return bytes_done;
135 /* Report error in input */
136 static void
137 print_error (const char *message)
139 fprintf (stderr, "man2hlp: %s in file \"%s\" on line %d\n", message,
140 c_in, in_row);
143 /* Do fopen(), exit if it fails */
144 static FILE *
145 fopen_check (const char *filename, const char *flags)
147 char tmp[BUFFER_SIZE];
148 FILE *f;
150 f = fopen (filename, flags);
151 if (f == NULL) {
152 g_snprintf (tmp, sizeof (tmp), "man2hlp: Cannot open file \"%s\"",
153 filename);
154 perror (tmp);
155 exit (3);
158 return f;
/*
 * Do fclose(), exit if it fails.
 * The stream's error indicator is checked first, so an earlier failed
 * read or write is also reported.  Either failure ends the program with
 * status 3.
 */
static void
fclose_check (FILE *f)
{
    if (ferror (f)) {
        perror ("man2hlp: File error");
        exit (3);
    }

    if (fclose (f)) {
        perror ("man2hlp: Cannot close file");
        exit (3);
    }
}
176 /* Change output line */
177 static void
178 newline (void)
180 out_row++;
181 col = 0;
182 fprintf (f_out, "\n");
185 /* Calculate the length of string */
186 static int
187 string_len (const char *buffer)
189 static int anchor_flag = 0; /* Flag: Inside hypertext anchor name */
190 static int lc_link_flag = 0; /* Flag: Inside hypertext link target name */
191 int backslash_flag = 0; /* Flag: Backslash quoting */
192 int c; /* Current character */
193 int len = 0; /* Result: the length of the string */
195 while (*(buffer)) {
196 c = *buffer++;
197 if (c == CHAR_LINK_POINTER)
198 lc_link_flag = 1; /* Link target name starts */
199 else if (c == CHAR_LINK_END)
200 lc_link_flag = 0; /* Link target name ends */
201 else if (c == CHAR_NODE_END) {
202 /* Node anchor name starts */
203 anchor_flag = 1;
204 /* Ugly hack to prevent loss of one space */
205 len++;
207 /* Don't add control characters to the length */
208 if (c >= 0 && c < 32)
209 continue;
210 /* Attempt to handle backslash quoting */
211 if (c == '\\' && !backslash_flag) {
212 backslash_flag = 1;
213 continue;
215 backslash_flag = 0;
216 /* Increase length if not inside anchor name or link target name */
217 if (!anchor_flag && !lc_link_flag)
218 len++;
219 if (anchor_flag && c == ']') {
220 /* Node anchor name ends */
221 anchor_flag = 0;
224 return len;
227 /* Output the string */
228 static void
229 print_string (char *buffer)
231 int len; /* The length of current word */
232 int c; /* Current character */
233 int backslash_flag = 0;
235 /* Skipping lines? */
236 if (skip_flag)
237 return;
238 /* Copying verbatim? */
239 if (verbatim_flag) {
240 /* Attempt to handle backslash quoting */
241 while (*(buffer)) {
242 c = *buffer++;
243 if (c == '\\' && !backslash_flag) {
244 backslash_flag = 1;
245 continue;
247 backslash_flag = 0;
248 fputc (c, f_out);
250 } else {
251 /* Split into words */
252 buffer = strtok (buffer, " \t\n");
253 /* Repeat for each word */
254 while (buffer) {
255 /* Skip empty strings */
256 if (*(buffer)) {
257 len = string_len (buffer);
258 /* Change the line if about to break the right margin */
259 if (col + len >= HELP_TEXT_WIDTH)
260 newline ();
261 /* Words are separated by spaces */
262 if (col > 0) {
263 fputc (' ', f_out);
264 col++;
265 } else if (indentation) {
266 while (col++ < indentation)
267 fputc (' ', f_out);
269 /* Attempt to handle backslash quoting */
270 while (*(buffer)) {
271 c = *buffer++;
272 if (c == '\\' && !backslash_flag) {
273 backslash_flag = 1;
274 continue;
276 backslash_flag = 0;
277 fputc (c, f_out);
279 /* Increase column */
280 col += len;
282 /* Get the next word */
283 buffer = strtok (NULL, " \t\n");
284 } /* while */
288 /* Like print_string but with printf-like syntax */
289 static void
290 printf_string (const char *format, ...)
292 va_list args;
293 char buffer[BUFFER_SIZE];
295 va_start (args, format);
296 g_vsnprintf (buffer, sizeof (buffer), format, args);
297 va_end (args);
298 print_string (buffer);
301 /* Handle NODE and .SH commands. is_sh is 1 for .SH, 0 for NODE */
302 static void
303 handle_node (char *buffer, int is_sh)
305 int len, heading_level;
307 /* If we already skipped a section, don't skip another */
308 if (skip_flag == 2) {
309 skip_flag = 0;
311 /* Get the command parameters */
312 buffer = strtok (NULL, "");
313 if (buffer == NULL) {
314 print_error ("Syntax error: .SH: no title");
315 return;
316 } else {
317 /* Remove quotes */
318 if (buffer[0] == '"') {
319 buffer++;
320 len = strlen (buffer);
321 if (buffer[len - 1] == '"') {
322 len--;
323 buffer[len] = 0;
326 /* Calculate heading level */
327 heading_level = 0;
328 while (buffer[heading_level] == ' ')
329 heading_level++;
330 /* Heading level must be even */
331 if (heading_level & 1)
332 print_error ("Syntax error: .SH: odd heading level");
333 if (no_split_flag) {
334 /* Don't start a new section */
335 newline ();
336 print_string (buffer);
337 newline ();
338 newline ();
339 no_split_flag = 0;
340 } else if (skip_flag) {
341 /* Skipping title and marking text for skipping */
342 skip_flag = 2;
343 } else {
344 buffer += heading_level;
345 if (!is_sh || !node) {
346 /* Start a new section, but omit empty section names */
347 if (*buffer) {
348 fprintf (f_out, "%c[%s]", CHAR_NODE_END, buffer);
349 col++;
350 newline ();
353 /* Add section to the linked list */
354 if (!cnode) {
355 cnode = &nodes;
356 } else {
357 cnode->next = malloc (sizeof (nodes));
358 cnode = cnode->next;
360 cnode->node = strdup (buffer);
361 cnode->lname = NULL;
362 cnode->next = NULL;
363 cnode->heading_level = heading_level;
365 if (is_sh) {
366 /* print_string() strtok()es buffer, so */
367 cnode->lname = strdup (buffer);
368 print_string (buffer);
369 newline ();
370 newline ();
372 } /* Start new section */
373 } /* Has parameters */
374 node = !is_sh;
377 /* Convert character from the macro name to the font marker */
378 static inline char
379 char_to_font (char c)
381 switch (c) {
382 case 'R':
383 return CHAR_FONT_NORMAL;
384 case 'B':
385 return CHAR_FONT_BOLD;
386 case 'I':
387 return CHAR_FONT_ITALIC;
388 default:
389 return 0;
394 * Handle alternate font commands (.BR, .IR, .RB, .RI, .BI, .IB)
395 * Return 0 if the command wasn't recognized, 1 otherwise
397 static int
398 handle_alt_font (char *buffer)
400 char *p;
401 char *w;
402 char font[2];
403 int in_quotes = 0;
404 int alt_state = 0;
406 if (strlen (buffer) != 3)
407 return 0;
409 if (buffer[0] != '.')
410 return 0;
412 font[0] = char_to_font (buffer[1]);
413 font[1] = char_to_font (buffer[2]);
415 /* Exclude names with unknown characters, .BB, .II and .RR */
416 if (font[0] == 0 || font[1] == 0 || font[0] == font[1])
417 return 0;
419 p = strtok (NULL, "");
420 if (p == NULL) {
421 return 1;
424 w = buffer;
425 *w++ = font[0];
427 while (*p) {
429 if (*p == '"') {
430 in_quotes = !in_quotes;
431 p++;
432 continue;
435 if (*p == ' ' && !in_quotes) {
436 p++;
437 /* Don't change font if we are at the end */
438 if (*p != 0) {
439 alt_state = !alt_state;
440 *w++ = font[alt_state];
443 /* Skip more spaces */
444 while (*p == ' ')
445 p++;
447 continue;
450 *w++ = *p++;
453 /* Turn off attributes if necessary */
454 if (font[alt_state] != CHAR_FONT_NORMAL)
455 *w++ = CHAR_FONT_NORMAL;
457 *w = 0;
458 print_string (buffer);
460 return 1;
463 /* Handle .IP and .TP commands. is_tp is 1 for .TP, 0 for .IP */
464 static void
465 handle_tp_ip (int is_tp)
467 if (col > 0)
468 newline ();
469 newline ();
470 if (is_tp) {
471 tp_flag = 1;
472 indentation = 0;
473 } else
474 indentation = 8;
477 /* Handle all the roff dot commands. See man groff_man for details */
478 static void
479 handle_command (char *buffer)
481 int len;
483 /* Get the command name */
484 strtok (buffer, " \t");
486 if (strcmp (buffer, ".SH") == 0) {
487 indentation = 0;
488 handle_node (buffer, 1);
489 } else if (strcmp (buffer, ".\\\"NODE") == 0) {
490 handle_node (buffer, 0);
491 } else if (strcmp (buffer, ".\\\"DONT_SPLIT\"") == 0) {
492 no_split_flag = 1;
493 } else if (strcmp (buffer, ".\\\"SKIP_SECTION\"") == 0) {
494 skip_flag = 1;
495 } else if (strcmp (buffer, ".\\\"LINK2\"") == 0) {
496 /* Next two input lines form a link */
497 link_flag = 2;
498 } else if ((strcmp (buffer, ".PP") == 0)
499 || (strcmp (buffer, ".P") == 0)
500 || (strcmp (buffer, ".LP") == 0)) {
501 indentation = 0;
502 /* End of paragraph */
503 if (col > 0)
504 newline ();
505 newline ();
506 } else if (strcmp (buffer, ".nf") == 0) {
507 /* Following input lines are to be handled verbatim */
508 verbatim_flag = 1;
509 if (col > 0)
510 newline ();
511 } else if (strcmp (buffer, ".I") == 0 || strcmp (buffer, ".B") == 0
512 || strcmp (buffer, ".SB") == 0) {
513 /* Bold text or italics text */
514 char *p;
515 char *w;
516 int backslash_flag = 0;
518 /* .SB [text]
519 * Causes the text on the same line or the text on the
520 * next line to appear in boldface font, one point
521 * size smaller than the default font.
524 /* FIXME: text is optional, so there is no error */
525 p = strtok (NULL, "");
526 if (p == NULL) {
527 print_error ("Syntax error: .I | .B | .SB : no text");
528 return;
531 *buffer = (buffer[1] == 'I') ? CHAR_FONT_ITALIC : CHAR_FONT_BOLD;
533 /* Attempt to handle backslash quoting */
534 for (w = &buffer[1]; *p; p++) {
535 if (*p == '\\' && !backslash_flag) {
536 backslash_flag = 1;
537 continue;
539 backslash_flag = 0;
540 *w++ = *p;
543 *w++ = CHAR_FONT_NORMAL;
544 *w = 0;
545 print_string (buffer);
546 } else if (strcmp (buffer, ".TP") == 0) {
547 handle_tp_ip (1);
548 } else if (strcmp (buffer, ".IP") == 0) {
549 handle_tp_ip (0);
550 } else if (strcmp (buffer, ".\\\"TOPICS") == 0) {
551 if (out_row > 1) {
552 print_error
553 ("Syntax error: .\\\"TOPICS must be first command");
554 return;
556 buffer = strtok (NULL, "");
557 if (buffer == NULL) {
558 print_error ("Syntax error: .\\\"TOPICS: no text");
559 return;
561 /* Remove quotes */
562 if (buffer[0] == '"') {
563 buffer++;
564 len = strlen (buffer);
565 if (buffer[len - 1] == '"') {
566 len--;
567 buffer[len] = 0;
570 topics = strdup (buffer);
571 } else if (strcmp (buffer, ".br") == 0) {
572 if (col)
573 newline ();
574 } else if (strncmp (buffer, ".\\\"", 3) == 0) {
575 /* Comment */
576 } else if (strcmp (buffer, ".TH") == 0) {
577 /* Title header */
578 } else if (strcmp (buffer, ".SM") == 0) {
579 /* Causes the text on the same line or the text on the
580 * next line to appear in a font that is one point
581 * size smaller than the default font. */
582 buffer = strtok (NULL, "");
583 if (buffer)
584 print_string (buffer);
585 } else if (handle_alt_font (buffer) == 1) {
586 return;
587 } else {
588 /* Other commands are ignored */
589 char warn_str[BUFFER_SIZE];
590 g_snprintf (warn_str, sizeof (warn_str),
591 "Warning: unsupported command %s", buffer);
592 print_error (warn_str);
593 return;
/*
 * Link targets seen in the input, kept as a singly linked list whose
 * first element is the static object "links".
 */
static struct links {
    char *linkname;             /* Section name */
    int line;                   /* Input line in ... */
    const char *filename;       /* ... this input file */
    struct links *next;
} links, *current_link;
604 static void
605 handle_link (char *buffer)
607 static char old[80];
608 int len;
609 char *amp;
610 const char *amp_arg;
612 switch (link_flag) {
613 case 1:
614 /* Old format link, not supported */
615 break;
616 case 2:
617 /* First part of new format link */
618 /* Bold text or italics text */
619 if (buffer[0] == '.' && (buffer[1] == 'I' || buffer[1] == 'B'))
620 for (buffer += 2; *buffer == ' ' || *buffer == '\t'; buffer++);
621 g_strlcpy (old, buffer, sizeof (old));
622 link_flag = 3;
623 break;
624 case 3:
625 /* Second part of new format link */
626 if (buffer[0] == '.')
627 buffer++;
628 if (buffer[0] == '\\')
629 buffer++;
630 if (buffer[0] == '"')
631 buffer++;
632 len = strlen (buffer);
633 if (len && buffer[len - 1] == '"') {
634 buffer[--len] = 0;
637 /* "Layout\&)," -- "Layout" should be highlighted, but not ")," */
638 amp = strstr (old, "\\&");
639 if (amp) {
640 *amp = 0;
641 amp += 2;
642 amp_arg = amp;
643 } else {
644 amp_arg = "";
647 printf_string ("%c%s%c%s%c%s\n", CHAR_LINK_START, old,
648 CHAR_LINK_POINTER, buffer, CHAR_LINK_END, amp_arg);
649 link_flag = 0;
650 /* Add to the linked list */
651 if (current_link) {
652 current_link->next = malloc (sizeof (links));
653 current_link = current_link->next;
654 current_link->next = NULL;
655 } else {
656 current_link = &links;
658 current_link->linkname = strdup (buffer);
659 current_link->filename = c_in;
660 current_link->line = in_row;
661 break;
666 main (int argc, char **argv)
668 int len; /* Length of input line */
669 const char *c_man; /* Manual filename */
670 const char *c_tmpl; /* Template filename */
671 FILE *f_man; /* Manual file */
672 FILE *f_tmpl; /* Template file */
673 char buffer[BUFFER_SIZE]; /* Full input line */
674 char *lc_node = NULL;
675 char *outfile_buffer; /* Large buffer to keep the output file */
676 long cont_start; /* Start of [Contents] */
677 long file_end; /* Length of the output file */
679 /* Validity check for arguments */
680 if (argc != 4) {
681 fprintf (stderr,
682 "Usage: man2hlp file.man template_file helpfile\n");
683 return 3;
686 c_man = argv[1];
687 c_tmpl = argv[2];
688 c_out = argv[3];
690 /* First stage - process the manual, write to the output file */
691 f_man = fopen_check (c_man, "r");
692 f_out = fopen_check (c_out, "w");
693 c_in = c_man;
695 /* Repeat for each input line */
696 while (fgets (buffer, BUFFER_SIZE, f_man)) {
697 char *input_line; /* Input line without initial "\&" */
699 if (buffer[0] == '\\' && buffer[1] == '&')
700 input_line = buffer + 2;
701 else
702 input_line = buffer;
704 in_row++;
705 len = strlen (input_line);
706 /* Remove terminating newline */
707 if (input_line[len - 1] == '\n') {
708 len--;
709 input_line[len] = 0;
712 if (verbatim_flag) {
713 /* Copy the line verbatim */
714 if (strcmp (input_line, ".fi") == 0) {
715 verbatim_flag = 0;
716 } else {
717 print_string (input_line);
718 newline ();
720 } else if (link_flag) {
721 /* The line is a link */
722 handle_link (input_line);
723 } else if (buffer[0] == '.') {
724 /* The line is a roff command */
725 handle_command (input_line);
726 } else {
727 /* A normal line, just output it */
728 print_string (input_line);
730 /* .TP label processed as usual line */
731 if (tp_flag) {
732 if (tp_flag == 1) {
733 tp_flag = 2;
734 } else {
735 tp_flag = 0;
736 indentation = 8;
737 if (col >= indentation)
738 newline ();
739 else
740 while (++col < indentation)
741 fputc (' ', f_out);
746 newline ();
747 fclose_check (f_man);
748 /* First stage ends here, closing the manual */
750 /* Second stage - process the template file */
751 f_tmpl = fopen_check (c_tmpl, "r");
752 c_in = c_tmpl;
754 /* Repeat for each input line */
755 /* Read a line */
756 while (fgets (buffer, BUFFER_SIZE, f_tmpl)) {
757 if (lc_node) {
758 if (*buffer && *buffer != '\n') {
759 cnode->lname = strdup (buffer);
760 lc_node = strchr (cnode->lname, '\n');
761 if (lc_node)
762 *lc_node = 0;
764 lc_node = NULL;
765 } else {
766 lc_node = strchr (buffer, CHAR_NODE_END);
767 if (lc_node && (lc_node[1] == '[')) {
768 char *p = strchr (lc_node, ']');
769 if (p) {
770 if (strncmp (lc_node + 1, "[main]", 6) == 0) {
771 lc_node = NULL;
772 } else {
773 if (!cnode) {
774 cnode = &nodes;
775 } else {
776 cnode->next = malloc (sizeof (nodes));
777 cnode = cnode->next;
779 cnode->node = strdup (lc_node + 2);
780 cnode->node[p - lc_node - 2] = 0;
781 cnode->lname = NULL;
782 cnode->next = NULL;
783 cnode->heading_level = 0;
785 } else
786 lc_node = NULL;
787 } else
788 lc_node = NULL;
790 fputs (buffer, f_out);
793 cont_start = ftell (f_out);
794 if (cont_start <= 0) {
795 perror (c_out);
796 return 1;
799 if (topics)
800 fprintf (f_out, "\004[Contents]\n%s\n\n", topics);
801 else
802 fprintf (f_out, "\004[Contents]\n");
804 for (current_link = &links; current_link && current_link->linkname;) {
805 int found = 0;
806 struct links *next = current_link->next;
808 if (strcmp (current_link->linkname, "Contents") == 0) {
809 found = 1;
810 } else {
811 for (cnode = &nodes; cnode && cnode->node; cnode = cnode->next) {
812 if (strcmp (cnode->node, current_link->linkname) == 0) {
813 found = 1;
814 break;
818 if (!found) {
819 g_snprintf (buffer, sizeof (buffer), "Stale link \"%s\"",
820 current_link->linkname);
821 c_in = current_link->filename;
822 in_row = current_link->line;
823 print_error (buffer);
825 free (current_link->linkname);
826 if (current_link != &links)
827 free (current_link);
828 current_link = next;
831 for (cnode = &nodes; cnode && cnode->node;) {
832 struct node *next = cnode->next;
833 lc_node = cnode->node;
835 if (*lc_node)
836 fprintf (f_out, " %*s\001%s\002%s\003", cnode->heading_level,
837 "", cnode->lname ? cnode->lname : lc_node, lc_node);
838 fprintf (f_out, "\n");
840 free (cnode->node);
841 if (cnode->lname)
842 free (cnode->lname);
843 if (cnode != &nodes)
844 free (cnode);
845 cnode = next;
848 file_end = ftell (f_out);
850 /* Sanity check */
851 if ((file_end <= 0) || (file_end - cont_start <= 0)) {
852 perror (c_out);
853 return 1;
856 fclose_check (f_out);
857 fclose_check (f_tmpl);
858 /* Second stage ends here, closing all files, note the end of output */
861 * Third stage - swap two parts of the output file.
862 * First, open the output file for reading and load it into the memory.
864 f_out = fopen_check (c_out, "r");
866 outfile_buffer = malloc (file_end);
867 if (!outfile_buffer)
868 return 1;
870 if (!persistent_fread (outfile_buffer, file_end, f_out)) {
871 perror (c_out);
872 return 1;
875 fclose_check (f_out);
876 /* Now the output file is in the memory */
878 /* Again open output file for writing */
879 f_out = fopen_check (c_out, "w");
881 /* Write part after the "Contents" node */
882 if (!persistent_fwrite
883 (outfile_buffer + cont_start, file_end - cont_start, f_out)) {
884 perror (c_out);
885 return 1;
888 /* Write part before the "Contents" node */
889 if (!persistent_fwrite (outfile_buffer, cont_start, f_out)) {
890 perror (c_out);
891 return 1;
894 free (outfile_buffer);
895 fclose_check (f_out);
896 /* Closing everything */
898 return 0;