add more spacing
[personal-kdebase.git] / runtime / kioslave / man / man2html.cpp
blobb336d8f345cf2d9bd4d990ae8586da4c94846a81
1 /*
2 This file is part of the KDE libraries
4 Copyright (C) 2005 Nicolas GOUTTE <goutte@kde.org>
6 ### TODO: who else?
7 */
9 // Start of verbatim comment
12 ** This program was written by Richard Verhoeven (NL:5482ZX35)
13 ** at the Eindhoven University of Technology. Email: rcb5@win.tue.nl
15 ** Permission is granted to distribute, modify and use this program as long
16 ** as this comment is not removed or changed.
19 // End of verbatim comment
22 * man2html-linux-1.0/1.1
23 * This version modified for Redhat/Caldera linux - March 1996.
24 * Michael Hamilton <michael@actrix.gen.nz>.
26 * man2html-linux-1.2
27 * Added support for BSD mandoc pages - I didn't have any documentation
28 * on the mandoc macros, so I may have missed some.
29 * Michael Hamilton <michael@actrix.gen.nz>.
31 * vh-man2html-1.3
32 * Renamed to avoid confusion (V for Verhoeven, H for Hamilton).
34 * vh-man2html-1.4
35 * Now uses /etc/man.config
36 * Added support for compressed pages.
37 * Added "length-safe" string operations for client input parameters.
38 * More secure, -M secured, and client input string lengths checked.
43 ** If you want to use this program for your WWW server, adjust the line
44 ** which defines the CGIBASE or compile it with the -DCGIBASE='"..."' option.
46 ** You have to adjust the built-in manpath to your local system. Note that
47 ** every directory should start and end with the '/' and that the first
48 ** directory should be "/" to allow a full path as an argument.
50 ** The program first checks if PATH_INFO contains some information.
51 ** If it does (i.e. man2html/some/thing is used), the program will look
52 ** for a manpage called PATH_INFO in the manpath.
54 ** Otherwise the manpath is searched for the specified command line argument,
55 ** where the following options can be used:
57 ** name name of manpage (csh, printf, xv, troff)
58 ** section the section (1 2 3 4 5 6 7 8 9 n l 1v ...)
59 ** -M path an extra directory to look for manpages (replaces "/")
61 ** If man2html finds multiple manpages that satisfy the options, an index
62 ** is displayed and the user can make a choice. If only one page is
63 ** found, that page will be displayed.
65 ** man2html will add links to the converted manpages. The function add_links
66 ** is used for that. At the moment it will add links as follows, where
67 ** indicates what should match to start with:
68 ** ^^^
69 ** Recognition Item Link
70 ** ----------------------------------------------------------
71 ** name(*) Manpage ../man?/name.*
72 ** ^
73 ** name@hostname Email address mailto:name@hostname
74 ** ^
75 ** method://string URL method://string
76 ** ^^^
77 ** www.host.name WWW server http://www.host.name
78 ** ^^^^
79 ** ftp.host.name FTP server ftp://ftp.host.name
80 ** ^^^^
81 ** <file.h> Include file file:/usr/include/file.h
82 ** ^^^
84 ** Since man2html does not check if manpages, hosts or email addresses exist,
85 ** some links might not work. For manpages, some extra checks are performed
86 ** to make sure not every () pair creates a link. Also out of date pages
87 ** might point to incorrect places.
89 ** The program will not allow users to get system specific files, such as
90 ** /etc/passwd. It will check that "man" is part of the specified file and
91 ** that "/../" isn't. Even if someone manages to get such file, man2html will
92 ** handle it like a manpage and will usually not produce any output (or crash).
94 ** If you find any bugs when normal manpages are converted, please report
95 ** them to me (rcb5@win.tue.nl) after you have checked that man(1) can handle
96 ** the manpage correctly.
98 ** Known bugs and missing features:
100 ** * Equations are not converted at all.
101 ** * Tables are converted but some features are not possible in html.
102 ** * The tabbing environment is converted by counting characters and adding
103 ** spaces. This might go wrong (outside <PRE>)
104 ** * Some manpages rely on the fact that troff/nroff is used to convert
105 ** them and use features which are not described in the man manpages.
106 ** (definitions, calculations, conditionals, requests). I can't guarantee
107 ** that all these features work on all manpages. (I didn't have the
108 ** time to look through all the available manpages.)
111 #include "man2html.h"
113 # include <config-runtime.h>
115 #include <ctype.h>
117 #include <unistd.h>
118 #include <string.h>
120 #include <stdio.h>
122 #include <QtCore/QByteArray>
123 #include <QtCore/QDateTime>
124 #include <QtCore/QMap>
125 #include <QtCore/QStack>
126 #include <QtCore/QString>
128 #ifdef SIMPLE_MAN2HTML
129 # include <stdlib.h>
130 # include <iostream>
131 # include <dirent.h>
132 # include <sys/stat.h>
133 # define kDebug(x) cerr
134 # define kWarning(x) cerr << "WARNING "
135 # define BYTEARRAY(x) x.constData()
136 #else
137 # include <QTextCodec>
138 # include <kdebug.h>
139 # include <kdeversion.h>
140 # define BYTEARRAY(x) x
141 #endif
145 using namespace std;
147 #define NULL_TERMINATED(n) ((n) + 1)
149 #define HUGE_STR_MAX 10000
150 #define LARGE_STR_MAX 2000
151 #define MED_STR_MAX 500
152 #define SMALL_STR_MAX 100
153 #define TINY_STR_MAX 10
156 #if 1
157 // The output is currently too horrible to be called HTML 4.01, so give no
158 // DOCTYPE at all.
159 #define DOCTYPE ""
160 #else
161 #define DOCTYPE "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"
162 #endif
164 /* mdoc(7) Bl/El lists to HTML list types */
165 #define BL_DESC_LIST 1
166 #define BL_BULLET_LIST 2
167 #define BL_ENUM_LIST 4
169 /* mdoc(7) Bd/Ed example(?) blocks */
170 #define BD_LITERAL 1
171 #define BD_INDENT 2
173 static int s_nroff = 1; // NROFF mode by default
175 static int mandoc_name_count = 0; /* Don't break on the first Nm */
/**
 * Allocate a character buffer able to hold a string of \p len characters
 * plus the terminating NUL.
 * \param len number of characters (excluding the terminator)
 * \return a buffer obtained with new[]; the caller releases it with delete[]
 */
static char *stralloc(int len)
{
    /* one extra byte for the terminating NUL */
    char *const block = new char[len + 1];
#ifdef SIMPLE_MAN2HTML
    if (block == NULL)
    {
        cerr << "man2html: out of memory" << endl;
        exit(EXIT_FAILURE);
    }
#else
    // a plain new-expression reports failure by throwing, never by returning NULL
#endif
    return block;
}
193 static char *strlimitcpy(char *to, char *from, int n, int limit)
194 { /* Assumes space for limit plus a null */
195 const int len = n > limit ? limit : n;
196 qstrncpy(to, from, len + 1);
197 to[len] = '\0';
198 return to;
201 /* below this you should not change anything unless you know a lot
202 ** about this program or about troff.
/// Structure for character definitions
struct CSTRDEF {
    int nr;         ///< two-character name packed into an integer (see the V() macro)
    int slen;       ///< length value stored alongside the replacement text
    const char *st; ///< replacement text
};
214 const char NEWLINE[2]="\n";
217 * Class for defining strings and macros
219 class StringDefinition
221 public:
222 StringDefinition( void ) : m_length(0) {}
223 StringDefinition( int len, const char* cstr ) : m_length( len ), m_output( cstr ) {}
224 public:
225 int m_length; ///< Length of output text
226 QByteArray m_output; ///< Defined string
/**
 * Class for defining number registers
 * \note Not for internal read-only registers
 */
class NumberDefinition
{
public:
    /**
     * Both arguments default to 0, so this single constructor serves as the
     * default constructor, the (implicit) conversion from a plain value and
     * the value/increment form.
     */
    NumberDefinition( int value = 0, int incr = 0 )
        : m_value( value )
        , m_increment( incr )
    {
    }

public:
    int m_value; ///< value of number register
    int m_increment; ///< Increment of number register
    // ### TODO: display form (.af)
};
246 * Map of character definitions
248 static QMap<QByteArray,StringDefinition> s_characterDefinitionMap;
251 * Map of string variable and macro definitions
252 * \note String variables and macros are the same thing!
254 static QMap<QByteArray,StringDefinition> s_stringDefinitionMap;
257 * Map of number registers
258 * \note Internal number registers (starting with a dot) are not handled here
260 static QMap<QByteArray,NumberDefinition> s_numberDefinitionMap;
262 static void fill_old_character_definitions( void );
265 * Initialize character variables
267 static void InitCharacterDefinitions( void )
269 fill_old_character_definitions();
270 // ### HACK: as we are converting to HTML too early, define characters with HTML references
271 s_characterDefinitionMap.insert( "&lt;-", StringDefinition( 1, "&larr;" ) ); // <-
272 s_characterDefinitionMap.insert( "-&gt;", StringDefinition( 1, "&rarr;" ) ); // ->
273 s_characterDefinitionMap.insert( "&lt;&gt;", StringDefinition( 1, "&harr;" ) ); // <>
274 s_characterDefinitionMap.insert( "&lt;=", StringDefinition( 1, "&le;" ) ); // <=
275 s_characterDefinitionMap.insert( "&gt;=", StringDefinition( 1, "&ge;" ) ); // >=
276 // End HACK
280 * Initialize string variables
282 static void InitStringDefinitions( void )
284 // mdoc-only, see mdoc.samples(7)
285 s_stringDefinitionMap.insert( "<=", StringDefinition( 1, "&le;" ) );
286 s_stringDefinitionMap.insert( ">=", StringDefinition( 1, "&ge;" ) );
287 s_stringDefinitionMap.insert( "Rq", StringDefinition( 1, "&rdquo;" ) );
288 s_stringDefinitionMap.insert( "Lq", StringDefinition( 1, "&ldquo;" ) );
289 s_stringDefinitionMap.insert( "ua", StringDefinition( 1, "&circ" ) ); // Note this is different from \(ua
290 s_stringDefinitionMap.insert( "aa", StringDefinition( 1, "&acute;" ) );
291 s_stringDefinitionMap.insert( "ga", StringDefinition( 1, "`" ) );
292 s_stringDefinitionMap.insert( "q", StringDefinition( 1, "&quot;" ) );
293 s_stringDefinitionMap.insert( "Pi", StringDefinition( 1, "&pi;" ) );
294 s_stringDefinitionMap.insert( "Ne", StringDefinition( 1, "&ne;" ) );
295 s_stringDefinitionMap.insert( "Le", StringDefinition( 1, "&le;" ) );
296 s_stringDefinitionMap.insert( "Ge", StringDefinition( 1, "&ge;" ) );
297 s_stringDefinitionMap.insert( "Lt", StringDefinition( 1, "&lt;" ) );
298 s_stringDefinitionMap.insert( "Gt", StringDefinition( 1, "&gt;" ) );
299 s_stringDefinitionMap.insert( "Pm", StringDefinition( 1, "&plusmn;" ) );
300 s_stringDefinitionMap.insert( "If", StringDefinition( 1, "&infin;" ) );
301 s_stringDefinitionMap.insert( "Na", StringDefinition( 3, "NaN" ) );
302 s_stringDefinitionMap.insert( "Ba", StringDefinition( 1, "|" ) );
303 // end mdoc-only
304 // man(7)
305 s_stringDefinitionMap.insert( "Tm", StringDefinition( 1, "&trade;" ) ); // \*(TM
306 s_stringDefinitionMap.insert( "R", StringDefinition( 1, "&reg;" ) ); // \*R
307 // end man(7)
308 // Missing characters from man(7):
309 // \*S "Change to default font size"
310 #ifndef SIMPLE_MAN2HTML
311 // Special KDE KIO man:
312 const QByteArray kdeversion(KDE_VERSION_STRING);
313 s_stringDefinitionMap.insert( ".KDE_VERSION_STRING", StringDefinition( kdeversion.length(), kdeversion ) );
314 #endif
318 * Initialize number registers
319 * \note Internal read-only registers are not handled here
321 static void InitNumberDefinitions( void )
323 // As the date number registers are more for end-users, better choose local time.
324 // Groff seems to support Gregorian dates only
325 QDate today( QDate::currentDate() );
326 s_numberDefinitionMap.insert( "year", today.year() ); // Y2K-correct year
327 s_numberDefinitionMap.insert( "yr", today.year() - 1900 ); // Y2K-incorrect year
328 s_numberDefinitionMap.insert( "mo", today.month() );
329 s_numberDefinitionMap.insert( "dy", today.day() );
330 s_numberDefinitionMap.insert( "dw", today.dayOfWeek() );
334 #define V(A,B) ((A)*256+(B))
336 //used in expand_char, e.g. for "\(bu"
337 // see groff_char(7) for list
338 static CSTRDEF standardchar[] = {
339 { V('*','*'), 1, "*" },
340 { V('*','A'), 1, "&Alpha;" },
341 { V('*','B'), 1, "&Beta;" },
342 { V('*','C'), 1, "&Xi;" },
343 { V('*','D'), 1, "&Delta;" },
344 { V('*','E'), 1, "&Epsilon;" },
345 { V('*','F'), 1, "&Phi;" },
346 { V('*','G'), 1, "&Gamma;" },
347 { V('*','H'), 1, "&Theta;" },
348 { V('*','I'), 1, "&Iota;" },
349 { V('*','K'), 1, "&Kappa;" },
350 { V('*','L'), 1, "&Lambda;" },
351 { V('*','M'), 1, "&Mu:" },
352 { V('*','N'), 1, "&Nu;" },
353 { V('*','O'), 1, "&Omicron;" },
354 { V('*','P'), 1, "&Pi;" },
355 { V('*','Q'), 1, "&Psi;" },
356 { V('*','R'), 1, "&Rho;" },
357 { V('*','S'), 1, "&Sigma;" },
358 { V('*','T'), 1, "&Tau;" },
359 { V('*','U'), 1, "&Upsilon;" },
360 { V('*','W'), 1, "&Omega;" },
361 { V('*','X'), 1, "&Chi;" },
362 { V('*','Y'), 1, "&Eta;" },
363 { V('*','Z'), 1, "&Zeta;" },
364 { V('*','a'), 1, "&alpha;"},
365 { V('*','b'), 1, "&beta;"},
366 { V('*','c'), 1, "&xi;"},
367 { V('*','d'), 1, "&delta;"},
368 { V('*','e'), 1, "&epsilon;"},
369 { V('*','f'), 1, "&phi;"},
370 { V('*','g'), 1, "&gamma;"},
371 { V('*','h'), 1, "&theta;"},
372 { V('*','i'), 1, "&iota;"},
373 { V('*','k'), 1, "&kappa;"},
374 { V('*','l'), 1, "&lambda;"},
375 { V('*','m'), 1, "&mu;" },
376 { V('*','n'), 1, "&nu;"},
377 { V('*','o'), 1, "&omicron;"},
378 { V('*','p'), 1, "&pi;"},
379 { V('*','q'), 1, "&psi;"},
380 { V('*','r'), 1, "&rho;"},
381 { V('*','s'), 1, "&sigma;"},
382 { V('*','t'), 1, "&tau;"},
383 { V('*','u'), 1, "&upsilon;"},
384 { V('*','w'), 1, "&omega;"},
385 { V('*','x'), 1, "&chi;"},
386 { V('*','y'), 1, "&eta;"},
387 { V('*','z'), 1, "&zeta;"},
388 { V('+','-'), 1, "&plusmn;" }, // not in groff_char(7)
389 { V('+','f'), 1, "&phi;"}, // phi1, we use the standard phi
390 { V('+','h'), 1, "&theta;"}, // theta1, we use the standard theta
391 { V('+','p'), 1, "&omega;"}, // omega1, we use the standard omega
392 { V('1','2'), 1, "&frac12;" },
393 { V('1','4'), 1, "&frac14;" },
394 { V('3','4'), 1, "&frac34;" },
395 { V('F','i'), 1, "&#xFB03;" }, // ffi ligature
396 { V('F','l'), 1, "&#xFB04;" }, // ffl ligature
397 { V('a','p'), 1, "~" },
398 { V('b','r'), 1, "|" },
399 { V('b','u'), 1, "&bull;" },
400 { V('b','v'), 1, "|" },
401 { V('c','i'), 1, "&#x25CB;" }, // circle ### TODO verify
402 { V('c','o'), 1, "&copy;" },
403 { V('c','t'), 1, "&cent;" },
404 { V('d','e'), 1, "&deg;" },
405 { V('d','g'), 1, "&dagger;" },
406 { V('d','i'), 1, "&divide;" },
407 { V('e','m'), 1, "&emdash;" },
408 { V('e','n'), 1, "&endash;"},
409 { V('e','q'), 1, "=" },
410 { V('e','s'), 1, "&empty;" },
411 { V('f','f'), 1, "&#0xFB00;" }, // ff ligature
412 { V('f','i'), 1, "&#0xFB01;" }, // fi ligature
413 { V('f','l'), 1, "&#0xFB02;" }, // fl ligature
414 { V('f','m'), 1, "&prime;" },
415 { V('g','a'), 1, "`" },
416 { V('h','y'), 1, "-" },
417 { V('l','c'), 2, "|&#175;" }, // ### TODO: not in groff_char(7)
418 { V('l','f'), 2, "|_" }, // ### TODO: not in groff_char(7)
419 { V('l','k'), 1, "<FONT SIZE=+2>{</FONT>" }, // ### TODO: not in groff_char(7)
420 { V('m','i'), 1, "-" }, // ### TODO: not in groff_char(7)
421 { V('m','u'), 1, "&times;" },
422 { V('n','o'), 1, "&not;" },
423 { V('o','r'), 1, "|" },
424 { V('p','l'), 1, "+" },
425 { V('r','c'), 2, "&#175;|" }, // ### TODO: not in groff_char(7)
426 { V('r','f'), 2, "_|" }, // ### TODO: not in groff_char(7)
427 { V('r','g'), 1, "&reg;" },
428 { V('r','k'), 1, "<FONT SIZE=+2>}</FONT>" }, // ### TODO: not in groff_char(7)
429 { V('r','n'), 1, "&oline;" },
430 { V('r','u'), 1, "_" },
431 { V('s','c'), 1, "&sect;" },
432 { V('s','l'), 1, "/" },
433 { V('s','q'), 2, "&#x25A1" }, // WHITE SQUARE
434 { V('t','s'), 1, "&#x03C2;" }, // FINAL SIGMA
435 { V('u','l'), 1, "_" },
436 { V('-','D'), 1, "&ETH;" },
437 { V('S','d'), 1, "&eth;" },
438 { V('T','P'), 1, "&THORN;" },
439 { V('T','p'), 1, "&thorn;" },
440 { V('A','E'), 1, "&AElig;" },
441 { V('a','e'), 1, "&aelig;" },
442 { V('O','E'), 1, "&OElig;" },
443 { V('o','e'), 1, "&oelig;" },
444 { V('s','s'), 1, "&szlig;" },
445 { V('\'','A'), 1, "&Aacute;" },
446 { V('\'','E'), 1, "&Eacute;" },
447 { V('\'','I'), 1, "&Iacute;" },
448 { V('\'','O'), 1, "&Oacute;" },
449 { V('\'','U'), 1, "&Uacute;" },
450 { V('\'','Y'), 1, "&Yacute;" },
451 { V('\'','a'), 1, "&aacute;" },
452 { V('\'','e'), 1, "&eacute;" },
453 { V('\'','i'), 1, "&iacute;" },
454 { V('\'','o'), 1, "&oacute;" },
455 { V('\'','u'), 1, "&uacute;" },
456 { V('\'','y'), 1, "&yacute;" },
457 { V(':','A'), 1, "&Auml;" },
458 { V(':','E'), 1, "&Euml;" },
459 { V(':','I'), 1, "&Iuml;" },
460 { V(':','O'), 1, "&Ouml;" },
461 { V(':','U'), 1, "&Uuml;" },
462 { V(':','a'), 1, "&auml;" },
463 { V(':','e'), 1, "&euml;" },
464 { V(':','i'), 1, "&iuml;" },
465 { V(':','o'), 1, "&ouml;" },
466 { V(':','u'), 1, "&uuml;" },
467 { V(':','y'), 1, "&yuml;" },
468 { V('^','A'), 1, "&Acirc;" },
469 { V('^','E'), 1, "&Ecirc;" },
470 { V('^','I'), 1, "&Icirc;" },
471 { V('^','O'), 1, "&Ocirc;" },
472 { V('^','U'), 1, "&Ucirc;" },
473 { V('^','a'), 1, "&acirc;" },
474 { V('^','e'), 1, "&ecirc;" },
475 { V('^','i'), 1, "&icirc;" },
476 { V('^','o'), 1, "&ocirc;" },
477 { V('^','u'), 1, "&ucirc;" },
478 { V('`','A'), 1, "&Agrave;" },
479 { V('`','E'), 1, "&Egrave;" },
480 { V('`','I'), 1, "&Igrave;" },
481 { V('`','O'), 1, "&Ograve;" },
482 { V('`','U'), 1, "&Ugrave;" },
483 { V('`','a'), 1, "&agrave;" },
484 { V('`','e'), 1, "&egrave;" },
485 { V('`','i'), 1, "&igrave;" },
486 { V('`','o'), 1, "&ograve;" },
487 { V('`','u'), 1, "&ugrave;" },
488 { V('~','A'), 1, "&Atilde;" },
489 { V('~','N'), 1, "&Ntilde;" },
490 { V('~','O'), 1, "&Otilde;" },
491 { V('~','a'), 1, "&atilde" },
492 { V('~','n'), 1, "&ntidle;" },
493 { V('~','o'), 1, "&otidle;" },
494 { V(',','C'), 1, "&Ccedil;" },
495 { V(',','c'), 1, "&ccedil;" },
496 { V('/','L'), 1, "&#x0141;" },
497 { V('/','l'), 1, "&#x0142;" },
498 { V('/','O'), 1, "&Oslash;" },
499 { V('/','o'), 1, "&oslash;" },
500 { V('o','A'), 1, "&Aring;" },
501 { V('o','a'), 1, "&aring;" },
502 { V('a','"'), 1, "\"" },
503 { V('a','-'), 1, "&macr;" },
504 { V('a','.'), 1, "." },
505 { V('a','^'), 1, "&circ;" },
506 { V('a','a'), 1, "&acute;" },
507 { V('a','b'), 1, "`" },
508 { V('a','c'), 1, "&cedil;" },
509 { V('a','d'), 1, "&uml;" },
510 { V('a','h'), 1, "&#x02C2;" }, // caron
511 { V('a','o'), 1, "&#x02DA;" }, // ring
512 { V('a','~'), 1, "&tilde;" },
513 { V('h','o'), 1, "&#x02DB;" }, // ogonek
514 { V('.','i'), 1, "&#x0131;" }, // dot less i
515 { V('C','s'), 1, "&curren;" }, //krazy:exclude=spelling
516 { V('D','o'), 1, "$" },
517 { V('P','o'), 1, "&pound;" },
518 { V('Y','e'), 1, "&yen;" },
519 { V('F','n'), 1, "&fnof;" },
520 { V('F','o'), 1, "&laquo;" },
521 { V('F','c'), 1, "&raquo;" },
522 { V('f','o'), 1, "&#x2039;" }, // single left guillemet
523 { V('f','c'), 1, "&#x203A;" }, // single right guillemet
524 { V('r','!'), 1, "&iecl;" },
525 { V('r','?'), 1, "&iquest;" },
526 { V('O','f'), 1, "&ordf" },
527 { V('O','m'), 1, "&ordm;" },
528 { V('p','c'), 1, "&middot;" },
529 { V('S','1'), 1, "&sup1;" },
530 { V('S','2'), 1, "&sup2;" },
531 { V('S','3'), 1, "&sup3;" },
532 { V('<','-'), 1, "&larr;" },
533 { V('-','>'), 1, "&rarr;" },
534 { V('<','>'), 1, "&harr;" },
535 { V('d','a'), 1, "&darr;" },
536 { V('u','a'), 1, "&uarr;" },
537 { V('l','A'), 1, "&lArr;" },
538 { V('r','A'), 1, "&rArr;" },
539 { V('h','A'), 1, "&hArr;" },
540 { V('d','A'), 1, "&dArr;" },
541 { V('u','A'), 1, "&uArr;" },
542 { V('b','a'), 1, "|" },
543 { V('b','b'), 1, "&brvbar;" },
544 { V('t','m'), 1, "&trade;" },
545 { V('d','d'), 1, "&Dagger;" },
546 { V('p','s'), 1, "&para;" },
547 { V('%','0'), 1, "&permil;" },
548 { V('f','/'), 1, "&frasl;" }, // Fraction slash
549 { V('s','d'), 1, "&Prime;" },
550 { V('h','a'), 1, "^" },
551 { V('t','i'), 1, "&tidle;" },
552 { V('l','B'), 1, "[" },
553 { V('r','B'), 1, "]" },
554 { V('l','C'), 1, "{" },
555 { V('r','C'), 1, "}" },
556 { V('l','a'), 1, "&lt;" },
557 { V('r','a'), 1, "&gt;" },
558 { V('l','h'), 1, "&le;" },
559 { V('r','h'), 1, "&ge;" },
560 { V('B','q'), 1, "&bdquo;" },
561 { V('b','q'), 1, "&sbquo;" },
562 { V('l','q'), 1, "&ldquo;" },
563 { V('r','q'), 1, "&rdquo;" },
564 { V('o','q'), 1, "&lsquo;" },
565 { V('c','q'), 1, "&rsquo;" },
566 { V('a','q'), 1, "'" },
567 { V('d','q'), 1, "\"" },
568 { V('a','t'), 1, "@" },
569 { V('s','h'), 1, "#" },
570 { V('r','s'), 1, "\\" },
571 { V('t','f'), 1, "&there4;" },
572 { V('~','~'), 1, "&cong;" },
573 { V('~','='), 1, "&asymp;" },
574 { V('!','='), 1, "&ne;" },
575 { V('<','='), 1, "&le;" },
576 { V('=','='), 1, "&equiv;" },
577 { V('=','~'), 1, "&cong;" }, // ### TODO: verify
578 { V('>','='), 1, "&ge;" },
579 { V('A','N'), 1, "&and;" },
580 { V('O','R'), 1, "&or;" },
581 { V('t','e'), 1, "&exist;" },
582 { V('f','a'), 1, "&forall;" },
583 { V('A','h'), 1, "&alefsym;" },
584 { V('I','m'), 1, "&image;" },
585 { V('R','e'), 1, "&real;" },
586 { V('i','f'), 1, "&infin;" },
587 { V('m','d'), 1, "&sdot;" },
588 { V('m','o'), 1, "&#x2206;" }, // element ### TODO verify
589 { V('n','m'), 1, "&notin;" },
590 { V('p','t'), 1, "&prop;" },
591 { V('p','p'), 1, "&perp;" },
592 { V('s','b'), 1, "&sub;" },
593 { V('s','p'), 1, "&sup;" },
594 { V('i','b'), 1, "&sube;" },
595 { V('i','p'), 1, "&supe;" },
596 { V('i','s'), 1, "&int;" },
597 { V('s','r'), 1, "&radic;" },
598 { V('p','d'), 1, "&part;" },
599 { V('c','*'), 1, "&otimes;" },
600 { V('c','+'), 1, "&oplus;" },
601 { V('c','a'), 1, "&cap;" },
602 { V('c','u'), 1, "&cup;" },
603 { V('g','r'), 1, "V" }, // gradient ### TODO Where in Unicode?
604 { V('C','R'), 1, "&crarr;" },
605 { V('s','t'), 2, "-)" }, // "such that" ### TODO Where in Unicode?
606 { V('/','_'), 1, "&ang;" },
607 { V('w','p'), 1, "&weierp;" },
608 { V('l','z'), 1, "&loz;" },
609 { V('a','n'), 1, "-" }, // "horizontal arrow extension" ### TODO Where in Unicode?
612 /* default: print code */
615 /* static char eqndelimopen=0, eqndelimclose=0; */
616 static char escapesym='\\', nobreaksym='\'', controlsym='.', fieldsym=0, padsym=0;
618 static char *buffer=NULL;
619 static int buffpos=0, buffmax=0;
620 static bool scaninbuff=false;
621 static int itemdepth=0;
622 static int section=0;
623 static int dl_set[20]= { 0 };
624 static bool still_dd=0;
625 static int tabstops[20] = { 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96 };
626 static int maxtstop=12;
627 static int curpos=0;
629 static char *scan_troff(char *c, bool san, char **result);
630 static char *scan_troff_mandoc(char *c, bool san, char **result);
632 static QList<char*> s_argumentList;
634 static QByteArray cssPath, cssFile;
636 static QByteArray s_dollarZero; // Value of $0
638 void setResourcePath(const QByteArray& _cssPath)
640 cssPath=_cssPath;
643 void setCssFile(const QByteArray& _cssFile)
645 cssFile = _cssFile;
648 static void fill_old_character_definitions( void )
650 for (size_t i = 0; i < sizeof(standardchar)/sizeof(CSTRDEF); i++)
652 const int nr = standardchar[i].nr;
653 const char temp[3] = { nr / 256, nr % 256, 0 };
654 QByteArray name( temp );
655 s_characterDefinitionMap.insert( name, StringDefinition( standardchar[i].slen, standardchar[i].st ) );
659 static char outbuffer[NULL_TERMINATED(HUGE_STR_MAX)];
660 static int no_newline_output=0;
661 static int newline_for_fun=0;
662 static bool output_possible=false;
/// Directories probed, in this order, when turning <name.h> into a link.
/// The list ends with a null pointer; add_links() iterates until it meets it.
static const char *includedirs[] = {
    "/usr/include",
    "/usr/include/sys",
    "/usr/local/include",
    "/opt/local/include",
    "/usr/ccs",
    "/usr/X11R6/include",
    "/usr/openwin/include",
    "/usr/include/g++",
    NULL
};
676 static bool ignore_links=false;
678 static void add_links(char *c)
681 ** Add the links to the output.
682 ** At the moment the following are recognized:
684 ** name(*) -> ../man?/name.*
685 ** method://string -> method://string
686 ** www.host.name -> http://www.host.name
687 ** ftp.host.name -> ftp://ftp.host.name
688 ** name@host -> mailto:name@host
689 ** <name.h> -> file:/usr/include/name.h (guess)
691 ** Other possible links to add in the future:
693 ** /dir/dir/file -> file:/dir/dir/file
695 if (ignore_links)
697 output_real(c);
698 return;
701 int i,j,nr;
702 char *f, *g,*h;
703 const int numtests=6; // Nmber of tests
704 char *idtest[numtests]; // url, mailto, www, ftp, manpage, C header file
705 bool ok;
706 /* search for (section) */
707 nr=0;
708 idtest[0]=strstr(c+1,"://");
709 idtest[1]=strchr(c+1,'@');
710 idtest[2]=strstr(c,"www.");
711 idtest[3]=strstr(c,"ftp.");
712 idtest[4]=strchr(c+1,'(');
713 idtest[5]=strstr(c+1,".h&gt;");
714 for (i=0; i<numtests; ++i) nr += (idtest[i]!=NULL);
715 while (nr) {
716 j=-1;
717 for (i=0; i<numtests; i++)
718 if (idtest[i] && (j<0 || idtest[i]<idtest[j])) j=i;
719 switch (j) {
720 case 5: { /* <name.h> */
721 f=idtest[5];
722 h=f+2;
723 g=f;
724 while (g>c && g[-1]!=';') g--;
725 bool wrote_include = false;
727 if (g!=c) {
729 QByteArray dir;
730 QByteArray file(g, h - g + 1);
731 file = file.trimmed();
732 for (int index = 0; includedirs[index]; index++) {
733 QByteArray str( includedirs[index] );
734 str.append('/');
735 str.append(file);
736 if (!access(str.data(), R_OK)) {
737 dir = includedirs[index];
738 break;
741 if (!dir.isEmpty()) {
743 char t;
744 t=*g;
745 *g=0;
746 output_real(c);
747 *g=t;*h=0;
749 QByteArray str;
750 str.append( "<A HREF=\"file:" );
751 str.append( dir.data() );
752 str.append( "/" );
753 str.append( file.data() );
754 str.append( "\">" );
755 str.append( file.data() );
756 str.append( "</A>&gt;" );
758 output_real(str.data());
759 c=f+6;
760 wrote_include = true;
765 if (!wrote_include) {
766 f[5]=0;
767 output_real(c);
768 f[5]=';';
769 c=f+5;
772 break;
773 case 4: /* manpage */
774 f=idtest[j];
775 /* check section */
776 g=strchr(f,')');
777 // The character before f must alphanumeric, the end of a HTML tag or the end of a &nbsp;
778 if (g!=NULL && f>c && (g-f)<12 && (isalnum(f[-1]) || f[-1]=='>' || ( f[-1] == ';' ) ) &&
779 isdigit(f[1]) && f[1]!='0' && ((g-f)<=2 || isalpha(f[2])))
781 ok = true;
782 h = f+2;
783 while (h<g)
785 if (!isalnum(*h++))
787 ok = false;
788 break;
792 else
793 ok = false;
795 h = f - 1;
796 if ( ok )
798 // Skip &nbsp;
799 kDebug(7107) << "BEFORE SECTION:" << *h;
800 if ( ( h > c + 5 ) && ( ! memcmp( h-5, "&nbsp;", 6 ) ) )
802 h -= 6;
803 kDebug(7107) << "Skip &nbsp;";
805 else if ( *h == ';' )
807 // Not a non-breaking space, so probably not ok
808 ok = false;
812 if (ok)
814 /* this might be a link */
815 /* skip html makeup */
816 while (h>c && *h=='>') {
817 while (h!=c && *h!='<') h--;
818 if (h!=c) h--;
820 if (isalnum(*h)) {
821 char t,sec, *e;
822 QByteArray fstr(f);
823 e=h+1;
824 sec=f[1];
825 const int index = fstr.indexOf(')', 2);
826 QByteArray subsec;
827 if (index != -1)
828 subsec = fstr.mid(2, index - 2);
829 else // No closing ')' found, take first character as subsection.
830 subsec = fstr.mid(2, 1);
831 while (h>c && (isalnum(h[-1]) || h[-1]=='_'
832 || h[-1]==':' || h[-1]=='-' || h[-1]=='.'))
833 h--;
834 t=*h;
835 *h='\0';
836 output_real(c);
837 *h=t;
838 t=*e;
839 *e='\0';
840 QByteArray str("<a href=\"man:");
841 str += h;
842 str += '(';
843 str += char( sec );
844 if ( !subsec.isEmpty() )
845 str += subsec.toLower();
846 str += ")\">";
847 str += h;
848 str += "</a>";
849 output_real(str.data());
850 *e=t;
851 c=e;
854 *f='\0';
855 output_real(c);
856 *f='(';
857 idtest[4]=f-1;
858 c=f;
859 break; /* manpage */
860 case 3: /* ftp */
861 case 2: /* www */
862 g=f=idtest[j];
863 while (*g && (isalnum(*g) || *g=='_' || *g=='-' || *g=='+' ||
864 *g=='.' || *g=='/')) g++;
865 if (g[-1]=='.') g--;
866 if (g-f>4) {
867 char t;
868 t=*f; *f='\0';
869 output_real(c);
870 *f=t; t=*g;*g='\0';
871 QByteArray str;
872 str.append( "<A HREF=\"" );
873 str.append( j == 3 ? "ftp" : "http" );
874 str.append( "://" );
875 str.append( f );
876 str.append( "\">" );
877 str.append( f );
878 str.append( "</A>" );
879 output_real(str.data());
880 *g=t;
881 c=g;
882 } else {
883 f[3]='\0';
884 output_real(c);
885 c=f+3;
886 f[3]='.';
888 break;
889 case 1: /* mailto */
890 g=f=idtest[1];
891 while (g>c && (isalnum(g[-1]) || g[-1]=='_' || g[-1]=='-' ||
892 g[-1]=='+' || g[-1]=='.' || g[-1]=='%')) g--;
893 if (g-7>=c && g[-1]==':')
895 // We have perhaps an email address starting with mailto:
896 if (!qstrncmp("mailto:",g-7,7))
897 g-=7;
899 h=f+1;
900 while (*h && (isalnum(*h) || *h=='_' || *h=='-' || *h=='+' ||
901 *h=='.')) h++;
902 if (*h=='.') h--;
903 if (h-f>4 && f-g>1) {
904 char t;
905 t=*g;
906 *g='\0';
907 output_real(c);
908 *g=t;t=*h;*h='\0';
909 QByteArray str;
910 str.append( "<A HREF=\"mailto:" );
911 str.append( g );
912 str.append( "\">" );
913 str.append( g );
914 str.append( "</A>" );
915 output_real(str.data());
916 *h=t;
917 c=h;
918 } else {
919 *f='\0';
920 output_real(c);
921 *f='@';
922 idtest[1]=c;
923 c=f;
925 break;
926 case 0: /* url */
927 g=f=idtest[0]; // ://foo...
929 // backup before :// to get protocol
930 while (g>c && isalpha(g[-1]) && islower(g[-1])) g--;
931 h=f+3; // start past ://
932 // determine length of path and part of query it looks like...
933 while (*h && !isspace(*h) && *h!='<' && *h!='>' && *h!='"' &&
934 *h!='&') h++;
935 // if protocol length 3-6 characters and path has any length at all...
936 // more tests added because this code breaks stylesheet links that use
937 // the correct file:/// stuff.
938 if (f-g>2 && f-g<7 && h-f>3 && (strstr(c,"http://") != NULL || strstr(c,"ftp://") != NULL) ) {
939 char t;
940 t=*g;
941 *g='\0';
942 output_real(c);
943 *g=t; t=*h; *h='\0';
944 QByteArray str;
945 str.append( "<A HREF=\"" );
946 str.append( g );
947 str.append( "\">" );
948 str.append( g );
949 str.append( "</A>" );
950 output_real(str.data());
951 *h=t;
952 c=h;
953 } else {
954 f[1]='\0';
955 output_real(c);
956 f[1]='/';
957 c=f+1;
959 break;
960 default:
961 break;
963 nr=0;
964 if (idtest[0] && idtest[0]<=c) idtest[0]=strstr(c+1,"://");
965 if (idtest[1] && idtest[1]<=c) idtest[1]=strchr(c+1,'@');
966 if (idtest[2] && idtest[2]<c) idtest[2]=strstr(c,"www.");
967 if (idtest[3] && idtest[3]<c) idtest[3]=strstr(c,"ftp.");
968 if (idtest[4] && idtest[4]<=c) idtest[4]=strchr(c+1,'(');
969 if (idtest[5] && idtest[5]<=c) idtest[5]=strstr(c+1,".h&gt;");
970 for (i=0; i<numtests; i++) nr += (idtest[i]!=NULL);
972 output_real(c);
975 static QByteArray current_font;
976 static int current_size=0;
977 static int fillout=1;
979 static void out_html(const char *c)
981 if (!c) return;
983 // Added, probably due to the const?
984 char *c2 = qstrdup(c);
985 char *c3 = c2;
987 static int obp=0;
989 if (no_newline_output) {
990 int i=0;
991 no_newline_output=1;
992 while (c2[i]) {
993 if (!no_newline_output) c2[i-1]=c2[i];
994 if (c2[i]=='\n') no_newline_output=0;
995 i++;
997 if (!no_newline_output) c2[i-1]=0;
999 if (scaninbuff) {
1000 while (*c2) {
1001 if (buffpos>=buffmax) {
1002 char *h = new char[buffmax*2];
1004 #ifdef SIMPLE_MAN2HTML
1005 if (!h)
1007 cerr << "Memory full, cannot output!" << endl;
1008 exit(1);
1010 #else
1011 // modern compiler do not return a NULL for a new
1012 #endif
1013 memcpy(h, buffer, buffmax);
1014 delete [] buffer;
1015 buffer=h;
1016 buffmax=buffmax*2;
1018 buffer[buffpos++]=*c2++;
1020 } else
1021 if (output_possible) {
1022 while (*c2) {
1023 outbuffer[obp++]=*c2;
1024 if (*c=='\n' || obp >= HUGE_STR_MAX) {
1025 outbuffer[obp]='\0';
1026 add_links(outbuffer);
1027 obp=0;
1029 c2++;
1032 delete [] c3;
1035 static QByteArray set_font( const QByteArray& name )
1037 // Every font but R (Regular) creates <span> elements
1038 QByteArray markup;
1039 if ( current_font != "R" && !current_font.isEmpty() )
1040 markup += "</span>";
1041 const uint len = name.length();
1042 bool fontok = true;
1043 if ( len == 1 )
1045 const char lead = name[0];
1046 switch (lead)
1048 case 'P': // ### TODO: this seems to mean "precedent font"
1049 case 'R': break; // regular, do nothing
1050 case 'I': markup += "<span style=\"font-style:italic\">"; break;
1051 case 'B': markup += "<span style=\"font-weight:bold\">"; break;
1052 case 'L': markup += "<span style=\"font-family:monospace\">"; break; // ### What's L?
1053 default: fontok = false;
1056 else if ( len == 2 )
1058 if ( name == "BI" )
1059 markup += "<span style=\"font-style:italic;font-weight:bold\">";
1060 // Courier
1061 else if ( name == "CR" )
1062 markup += "<span style=\"font-family:monospace\">";
1063 else if ( name == "CW" ) // CW is used by pod2man(1) (part of perldoc(1))
1064 markup += "<span style=\"font-family:monospace\">";
1065 else if ( name == "CI" )
1066 markup += "<span style=\"font-family:monospace;font-style:italic\">";
1067 else if ( name == "CB" )
1068 markup += "<span style=\"font-family:monospace;font-weight:bold\">";
1069 // Times
1070 else if ( name == "TR" )
1071 markup += "<span style=\"font-family:serif\">";
1072 else if ( name == "TI" )
1073 markup += "<span style=\"font-family:serif;font-style:italic\">";
1074 else if ( name == "TB" )
1075 markup += "<span style=\"font-family:serif;font-weight:bold\">";
1076 // Helvetica
1077 else if ( name == "HR" )
1078 markup += "<span style=\"font-family:sans-serif\">";
1079 else if ( name == "HI" )
1080 markup += "<span style=\"font-family:sans-serif;font-style:italic\">";
1081 else if ( name == "HB" )
1082 markup += "<span style=\"font-family:sans-serif;font-weight:bold\">";
1083 else
1084 fontok = false;
1086 else if ( len == 3 )
1088 if ( name == "CBI" )
1089 markup += "<span style=\"font-family:monospace;font-style:italic;font-weight:bold\">";
1090 else if ( name == "TBI" )
1091 markup += "<span style=\"font-family:serif;font-style:italic;font-weight:bold\">";
1092 else if ( name == "HBI" )
1093 markup += "<span style=\"font-family:sans-serif;font-style:italic;font-weight:bold\">";
1095 if (fontok)
1096 current_font = name;
1097 else
1098 current_font = "R"; // Still nothing, then it is 'R' (Regular)
1099 return markup;
1102 static QByteArray change_to_size(int nr)
1104 switch (nr)
1106 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
1107 case '7': case '8': case '9': nr=nr-'0'; break;
1108 case '\0': break;
1109 default: nr=current_size+nr; if (nr>9) nr=9; if (nr< -9) nr=-9; break;
1111 if ( nr == current_size )
1112 return "";
1113 const QByteArray font ( current_font );
1114 QByteArray markup;
1115 markup = set_font("R");
1116 if (current_size)
1117 markup += "</FONT>";
1118 current_size=nr;
1119 if (nr)
1121 markup += "<FONT SIZE=\"";
1122 if (nr>0)
1123 markup += '+';
1124 else
1126 markup += '-';
1127 nr=-nr;
1129 markup += char( nr + '0' );
1130 markup += "\">";
1132 markup += set_font( font );
1133 return markup;
1136 /* static int asint=0; */
// Numeric result of the last escape: scan_escape_direct() resets it to 0 and
// stores there the value produced by its 'n' and 'w' cases; scan_expression() reads it.
1137 static int intresult=0;
// Advance the local pointer c just past the next newline, stopping at the terminating NUL.
1139 #define SKIPEOL while (*c && *c++!='\n') {}
// When true, scan_escape() emits nothing and font/size escapes do not change the state.
1141 static bool skip_escape=false;
// When true, the '\\' case of scan_escape_direct() steps back one character instead of producing a backslash.
1142 static bool single_escape=false;
// Forward declaration: the scan_named_*() helpers below call it to expand escapes inside names.
1144 static char *scan_escape_direct( char *c, QByteArray& cstr );
1147 * scan a named character
1148 * param c position
1150 static QByteArray scan_named_character( char*& c )
1152 QByteArray name;
1153 if ( *c == '(' )
1155 // \*(ab Name of two characters
1156 if ( c[1] == escapesym )
1158 QByteArray cstr;
1159 c = scan_escape_direct( c+2, cstr );
1160 // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the variable are to be used.
1161 name = cstr;
1163 else
1165 name+=c[1];
1166 name+=c[2];
1167 c+=3;
1170 else if ( *c == '[' )
1172 // \*[long_name] Long name
1173 // Named character groff(7)
1174 // We must find the ] to get a name
1175 c++;
1176 while ( *c && *c != ']' && *c != '\n' )
1178 if ( *c == escapesym )
1180 QByteArray cstr;
1181 c = scan_escape_direct( c+1, cstr );
1182 const int result = cstr.indexOf(']');
1183 if ( result == -1 )
1184 name += cstr;
1185 else
1187 // Note: we drop the characters after the ]
1188 name += cstr.left( result );
1191 else
1193 name+=*c;
1194 c++;
1197 if ( !*c || *c == '\n' )
1199 kDebug(7107) << "Found linefeed! Could not parse character name: " << BYTEARRAY( name );
1200 return "";
1202 c++;
1204 else if ( *c =='C' || c[1]== '\'' )
1206 // \C'name'
1207 c+=2;
1208 while ( *c && *c != '\'' && *c != '\n' )
1210 if ( *c == escapesym )
1212 QByteArray cstr;
1213 c = scan_escape_direct( c+1, cstr );
1214 const int result = cstr.indexOf('\'');
1215 if ( result == -1 )
1216 name += cstr;
1217 else
1219 // Note: we drop the characters after the ]
1220 name += cstr.left( result );
1223 else
1225 name+=*c;
1226 c++;
1229 if ( !*c || *c == '\n' )
1231 kDebug(7107) << "Found linefeed! Could not parse (\\C mode) character name: " << BYTEARRAY( name );
1232 return "";
1234 c++;
1236 // Note: characters with a one character length name doe not exist, as they would collide with other escapes
1238 // Now we have the name, let us find it between the string names
1239 QMap<QByteArray,StringDefinition>::const_iterator it=s_characterDefinitionMap.constFind(name);
1240 if (it==s_characterDefinitionMap.constEnd())
1242 kDebug(7107) << "EXCEPTION: cannot find character with name: " << BYTEARRAY( name );
1243 // No output, as an undefined string is empty by default
1244 return "";
1246 else
1248 kDebug(7107) << "Character with name: \"" << BYTEARRAY( name ) << "\" => " << BYTEARRAY( (*it).m_output );
1249 return (*it).m_output;
1253 static QByteArray scan_named_string(char*& c)
1255 QByteArray name;
1256 if ( *c == '(' )
1258 // \*(ab Name of two characters
1259 if ( c[1] == escapesym )
1261 QByteArray cstr;
1262 c = scan_escape_direct( c+2, cstr );
1263 kDebug(7107) << "\\(" << BYTEARRAY( cstr );
1264 // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the variable are to be used.
1265 name = cstr;
1267 else
1269 name+=c[1];
1270 name+=c[2];
1271 c+=3;
1274 else if ( *c == '[' )
1276 // \*[long_name] Long name
1277 // Named character groff(7)
1278 // We must find the ] to get a name
1279 c++;
1280 while ( *c && *c != ']' && *c != '\n' )
1282 if ( *c == escapesym )
1284 QByteArray cstr;
1285 c = scan_escape_direct( c+1, cstr );
1286 const int result = cstr.indexOf(']');
1287 if ( result == -1 )
1288 name += cstr;
1289 else
1291 // Note: we drop the characters after the ]
1292 name += cstr.left( result );
1295 else
1297 name+=*c;
1298 c++;
1301 if ( !*c || *c == '\n' )
1303 kDebug(7107) << "Found linefeed! Could not parse string name: " << BYTEARRAY( name );
1304 return "";
1306 c++;
1308 else
1310 // \*a Name of one character
1311 name+=*c;
1312 c++;
1314 // Now we have the name, let us find it between the string names
1315 QMap<QByteArray,StringDefinition>::const_iterator it=s_stringDefinitionMap.constFind(name);
1316 if (it==s_stringDefinitionMap.constEnd())
1318 kDebug(7107) << "EXCEPTION: cannot find string with name: " << BYTEARRAY( name );
1319 // No output, as an undefined string is empty by default
1320 return "";
1322 else
1324 kDebug(7107) << "String with name: \"" << BYTEARRAY( name ) << "\" => " << BYTEARRAY( (*it).m_output );
1325 return (*it).m_output;
1329 static QByteArray scan_dollar_parameter(char*& c)
1331 int argno = 0; // No dollar argument number yet!
1332 if ( *c == '0' )
1334 //kDebug(7107) << "$0";
1335 c++;
1336 return s_dollarZero;
1338 else if ( *c >= '1' && *c <= '9' )
1340 //kDebug(7107) << "$ direct";
1341 argno = ( *c - '0' );
1342 c++;
1344 else if ( *c == '(' )
1346 //kDebug(7107) << "$(";
1347 if ( c[1] && c[2] && c[1] >= '0' && c[1] <= '9' && c[2] >= '0' && c[2] <= '9' )
1349 argno = ( c[1] - '0' ) * 10 + ( c[2] - '0' );
1350 c += 3;
1352 else
1354 if ( !c[1] )
1355 c++;
1356 else if ( !c[2] )
1357 c+=2;
1358 else
1359 c += 3;
1360 return "";
1363 else if ( *c == '[' )
1365 //kDebug(7107) << "$[";
1366 argno = 0;
1367 c++;
1368 while ( *c && *c>='0' && *c<='9' && *c!=']' )
1370 argno *= 10;
1371 argno += ( *c - '0' );
1372 c++;
1374 if ( *c != ']' )
1376 return "";
1378 c++;
1380 else if ( ( *c == '*' ) || ( *c == '@' ) )
1382 const bool quote = ( *c == '@' );
1383 QList<char*>::const_iterator it = s_argumentList.constBegin();
1384 QByteArray param;
1385 bool space = false;
1386 for ( ; it != s_argumentList.constEnd(); ++it )
1388 if (space)
1389 param += ' ';
1390 if (quote)
1391 param += '\"'; // Not as HTML, as it could be used by macros !
1392 param += (*it);
1393 if (quote)
1394 param += '\"'; // Not as HTML, as it could be used by macros!
1395 space = true;
1397 c++;
1398 return param;
1400 else
1402 kDebug(7107) << "EXCEPTION: unknown parameter $" << *c;
1403 return "";
1405 //kDebug(7107) << "ARG $" << argno;
1406 if ( !s_argumentList.isEmpty() && argno > 0 )
1408 //kDebug(7107) << "ARG $" << argno << " OK!";
1409 argno--;
1410 if ( argno >= s_argumentList.size() )
1412 kDebug(7107) << "EXCEPTION: cannot find parameter $" << (argno+1);
1413 return "";
1416 return s_argumentList[argno];
1418 return "";
1421 /// return the value of read-only number registers
1422 static int read_only_number_register( const QByteArray& name )
1424 // Internal read-only variables
1425 if ( name == ".$" )
1427 kDebug(7107) << "\\n[.$] == " << s_argumentList.size();
1428 return s_argumentList.size();
1430 else if ( name == ".g" )
1431 return 0; // We are not groff(1)
1432 else if ( name == ".s" )
1433 return current_size;
1434 #if 0
1435 // ### TODO: map the fonts to a number
1436 else if ( name == ".f" )
1437 return current_font;
1438 #endif
1439 else if ( name == ".P" )
1440 return 0; // We are not printing
1441 else if ( name == ".A" )
1442 return s_nroff;
1443 #ifndef SIMPLE_MAN2HTML
1444 // Special KDE KIO man:
1445 else if ( name == ".KDE_VERSION_MAJOR" )
1446 return KDE_VERSION_MAJOR;
1447 else if ( name == ".KDE_VERSION_MINOR" )
1448 return KDE_VERSION_MINOR;
1449 else if ( name == ".KDE_VERSION_RELEASE" )
1450 return KDE_VERSION_RELEASE;
1451 else if ( name == ".KDE_VERSION" )
1452 return KDE_VERSION;
1453 #endif
1454 // ### TODO: should .T be set to "html"? But we are not the HTML post-processor. :-(
1456 // ### TODO: groff defines many more read-only number registers
1457 kDebug(7107) << "EXCEPTION: unknown read-only number register: " << BYTEARRAY( name );
1459 return 0; // Undefined variable
1463 /// get the value of a number register and auto-increment if asked
1464 static int scan_number_register( char*& c)
1466 int sign = 0; // Sign for auto-increment (if any)
1467 switch (*c)
1469 case '+': sign = 1; c++; break;
1470 case '-': sign = -1; c++; break;
1471 default: break;
1473 QByteArray name;
1474 if ( *c == '[' )
1476 c++;
1477 if ( *c == '+' )
1479 sign = 1;
1480 c++;
1482 else if ( *c == '-' )
1484 sign = -1;
1485 c++;
1487 while ( *c && *c != ']' && *c != '\n' )
1489 // ### TODO: a \*[string] could be inside and should be processed
1490 name+=*c;
1491 c++;
1493 if ( !*c || *c == '\n' )
1495 kDebug(7107) << "Found linefeed! Could not parse number register name: " << BYTEARRAY( name );
1496 return 0;
1498 c++;
1500 else if ( *c == '(' )
1502 c++;
1503 if ( *c == '+' )
1505 sign = 1;
1506 c++;
1508 else if ( *c == '-' )
1510 sign = -1;
1511 c++;
1513 name+=c[0];
1514 name+=c[1];
1515 c+=2;
1517 else
1519 name += *c;
1520 c++;
1522 if ( name[0] == '.' )
1524 return read_only_number_register( name );
1526 else
1528 QMap< QByteArray, NumberDefinition >::iterator it = s_numberDefinitionMap.find( name );
1529 if ( it == s_numberDefinitionMap.end() )
1531 return 0; // Undefined variable
1533 else
1535 (*it).m_value += sign * (*it).m_increment;
1536 return (*it).m_value;
1541 /// get and set font
1542 static QByteArray scan_named_font( char*& c )
1544 QByteArray name;
1545 if ( *c == '(' )
1547 // \f(ab Name of two characters
1548 if ( c[1] == escapesym )
1550 QByteArray cstr;
1551 c = scan_escape_direct( c+2, cstr );
1552 kDebug(7107) << "\\(" << BYTEARRAY( cstr );
1553 // ### HACK: as we convert characters too early to HTML, we need to support more than 2 characters here and assume that all characters passed by the variable are to be used.
1554 name = cstr;
1556 else
1558 name+=c[1];
1559 name+=c[2];
1560 c+=3;
1563 else if ( *c == '[' )
1565 // \f[long_name] Long name
1566 // We must find the ] to get a name
1567 c++;
1568 while ( *c && *c != ']' && *c != '\n' )
1570 if ( *c == escapesym )
1572 QByteArray cstr;
1573 c = scan_escape_direct( c+1, cstr );
1574 const int result = cstr.indexOf(']');
1575 if ( result == -1 )
1576 name += cstr;
1577 else
1579 // Note: we drop the characters after the ]
1580 name += cstr.left( result );
1583 else
1585 name+=*c;
1586 c++;
1589 if ( !*c || *c == '\n' )
1591 kDebug(7107) << "Found linefeed! Could not parse font name: " << BYTEARRAY( name );
1592 return "";
1594 c++;
1596 else
1598 // \fa Font name with one character or one digit
1599 // ### HACK do *not* use: name = *c; or name would be empty
1600 name += *c;
1601 c++;
1603 //kDebug(7107) << "FONT NAME: " << BYTEARRAY( name );
1604 // Now we have the name, let us find the font
1605 bool ok = false;
1606 const unsigned int number = name.toUInt( &ok );
1607 if ( ok )
1609 if ( number < 5 )
1611 const char* fonts[] = { "R", "I", "B", "BI", "CR" }; // Regular, Italic, Bold, Bold Italic, Courier regular
1612 name = fonts[ number ];
1614 else
1616 kDebug(7107) << "EXCEPTION: font has too big number: " << BYTEARRAY( name ) << " => " << number;
1617 name = "R"; // Let assume Regular
1620 else if ( name.isEmpty() )
1622 kDebug(7107) << "EXCEPTION: font has no name: " << BYTEARRAY( name );
1623 name = "R"; // Let assume Regular
1625 if ( !skip_escape )
1626 return set_font( name );
1627 else
1628 return "";
1631 static QByteArray scan_number_code( char*& c )
1633 QByteArray number;
1634 if ( *c != '\'' )
1635 return "";
1636 c++; // Go past the opening single quote
1637 while ( *c && ( *c != '\n' ) && ( *c != '\'' ) )
1639 number += *c;
1640 c++;
1642 bool ok = false;
1643 unsigned int result = number.toUInt( &ok );
1644 if ( ( result < ' ' ) || ( result > 65535 ) )
1645 return "";
1646 else if ( result == '\t' )
1648 curpos += 8;
1649 curpos &= 0xfff8;
1650 return "\t";
1652 number.setNum( result );
1653 number.prepend( "&#" );
1654 number.append( ";" );
1655 curpos ++;
1656 c++; // Go past the closing single quote
1657 return number;
1660 // ### TODO known missing escapes from groff(7):
1661 // ### TODO \& \! \) \: \R
// Interpret one escape sequence without writing anything to the output.
//
// c    points at the character that follows the escape symbol (callers pass
//      the position just after escapesym).
// cstr is cleared on entry and receives the text the escape expands to.
//
// The file-scope intresult is reset to 0 on entry and receives the numeric
// result of the 'n' (number register) and 'w' cases.
//
// Returns the position after the escape. Cases that delegate to a helper
// taking c by reference clear cplusplus, because the helper has already moved
// c; all other cases leave c on the last character of the escape and rely on
// the final c++.
1663 static char *scan_escape_direct( char *c, QByteArray& cstr )
1665 bool exoutputp;
1666 bool exskipescape;
1667 int i,j;
1668 bool cplusplus = true; // Should the c++ call be executed at the end of the function
1670 cstr = "";
1671 intresult=0;
1672 switch (*c) {
1673 case 'e': cstr = "\\"; curpos++;break; // ### FIXME: it should be the current escape symbol
1674 case '0': // ### TODO Where in Unicode? (space of digit width)
1675 case '~': // non-breakable-space (resizeable!)
1676 case ' ':
1677 case '|': // half-non-breakable-space
1678 case '^': // quarter-non-breakable-space
1679 cstr = "&nbsp;"; curpos++; break;
// Comment: skip to the end of the line, then step back so that the final c++ lands after it
1680 case '"': SKIPEOL; c--; break;
1681 // ### TODO \# like \" but does not ignore the end of line (groff(7))
1682 case '$':
1684 c++;
1685 cstr = scan_dollar_parameter( c );
1686 cplusplus = false;
1687 break;
1689 case 'z':
1691 c++;
1692 if (*c=='\\')
1694 c=scan_escape_direct( c+1, cstr );
1695 c--;
1697 else
1698 cstr = QByteArray( c, 1 );
1699 break;
// \k: step over the register name (only the one and two character forms), then
// fall through to the escapes that produce no output
1701 case 'k': c++; if (*c=='(') c+=2; // ### FIXME \k[REG] exists too
1702 case '!':
1703 case '%':
1704 case 'a':
1705 case 'd':
1706 case 'r':
1707 case 'u':
1708 case '\n':
1709 case '&':
1710 cstr = ""; break;
1711 case '(':
1712 case '[':
1713 case 'C':
1715 // Do not go forward as scan_named_character needs the leading symbol
1716 cstr = scan_named_character( c );
1717 cplusplus = false;
1718 break;
1720 case '*':
1722 c++;
1723 cstr = scan_named_string( c );
1724 cplusplus = false;
1725 break;
1727 case 'f':
1729 c++;
1730 cstr = scan_named_font( c );
1731 cplusplus = false;
1732 break;
// \s: font size. j is the sign (0 if none was given), i the number; a number
// without sign is taken relative to 10 (i-10). The size itself can come from a
// nested escape through intresult.
1734 case 's': // ### FIXME: many forms are missing
1735 c++;
1736 j=0;i=0;
1737 if (*c=='-') {j= -1; c++;} else if (*c=='+') {j=1; c++;}
1738 if (*c=='0') c++; else if (*c=='\\') {
1739 c++;
1740 c=scan_escape_direct( c, cstr );
1741 i=intresult; if (!j) j=1;
1742 } else
1743 while (isdigit(*c) && (!i || (!j && i<4))) i=i*10+(*c++)-'0';
1744 if (!j) { j=1; if (i) i=i-10; }
1745 if (!skip_escape) cstr=change_to_size(i*j);
1746 c--;
1747 break;
1748 case 'n':
1750 c++;
1751 intresult = scan_number_register( c );
1752 cplusplus = false;
1753 break;
// \w: the character after 'w' is the delimiter. The argument is scanned with
// output disabled and escapes skipped; intresult becomes the number of items
// (plain characters or whole escapes) found before the closing delimiter.
// NOTE(review): the loop has no check for the terminating NUL - confirm that
// callers guarantee a closing delimiter.
1755 case 'w':
1756 c++;
1757 i=*c;
1758 c++;
1759 exoutputp=output_possible;
1760 exskipescape=skip_escape;
1761 output_possible=false;
1762 skip_escape=true;
1763 j=0;
1764 while (*c!=i)
1766 j++;
1767 if ( *c == escapesym )
1768 c = scan_escape_direct( c+1, cstr);
1769 else
1770 c++;
1772 output_possible=exoutputp;
1773 skip_escape=exskipescape;
1774 intresult=j;
1775 break;
// \l produces a horizontal rule and then, like the following escapes, has its
// delimited argument skipped with output disabled.
// NOTE(review): a nested scan_escape_direct() call clears cstr, so the <HR>
// seems to be lost when the argument contains an escape - confirm.
1776 case 'l': cstr = "<HR>"; curpos=0;
1777 case 'b':
1778 case 'v':
1779 case 'x':
1780 case 'o':
1781 case 'L':
1782 case 'h':
1783 c++;
1784 i=*c;
1785 c++;
1786 exoutputp=output_possible;
1787 exskipescape=skip_escape;
1788 output_possible=0;
1789 skip_escape=true;
1790 while (*c != i)
1791 if (*c==escapesym) c=scan_escape_direct( c+1, cstr );
1792 else c++;
1793 output_possible=exoutputp;
1794 skip_escape=exskipescape;
1795 break;
1796 case 'c': no_newline_output=1; break;
1797 case '{': newline_for_fun++; break; // Start conditional block
1798 case '}': if (newline_for_fun) newline_for_fun--; break; // End conditional block
1799 case 'p': cstr = "<BR>\n";curpos=0; break;
1800 case 't': cstr = "\t";curpos=(curpos+8)&0xfff8; break;
1801 case '<': cstr = "&lt;";curpos++; break;
1802 case '>': cstr = "&gt;";curpos++; break;
// Doubled escape symbol: with single_escape set, step back so that the final
// c++ leaves c on this backslash; otherwise it yields one literal backslash
1803 case '\\':
1805 if (single_escape)
1806 c--;
1807 else
1808 cstr="\\";
1809 break;
1811 case 'N':
1813 c++;
1814 cstr = scan_number_code( c );
1815 cplusplus = false;
1816 break;
1818 case '\'': cstr = "&acute;";curpos++; break; // groff(7) ### TODO verify
1819 case '`': cstr = "`";curpos++; break; // groff(7)
1820 case '-': cstr = "-";curpos++; break; // groff(7)
1821 case '.': cstr = ".";curpos++; break; // groff(7)
// Any other character stands for itself
1822 default: cstr = QByteArray( c, 1 ); curpos++; break;
1824 if (cplusplus)
1825 c++;
1826 return c;
1829 static char *scan_escape(char *c)
1831 QByteArray cstr;
1832 char* result = scan_escape_direct( c, cstr );
1833 if ( !skip_escape )
1834 out_html(cstr);
1835 return result;
1838 class TABLEROW;
1840 class TABLEITEM {
1841 public:
1842 TABLEITEM(TABLEROW *row);
1843 ~TABLEITEM() {
1844 delete [] contents;
1846 void setContents(const char *_contents) {
1847 delete [] contents;
1848 contents = qstrdup(_contents);
1850 const char *getContents() const { return contents; }
1852 void init() {
1853 delete [] contents;
1854 contents = 0;
1855 size = 0;
1856 align = 0;
1857 valign = 0;
1858 colspan = 1;
1859 rowspan = 1;
1860 font = 0;
1861 vleft = 0;
1862 vright = 0;
1863 space = 0;
1864 width = 0;
1867 void copyLayout(const TABLEITEM *orig) {
1868 size = orig->size;
1869 align = orig->align;
1870 valign = orig->valign;
1871 colspan = orig->colspan;
1872 rowspan = orig->rowspan;
1873 font = orig->font;
1874 vleft = orig->vleft;
1875 vright = orig->vright;
1876 space = orig->space;
1877 width = orig->width;
1880 public:
1881 int size,align,valign,colspan,rowspan,font,vleft,vright,space,width;
1883 private:
1884 char *contents;
1885 TABLEROW *_parent;
1888 class TABLEROW {
1889 char *test;
1890 public:
1891 TABLEROW() {
1892 test = new char;
1893 prev = 0; next = 0;
1895 ~TABLEROW() {
1896 qDeleteAll(items);
1897 items.clear();
1898 delete test;
1901 int length() const { return items.count(); }
1902 bool has(int index) {
1903 return (index >= 0) && (index < (int)items.count());
1905 TABLEITEM &at(int index) {
1906 return *items.at(index);
1909 TABLEROW *copyLayout() const;
1911 void addItem(TABLEITEM *item) {
1912 items.append(item);
1914 TABLEROW *prev, *next;
1916 private:
1917 QList<TABLEITEM*> items;
1920 TABLEITEM::TABLEITEM(TABLEROW *row) : contents(0), _parent(row) {
1921 init();
1922 _parent->addItem(this);
1925 TABLEROW *TABLEROW::copyLayout() const {
1926 TABLEROW *newrow = new TABLEROW();
1928 QListIterator<TABLEITEM *> it(items);
1929 while (it.hasNext()){
1930 TABLEITEM *newitem = new TABLEITEM(newrow);
1931 newitem->copyLayout(it.next());
1933 return newrow;
// Names of the table options recognised by scan_table(); the list ends with NULL.
// scan_table() switches on the index into this array, so the order matters.
1936 static const char *tableopt[]= { "center", "expand", "box", "allbox",
1937 "doublebox", "tab", "linesize",
1938 "delim", NULL };
// Length of the option name at the same index in tableopt (0 for the terminator)
1939 static int tableoptl[] = { 6,6,3,6,9,3,8,5,0};
1942 static void clear_table(TABLEROW *table)
1944 TABLEROW *tr1,*tr2;
1946 tr1=table;
1947 while (tr1->prev) tr1=tr1->prev;
1948 while (tr1) {
1949 tr2=tr1;
1950 tr1=tr1->next;
1951 delete tr2;
1955 static char *scan_expression(char *c, int *result);
1957 static char *scan_format(char *c, TABLEROW **result, int *maxcol)
1959 TABLEROW *layout, *currow;
1960 TABLEITEM *curfield;
1961 int i,j;
1962 if (*result) {
1963 clear_table(*result);
1965 layout= currow=new TABLEROW();
1966 curfield=new TABLEITEM(currow);
1967 while (*c && *c!='.') {
1968 switch (*c) {
1969 case 'C': case 'c': case 'N': case 'n':
1970 case 'R': case 'r': case 'A': case 'a':
1971 case 'L': case 'l': case 'S': case 's':
1972 case '^': case '_':
1973 if (curfield->align)
1974 curfield=new TABLEITEM(currow);
1975 curfield->align=toupper(*c);
1976 c++;
1977 break;
1978 case 'i': case 'I': case 'B': case 'b':
1979 curfield->font = toupper(*c);
1980 c++;
1981 break;
1982 case 'f': case 'F':
1983 c++;
1984 curfield->font = toupper(*c);
1985 c++;
1986 if (!isspace(*c) && *c!='.') c++;
1987 break;
1988 case 't': case 'T': curfield->valign='t'; c++; break;
1989 case 'p': case 'P':
1990 c++;
1991 i=j=0;
1992 if (*c=='+') { j=1; c++; }
1993 if (*c=='-') { j=-1; c++; }
1994 while (isdigit(*c)) i=i*10+(*c++)-'0';
1995 if (j) curfield->size= i*j; else curfield->size=j-10;
1996 break;
1997 case 'v': case 'V':
1998 case 'w': case 'W':
1999 c=scan_expression(c+2,&curfield->width);
2000 break;
2001 case '|':
2002 if (curfield->align) curfield->vleft++;
2003 else curfield->vright++;
2004 c++;
2005 break;
2006 case 'e': case 'E':
2007 c++;
2008 break;
2009 case '0': case '1': case '2': case '3': case '4':
2010 case '5': case '6': case '7': case '8': case '9':
2011 i=0;
2012 while (isdigit(*c)) i=i*10+(*c++)-'0';
2013 curfield->space=i;
2014 break;
2015 case ',': case '\n':
2016 currow->next=new TABLEROW();
2017 currow->next->prev=currow;
2018 currow=currow->next;
2019 currow->next=NULL;
2020 curfield=new TABLEITEM(currow);
2021 c++;
2022 break;
2023 default:
2024 c++;
2025 break;
2028 if (*c=='.') while (*c++!='\n');
2029 *maxcol=0;
2030 currow=layout;
2031 while (currow) {
2032 i=currow->length();
2033 if (i>*maxcol) *maxcol=i;
2034 currow=currow->next;
2036 *result=layout;
2037 return c;
2040 static TABLEROW *next_row(TABLEROW *tr)
2042 if (tr->next) {
2043 tr=tr->next;
2044 if (!tr->next)
2045 return next_row(tr);
2046 return tr;
2047 } else {
2048 tr->next = tr->copyLayout();
2049 tr->next->prev = tr;
2050 return tr->next;
// troff source (\fR\s0) that scan_table() runs through scan_troff() after the
// text of each table item
2054 static char itemreset[20]="\\fR\\s0";
// Advance curfield to the next field of currow, skipping the fields whose
// align is 'S'. Note that the macro already ends with a semicolon.
2056 #define FORWARDCUR do { curfield++; } while (currow->has(curfield) && currow->at(curfield).align=='S');
2058 static char *scan_table(char *c)
2060 char *h;
2061 char *g;
2062 int center=0, expand=0, box=0, border=0, linesize=1;
2063 int i,j,maxcol=0, finished=0;
2064 QByteArray oldfont;
2065 int oldsize,oldfillout;
2066 char itemsep='\t';
2067 TABLEROW *layout=NULL, *currow;
2068 int curfield = -1;
2069 while (*c++!='\n');
2070 h=c;
2071 if (*h=='.') return c-1;
2072 oldfont=current_font;
2073 oldsize=current_size;
2074 oldfillout=fillout;
2075 out_html(set_font("R"));
2076 out_html(change_to_size(0));
2077 if (!fillout) {
2078 fillout=1;
2079 out_html("</PRE>");
2081 while (*h && *h!='\n') h++;
2082 if (h[-1]==';') {
2083 /* scan table options */
2084 while (c<h) {
2085 while (isspace(*c)) c++;
2086 for (i=0; tableopt[i] && qstrncmp(tableopt[i],c,tableoptl[i]);i++);
2087 c=c+tableoptl[i];
2088 switch (i) {
2089 case 0: center=1; break;
2090 case 1: expand=1; break;
2091 case 2: box=1; break;
2092 case 3: border=1; break;
2093 case 4: box=2; break;
2094 case 5: while (*c++!='('); itemsep=*c++; break;
2095 case 6: while (*c++!='('); linesize=0;
2096 while (isdigit(*c)) linesize=linesize*10+(*c++)-'0';
2097 break;
2098 case 7: while (*c!=')') c++;
2099 default: break;
2101 c++;
2103 c=h+1;
2105 /* scan layout */
2106 c=scan_format(c,&layout, &maxcol);
2107 // currow=layout;
2108 currow=next_row(layout);
2109 curfield=0;
2110 i=0;
2111 while (!finished && *c) {
2112 /* search item */
2113 h=c;
2114 if ((*c=='_' || *c=='=') && (c[1]==itemsep || c[1]=='\n')) {
2115 if (c[-1]=='\n' && c[1]=='\n') {
2116 if (currow->prev) {
2117 currow->prev->next=new TABLEROW();
2118 currow->prev->next->next=currow;
2119 currow->prev->next->prev=currow->prev;
2120 currow->prev=currow->prev->next;
2121 } else {
2122 currow->prev=layout=new TABLEROW();
2123 currow->prev->prev=NULL;
2124 currow->prev->next=currow;
2126 TABLEITEM *newitem = new TABLEITEM(currow->prev);
2127 newitem->align=*c;
2128 newitem->colspan=maxcol;
2129 curfield=0;
2130 c=c+2;
2131 } else {
2132 if (currow->has(curfield)) {
2133 currow->at(curfield).align=*c;
2134 FORWARDCUR;
2136 if (c[1]=='\n') {
2137 currow=next_row(currow);
2138 curfield=0;
2140 c=c+2;
2142 } else if (*c=='T' && c[1]=='{') {
2143 h=c+2;
2144 c=strstr(h,"\nT}");
2145 c++;
2146 *c='\0';
2147 g=NULL;
2148 scan_troff(h,0,&g);
2149 scan_troff(itemreset, 0, &g);
2150 *c='T';
2151 c+=3;
2152 if (currow->has(curfield)) {
2153 currow->at(curfield).setContents(g);
2154 FORWARDCUR;
2156 delete [] g;
2158 if (c[-1]=='\n') {
2159 currow=next_row(currow);
2160 curfield=0;
2162 } else if (*c=='.' && c[1]=='T' && c[2]=='&' && c[-1]=='\n') {
2163 TABLEROW *hr;
2164 while (*c++!='\n');
2165 hr=currow;
2166 currow=currow->prev;
2167 hr->prev=NULL;
2168 c=scan_format(c,&hr, &i);
2169 hr->prev=currow;
2170 currow->next=hr;
2171 currow=hr;
2172 next_row(currow);
2173 curfield=0;
2174 } else if (*c=='.' && c[1]=='T' && c[2]=='E' && c[-1]=='\n') {
2175 finished=1;
2176 while (*c++!='\n');
2177 if (currow->prev)
2178 currow->prev->next=NULL;
2179 currow->prev=NULL;
2180 clear_table(currow);
2181 currow = 0;
2182 } else if (*c=='.' && c[-1]=='\n' && !isdigit(c[1])) {
2183 /* skip troff request inside table (usually only .sp ) */
2184 while (*c++!='\n');
2185 } else {
2186 h=c;
2187 while (*c && (*c!=itemsep || c[-1]=='\\') &&
2188 (*c!='\n' || c[-1]=='\\')) c++;
2189 i=0;
2190 if (*c==itemsep) {i=1; *c='\n'; }
2191 if (h[0]=='\\' && h[2]=='\n' &&
2192 (h[1]=='_' || h[1]=='^')) {
2193 if (currow->has(curfield)) {
2194 currow->at(curfield).align=h[1];
2195 FORWARDCUR;
2197 h=h+3;
2198 } else {
2199 g=NULL;
2200 h=scan_troff(h,1,&g);
2201 scan_troff(itemreset,0, &g);
2202 if (currow->has(curfield)) {
2203 currow->at(curfield).setContents(g);
2204 FORWARDCUR;
2206 delete [] g;
2208 if (i) *c=itemsep;
2209 c=h;
2210 if (c[-1]=='\n') {
2211 currow=next_row(currow);
2212 curfield=0;
2216 /* calculate colspan and rowspan */
2217 currow=layout;
2218 while (currow->next) currow=currow->next;
2219 while (currow) {
2220 int ti = 0, ti1 = 0, ti2 = -1;
2221 TABLEROW *prev = currow->prev;
2222 if (!prev)
2223 break;
2225 while (prev->has(ti1)) {
2226 if (currow->has(ti))
2227 switch (currow->at(ti).align) {
2228 case 'S':
2229 if (currow->has(ti2)) {
2230 currow->at(ti2).colspan++;
2231 if (currow->at(ti2).rowspan<prev->at(ti1).rowspan)
2232 currow->at(ti2).rowspan=prev->at(ti1).rowspan;
2234 break;
2235 case '^':
2236 if (prev->has(ti1)) prev->at(ti1).rowspan++;
2237 default:
2238 if (ti2 < 0) ti2=ti;
2239 else {
2240 do {
2241 ti2++;
2242 } while (currow->has(ti2) && currow->at(ti2).align=='S');
2244 break;
2246 ti++;
2247 if (ti1 >= 0) ti1++;
2249 currow=currow->prev;
2251 /* produce html output */
2252 if (center) out_html("<CENTER>");
2253 if (box==2) out_html("<TABLE BORDER><TR><TD>");
2254 out_html("<TABLE");
2255 if (box || border) {
2256 out_html(" BORDER");
2257 if (!border) out_html("><TR><TD><TABLE");
2258 if (expand) out_html(" WIDTH=\"100%\"");
2260 out_html(">\n");
2261 currow=layout;
2262 while (currow) {
2263 j=0;
2264 out_html("<TR VALIGN=top>");
2265 curfield=0;
2266 while (currow->has(curfield)) {
2267 if (currow->at(curfield).align!='S' && currow->at(curfield).align!='^') {
2268 out_html("<TD");
2269 switch (currow->at(curfield).align) {
2270 case 'N':
2271 currow->at(curfield).space+=4;
2272 case 'R':
2273 out_html(" ALIGN=right");
2274 break;
2275 case 'C':
2276 out_html(" ALIGN=center");
2277 default:
2278 break;
2280 if (!currow->at(curfield).valign && currow->at(curfield).rowspan>1)
2281 out_html(" VALIGN=center");
2282 if (currow->at(curfield).colspan>1) {
2283 char buf[5];
2284 out_html(" COLSPAN=");
2285 sprintf(buf, "%i", currow->at(curfield).colspan);
2286 out_html(buf);
2288 if (currow->at(curfield).rowspan>1) {
2289 char buf[5];
2290 out_html(" ROWSPAN=");
2291 sprintf(buf, "%i", currow->at(curfield).rowspan);
2292 out_html(buf);
2294 j=j+currow->at(curfield).colspan;
2295 out_html(">");
2296 if (currow->at(curfield).size) out_html(change_to_size(currow->at(curfield).size));
2297 if (currow->at(curfield).font)
2298 out_html(set_font(QByteArray::number(currow->at(curfield).font) ));
2299 switch (currow->at(curfield).align) {
2300 case '=': out_html("<HR><HR>"); break;
2301 case '_': out_html("<HR>"); break;
2302 default:
2303 out_html(currow->at(curfield).getContents());
2304 break;
2306 if (currow->at(curfield).space)
2307 for (i=0; i<currow->at(curfield).space;i++) out_html("&nbsp;");
2308 if (currow->at(curfield).font) out_html(set_font("R"));
2309 if (currow->at(curfield).size) out_html(change_to_size(0));
2310 if (j>=maxcol && currow->at(curfield).align>'@' && currow->at(curfield).align!='_')
2311 out_html("<BR>");
2312 out_html("</TD>");
2314 curfield++;
2316 out_html("</TR>\n");
2317 currow=currow->next;
2320 clear_table(layout);
2322 if (box && !border) out_html("</TABLE>");
2323 out_html("</TABLE>");
2324 if (box==2) out_html("</TABLE>");
2325 if (center) out_html("</CENTER>\n");
2326 else out_html("\n");
2327 if (!oldfillout) out_html("<PRE>");
2328 fillout=oldfillout;
2329 out_html(change_to_size(oldsize));
2330 out_html(set_font(oldfont));
2331 return c;
// Evaluate the expression starting at c and store its value in *result.
//
// Forms handled, selected by the first character:
//   !expr            logical negation of the following expression
//   n / t            value of s_nroff, respectively 1-s_nroff
//   'str1'str2'      string comparison (any of ' " a control character or a
//                    \( special character as delimiter): 1 if both strings,
//                    after expansion by scan_troff(), are equal
//   anything else    numeric expression, evaluated strictly from left to
//                    right (no operator precedence)
//
// numLoop is the nesting depth of parentheses. At depth 0 white space ends
// the expression; inside parentheses it does not.
//
// Returns the position after the expression.
2334 static char *scan_expression( char *c, int *result, const unsigned int numLoop )
2336 int value=0,value2,sign=1,opex=0;
// oper is the pending operator: 'c' means "copy the first operand", 0 means
// that an operand has just been consumed
2337 char oper='c';
2339 if (*c=='!') {
2340 c=scan_expression(c+1, &value);
2341 value= (!value);
2342 } else if (*c=='n') {
2343 c++;
2344 value=s_nroff;
2345 } else if (*c=='t') {
2346 c++;
2347 value=1-s_nroff;
2348 } else if (*c=='\'' || *c=='"' || *c<' ' || (*c=='\\' && c[1]=='(')) {
2349 /* ?string1?string2?
2350 ** test if string1 equals string2.
2352 char *st1=NULL, *st2=NULL, *h;
2353 char *tcmp=NULL;
2354 char sep;
2355 sep=*c;
// With a \(xx special character as delimiter, tcmp points at its four
// characters and the delimiter is matched with qstrncmp
2356 if (sep=='\\') {
2357 tcmp=c;
2358 c=c+3;
2360 c++;
2361 h=c;
// NOTE(review): neither delimiter search checks for the terminating NUL
2362 while (*c!= sep && (!tcmp || qstrncmp(c,tcmp,4))) c++;
// The delimiter is temporarily replaced by a newline so that scan_troff stops there
2363 *c='\n';
2364 scan_troff(h, 1, &st1);
2365 *c=sep;
2366 if (tcmp) c=c+3;
2367 c++;
2368 h=c;
2369 while (*c!=sep && (!tcmp || qstrncmp(c,tcmp,4))) c++;
2370 *c='\n';
2371 scan_troff(h,1,&st2);
2372 *c=sep;
2373 if (!st1 && !st2) value=1;
2374 else if (!st1 || !st2) value=0;
2375 else value=(!qstrcmp(st1, st2));
2376 delete [] st1;
2377 delete [] st2;
2378 if (tcmp) c=c+3;
2379 c++;
2380 } else {
// opex: 1 after an operand has been read, 0 after an operator, -1 to stop
// (set below when a number is directly followed by a '.')
2381 while (*c && ( !isspace(*c) || ( numLoop > 0 ) ) && *c!=')' && opex >= 0) {
2382 opex=0;
2383 switch (*c) {
2384 case '(':
2385 c = scan_expression( c + 1, &value2, numLoop + 1 );
2386 value2=sign*value2;
2387 opex=1;
2388 break;
2389 case '.':
2390 case '0': case '1':
2391 case '2': case '3':
2392 case '4': case '5':
2393 case '6': case '7':
2394 case '8': case '9': {
// Decimal number: value2 is the integer part, num/denum the fraction, which
// is rounded to the nearest integer
2395 int num=0,denum=1;
2396 value2=0;
2397 while (isdigit(*c)) value2=value2*10+((*c++)-'0');
2398 if (*c=='.' && isdigit(c[1])) {
2399 c++;
2400 while (isdigit(*c)) {
2401 num=num*10+((*c++)-'0');
2402 denum=denum*10;
2405 if (isalpha(*c)) {
2406 /* scale indicator */
2407 switch (*c) {
2408 case 'i': /* inch -> 10pt */
2409 value2=value2*10+(num*10+denum/2)/denum;
2410 num=0;
2411 break;
2412 default:
2413 break;
2415 c++;
2417 value2=value2+(num+denum/2)/denum;
2418 value2=sign*value2;
2419 opex=1;
2420 if (*c=='.')
2421 opex = -1;
2424 break;
// An escape as operand: its numeric value is taken from intresult
2425 case '\\':
2426 c=scan_escape(c+1);
2427 value2=intresult*sign;
2428 if (isalpha(*c)) c++; /* scale indicator */
2429 opex=1;
2430 break;
// '-' is a sign while an operator is pending, otherwise it falls through and
// is the subtraction operator
2431 case '-':
2432 if (oper) { sign=-1; c++; break; }
2433 case '>':
2434 case '<':
2435 case '+':
2436 case '/':
2437 case '*':
2438 case '%':
2439 case '&':
2440 case '=':
2441 case ':':
// An operator followed by '=' (>=, <=, ==) is encoded as its character code plus 16
2442 if (c[1]=='=') oper=(*c++) +16; else oper=*c;
2443 c++;
2444 break;
2445 default: c++; break;
// An operand has been read: combine it with the value so far
2447 if (opex > 0) {
2448 sign=1;
2449 switch (oper) {
2450 case 'c': value=value2; break;
2451 case '-': value=value-value2; break;
2452 case '+': value=value+value2; break;
2453 case '*': value=value*value2; break;
// Division and modulo by zero leave the value unchanged
2454 case '/': if (value2) value=value/value2; break;
2455 case '%': if (value2) value=value%value2; break;
2456 case '<': value=(value<value2); break;
2457 case '>': value=(value>value2); break;
2458 case '>'+16: value=(value>=value2); break;
2459 case '<'+16: value=(value<=value2); break;
2460 case '=': case '='+16: value=(value==value2); break;
2461 case '&': value = (value && value2); break;
2462 case ':': value = (value || value2); break;
2463 default:
2465 kDebug(7107) << "Unknown operator " << char(oper);
2468 oper=0;
2471 if (*c==')') c++;
2473 *result=value;
2474 return c;
2477 static char *scan_expression(char *c, int *result)
2479 return scan_expression( c, result, 0 );
2482 static void trans_char(char *c, char s, char t)
2484 char *sl=c;
2485 int slash=0;
2486 while (*sl!='\n' || slash) {
2487 if (!slash) {
2488 if (*sl==escapesym)
2489 slash=1;
2490 else if (*sl==s)
2491 *sl=t;
2492 } else slash=0;
2493 sl++;
2497 // 2004-10-19, patched by Waldo Bastian <bastian@kde.org>:
2498 // Fix handling of lines like:
2499 // .TH FIND 1L \" -*- nroff -*-
2500 // Where \" indicates the start of comment.
2502 // The problem is the \" handling in fill_words(), the return value
2503 // indicates the end of the word as well as the end of the line, which makes it
2504 // basically impossible to express that the end of the last word is not the end of
2505 // the line.
2507 // I have corrected that by adding an extra parameter 'next_line' that returns a
2508 // pointer to the next line, while the function itself returns a pointer to the end
2509 // of the last word.
2510 static char *fill_words(char *c, char *words[], int *n, bool newline, char **next_line)
2512 char *sl=c;
2513 int slash=0;
2514 int skipspace=0;
2515 *n=0;
2516 words[*n]=sl;
2517 while (*sl && (*sl!='\n' || slash)) {
2518 if (!slash) {
2519 if (*sl=='"') {
2520 if (skipspace && (*(sl+1)=='"'))
2521 *sl++ = '\a';
2522 else {
2523 *sl='\a';
2524 skipspace=!skipspace;
2526 } else if (*sl==escapesym) {
2527 slash=1;
2528 if (sl[1]=='\n')
2529 *sl='\a';
2530 } else if ((*sl==' ' || *sl=='\t') && !skipspace) {
2531 if (newline) *sl='\n';
2532 if (words[*n]!=sl) (*n)++;
2533 words[*n]=sl+1;
2535 } else {
2536 if (*sl=='"') {
2537 sl--;
2538 if (newline) *sl='\n';
2539 if (words[*n]!=sl) (*n)++;
2540 if (next_line)
2542 char *eow = sl;
2543 sl++;
2544 while (*sl && *sl !='\n') sl++;
2545 *next_line = sl;
2546 return eow;
2548 return sl;
2550 slash=0;
2552 sl++;
2554 if (sl!=words[*n]) (*n)++;
2555 if (next_line) *next_line = sl+1;
2556 return sl;
/*
 * Pairs of (abbreviation, expanded title), searched by lookup_abbrev().
 * The table is terminated by a NULL pair.
 */
static const char *abbrev_list[] = {
    "GSBG",     "Getting Started ",
    "SUBG",     "Customizing SunOS",
    "SHBG",     "Basic Troubleshooting",
    "SVBG",     "SunView User's Guide",
    "MMBG",     "Mail and Messages",
    "DMBG",     "Doing More with SunOS",
    "UNBG",     "Using the Network",
    "GDBG",     "Games, Demos &amp; Other Pursuits",
    "CHANGE",   "SunOS 4.1 Release Manual",
    "INSTALL",  "Installing SunOS 4.1",
    "ADMIN",    "System and Network Administration",
    "SECUR",    "Security Features Guide",
    "PROM",     "PROM User's Manual",
    "DIAG",     "Sun System Diagnostics",
    "SUNDIAG",  "Sundiag User's Guide",
    "MANPAGES", "SunOS Reference Manual",
    "REFMAN",   "SunOS Reference Manual",
    "SSI",      "Sun System Introduction",
    "SSO",      "System Services Overview",
    "TEXT",     "Editing Text Files",
    "DOCS",     "Formatting Documents",
    "TROFF",    "Using <B>nroff</B> and <B>troff</B>",
    "INDEX",    "Global Index",
    "CPG",      "C Programmer's Guide",
    "CREF",     "C Reference Manual",
    "ASSY",     "Assembly Language Reference",
    "PUL",      "Programming Utilities and Libraries",
    "DEBUG",    "Debugging Tools",
    "NETP",     "Network Programming",
    "DRIVER",   "Writing Device Drivers",
    "STREAMS",  "STREAMS Programming",
    "SBDK",     "SBus Developer's Kit",
    "WDDS",     "Writing Device Drivers for the SBus",
    "FPOINT",   "Floating-Point Programmer's Guide",
    "SVPG",     "SunView 1 Programmer's Guide",
    "SVSPG",    "SunView 1 System Programmer's Guide",
    "PIXRCT",   "Pixrect Reference Manual",
    "CGI",      "SunCGI Reference Manual",
    "CORE",     "SunCore Reference Manual",
    "4ASSY",    "Sun-4 Assembly Language Reference",
    "SARCH",    "<FONT SIZE=\"-1\">SPARC</FONT> Architecture Manual",
    "KR",       "The C Programming Language",
    NULL,       NULL
};
2604 static const char *lookup_abbrev(char *c)
2606 int i=0;
2608 if (!c) return "";
2609 while (abbrev_list[i] && qstrcmp(c,abbrev_list[i])) i=i+2;
2610 if (abbrev_list[i]) return abbrev_list[i+1];
2611 else return c;
/*
 * Pairs of (section, section title), searched by section_name().
 * The set of known sections depends on the platform. The list of keys ends
 * at the first NULL key, whose value is the title used for unknown sections.
 */
static const char *section_list[] = {
#ifdef Q_OS_SOLARIS
    // for Solaris
    "1",  "User Commands",
    "1B", "SunOS/BSD Compatibility Package Commands",
    "1b", "SunOS/BSD Compatibility Package Commands",
    "1C", "Communication Commands ",
    "1c", "Communication Commands",
    "1F", "FMLI Commands ",
    "1f", "FMLI Commands",
    "1G", "Graphics and CAD Commands ",
    "1g", "Graphics and CAD Commands ",
    "1M", "Maintenance Commands",
    "1m", "Maintenance Commands",
    "1S", "SunOS Specific Commands",
    "1s", "SunOS Specific Commands",
    "2",  "System Calls",
    "3",  "C Library Functions",
    "3B", "SunOS/BSD Compatibility Library Functions",
    "3b", "SunOS/BSD Compatibility Library Functions",
    "3C", "C Library Functions",
    "3c", "C Library Functions",
    "3E", "C Library Functions",
    "3e", "C Library Functions",
    "3F", "Fortran Library Routines",
    "3f", "Fortran Library Routines",
    "3G", "C Library Functions",
    "3g", "C Library Functions",
    "3I", "Wide Character Functions",
    "3i", "Wide Character Functions",
    "3K", "Kernel VM Library Functions",
    "3k", "Kernel VM Library Functions",
    "3L", "Lightweight Processes Library",
    "3l", "Lightweight Processes Library",
    "3M", "Mathematical Library",
    "3m", "Mathematical Library",
    "3N", "Network Functions",
    "3n", "Network Functions",
    "3R", "Realtime Library",
    "3r", "Realtime Library",
    "3S", "Standard I/O Functions",
    "3s", "Standard I/O Functions",
    "3T", "Threads Library",
    "3t", "Threads Library",
    "3W", "C Library Functions",
    "3w", "C Library Functions",
    "3X", "Miscellaneous Library Functions",
    "3x", "Miscellaneous Library Functions",
    "4",  "File Formats",
    "4B", "SunOS/BSD Compatibility Package File Formats",
    "4b", "SunOS/BSD Compatibility Package File Formats",
    "5",  "Headers, Tables, and Macros",
    "6",  "Games and Demos",
    "7",  "Special Files",
    "7B", "SunOS/BSD Compatibility Special Files",
    "7b", "SunOS/BSD Compatibility Special Files",
    "8",  "Maintenance Procedures",
    "8C", "Maintenance Procedures",
    "8c", "Maintenance Procedures",
    "8S", "Maintenance Procedures",
    "8s", "Maintenance Procedures",
    "9",  "DDI and DKI",
    "9E", "DDI and DKI Driver Entry Points",
    "9e", "DDI and DKI Driver Entry Points",
    "9F", "DDI and DKI Kernel Functions",
    "9f", "DDI and DKI Kernel Functions",
    "9S", "DDI and DKI Data Structures",
    "9s", "DDI and DKI Data Structures",
    "L",  "Local Commands",
#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
    // for the BSD family
    "1",  "General Commands",
    "2",  "System Calls",
    "3",  "Library Functions",
    "4",  "Kernel Interfaces",
    "5",  "File Formats",
    "6",  "Games",
    "7",  "Miscellaneous Information",
    "8",  "System Manager's Manuals",
    "9",  "Kernel Developer's Manuals",
#else
    // Other OS
    "1",  "User Commands ",
    "1C", "User Commands",
    "1G", "User Commands",
    "1S", "User Commands",
    "1V", "User Commands ",
    "2",  "System Calls",
    "2V", "System Calls",
    "3",  "C Library Functions",
    "3C", "Compatibility Functions",
    "3F", "Fortran Library Routines",
    "3K", "Kernel VM Library Functions",
    "3L", "Lightweight Processes Library",
    "3M", "Mathematical Library",
    "3N", "Network Functions",
    "3R", "RPC Services Library",
    "3S", "Standard I/O Functions",
    "3V", "C Library Functions",
    "3X", "Miscellaneous Library Functions",
    "4",  "Devices and Network Interfaces",
    "4F", "Protocol Families",
    "4I", "Devices and Network Interfaces",
    "4M", "Devices and Network Interfaces",
    "4N", "Devices and Network Interfaces",
    "4P", "Protocols",
    "4S", "Devices and Network Interfaces",
    "4V", "Devices and Network Interfaces",
    "5",  "File Formats",
    "5V", "File Formats",
    "6",  "Games and Demos",
    "7",  "Environments, Tables, and Troff Macros",
    "7V", "Environments, Tables, and Troff Macros",
    "8",  "Maintenance Commands",
    "8C", "Maintenance Commands",
    "8S", "Maintenance Commands",
    "8V", "Maintenance Commands",
    "L",  "Local Commands",
#endif
    // The defaults
    NULL, "Misc. Reference Manual Pages",
    NULL, NULL
};
2737 static const char *section_name(char *c)
2739 int i=0;
2741 if (!c) return "";
2742 while (section_list[i] && qstrcmp(c,section_list[i])) i=i+2;
2743 if (section_list[i+1]) return section_list[i+1];
2744 else return c;
2747 static char *skip_till_newline(char *c)
2749 int lvl=0;
2751 while (*c && (*c!='\n' || lvl>0)) {
2752 if (*c=='\\') {
2753 c++;
2754 if (*c=='}')
2755 lvl--;
2756 else if (*c=='{')
2757 lvl++;
2758 else if (*c=='\0')
2759 break;
2761 c++;
2763 if (*c) c++;
2764 if (lvl<0 && newline_for_fun) {
2765 newline_for_fun = newline_for_fun+lvl;
2766 if (newline_for_fun<0) newline_for_fun=0;
2768 return c;
2771 static bool s_whileloop = false;
2773 /// Processing the .while request
2774 static void request_while( char*& c, int j, bool mdoc )
2776 // ### TODO: .break and .continue
2777 kDebug(7107) << "Entering .while";
2778 c += j;
2779 char* newline = skip_till_newline( c );
2780 const char oldchar = *newline;
2781 *newline = 0;
2782 // We store the full .while stuff into a QCString as if it would be a macro
2783 const QByteArray macro = c ;
2784 kDebug(7107) << "'Macro' of .while"<< BYTEARRAY( macro );
2785 // Prepare for continuing after .while loop end
2786 *newline = oldchar;
2787 c = newline;
2788 // Process -while loop
2789 const bool oldwhileloop = s_whileloop;
2790 s_whileloop = true;
2791 int result = true; // It must be an int due to the call to scan_expression
2792 while ( result )
2794 // Unlike for a normal macro, we have the condition at start, so we do not need to prepend extra bytes
2795 char* liveloop = qstrdup( macro.data() );
2796 kDebug(7107) << "Scanning .while condition";
2797 kDebug(7101) << "Loop macro " << liveloop;
2798 char* end_expression = scan_expression( liveloop, &result );
2799 kDebug(7101) << "After " << end_expression;
2800 if ( result )
2802 kDebug(7107) << "New .while iteration";
2803 // The condition is true, so call the .while's content
2804 char* help = end_expression + 1;
2805 while ( *help && ( *help == ' ' || *help == '\t' ) )
2806 ++help;
2807 if ( ! *help )
2809 // We have a problem, so stop .while
2810 result = false;
2811 break;
2813 if ( mdoc )
2814 scan_troff_mandoc( help, false, 0 );
2815 else
2816 scan_troff( help, false, 0 );
2818 delete[] liveloop;
2822 s_whileloop = oldwhileloop;
2823 kDebug(7107) << "Ending .while";
2826 const int max_wordlist = 100;
2828 /// Processing mixed fonts reqiests like .BI
2829 static void request_mixed_fonts( char*& c, int j, const char* font1, const char* font2, const bool mode, const bool inFMode )
2831 c += j;
2832 if (*c=='\n') c++;
2833 int words;
2834 char *wordlist[max_wordlist];
2835 fill_words(c, wordlist, &words, true, &c);
2836 for (int i=0; i<words; i++)
2838 if ((mode) || (inFMode))
2840 out_html(" ");
2841 curpos++;
2843 wordlist[i][-1]=' ';
2844 out_html( set_font( (i&1) ? font2 : font1 ) );
2845 scan_troff(wordlist[i],1,NULL);
2847 out_html(set_font("R"));
2848 if (mode)
2850 out_html(" ]");
2851 curpos++;
2853 out_html(NEWLINE);
2854 if (!fillout)
2855 curpos=0;
2856 else
2857 curpos++;
2860 // Some known missing requests from man(7):
2861 // - see "safe subset": .tr
2863 // Some known missing requests from mdoc(7):
2864 // - start or end of quotings
2866 // Some of the requests are from mdoc.
2867 // On Linux see the man pages mdoc(7), mdoc.samples(7) and groff_mdoc(7)
2868 // See also the online man pages of FreeBSD: mdoc(7)
// Request numbers. Each value (except REQ_UNKNOWN) is the index of the
// request's name in the table of get_request(), so both must be kept in sync.
#define REQ_UNKNOWN  -1
#define REQ_ab        0
#define REQ_di        1
#define REQ_ds        2
#define REQ_as        3
#define REQ_br        4
#define REQ_c2        5
#define REQ_cc        6
#define REQ_ce        7
#define REQ_ec        8
#define REQ_eo        9
#define REQ_ex       10
#define REQ_fc       11
#define REQ_fi       12
#define REQ_ft       13 // groff(7) "FonT"
#define REQ_el       14
#define REQ_ie       15
#define REQ_if       16
#define REQ_ig       17
#define REQ_nf       18
#define REQ_ps       19
#define REQ_sp       20
#define REQ_so       21
#define REQ_ta       22
#define REQ_ti       23
#define REQ_tm       24
#define REQ_B        25
#define REQ_I        26
#define REQ_Fd       27
#define REQ_Fn       28
#define REQ_Fo       29
#define REQ_Fc       30
#define REQ_OP       31
#define REQ_Ft       32
#define REQ_Fa       33
#define REQ_BR       34
#define REQ_BI       35
#define REQ_IB       36
#define REQ_IR       37
#define REQ_RB       38
#define REQ_RI       39
#define REQ_DT       40
#define REQ_IP       41 // man(7) "Indent Paragraph"
#define REQ_TP       42
#define REQ_IX       43
#define REQ_P        44
#define REQ_LP       45
#define REQ_PP       46
#define REQ_HP       47
#define REQ_PD       48
#define REQ_Rs       49
#define REQ_RS       50
#define REQ_Re       51
#define REQ_RE       52
#define REQ_SB       53
#define REQ_SM       54
#define REQ_Ss       55
#define REQ_SS       56
#define REQ_Sh       57
#define REQ_SH       58 // man(7) "Section Header"
#define REQ_Sx       59
#define REQ_TS       60
#define REQ_Dt       61
#define REQ_TH       62
#define REQ_TX       63
#define REQ_rm       64
#define REQ_rn       65
#define REQ_nx       66
#define REQ_in       67
#define REQ_nr       68 // groff(7) "Number Register"
#define REQ_am       69
#define REQ_de       70
#define REQ_Bl       71 // mdoc(7) "Begin List"
#define REQ_El       72 // mdoc(7) "End List"
#define REQ_It       73 // mdoc(7) "ITem"
#define REQ_Bk       74
#define REQ_Ek       75
#define REQ_Dd       76
#define REQ_Os       77 // mdoc(7)
#define REQ_Bt       78
#define REQ_At       79 // mdoc(7) "AT&t" (not parsable, not callable)
#define REQ_Fx       80 // mdoc(7) "Freebsd" (not parsable, not callable)
#define REQ_Nx       81
#define REQ_Ox       82
#define REQ_Bx       83 // mdoc(7) "Bsd"
#define REQ_Ux       84 // mdoc(7) "UniX"
#define REQ_Dl       85
#define REQ_Bd       86
#define REQ_Ed       87
#define REQ_Be       88
#define REQ_Xr       89 // mdoc(7) "eXternal Reference"
#define REQ_Fl       90 // mdoc(7) "FLag"
#define REQ_Pa       91
#define REQ_Pf       92
#define REQ_Pp       93
#define REQ_Dq       94 // mdoc(7) "Double Quote"
#define REQ_Op       95
#define REQ_Oo       96
#define REQ_Oc       97
#define REQ_Pq       98 // mdoc(7) "Parenthese Quote"
#define REQ_Ql       99
#define REQ_Sq      100 // mdoc(7) "Single Quote"
#define REQ_Ar      101
#define REQ_Ad      102
#define REQ_Em      103 // mdoc(7) "EMphasis"
#define REQ_Va      104
#define REQ_Xc      105
#define REQ_Nd      106
#define REQ_Nm      107
#define REQ_Cd      108
#define REQ_Cm      109
#define REQ_Ic      110
#define REQ_Ms      111
#define REQ_Or      112
#define REQ_Sy      113
#define REQ_Dv      114
#define REQ_Ev      115
#define REQ_Fr      116
#define REQ_Li      117
#define REQ_No      118
#define REQ_Ns      119
#define REQ_Tn      120
#define REQ_nN      121
#define REQ_perc_A  122
#define REQ_perc_D  123
#define REQ_perc_N  124
#define REQ_perc_O  125
#define REQ_perc_P  126
#define REQ_perc_Q  127
#define REQ_perc_V  128
#define REQ_perc_B  129
#define REQ_perc_J  130
#define REQ_perc_R  131
#define REQ_perc_T  132
#define REQ_An      133 // mdoc(7) "Author Name"
#define REQ_Aq      134 // mdoc(7) "Angle bracket Quote"
#define REQ_Bq      135 // mdoc(7) "Bracket Quote"
#define REQ_Qq      136 // mdoc(7) "straight double Quote"
#define REQ_UR      137 // man(7) "URl"
#define REQ_UE      138 // man(7) "Url End"
#define REQ_UN      139 // man(7) "Url Name" (a.k.a. anchors)
#define REQ_troff   140 // groff(7) "TROFF mode"
#define REQ_nroff   141 // groff(7) "NROFF mode"
#define REQ_als     142 // groff(7) "ALias String"
#define REQ_rr      143 // groff(7) "Remove number Register"
#define REQ_rnn     144 // groff(7) "ReName Number register"
#define REQ_aln     145 // groff(7) "ALias Number register"
#define REQ_shift   146 // groff(7) "SHIFT parameter"
#define REQ_while   147 // groff(7) "WHILE loop"
#define REQ_do      148 // groff(7) "DO command"
#define REQ_Dx      149 // mdoc(7) "DragonFly" macro
3022 static int get_request(char *req, int len)
3024 static const char *requests[] = {
3025 "ab", "di", "ds", "as", "br", "c2", "cc", "ce", "ec", "eo", "ex", "fc",
3026 "fi", "ft", "el", "ie", "if", "ig", "nf", "ps", "sp", "so", "ta", "ti",
3027 "tm", "B", "I", "Fd", "Fn", "Fo", "Fc", "OP", "Ft", "Fa", "BR", "BI",
3028 "IB", "IR", "RB", "RI", "DT", "IP", "TP", "IX", "P", "LP", "PP", "HP",
3029 "PD", "Rs", "RS", "Re", "RE", "SB", "SM", "Ss", "SS", "Sh", "SH", "Sx",
3030 "TS", "Dt", "TH", "TX", "rm", "rn", "nx", "in", "nr", "am", "de", "Bl",
3031 "El", "It", "Bk", "Ek", "Dd", "Os", "Bt", "At", "Fx", "Nx", "Ox", "Bx",
3032 "Ux", "Dl", "Bd", "Ed", "Be", "Xr", "Fl", "Pa", "Pf", "Pp", "Dq", "Op",
3033 "Oo", "Oc", "Pq", "Ql", "Sq", "Ar", "Ad", "Em", "Va", "Xc", "Nd", "Nm",
3034 "Cd", "Cm", "Ic", "Ms", "Or", "Sy", "Dv", "Ev", "Fr", "Li", "No", "Ns",
3035 "Tn", "nN", "%A", "%D", "%N", "%O", "%P", "%Q", "%V", "%B", "%J", "%R",
3036 "%T", "An", "Aq", "Bq", "Qq", "UR", "UE", "UN", "troff", "nroff", "als",
3037 "rr", "rnn", "aln", "shift", "while", "do", "Dx", 0 };
3038 int r = 0;
3039 while (requests[r] && qstrncmp(req, requests[r], len)) r++;
3040 return requests[r] ? r : REQ_UNKNOWN;
3043 // &%(#@ c programs !!!
3044 //static int ifelseval=0;
3045 // If/else can be nested!
3046 static QStack<int> s_ifelseval;
3048 // Process a (mdoc) request involving quotes
3049 static char* process_quote(char* c, int j, const char* open, const char* close)
3051 trans_char(c,'"','\a');
3052 c+=j;
3053 if (*c=='\n') c++; // ### TODO: why? Quote requests cannot be empty!
3054 out_html(open);
3055 c=scan_troff_mandoc(c,1,0);
3056 out_html(close);
3057 out_html(NEWLINE);
3058 if (fillout)
3059 curpos++;
3060 else
3061 curpos=0;
3062 return c;
/**
 * Is the char \p ch a punctuation in the sense of mdoc(7)
 * \return true for one of . , ; : ( ) [ ] and false for anything else
 */
static bool is_mdoc_punctuation( const char ch )
{
    switch ( ch )
    {
        case '.': case ',': case ';': case ':':
        case '(': case ')': case '[': case ']':
            return true;
        default:
            return false;
    }
}
/**
 * Can the char \p c be part of an identifier
 * \note For groff, an identifier can consist of nearly all ASCII printable non-white-space characters
 * See info:/groff/Identifiers
 * \return true for the characters '!' to '~', except the backslash
 */
static bool is_identifier_char( const char c )
{
    const bool printableNonSpace = ( c >= '!' && c <= '~' );
    // ### TODO: the backslash should be treated as escape instead!
    return printableNonSpace && c != '\\';
}
3095 static QByteArray scan_identifier( char*& c )
3097 char* h = c; // help pointer
3098 // ### TODO Groff seems to eat nearly everything as identifier name (info:/groff/Identifiers)
3099 while ( *h && *h != '\a' && *h != '\n' && is_identifier_char( *h ) )
3100 ++h;
3101 const char tempchar = *h;
3102 *h = 0;
3103 const QByteArray name = c;
3104 *h = tempchar;
3105 if ( name.isEmpty() )
3107 kDebug(7107) << "EXCEPTION: identifier empty!";
3109 c = h;
3110 return name;
3113 static char *scan_request(char *c)
3115 // mdoc(7) stuff
3116 static bool mandoc_synopsis=false; /* True if we are in the synopsis section */
3117 static bool mandoc_command=false; /* True if this is mdoc(7) page */
3118 static int mandoc_bd_options; /* Only copes with non-nested Bd's */
3119 static int function_argument=0; // Number of function argument (.Fo, .Fa, .Fc)
3120 // man(7) stuff
3121 static bool ur_ignore=false; // Has .UR a parameter : (for .UE to know if or not to write </a>)
3123 int i=0;
3124 bool mode=false;
3125 char *h=0;
3126 char *wordlist[max_wordlist];
3127 int words;
3128 char *sl;
3129 while (*c==' ' || *c=='\t') c++; // Spaces or tabs allowed between control character and request
3130 if (c[0]=='\n') return c+1;
3131 if (c[0]==escapesym)
3133 /* some pages use .\" .\$1 .\} */
3134 /* .\$1 is too difficult/stuppid */
3135 if (c[1]=='$')
3137 kDebug(7107) << "Found .\\$";
3138 c=skip_till_newline(c); // ### TODO
3140 else
3142 c = scan_escape(c+1);
3144 else
3146 int nlen = 0;
3147 QByteArray macroName;
3148 while (c[nlen] && (c[nlen] != ' ') && (c[nlen] != '\t') && (c[nlen] != '\n') && (c[nlen] != escapesym))
3150 macroName+=c[nlen];
3151 nlen++;
3153 int j = nlen;
3154 while (c[j]==' ' || c[j]=='\t') j++;
3155 /* search macro database of self-defined macros */
3156 QMap<QByteArray,StringDefinition>::const_iterator it=s_stringDefinitionMap.constFind(macroName);
3157 if (it!=s_stringDefinitionMap.constEnd())
3159 kDebug(7107) << "CALLING MACRO: " << BYTEARRAY( macroName );
3160 const QByteArray oldDollarZero = s_dollarZero; // Previous value of $0
3161 s_dollarZero = macroName;
3162 sl=fill_words(c+j, wordlist, &words, true, &c);
3163 *sl='\0';
3164 for (i=1;i<words; i++) wordlist[i][-1]='\0';
3165 for (i=0; i<words; i++)
3167 char *h=NULL;
3168 if (mandoc_command)
3169 scan_troff_mandoc(wordlist[i],1,&h);
3170 else
3171 scan_troff(wordlist[i],1,&h);
3172 wordlist[i] = qstrdup(h);
3173 delete [] h;
3175 for ( i=words; i<max_wordlist; i++ ) wordlist[i]=NULL;
3176 if ( !(*it).m_output.isEmpty() )
3178 //kDebug(7107) << "Macro content is: "<< BYTEARRAY( (*it).m_output );
3179 const unsigned int length = (*it).m_output.length();
3180 char* work = new char [length+2];
3181 work[0] = '\n'; // The macro must start after an end of line to allow a request on first line
3182 qstrncpy(work+1,(*it).m_output.data(),length+1);
3183 const QList<char*> oldArgumentList( s_argumentList );
3184 s_argumentList.clear();
3185 for ( i = 0 ; i < max_wordlist; i++ )
3187 if (!wordlist[i])
3188 break;
3189 s_argumentList.push_back( wordlist[i] );
3191 const int onff=newline_for_fun;
3192 if (mandoc_command)
3193 scan_troff_mandoc( work + 1, 0, NULL );
3194 else
3195 scan_troff( work + 1, 0, NULL);
3196 delete[] work;
3197 newline_for_fun=onff;
3198 s_argumentList = oldArgumentList;
3200 for (i=0; i<words; i++) delete [] wordlist[i];
3201 *sl='\n';
3202 s_dollarZero = oldDollarZero;
3203 kDebug(7107) << "ENDING MACRO: " << BYTEARRAY( macroName );
3205 else
3207 kDebug(7107) << "REQUEST: " << BYTEARRAY( macroName );
3208 switch (int request = get_request(c, nlen))
3210 case REQ_ab: // groff(7) "ABort"
3212 h=c+j;
3213 while (*h && *h !='\n') h++;
3214 *h='\0';
3215 if (scaninbuff && buffpos)
3217 buffer[buffpos]='\0';
3218 kDebug(7107) << "ABORT: " << buffer;
3220 // ### TODO find a way to display it to the user
3221 kDebug(7107) << "Aborting: .ab " << (c+j);
3222 return 0;
3223 break;
3225 case REQ_An: // mdoc(7) "Author Name"
3227 c+=j;
3228 c=scan_troff_mandoc(c,1,0);
3229 break;
3231 case REQ_di: // groff(7) "end current DIversion"
3233 kDebug(7107) << "Start .di";
3234 c+=j;
3235 if (*c=='\n')
3237 ++c;
3238 break;
3240 const QByteArray name ( scan_identifier( c ) );
3241 while (*c && *c!='\n') c++;
3242 c++;
3243 h=c;
3244 while (*c && qstrncmp(c,".di",3)) while (*c && *c++!='\n');
3245 *c='\0';
3246 char* result=0;
3247 scan_troff(h,0,&result);
3248 QMap<QByteArray,StringDefinition>::iterator it=s_stringDefinitionMap.find(name);
3249 if (it==s_stringDefinitionMap.end())
3251 StringDefinition def;
3252 def.m_length=0;
3253 def.m_output=result;
3254 s_stringDefinitionMap.insert(name,def);
3256 else
3258 (*it).m_length=0;
3259 (*it).m_output=result;
3261 delete[] result;
3262 if (*c) *c='.';
3263 c=skip_till_newline(c);
3264 kDebug(7107) << "end .di";
3265 break;
3267 case REQ_ds: // groff(7) "Define String variable"
3268 mode=true;
3269 case REQ_as: // groff (7) "Append String variable"
3271 kDebug(7107) << "start .ds/.as";
3272 int oldcurpos=curpos;
3273 c+=j;
3274 const QByteArray name( scan_identifier( c) );
3275 if ( name.isEmpty() )
3276 break;
3277 while (*c && isspace(*c)) c++;
3278 if (*c && *c=='"') c++;
3279 single_escape=true;
3280 curpos=0;
3281 char* result=0;
3282 c=scan_troff(c,1,&result);
3283 QMap<QByteArray,StringDefinition>::iterator it=s_stringDefinitionMap.find(name);
3284 if (it==s_stringDefinitionMap.end())
3286 StringDefinition def;
3287 def.m_length=curpos;
3288 def.m_output=result;
3289 s_stringDefinitionMap.insert(name,def);
3291 else
3293 if (mode)
3294 { // .ds Defining String
3295 (*it).m_length=curpos;
3296 (*it).m_output=result;
3298 else
3299 { // .as Appending String
3300 (*it).m_length+=curpos;
3301 (*it).m_output+=result;
3304 delete[] result;
3305 single_escape=false;
3306 curpos=oldcurpos;
3307 kDebug(7107) << "end .ds/.as";
3308 break;
3310 case REQ_br: // groff(7) "line BReak"
3312 if (still_dd)
3313 out_html("<DD>"); // ### VERIFY (does not look like generating good HTML)
3314 else
3315 out_html("<BR>\n");
3316 curpos=0;
3317 c=c+j;
3318 if (c[0]==escapesym) c=scan_escape(c+1);
3319 c=skip_till_newline(c);
3320 break;
3322 case REQ_c2: // groff(7) "reset non-break Control character" (2 means non-break)
3324 c=c+j;
3325 if (*c!='\n')
3326 nobreaksym=*c;
3327 else
3328 nobreaksym='\'';
3329 c=skip_till_newline(c);
3330 break;
3332 case REQ_cc: // groff(7) "reset Control Character"
3334 c=c+j;
3335 if (*c!='\n')
3336 controlsym=*c;
3337 else
3338 controlsym='.';
3339 c=skip_till_newline(c);
3340 break;
3342 case REQ_ce: // groff (7) "CEnter"
3344 c=c+j;
3345 if (*c=='\n')
3346 i=1;
3347 else
3349 i=0;
3350 while ('0'<=*c && *c<='9')
3352 i=i*10+*c-'0';
3353 c++;
3356 c=skip_till_newline(c);
3357 /* center next i lines */
3358 if (i>0)
3360 out_html("<CENTER>\n");
3361 while (i && *c)
3363 char *line=NULL;
3364 c=scan_troff(c,1, &line);
3365 if (line && qstrncmp(line, "<BR>", 4))
3367 out_html(line);
3368 out_html("<BR>\n");
3369 delete [] line; // ### FIXME: memory leak!
3370 i--;
3373 out_html("</CENTER>\n");
3374 curpos=0;
3376 break;
3378 case REQ_ec: // groff(7) "reset Escape Character"
3380 c=c+j;
3381 if (*c!='\n')
3382 escapesym=*c;
3383 else
3384 escapesym='\\';
3385 break;
3386 c=skip_till_newline(c);
3388 case REQ_eo: // groff(7) "turn Escape character Off"
3390 escapesym='\0';
3391 c=skip_till_newline(c);
3392 break;
3394 case REQ_ex: // groff(7) "EXit"
3396 return 0;
3397 break;
3399 case REQ_fc: // groff(7) "set Field and pad Character"
3401 c=c+j;
3402 if (*c=='\n')
3403 fieldsym=padsym='\0';
3404 else
3406 fieldsym=c[0];
3407 padsym=c[1];
3409 c=skip_till_newline(c);
3410 break;
3412 case REQ_fi: // groff(7) "FIll"
3414 if (!fillout)
3416 out_html(set_font("R"));
3417 out_html(change_to_size('0'));
3418 out_html("</PRE>\n");
3420 curpos=0;
3421 fillout=1;
3422 c=skip_till_newline(c);
3423 break;
3425 case REQ_ft: // groff(7) "FonT"
3427 c += j;
3428 h = skip_till_newline( c );
3429 const char oldChar = *h;
3430 *h = 0;
3431 const QByteArray name = c;
3432 // ### TODO: name might contain a variable
3433 if ( name.isEmpty() )
3434 out_html( set_font( "P" ) ); // Previous font
3435 else
3436 out_html( set_font( name ) );
3437 *h = oldChar;
3438 c = h;
3439 break;
3441 case REQ_el: // groff(7) "ELse"
3443 int ifelseval = s_ifelseval.pop();
3444 /* .el anything : else part of if else */
3445 if (ifelseval)
3447 c=c+j;
3448 c[-1]='\n';
3449 c=scan_troff(c,1,NULL);
3451 else
3452 c=skip_till_newline(c+j);
3453 break;
3455 case REQ_ie: // groff(7) "If with Else"
3456 /* .ie c anything : then part of if else */
3457 case REQ_if: // groff(7) "IF"
3459 /* .if c anything
3460 * .if !c anything
3461 * .if N anything
3462 * .if !N anything
3463 * .if 'string1'string2' anything
3464 * .if !'string1'string2' anything
3466 c=c+j;
3467 c=scan_expression(c, &i);
3468 if (request == REQ_ie)
3470 int ifelseval=!i;
3471 s_ifelseval.push( ifelseval );
3473 if (i)
3475 *c='\n';
3476 c++;
3477 c=scan_troff(c,1,NULL);
3479 else
3480 c=skip_till_newline(c);
3481 break;
3483 case REQ_ig: // groff(7) "IGnore"
3485 const char *endwith="..\n";
3486 i=3;
3487 c=c+j;
3488 if (*c!='\n' && *c != '\\')
3490 /* Not newline or comment */
3491 endwith=c-1;i=1;
3492 c[-1]='.';
3493 while (*c && *c!='\n') c++,i++;
3495 c++;
3496 while (*c && qstrncmp(c,endwith,i)) while (*c++!='\n');
3497 while (*c && *c++!='\n');
3498 break;
3500 case REQ_nf: // groff(7) "No Filling"
3502 if (fillout)
3504 out_html(set_font("R"));
3505 out_html(change_to_size('0'));
3506 out_html("<PRE>\n");
3508 curpos=0;
3509 fillout=0;
3510 c=skip_till_newline(c);
3511 break;
3513 case REQ_ps: // groff(7) "previous Point Size"
3515 c=c+j;
3516 if (*c=='\n')
3517 out_html(change_to_size('0'));
3518 else
3520 j=0; i=0;
3521 if (*c=='-')
3523 j= -1;
3524 c++;
3526 else if (*c=='+')
3527 j=1;c++;
3528 c=scan_expression(c, &i);
3529 if (!j)
3531 j=1;
3532 if (i>5) i=i-10;
3534 out_html(change_to_size(i*j));
3536 c=skip_till_newline(c);
3537 break;
3539 case REQ_sp: // groff(7) "SKip one line"
3541 c=c+j;
3542 if (fillout)
3543 out_html("<br><br>");
3544 else
3546 out_html(NEWLINE);
3548 curpos=0;
3549 c=skip_till_newline(c);
3550 break;
3552 case REQ_so: // groff(7) "Include SOurce file"
3554 char *buf;
3555 char *name=NULL;
3556 curpos=0;
3557 c=c+j;
3558 if (*c=='/')
3559 h=c;
3560 else
3562 h=c-3;
3563 h[0]='.';
3564 h[1]='.';
3565 h[2]='/';
3567 while (*c!='\n') c++;
3568 *c='\0';
3569 scan_troff(h,1, &name);
3570 if (name[3]=='/')
3571 h=name+3;
3572 else
3573 h=name;
3574 /* this works alright, except for section 3 */
3575 buf=read_man_page(h);
3576 if (!buf)
3578 kDebug(7107) << "Unable to open or read file: .so " << (h);
3579 out_html("<BLOCKQUOTE>"
3580 "man2html: unable to open or read file.\n");
3581 out_html(h);
3582 out_html("</BLOCKQUOTE>\n");
3584 else
3585 scan_troff(buf+1,0,NULL);
3586 delete [] buf;
3587 delete [] name;
3589 *c++='\n';
3590 break;
3592 case REQ_ta: // gorff(7) "set TAbulators"
3594 c=c+j;
3595 j=0;
3596 while (*c!='\n')
3598 sl=scan_expression(c, &tabstops[j]);
3599 if (j>0 && (*c=='-' || *c=='+')) tabstops[j]+=tabstops[j-1];
3600 c=sl;
3601 while (*c==' ' || *c=='\t') c++;
3602 j++;
3604 maxtstop=j;
3605 curpos=0;
3606 break;
3608 case REQ_ti: // groff(7) "Temporary Indent"
3610 /*while (itemdepth || dl_set[itemdepth]) {
3611 out_html("</DL>\n");
3612 if (dl_set[itemdepth]) dl_set[itemdepth]=0;
3613 else itemdepth--;
3615 out_html("<BR>\n");
3616 c=c+j;
3617 c=scan_expression(c, &j);
3618 for (i=0; i<j; i++) out_html("&nbsp;");
3619 curpos=j;
3620 c=skip_till_newline(c);
3621 break;
3623 case REQ_tm: // groff(7) "TerMinal" ### TODO: what are useful uses for it
3625 c=c+j;
3626 h=c;
3627 while (*c!='\n') c++;
3628 *c='\0';
3629 kDebug(7107) << ".tm " << (h);
3630 *c='\n';
3631 break;
3633 case REQ_B: // man(7) "Bold"
3634 mode=1;
3635 case REQ_I: // man(7) "Italic"
3637 /* parse one line in a certain font */
3638 out_html( set_font( mode?"B":"I" ) );
3639 fill_words(c, wordlist, &words, false, 0);
3640 c=c+j;
3641 if (*c=='\n') c++;
3642 c=scan_troff(c, 1, NULL);
3643 out_html(set_font("R"));
3644 out_html(NEWLINE);
3645 if (fillout)
3646 curpos++;
3647 else
3648 curpos=0;
3649 break;
3651 case REQ_Fd: // mdoc(7) "Function Definition"
3653 // Normal text must be printed in bold, punctuation in regular font
3654 c+=j;
3655 if (*c=='\n') c++; // ### TODO: verify
3656 sl=fill_words(c, wordlist, &words, true, &c);
3657 for (i=0; i<words; i++)
3659 wordlist[i][-1]=' ';
3660 // ### FIXME In theory, only a single punctuation character is recognized as punctuation
3661 if ( is_mdoc_punctuation ( *wordlist[i] ) )
3662 out_html( set_font ( "R" ) );
3663 else
3664 out_html( set_font ( "B" ) );
3665 scan_troff(wordlist[i],1,NULL);
3666 out_html(" ");
3668 // In the mdoc synopsis, there are automatic line breaks (### TODO: before or after?)
3669 if (mandoc_synopsis)
3671 out_html("<br>");
3673 out_html(set_font("R"));
3674 out_html(NEWLINE);
3675 if (!fillout)
3676 curpos=0;
3677 else
3678 curpos++;
3679 break;
3681 case REQ_Fn: // mdoc(7) for "Function calls"
3683 // brackets and commas have to be inserted automatically
3684 c+=j;
3685 if (*c=='\n') c++;
3686 sl=fill_words(c, wordlist, &words, true, &c);
3687 if ( words )
3689 for (i=0; i<words; i++)
3691 wordlist[i][-1]=' ';
3692 if ( i )
3693 out_html( set_font( "I" ) );
3694 else
3695 out_html( set_font( "B" ) );
3696 scan_troff(wordlist[i],1,NULL);
3697 out_html( set_font( "R" ) );
3698 if (i==0)
3700 out_html(" (");
3702 else if (i<words-1)
3703 out_html(", ");
3705 out_html(")");
3707 out_html(set_font("R"));
3708 if (mandoc_synopsis)
3709 out_html("<br>");
3710 out_html(NEWLINE);
3711 if (!fillout)
3712 curpos=0;
3713 else
3714 curpos++;
3715 break;
3717 case REQ_Fo: // mdoc(7) "Function definition Opening"
3719 char* font[2] = { (char*)"B", (char*)"R" };
3720 c+=j;
3721 if (*c=='\n') c++;
3722 char *eol=strchr(c,'\n');
3723 char *semicolon=strchr(c,';');
3724 if ((semicolon!=0) && (semicolon<eol)) *semicolon=' ';
3726 sl=fill_words(c, wordlist, &words, true, &c);
3727 // Normally a .Fo has only one parameter
3728 for (i=0; i<words; i++)
3730 wordlist[i][-1]=' ';
3731 out_html(set_font(font[i&1]));
3732 scan_troff(wordlist[i],1,NULL);
3733 if (i==0)
3735 out_html(" (");
3737 // ### TODO What should happen if there is more than one argument
3738 // else if (i<words-1) out_html(", ");
3740 function_argument=1; // Must be > 0
3741 out_html(set_font("R"));
3742 out_html(NEWLINE);
3743 if (!fillout)
3744 curpos=0;
3745 else
3746 curpos++;
3747 break;
3749 case REQ_Fc:// mdoc(7) "Function definition Close"
3751 // .Fc has no parameter
3752 c+=j;
3753 c=skip_till_newline(c);
3754 char* font[2] = { (char*)"B", (char*)"R" };
3755 out_html(set_font(font[i&1]));
3756 out_html(")");
3757 out_html(set_font("R"));
3758 if (mandoc_synopsis)
3759 out_html("<br>");
3760 out_html(NEWLINE);
3761 if (!fillout)
3762 curpos=0;
3763 else
3764 curpos++;
3765 function_argument=0; // Reset the count variable
3766 break;
3768 case REQ_Fa: // mdoc(7) "Function definition argument"
3770 char* font[2] = { (char*)"B", (char*)"R" };
3771 c+=j;
3772 if (*c=='\n') c++;
3773 sl=fill_words(c, wordlist, &words, true, &c);
3774 out_html(set_font(font[i&1]));
3775 // function_argument==0 means that we had no .Fo before, e.g. in mdoc.samples(7)
3776 if (function_argument > 1)
3778 out_html(", ");
3779 curpos+=2;
3780 function_argument++;
3782 else if (function_argument==1)
3784 // We are only at the first parameter
3785 function_argument++;
3787 for (i=0; i<words; i++)
3789 wordlist[i][-1]=' ';
3790 scan_troff(wordlist[i],1,NULL);
3792 out_html(set_font("R"));
3793 if (!fillout)
3794 curpos=0;
3795 else
3796 curpos++;
3797 break;
3800 case REQ_OP: /* groff manpages use this construction */
3802 /* .OP a b : [ <B>a</B> <I>b</I> ] */
3803 mode=true;
3804 out_html(set_font("R"));
3805 out_html("[");
3806 curpos++;
3807 request_mixed_fonts( c, j, "B", "I", true, false );
3808 break;
3809 // Do not break!
3811 case REQ_Ft: //perhaps "Function return type"
3813 request_mixed_fonts( c, j, "B", "I", false, true );
3814 break;
3816 case REQ_BR:
3818 request_mixed_fonts( c, j, "B", "R", false, false );
3819 break;
3821 case REQ_BI:
3823 request_mixed_fonts( c, j, "B", "I", false, false );
3824 break;
3826 case REQ_IB:
3828 request_mixed_fonts( c, j, "I", "B", false, false );
3829 break;
3831 case REQ_IR:
3833 request_mixed_fonts( c, j, "I", "R", false, false );
3834 break;
3836 case REQ_RB:
3838 request_mixed_fonts( c, j, "R", "B", false, false );
3839 break;
3841 case REQ_RI:
3843 request_mixed_fonts( c, j, "R", "I", false, false );
3844 break;
3846 case REQ_DT: // man(7) "Default Tabulators"
3848 for (j=0;j<20; j++) tabstops[j]=(j+1)*8;
3849 maxtstop=20;
3850 c=skip_till_newline(c);
3851 break;
3853 case REQ_IP: // man(7) "Indent Paragraph"
3855 sl=fill_words(c+j, wordlist, &words, true, &c);
3856 if (!dl_set[itemdepth])
3858 out_html("<DL>\n");
3859 dl_set[itemdepth]=1;
3861 out_html("<DT>");
3862 if (words)
3863 scan_troff(wordlist[0], 1,NULL);
3864 out_html("<DD>");
3865 curpos=0;
3866 break;
3868 case REQ_TP: // man(7) "hanging Tag Paragraph"
3870 if (!dl_set[itemdepth])
3872 out_html("<br><br><DL>\n");
3873 dl_set[itemdepth]=1;
3875 out_html("<DT>");
3876 c=skip_till_newline(c);
3877 /* somewhere a definition ends with '.TP' */
3878 if (!*c)
3879 still_dd=true;
3880 else
3882 // HACK for proc(5)
3883 while (c[0]=='.' && c[1]=='\\' && c[2]=='\"')
3885 // We have a comment, so skip the line
3886 c=skip_till_newline(c);
3888 c=scan_troff(c,1,NULL);
3889 out_html("<DD>");
3891 curpos=0;
3892 break;
3894 case REQ_IX: // "INdex" ### TODO: where is it defined?
3896 /* general index */
3897 c=skip_till_newline(c);
3898 break;
3900 case REQ_P: // man(7) "Paragraph"
3901 case REQ_LP:// man(7) "Paragraph"
3902 case REQ_PP:// man(7) "Paragraph; reset Prevailing indent"
3904 if (dl_set[itemdepth])
3906 out_html("</DL>\n");
3907 dl_set[itemdepth]=0;
3909 if (fillout)
3910 out_html("<br><br>\n");
3911 else
3913 out_html(NEWLINE);
3915 curpos=0;
3916 c=skip_till_newline(c);
3917 break;
3919 case REQ_HP: // man(7) "Hanging indent Paragraph"
3921 if (!dl_set[itemdepth])
3923 out_html("<DL>");
3924 dl_set[itemdepth]=1;
3926 out_html("<DT>\n");
3927 still_dd=true;
3928 c=skip_till_newline(c);
3929 curpos=0;
3930 break;
3932 case REQ_PD: // man(7) "Paragraph Distance"
3934 c=skip_till_newline(c);
3935 break;
3937 case REQ_Rs: // mdoc(7) "Relative margin Start"
3938 case REQ_RS: // man(7) "Relative margin Start"
3940 sl=fill_words(c+j, wordlist, &words, true, 0);
3941 j=1;
3942 if (words>0) scan_expression(wordlist[0], &j);
3943 if (j>=0)
3945 itemdepth++;
3946 dl_set[itemdepth]=0;
3947 out_html("<DL><DT><DD>");
3948 c=skip_till_newline(c);
3949 curpos=0;
3950 break;
3953 case REQ_Re: // mdoc(7) "Relative margin End"
3954 case REQ_RE: // man(7) "Relative margin End"
3956 if (itemdepth > 0)
3958 if (dl_set[itemdepth]) out_html("</DL>");
3959 out_html("</DL>\n");
3960 itemdepth--;
3962 c=skip_till_newline(c);
3963 curpos=0;
3964 break;
3966 case REQ_SB: // man(7) "Small; Bold"
3968 out_html(set_font("B"));
3969 out_html("<small>");
3970 trans_char(c,'"','\a'); // ### VERIFY
3971 c=scan_troff(c+j, 1, NULL);
3972 out_html("</small>");
3973 out_html(set_font("R"));
3974 break;
3976 case REQ_SM: // man(7) "SMall"
3978 c=c+j;
3979 if (*c=='\n') c++;
3980 out_html("<small>");
3981 trans_char(c,'"','\a'); // ### VERIFY
3982 c=scan_troff(c,1,NULL);
3983 out_html("</small>");
3984 break;
3986 case REQ_Ss: // mdoc(7) "Sub Section"
3987 mandoc_command = 1;
3988 case REQ_SS: // man(7) "Sub Section"
3989 mode=true;
3990 case REQ_Sh: // mdoc(7) "Sub Header"
3991 /* hack for fallthru from above */
3992 mandoc_command = !mode || mandoc_command;
3993 case REQ_SH: // man(7) "Sub Header"
3995 c=c+j;
3996 if (*c=='\n') c++;
3997 while (itemdepth || dl_set[itemdepth])
3999 out_html("</DL>\n");
4000 if (dl_set[itemdepth])
4001 dl_set[itemdepth]=0;
4002 else if (itemdepth > 0)
4003 itemdepth--;
4005 out_html(set_font("R"));
4006 out_html(change_to_size(0));
4007 if (!fillout)
4009 fillout=1;
4010 out_html("</PRE>");
4012 trans_char(c,'"', '\a');
4013 if (section)
4015 out_html("</div>\n");
4016 section=0;
4018 if (mode)
4019 out_html("\n<H3>");
4020 else
4021 out_html("\n<H2>");
4022 mandoc_synopsis = qstrncmp(c, "SYNOPSIS", 8) == 0;
4023 c = mandoc_command ? scan_troff_mandoc(c,1,NULL) : scan_troff(c,1,NULL);
4024 if (mode)
4025 out_html("</H3>\n");
4026 else
4027 out_html("</H2>\n");
4028 out_html("<div>\n");
4030 section=1;
4031 curpos=0;
4032 break;
4034 case REQ_Sx: // mdoc(7)
4036 // reference to a section header
4037 out_html(set_font("B"));
4038 trans_char(c,'"','\a');
4039 c=c+j;
4040 if (*c=='\n') c++;
4041 c=scan_troff(c, 1, NULL);
4042 out_html(set_font("R"));
4043 out_html(NEWLINE);
4044 if (fillout)
4045 curpos++;
4046 else
4047 curpos=0;
4048 break;
4050 case REQ_TS: // Table Start tbl(1)
4052 c=scan_table(c);
4053 break;
4055 case REQ_Dt: /* mdoc(7) */
4056 mandoc_command = true;
4057 case REQ_TH: // man(7) "Title Header"
4059 if (!output_possible)
4061 sl = fill_words(c+j, wordlist, &words, true, &c);
4062 // ### TODO: the page should be displayed even if it is "anonymous" (words==0)
4063 if (words>=1)
4065 for (i=1; i<words; i++) wordlist[i][-1]='\0';
4066 *sl='\0';
4067 for (i=0; i<words; i++)
4069 if (wordlist[i][0] == '\007')
4070 wordlist[i]++;
4071 if (wordlist[i][qstrlen(wordlist[i])-1] == '\007')
4072 wordlist[i][qstrlen(wordlist[i])-1] = 0;
4074 output_possible=true;
4075 out_html( DOCTYPE"<HTML>\n<HEAD>\n");
4076 #ifdef SIMPLE_MAN2HTML
4077 // Most English man pages are in ISO-8859-1
4078 out_html("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\">\n");
4079 #else
4080 //let KEncodingDetector decide. (it should be better than charset="System")
4081 //TODO can we check if the charset could be determined from path? like share/man/ru.UTF8
4082 // kio_man transforms from local to UTF-8
4083 // out_html("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=");
4084 // out_html(QTextCodec::codecForLocale()->name());
4085 // out_html("\">\n");
4086 #endif
4087 out_html("<TITLE>");
4088 out_html(scan_troff(wordlist[0], 0, NULL));
4089 out_html( " Manpage</TITLE>\n");
4091 // KDE defaults.
4092 out_html( "<link rel=\"stylesheet\" href=\"help:/common/kde-default.css\"");
4093 out_html( " type=\"text/css\">\n" );
4095 // Output our custom stylesheet.
4096 out_html( "<link rel=\"stylesheet\" href=\"");
4097 out_html(cssFile);
4098 out_html("\" type=\"text/css\">\n" );
4100 // Some elements need background images, but this
4101 // could not be included in the stylesheet,
4102 // include it now.
4103 out_html("<style>\n#header_top { "
4104 "background-image: url(\"help:/common/top.jpg\"); }\n\n"
4105 "#header_top div { "
4106 "background-image: url(\"help:/common/top-left.jpg\"); }\n\n"
4107 "#header_top div div { "
4108 "background-image: url(\"help:/common/top-right.jpg\"); }\n\n"
4109 "</style>\n\n"
4112 out_html( "<meta name=\"ROFF Type\" content=\"");
4113 if (mandoc_command)
4114 out_html("mdoc");
4115 else
4116 out_html("man");
4117 out_html("\">\n");
4119 out_html( "</HEAD>\n\n" );
4120 out_html("<BODY>\n\n" );
4122 out_html("<div id=\"header\"><div id=\"header_top\">\n");
4123 out_html("<div><div>\n");
4124 out_html("<img src=\"help:/common/top-kde.jpg\"> ");
4125 out_html( scan_troff(wordlist[0], 0, NULL ) );
4126 out_html(" - KDE Man Page Viewer");
4127 out_html("</div></div></div></div>\n");
4129 out_html("<div style=\"margin-left: 5em; margin-right: 5em;\">\n");
4130 out_html("<h1>" );
4131 out_html( scan_troff(wordlist[0], 0, NULL ) );
4132 out_html( "</h1>\n" );
4133 if (words>1)
4135 out_html("Section: " );
4136 if (!mandoc_command && words>4)
4137 out_html(scan_troff(wordlist[4], 0, NULL) );
4138 else
4139 out_html(section_name(wordlist[1]));
4140 out_html(" (");
4141 out_html(scan_troff(wordlist[1], 0, NULL));
4142 out_html(")\n");
4144 else
4146 out_html("Section not specified");
4148 *sl='\n';
4151 else
4153 kWarning(7107) << ".TH found but output not possible" ;
4154 c=skip_till_newline(c);
4156 curpos=0;
4157 break;
4159 case REQ_TX: // mdoc(7)
4161 sl=fill_words(c+j, wordlist, &words, true, &c);
4162 *sl='\0';
4163 out_html(set_font("I"));
4164 if (words>1) wordlist[1][-1]='\0';
4165 const char *c2=lookup_abbrev(wordlist[0]);
4166 curpos+=qstrlen(c2);
4167 out_html(c2);
4168 out_html(set_font("R"));
4169 if (words>1)
4170 out_html(wordlist[1]);
4171 *sl='\n';
4172 break;
4174 case REQ_rm: // groff(7) "ReMove"
4175 /* .rm xx : Remove request, macro or string */
4176 mode=true;
4177 case REQ_rn: // groff(7) "ReName"
4178 /* .rn xx yy : Rename request, macro or string xx to yy */
4180 kDebug(7107) << "start .rm/.rn";
4181 c+=j;
4182 const QByteArray name( scan_identifier( c ) );
4183 if ( name.isEmpty() )
4185 kDebug(7107) << "EXCEPTION: empty origin string to remove/rename";
4186 break;
4188 QByteArray name2;
4189 if ( !mode )
4191 while (*c && isspace(*c) && *c!='\n') ++c;
4192 name2 = scan_identifier( c );
4193 if ( name2.isEmpty() )
4195 kDebug(7107) << "EXCEPTION: empty destination string to rename";
4196 break;
4199 c=skip_till_newline(c);
4200 QMap<QByteArray,StringDefinition>::iterator it=s_stringDefinitionMap.find(name);
4201 if (it==s_stringDefinitionMap.end())
4203 kDebug(7107) << "EXCEPTION: cannot find string to rename or remove: " << BYTEARRAY( name );
4205 else
4207 if (mode)
4209 // .rm ReMove
4210 s_stringDefinitionMap.remove(name); // ### QT4: removeAll
4212 else
4214 // .rn ReName
4215 StringDefinition def=(*it);
4216 s_stringDefinitionMap.remove(name); // ### QT4: removeAll
4217 s_stringDefinitionMap.insert(name2,def);
4220 kDebug(7107) << "end .rm/.rn";
4221 break;
4223 case REQ_nx:
4224 case REQ_in: // groff(7) "INdent"
4226 /* .in +-N : Indent */
4227 c=skip_till_newline(c);
4228 break;
4230 case REQ_nr: // groff(7) "Number Register"
4232 kDebug(7107) << "start .nr";
4233 c += j;
4234 const QByteArray name( scan_identifier( c ) );
4235 if ( name.isEmpty() )
4237 kDebug(7107) << "EXCEPTION: empty name for register variable";
4238 break;
4240 while ( *c && ( *c==' ' || *c=='\t' ) ) c++;
4241 int sign = 0;
4242 if ( *c && ( *c == '+' || *c == '-' ) )
4244 if ( *c == '+' )
4245 sign = 1;
4246 else if ( *c == '-' )
4247 sign = -1;
4249 int value = 0;
4250 int increment = 0;
4251 c=scan_expression( c, &value );
4252 if ( *c && *c!='\n')
4254 while ( *c && ( *c==' ' || *c=='\t' ) ) c++;
4255 c=scan_expression( c, &increment );
4257 c = skip_till_newline( c );
4258 QMap <QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find( name );
4259 if ( it == s_numberDefinitionMap.end() )
4261 if ( sign < 1 )
4262 value = -value;
4263 NumberDefinition def( value, increment );
4264 s_numberDefinitionMap.insert( name, def );
4266 else
4268 if ( sign > 0 )
4269 (*it).m_value += value;
4270 else if ( sign < 0 )
4271 (*it).m_value += - value;
4272 else
4273 (*it).m_value = value;
4274 (*it).m_increment = increment;
4276 kDebug(7107) << "end .nr";
4277 break;
4279 case REQ_am: // groff(7) "Append Macro"
4280 /* .am xx yy : append to a macro. */
4281 /* define or handle as .ig yy */
4282 mode=true;
4283 case REQ_de: // groff(7) "DEfine macro"
4284 /* .de xx yy : define or redefine macro xx; end at .yy (..) */
4285 /* define or handle as .ig yy */
4287 kDebug(7107) << "Start .am/.de";
4288 c+=j;
4289 char *next_line;
4290 sl = fill_words(c, wordlist, &words, true, &next_line);
4291 char *nameStart = wordlist[0];
4292 c = nameStart;
4293 while (*c && (*c != ' ') && (*c != '\n')) c++;
4294 *c = '\0';
4295 const QByteArray name(nameStart);
4297 QByteArray endmacro;
4298 if (words == 1)
4300 endmacro="..";
4302 else
4304 endmacro=".";
4305 c = wordlist[1];
4306 while (*c && (*c != ' ') && (*c != '\n'))
4307 endmacro+=*c++;
4309 c = next_line;
4310 sl=c;
4311 const int length=qstrlen(endmacro);
4312 while (*c && qstrncmp(c,endmacro,length))
4313 c=skip_till_newline(c);
4315 QByteArray macro;
4316 while (sl!=c)
4318 if (sl[0]=='\\' && sl[1]=='\\')
4320 macro+='\\';
4321 sl++;
4323 else
4324 macro+=*sl;
4325 sl++;
4328 QMap<QByteArray,StringDefinition>::iterator it=s_stringDefinitionMap.find(name);
4329 if (it==s_stringDefinitionMap.end())
4331 StringDefinition def;
4332 def.m_length=0;
4333 def.m_output=macro;
4334 s_stringDefinitionMap.insert(name,def);
4336 else if (mode)
4338 // .am Append Macro
4339 (*it).m_length=0; // It could be formerly a string
4340 if ( ! (*it).m_output.endsWith( '\n' ) )
4341 (*it).m_output+='\n';
4342 (*it).m_output+=macro;
4344 else
4346 // .de DEfine macro
4347 (*it).m_length=0; // It could be formerly a string
4348 (*it).m_output=macro;
4350 c=skip_till_newline(c);
4351 kDebug(7107) << "End .am/.de";
4352 break;
4354 case REQ_Bl: // mdoc(7) "Begin List"
4356 char list_options[NULL_TERMINATED(MED_STR_MAX)];
4357 char *nl = strchr(c,'\n');
4358 c=c+j;
4359 if (dl_set[itemdepth])
4360 /* These things can nest. */
4361 itemdepth++;
4362 if (nl)
4364 /* Parse list options */
4365 strlimitcpy(list_options, c, nl - c, MED_STR_MAX);
4367 if (strstr(list_options, "-bullet"))
4369 /* HTML Unnumbered List */
4370 dl_set[itemdepth] = BL_BULLET_LIST;
4371 out_html("<UL>\n");
4373 else if (strstr(list_options, "-enum"))
4375 /* HTML Ordered List */
4376 dl_set[itemdepth] = BL_ENUM_LIST;
4377 out_html("<OL>\n");
4379 else
4381 /* HTML Descriptive List */
4382 dl_set[itemdepth] = BL_DESC_LIST;
4383 out_html("<DL>\n");
4385 if (fillout)
4386 out_html("<br><br>\n");
4387 else
4389 out_html(NEWLINE);
4391 curpos=0;
4392 c=skip_till_newline(c);
4393 break;
4395 case REQ_El: // mdoc(7) "End List"
4397 c=c+j;
4398 if (dl_set[itemdepth] & BL_DESC_LIST)
4399 out_html("</DL>\n");
4400 else if (dl_set[itemdepth] & BL_BULLET_LIST)
4401 out_html("</UL>\n");
4402 else if (dl_set[itemdepth] & BL_ENUM_LIST)
4403 out_html("</OL>\n");
4404 dl_set[itemdepth]=0;
4405 if (itemdepth > 0) itemdepth--;
4406 if (fillout)
4407 out_html("<br><br>\n");
4408 else
4410 out_html(NEWLINE);
4412 curpos=0;
4413 c=skip_till_newline(c);
4414 break;
4416 case REQ_It: // mdoc(7) "list ITem"
4418 c=c+j;
4419 if (qstrncmp(c, "Xo", 2) == 0 && isspace(*(c+2)))
4420 c = skip_till_newline(c);
4421 if (dl_set[itemdepth] & BL_DESC_LIST)
4423 out_html("<DT>");
4424 out_html(set_font("B"));
4425 if (*c=='\n')
4427 /* Don't allow embedded comms after a newline */
4428 c++;
4429 c=scan_troff(c,1,NULL);
4431 else
4433 /* Do allow embedded comms on the same line. */
4434 c=scan_troff_mandoc(c,1,NULL);
4436 out_html(set_font("R"));
4437 out_html(NEWLINE);
4438 out_html("<DD>");
4440 else if (dl_set[itemdepth] & (BL_BULLET_LIST | BL_ENUM_LIST))
4442 out_html("<LI>");
4443 c=scan_troff_mandoc(c,1,NULL);
4444 out_html(NEWLINE);
4446 if (fillout)
4447 curpos++;
4448 else
4449 curpos=0;
4450 break;
4452 case REQ_Bk: /* mdoc(7) */
4453 case REQ_Ek: /* mdoc(7) */
4454 case REQ_Dd: /* mdoc(7) */
4455 case REQ_Os: // mdoc(7) "Operating System"
4457 trans_char(c,'"','\a');
4458 c=c+j;
4459 if (*c=='\n') c++;
4460 c=scan_troff_mandoc(c, 1, NULL);
4461 out_html(NEWLINE);
4462 if (fillout)
4463 curpos++;
4464 else
4465 curpos=0;
4466 break;
4468 case REQ_Bt: // mdoc(7) "Beta Test"
4470 trans_char(c,'"','\a');
4471 c=c+j;
4472 out_html(" is currently in beta test.");
4473 if (fillout)
4474 curpos++;
4475 else
4476 curpos=0;
4477 break;
4479 case REQ_At: /* mdoc(7) */
4480 case REQ_Fx: /* mdoc(7) */
4481 case REQ_Nx: /* mdoc(7) */
4482 case REQ_Ox: /* mdoc(7) */
4483 case REQ_Bx: /* mdoc(7) */
4484 case REQ_Ux: /* mdoc(7) */
4485 case REQ_Dx: /* mdoc(7) */
4487 bool parsable=true;
4488 trans_char(c,'"','\a');
4489 c=c+j;
4490 if (*c=='\n') c++;
4491 if (request==REQ_At)
4493 out_html("AT&amp;T UNIX ");
4494 parsable=false;
4496 else if (request==REQ_Fx)
4498 out_html("FreeBSD ");
4499 parsable=false;
4501 else if (request==REQ_Nx)
4502 out_html("NetBSD ");
4503 else if (request==REQ_Ox)
4504 out_html("OpenBSD ");
4505 else if (request==REQ_Bx)
4506 out_html("BSD ");
4507 else if (request==REQ_Ux)
4508 out_html("UNIX ");
4509 else if (request==REQ_Dx)
4510 out_html("DragonFly ");
4511 if (parsable)
4512 c=scan_troff_mandoc(c,1,0);
4513 else
4514 c=scan_troff(c,1,0);
4515 if (fillout)
4516 curpos++;
4517 else
4518 curpos=0;
4519 break;
4521 case REQ_Dl: /* mdoc(7) */
4523 c=c+j;
4524 out_html(NEWLINE);
4525 out_html("<BLOCKQUOTE>");
4526 if (*c=='\n') c++;
4527 c=scan_troff_mandoc(c, 1, NULL);
4528 out_html("</BLOCKQUOTE>");
4529 if (fillout)
4530 curpos++;
4531 else
4532 curpos=0;
4533 break;
4535 case REQ_Bd: /* mdoc(7) */
4536 { /* Seems like a kind of example/literal mode */
4537 char bd_options[NULL_TERMINATED(MED_STR_MAX)];
4538 char *nl = strchr(c,'\n');
4539 c=c+j;
4540 if (nl)
4541 strlimitcpy(bd_options, c, nl - c, MED_STR_MAX);
4542 out_html(NEWLINE);
4543 mandoc_bd_options = 0; /* Remember options for the terminating Ed */
4544 if (strstr(bd_options, "-offset indent"))
4546 mandoc_bd_options |= BD_INDENT;
4547 out_html("<BLOCKQUOTE>\n");
4549 if ( strstr(bd_options, "-literal") || strstr(bd_options, "-unfilled"))
4551 if (fillout)
4553 mandoc_bd_options |= BD_LITERAL;
4554 out_html(set_font("R"));
4555 out_html(change_to_size('0'));
4556 out_html("<PRE>\n");
4558 curpos=0;
4559 fillout=0;
4561 c=skip_till_newline(c);
4562 break;
4564 case REQ_Ed: /* mdoc(7) */
4566 if (mandoc_bd_options & BD_LITERAL)
4568 if (!fillout)
4570 out_html(set_font("R"));
4571 out_html(change_to_size('0'));
4572 out_html("</PRE>\n");
4575 if (mandoc_bd_options & BD_INDENT)
4576 out_html("</BLOCKQUOTE>\n");
4577 curpos=0;
4578 fillout=1;
4579 c=skip_till_newline(c);
4580 break;
4582 case REQ_Be: /* mdoc(7) */
4584 c=c+j;
4585 if (fillout)
4586 out_html("<br><br>");
4587 else
4589 out_html(NEWLINE);
4591 curpos=0;
4592 c=skip_till_newline(c);
4593 break;
4595 case REQ_Xr: /* mdoc(7) */ // ### FIXME: it should issue a <a href="man:somewhere(x)"> directly
4597 /* Translate xyz 1 to xyz(1)
4598 * Allow for multiple spaces. Allow the section to be missing.
4600 char buff[NULL_TERMINATED(MED_STR_MAX)];
4601 char *bufptr;
4602 trans_char(c,'"','\a');
4603 bufptr = buff;
4604 c = c+j;
4605 if (*c == '\n') c++; /* Skip spaces */
4606 while (isspace(*c) && *c != '\n') c++;
4607 while (isalnum(*c) || *c == '.' || *c == ':' || *c == '_' || *c == '-')
4609 /* Copy the xyz part */
4610 *bufptr = *c;
4611 bufptr++;
4612 if (bufptr >= buff + MED_STR_MAX) break;
4613 c++;
4615 while (isspace(*c) && *c != '\n') c++; /* Skip spaces */
4616 if (isdigit(*c))
4618 /* Convert the number if there is one */
4619 *bufptr = '(';
4620 bufptr++;
4621 if (bufptr < buff + MED_STR_MAX)
4623 while (isalnum(*c))
4625 *bufptr = *c;
4626 bufptr++;
4627 if (bufptr >= buff + MED_STR_MAX) break;
4628 c++;
4630 if (bufptr < buff + MED_STR_MAX)
4632 *bufptr = ')';
4633 bufptr++;
4637 while (*c != '\n')
4639 /* Copy the remainder */
4640 if (!isspace(*c))
4642 *bufptr = *c;
4643 bufptr++;
4644 if (bufptr >= buff + MED_STR_MAX) break;
4646 c++;
4648 *bufptr = '\n';
4649 bufptr[1] = 0;
4650 scan_troff_mandoc(buff, 1, NULL);
4651 out_html(NEWLINE);
4652 if (fillout)
4653 curpos++;
4654 else
4655 curpos=0;
4656 break;
4658 case REQ_Fl: // mdoc(7) "FLags"
4660 trans_char(c,'"','\a');
4661 c+=j;
4662 sl=fill_words(c, wordlist, &words, true, &c);
4663 out_html(set_font("B"));
4664 if (!words)
4666 out_html("-"); // stdin or stdout
4668 else
4670 for (i=0;i<words;++i)
4672 if (ispunct(wordlist[i][0]) && wordlist[i][0]!='-')
4674 scan_troff_mandoc(wordlist[i], 1, NULL);
4676 else
4678 if (i>0)
4679 out_html(" "); // Put a space between flags
4680 out_html("-");
4681 scan_troff_mandoc(wordlist[i], 1, NULL);
4685 out_html(set_font("R"));
4686 out_html(NEWLINE);
4687 if (fillout)
4688 curpos++;
4689 else
4690 curpos=0;
4691 break;
4693 case REQ_Pa: /* mdoc(7) */
4694 case REQ_Pf: /* mdoc(7) */
4696 trans_char(c,'"','\a');
4697 c=c+j;
4698 if (*c=='\n') c++;
4699 c=scan_troff_mandoc(c, 1, NULL);
4700 out_html(NEWLINE);
4701 if (fillout)
4702 curpos++;
4703 else
4704 curpos=0;
4705 break;
4707 case REQ_Pp: /* mdoc(7) */
4709 if (fillout)
4710 out_html("<br><br>\n");
4711 else
4713 out_html(NEWLINE);
4715 curpos=0;
4716 c=skip_till_newline(c);
4717 break;
4719 case REQ_Aq: // mdoc(7) "Angle bracket Quote"
4720 c=process_quote(c,j,"&lt;","&gt;");
4721 break;
4722 case REQ_Bq: // mdoc(7) "Bracket Quote"
4723 c=process_quote(c,j,"[","]");
4724 break;
4725 case REQ_Dq: // mdoc(7) "Double Quote"
4726 c=process_quote(c,j,"&ldquo;","&rdquo;");
4727 break;
4728 case REQ_Pq: // mdoc(7) "Parenthesis Quote"
4729 c=process_quote(c,j,"(",")");
4730 break;
4731 case REQ_Qq: // mdoc(7) "straight double Quote"
4732 c=process_quote(c,j,"&quot;","&quot;");
4733 break;
4734 case REQ_Sq: // mdoc(7) "Single Quote"
4735 c=process_quote(c,j,"&lsquo;","&rsquo;");
4736 break;
4737 case REQ_Op: /* mdoc(7) */
4739 trans_char(c,'"','\a');
4740 c=c+j;
4741 if (*c=='\n') c++;
4742 out_html(set_font("R"));
4743 out_html("[");
4744 c=scan_troff_mandoc(c, 1, NULL);
4745 out_html(set_font("R"));
4746 out_html("]");
4747 out_html(NEWLINE);
4748 if (fillout)
4749 curpos++;
4750 else
4751 curpos=0;
4752 break;
4754 case REQ_Oo: /* mdoc(7) */
4756 trans_char(c,'"','\a');
4757 c=c+j;
4758 if (*c=='\n') c++;
4759 out_html(set_font("R"));
4760 out_html("[");
4761 c=scan_troff_mandoc(c, 1, NULL);
4762 if (fillout)
4763 curpos++;
4764 else
4765 curpos=0;
4766 break;
4768 case REQ_Oc: /* mdoc(7) */
4770 trans_char(c,'"','\a');
4771 c=c+j;
4772 c=scan_troff_mandoc(c, 1, NULL);
4773 out_html(set_font("R"));
4774 out_html("]");
4775 if (fillout)
4776 curpos++;
4777 else
4778 curpos=0;
4779 break;
4781 case REQ_Ql: /* mdoc(7) */
4783 /* Single quote first word in the line */
4784 char *sp;
4785 trans_char(c,'"','\a');
4786 c=c+j;
4787 if (*c=='\n') c++;
4788 sp = c;
4791 /* Find first whitespace after the
4792 * first word that isn't a mandoc macro
4794 while (*sp && isspace(*sp)) sp++;
4795 while (*sp && !isspace(*sp)) sp++;
4796 } while (*sp && isupper(*(sp-2)) && islower(*(sp-1)));
4798 /* Use a newline to mark the end of text to
4799 * be quoted
4801 if (*sp) *sp = '\n';
4802 out_html("`"); /* Quote the text */
4803 c=scan_troff_mandoc(c, 1, NULL);
4804 out_html("'");
4805 out_html(NEWLINE);
4806 if (fillout)
4807 curpos++;
4808 else
4809 curpos=0;
4810 break;
4812 case REQ_Ar: /* mdoc(7) */
4814 /* parse one line in italics */
4815 out_html(set_font("I"));
4816 trans_char(c,'"','\a');
4817 c=c+j;
4818 if (*c=='\n')
4820 /* An empty Ar means "file ..." */
4821 out_html("file ...");
4823 else
4824 c=scan_troff_mandoc(c, 1, NULL);
4825 out_html(set_font("R"));
4826 out_html(NEWLINE);
4827 if (fillout)
4828 curpos++;
4829 else
4830 curpos=0;
4831 break;
4833 case REQ_Em: /* mdoc(7) */
4835 out_html("<em>");
4836 trans_char(c,'"','\a');
4837 c+=j;
4838 if (*c=='\n') c++;
4839 c=scan_troff_mandoc(c, 1, NULL);
4840 out_html("</em>");
4841 out_html(NEWLINE);
4842 if (fillout)
4843 curpos++;
4844 else
4845 curpos=0;
4846 break;
4848 case REQ_Ad: /* mdoc(7) */
4849 case REQ_Va: /* mdoc(7) */
4850 case REQ_Xc: /* mdoc(7) */
4852 /* parse one line in italics */
4853 out_html(set_font("I"));
4854 trans_char(c,'"','\a');
4855 c=c+j;
4856 if (*c=='\n') c++;
4857 c=scan_troff_mandoc(c, 1, NULL);
4858 out_html(set_font("R"));
4859 out_html(NEWLINE);
4860 if (fillout)
4861 curpos++;
4862 else
4863 curpos=0;
4864 break;
4866 case REQ_Nd: /* mdoc(7) */
4868 trans_char(c,'"','\a');
4869 c=c+j;
4870 if (*c=='\n') c++;
4871 out_html(" - ");
4872 c=scan_troff_mandoc(c, 1, NULL);
4873 out_html(NEWLINE);
4874 if (fillout)
4875 curpos++;
4876 else
4877 curpos=0;
4878 break;
4880 case REQ_Nm: // mdoc(7) "Name Macro" ### FIXME
4882 static char mandoc_name[NULL_TERMINATED(SMALL_STR_MAX)] = ""; // ### TODO Use QByteArray
4883 trans_char(c,'"','\a');
4884 c=c+j;
4886 if (mandoc_synopsis && mandoc_name_count)
4888 /* Break lines only in the Synopsis.
4889 * The Synopsis section seems to be treated
4890 * as a special case - Bummer!
4892 out_html("<BR>");
4894 else if (!mandoc_name_count)
4896 const char *nextbreak = strchr(c, '\n');
4897 const char *nextspace = strchr(c, ' ');
4898 if (nextspace < nextbreak)
4899 nextbreak = nextspace;
4901 if (nextbreak)
4903 /* Remember the name for later. */
4904 strlimitcpy(mandoc_name, c, nextbreak - c, SMALL_STR_MAX);
4907 mandoc_name_count++;
4909 out_html(set_font("B"));
4910 // ### FIXME: fill_words must be used
4911 while (*c == ' '|| *c == '\t') c++;
4912 if ((tolower(*c) >= 'a' && tolower(*c) <= 'z' ) || (*c >= '0' && *c <= '9'))
4914 // alphanumeric argument
4915 c=scan_troff_mandoc(c, 1, NULL);
4916 out_html(set_font("R"));
4917 out_html(NEWLINE);
4919 else
4921 /* If Nm has no argument, use one from an earlier
4922 * Nm command that did have one. Hope there aren't
4923 * too many commands that do this.
4925 out_html(mandoc_name);
4926 out_html(set_font("R"));
4929 if (fillout)
4930 curpos++;
4931 else
4932 curpos=0;
4933 break;
4935 case REQ_Cd: /* mdoc(7) */
4936 case REQ_Cm: /* mdoc(7) */
4937 case REQ_Ic: /* mdoc(7) */
4938 case REQ_Ms: /* mdoc(7) */
4939 case REQ_Or: /* mdoc(7) */
4940 case REQ_Sy: /* mdoc(7) */
4942 /* parse one line in bold */
4943 out_html(set_font("B"));
4944 trans_char(c,'"','\a');
4945 c=c+j;
4946 if (*c=='\n') c++;
4947 c=scan_troff_mandoc(c, 1, NULL);
4948 out_html(set_font("R"));
4949 out_html(NEWLINE);
4950 if (fillout)
4951 curpos++;
4952 else
4953 curpos=0;
4954 break;
4956 // ### FIXME: punctuation is handled badly!
4957 case REQ_Dv: /* mdoc(7) */
4958 case REQ_Ev: /* mdoc(7) */
4959 case REQ_Fr: /* mdoc(7) */
4960 case REQ_Li: /* mdoc(7) */
4961 case REQ_No: /* mdoc(7) */
4962 case REQ_Ns: /* mdoc(7) */
4963 case REQ_Tn: /* mdoc(7) */
4964 case REQ_nN: /* mdoc(7) */
4966 trans_char(c,'"','\a');
4967 c=c+j;
4968 if (*c=='\n') c++;
4969 out_html(set_font("B"));
4970 c=scan_troff_mandoc(c, 1, NULL);
4971 out_html(set_font("R"));
4972 out_html(NEWLINE);
4973 if (fillout)
4974 curpos++;
4975 else
4976 curpos=0;
4977 break;
4979 case REQ_perc_A: /* mdoc(7) biblio stuff */
4980 case REQ_perc_D:
4981 case REQ_perc_N:
4982 case REQ_perc_O:
4983 case REQ_perc_P:
4984 case REQ_perc_Q:
4985 case REQ_perc_V:
4987 c=c+j;
4988 if (*c=='\n') c++;
4989 c=scan_troff(c, 1, NULL); /* Don't allow embedded mandoc coms */
4990 if (fillout)
4991 curpos++;
4992 else
4993 curpos=0;
4994 break;
4996 case REQ_perc_B:
4997 case REQ_perc_J:
4998 case REQ_perc_R:
4999 case REQ_perc_T:
5001 c=c+j;
5002 out_html(set_font("I"));
5003 if (*c=='\n') c++;
5004 c=scan_troff(c, 1, NULL); /* Don't allow embedded mandoc coms */
5005 out_html(set_font("R"));
5006 if (fillout)
5007 curpos++;
5008 else
5009 curpos=0;
5010 break;
5012 case REQ_UR: // ### FIXME man(7) "URl"
5014 ignore_links=true;
5015 c+=j;
5016 char* newc;
5017 h=fill_words(c, wordlist, &words, false, &newc);
5018 *h=0;
5019 if (words>0)
5021 h=wordlist[0];
5022 // A parameter : means that we do not want an URL, not here and not until .UE
5023 ur_ignore=(!qstrcmp(h,":"));
5025 else
5027 // We cannot find the URL, assume :
5028 ur_ignore=true;
5029 h=0;
5031 if (!ur_ignore && words>0)
5033 out_html("<a href=\"");
5034 out_html(h);
5035 out_html("\">");
5037 c=newc; // Go to next line
5038 break;
5040 case REQ_UE: // ### FIXME man(7) "Url End"
5042 c+=j;
5043 c = skip_till_newline(c);
5044 if (!ur_ignore)
5046 out_html("</a>");
5048 ur_ignore=false;
5049 ignore_links=false;
5050 break;
5052 case REQ_UN: // ### FIXME man(7) "Url Named anchor"
5054 c+=j;
5055 char* newc;
5056 h=fill_words(c, wordlist, &words, false, &newc);
5057 *h=0;
5058 if (words>0)
5060 h=wordlist[0];
5061 out_html("<a name=\">");
5062 out_html(h);
5063 out_html("\" id=\"");
5064 out_html(h);
5065 out_html("\"></a>");
5067 c=newc;
5068 break;
5070 case REQ_nroff: // groff(7) "NROFF mode"
5071 mode = true;
5072 case REQ_troff: // groff(7) "TROFF mode"
5074 s_nroff = mode;
5075 c+=j;
5076 c = skip_till_newline(c);
5078 case REQ_als: // groff(7) "ALias String"
5081 * Note an alias is supposed to be something like a hard link
5082 * However, to make it simpler, we only copy the string.
5084 // Be careful: unlike .rn, the destination is first, origin is second
5085 kDebug(7107) << "start .als";
5086 c+=j;
5087 const QByteArray name ( scan_identifier( c ) );
5088 if ( name.isEmpty() )
5090 kDebug(7107) << "EXCEPTION: empty destination string to alias";
5091 break;
5093 while (*c && isspace(*c) && *c!='\n') ++c;
5094 const QByteArray name2 ( scan_identifier ( c ) );
5095 if ( name2.isEmpty() )
5097 kDebug(7107) << "EXCEPTION: empty origin string to alias";
5098 break;
5100 kDebug(7107) << "Alias " << BYTEARRAY( name2 ) << " to " << BYTEARRAY( name );
5101 c=skip_till_newline(c);
5102 if ( name == name2 )
5104 kDebug(7107) << "EXCEPTION: same origin and destination string to alias: " << BYTEARRAY( name );
5105 break;
5107 // Second parameter is origin (unlike in .rn)
5108 QMap<QByteArray,StringDefinition>::iterator it=s_stringDefinitionMap.find(name2);
5109 if (it==s_stringDefinitionMap.end())
5111 kDebug(7107) << "EXCEPTION: cannot find string to make alias of " << BYTEARRAY( name2 );
5113 else
5115 StringDefinition def=(*it);
5116 s_stringDefinitionMap.insert(name,def);
5118 kDebug(7107) << "end .als";
5119 break;
5121 case REQ_rr: // groff(7) "Remove number Register"
5123 kDebug(7107) << "start .rr";
5124 c += j;
5125 const QByteArray name ( scan_identifier( c ) );
5126 if ( name.isEmpty() )
5128 kDebug(7107) << "EXCEPTION: empty origin string to remove/rename: ";
5129 break;
5131 c = skip_till_newline( c );
5132 QMap <QByteArray, NumberDefinition>::iterator it = s_numberDefinitionMap.find( name );
5133 if ( it == s_numberDefinitionMap.end() )
5135 kDebug(7107) << "EXCEPTION: trying to remove inexistant number register: ";
5137 else
5139 s_numberDefinitionMap.remove( name );
5141 kDebug(7107) << "end .rr";
5142 break;
5144 case REQ_rnn: // groff(7) "ReName Number register"
5146 kDebug(7107) << "start .rnn";
5147 c+=j;
5148 const QByteArray name ( scan_identifier ( c ) );
5149 if ( name.isEmpty() )
5151 kDebug(7107) << "EXCEPTION: empty origin to remove/rename number register";
5152 break;
5154 while (*c && isspace(*c) && *c!='\n') ++c;
5155 const QByteArray name2 ( scan_identifier ( c ) );
5156 if ( name2.isEmpty() )
5158 kDebug(7107) << "EXCEPTION: empty destination to rename number register";
5159 break;
5161 c = skip_till_newline( c );
5162 QMap<QByteArray,NumberDefinition>::iterator it=s_numberDefinitionMap.find(name);
5163 if (it==s_numberDefinitionMap.end())
5165 kDebug(7107) << "EXCEPTION: cannot find number register to rename" << BYTEARRAY( name );
5167 else
5169 NumberDefinition def=(*it);
5170 s_numberDefinitionMap.remove(name); // ### QT4: removeAll
5171 s_numberDefinitionMap.insert(name2,def);
5173 kDebug(7107) << "end .rnn";
5174 break;
5176 case REQ_aln: // groff(7) "ALias Number Register"
5179 * Note an alias is supposed to be something like a hard link
5180 * However to make it simplier, we only copy the string.
5182 // Be careful: unlike .rnn, the destination is first, origin is second
5183 kDebug(7107) << "start .aln";
5184 c+=j;
5185 const QByteArray name ( scan_identifier( c ) );
5186 if ( name.isEmpty() )
5188 kDebug(7107) << "EXCEPTION: empty destination number register to alias";
5189 break;
5191 while (*c && isspace(*c) && *c!='\n') ++c;
5192 const QByteArray name2 ( scan_identifier( c ) );
5193 if ( name2.isEmpty() )
5195 kDebug(7107) << "EXCEPTION: empty origin number register to alias";
5196 break;
5198 kDebug(7107) << "Alias " << BYTEARRAY( name2 ) << " to " << BYTEARRAY( name );
5199 c = skip_till_newline( c );
5200 if ( name == name2 )
5202 kDebug(7107) << "EXCEPTION: same origin and destination number register to alias: " << BYTEARRAY( name );
5203 break;
5205 // Second parameter is origin (unlike in .rnn)
5206 QMap<QByteArray,NumberDefinition>::iterator it=s_numberDefinitionMap.find(name2);
5207 if (it==s_numberDefinitionMap.end())
5209 kDebug(7107) << "EXCEPTION: cannot find string to make alias: " << BYTEARRAY( name2 );
5211 else
5213 NumberDefinition def=(*it);
5214 s_numberDefinitionMap.insert(name,def);
5216 kDebug(7107) << "end .aln";
5217 break;
5219 case REQ_shift: // groff(7) "SHIFT parameter"
5221 c+=j;
5222 h=c;
5223 while (*h && *h!='\n' && isdigit(*h) ) ++h;
5224 const char tempchar = *h;
5225 *h = 0;
5226 const QByteArray number( c );
5227 *h = tempchar;
5228 c = skip_till_newline( h );
5229 unsigned int result = 1; // Numbers of shifts to do
5230 if ( !number.isEmpty() )
5232 bool ok = false;
5233 result = number.toUInt(&ok);
5234 if ( !ok || result < 1 )
5235 result = 1;
5237 for ( unsigned int num = 0; num < result; ++num )
5239 if ( !s_argumentList.isEmpty() )
5240 s_argumentList.pop_front();
5242 break;
5244 case REQ_while: // groff(7) "WHILE loop"
5246 request_while( c, j, mandoc_command );
5247 break;
5249 case REQ_do: // groff(7) "DO command"
5251 // ### HACK: we just replace do by a \n and a .
5252 *c = '\n';
5253 c++;
5254 *c = '.';
5255 // The . will be treated as next character
5256 break;
5258 default:
5260 if (mandoc_command &&
5261 ((isupper(*c) && islower(*(c+1)))
5262 || (islower(*c) && isupper(*(c+1)))) )
5264 /* Let through any mdoc(7) commands that haven't
5265 * been delt with.
5266 * I don't want to miss anything out of the text.
5268 char buf[4] = { c[0], c[1], ' ', 0 };
5269 out_html(buf); /* Print the command (it might just be text). */
5270 c=c+j;
5271 trans_char(c,'"','\a');
5272 if (*c=='\n') c++;
5273 out_html(set_font("R"));
5274 c=scan_troff(c, 1, NULL);
5275 out_html(NEWLINE);
5276 if (fillout)
5277 curpos++;
5278 else
5279 curpos=0;
5281 else
5282 c=skip_till_newline(c);
5283 break;
5288 if (fillout)
5290 out_html(NEWLINE);
5291 curpos++;
5293 return c;
/* Non-zero once a tab character has been seen on the line being scanned. */
static int contained_tab = 0;

/* Signals whether to look for embedded mandoc commands. */
static bool mandoc_line = false;
5301 static char *scan_troff(char *c, bool san, char **result)
5302 { /* san : stop at newline */
5303 char *h;
5304 char intbuff[NULL_TERMINATED(MED_STR_MAX)];
5305 int ibp=0;
5306 #define FLUSHIBP if (ibp) { intbuff[ibp]=0; out_html(intbuff); ibp=0; }
5307 char *exbuffer;
5308 int exbuffpos, exbuffmax, exnewline_for_fun;
5309 bool exscaninbuff;
5310 int usenbsp=0;
5312 exbuffer=buffer;
5313 exbuffpos=buffpos;
5314 exbuffmax=buffmax;
5315 exnewline_for_fun=newline_for_fun;
5316 exscaninbuff=scaninbuff;
5317 newline_for_fun=0;
5318 if (result) {
5319 if (*result) {
5320 buffer=*result;
5321 buffpos=qstrlen(buffer);
5322 buffmax=buffpos;
5323 } else {
5324 buffer = stralloc(LARGE_STR_MAX);
5325 buffpos=0;
5326 buffmax=LARGE_STR_MAX;
5328 scaninbuff=true;
5330 h=c; // ### FIXME below are too many tests that may go before the position of c
5331 /* start scanning */
5333 // ### VERIFY: a dot must be at first position, we cannot add newlines or it would allow spaces before a dot
5334 while (*h == ' ')
5336 #if 1
5337 ++h;
5338 #else
5339 *h++ = '\n';
5340 #endif
5343 while (h && *h && (!san || newline_for_fun || *h!='\n')) {
5345 if (*h==escapesym) {
5346 h++;
5347 FLUSHIBP;
5348 h = scan_escape(h);
5349 } else if (*h==controlsym && h[-1]=='\n') {
5350 h++;
5351 FLUSHIBP;
5352 h = scan_request(h);
5353 if (h && san && h[-1]=='\n') h--;
5354 } else if (mandoc_line
5355 && ((*(h-1)) && (isspace(*(h-1)) || (*(h-1))=='\n'))
5356 && *(h) && isupper(*(h))
5357 && *(h+1) && islower(*(h+1))
5358 && *(h+2) && isspace(*(h+2))) {
5359 // mdoc(7) embedded command eg ".It Fl Ar arg1 Fl Ar arg2"
5360 FLUSHIBP;
5361 h = scan_request(h);
5362 if (san && h[-1]=='\n') h--;
5363 } else if (*h==nobreaksym && h[-1]=='\n') {
5364 h++;
5365 FLUSHIBP;
5366 h = scan_request(h);
5367 if (san && h[-1]=='\n') h--;
5368 } else {
5369 /* int mx; */
5370 if (still_dd && isalnum(*h) && h[-1]=='\n') {
5371 /* sometimes a .HP request is not followed by a .br request */
5372 FLUSHIBP;
5373 out_html("<DD>");
5374 curpos=0;
5375 still_dd=false;
5377 switch (*h) {
5378 case '&':
5379 intbuff[ibp++]='&';
5380 intbuff[ibp++]='a';
5381 intbuff[ibp++]='m';
5382 intbuff[ibp++]='p';
5383 intbuff[ibp++]=';';
5384 curpos++;
5385 break;
5386 case '<':
5387 intbuff[ibp++]='&';
5388 intbuff[ibp++]='l';
5389 intbuff[ibp++]='t';
5390 intbuff[ibp++]=';';
5391 curpos++;
5392 break;
5393 case '>':
5394 intbuff[ibp++]='&';
5395 intbuff[ibp++]='g';
5396 intbuff[ibp++]='t';
5397 intbuff[ibp++]=';';
5398 curpos++;
5399 break;
5400 case '"':
5401 intbuff[ibp++]='&';
5402 intbuff[ibp++]='q';
5403 intbuff[ibp++]='u';
5404 intbuff[ibp++]='o';
5405 intbuff[ibp++]='t';
5406 intbuff[ibp++]=';';
5407 curpos++;
5408 break;
5409 case '\n':
5410 if (h != c && h[-1]=='\n' && fillout) {
5411 intbuff[ibp++]='<';
5412 intbuff[ibp++]='P';
5413 intbuff[ibp++]='>';
5415 if (contained_tab && fillout) {
5416 intbuff[ibp++]='<';
5417 intbuff[ibp++]='B';
5418 intbuff[ibp++]='R';
5419 intbuff[ibp++]='>';
5421 contained_tab=0;
5422 curpos=0;
5423 usenbsp=0;
5424 intbuff[ibp++]='\n';
5425 break;
5426 case '\t':
5428 int curtab=0;
5429 contained_tab=1;
5430 FLUSHIBP;
5431 /* like a typewriter, not like TeX */
5432 tabstops[19]=curpos+1;
5433 while (curtab<maxtstop && tabstops[curtab]<=curpos)
5434 curtab++;
5435 if (curtab<maxtstop) {
5436 if (!fillout) {
5437 while (curpos<tabstops[curtab]) {
5438 intbuff[ibp++]=' ';
5439 if (ibp>480) { FLUSHIBP; }
5440 curpos++;
5442 } else {
5443 out_html("<TT>");
5444 while (curpos<tabstops[curtab]) {
5445 out_html("&nbsp;");
5446 curpos++;
5448 out_html("</TT>");
5452 break;
5453 default:
5454 if (*h==' ' && (h[-1]=='\n' || usenbsp)) {
5455 FLUSHIBP;
5456 if (!usenbsp && fillout) {
5457 out_html("<BR>");
5458 curpos=0;
5460 usenbsp=fillout;
5461 if (usenbsp) out_html("&nbsp;"); else intbuff[ibp++]=' ';
5462 } else if (*h>31 && *h<127) intbuff[ibp++]=*h;
5463 else if (((unsigned char)(*h))>127) {
5464 intbuff[ibp++]=*h;
5466 curpos++;
5467 break;
5469 if (ibp > (MED_STR_MAX - 20)) FLUSHIBP;
5470 h++;
5473 FLUSHIBP;
5474 if (buffer) buffer[buffpos]='\0';
5475 if (san && h && *h) h++;
5476 newline_for_fun=exnewline_for_fun;
5477 if (result) {
5478 *result = buffer;
5479 buffer=exbuffer;
5480 buffpos=exbuffpos;
5481 buffmax=exbuffmax;
5482 scaninbuff=exscaninbuff;
5485 return h;
5489 static char *scan_troff_mandoc(char *c, bool san, char **result)
5491 char *ret;
5492 char *end = c;
5493 bool oldval = mandoc_line;
5494 mandoc_line = true;
5495 while (*end && *end != '\n') {
5496 end++;
5499 if (end > c + 2
5500 && ispunct(*(end - 1))
5501 && isspace(*(end - 2)) && *(end - 2) != '\n') {
5502 /* Don't format lonely punctuation E.g. in "xyz ," format
5503 * the xyz and then append the comma removing the space.
5505 *(end - 2) = '\n';
5506 ret = scan_troff(c, san, result);
5507 *(end - 2) = *(end - 1);
5508 *(end - 1) = ' ';
5510 else {
5511 ret = scan_troff(c, san, result);
5513 mandoc_line = oldval;
5514 return ret;
5517 // Entry point
5518 void scan_man_page(const char *man_page)
5520 if (!man_page)
5521 return;
5523 kDebug(7107) << "Start scanning man page";
5525 // ### Do more init
5526 // Unlike man2html, we actually call this several times, hence the need to
5527 // properly cleanup all those static vars
5528 s_ifelseval.clear();
5530 s_characterDefinitionMap.clear();
5531 InitCharacterDefinitions();
5533 s_stringDefinitionMap.clear();
5534 InitStringDefinitions();
5536 s_numberDefinitionMap.clear();
5537 InitNumberDefinitions();
5539 s_argumentList.clear();
5541 section = 0;
5543 s_dollarZero = ""; // No macro called yet!
5545 output_possible = false;
5546 int strLength = qstrlen(man_page);
5547 char *buf = new char[strLength + 2];
5548 qstrcpy(buf+1, man_page);
5549 buf[0] = '\n';
5551 kDebug(7107) << "Parse man page";
5553 scan_troff(buf+1,0,NULL);
5555 kDebug(7107) << "Man page parsed!";
5557 while (itemdepth || dl_set[itemdepth]) {
5558 out_html("</DL>\n");
5559 if (dl_set[itemdepth]) dl_set[itemdepth]=0;
5560 else if (itemdepth > 0) itemdepth--;
5563 out_html(set_font("R"));
5564 out_html(change_to_size(0));
5565 if (!fillout) {
5566 fillout=1;
5567 out_html("</PRE>");
5569 out_html(NEWLINE);
5571 if (section) {
5572 output_real("</div><div style=\"margin-left: 2cm\">\n");
5573 section = 0;
5576 if (output_possible) {
5577 // The output is buggy wrt to how divs are handled. Fixing it would
5578 // require closing divs before other block-level elements are output,
5579 // and I do not feel like going to find them all.
5580 output_real("</div></div></div></div>\n");
5582 output_real("<div id=\"footer\"><div id=\"footer_text\">\n");
5583 #ifdef SIMPLE_MAN2HTML
5584 output_real("Generated by kio_man");
5585 #else
5586 output_real("Generated by kio_man, KDE version " KDE_VERSION_STRING);
5587 #endif
5588 output_real("</div></div>\n\n");
5590 output_real("</BODY>\n</HTML>\n");
5592 delete [] buf;
5594 // Release memory
5595 s_characterDefinitionMap.clear();
5596 s_stringDefinitionMap.clear();
5597 s_numberDefinitionMap.clear();
5598 s_argumentList.clear();
5600 // reinit static variables for reuse
5601 delete [] buffer;
5602 buffer = 0;
5604 escapesym='\\';
5605 nobreaksym='\'';
5606 controlsym='.';
5607 fieldsym=0;
5608 padsym=0;
5610 buffpos=0;
5611 buffmax=0;
5612 scaninbuff=false;
5613 itemdepth=0;
5614 for (int i = 0; i < 20; i++)
5615 dl_set[i] = 0;
5616 still_dd=false;
5617 for (int i = 0; i < 12; i++)
5618 tabstops[i] = (i+1)*8;
5619 maxtstop=12;
5620 curpos=0;
5622 mandoc_name_count = 0;
5625 #ifdef SIMPLE_MAN2HTML
// Stand-alone build only: write the generated HTML to standard output.
void output_real(const char *insert)
{
    std::cout << insert;
}
5631 char *read_man_page(const char *filename)
5633 int man_pipe = 0;
5634 char *man_buf = NULL;
5636 FILE *man_stream = NULL;
5637 struct stat stbuf;
5638 size_t buf_size;
5639 if (stat(filename, &stbuf) == -1) {
5640 std::cerr << "read_man_page: can not find " << filename << endl;
5641 return NULL;
5643 if (!S_ISREG(stbuf.st_mode)) {
5644 std::cerr << "read_man_page: no file " << filename << endl;
5645 return NULL;
5647 buf_size = stbuf.st_size;
5648 man_buf = stralloc(buf_size+5);
5649 man_pipe = 0;
5650 man_stream = fopen(filename, "r");
5651 if (man_stream) {
5652 man_buf[0] = '\n';
5653 if (fread(man_buf+1, 1, buf_size, man_stream) == buf_size) {
5654 man_buf[buf_size] = '\n';
5655 man_buf[buf_size + 1] = man_buf[buf_size + 2] = '\0';
5657 else {
5658 man_buf = NULL;
5660 fclose(man_stream);
5662 return man_buf;
5665 #ifndef KIO_MAN_TEST
5666 int main(int argc, char **argv)
5668 cssPath = ".";
5669 if (argc < 2) {
5670 std::cerr << "call: " << argv[0] << " <filename>\n";
5671 return 1;
5673 if (chdir(argv[1])) {
5674 char *buf = read_man_page(argv[1]);
5675 if (buf) {
5676 scan_man_page(buf);
5677 delete [] buf;
5679 } else {
5680 DIR *dir = opendir(".");
5681 struct dirent *ent;
5682 while ((ent = readdir(dir)) != NULL) {
5683 cerr << "converting " << ent->d_name << endl;
5684 char *buf = read_man_page(ent->d_name);
5685 if (buf) {
5686 scan_man_page(buf);
5687 delete [] buf;
5690 closedir(dir);
5692 return 0;
5694 #endif
5697 #endif
5699 // kate: space-indent on; indent-width 4; replace-tabs on;