1 /***********************************************************************
3 * This software is part of the ast package *
4 * Copyright (c) 2000-2009 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
13 * Information and Software Systems Research *
17 * Glenn Fowler <gsf@research.att.com> *
19 ***********************************************************************/
/*
 * Option/usage description string; main() hands it to optget(argv, usage).
 * It declares the -h/--html, -m/--msg and -r/--raw options.
 * NOTE(review): in this copy the +DESCRIPTION entry stops at "or filters like"
 * with no closing ']' and the initializer has no visible terminating ';' --
 * lines appear to be missing; confirm against the pristine file.
 */
26 static const char usage
[] =
27 "[-?\n@(#)$Id: msgcvt (AT&T Research) 2000-05-01 $\n]"
29 "[+NAME?msgcvt - convert message file to/from html]"
30 "[+DESCRIPTION?\bmsgcvt\b reads a \bgencat\b(1) format file on the standard"
31 " input and converts it to \bhtml\b on the standard output. The input"
32 " file must contain the control statement \b$quote \"\b and use the \""
33 " character to quote message text. The output is in a form suitable for"
34 " automatic translation by web sites like"
35 " \bhttp://babelfish.altavista.com/\b or filters like"
37 "[h:html?Generate \bhtml\b from \bgencat\b(1) input. This is the default.]"
38 "[m:msg?Generate a \bgencat\b(1) message file from (presumably translated)"
39 " \bhtml\b. Wide characters are UTF-8 encoded.]"
40 "[r:raw?The message file is raw message text, one message per line, with no"
41 " quoting or line numbering.]"
42 "[+SEE ALSO?\bgencat\b(1), \bmsgcc\b(1), \bmsggen\b(1), \btranslate\b(1)]"
/*
 * Bits carried in the `flags` argument of the converters.
 * MSG_RAW is tested in msg2html (presumably set by -r/--raw -- TODO confirm).
 * MSG_SPLICE is cleared inside msg2html; its exact meaning is not visible
 * from here -- TODO confirm.
 */
#define MSG_RAW (1<<0)
#define MSG_SPLICE (1<<1)

/*
 * SPACE(s): consume ONE separator at the char pointer lvalue `s`.
 *
 * A separator is either a single whitespace character (s advances by 1) or
 * the two-character escape text backslash-n / backslash-t (s advances by 2).
 * Yields non-zero when something was consumed, 0 (with s untouched) otherwise.
 *
 * `s` must be a modifiable lvalue and is evaluated more than once, so do not
 * pass an expression with side effects.  The character is converted to
 * unsigned char before isspace() because passing a negative plain char to
 * the <ctype.h> classifiers is undefined.
 */
#define SPACE(s) \
	((isspace((unsigned char)*(s)) && ((s) += 1)) || \
	 (*(s) == '\\' && (*((s) + 1) == 'n' || *((s) + 1) == 't') && ((s) += 2)))
/*
 * Pointer to a conversion routine taking (input stream, output stream, flags).
 * html2msg and msg2html both have this parameter list; main() keeps the
 * selected routine in a Convert_f variable and calls it once.
 */
54 typedef void (*Convert_f
)(Sfio_t
*, Sfio_t
*, int);
62 static const Code_t codes
[] =
136 if ((c
= sfgetc(ip
)) == EOF
)
140 if (c
!= '#' && !isalpha(c
))
142 while ((c
= sfgetc(ip
)) != EOF
&& c
!= ';')
149 if (!isalnum(c
) && (i
> 1 || c
!= '#') || i
>= (elementsof(name
) - 1))
156 switch (c
= strtol(name
+ 1, NiL
, 10))
168 for (i
= 0; i
< elementsof(codes
); i
++)
169 if (streq(codes
[i
].name
, name
))
174 if (i
>= elementsof(codes
))
181 error(1, "&%s: unknown HTML special character -- & assumed", name
);
183 error(1, "&%s: invalid HTML special character -- & assumed", name
);
185 sfungetc(ip
, name
[i
]);
/*
 * sfpututf: write the value w to op, one byte per sfputc() call, using a
 * UTF-8 style layout: values within 0x7FF get a 0xC0-based lead byte, values
 * within 0xFFFF get a 0xE0-based lead byte plus a middle byte, and every
 * multi-byte form ends with the 0x80-based byte for the low six bits.
 * Returns the result of the last sfputc().
 * NOTE(review): the condition guarding the first return and the `else` in
 * front of the '?' return are not visible in this copy; presumably they are
 * the 7-bit case and the "wider than 16 bits" fallback -- confirm.
 */
190 sfpututf(Sfio_t
* op
, register int w
)
193 return sfputc(op
, w
);
194 else if (!(w
& ~0x7FF))
195 sfputc(op
, 0xC0 + (w
>> 6));
196 else if (!(w
& ~0xFFFF))
198 sfputc(op
, 0xE0 + (w
>> 12));
/*
 * NOTE(review): '+' binds tighter than '&', so the argument below is
 * (0x80 + (w >> 6)) & 0x3F, which masks away the 0x80 continuation marker.
 * The final sfputc() in this function is written as 0x80 + (w & 0x3F);
 * the middle byte most likely wants the same shape:
 * 0x80 + ((w >> 6) & 0x3F).
 */
199 sfputc(op
, 0x80 + (w
>> 6 ) & 0x3F);
202 return sfputc(op
, '?');
203 return sfputc(op
, 0x80 + (w
& 0x3F));
211 while (isspace(c
= sfgetc(ip
)));
/*
 * html2msg: read HTML text from ip a character at a time and write to op.
 * The visible logic recognises tags by comparing successive characters read
 * with sfgetc()/sfnext(): "OL START=" followed by the quoted number 550717
 * (the same marker msg2html writes), "OL>", "R>", "I>" followed by digits,
 * "LI>", and "LASS=" followed by a double quote.
 * NOTE(review): many lines of this function (braces, case labels, the output
 * calls) are missing from this copy, so the comments below describe only the
 * conditions that are actually visible.  sfnext() is presumably sfgetc()
 * with leading whitespace skipped -- confirm against its definition.
 */
216 html2msg(register Sfio_t
* ip
, register Sfio_t
* op
, int flags
)
/* first pass over the input: runs until EOF unless left earlier */
222 while ((c
= sfgetc(ip
)) != EOF
)
/* match  O L <space> S T A R T = " 5 5 0 7 1 7 " >  one character at a time */
225 if ((c
= sfnext(ip
)) == 'O' &&
226 (c
= sfnext(ip
)) == 'L' &&
227 isspace(c
= sfgetc(ip
)) &&
228 (c
= sfnext(ip
)) == 'S' &&
229 (c
= sfnext(ip
)) == 'T' &&
230 (c
= sfnext(ip
)) == 'A' &&
231 (c
= sfnext(ip
)) == 'R' &&
232 (c
= sfnext(ip
)) == 'T' &&
233 (c
= sfnext(ip
)) == '=' &&
234 (c
= sfnext(ip
)) == '"' &&
235 (c
= sfnext(ip
)) == '5' &&
236 (c
= sfnext(ip
)) == '5' &&
237 (c
= sfnext(ip
)) == '0' &&
238 (c
= sfnext(ip
)) == '7' &&
239 (c
= sfnext(ip
)) == '1' &&
240 (c
= sfnext(ip
)) == '7' &&
241 (c
= sfnext(ip
)) == '"' &&
242 (c
= sfnext(ip
)) == '>')
/* loop bounded by the closing '>' of a tag or EOF */
244 while (c
!= EOF
&& c
!= '>')
247 if ((c
= sfnext(ip
)) != EOF
)
252 switch (c
= sfgetc(ip
))
261 while (isspace(c
= sfgetc(ip
)));
268 switch (c
= sfnext(ip
))
/* match  O L >  */
271 if ((c
= sfnext(ip
)) == 'O' &&
272 (c
= sfgetc(ip
)) == 'L' &&
273 (c
= sfnext(ip
)) == '>')
/* match  R >  */
284 if ((c
= sfgetc(ip
)) == 'R' &&
285 (c
= sfnext(ip
)) == '>')
/* match  I >  followed by a digit */
289 if ((c
= sfgetc(ip
)) == 'I' &&
290 (c
= sfnext(ip
)) == '>' &&
291 isdigit(c
= sfnext(ip
)))
/* end of a do-loop that keeps reading while digits follow */
301 } while (isdigit(c
= sfgetc(ip
)));
/* match  L I >  */
309 (c
= sfnext(ip
)) == 'L' &&
310 (c
= sfgetc(ip
)) == 'I' &&
311 (c
= sfnext(ip
)) == '>')
317 if ((c
= sfnext(ip
)) == '>')
/* match  L A S S = "  */
320 (c
= sfgetc(ip
)) == 'L' &&
321 (c
= sfgetc(ip
)) == 'A' &&
322 (c
= sfgetc(ip
)) == 'S' &&
323 (c
= sfgetc(ip
)) == 'S' &&
324 (c
= sfnext(ip
)) == '=' &&
325 (c
= sfnext(ip
)) == '"')
328 switch (c
= sfgetc(ip
))
/* loop bounded by the closing '>' of a tag or EOF */
345 while (c
!= EOF
&& c
!= '>')
/* true at EOF, either already seen or hit by the next read */
347 if (c
== EOF
|| (c
= sfgetc(ip
)) == EOF
)
/* a second read-until-EOF loop */
365 while ((c
= sfgetc(ip
)) != EOF
)
374 else if (!isspace(c
))
383 if (c
!= 'L' && c
!= '/')
/*
 * encode: write one of six fixed strings to op with sfprintf(); the selection
 * is presumably a switch on c, but the case labels are not visible here.
 * NOTE(review): as shown, the six strings are the bare characters
 * < > " & [ ] and the third one is spelled with three double quotes, which is
 * not a valid C literal.  This looks like HTML entity text that was decoded
 * somewhere upstream of this copy -- restore the literals from the pristine
 * source before building.
 */
411 encode(Sfio_t
* op
, register int c
)
414 sfprintf(op
, "<");
416 sfprintf(op
, ">");
418 sfprintf(op
, """);
420 sfprintf(op
, "&");
422 sfprintf(op
, "[");
424 sfprintf(op
, "]");
/*
 * msg2html: read ip line by line with sfgetr(ip, '\n', 1) and write an HTML
 * document to op.  The output starts with a DOCTYPE/HEAD/BODY prologue and an
 * <OL START="550717"> list, uses <LI>, <P CLASS="...">, and <BR> while
 * walking each line, and ends with </OL> and </BODY></HTML>.
 * flags is consulted for MSG_RAW and has MSG_SPLICE cleared along the way.
 * NOTE(review): braces, case labels and several statements are missing from
 * this copy; the comments below cover only what is visible.
 */
430 msg2html(register Sfio_t
* ip
, register Sfio_t
* op
, register int flags
)
/* prologue; html2msg looks for the 550717 start value written here */
437 sfprintf(op
, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\"><HTML><HEAD><!-- text massaged for external translation --></HEAD><BODY>\n");
438 sfprintf(op
, "<OL START=\"550717\">\n");
/* one iteration per input line */
440 while (s
= sfgetr(ip
, '\n', 1))
/*
 * NOTE(review): the format below has no conversion specifier, so the
 * trailing `s` argument is not used by this call as shown.
 */
453 sfprintf(op
, "<P CLASS=\"", s
);
456 sfprintf(op
, "\">\n");
462 sfprintf(op
, "<LI>");
/* consumes a run of leading digits from the line */
463 while (isdigit(c
= *s
++))
465 sfprintf(op
, "<LI>");
/* walks the text up to the end of the line or a double quote */
466 while (c
&& c
!= '"')
470 else if (isspace(*s
))
473 sfprintf(op
, "<BR>");
481 flags
&= ~MSG_SPLICE
;
/*
 * NOTE(review): the five literals below are the bare characters < > & [ ]
 * in this copy; they are likely entity text that was decoded upstream --
 * confirm against the pristine source.
 */
490 sfprintf(op
, "<");
493 sfprintf(op
, ">");
496 sfprintf(op
, "&");
499 sfprintf(op
, "[");
502 sfprintf(op
, "]");
508 sfprintf(op
, "<P CLASS=\"");
511 while (isalnum(c
= *s
++))
519 sfprintf(op
, "<P CLASS=\"");
/* end of line or a double quote */
527 if (!(c
= *s
++) || c
== '"')
533 } while (!isalpha(c
) || (!islower(c
) || c
== 'h' || c
== 'l') && isalpha(*s
));
535 sfprintf(op
, " ");
538 if (!(flags
& MSG_RAW
))
552 sfprintf(op
, "<P CLASS=\"");
559 sfprintf(op
, " ");
567 if (c
!= 'n' && c
!= 't')
572 sfprintf(op
, "<P CLASS=\"");
/* end of line or a double quote */
580 if (!(c
= *s
++) || c
== '"')
601 if (*s
== 'a' || *s
== 'b' || *s
== '0')
611 else if (isdigit(c
) && isdigit(*s
))
618 sfprintf(op
, " ");
/*
 * Loops while *s is whitespace or the start of a backslash-n / backslash-t
 * escape; for an escape the condition itself advances s by one.
 */
624 while (isspace(*s
) || *s
== '\\' && (*(s
+ 1) == 'n' || *(s
+ 1) == 't') && s
++)
631 sfprintf(op
, " \">");
634 sfprintf(op
, "<BR>");
/* epilogue: close the list and the document */
651 sfprintf(op
, "</OL>\n");
652 sfprintf(op
, "</BODY></HTML>\n");
/*
 * main: parse options with optget() against `usage`, then run the selected
 * converter once from sfstdin to sfstdout.  The converter defaults to
 * msg2html.  Returns 1 if error_info.errors is non-zero, otherwise 0.
 * NOTE(review): the option case labels and the declaration of `flags` are
 * not visible in this copy.
 */
657 main(int argc
, char** argv
)
/* default conversion direction */
660 Convert_f convert
= msg2html
;
663 error_info
.id
= "msgcvt";
666 switch (optget(argv
, usage
))
678 error(ERROR_USAGE
|4, "%s", opt_info
.arg
);
681 error(2, "%s", opt_info
.arg
);
/* step past the arguments optget() consumed */
686 argv
+= opt_info
.index
;
/* any error recorded so far: report usage instead of converting */
687 if (error_info
.errors
)
688 error(ERROR_USAGE
|4, "%s", optusage(NiL
));
689 (*convert
)(sfstdin
, sfstdout
, flags
);
690 return error_info
.errors
!= 0;