1 /* $NetBSD: idnconv.c,v 1.4 2014/12/10 04:37:56 christos Exp $ */
/* RCS-style Id keyword string; presumably records the upstream revision this file derives from. */
4 static char *rcsid
= "Id: idnconv.c,v 1.1 2003/06/04 00:27:07 marka Exp ";
8 * Copyright (c) 2000,2001,2002 Japan Network Information Center.
11 * By using this file, you agree to the terms and conditions set forth bellow.
13 * LICENSE TERMS AND CONDITIONS
15 * The following License Terms and Conditions apply, unless a different
16 * license is obtained from Japan Network Information Center ("JPNIC"),
17 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
18 * Chiyoda-ku, Tokyo 101-0047, Japan.
20 * 1. Use, Modification and Redistribution (including distribution of any
21 * modified or derived work) in source and/or binary forms is permitted
22 * under this License Terms and Conditions.
24 * 2. Redistribution of source code must retain the copyright notices as they
25 * appear in each source code file, this License Terms and Conditions.
27 * 3. Redistribution in binary form must reproduce the Copyright Notice,
28 * this License Terms and Conditions, in the documentation and/or other
29 * materials provided with the distribution. For the purposes of binary
30 * distribution the "Copyright Notice" refers to the following language:
31 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
33 * 4. The name of JPNIC may not be used to endorse or promote products
34 * derived from this Software without specific prior written approval of
37 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
43 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
51 * idnconv -- Codeset converter for named.conf and zone files
65 #include <idn/result.h>
66 #include <idn/converter.h>
67 #include <idn/normalizer.h>
69 #include <idn/resconf.h>
72 #include <idn/version.h>
/*
 * Compile-time limits and option flag bits.
 *
 * MAX_DELIMITER and MAX_LOCALMAPPER are the capacities of the
 * delimiters[] and localmappers[] arrays declared in main().
 * MAX_NORMALIZER is not referenced by any visible code.
 *
 * Each FLAG_* value is a distinct bit of the int flags word that main()
 * builds from the command line and passes to encode_file() and
 * decode_file().  FLAG_NAMEPREP groups the map, normalize, prohibit-check
 * and unassigned-check bits (the rest of its definition is not visible
 * here).  DEFAULT_FLAGS is the initial value of flags in main().
 */
76 #define MAX_DELIMITER 10
77 #define MAX_LOCALMAPPER 10
79 #define MAX_NORMALIZER 10
82 #define FLAG_REVERSE 0x0001
83 #define FLAG_DELIMMAP 0x0002
84 #define FLAG_LOCALMAP 0x0004
85 #define FLAG_MAP 0x0008
86 #define FLAG_NORMALIZE 0x0010
87 #define FLAG_PROHIBITCHECK 0x0020
88 #define FLAG_UNASSIGNCHECK 0x0040
89 #define FLAG_BIDICHECK 0x0080
90 #define FLAG_ASCIICHECK 0x0100
91 #define FLAG_LENGTHCHECK 0x0200
92 #define FLAG_ROUNDTRIPCHECK 0x0400
93 #define FLAG_SELECTIVE 0x0800
95 #define FLAG_NAMEPREP \
96 (FLAG_MAP|FLAG_NORMALIZE|FLAG_PROHIBITCHECK|FLAG_UNASSIGNCHECK|\
99 #define DEFAULT_FLAGS \
100 (FLAG_LOCALMAP|FLAG_NAMEPREP|FLAG_ASCIICHECK|FLAG_LENGTHCHECK|\
101 FLAG_ROUNDTRIPCHECK|FLAG_SELECTIVE|FLAG_DELIMMAP)
/* File-scope state shared by the per-file conversion drivers. */
103 int line_number
; /* current input file line number; printed in conversion error messages */
104 static int flush_every_line
= 0; /* set to 1 by the -flush option; tested after each line is written */
/*
 * Forward declarations of the file-local helpers defined below: the two
 * per-file drivers (encode_file, decode_file), the per-line helpers
 * (trim_newline, convert_line), the usage and version printers, and the
 * hexadecimal code point parser get_ucs.
 */
106 static int encode_file(idn_resconf_t conf1
, idn_resconf_t conf2
,
107 FILE *fp
, int flags
);
108 static int decode_file(idn_resconf_t conf1
, idn_resconf_t conf2
,
109 FILE *fp
, int flags
);
110 static int trim_newline(idnconv_strbuf_t
*buf
);
111 static idn_result_t
convert_line(idnconv_strbuf_t
*from
,
112 idnconv_strbuf_t
*to
,
114 idn_action_t actions
, int flags
);
115 static void print_usage(char *cmd
);
116 static void print_version(void);
117 static unsigned long get_ucs(const char *p
);
/*
 * Program entry point.
 *
 * ac, av: argument count and vector.  The option loop tests **av directly,
 * so ac/av have presumably been advanced past the program name by a line
 * that is not visible here -- TODO confirm.
 *
 * Visible steps, in order:
 *   - call setlocale(LC_ALL, "") when HAVE_SETLOCALE is defined;
 *   - set FLAG_REVERSE based on the command name (condition not visible);
 *   - consume leading dash options, updating flags, the codeset, conf-file,
 *     alias and NAMEPREP settings, and the local mapper / delimiter lists;
 *   - initialize the library and create two resource contexts, resconf1
 *     and resconf2, which mirror each other with the local and IDN
 *     encodings swapped;
 *   - apply defaults or the configuration file, the encoding alias file,
 *     the input and output codesets, delimiter maps, local maps and the
 *     NAMEPREP version to both contexts;
 *   - open the input file named by av[0] for reading;
 *   - call decode_file() when FLAG_REVERSE is set; the encode_file() call
 *     that follows is presumably the else branch (the line between them
 *     is not visible);
 *   - destroy both resource contexts.
 *
 * The driver result is stored in exit_value; how it is returned or passed
 * to exit is not visible in this listing.
 */
120 main(int ac
, char **av
) {
123 unsigned long delimiters
[MAX_DELIMITER
];
124 char *localmappers
[MAX_LOCALMAPPER
];
125 char *nameprep_version
= NULL
;
127 int nlocalmappers
= 0;
128 char *in_code
= NULL
;
129 char *out_code
= NULL
;
130 char *resconf_file
= NULL
;
132 char *encoding_alias
= NULL
;
133 int flags
= DEFAULT_FLAGS
;
136 idn_resconf_t resconf1
, resconf2
;
137 idn_converter_t conv
;
140 #ifdef HAVE_SETLOCALE
141 (void)setlocale(LC_ALL
, "");
145 * If the command name begins with 'r', reverse mode is assumed.
147 if ((cname
= strrchr(cmd
, '/')) != NULL
)
152 flags
|= FLAG_REVERSE
;
/* Consume every leading argument whose first character is a dash. */
156 while (ac
> 0 && **av
== '-') {
/* Helper macros used by the option chain below. */
158 #define OPT_MATCH(opt) (strcmp(*av, opt) == 0)
159 #define MUST_HAVE_ARG if (ac < 2) print_usage(cmd)
160 #define APPEND_LIST(array, size, item, what) \
161 if (size >= (sizeof(array) / sizeof(array[0]))) { \
162 errormsg("too many " what "\n"); \
165 array[size++] = item; \
168 if (OPT_MATCH("-in") || OPT_MATCH("-i")) {
173 } else if (OPT_MATCH("-out") || OPT_MATCH("-o")) {
178 } else if (OPT_MATCH("-conf") || OPT_MATCH("-c")) {
180 resconf_file
= av
[1];
183 } else if (OPT_MATCH("-nameprep") || OPT_MATCH("-n")) {
185 nameprep_version
= av
[1];
188 } else if (OPT_MATCH("-noconf") || OPT_MATCH("-C")) {
190 } else if (OPT_MATCH("-reverse") || OPT_MATCH("-r")) {
191 flags
|= FLAG_REVERSE
;
192 } else if (OPT_MATCH("-nolocalmap") || OPT_MATCH("-L")) {
193 flags
&= ~FLAG_LOCALMAP
;
194 } else if (OPT_MATCH("-nonameprep") || OPT_MATCH("-N")) {
195 flags
&= ~FLAG_NAMEPREP
;
196 } else if (OPT_MATCH("-unassigncheck") || OPT_MATCH("-u")) {
197 flags
|= FLAG_UNASSIGNCHECK
;
198 } else if (OPT_MATCH("-nounassigncheck") || OPT_MATCH("-U")) {
199 flags
&= ~FLAG_UNASSIGNCHECK
;
200 } else if (OPT_MATCH("-nobidicheck") || OPT_MATCH("-B")) {
201 flags
&= ~FLAG_BIDICHECK
;
202 } else if (OPT_MATCH("-noasciicheck") || OPT_MATCH("-A")) {
203 flags
&= ~FLAG_ASCIICHECK
;
204 } else if (OPT_MATCH("-nolengthcheck")) {
205 flags
&= ~FLAG_LENGTHCHECK
;
206 } else if (OPT_MATCH("-noroundtripcheck")) {
207 flags
&= ~FLAG_ROUNDTRIPCHECK
;
208 } else if (OPT_MATCH("-whole") || OPT_MATCH("-w")) {
209 flags
&= ~FLAG_SELECTIVE
;
210 } else if (OPT_MATCH("-localmap")) {
212 APPEND_LIST(localmappers
, nlocalmappers
, av
[1],
214 } else if (OPT_MATCH("-delimiter")) {
218 APPEND_LIST(delimiters
, ndelimiters
, v
,
220 } else if (OPT_MATCH("-alias") || OPT_MATCH("-a")) {
222 encoding_alias
= av
[1];
225 } else if (OPT_MATCH("-flush")) {
226 flush_every_line
= 1;
227 } else if (OPT_MATCH("-version") || OPT_MATCH("-v")) {
244 if ((r
= idn_resconf_initialize()) != idn_success
) {
245 errormsg("error initializing library\n");
250 * Create resource contexts.
251 * `resconf1' and `resconf2' are almost the same but local and
252 * IDN encodings are reversed.
256 if (idn_resconf_create(&resconf1
) != idn_success
||
257 idn_resconf_create(&resconf2
) != idn_success
) {
258 errormsg("error initializing configuration contexts\n");
262 /* Load configuration file. */
264 set_defaults(resconf1
);
265 set_defaults(resconf2
);
267 load_conf_file(resconf1
, resconf_file
);
268 load_conf_file(resconf2
, resconf_file
);
271 /* Set encoding alias file. */
272 if (encoding_alias
!= NULL
)
273 set_encoding_alias(encoding_alias
);
275 /* Set input codeset. */
/*
 * In reverse mode in_code becomes the IDN code of resconf1 and the local
 * code of resconf2; in forward mode the two roles are swapped.  When
 * in_code is NULL the converter already held by resconf1 is copied into
 * the opposite slot of resconf2 instead.
 */
276 if (flags
& FLAG_REVERSE
) {
277 if (in_code
== NULL
) {
278 conv
= idn_resconf_getidnconverter(resconf1
);
280 errormsg("cannot get the IDN encoding.\n"
281 "please specify an appropriate one "
282 "with `-in' option.\n");
285 idn_resconf_setlocalconverter(resconf2
, conv
);
286 idn_converter_destroy(conv
);
288 set_idncode(resconf1
, in_code
);
289 set_localcode(resconf2
, in_code
);
292 if (in_code
== NULL
) {
293 conv
= idn_resconf_getlocalconverter(resconf1
);
295 errormsg("cannot get the local encoding.\n"
296 "please specify an appropriate one "
297 "with `-in' option.\n");
300 idn_resconf_setidnconverter(resconf2
, conv
);
301 idn_converter_destroy(conv
);
303 set_localcode(resconf1
, in_code
);
304 set_idncode(resconf2
, in_code
);
308 /* Set output codeset. */
/*
 * Mirror image of the input setup: in reverse mode out_code becomes the
 * local code of resconf1 and the IDN code of resconf2; in forward mode
 * the two roles are swapped.
 */
309 if (flags
& FLAG_REVERSE
) {
310 if (out_code
== NULL
) {
311 conv
= idn_resconf_getlocalconverter(resconf1
);
313 errormsg("cannot get the local encoding.\n"
314 "please specify an appropriate one "
315 "with `-out' option.\n");
318 idn_resconf_setidnconverter(resconf2
, conv
);
319 idn_converter_destroy(conv
);
321 set_localcode(resconf1
, out_code
);
322 set_idncode(resconf2
, out_code
);
325 if (out_code
== NULL
) {
326 conv
= idn_resconf_getidnconverter(resconf1
);
328 errormsg("cannot get the IDN encoding.\n"
329 "please specify an appropriate one "
330 "with `-out' option.\n");
333 idn_resconf_setlocalconverter(resconf2
, conv
);
334 idn_converter_destroy(conv
);
336 set_idncode(resconf1
, out_code
);
337 set_localcode(resconf2
, out_code
);
341 /* Set delimiter map(s). */
342 if (ndelimiters
> 0) {
343 set_delimitermapper(resconf1
, delimiters
, ndelimiters
);
344 set_delimitermapper(resconf2
, delimiters
, ndelimiters
);
347 /* Set local map(s). */
348 if (nlocalmappers
> 0) {
349 set_localmapper(resconf1
, localmappers
, nlocalmappers
);
350 set_localmapper(resconf2
, localmappers
, nlocalmappers
);
353 /* Set NAMEPREP version. */
354 if (nameprep_version
!= NULL
) {
355 set_nameprep(resconf1
, nameprep_version
);
356 set_nameprep(resconf2
, nameprep_version
);
361 /* Open input file. */
363 if ((fp
= fopen(av
[0], "r")) == NULL
) {
364 errormsg("cannot open file %s: %s\n",
365 av
[0], strerror(errno
));
372 /* Do the conversion. */
373 if (flags
& FLAG_REVERSE
)
374 exit_value
= decode_file(resconf1
, resconf2
, fp
, flags
);
376 exit_value
= encode_file(resconf1
, resconf2
, fp
, flags
);
378 idn_resconf_destroy(resconf1
);
379 idn_resconf_destroy(resconf2
);
/*
 * Forward conversion driver: reads fp line by line with strbuf_getline()
 * and writes each converted line to stdout with fputs().
 *
 * conf1, conf2: the two resource contexts built by main().  conf1 supplies
 *         the local converter that is probed for ASCII compatibility and is
 *         used for the second pass; the first pass uses conf2 in one
 *         branch and conf1 in the other (the selecting condition is not
 *         visible, presumably local_ace_hack -- TODO confirm).
 * fp:     input stream.
 * flags:  FLAG_* bits, translated here into two idn_action_t masks,
 *         actions1 for the first pass and actions2 for the second.
 *
 * Per line: trim the trailing newline, convert the line into buf2 (to
 * UTF-8, per the original in-body comment), check the result with
 * idn_utf8_isvalidstring(), then convert buf2 back into buf1 with actions2
 * and flags & FLAG_SELECTIVE, and print buf1.  Failures are reported with
 * errormsg(); the statements that follow each report are not visible.
 *
 * Returns an int that main() stores in exit_value; the returned values
 * are not visible in this listing.
 */
385 encode_file(idn_resconf_t conf1
, idn_resconf_t conf2
, FILE *fp
, int flags
) {
387 idnconv_strbuf_t buf1
, buf2
;
388 idn_action_t actions1
, actions2
;
391 idn_converter_t conv
;
394 * See if the input codeset is an ACE.
396 conv
= idn_resconf_getlocalconverter(conf1
);
397 if (conv
!= NULL
&& idn_converter_isasciicompatible(conv
) &&
398 (flags
& FLAG_SELECTIVE
))
403 idn_converter_destroy(conv
);
/*
 * First-pass actions: IDN conversion, plus the round trip check when
 * requested, under local_ace_hack.  IDN_LOCALCONV is assigned afterwards,
 * presumably in the else branch (that line is not visible).
 */
405 if (local_ace_hack
) {
406 actions1
= IDN_IDNCONV
;
407 if (flags
& FLAG_ROUNDTRIPCHECK
)
408 actions1
|= IDN_RTCHECK
;
410 actions1
= IDN_LOCALCONV
;
/* Second-pass actions: start from IDN conversion and add one IDN_* bit per enabled FLAG_* bit. */
413 actions2
= IDN_IDNCONV
;
414 if (flags
& FLAG_DELIMMAP
)
415 actions2
|= IDN_DELIMMAP
;
416 if (flags
& FLAG_LOCALMAP
)
417 actions2
|= IDN_LOCALMAP
;
418 if (flags
& FLAG_MAP
)
420 if (flags
& FLAG_NORMALIZE
)
421 actions2
|= IDN_NORMALIZE
;
422 if (flags
& FLAG_PROHIBITCHECK
)
423 actions2
|= IDN_PROHCHECK
;
424 if (flags
& FLAG_UNASSIGNCHECK
)
425 actions2
|= IDN_UNASCHECK
;
426 if (flags
& FLAG_BIDICHECK
)
427 actions2
|= IDN_BIDICHECK
;
428 if (flags
& FLAG_ASCIICHECK
)
429 actions2
|= IDN_ASCCHECK
;
430 if (flags
& FLAG_LENGTHCHECK
)
431 actions2
|= IDN_LENCHECK
;
436 while (strbuf_getline(&buf1
, fp
) != NULL
) {
438 * Trim newline at the end. This is needed for
439 * those ascii-comatible encodings such as UTF-5 or RACE
440 * not to try converting newlines, which will result
441 * in `invalid encoding' error.
443 nl_trimmed
= trim_newline(&buf1
);
446 * Convert input line to UTF-8.
449 r
= convert_line(&buf1
, &buf2
, conf2
, actions1
,
450 FLAG_REVERSE
|FLAG_SELECTIVE
);
452 r
= convert_line(&buf1
, &buf2
, conf1
, actions1
,
455 if (r
!= idn_success
) {
456 errormsg("conversion failed at line %d: %s\n",
458 idn_result_tostring(r
));
461 if (!idn_utf8_isvalidstring(strbuf_get(&buf2
))) {
462 errormsg("conversion to utf-8 failed at line %d\n",
468 * Perform local mapping and NAMEPREP, and convert to
469 * the output codeset.
471 r
= convert_line(&buf2
, &buf1
, conf1
, actions2
,
472 flags
& FLAG_SELECTIVE
);
474 if (r
!= idn_success
) {
475 errormsg("error in nameprep or output conversion "
477 line_number
, idn_result_tostring(r
));
481 fputs(strbuf_get(&buf1
), stdout
);
485 if (flush_every_line
)
/*
 * Reverse conversion driver: reads fp line by line with strbuf_getline()
 * and writes each converted line to stdout with fputs().
 *
 * conf1, conf2: the two resource contexts built by main().  conf1 is
 *         probed for both its IDN converter and its local converter, and
 *         is used for the first two passes; the last pass uses conf2 when
 *         local_ace_hack is set and conf1 in the other visible call.
 * fp:     input stream.
 * flags:  FLAG_* bits, translated here into two idn_action_t masks,
 *         actions1 for the IDN decoding pass and actions2 for the final
 *         output pass.
 *
 * The two probes test whether each converter is ASCII compatible while
 * FLAG_SELECTIVE is set; the assignments they guard are not visible but
 * presumably set idn_ace_hack and local_ace_hack -- TODO confirm.
 *
 * Per line: trim the trailing newline; obtain buf2 either by a plain
 * strbuf_copy() (under local_ace_hack) or by convert_line() with
 * IDN_LOCALCONV; convert buf2 into buf1 with actions1; check buf1 with
 * idn_utf8_isvalidstring(); convert buf1 into buf2 with actions2; print
 * buf2.  Failures are reported with errormsg(); the statements that
 * follow each report are not visible.
 *
 * Returns an int that main() stores in exit_value; the returned values
 * are not visible in this listing.
 */
502 decode_file(idn_resconf_t conf1
, idn_resconf_t conf2
, FILE *fp
, int flags
) {
504 idnconv_strbuf_t buf1
, buf2
;
505 idn_action_t actions1
, actions2
;
507 int local_ace_hack
, idn_ace_hack
;
508 idn_converter_t conv
;
511 * See if the input codeset is an ACE.
513 conv
= idn_resconf_getidnconverter(conf1
);
514 if (conv
!= NULL
&& idn_converter_isasciicompatible(conv
) &&
515 (flags
& FLAG_SELECTIVE
))
520 idn_converter_destroy(conv
);
522 conv
= idn_resconf_getlocalconverter(conf1
);
523 if (conv
!= NULL
&& idn_converter_isasciicompatible(conv
) &&
524 (flags
& FLAG_SELECTIVE
))
529 idn_converter_destroy(conv
);
/* actions1 always starts from IDN conversion; more bits are added after actions2 is built. */
531 actions1
= IDN_IDNCONV
;
/*
 * Output-pass actions: under local_ace_hack a full IDN conversion with
 * the enabled checks.  IDN_LOCALCONV is assigned afterwards, presumably
 * in the else branch (that line is not visible).
 */
533 if (local_ace_hack
) {
534 actions2
= IDN_IDNCONV
;
535 if (flags
& FLAG_MAP
)
537 if (flags
& FLAG_NORMALIZE
)
538 actions2
|= IDN_NORMALIZE
;
539 if (flags
& FLAG_PROHIBITCHECK
)
540 actions2
|= IDN_PROHCHECK
;
541 if (flags
& FLAG_UNASSIGNCHECK
)
542 actions2
|= IDN_UNASCHECK
;
543 if (flags
& FLAG_BIDICHECK
)
544 actions2
|= IDN_BIDICHECK
;
545 if (flags
& FLAG_ASCIICHECK
)
546 actions2
|= IDN_ASCCHECK
;
547 if (flags
& FLAG_LENGTHCHECK
)
548 actions2
|= IDN_LENCHECK
;
550 actions2
= IDN_LOCALCONV
;
553 if (flags
& FLAG_DELIMMAP
)
554 actions1
|= IDN_DELIMMAP
;
555 if (flags
& FLAG_MAP
)
557 if (flags
& FLAG_NORMALIZE
)
558 actions1
|= IDN_NORMALIZE
;
/* NOTE(review): this FLAG_NORMALIZE test repeats the one just above; OR-ing the same bit twice has no extra effect. */
559 if (flags
& FLAG_NORMALIZE
)
560 actions1
|= IDN_NORMALIZE
;
561 if (flags
& FLAG_PROHIBITCHECK
)
562 actions1
|= IDN_PROHCHECK
;
563 if (flags
& FLAG_UNASSIGNCHECK
)
564 actions1
|= IDN_UNASCHECK
;
565 if (flags
& FLAG_BIDICHECK
)
566 actions1
|= IDN_BIDICHECK
;
567 if (flags
& FLAG_ASCIICHECK
)
568 actions1
|= IDN_ASCCHECK
;
569 if (flags
& FLAG_ROUNDTRIPCHECK
)
570 actions1
|= IDN_RTCHECK
;
575 while (strbuf_getline(&buf1
, fp
) != NULL
) {
577 * Trim newline at the end. This is needed for
578 * those ascii-comatible encodings such as UTF-5 or RACE
579 * not to try converting newlines, which will result
580 * in `invalid encoding' error.
582 nl_trimmed
= trim_newline(&buf1
);
585 * Treat input line as the string encoded in local
586 * encoding and convert it to UTF-8 encoded string.
588 if (local_ace_hack
) {
589 if (strbuf_copy(&buf2
, strbuf_get(&buf1
)) == NULL
)
594 r
= convert_line(&buf1
, &buf2
, conf1
, IDN_LOCALCONV
,
597 if (r
!= idn_success
) {
598 errormsg("conversion failed at line %d: %s\n",
599 line_number
, idn_result_tostring(r
));
604 * Convert internationalized domain names in the line.
607 r
= convert_line(&buf2
, &buf1
, conf1
, actions1
,
608 FLAG_REVERSE
|FLAG_SELECTIVE
);
610 r
= convert_line(&buf2
, &buf1
, conf1
, actions1
,
613 if (r
!= idn_success
) {
614 errormsg("conversion failed at line %d: %s\n",
616 idn_result_tostring(r
));
619 if (!idn_utf8_isvalidstring(strbuf_get(&buf1
))) {
620 errormsg("conversion to utf-8 failed at line %d\n",
626 * Perform round trip check and convert to the output
629 if (local_ace_hack
) {
630 r
= convert_line(&buf1
, &buf2
, conf2
, actions2
,
633 r
= convert_line(&buf1
, &buf2
, conf1
, actions2
,
637 if (r
!= idn_success
) {
638 errormsg("error in nameprep or output conversion "
640 line_number
, idn_result_tostring(r
));
644 fputs(strbuf_get(&buf2
), stdout
);
648 if (flush_every_line
)
/*
 * Tests whether the string held by buf ends with a newline character.
 *
 * buf: string buffer whose contents are fetched with strbuf_get().
 *
 * Per the original in-body description, a trailing newline is removed and
 * 1 is returned; otherwise 0 is returned and buf is left unchanged.  Only
 * the test itself is visible in this listing.
 */
664 trim_newline(idnconv_strbuf_t
*buf
) {
666 * If the string in BUF ends with a newline, trim it and
667 * return 1. Otherwise, just return 0 without modifying BUF.
669 char *s
= strbuf_get(buf
);
670 size_t len
= strlen(s
);
/*
 * NOTE(review): s[len - 1] is read with no visible check that len > 0, so
 * an empty string would index before the start of the buffer -- confirm
 * that callers never pass one.
 */
672 if (s
[len
- 1] == '\n') {
/*
 * Converts the string in from into the buffer to, using one of four
 * conversion routines selected by flags.
 *
 * from:    source string buffer, read with strbuf_get().
 * to:      destination buffer; its storage and capacity are taken from
 *          strbuf_get() and strbuf_size().
 * conf:    resource context handed to the conversion routine.
 * actions: idn_action_t mask handed to the conversion routine.
 * flags:   only the FLAG_REVERSE and FLAG_SELECTIVE bits are examined.
 *
 * The switch calls idn_res_encodename(), idn_res_decodename(),
 * selective_encode() or selective_decode().  Only the case label for
 * FLAG_REVERSE|FLAG_SELECTIVE (selective_decode) is visible; the other
 * three presumably correspond to no bits, FLAG_REVERSE alone and
 * FLAG_SELECTIVE alone, in that order -- TODO confirm.
 *
 * When the routine reports idn_buffer_overflow the destination is grown
 * with strbuf_double() and, per the original in-body comment, the
 * conversion is retried.  Returns idn_nomemory when that growth fails;
 * the other return paths are not visible in this listing.
 */
681 convert_line(idnconv_strbuf_t
*from
, idnconv_strbuf_t
*to
,
682 idn_resconf_t conf
, idn_action_t actions
, int flags
)
684 idn_result_t r
= idn_success
;
685 char *from_str
= strbuf_get(from
);
688 char *to_str
= strbuf_get(to
);
689 size_t to_size
= strbuf_size(to
);
691 switch (flags
& (FLAG_REVERSE
|FLAG_SELECTIVE
)) {
693 r
= idn_res_encodename(conf
, actions
, from_str
,
697 r
= idn_res_decodename(conf
, actions
, from_str
,
701 r
= selective_encode(conf
, actions
, from_str
,
704 case FLAG_REVERSE
|FLAG_SELECTIVE
:
705 r
= selective_decode(conf
, actions
, from_str
,
709 if (r
== idn_buffer_overflow
) {
711 * Conversion is not successful because
712 * the size of the target buffer is not enough.
713 * Double the size and retry.
715 if (strbuf_double(to
) == NULL
) {
716 /* oops. allocation failed. */
717 return (idn_nomemory
);
/*
 * Help text printed by print_usage(), one line per entry.  print_usage()
 * walks the array until it reaches a NULL entry; that terminator is not
 * visible in this listing.
 *
 * NOTE(review): main() also accepts -unassigncheck and its short form -u,
 * which have no entry here -- confirm whether the omission is intended.
 */
726 static char *options
[] = {
727 "-in INPUT-CODESET : specifies input codeset name.",
728 "-i INPUT-CODESET : synonym for -in",
729 "-out OUTPUT-CODESET : specifies output codeset name.",
730 "-o OUTPUT-CODESET : synonym for -out",
731 "-conf CONF-FILE : specifies idnkit configuration file.",
732 "-c CONF-FILE : synonym for -conf",
733 "-noconf : do not load idnkit configuration file.",
734 "-C : synonym for -noconf",
735 "-reverse : specifies reverse conversion.",
736 " (i.e. IDN encoding to local encoding)",
737 "-r : synonym for -reverse",
738 "-nameprep VERSION : specifies version name of NAMEPREP.",
739 "-n VERSION : synonym for -nameprep",
740 "-nonameprep : do not perform NAMEPREP.",
741 "-N : synonym for -nonameprep",
742 "-localmap MAPPING : specifies local mapping.",
743 "-nolocalmap : do not perform local mapping.",
744 "-L : synonym for -nolocalmap",
745 "-nounassigncheck : do not perform unassigned codepoint check.",
746 "-U : synonym for -nounassigncheck",
747 "-nobidicheck : do not perform bidirectional text check.",
748 "-B : synonym for -nobidicheck",
749 "-nolengthcheck : do not check label length.",
750 "-noasciicheck : do not check ASCII range characters.",
751 "-A : synonym for -noasciicheck",
752 "-noroundtripcheck : do not perform round trip check.",
753 "-delimiter U+XXXX : specifies local delimiter code point.",
754 "-alias alias-file : specifies codeset alias file.",
755 "-a : synonym for -alias",
756 "-flush : line-buffering mode.",
757 "-whole : convert the whole region instead of",
758 " regions containing non-ascii characters.",
759 "-w : synonym for -whole",
760 "-version : print version number, then exit.",
761 "-v : synonym for -version",
763 " The following options can be specified multiple times",
764 " -localmap, -delimiter",
/*
 * Visible part of print_version(): writes the tool version and the
 * library version to stderr.  The library version comes from
 * idn_version_getstring(); the argument for the first %s is not visible.
 */
770 fprintf(stderr
, "idnconv (idnkit) version: %s\n"
771 "library version: %s\n",
773 idn_version_getstring());
/*
 * Prints the usage summary to stderr: a Usage line that names cmd,
 * followed by every entry of options[] up to its NULL terminator, each on
 * its own line with a leading tab.
 *
 * cmd: command name shown in the Usage line.
 *
 * NOTE(review): MUST_HAVE_ARG in main() calls this without any follow-up
 * statement, so it presumably terminates the program; the statements
 * after the loop are not visible -- TODO confirm.
 */
778 print_usage(char *cmd
) {
781 fprintf(stderr
, "Usage: %s [options..] [file]\n", cmd
);
783 for (i
= 0; options
[i
] != NULL
; i
++)
784 fprintf(stderr
, "\t%s\n", options
[i
]);
/*
 * Parses a code point written in hexadecimal, with an optional leading
 * U+ prefix (uppercase U only, as matched by strncmp).
 *
 * p: text to parse.
 *
 * The digits are converted with strtoul() in base 16.  An error message
 * quoting p is written to stderr for invalid input; the validity test and
 * the statements after the message are not visible in this listing.
 * Returns unsigned long according to the forward declaration, presumably
 * the parsed value v -- TODO confirm.
 */
790 get_ucs(const char *p
) {
794 /* Skip optional 'U+' */
795 if (strncmp(p
, "U+", 2) == 0)
798 v
= strtoul(p
, &end
, 16);
800 fprintf(stderr
, "invalid UCS code point \"%s\"\n", p
);