etc/protocols - sync with NetBSD-8
[minix.git] / external / bsd / nvi / dist / common / conv.c
blob60cfc9f6803b34b29c19ad6ec58bda11dcaa44b2
1 /* $NetBSD: conv.c,v 1.4 2014/01/26 21:43:45 christos Exp $ */
2 /*-
3 * Copyright (c) 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 * Copyright (c) 1993, 1994, 1995, 1996
6 * Keith Bostic. All rights reserved.
8 * See the LICENSE file for redistribution information.
9 */
11 #include "config.h"
13 #include <sys/cdefs.h>
14 #if 0
15 #ifndef lint
16 static const char sccsid[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp (Berkeley) Date: 2001/08/18 21:41:41 ";
17 #endif /* not lint */
18 #else
19 __RCSID("$NetBSD: conv.c,v 1.4 2014/01/26 21:43:45 christos Exp $");
20 #endif
22 #include <sys/types.h>
23 #include <sys/queue.h>
24 #include <sys/time.h>
26 #include <bitstring.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
34 #include "common.h"
36 #ifdef USE_ICONV
37 #include <langinfo.h>
38 #include <iconv.h>
40 #define LANGCODESET nl_langinfo(CODESET)
41 #else
42 typedef int iconv_t;
44 #define LANGCODESET ""
45 #endif
47 #include <locale.h>
49 #ifdef USE_WIDECHAR
50 static int
51 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
52 const CHAR_T **dst)
54 int i;
55 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
56 size_t *blen = &cw->blen1;
58 BINC_RETW(NULL, *tostr, *blen, len);
60 *tolen = len;
61 for (i = 0; i < len; ++i) {
62 CHAR_T w = (u_char)str[i];
63 memcpy((*tostr) + i, &w, sizeof(**tostr));
66 *dst = cw->bp1;
68 return 0;
/*
 * Conversion error policy.
 *
 * Without ERROR_ON_CONVERT a failed conversion is papered over:
 *   HANDLE_ICONV_ERROR copies one unit from the input cursor `i' to the
 *	output cursor `o' and decrements both remaining counts;
 *   HANDLE_MBR_ERROR stores the source unit `s' into `d', resets the
 *	shift state `mbs' and reports one unit consumed through `n'.
 *
 * With ERROR_ON_CONVERT both macros jump to the caller's `err' label.
 * They must stay function-like in that case too: they are always invoked
 * with an argument list, and an object-like definition would expand to
 * `goto err(...)', which does not compile.
 */
#ifndef ERROR_ON_CONVERT
#define HANDLE_ICONV_ERROR(o, i, ol, il) do {				\
	*(o)++ = *(i)++;						\
	(ol)--; (il)--;							\
    } while (/*CONSTCOND*/0)
#define HANDLE_MBR_ERROR(n, mbs, d, s) do {				\
	(d) = (s);							\
	MEMSET(&(mbs), 0, 1);						\
	(n) = 1;							\
    } while (/*CONSTCOND*/0)
#else
#define HANDLE_ICONV_ERROR(o, i, ol, il)	goto err
#define HANDLE_MBR_ERROR(n, mbs, d, s)		goto err
#endif
#define CONV_BUFFER_SIZE 512
/*
 * CONVERT --
 *	Fill the caller's local `buffer' (CONV_BUFFER_SIZE bytes) with the
 *	iconv conversion of the string pointed to by str, using the caller's
 *	descriptor `id'.
 *
 *	str/left are advanced/decremented by iconv for the input consumed,
 *	len receives the number of bytes placed in the buffer, and
 *	src is pointed at the buffer.
 *
 *	If nothing could be produced, the caller's `error' is set to -left
 *	and control jumps to the caller's `err' label.
 *
 *	E2BIG only means the output buffer filled up before the input was
 *	exhausted; the caller is expected to invoke CONVERT again for the
 *	remainder, so it must not go through the error handler (which would
 *	otherwise store a raw byte, possibly past the end of the buffer).
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)					\
    do {								\
	size_t outleft;							\
	char *bp = buffer;						\
	outleft = CONV_BUFFER_SIZE;					\
	errno = 0;							\
	if (iconv(id, (const char **)&str, &left, &bp, &outleft)	\
	    == (size_t)-1 && errno != E2BIG)				\
	    HANDLE_ICONV_ERROR(bp, str, outleft, left);			\
	if ((len = CONV_BUFFER_SIZE - outleft) == 0) {			\
	    error = -left;						\
	    goto err;							\
	}								\
	src = buffer;							\
    } while (0)
#else
#define CONVERT(str, left, src, len)
#endif
111 static int
112 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
113 size_t *tolen, const CHAR_T **dst, const char *enc)
115 int j;
116 size_t i = 0;
117 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
118 size_t *blen = &cw->blen1;
119 mbstate_t mbs;
120 size_t n;
121 ssize_t nlen = len;
122 const char *src = (const char *)str;
123 iconv_t id = (iconv_t)-1;
124 char buffer[CONV_BUFFER_SIZE];
125 size_t left = len;
126 int error = 1;
128 MEMSET(&mbs, 0, 1);
129 BINC_RETW(NULL, *tostr, *blen, nlen);
131 #ifdef USE_ICONV
132 if (strcmp(nl_langinfo(CODESET), enc)) {
133 id = iconv_open(nl_langinfo(CODESET), enc);
134 if (id == (iconv_t)-1)
135 goto err;
136 CONVERT(str, left, src, len);
138 #endif
140 for (i = 0, j = 0; j < len; ) {
141 CHAR_T w;
142 n = mbrtowc(&w, src + j, len - j, &mbs);
143 memcpy((*tostr) + i, &w, sizeof(**tostr));
144 /* NULL character converted */
145 if (n == (size_t)-2) error = -(len - j);
146 if (n == (size_t)-1 || n == (size_t)-2) {
147 HANDLE_MBR_ERROR(n, mbs, w, src[j]);
148 memcpy((*tostr) + i, &w, sizeof(**tostr));
150 if (n == 0) n = 1;
151 j += n;
152 if (++i >= *blen) {
153 nlen += 256;
154 BINC_RETW(NULL, *tostr, *blen, nlen);
156 if (id != (iconv_t)-1 && j == len && left) {
157 CONVERT(str, left, src, len);
158 j = 0;
161 *tolen = i;
163 if (id != (iconv_t)-1)
164 iconv_close(id);
166 *dst = cw->bp1;
168 return 0;
169 err:
170 *tolen = i;
171 if (id != (iconv_t)-1)
172 iconv_close(id);
173 *dst = cw->bp1;
175 return error;
178 static int
179 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
180 size_t *tolen, const CHAR_T **dst)
182 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
185 static int
186 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
187 size_t *tolen, const CHAR_T **dst)
189 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_INPUTENCODING));
192 static int
193 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
194 size_t *tolen, const CHAR_T **dst)
196 return default_char2int(sp, str, len, cw, tolen, dst, LANGCODESET);
199 static int
200 CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
201 size_t *tolen, const char **dst)
203 *tolen = len * sizeof(CHAR_T);
204 *dst = (const char *)(const void *)str;
206 return 0;
209 static int
210 CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
211 size_t *tolen, const CHAR_T **dst)
213 *tolen = len / sizeof(CHAR_T);
214 *dst = (const CHAR_T*) str;
216 return 0;
219 static int
220 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
221 const char **dst)
223 int i;
224 char **tostr = (char **)(void *)&cw->bp1;
225 size_t *blen = &cw->blen1;
227 BINC_RETC(NULL, *tostr, *blen, len);
229 *tolen = len;
230 for (i = 0; i < len; ++i) {
231 CHAR_T w;
232 memcpy(&w, str + i, sizeof(w));
233 (*tostr)[i] = w;
236 *dst = cw->bp1;
238 return 0;
241 static int
242 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
243 size_t *tolen, const char **pdst, const char *enc)
245 size_t i, j;
246 int offset = 0;
247 char **tostr = (char **)(void *)&cw->bp1;
248 size_t *blen = &cw->blen1;
249 mbstate_t mbs;
250 size_t n;
251 ssize_t nlen = len + MB_CUR_MAX;
252 char *dst;
253 size_t buflen;
254 char buffer[CONV_BUFFER_SIZE];
255 iconv_t id = (iconv_t)-1;
257 /* convert first len bytes of buffer and append it to cw->bp
258 * len is adjusted => 0
259 * offset contains the offset in cw->bp and is adjusted
260 * cw->bp is grown as required
262 #ifdef USE_ICONV
263 #define CONVERT2(len, cw, offset) \
264 do { \
265 const char *bp = buffer; \
266 while (len != 0) { \
267 size_t outleft = cw->blen1 - offset; \
268 char *obp = (char *)cw->bp1 + offset; \
269 if (cw->blen1 < offset + MB_CUR_MAX) { \
270 nlen += 256; \
271 BINC_RETC(NULL, cw->bp1, cw->blen1, nlen); \
273 errno = 0; \
274 if (iconv(id, &bp, &len, &obp, &outleft) == (size_t)-1 && \
275 errno != E2BIG) \
276 HANDLE_ICONV_ERROR(obp, bp, outleft, len); \
277 offset = cw->blen1 - outleft; \
279 } while (0)
280 #else
281 #define CONVERT2(len, cw, offset)
282 #endif
285 MEMSET(&mbs, 0, 1);
286 BINC_RETC(NULL, *tostr, *blen, nlen);
287 dst = *tostr; buflen = *blen;
289 #ifdef USE_ICONV
290 if (strcmp(nl_langinfo(CODESET), enc)) {
291 id = iconv_open(enc, nl_langinfo(CODESET));
292 if (id == (iconv_t)-1)
293 goto err;
294 dst = buffer; buflen = CONV_BUFFER_SIZE;
296 #endif
298 for (i = 0, j = 0; i < (size_t)len; ++i) {
299 CHAR_T w;
300 memcpy(&w, str + i, sizeof(w));
301 n = wcrtomb(dst + j, w, &mbs);
302 if (n == (size_t)-1)
303 HANDLE_MBR_ERROR(n, mbs, dst[j], w);
304 j += n;
305 if (buflen < j + MB_CUR_MAX) {
306 if (id != (iconv_t)-1) {
307 CONVERT2(j, cw, offset);
308 } else {
309 nlen += 256;
310 BINC_RETC(NULL, *tostr, *blen, nlen);
311 dst = *tostr; buflen = *blen;
316 n = wcrtomb(dst + j, L'\0', &mbs);
317 j += n - 1; /* don't count NUL at the end */
318 *tolen = j;
320 if (id != (iconv_t)-1) {
321 CONVERT2(j, cw, offset);
322 *tolen = offset;
325 *pdst = cw->bp1;
327 return 0;
328 err:
329 *tolen = j;
331 *pdst = cw->bp1;
333 return 1;
336 static int
337 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
338 size_t *tolen, const char **dst)
340 return default_int2char(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
343 static int
344 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
345 size_t *tolen, const char **dst)
347 return default_int2char(sp, str, len, cw, tolen, dst, LANGCODESET);
350 #endif
353 void
354 conv_init (SCR *orig, SCR *sp)
356 if (orig != NULL)
357 MEMCPY(&sp->conv, &orig->conv, 1);
358 else {
359 setlocale(LC_ALL, "");
360 #ifdef USE_WIDECHAR
361 sp->conv.sys2int = cs_char2int;
362 sp->conv.int2sys = cs_int2char;
363 sp->conv.file2int = fe_char2int;
364 sp->conv.int2file = fe_int2char;
365 sp->conv.input2int = ie_char2int;
366 #endif
367 #ifdef USE_ICONV
368 o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
369 o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
370 #endif
375 conv_enc (SCR *sp, int option, const char *enc)
377 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
378 iconv_t id;
379 char2wchar_t *c2w;
380 wchar2char_t *w2c;
382 switch (option) {
383 case O_FILEENCODING:
384 c2w = &sp->conv.file2int;
385 w2c = &sp->conv.int2file;
386 break;
387 case O_INPUTENCODING:
388 c2w = &sp->conv.input2int;
389 w2c = NULL;
390 break;
391 default:
392 c2w = NULL;
393 w2c = NULL;
394 break;
397 if (!*enc) {
398 if (c2w) *c2w = raw2int;
399 if (w2c) *w2c = int2raw;
400 return 0;
403 if (!strcmp(enc, "WCHAR_T")) {
404 if (c2w) *c2w = CHAR_T_char2int;
405 if (w2c) *w2c = CHAR_T_int2char;
406 return 0;
409 id = iconv_open(enc, nl_langinfo(CODESET));
410 if (id == (iconv_t)-1)
411 goto err;
412 iconv_close(id);
413 id = iconv_open(nl_langinfo(CODESET), enc);
414 if (id == (iconv_t)-1)
415 goto err;
416 iconv_close(id);
418 switch (option) {
419 case O_FILEENCODING:
420 *c2w = fe_char2int;
421 *w2c = fe_int2char;
422 break;
423 case O_INPUTENCODING:
424 *c2w = ie_char2int;
425 break;
428 F_CLR(sp, SC_CONV_ERROR);
429 F_SET(sp, SC_SCR_REFORMAT);
431 return 0;
432 err:
433 switch (option) {
434 case O_FILEENCODING:
435 msgq(sp, M_ERR,
436 "321|File encoding conversion not supported");
437 break;
438 case O_INPUTENCODING:
439 msgq(sp, M_ERR,
440 "322|Input encoding conversion not supported");
441 break;
443 #endif
444 return 1;