tools/llvm: Do not build with symbols
[minix3.git] / external / bsd / nvi / dist / common / conv.c
blob805a92dfb003c55910865af465b4054d16f09af2
1 /* $NetBSD: conv.c,v 1.2 2013/11/22 15:52:05 christos Exp $ */
2 /*-
3 * Copyright (c) 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 * Copyright (c) 1993, 1994, 1995, 1996
6 * Keith Bostic. All rights reserved.
8 * See the LICENSE file for redistribution information.
9 */
11 #include "config.h"
13 #ifndef lint
14 static const char sccsid[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp (Berkeley) Date: 2001/08/18 21:41:41 ";
15 #endif /* not lint */
17 #include <sys/types.h>
18 #include <sys/queue.h>
19 #include <sys/time.h>
21 #include <bitstring.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
29 #include "common.h"
31 #ifdef USE_ICONV
32 #include <langinfo.h>
33 #include <iconv.h>
35 #define LANGCODESET nl_langinfo(CODESET)
36 #else
37 typedef int iconv_t;
39 #define LANGCODESET ""
40 #endif
42 #include <locale.h>
44 #ifdef USE_WIDECHAR
45 static int
46 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
47 const CHAR_T **dst)
49 int i;
50 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
51 size_t *blen = &cw->blen1;
53 BINC_RETW(NULL, *tostr, *blen, len);
55 *tolen = len;
56 for (i = 0; i < len; ++i)
57 (*tostr)[i] = (u_char) str[i];
59 *dst = cw->bp1;
61 return 0;
/*
 * Conversion-error policy, selected at build time.
 *
 * Default (ERROR_ON_CONVERT undefined): recover and keep going.
 *   HANDLE_ICONV_ERROR(o, i, ol, il)
 *	copies one byte from input cursor i to output cursor o, advances
 *	both cursors and decrements both remaining-byte counts.
 *   HANDLE_MBR_ERROR(n, mbs, d, s)
 *	stores s into d unchanged, resets the shift state mbs and sets n
 *	to 1 so the caller steps past a single unit.
 *
 * ERROR_ON_CONVERT defined: both macros jump to the caller's `err' label.
 * They are function-like in this branch too, because every call site
 * passes an argument list; an object-like `goto err' would expand to
 * `goto err(...);' and fail to compile.
 *
 * All four macro arguments must be modifiable lvalues.
 */
#ifndef ERROR_ON_CONVERT
#define HANDLE_ICONV_ERROR(o, i, ol, il) do {				\
	*(o)++ = *(i)++;						\
	(ol)--; (il)--;							\
    } while (/*CONSTCOND*/0)
#define HANDLE_MBR_ERROR(n, mbs, d, s) do {				\
	(d) = (s);							\
	MEMSET(&(mbs), 0, 1);						\
	(n) = 1;							\
    } while (/*CONSTCOND*/0)
#else
#define HANDLE_ICONV_ERROR(o, i, ol, il)	goto err
#define HANDLE_MBR_ERROR(n, mbs, d, s)		goto err
#endif
#define CONV_BUFFER_SIZE    512
/*
 * CONVERT(str, left, src, len) --
 *	Fill the caller's local `buffer' with the next chunk of str
 *	converted through the caller's iconv descriptor `id'.
 *
 *	str/left  input cursor and bytes remaining; both are advanced
 *	len       receives the number of bytes placed in buffer
 *	src       is pointed at buffer
 *
 * The macro is not hygienic: it uses the caller's `buffer', `id' and
 * `error' variables and its `err' label.  If nothing at all could be
 * converted, error is set to -left and control jumps to err.
 *
 * E2BIG only means the chunk buffer is full, which is the normal case for
 * input longer than CONV_BUFFER_SIZE; the remaining input is picked up by
 * the next CONVERT call, so it is not treated as a conversion error.
 * Error recovery is also skipped when no output room is left, because
 * HANDLE_ICONV_ERROR stores one byte and would write past the end of
 * buffer; the offending input is retried on the next call with a fresh
 * buffer.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)					\
    do {								\
	size_t outleft;							\
	char *bp = buffer;						\
	outleft = CONV_BUFFER_SIZE;					\
	errno = 0;							\
	if (iconv(id, (const char **)&str, &left, &bp, &outleft)	\
	    == (size_t)-1 && errno != E2BIG && outleft != 0)		\
		HANDLE_ICONV_ERROR(bp, str, outleft, left);		\
	if ((len = CONV_BUFFER_SIZE - outleft) == 0) {			\
	    error = -left;						\
	    goto err;							\
	}								\
	src = buffer;							\
    } while (0)
#else
#define CONVERT(str, left, src, len)
#endif
104 static int
105 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
106 size_t *tolen, const CHAR_T **dst, const char *enc)
108 int j;
109 size_t i = 0;
110 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
111 size_t *blen = &cw->blen1;
112 mbstate_t mbs;
113 size_t n;
114 ssize_t nlen = len;
115 const char *src = (const char *)str;
116 iconv_t id = (iconv_t)-1;
117 char buffer[CONV_BUFFER_SIZE];
118 size_t left = len;
119 int error = 1;
121 MEMSET(&mbs, 0, 1);
122 BINC_RETW(NULL, *tostr, *blen, nlen);
124 #ifdef USE_ICONV
125 if (strcmp(nl_langinfo(CODESET), enc)) {
126 id = iconv_open(nl_langinfo(CODESET), enc);
127 if (id == (iconv_t)-1)
128 goto err;
129 CONVERT(str, left, src, len);
131 #endif
133 for (i = 0, j = 0; j < len; ) {
134 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
135 /* NULL character converted */
136 if (n == (size_t)-2) error = -(len-j);
137 if (n == (size_t)-1 || n == (size_t)-2)
138 HANDLE_MBR_ERROR(n, mbs, (*tostr)[i], src[j]);
139 if (n == 0) n = 1;
140 j += n;
141 if (++i >= *blen) {
142 nlen += 256;
143 BINC_RETW(NULL, *tostr, *blen, nlen);
145 if (id != (iconv_t)-1 && j == len && left) {
146 CONVERT(str, left, src, len);
147 j = 0;
150 *tolen = i;
152 if (id != (iconv_t)-1)
153 iconv_close(id);
155 *dst = cw->bp1;
157 return 0;
158 err:
159 *tolen = i;
160 if (id != (iconv_t)-1)
161 iconv_close(id);
162 *dst = cw->bp1;
164 return error;
167 static int
168 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
169 size_t *tolen, const CHAR_T **dst)
171 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
174 static int
175 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
176 size_t *tolen, const CHAR_T **dst)
178 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_INPUTENCODING));
181 static int
182 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
183 size_t *tolen, const CHAR_T **dst)
185 return default_char2int(sp, str, len, cw, tolen, dst, LANGCODESET);
188 static int
189 CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
190 size_t *tolen, const char **dst)
192 *tolen = len * sizeof(CHAR_T);
193 *dst = (const char *)(const void *)str;
195 return 0;
198 static int
199 CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
200 size_t *tolen, const CHAR_T **dst)
202 *tolen = len / sizeof(CHAR_T);
203 *dst = (const CHAR_T*) str;
205 return 0;
208 static int
209 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
210 const char **dst)
212 int i;
213 char **tostr = (char **)(void *)&cw->bp1;
214 size_t *blen = &cw->blen1;
216 BINC_RETC(NULL, *tostr, *blen, len);
218 *tolen = len;
219 for (i = 0; i < len; ++i)
220 (*tostr)[i] = str[i];
222 *dst = cw->bp1;
224 return 0;
227 static int
228 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
229 size_t *tolen, const char **pdst, const char *enc)
231 size_t i, j;
232 int offset = 0;
233 char **tostr = (char **)(void *)&cw->bp1;
234 size_t *blen = &cw->blen1;
235 mbstate_t mbs;
236 size_t n;
237 ssize_t nlen = len + MB_CUR_MAX;
238 char *dst;
239 size_t buflen;
240 char buffer[CONV_BUFFER_SIZE];
241 iconv_t id = (iconv_t)-1;
243 /* convert first len bytes of buffer and append it to cw->bp
244 * len is adjusted => 0
245 * offset contains the offset in cw->bp and is adjusted
246 * cw->bp is grown as required
248 #ifdef USE_ICONV
249 #define CONVERT2(len, cw, offset) \
250 do { \
251 const char *bp = buffer; \
252 while (len != 0) { \
253 size_t outleft = cw->blen1 - offset; \
254 char *obp = (char *)cw->bp1 + offset; \
255 if (cw->blen1 < offset + MB_CUR_MAX) { \
256 nlen += 256; \
257 BINC_RETC(NULL, cw->bp1, cw->blen1, nlen); \
259 errno = 0; \
260 if (iconv(id, &bp, &len, &obp, &outleft) == (size_t)-1 && \
261 errno != E2BIG) \
262 HANDLE_ICONV_ERROR(obp, bp, outleft, len); \
263 offset = cw->blen1 - outleft; \
265 } while (0)
266 #else
267 #define CONVERT2(len, cw, offset)
268 #endif
271 MEMSET(&mbs, 0, 1);
272 BINC_RETC(NULL, *tostr, *blen, nlen);
273 dst = *tostr; buflen = *blen;
275 #ifdef USE_ICONV
276 if (strcmp(nl_langinfo(CODESET), enc)) {
277 id = iconv_open(enc, nl_langinfo(CODESET));
278 if (id == (iconv_t)-1)
279 goto err;
280 dst = buffer; buflen = CONV_BUFFER_SIZE;
282 #endif
284 for (i = 0, j = 0; i < (size_t)len; ++i) {
285 n = wcrtomb(dst+j, str[i], &mbs);
286 if (n == (size_t)-1)
287 HANDLE_MBR_ERROR(n, mbs, dst[j], str[i]);
288 j += n;
289 if (buflen < j + MB_CUR_MAX) {
290 if (id != (iconv_t)-1) {
291 CONVERT2(j, cw, offset);
292 } else {
293 nlen += 256;
294 BINC_RETC(NULL, *tostr, *blen, nlen);
295 dst = *tostr; buflen = *blen;
300 n = wcrtomb(dst+j, L'\0', &mbs);
301 j += n - 1; /* don't count NUL at the end */
302 *tolen = j;
304 if (id != (iconv_t)-1) {
305 CONVERT2(j, cw, offset);
306 *tolen = offset;
309 *pdst = cw->bp1;
311 return 0;
312 err:
313 *tolen = j;
315 *pdst = cw->bp1;
317 return 1;
320 static int
321 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
322 size_t *tolen, const char **dst)
324 return default_int2char(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
327 static int
328 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
329 size_t *tolen, const char **dst)
331 return default_int2char(sp, str, len, cw, tolen, dst, LANGCODESET);
334 #endif
337 void
338 conv_init (SCR *orig, SCR *sp)
340 if (orig != NULL)
341 MEMCPY(&sp->conv, &orig->conv, 1);
342 else {
343 setlocale(LC_ALL, "");
344 #ifdef USE_WIDECHAR
345 sp->conv.sys2int = cs_char2int;
346 sp->conv.int2sys = cs_int2char;
347 sp->conv.file2int = fe_char2int;
348 sp->conv.int2file = fe_int2char;
349 sp->conv.input2int = ie_char2int;
350 #endif
351 #ifdef USE_ICONV
352 o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
353 o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
354 #endif
359 conv_enc (SCR *sp, int option, const char *enc)
361 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
362 iconv_t id;
363 char2wchar_t *c2w;
364 wchar2char_t *w2c;
366 switch (option) {
367 case O_FILEENCODING:
368 c2w = &sp->conv.file2int;
369 w2c = &sp->conv.int2file;
370 break;
371 case O_INPUTENCODING:
372 c2w = &sp->conv.input2int;
373 w2c = NULL;
374 break;
375 default:
376 c2w = NULL;
377 w2c = NULL;
378 break;
381 if (!*enc) {
382 if (c2w) *c2w = raw2int;
383 if (w2c) *w2c = int2raw;
384 return 0;
387 if (!strcmp(enc, "WCHAR_T")) {
388 if (c2w) *c2w = CHAR_T_char2int;
389 if (w2c) *w2c = CHAR_T_int2char;
390 return 0;
393 id = iconv_open(enc, nl_langinfo(CODESET));
394 if (id == (iconv_t)-1)
395 goto err;
396 iconv_close(id);
397 id = iconv_open(nl_langinfo(CODESET), enc);
398 if (id == (iconv_t)-1)
399 goto err;
400 iconv_close(id);
402 switch (option) {
403 case O_FILEENCODING:
404 *c2w = fe_char2int;
405 *w2c = fe_int2char;
406 break;
407 case O_INPUTENCODING:
408 *c2w = ie_char2int;
409 break;
412 F_CLR(sp, SC_CONV_ERROR);
413 F_SET(sp, SC_SCR_REFORMAT);
415 return 0;
416 err:
417 switch (option) {
418 case O_FILEENCODING:
419 msgq(sp, M_ERR,
420 "321|File encoding conversion not supported");
421 break;
422 case O_INPUTENCODING:
423 msgq(sp, M_ERR,
424 "322|Input encoding conversion not supported");
425 break;
427 #endif
428 return 1;