Sync usage with man page.
[netbsd-mini2440.git] / dist / nvi / common / conv.c
blob539c8a52d76c44e1a938fcb4011c335f77b93614
1 /* $NetBSD: conv.c,v 1.5 2008/12/06 18:39:20 christos Exp $ */
3 /*-
4 * Copyright (c) 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 * Copyright (c) 1993, 1994, 1995, 1996
7 * Keith Bostic. All rights reserved.
9 * See the LICENSE file for redistribution information.
12 #include "config.h"
14 #ifndef lint
15 static const char sccsid[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp (Berkeley) Date: 2001/08/18 21:41:41";
16 #endif /* not lint */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/time.h>
22 #include <bitstring.h>
23 #include <errno.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
30 #include "common.h"
32 #ifdef USE_ICONV
33 #include <langinfo.h>
34 #include <iconv.h>
36 #define LANGCODESET nl_langinfo(CODESET)
37 #else
38 typedef int iconv_t;
40 #define LANGCODESET ""
41 #endif
43 #include <locale.h>
45 #ifdef USE_WIDECHAR
46 static int
47 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
48 const CHAR_T **dst)
50 int i;
51 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
52 size_t *blen = &cw->blen1;
54 BINC_RETW(NULL, *tostr, *blen, len);
56 *tolen = len;
57 for (i = 0; i < len; ++i)
58 (*tostr)[i] = (u_char) str[i];
60 *dst = cw->bp1;
62 return 0;
#define CONV_BUFFER_SIZE 512
/*
 * CONVERT --
 *	Run the next part of str through the iconv descriptor `id' into the
 *	expanding function's local `buffer'.
 *
 *	str, left: input cursor and number of bytes remaining; both are
 *		   handed to iconv by address and adjusted by it.
 *	src:	   set to buffer on success.
 *	len:	   set to the number of bytes placed in buffer.
 *
 *	Jumps to the caller's `err' label when iconv reports any failure
 *	(the E2BIG exemption is commented out), and also, after setting
 *	`error' to -left, when no output bytes were produced.
 *
 *	Without USE_ICONV the macro expands to nothing.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)					\
    do {								\
	char *bp = buffer;						\
	size_t outleft = CONV_BUFFER_SIZE;				\
	errno = 0;							\
	if (iconv(id, (const char **)&str, &left, &bp, &outleft) ==	\
	    (size_t)-1 /* && errno != E2BIG */)				\
		goto err;						\
	len = CONV_BUFFER_SIZE - outleft;				\
	if (len == 0) {							\
		error = -left;						\
		goto err;						\
	}								\
	src = buffer;							\
    } while (0)
#else
#define CONVERT(str, left, src, len)
#endif
90 static int
91 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
92 size_t *tolen, const CHAR_T **dst, const char *enc)
94 int j;
95 size_t i = 0;
96 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
97 size_t *blen = &cw->blen1;
98 mbstate_t mbs;
99 size_t n;
100 ssize_t nlen = len;
101 const char *src = (const char *)str;
102 iconv_t id = (iconv_t)-1;
103 char buffer[CONV_BUFFER_SIZE];
104 size_t left = len;
105 int error = 1;
107 MEMSET(&mbs, 0, 1);
108 BINC_RETW(NULL, *tostr, *blen, nlen);
110 #ifdef USE_ICONV
111 if (strcmp(nl_langinfo(CODESET), enc)) {
112 id = iconv_open(nl_langinfo(CODESET), enc);
113 if (id == (iconv_t)-1)
114 goto err;
115 CONVERT(str, left, src, len);
117 #endif
119 for (i = 0, j = 0; j < len; ) {
120 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
121 /* NULL character converted */
122 if (n == (size_t)-2) error = -(len-j);
123 if (n == (size_t)-1 || n == (size_t)-2) goto err;
124 if (n == 0) n = 1;
125 j += n;
126 if (++i >= *blen) {
127 nlen += 256;
128 BINC_RETW(NULL, *tostr, *blen, nlen);
130 if (id != (iconv_t)-1 && j == len && left) {
131 CONVERT(str, left, src, len);
132 j = 0;
135 *tolen = i;
137 if (id != (iconv_t)-1)
138 iconv_close(id);
140 *dst = cw->bp1;
142 return 0;
143 err:
144 *tolen = i;
145 if (id != (iconv_t)-1)
146 iconv_close(id);
147 *dst = cw->bp1;
149 return error;
152 static int
153 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
154 size_t *tolen, const CHAR_T **dst)
156 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
159 static int
160 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
161 size_t *tolen, const CHAR_T **dst)
163 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_INPUTENCODING));
166 static int
167 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
168 size_t *tolen, const CHAR_T **dst)
170 return default_char2int(sp, str, len, cw, tolen, dst, LANGCODESET);
173 static int
174 CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
175 size_t *tolen, const char **dst)
177 *tolen = len * sizeof(CHAR_T);
178 *dst = (const char *)(const void *)str;
180 return 0;
183 static int
184 CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
185 size_t *tolen, const CHAR_T **dst)
187 *tolen = len / sizeof(CHAR_T);
188 *dst = (const CHAR_T*) str;
190 return 0;
193 static int
194 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
195 const char **dst)
197 int i;
198 char **tostr = (char **)(void *)&cw->bp1;
199 size_t *blen = &cw->blen1;
201 BINC_RETC(NULL, *tostr, *blen, len);
203 *tolen = len;
204 for (i = 0; i < len; ++i)
205 (*tostr)[i] = str[i];
207 *dst = cw->bp1;
209 return 0;
212 static int
213 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
214 size_t *tolen, const char **pdst, const char *enc)
216 size_t i, j;
217 int offset = 0;
218 char **tostr = (char **)(void *)&cw->bp1;
219 size_t *blen = &cw->blen1;
220 mbstate_t mbs;
221 size_t n;
222 ssize_t nlen = len + MB_CUR_MAX;
223 char *dst;
224 size_t buflen;
225 char buffer[CONV_BUFFER_SIZE];
226 iconv_t id = (iconv_t)-1;
228 /* convert first len bytes of buffer and append it to cw->bp
229 * len is adjusted => 0
230 * offset contains the offset in cw->bp and is adjusted
231 * cw->bp is grown as required
233 #ifdef USE_ICONV
234 #define CONVERT2(len, cw, offset) \
235 do { \
236 const char *bp = buffer; \
237 while (len != 0) { \
238 size_t outleft = cw->blen1 - offset; \
239 char *obp = (char *)cw->bp1 + offset; \
240 if (cw->blen1 < offset + MB_CUR_MAX) { \
241 nlen += 256; \
242 BINC_RETC(NULL, cw->bp1, cw->blen1, nlen); \
244 errno = 0; \
245 if (iconv(id, &bp, &len, &obp, &outleft) == (size_t)-1 && \
246 errno != E2BIG) \
247 goto err; \
248 offset = cw->blen1 - outleft; \
250 } while (0)
251 #else
252 #define CONVERT2(len, cw, offset)
253 #endif
256 MEMSET(&mbs, 0, 1);
257 BINC_RETC(NULL, *tostr, *blen, nlen);
258 dst = *tostr; buflen = *blen;
260 #ifdef USE_ICONV
261 if (strcmp(nl_langinfo(CODESET), enc)) {
262 id = iconv_open(enc, nl_langinfo(CODESET));
263 if (id == (iconv_t)-1)
264 goto err;
265 dst = buffer; buflen = CONV_BUFFER_SIZE;
267 #endif
269 for (i = 0, j = 0; i < (size_t)len; ++i) {
270 n = wcrtomb(dst+j, str[i], &mbs);
271 if (n == (size_t)-1) goto err;
272 j += n;
273 if (buflen < j + MB_CUR_MAX) {
274 if (id != (iconv_t)-1) {
275 CONVERT2(j, cw, offset);
276 } else {
277 nlen += 256;
278 BINC_RETC(NULL, *tostr, *blen, nlen);
279 dst = *tostr; buflen = *blen;
284 n = wcrtomb(dst+j, L'\0', &mbs);
285 j += n - 1; /* don't count NUL at the end */
286 *tolen = j;
288 if (id != (iconv_t)-1) {
289 CONVERT2(j, cw, offset);
290 *tolen = offset;
293 *pdst = cw->bp1;
295 return 0;
296 err:
297 *tolen = j;
299 *pdst = cw->bp1;
301 return 1;
304 static int
305 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
306 size_t *tolen, const char **dst)
308 return default_int2char(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
311 static int
312 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
313 size_t *tolen, const char **dst)
315 return default_int2char(sp, str, len, cw, tolen, dst, LANGCODESET);
318 #endif
321 void
322 conv_init (SCR *orig, SCR *sp)
324 if (orig != NULL)
325 MEMCPY(&sp->conv, &orig->conv, 1);
326 else {
327 setlocale(LC_ALL, "");
328 #ifdef USE_WIDECHAR
329 sp->conv.sys2int = cs_char2int;
330 sp->conv.int2sys = cs_int2char;
331 sp->conv.file2int = fe_char2int;
332 sp->conv.int2file = fe_int2char;
333 sp->conv.input2int = ie_char2int;
334 #endif
335 #ifdef USE_ICONV
336 o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
337 o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
338 #endif
343 conv_enc (SCR *sp, int option, const char *enc)
345 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
346 iconv_t id;
347 char2wchar_t *c2w;
348 wchar2char_t *w2c;
350 switch (option) {
351 case O_FILEENCODING:
352 c2w = &sp->conv.file2int;
353 w2c = &sp->conv.int2file;
354 break;
355 case O_INPUTENCODING:
356 c2w = &sp->conv.input2int;
357 w2c = NULL;
358 break;
359 default:
360 c2w = NULL;
361 w2c = NULL;
362 break;
365 if (!*enc) {
366 if (c2w) *c2w = raw2int;
367 if (w2c) *w2c = int2raw;
368 return 0;
371 if (!strcmp(enc, "WCHAR_T")) {
372 if (c2w) *c2w = CHAR_T_char2int;
373 if (w2c) *w2c = CHAR_T_int2char;
374 return 0;
377 id = iconv_open(enc, nl_langinfo(CODESET));
378 if (id == (iconv_t)-1)
379 goto err;
380 iconv_close(id);
381 id = iconv_open(nl_langinfo(CODESET), enc);
382 if (id == (iconv_t)-1)
383 goto err;
384 iconv_close(id);
386 switch (option) {
387 case O_FILEENCODING:
388 *c2w = fe_char2int;
389 *w2c = fe_int2char;
390 break;
391 case O_INPUTENCODING:
392 *c2w = ie_char2int;
393 break;
396 F_CLR(sp, SC_CONV_ERROR);
397 F_SET(sp, SC_SCR_REFORMAT);
399 return 0;
400 err:
401 switch (option) {
402 case O_FILEENCODING:
403 msgq(sp, M_ERR,
404 "321|File encoding conversion not supported");
405 break;
406 case O_INPUTENCODING:
407 msgq(sp, M_ERR,
408 "322|Input encoding conversion not supported");
409 break;
411 #endif
412 return 1;