1 /* $NetBSD: conv.c,v 1.4 2014/01/26 21:43:45 christos Exp $ */
3 * Copyright (c) 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 * Copyright (c) 1993, 1994, 1995, 1996
6 * Keith Bostic. All rights reserved.
8 * See the LICENSE file for redistribution information.
13 #include <sys/cdefs.h>
16 static const char sccsid
[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp (Berkeley) Date: 2001/08/18 21:41:41 ";
19 __RCSID("$NetBSD: conv.c,v 1.4 2014/01/26 21:43:45 christos Exp $");
22 #include <sys/types.h>
23 #include <sys/queue.h>
26 #include <bitstring.h>
/*
 * LANGCODESET --
 *	Encoding name handed to the default converters by cs_char2int and
 *	cs_int2char.  One definition asks the C library for the locale's
 *	codeset, the other is the empty string.
 *
 * NOTE(review): the preprocessor condition choosing between the two
 * definitions is not part of this listing -- confirm it in the full file.
 */
40 #define LANGCODESET nl_langinfo(CODESET)
44 #define LANGCODESET ""
/*
 * raw2int --
 *	Widen a byte string into CHAR_T elements, one element per input
 *	byte, with no character-set conversion.  Each byte is read through
 *	a u_char cast before being widened, so a high-bit byte is not
 *	sign-extended.  Elements are stored with memcpy into the buffer
 *	reached through cw->bp1 (viewed here as a CHAR_T pointer).
 *
 *	BINC_RETW is called with len before the loop; presumably it makes
 *	the buffer large enough for len elements -- its definition is not
 *	in this view, confirm there.
 *
 * NOTE(review): this listing is incomplete (the end of the parameter list
 * and the tail of the body are missing), so how tolen and the final
 * parameter are filled in, and what is returned, cannot be stated here.
 */
51 raw2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
55 CHAR_T
**tostr
= (CHAR_T
**)(void *)&cw
->bp1
;
56 size_t *blen
= &cw
->blen1
;
58 BINC_RETW(NULL
, *tostr
, *blen
, len
);
61 for (i
= 0; i
< len
; ++i
) {
62 CHAR_T w
= (u_char
)str
[i
];
63 memcpy((*tostr
) + i
, &w
, sizeof(**tostr
));
/*
 * HANDLE_ICONV_ERROR, HANDLE_MBR_ERROR --
 *	Compile-time policy for conversion failures, keyed on
 *	ERROR_ON_CONVERT.  When that symbol is not defined, both macros are
 *	multi-statement do { } while (0) bodies taking the conversion
 *	state as arguments.  The second pair of definitions simply jumps to
 *	an `err' label, which the function using the macro must supply.
 *
 * NOTE(review): the bodies of the first pair and the #else/#endif lines
 * are missing from this listing; what the non-failing variant does with
 * its arguments must be checked in the full file.
 */
71 #ifndef ERROR_ON_CONVERT
72 #define HANDLE_ICONV_ERROR(o, i, ol, il) do { \
75 } while (/*CONSTCOND*/0)
76 #define HANDLE_MBR_ERROR(n, mbs, d, s) do { \
80 } while (/*CONSTCOND*/0)
82 #define HANDLE_ICONV_ERROR goto err
83 #define HANDLE_MBR_ERROR goto err
/*
 * CONV_BUFFER_SIZE, CONVERT --
 *	CONV_BUFFER_SIZE is the byte size of the intermediate buffer that
 *	the converters declare on their stack.  CONVERT relies on names
 *	from the expanding function (id, bp, outleft): it resets outleft to
 *	CONV_BUFFER_SIZE, runs iconv over str/left, routes a (size_t)-1
 *	result to HANDLE_ICONV_ERROR, and sets len to the number of bytes
 *	produced (CONV_BUFFER_SIZE - outleft), with a special case when
 *	that number is zero.
 *
 *	A second, empty definition of CONVERT is also present; presumably
 *	it is the variant for builds without iconv -- the guard is not in
 *	this view.
 *
 * NOTE(review): several lines of the macro body are missing here,
 * including what happens in the zero-length case.
 */
86 #define CONV_BUFFER_SIZE 512
87 /* fill the buffer with codeset encoding of string pointed to by str
88 * left has the number of bytes left in str and is adjusted
89 * len contains the number of bytes put in the buffer
92 #define CONVERT(str, left, src, len) \
96 outleft = CONV_BUFFER_SIZE; \
98 if (iconv(id, (const char **)&str, &left, &bp, &outleft) \
99 == (size_t)-1 /* && errno != E2BIG */) \
100 HANDLE_ICONV_ERROR(bp, str, outleft, left); \
101 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \
108 #define CONVERT(str, left, src, len)
/*
 * default_char2int --
 *	Convert a multibyte string in encoding `enc' into CHAR_T elements
 *	stored in the buffer reached through cw->bp1.
 *
 *	Visible steps:
 *	  - If enc differs from nl_langinfo(CODESET), an iconv descriptor
 *	    converting from enc to the locale codeset is opened and CONVERT
 *	    is run once over the input, using the local CONV_BUFFER_SIZE
 *	    byte buffer.
 *	  - The main loop decodes with mbrtowc from src + j and copies the
 *	    resulting wide character into slot i with memcpy.
 *	  - A (size_t)-2 result (incomplete sequence) records
 *	    error = -(len - j); both (size_t)-1 and (size_t)-2 go through
 *	    HANDLE_MBR_ERROR, after which the element is stored again.
 *	  - The output buffer is re-sized with BINC_RETW inside the loop.
 *	  - When iconv is active, the current chunk is used up (j == len)
 *	    and input remains (left != 0), CONVERT is run again.
 *
 * NOTE(review): many lines are missing from this listing (declarations,
 * the advance of i and j, the assignments to tolen and dst, the bodies of
 * the trailing `id != (iconv_t)-1' checks, and the return statements), so
 * the exit paths cannot be described from here.  The `NULL character
 * converted' remark below appears to refer to a statement that is not
 * shown.
 */
112 default_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
113 size_t *tolen
, const CHAR_T
**dst
, const char *enc
)
117 CHAR_T
**tostr
= (CHAR_T
**)(void *)&cw
->bp1
;
118 size_t *blen
= &cw
->blen1
;
122 const char *src
= (const char *)str
;
123 iconv_t id
= (iconv_t
)-1;
124 char buffer
[CONV_BUFFER_SIZE
];
129 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
132 if (strcmp(nl_langinfo(CODESET
), enc
)) {
133 id
= iconv_open(nl_langinfo(CODESET
), enc
);
134 if (id
== (iconv_t
)-1)
136 CONVERT(str
, left
, src
, len
);
140 for (i
= 0, j
= 0; j
< len
; ) {
142 n
= mbrtowc(&w
, src
+ j
, len
- j
, &mbs
);
143 memcpy((*tostr
) + i
, &w
, sizeof(**tostr
));
144 /* NULL character converted */
145 if (n
== (size_t)-2) error
= -(len
- j
);
146 if (n
== (size_t)-1 || n
== (size_t)-2) {
147 HANDLE_MBR_ERROR(n
, mbs
, w
, src
[j
]);
148 memcpy((*tostr
) + i
, &w
, sizeof(**tostr
));
154 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
156 if (id
!= (iconv_t
)-1 && j
== len
&& left
) {
157 CONVERT(str
, left
, src
, len
);
163 if (id
!= (iconv_t
)-1)
171 if (id
!= (iconv_t
)-1)
/*
 * fe_char2int --
 *	File-encoding entry point for bytes -> CHAR_T conversion.  Passes
 *	every argument straight through to default_char2int, supplying the
 *	string value of the O_FILEENCODING option as the source encoding,
 *	and returns whatever default_char2int returns.
 */
179 fe_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
180 size_t *tolen
, const CHAR_T
**dst
)
182 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_FILEENCODING
));
/*
 * ie_char2int --
 *	Input-encoding entry point for bytes -> CHAR_T conversion.  Passes
 *	every argument straight through to default_char2int, supplying the
 *	string value of the O_INPUTENCODING option as the source encoding,
 *	and returns whatever default_char2int returns.
 */
186 ie_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
187 size_t *tolen
, const CHAR_T
**dst
)
189 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_INPUTENCODING
));
/*
 * cs_char2int --
 *	Locale-codeset entry point for bytes -> CHAR_T conversion.  Passes
 *	every argument straight through to default_char2int with
 *	LANGCODESET as the source encoding, and returns its result.
 */
193 cs_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
194 size_t *tolen
, const CHAR_T
**dst
)
196 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, LANGCODESET
);
/*
 * CHAR_T_int2char --
 *	Expose a CHAR_T string as raw bytes without copying: *tolen is set
 *	to the byte length (len * sizeof(CHAR_T)) and *dst is pointed at
 *	str itself, so the result is only valid while str is.  sp and cw
 *	are not referenced in the lines shown.
 *
 * NOTE(review): the return statement is not part of this listing.
 */
200 CHAR_T_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
201 size_t *tolen
, const char **dst
)
203 *tolen
= len
* sizeof(CHAR_T
);
204 *dst
= (const char *)(const void *)str
;
/*
 * CHAR_T_char2int --
 *	Reinterpret a byte buffer as CHAR_T elements without copying:
 *	*tolen is set to len / sizeof(CHAR_T) (integer division, so bytes
 *	of a trailing partial element are not counted) and *dst is pointed
 *	at str itself.  sp and cw are not referenced in the lines shown.
 *
 * NOTE(review): the cast assumes str is suitably aligned for CHAR_T --
 * confirm against callers.  The return statement is not part of this
 * listing.
 */
210 CHAR_T_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
211 size_t *tolen
, const CHAR_T
**dst
)
213 *tolen
= len
/ sizeof(CHAR_T
);
214 *dst
= (const CHAR_T
*) str
;
/*
 * int2raw --
 *	CHAR_T -> byte counterpart of raw2int.  The output buffer is the
 *	one reached through cw->bp1, viewed as a char pointer, and
 *	BINC_RETC is called with len before the loop (presumably to size
 *	it -- macro not in this view).  Each input element is fetched into
 *	a local with memcpy rather than by direct dereference.
 *
 * NOTE(review): the end of the parameter list, the statement that stores
 * each output byte, and the function tail are missing from this listing,
 * so the narrowing rule and the return value cannot be stated here.
 */
220 int2raw(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
224 char **tostr
= (char **)(void *)&cw
->bp1
;
225 size_t *blen
= &cw
->blen1
;
227 BINC_RETC(NULL
, *tostr
, *blen
, len
);
230 for (i
= 0; i
< len
; ++i
) {
232 memcpy(&w
, str
+ i
, sizeof(w
));
/*
 * default_int2char --
 *	Convert a CHAR_T string into a multibyte string in encoding `enc',
 *	building the result in the buffer reached through cw->bp1.
 *
 *	Visible steps:
 *	  - nlen starts as len + MB_CUR_MAX and is the size requested from
 *	    BINC_RETC.
 *	  - Without iconv, wcrtomb writes directly into the cw buffer
 *	    (dst = *tostr, buflen = *blen).
 *	  - If enc differs from nl_langinfo(CODESET), an iconv descriptor
 *	    converting from the locale codeset to enc is opened and wcrtomb
 *	    output is staged in the local CONV_BUFFER_SIZE byte buffer.
 *	  - Each input element is fetched with memcpy and encoded with
 *	    wcrtomb at dst + j; failures go through HANDLE_MBR_ERROR.
 *	  - When fewer than MB_CUR_MAX bytes may remain (buflen <
 *	    j + MB_CUR_MAX), the staged bytes are pushed through CONVERT2
 *	    if iconv is active; BINC_RETC is also called there and dst and
 *	    buflen are reloaded from the cw buffer.
 *	  - After the loop a terminating L'\0' is encoded and j advances by
 *	    n - 1 so the terminator is not counted; with iconv active, a
 *	    final CONVERT2 pushes the remainder through.
 *
 *	CONVERT2 is defined inside this function and uses its locals
 *	(buffer, id, nlen).  A second, empty definition is also present,
 *	presumably for builds without iconv -- the guard is not shown.
 *
 * NOTE(review): many lines are missing from this listing (declarations,
 * parts of CONVERT2, the else branches, the advance of j, the assignments
 * to tolen and pdst, cleanup and return), so the exit paths cannot be
 * described from here.
 */
242 default_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
243 size_t *tolen
, const char **pdst
, const char *enc
)
247 char **tostr
= (char **)(void *)&cw
->bp1
;
248 size_t *blen
= &cw
->blen1
;
251 ssize_t nlen
= len
+ MB_CUR_MAX
;
254 char buffer
[CONV_BUFFER_SIZE
];
255 iconv_t id
= (iconv_t
)-1;
257 /* convert first len bytes of buffer and append it to cw->bp
258 * len is adjusted => 0
259 * offset contains the offset in cw->bp and is adjusted
260 * cw->bp is grown as required
263 #define CONVERT2(len, cw, offset) \
265 const char *bp = buffer; \
267 size_t outleft = cw->blen1 - offset; \
268 char *obp = (char *)cw->bp1 + offset; \
269 if (cw->blen1 < offset + MB_CUR_MAX) { \
271 BINC_RETC(NULL, cw->bp1, cw->blen1, nlen); \
274 if (iconv(id, &bp, &len, &obp, &outleft) == (size_t)-1 && \
276 HANDLE_ICONV_ERROR(obp, bp, outleft, len); \
277 offset = cw->blen1 - outleft; \
281 #define CONVERT2(len, cw, offset)
286 BINC_RETC(NULL
, *tostr
, *blen
, nlen
);
287 dst
= *tostr
; buflen
= *blen
;
290 if (strcmp(nl_langinfo(CODESET
), enc
)) {
291 id
= iconv_open(enc
, nl_langinfo(CODESET
));
292 if (id
== (iconv_t
)-1)
294 dst
= buffer
; buflen
= CONV_BUFFER_SIZE
;
298 for (i
= 0, j
= 0; i
< (size_t)len
; ++i
) {
300 memcpy(&w
, str
+ i
, sizeof(w
));
301 n
= wcrtomb(dst
+ j
, w
, &mbs
);
303 HANDLE_MBR_ERROR(n
, mbs
, dst
[j
], w
);
305 if (buflen
< j
+ MB_CUR_MAX
) {
306 if (id
!= (iconv_t
)-1) {
307 CONVERT2(j
, cw
, offset
);
310 BINC_RETC(NULL
, *tostr
, *blen
, nlen
);
311 dst
= *tostr
; buflen
= *blen
;
316 n
= wcrtomb(dst
+ j
, L
'\0', &mbs
);
317 j
+= n
- 1; /* don't count NUL at the end */
320 if (id
!= (iconv_t
)-1) {
321 CONVERT2(j
, cw
, offset
);
/*
 * fe_int2char --
 *	File-encoding entry point for CHAR_T -> bytes conversion.  Passes
 *	every argument straight through to default_int2char, supplying the
 *	string value of the O_FILEENCODING option as the target encoding,
 *	and returns whatever default_int2char returns.
 */
337 fe_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
338 size_t *tolen
, const char **dst
)
340 return default_int2char(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_FILEENCODING
));
/*
 * cs_int2char --
 *	Locale-codeset entry point for CHAR_T -> bytes conversion.  Passes
 *	every argument straight through to default_int2char with
 *	LANGCODESET as the target encoding, and returns its result.
 */
344 cs_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
345 size_t *tolen
, const char **dst
)
347 return default_int2char(sp
, str
, len
, cw
, tolen
, dst
, LANGCODESET
);
/*
 * conv_init --
 *	Set up the conversion table (sp->conv) of a screen.
 *
 *	Visible actions:
 *	  - MEMCPY of sp->conv from orig->conv (count argument 1), i.e. the
 *	    table can be inherited from an existing screen.
 *	  - setlocale(LC_ALL, "") to adopt the environment's locale.
 *	  - Installation of the converters: sys2int/int2sys use the
 *	    locale-codeset functions, file2int/int2file the file-encoding
 *	    functions, and input2int the input-encoding function.
 *	  - Both O_FILEENCODING and O_INPUTENCODING are set (OS_STRDUP) to
 *	    nl_langinfo(CODESET).
 *
 * NOTE(review): the conditionals separating these statements are missing
 * from this listing, so which of them run when orig is or is not supplied
 * must be confirmed in the full file.
 */
354 conv_init (SCR
*orig
, SCR
*sp
)
357 MEMCPY(&sp
->conv
, &orig
->conv
, 1);
359 setlocale(LC_ALL
, "");
361 sp
->conv
.sys2int
= cs_char2int
;
362 sp
->conv
.int2sys
= cs_int2char
;
363 sp
->conv
.file2int
= fe_char2int
;
364 sp
->conv
.int2file
= fe_int2char
;
365 sp
->conv
.input2int
= ie_char2int
;
368 o_set(sp
, O_FILEENCODING
, OS_STRDUP
, nl_langinfo(CODESET
), 0);
369 o_set(sp
, O_INPUTENCODING
, OS_STRDUP
, nl_langinfo(CODESET
), 0);
/*
 * conv_enc --
 *	Switch the converters used for one encoding option to match a new
 *	encoding name.  The logic shown is compiled only when both
 *	USE_WIDECHAR and USE_ICONV are defined.
 *
 *	Visible steps:
 *	  - c2w/w2c are pointed at the table slots to update: file2int and
 *	    int2file in one case, input2int for O_INPUTENCODING.
 *	  - One path installs the byte-for-byte converters (raw2int,
 *	    int2raw); the condition selecting it is not shown.
 *	  - The literal name "WCHAR_T" installs the no-copy CHAR_T
 *	    converters.
 *	  - Otherwise iconv_open is tried in both directions between enc
 *	    and nl_langinfo(CODESET) to check the pair is supported.
 *	  - SC_CONV_ERROR is cleared and SC_SCR_REFORMAT is set on sp.
 *	  - Messages 321 and 322 report an unsupported file or input
 *	    encoding.
 *
 * NOTE(review): most of the switch structure, the handling of the probe
 * descriptors, the error label and the return statements are missing from
 * this listing; the description above is limited to the lines shown.
 */
375 conv_enc (SCR
*sp
, int option
, const char *enc
)
377 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
384 c2w
= &sp
->conv
.file2int
;
385 w2c
= &sp
->conv
.int2file
;
387 case O_INPUTENCODING
:
388 c2w
= &sp
->conv
.input2int
;
398 if (c2w
) *c2w
= raw2int
;
399 if (w2c
) *w2c
= int2raw
;
403 if (!strcmp(enc
, "WCHAR_T")) {
404 if (c2w
) *c2w
= CHAR_T_char2int
;
405 if (w2c
) *w2c
= CHAR_T_int2char
;
409 id
= iconv_open(enc
, nl_langinfo(CODESET
));
410 if (id
== (iconv_t
)-1)
413 id
= iconv_open(nl_langinfo(CODESET
), enc
);
414 if (id
== (iconv_t
)-1)
423 case O_INPUTENCODING
:
428 F_CLR(sp
, SC_CONV_ERROR
);
429 F_SET(sp
, SC_SCR_REFORMAT
);
436 "321|File encoding conversion not supported");
438 case O_INPUTENCODING
:
440 "322|Input encoding conversion not supported");