1 /* $NetBSD: conv.c,v 1.2 2013/11/22 15:52:05 christos Exp $ */
3 * Copyright (c) 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 * Copyright (c) 1993, 1994, 1995, 1996
6 * Keith Bostic. All rights reserved.
8 * See the LICENSE file for redistribution information.
14 static const char sccsid
[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp (Berkeley) Date: 2001/08/18 21:41:41 ";
17 #include <sys/types.h>
18 #include <sys/queue.h>
21 #include <bitstring.h>
/*
 * LANGCODESET --
 *	Codeset name handed to default_char2int and default_int2char by the
 *	cs_* wrappers.  Two alternative definitions are visible: one asks the
 *	locale through nl_langinfo(CODESET), the other is the empty string.
 *	The preprocessor conditional that selects between them is not part
 *	of this excerpt.
 */
35 #define LANGCODESET nl_langinfo(CODESET)
39 #define LANGCODESET ""
/*
 * raw2int --
 *	Widen a byte string to CHAR_T without any character set decoding:
 *	each of the len input bytes is cast to u_char and stored in the
 *	CHAR_T buffer reached through cw->bp1.
 *
 *	sp:	screen; not referenced by the visible statements
 *	str:	input bytes
 *	len:	number of input bytes
 *	cw:	conversion window that owns the output buffer (bp1 / blen1)
 *	tolen:	not referenced by the visible statements
 *
 *	NOTE(review): the end of the parameter list, the declaration of i and
 *	the statements that report the result are missing from this excerpt.
 */
46 raw2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
/* Work directly on the buffer pointer and size stored in the window. */
50 CHAR_T
**tostr
= (CHAR_T
**)(void *)&cw
->bp1
;
51 size_t *blen
= &cw
->blen1
;
/*
 * BINC_RETW is defined elsewhere; presumably it grows the buffer to hold
 * len wide characters and returns from this function on failure -- confirm.
 */
53 BINC_RETW(NULL
, *tostr
, *blen
, len
);
/* The u_char cast keeps bytes above 0x7f from being sign extended. */
56 for (i
= 0; i
< len
; ++i
)
57 (*tostr
)[i
] = (u_char
) str
[i
];
/*
 * HANDLE_ICONV_ERROR, HANDLE_MBR_ERROR --
 *	Policy hooks invoked when iconv or mbrtowc/wcrtomb report a failure.
 *	When ERROR_ON_CONVERT is not defined they are multi-statement macros
 *	wrapped in do/while(0); their bodies are not part of this excerpt.
 *	The second pair of definitions turns every failure into a jump to the
 *	err label of the calling function.
 *
 *	NOTE(review): the directives separating the two pairs are missing
 *	here; presumably the goto variants are the branch taken when
 *	ERROR_ON_CONVERT is defined -- confirm.
 */
64 #ifndef ERROR_ON_CONVERT
65 #define HANDLE_ICONV_ERROR(o, i, ol, il) do { \
68 } while (/*CONSTCOND*/0)
69 #define HANDLE_MBR_ERROR(n, mbs, d, s) do { \
73 } while (/*CONSTCOND*/0)
75 #define HANDLE_ICONV_ERROR goto err
76 #define HANDLE_MBR_ERROR goto err
/*
 * CONV_BUFFER_SIZE, CONVERT --
 *	CONV_BUFFER_SIZE is the capacity given to iconv as output space by
 *	CONVERT and the size of the local staging arrays named buffer in
 *	default_char2int and default_int2char.
 *
 *	CONVERT calls iconv on descriptor id with str and left as the input
 *	cursor and bp and outleft as the output cursor.  A return value of
 *	(size_t)-1 is routed through HANDLE_ICONV_ERROR.  Afterwards len is
 *	set to CONV_BUFFER_SIZE minus outleft, that is the number of bytes
 *	produced, and a result of zero is handled specially (that branch is
 *	not visible).  The macro relies on id, bp and outleft being in scope
 *	at the point of use.
 *
 *	The empty definition is the alternative variant; the conditional
 *	that selects it is not part of this excerpt (presumably builds
 *	without iconv -- confirm).
 */
79 #define CONV_BUFFER_SIZE 512
80 /* fill the buffer with codeset encoding of string pointed to by str
81 * left has the number of bytes left in str and is adjusted
82 * len contains the number of bytes put in the buffer
85 #define CONVERT(str, left, src, len) \
89 outleft = CONV_BUFFER_SIZE; \
91 if (iconv(id, (const char **)&str, &left, &bp, &outleft) \
92 == (size_t)-1 /* && errno != E2BIG */) \
93 HANDLE_ICONV_ERROR(bp, str, outleft, left); \
94 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \
101 #define CONVERT(str, left, src, len)
/*
 * default_char2int --
 *	Decode a multibyte string into CHAR_T, storing the result in the
 *	buffer reached through cw->bp1.  Characters are decoded one at a time
 *	with mbrtowc.  When enc differs from nl_langinfo(CODESET) an iconv
 *	descriptor converting from enc to the locale codeset is opened and
 *	the input is first run through CONVERT in chunks.
 *
 *	sp:	screen; not referenced by the visible statements
 *	str:	input bytes
 *	len:	number of input bytes; reassigned by CONVERT when iconv is used
 *	cw:	conversion window that owns the output buffer (bp1 / blen1)
 *	tolen:	not referenced by the visible statements
 *	dst:	not referenced by the visible statements
 *	enc:	name of the encoding str is in
 *
 *	NOTE(review): the return type, several declarations (i, j, n, mbs,
 *	left, nlen, error), the result reporting and the err path are missing
 *	from this excerpt.
 */
105 default_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
106 size_t *tolen
, const CHAR_T
**dst
, const char *enc
)
110 CHAR_T
**tostr
= (CHAR_T
**)(void *)&cw
->bp1
;
111 size_t *blen
= &cw
->blen1
;
115 const char *src
= (const char *)str
;
/* (iconv_t)-1 doubles as the marker for no iconv stage in use. */
116 iconv_t id
= (iconv_t
)-1;
117 char buffer
[CONV_BUFFER_SIZE
];
122 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
/* strcmp is nonzero when the source encoding is not the locale codeset. */
125 if (strcmp(nl_langinfo(CODESET
), enc
)) {
126 id
= iconv_open(nl_langinfo(CODESET
), enc
);
127 if (id
== (iconv_t
)-1)
129 CONVERT(str
, left
, src
, len
);
/* i indexes the wide output, j the byte input. */
133 for (i
= 0, j
= 0; j
< len
; ) {
134 n
= mbrtowc((*tostr
)+i
, src
+j
, len
-j
, &mbs
);
/*
 * NOTE(review): per the mbrtowc specification (size_t)-2 reports an
 * incomplete multibyte sequence, not a converted null character, so the
 * existing comment below does not describe the check that follows it.
 * On (size_t)-2 error is set to minus the number of unconsumed bytes.
 */
135 /* NULL character converted */
136 if (n
== (size_t)-2) error
= -(len
-j
);
137 if (n
== (size_t)-1 || n
== (size_t)-2)
138 HANDLE_MBR_ERROR(n
, mbs
, (*tostr
)[i
], src
[j
]);
143 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
/* Current chunk used up and iconv input remains: convert the next chunk. */
145 if (id
!= (iconv_t
)-1 && j
== len
&& left
) {
146 CONVERT(str
, left
, src
, len
);
152 if (id
!= (iconv_t
)-1)
160 if (id
!= (iconv_t
)-1)
/*
 * fe_char2int --
 *	Thin wrapper around default_char2int: forwards every argument
 *	unchanged and supplies the string value of the O_FILEENCODING option,
 *	read with O_STR, as the source encoding.  Returns whatever
 *	default_char2int returns.
 */
168 fe_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
169 size_t *tolen
, const CHAR_T
**dst
)
171 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_FILEENCODING
));
/*
 * ie_char2int --
 *	Thin wrapper around default_char2int: forwards every argument
 *	unchanged and supplies the string value of the O_INPUTENCODING
 *	option, read with O_STR, as the source encoding.  Returns whatever
 *	default_char2int returns.
 */
175 ie_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
176 size_t *tolen
, const CHAR_T
**dst
)
178 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_INPUTENCODING
));
/*
 * cs_char2int --
 *	Thin wrapper around default_char2int: forwards every argument
 *	unchanged and supplies LANGCODESET as the source encoding.  Returns
 *	whatever default_char2int returns.
 */
182 cs_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
183 size_t *tolen
, const CHAR_T
**dst
)
185 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, LANGCODESET
);
/*
 * CHAR_T_int2char --
 *	Expose a CHAR_T string as raw bytes without copying or converting.
 *	*dst is set to str itself, reinterpreted as const char *, and *tolen
 *	to len multiplied by sizeof(CHAR_T).
 *
 *	sp, cw:	not referenced by the visible statements
 *	str:	wide character input
 *	len:	number of CHAR_T elements in str
 *	tolen:	receives the length in bytes
 *	dst:	receives the aliasing byte pointer
 *
 *	Because *dst aliases str, the result is only usable while the storage
 *	behind str is.
 */
189 CHAR_T_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
190 size_t *tolen
, const char **dst
)
192 *tolen
= len
* sizeof(CHAR_T
);
193 *dst
= (const char *)(const void *)str
;
/*
 * CHAR_T_char2int --
 *	Treat a byte buffer as an array of CHAR_T without copying or
 *	converting.  *dst is set to str cast to const CHAR_T *, and *tolen
 *	to len divided by sizeof(CHAR_T).
 *
 *	sp, cw:	not referenced by the visible statements
 *	str:	input bytes
 *	len:	number of input bytes
 *	tolen:	receives the length in CHAR_T elements
 *	dst:	receives the aliasing CHAR_T pointer
 *
 *	The division truncates, so trailing bytes that do not fill a whole
 *	CHAR_T are not counted.
 *	NOTE(review): the cast assumes str is suitably aligned for CHAR_T;
 *	nothing in the visible code checks this -- confirm against callers.
 */
199 CHAR_T_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
200 size_t *tolen
, const CHAR_T
**dst
)
202 *tolen
= len
/ sizeof(CHAR_T
);
203 *dst
= (const CHAR_T
*) str
;
/*
 * int2raw --
 *	Narrow a CHAR_T string to bytes without any character set encoding:
 *	each of the len elements is assigned to a char in the buffer reached
 *	through cw->bp1.  The assignment is a plain conversion to char, so
 *	values that do not fit in a char are not preserved.
 *
 *	sp:	screen; not referenced by the visible statements
 *	str:	wide character input
 *	len:	number of CHAR_T elements in str
 *	cw:	conversion window that owns the output buffer (bp1 / blen1)
 *	tolen:	not referenced by the visible statements
 *
 *	NOTE(review): the end of the parameter list, the declaration of i and
 *	the statements that report the result are missing from this excerpt.
 */
209 int2raw(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
/* The same bp1 slot is viewed as a char buffer here. */
213 char **tostr
= (char **)(void *)&cw
->bp1
;
214 size_t *blen
= &cw
->blen1
;
/*
 * BINC_RETC is defined elsewhere; presumably it grows the buffer to hold
 * len bytes and returns from this function on failure -- confirm.
 */
216 BINC_RETC(NULL
, *tostr
, *blen
, len
);
219 for (i
= 0; i
< len
; ++i
)
220 (*tostr
)[i
] = str
[i
];
/*
 * default_int2char --
 *	Encode a CHAR_T string as multibyte text.  Each input element is
 *	written with wcrtomb at offset j of the current output area dst, and
 *	a failure is routed through HANDLE_MBR_ERROR.
 *
 *	When enc equals nl_langinfo(CODESET) the output area is the buffer
 *	reached through cw->bp1, and it is enlarged with BINC_RETC whenever
 *	fewer than MB_CUR_MAX bytes of room remain past j.
 *
 *	Otherwise an iconv descriptor converting from the locale codeset to
 *	enc is opened and the output area is the local array named buffer,
 *	CONV_BUFFER_SIZE bytes long.  CONVERT2 drains it: it points iconv at
 *	the staged bytes, writes at cw->bp1 plus offset, asks BINC_RETC for
 *	more room when less than MB_CUR_MAX bytes are left there, routes a
 *	failure through HANDLE_ICONV_ERROR and recomputes offset as
 *	cw->blen1 minus the remaining output space.
 *
 *	After the loop a terminating wide NUL is encoded as well, and j is
 *	advanced by n minus one so the terminator is not counted.
 *
 *	sp:	screen; not referenced by the visible statements
 *	str:	wide character input
 *	len:	number of CHAR_T elements in str
 *	cw:	conversion window that owns the output buffer (bp1 / blen1)
 *	tolen:	not referenced by the visible statements
 *	pdst:	not referenced by the visible statements
 *	enc:	name of the target encoding
 *
 *	NOTE(review): the return type, several declarations (i, j, n, mbs,
 *	dst, buflen, offset), parts of CONVERT2, the result reporting and the
 *	err path are missing from this excerpt.
 */
228 default_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
229 size_t *tolen
, const char **pdst
, const char *enc
)
233 char **tostr
= (char **)(void *)&cw
->bp1
;
234 size_t *blen
= &cw
->blen1
;
237 ssize_t nlen
= len
+ MB_CUR_MAX
;
240 char buffer
[CONV_BUFFER_SIZE
];
241 iconv_t id
= (iconv_t
)-1;
243 /* convert first len bytes of buffer and append it to cw->bp
244 * len is adjusted => 0
245 * offset contains the offset in cw->bp and is adjusted
246 * cw->bp is grown as required
249 #define CONVERT2(len, cw, offset) \
251 const char *bp = buffer; \
253 size_t outleft = cw->blen1 - offset; \
254 char *obp = (char *)cw->bp1 + offset; \
255 if (cw->blen1 < offset + MB_CUR_MAX) { \
257 BINC_RETC(NULL, cw->bp1, cw->blen1, nlen); \
260 if (iconv(id, &bp, &len, &obp, &outleft) == (size_t)-1 && \
262 HANDLE_ICONV_ERROR(obp, bp, outleft, len); \
263 offset = cw->blen1 - outleft; \
267 #define CONVERT2(len, cw, offset)
272 BINC_RETC(NULL
, *tostr
, *blen
, nlen
);
273 dst
= *tostr
; buflen
= *blen
;
276 if (strcmp(nl_langinfo(CODESET
), enc
)) {
277 id
= iconv_open(enc
, nl_langinfo(CODESET
));
278 if (id
== (iconv_t
)-1)
280 dst
= buffer
; buflen
= CONV_BUFFER_SIZE
;
284 for (i
= 0, j
= 0; i
< (size_t)len
; ++i
) {
285 n
= wcrtomb(dst
+j
, str
[i
], &mbs
);
287 HANDLE_MBR_ERROR(n
, mbs
, dst
[j
], str
[i
]);
289 if (buflen
< j
+ MB_CUR_MAX
) {
290 if (id
!= (iconv_t
)-1) {
291 CONVERT2(j
, cw
, offset
);
294 BINC_RETC(NULL
, *tostr
, *blen
, nlen
);
295 dst
= *tostr
; buflen
= *blen
;
300 n
= wcrtomb(dst
+j
, L
'\0', &mbs
);
301 j
+= n
- 1; /* don't count NUL at the end */
304 if (id
!= (iconv_t
)-1) {
305 CONVERT2(j
, cw
, offset
);
/*
 * fe_int2char --
 *	Thin wrapper around default_int2char: forwards every argument
 *	unchanged and supplies the string value of the O_FILEENCODING option,
 *	read with O_STR, as the target encoding.  Returns whatever
 *	default_int2char returns.
 */
321 fe_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
322 size_t *tolen
, const char **dst
)
324 return default_int2char(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_FILEENCODING
));
/*
 * cs_int2char --
 *	Thin wrapper around default_int2char: forwards every argument
 *	unchanged and supplies LANGCODESET as the target encoding.  Returns
 *	whatever default_int2char returns.
 */
328 cs_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
329 size_t *tolen
, const char **dst
)
331 return default_int2char(sp
, str
, len
, cw
, tolen
, dst
, LANGCODESET
);
/*
 * conv_init --
 *	Set up the conversion table sp->conv of a screen.  The visible
 *	statements do three things: copy one conv element from orig to sp
 *	with MEMCPY; call setlocale(LC_ALL, "") and install the locale and
 *	option driven converters (cs_*, fe_*, ie_*) in sp->conv; and set the
 *	O_FILEENCODING and O_INPUTENCODING options to a duplicate of
 *	nl_langinfo(CODESET).
 *
 *	orig:	screen whose conv table is copied
 *	sp:	screen being initialised
 *
 *	NOTE(review): the control flow around these statements is missing
 *	from this excerpt; presumably the copy and the fresh setup are
 *	alternative branches depending on orig -- confirm.
 */
338 conv_init (SCR
*orig
, SCR
*sp
)
341 MEMCPY(&sp
->conv
, &orig
->conv
, 1);
/* An empty locale name selects the locale from the environment. */
343 setlocale(LC_ALL
, "");
345 sp
->conv
.sys2int
= cs_char2int
;
346 sp
->conv
.int2sys
= cs_int2char
;
347 sp
->conv
.file2int
= fe_char2int
;
348 sp
->conv
.int2file
= fe_int2char
;
349 sp
->conv
.input2int
= ie_char2int
;
/* Both encoding options start out as the locale codeset. */
352 o_set(sp
, O_FILEENCODING
, OS_STRDUP
, nl_langinfo(CODESET
), 0);
353 o_set(sp
, O_INPUTENCODING
, OS_STRDUP
, nl_langinfo(CODESET
), 0);
/*
 * conv_enc --
 *	React to a change of an encoding option by choosing which converter
 *	functions sp->conv uses for that option.
 *
 *	sp:	screen whose conv table is updated
 *	option:	option being changed; O_INPUTENCODING is one visible case
 *	enc:	name of the requested encoding
 *
 *	The body shown here is compiled only when both USE_WIDECHAR and
 *	USE_ICONV are defined.  Failure is reported with the messages
 *	numbered 321 (file encoding) and 322 (input encoding).
 *
 *	NOTE(review): the switch skeleton, the first case label, the
 *	conditions guarding the raw converters, the error reporting calls and
 *	the return statements are missing from this excerpt.
 */
359 conv_enc (SCR
*sp
, int option
, const char *enc
)
361 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
/* c2w and w2c point at the table slots affected by this option. */
368 c2w
= &sp
->conv
.file2int
;
369 w2c
= &sp
->conv
.int2file
;
371 case O_INPUTENCODING
:
372 c2w
= &sp
->conv
.input2int
;
/* Byte for byte converters; the condition selecting them is not visible. */
382 if (c2w
) *c2w
= raw2int
;
383 if (w2c
) *w2c
= int2raw
;
/* The name WCHAR_T selects the converters that only reinterpret memory. */
387 if (!strcmp(enc
, "WCHAR_T")) {
388 if (c2w
) *c2w
= CHAR_T_char2int
;
389 if (w2c
) *w2c
= CHAR_T_int2char
;
/* Try to open an iconv descriptor in each direction for enc. */
393 id
= iconv_open(enc
, nl_langinfo(CODESET
));
394 if (id
== (iconv_t
)-1)
397 id
= iconv_open(nl_langinfo(CODESET
), enc
);
398 if (id
== (iconv_t
)-1)
407 case O_INPUTENCODING
:
/* Clear the conversion error flag and request a screen reformat. */
412 F_CLR(sp
, SC_CONV_ERROR
);
413 F_SET(sp
, SC_SCR_REFORMAT
);
420 "321|File encoding conversion not supported");
422 case O_INPUTENCODING
:
424 "322|Input encoding conversion not supported");