1 /* $NetBSD: conv.c,v 1.5 2008/12/06 18:39:20 christos Exp $ */
4 * Copyright (c) 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 * Copyright (c) 1993, 1994, 1995, 1996
7 * Keith Bostic. All rights reserved.
9 * See the LICENSE file for redistribution information.
/* Revision identification string for this file. */
15 static const char sccsid
[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp (Berkeley) Date: 2001/08/18 21:41:41";
18 #include <sys/types.h>
19 #include <sys/queue.h>
22 #include <bitstring.h>
/*
 * LANGCODESET is the encoding name that cs_char2int() and cs_int2char()
 * pass as the `enc' argument of the default converters.  Two alternative
 * definitions appear here: one that asks nl_langinfo(CODESET) and an
 * empty-string fallback.
 * NOTE(review): the preprocessor conditional that selects between the two
 * is not visible in this listing -- confirm which build option controls it.
 */
36 #define LANGCODESET nl_langinfo(CODESET)
40 #define LANGCODESET ""
/*
 * raw2int: widen a byte string into CHAR_T elements, one output element
 * per input byte, with no encoding interpretation.
 *
 * The destination is the conversion window buffer cw->bp1 (viewed as a
 * CHAR_T pointer) whose current size is tracked in cw->blen1.  BINC_RETW
 * is invoked on that buffer with `len' before the copy -- presumably to
 * make room for len wide characters; confirm against the macro definition.
 * Each source byte is cast through u_char before being stored, so the
 * stored value is the byte's unsigned value.
 *
 * NOTE(review): the end of the parameter list, the declaration of `i',
 * and whatever sets *tolen / the output pointer and returns are not
 * visible in this listing.
 */
47 raw2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
51 CHAR_T
**tostr
= (CHAR_T
**)(void *)&cw
->bp1
;
52 size_t *blen
= &cw
->blen1
;
54 BINC_RETW(NULL
, *tostr
, *blen
, len
);
57 for (i
= 0; i
< len
; ++i
)
58 (*tostr
)[i
] = (u_char
) str
[i
];
/*
 * CONV_BUFFER_SIZE is the size, in bytes, of the local `buffer' arrays the
 * default converters use as iconv scratch space.
 *
 * CONVERT(str, left, src, len) has two definitions in this listing: one
 * built around iconv() and one that expands to nothing.  In the iconv
 * variant the visible lines reset `outleft' to CONV_BUFFER_SIZE, call
 * iconv() on (str, left) writing through `bp', and compute `len' as the
 * number of bytes produced (CONV_BUFFER_SIZE - outleft).
 * NOTE(review): several lines of the macro body, the end of the
 * descriptive comment below, and the conditional that selects between the
 * two definitions are missing from this listing.
 */
65 #define CONV_BUFFER_SIZE 512
66 /* fill the buffer with codeset encoding of string pointed to by str
67 * left has the number of bytes left in str and is adjusted
68 * len contains the number of bytes put in the buffer
71 #define CONVERT(str, left, src, len) \
75 outleft = CONV_BUFFER_SIZE; \
77 if (iconv(id, (const char **)&str, &left, &bp, &outleft) == (size_t)-1 \
78 /* && errno != E2BIG */) \
80 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \
87 #define CONVERT(str, left, src, len)
/*
 * default_char2int: decode `len' bytes of `str', given in encoding `enc',
 * into CHAR_T elements stored in the conversion window buffer cw->bp1.
 *
 * Decoding is done with mbrtowc().  When `enc' differs from
 * nl_langinfo(CODESET), an iconv descriptor converting from `enc' to the
 * locale codeset is opened first and CONVERT is used to feed the input
 * through the local `buffer' in chunks.
 *
 * NOTE(review): the return type, several local declarations (i, j, n,
 * nlen, left, mbs, error, ...), the bodies of some `if' statements, the
 * `err' label and the return statements are not visible in this listing.
 */
91 default_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
92 size_t *tolen
, const CHAR_T
**dst
, const char *enc
)
96 CHAR_T
**tostr
= (CHAR_T
**)(void *)&cw
->bp1
;
97 size_t *blen
= &cw
->blen1
;
101 const char *src
= (const char *)str
;
102 iconv_t id
= (iconv_t
)-1;
103 char buffer
[CONV_BUFFER_SIZE
];
108 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
/* Only involve iconv when the requested encoding is not the locale codeset. */
111 if (strcmp(nl_langinfo(CODESET
), enc
)) {
112 id
= iconv_open(nl_langinfo(CODESET
), enc
);
113 if (id
== (iconv_t
)-1)
115 CONVERT(str
, left
, src
, len
);
119 for (i
= 0, j
= 0; j
< len
; ) {
120 n
= mbrtowc((*tostr
)+i
, src
+j
, len
-j
, &mbs
);
121 /* NULL character converted */
/*
 * mbrtowc() returns (size_t)-2 for an incomplete multibyte sequence and
 * (size_t)-1 for an invalid one.  In the incomplete case the number of
 * bytes not yet consumed is recorded, negated, in `error' before bailing
 * out.
 */
122 if (n
== (size_t)-2) error
= -(len
-j
);
123 if (n
== (size_t)-1 || n
== (size_t)-2) goto err
;
128 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
/* Current chunk fully consumed but input remains: convert the next chunk. */
130 if (id
!= (iconv_t
)-1 && j
== len
&& left
) {
131 CONVERT(str
, left
, src
, len
);
/* NOTE(review): the statements guarded by the two tests below are not visible here. */
137 if (id
!= (iconv_t
)-1)
145 if (id
!= (iconv_t
)-1)
/*
 * fe_char2int: bytes -> CHAR_T using the screen's O_FILEENCODING option
 * string as the source encoding.  Thin wrapper that forwards every
 * argument to default_char2int() and returns its result.
 */
153 fe_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
154 size_t *tolen
, const CHAR_T
**dst
)
156 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_FILEENCODING
));
/*
 * ie_char2int: bytes -> CHAR_T using the screen's O_INPUTENCODING option
 * string as the source encoding.  Thin wrapper that forwards every
 * argument to default_char2int() and returns its result.
 */
160 ie_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
161 size_t *tolen
, const CHAR_T
**dst
)
163 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_INPUTENCODING
));
/*
 * cs_char2int: bytes -> CHAR_T using LANGCODESET as the source encoding.
 * Thin wrapper that forwards every argument to default_char2int() and
 * returns its result.
 */
167 cs_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
168 size_t *tolen
, const CHAR_T
**dst
)
170 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, LANGCODESET
);
/*
 * CHAR_T_int2char: expose a CHAR_T array as raw bytes without copying.
 * *tolen receives the size in bytes (len * sizeof(CHAR_T)) and *dst is set
 * to alias the caller's `str'.  The visible statements use neither `sp'
 * nor `cw'.
 * NOTE(review): the return statement is not visible in this listing.
 */
174 CHAR_T_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
175 size_t *tolen
, const char **dst
)
177 *tolen
= len
* sizeof(CHAR_T
);
178 *dst
= (const char *)(const void *)str
;
/*
 * CHAR_T_char2int: reinterpret a byte buffer as a CHAR_T array without
 * copying.  *tolen receives len / sizeof(CHAR_T) (integer division, so
 * trailing bytes that do not fill a whole CHAR_T are not counted) and
 * *dst is set to alias the caller's `str'.
 * NOTE(review): the cast assumes `str' is suitably aligned for CHAR_T --
 * confirm with callers.  The return statement is not visible here.
 */
184 CHAR_T_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
185 size_t *tolen
, const CHAR_T
**dst
)
187 *tolen
= len
/ sizeof(CHAR_T
);
188 *dst
= (const CHAR_T
*) str
;
/*
 * int2raw: narrow a CHAR_T string into bytes, one output byte per input
 * element, by plain assignment (no encoding step).  Values that do not fit
 * in a char are therefore not preserved.
 *
 * The destination is the conversion window buffer cw->bp1 (viewed as a
 * char pointer) whose size is tracked in cw->blen1; BINC_RETC is invoked
 * on it with `len' before the copy -- presumably to make room for len
 * bytes; confirm against the macro definition.
 *
 * NOTE(review): the end of the parameter list, the declaration of `i',
 * and whatever sets *tolen / the output pointer and returns are not
 * visible in this listing.
 */
194 int2raw(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
198 char **tostr
= (char **)(void *)&cw
->bp1
;
199 size_t *blen
= &cw
->blen1
;
201 BINC_RETC(NULL
, *tostr
, *blen
, len
);
204 for (i
= 0; i
< len
; ++i
)
205 (*tostr
)[i
] = str
[i
];
/*
 * default_int2char: encode `len' CHAR_T elements of `str' as a multibyte
 * string in encoding `enc'.
 *
 * Each element is encoded with wcrtomb(); a failure ((size_t)-1) jumps to
 * the `err' label.  When `enc' equals nl_langinfo(CODESET) the bytes are
 * written directly into cw->bp1, which is grown with BINC_RETC as needed.
 * Otherwise an iconv descriptor from the locale codeset to `enc' is
 * opened, wcrtomb() output is staged in the local `buffer'
 * (CONV_BUFFER_SIZE bytes), and CONVERT2 pushes the staged bytes through
 * iconv() into cw->bp1 at `offset'.  Space is re-checked whenever fewer
 * than MB_CUR_MAX bytes remain after position j.  A terminating L'\0' is
 * encoded at the end but excluded from the running length (j += n - 1).
 *
 * CONVERT2 is defined twice in this function's span: an iconv-based
 * variant and one that expands to nothing.
 *
 * NOTE(review): the return type, several local declarations (i, j, n,
 * offset, dst, buflen, mbs, ...), parts of the CONVERT2 body, the
 * conditionals selecting the macro variants, the else-branches of the
 * buffer-refill logic, the `err' label and the return statements are not
 * visible in this listing.
 */
213 default_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
214 size_t *tolen
, const char **pdst
, const char *enc
)
218 char **tostr
= (char **)(void *)&cw
->bp1
;
219 size_t *blen
= &cw
->blen1
;
222 ssize_t nlen
= len
+ MB_CUR_MAX
;
225 char buffer
[CONV_BUFFER_SIZE
];
226 iconv_t id
= (iconv_t
)-1;
228 /* convert first len bytes of buffer and append it to cw->bp
229 * len is adjusted => 0
230 * offset contains the offset in cw->bp and is adjusted
231 * cw->bp is grown as required
234 #define CONVERT2(len, cw, offset) \
236 const char *bp = buffer; \
238 size_t outleft = cw->blen1 - offset; \
239 char *obp = (char *)cw->bp1 + offset; \
240 if (cw->blen1 < offset + MB_CUR_MAX) { \
242 BINC_RETC(NULL, cw->bp1, cw->blen1, nlen); \
245 if (iconv(id, &bp, &len, &obp, &outleft) == (size_t)-1 && \
248 offset = cw->blen1 - outleft; \
252 #define CONVERT2(len, cw, offset)
257 BINC_RETC(NULL
, *tostr
, *blen
, nlen
);
258 dst
= *tostr
; buflen
= *blen
;
261 if (strcmp(nl_langinfo(CODESET
), enc
)) {
262 id
= iconv_open(enc
, nl_langinfo(CODESET
));
263 if (id
== (iconv_t
)-1)
265 dst
= buffer
; buflen
= CONV_BUFFER_SIZE
;
269 for (i
= 0, j
= 0; i
< (size_t)len
; ++i
) {
270 n
= wcrtomb(dst
+j
, str
[i
], &mbs
);
271 if (n
== (size_t)-1) goto err
;
273 if (buflen
< j
+ MB_CUR_MAX
) {
274 if (id
!= (iconv_t
)-1) {
275 CONVERT2(j
, cw
, offset
);
278 BINC_RETC(NULL
, *tostr
, *blen
, nlen
);
279 dst
= *tostr
; buflen
= *blen
;
284 n
= wcrtomb(dst
+j
, L
'\0', &mbs
);
285 j
+= n
- 1; /* don't count NUL at the end */
/* With iconv active, push the bytes still staged in `buffer' through the converter. */
288 if (id
!= (iconv_t
)-1) {
289 CONVERT2(j
, cw
, offset
);
/*
 * fe_int2char: CHAR_T -> bytes using the screen's O_FILEENCODING option
 * string as the target encoding.  Thin wrapper that forwards every
 * argument to default_int2char() and returns its result.
 */
305 fe_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
306 size_t *tolen
, const char **dst
)
308 return default_int2char(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_FILEENCODING
));
/*
 * cs_int2char: CHAR_T -> bytes using LANGCODESET as the target encoding.
 * Thin wrapper that forwards every argument to default_int2char() and
 * returns its result.
 */
312 cs_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
313 size_t *tolen
, const char **dst
)
315 return default_int2char(sp
, str
, len
, cw
, tolen
, dst
, LANGCODESET
);
/*
 * conv_init: set up the conversion function table (sp->conv) of screen
 * `sp', optionally based on an existing screen `orig'.
 *
 * Visible statements, in order of appearance:
 *   - copy orig->conv into sp->conv with MEMCPY;
 *   - call setlocale(LC_ALL, "") to adopt the environment's locale;
 *   - install the default converters: sys2int/int2sys use the cs_*
 *     functions, file2int/int2file use the fe_* functions, and input2int
 *     uses ie_char2int;
 *   - set the O_FILEENCODING and O_INPUTENCODING options to
 *     nl_langinfo(CODESET) via o_set(..., OS_STRDUP, ...).
 *
 * NOTE(review): the conditionals and preprocessor guards around these
 * statements are not visible in this listing, so the list above is not
 * necessarily a single straight-line path (e.g. the copy from `orig' and
 * the fresh initialisation may be alternatives) -- confirm against the
 * full source.
 */
322 conv_init (SCR
*orig
, SCR
*sp
)
325 MEMCPY(&sp
->conv
, &orig
->conv
, 1);
327 setlocale(LC_ALL
, "");
329 sp
->conv
.sys2int
= cs_char2int
;
330 sp
->conv
.int2sys
= cs_int2char
;
331 sp
->conv
.file2int
= fe_char2int
;
332 sp
->conv
.int2file
= fe_int2char
;
333 sp
->conv
.input2int
= ie_char2int
;
336 o_set(sp
, O_FILEENCODING
, OS_STRDUP
, nl_langinfo(CODESET
), 0);
337 o_set(sp
, O_INPUTENCODING
, OS_STRDUP
, nl_langinfo(CODESET
), 0);
/*
 * conv_enc: select the converter functions used for `option' when its
 * encoding is changed to `enc'.  The logic shown is compiled only when
 * both USE_WIDECHAR and USE_ICONV are defined.
 *
 * `c2w' and `w2c' are pointers to the slots in sp->conv to update:
 * file2int/int2file in one case, and input2int alone for
 * O_INPUTENCODING.  Each is tested for NULL before being written.
 * The visible assignments install, depending on conditions:
 *   - raw2int / int2raw;
 *   - CHAR_T_char2int / CHAR_T_int2char when enc is exactly "WCHAR_T".
 * iconv_open() is attempted in both directions between `enc' and
 * nl_langinfo(CODESET), each result being compared with (iconv_t)-1.
 * SC_CONV_ERROR is cleared and SC_SCR_REFORMAT is set on `sp'; messages
 * 321 and 322 report unsupported file / input encoding conversion.
 *
 * NOTE(review): the switch/if structure, the conditions under which
 * raw2int/int2raw are chosen, the handling of the iconv descriptors after
 * the probes, the error path and the return statements are not visible
 * in this listing.
 */
343 conv_enc (SCR
*sp
, int option
, const char *enc
)
345 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
352 c2w
= &sp
->conv
.file2int
;
353 w2c
= &sp
->conv
.int2file
;
355 case O_INPUTENCODING
:
356 c2w
= &sp
->conv
.input2int
;
366 if (c2w
) *c2w
= raw2int
;
367 if (w2c
) *w2c
= int2raw
;
/* "WCHAR_T" selects the no-copy reinterpretation converters. */
371 if (!strcmp(enc
, "WCHAR_T")) {
372 if (c2w
) *c2w
= CHAR_T_char2int
;
373 if (w2c
) *w2c
= CHAR_T_int2char
;
/* Try opening an iconv descriptor in each direction between enc and the locale codeset. */
377 id
= iconv_open(enc
, nl_langinfo(CODESET
));
378 if (id
== (iconv_t
)-1)
381 id
= iconv_open(nl_langinfo(CODESET
), enc
);
382 if (id
== (iconv_t
)-1)
391 case O_INPUTENCODING
:
396 F_CLR(sp
, SC_CONV_ERROR
);
397 F_SET(sp
, SC_SCR_REFORMAT
);
404 "321|File encoding conversion not supported");
406 case O_INPUTENCODING
:
408 "322|Input encoding conversion not supported");