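/*
 * Provenance (from the repository viewer this text was taken from):
 *   repository: unleashed/tickless.git
 *   path:       usr/src/lib/libast/common/comp/iconv.c
 *   blob:       d45c4747c6840979aca828d00e140eeaecc12156
 *   head commit subject at capture time: "dmake: do not set MAKEFLAGS=k"
 */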
/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                   Phong Vo <kpv@research.att.com>                    *
*                                                                      *
***********************************************************************/
22 #pragma prototyped
25 * Glenn Fowler
26 * AT&T Research
28 * iconv intercept
29 * minimally provides { utf*<=>bin ascii<=>ebcdic* }
32 #include <ast.h>
33 #include <dirent.h>
35 #define DEBUG_TRACE 0
36 #define _ICONV_LIST_PRIVATE_
38 #include <ccode.h>
39 #include <ctype.h>
40 #include <iconv.h>
42 #include "lclib.h"
44 #if !_lib_iconv_open
46 #define _ast_iconv_t iconv_t
47 #define _ast_iconv_f iconv_f
48 #define _ast_iconv_list_t iconv_list_t
49 #define _ast_iconv_open iconv_open
50 #define _ast_iconv iconv
51 #define _ast_iconv_close iconv_close
52 #define _ast_iconv_list iconv_list
53 #define _ast_iconv_move iconv_move
54 #define _ast_iconv_name iconv_name
55 #define _ast_iconv_write iconv_write
57 #endif
59 #ifndef E2BIG
60 #define E2BIG ENOMEM
61 #endif
62 #ifndef EILSEQ
63 #define EILSEQ EIO
64 #endif
66 #define RETURN(e,n,fn) \
67 if (*fn && !e) e = E2BIG; \
68 if (e) { errno = e; return (size_t)(-1); } \
69 return n;
71 typedef struct Map_s
73 char* name;
74 const unsigned char* map;
75 _ast_iconv_f fun;
76 int index;
77 } Map_t;
79 typedef struct Conv_s
81 iconv_t cvt;
82 char* buf;
83 size_t size;
84 Map_t from;
85 Map_t to;
86 } Conv_t;
88 static Conv_t* freelist[4];
89 static int freeindex;
91 static const char name_local[] = "local";
92 static const char name_native[] = "native";
94 static const _ast_iconv_list_t codes[] =
97 "utf",
98 "un|unicode|utf",
99 "multibyte 8-bit unicode",
100 "UTF-%s",
101 "8",
102 CC_UTF,
106 "ume",
107 "um|ume|utf?(-)7",
108 "multibyte 7-bit unicode",
109 "UTF-7",
111 CC_UME,
115 "euc",
116 "(big|euc)*",
117 "euc family",
120 CC_ICONV,
124 "dos",
125 "dos?(-)?(855)",
126 "dos code page",
127 "DOS855",
129 CC_ICONV,
133 "ucs",
134 "ucs?(-)?(2)?(be)|utf-16?(be)",
135 "unicode runes",
136 "UCS-%s",
137 "2",
138 CC_UCS,
142 "ucs-le",
143 "ucs?(-)?(2)le|utf-16le",
144 "little endian unicode runes",
145 "UCS-%sLE",
146 "2",
147 CC_SCU,
150 { 0 },
153 #if _UWIN
155 #include <ast_windows.h>
157 #ifndef CP_UCS2
158 #define CP_UCS2 0x0000
159 #endif
161 static char _win_maps[] = "/reg/local_machine/SOFTWARE/Classes/MIME/Database/Charset";
164 * return the codeset index given its name or alias
165 * the map is in the what? oh, the registry
168 static int
169 _win_codeset(const char* name)
171 register char* s;
172 char* e;
173 int n;
174 Sfio_t* sp;
175 char aka[128];
176 char tmp[128];
178 #if DEBUG_TRACE
179 error(DEBUG_TRACE, "AHA#%d _win_codeset name=%s", __LINE__, name);
180 #endif
181 if (name == name_native)
182 return CP_ACP;
183 if (!strcasecmp(name, "utf") || !strcasecmp(name, "utf8") || !strcasecmp(name, "utf-8"))
184 return CP_UTF8;
185 if (!strcasecmp(name, "ucs") || !strcasecmp(name, "ucs2") || !strcasecmp(name, "ucs-2"))
186 return CP_UCS2;
187 if (name[0] == '0' && name[1] == 'x' && (n = strtol(name, &e, 0)) > 0 && !*e)
188 return n;
189 for (;;)
191 sfsprintf(tmp, sizeof(tmp), "%s/%s", _win_maps, name);
192 if (!(sp = sfopen(0, tmp, "r")))
194 s = (char*)name;
195 if ((s[0] == 'c' || s[0] == 'C') && (s[1] == 'p' || s[1] == 'P'))
196 s += 2;
197 if (!isdigit(s[0]))
198 break;
199 sfsprintf(tmp, sizeof(tmp), "%s/windows-%s", _win_maps, s);
200 if (!(sp = sfopen(0, tmp, "r")))
201 break;
203 for (;;)
205 if (!(s = sfgetr(sp, '\n', 0)))
207 sfclose(sp);
208 return -1;
210 if (!strncasecmp(s, "AliasForCharSet=", 16))
212 n = sfvalue(sp) - 17;
213 s += 16;
214 if (n >= sizeof(aka))
215 n = sizeof(aka) - 1;
216 memcpy(aka, s, n);
217 aka[n] = 0;
218 sfclose(sp);
219 name = (const char*)aka;
220 break;
222 if (!strncasecmp(s, "CodePage=", 9))
224 s += 9;
225 n = strtol(s, 0, 0);
226 sfclose(sp);
227 return n;
231 return -1;
235 * get and check the codeset indices
238 static _ast_iconv_t
239 _win_iconv_open(register Conv_t* cc, const char* t, const char* f)
241 #if DEBUG_TRACE
242 error(DEBUG_TRACE, "AHA#%d _win_iconv_open f=%s t=%s\n", __LINE__, f, t);
243 #endif
244 if ((cc->from.index = _win_codeset(f)) < 0)
245 return (_ast_iconv_t)(-1);
246 if ((cc->to.index = _win_codeset(t)) < 0)
247 return (_ast_iconv_t)(-1);
248 #if DEBUG_TRACE
249 error(DEBUG_TRACE, "AHA#%d _win_iconv_open f=0x%04x t=0x%04x\n", __LINE__, cc->from.index, cc->to.index);
250 #endif
251 return (_ast_iconv_t)cc;
255 * even though the indices already check out
256 * they could still be rejected
259 static size_t
260 _win_iconv(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
262 Conv_t* cc = (Conv_t*)cd;
263 size_t un;
264 size_t tz;
265 size_t fz;
266 size_t bz;
267 size_t pz;
268 size_t oz;
269 LPWSTR ub;
271 #if DEBUG_TRACE
272 error(DEBUG_TRACE, "AHA#%d _win_iconv from=0x%04x to=0x%04x\n", __LINE__, cc->from.index, cc->to.index);
273 #endif
274 if (cc->from.index == cc->to.index)
277 * easy
280 fz = tz = (*fn < *tn) ? *fn : *tn;
281 memcpy(*tb, *fb, fz);
283 else
285 ub = 0;
286 un = *fn;
289 * from => ucs-2
292 if (cc->to.index == CP_UCS2)
294 if ((tz = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)*fn, (LPWSTR)*tb, *tn)) && tz <= *tn)
296 fz = *fn;
297 tz *= sizeof(WCHAR);
299 else
302 * target too small
303 * binary search on input size to make it fit
306 oz = 0;
307 pz = *fn / 2;
308 fz = *fn - pz;
309 for (;;)
311 while (!(tz = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)fz, (LPWSTR)*tb, 0)))
312 if (++fz >= *fn)
313 goto nope;
314 tz *= sizeof(WCHAR);
315 if (tz == *tn)
316 break;
317 if (!(pz /= 2))
319 if (!(fz = oz))
320 goto nope;
321 break;
323 if (tz > *tn)
324 fz -= pz;
325 else
327 oz = fz;
328 fz += pz;
333 else
335 if (cc->from.index == CP_UCS2)
337 un = *fn / sizeof(WCHAR);
338 ub = (LPWSTR)*fb;
340 else if (!(un = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)*fn, (LPWSTR)*tb, 0)))
341 goto nope;
342 else if (!(ub = (LPWSTR)malloc(un * sizeof(WCHAR))))
343 goto nope;
344 else if (!(un = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)*fn, (LPWSTR)ub, un)))
345 goto nope;
348 * ucs-2 => to
351 if (tz = WideCharToMultiByte(cc->to.index, 0, (LPCWSTR)ub, un, *tb, *tn, 0, 0))
352 fz = *fn;
353 else
356 * target too small
357 * binary search on input size to make it fit
360 oz = 0;
361 pz = *fn / 2;
362 bz = *fn - pz;
363 for (;;)
365 while (!(fz = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)bz, (LPWSTR)ub, un)))
366 if (++bz > *fn)
367 goto nope;
368 if (!(tz = WideCharToMultiByte(cc->to.index, 0, (LPCWSTR)ub, fz, *tb, 0, 0, 0)))
369 goto nope;
370 if (tz == *tn)
371 break;
372 if (!(pz /= 2))
374 if (!(fz = oz))
375 goto nope;
376 break;
378 if (tz > *tn)
379 bz -= pz;
380 else
382 oz = bz;
383 bz += pz;
386 if (!(tz = WideCharToMultiByte(cc->to.index, 0, (LPCWSTR)ub, fz, *tb, tz, 0, 0)))
387 goto nope;
388 #if DEBUG_TRACE
389 error(DEBUG_TRACE, "AHA#%d _win_iconv *fn=%u fz=%u[%u] *tn=%u tz=%u\n", __LINE__, *fn, fz, fz * sizeof(WCHAR), *tn, tz);
390 #endif
391 #if 0
392 fz *= sizeof(WCHAR);
393 #endif
395 if (ub != (LPWSTR)*fb)
396 free(ub);
399 *fb += fz;
400 *fn -= fz;
401 *tb += tz;
402 *tn -= tz;
403 return fz;
404 nope:
405 if (ub && ub != (LPWSTR)*fb)
406 free(ub);
407 errno = EINVAL;
408 return (size_t)(-1);
411 #endif
414 * return canonical character code set name for m
415 * if b!=0 then canonical name placed in b of size n
416 * <ccode.h> index returned
420 _ast_iconv_name(register const char* m, register char* b, size_t n)
422 register const _ast_iconv_list_t* cp;
423 const _ast_iconv_list_t* bp;
424 register int c;
425 register char* e;
426 int sub[2];
427 char buf[16];
428 #if DEBUG_TRACE
429 char* o;
430 #endif
432 if (!b)
434 b = buf;
435 n = sizeof(buf);
437 #if DEBUG_TRACE
438 o = b;
439 #endif
440 e = b + n - 1;
441 bp = 0;
442 n = 0;
443 cp = ccmaplist(NiL);
444 #if DEBUG_TRACE
445 if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name m=\"%s\"\n", error_info.id, error_info.trace, __LINE__, m);
446 #endif
447 for (;;)
449 #if DEBUG_TRACE
450 if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name n=%d bp=%p cp=%p ccode=%d name=\"%s\"\n", error_info.id, error_info.trace, __LINE__, n, bp, cp, cp->ccode, cp->name);
451 #endif
452 if (strgrpmatch(m, cp->match, sub, elementsof(sub) / 2, STR_MAXIMAL|STR_LEFT|STR_ICASE))
454 if (!(c = m[sub[1]]))
456 bp = cp;
457 break;
459 if (sub[1] > n && !isalpha(c))
461 bp = cp;
462 n = sub[1];
465 if (cp->ccode < 0)
467 if (!(++cp)->name)
468 break;
470 else if (!(cp = (const _ast_iconv_list_t*)ccmaplist((_ast_iconv_list_t*)cp)))
471 cp = codes;
473 if (cp = bp)
475 if (cp->canon)
477 if (cp->index)
479 for (m += sub[1]; *m && !isalnum(*m); m++);
480 if (!isdigit(*m))
481 m = cp->index;
483 else
484 m = "1";
485 b += sfsprintf(b, e - b, cp->canon, m);
487 else if (cp->ccode == CC_NATIVE)
489 if ((locales[AST_LC_CTYPE]->flags & LC_default) || !locales[AST_LC_CTYPE]->charset || !(m = locales[AST_LC_CTYPE]->charset->code) || streq(m, "iso8859-1"))
490 switch (CC_NATIVE)
492 case CC_EBCDIC:
493 m = (const char*)"EBCDIC";
494 break;
495 case CC_EBCDIC_I:
496 m = (const char*)"EBCDIC-I";
497 break;
498 case CC_EBCDIC_O:
499 m = (const char*)"EBCDIC-O";
500 break;
501 default:
502 m = (const char*)"ISO-8859-1";
503 break;
505 b += sfsprintf(b, e - b, "%s", m);
507 *b = 0;
508 #if DEBUG_TRACE
509 if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name ccode=%d canon=\"%s\"\n", error_info.id, error_info.trace, __LINE__, cp->ccode, o);
510 #endif
511 return cp->ccode;
513 while (b < e && (c = *m++))
515 if (islower(c))
516 c = toupper(c);
517 *b++ = c;
519 *b = 0;
520 #if DEBUG_TRACE
521 if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name ccode=%d canon=\"%s\"\n", error_info.id, error_info.trace, __LINE__, CC_ICONV, o);
522 #endif
523 return CC_ICONV;
527 * convert utf-8 to bin
530 static size_t
531 utf2bin(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
533 register unsigned char* f;
534 register unsigned char* fe;
535 register unsigned char* t;
536 register unsigned char* te;
537 register unsigned char* p;
538 register int c;
539 register int w;
540 size_t n;
541 int e;
543 e = 0;
544 f = (unsigned char*)(*fb);
545 fe = f + (*fn);
546 t = (unsigned char*)(*tb);
547 te = t + (*tn);
548 while (t < te && f < fe)
550 p = f;
551 c = *f++;
552 if (c & 0x80)
554 if (!(c & 0x40))
556 f = p;
557 e = EILSEQ;
558 break;
560 if (c & 0x20)
562 w = (c & 0x0F) << 12;
563 if (f >= fe)
565 f = p;
566 e = EINVAL;
567 break;
569 c = *f++;
570 if (c & 0x40)
572 f = p;
573 e = EILSEQ;
574 break;
576 w |= (c & 0x3F) << 6;
578 else
579 w = (c & 0x1F) << 6;
580 if (f >= fe)
582 f = p;
583 e = EINVAL;
584 break;
586 c = *f++;
587 w |= (c & 0x3F);
589 else
590 w = c;
591 *t++ = w;
593 *fn -= (char*)f - (*fb);
594 *fb = (char*)f;
595 *tn -= (n = (char*)t - (*tb));
596 *tb = (char*)t;
597 RETURN(e, n, fn);
601 * convert bin to utf-8
604 static size_t
605 bin2utf(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
607 register unsigned char* f;
608 register unsigned char* fe;
609 register unsigned char* t;
610 register unsigned char* te;
611 register int c;
612 wchar_t w;
613 size_t n;
614 int e;
616 e = 0;
617 f = (unsigned char*)(*fb);
618 fe = f + (*fn);
619 t = (unsigned char*)(*tb);
620 te = t + (*tn);
621 while (f < fe && t < te)
623 if (!mbwide())
625 c = 1;
626 w = *f;
628 else if ((c = (*_ast_info.mb_towc)(&w, (char*)f, fe - f)) < 0)
630 e = EINVAL;
631 break;
633 else if (!c)
634 c = 1;
635 if (!(w & ~0x7F))
636 *t++ = w;
637 else
639 if (!(w & ~0x7FF))
641 if (t >= (te - 2))
643 e = E2BIG;
644 break;
646 *t++ = 0xC0 + (w >> 6);
648 else if (!(w & ~0xffff))
650 if (t >= (te - 3))
652 e = E2BIG;
653 break;
655 *t++ = 0xE0 + (w >> 12);
656 *t++ = 0x80 + ((w >> 6 ) & 0x3F);
658 else
660 e = EILSEQ;
661 break;
663 *t++ = 0x80 + (w & 0x3F);
665 f += c;
667 *fn -= (n = (char*)f - (*fb));
668 *fb = (char*)f;
669 *tn -= (char*)t - (*tb);
670 *tb = (char*)t;
671 RETURN(e, n, fn);
/*
 * ume_D: characters bin2ume() emits as themselves
 * ume_M: the 64 character alphabet used for encoded runs
 * ume_d: ume_D membership flags, indexed by character
 * ume_m: character => ume_M position, NOE if not in ume_M
 * ume_d and ume_m are filled in by umeinit()
 */

static const unsigned char ume_D[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?!\"#$%&*;<=>@[]^_`{|} \t\n";

static const unsigned char ume_M[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

static unsigned char ume_d[UCHAR_MAX+1];

static unsigned char ume_m[UCHAR_MAX+1];

#define NOE			0xFF
#define UMEINIT()		(ume_d[ume_D[0]]?0:umeinit())
688 * initialize the ume tables
691 static int
692 umeinit(void)
694 register const unsigned char* s;
695 register int i;
696 register int c;
698 if (!ume_d[ume_D[0]])
700 s = ume_D;
701 while (c = *s++)
702 ume_d[c] = 1;
703 memset(ume_m, NOE, sizeof(ume_m));
704 for (i = 0; c = ume_M[i]; i++)
705 ume_m[c] = i;
707 return 0;
711 * convert utf-7 to bin
714 static size_t
715 ume2bin(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
717 register unsigned char* f;
718 register unsigned char* fe;
719 register unsigned char* t;
720 register unsigned char* te;
721 register unsigned char* p;
722 register int s;
723 register int c;
724 register int w;
725 size_t n;
726 int e;
728 e = 0;
729 UMEINIT();
730 f = (unsigned char*)(*fb);
731 fe = f + (*fn);
732 t = (unsigned char*)(*tb);
733 te = t + (*tn);
734 s = 0;
735 while (f < fe && t < te)
737 p = f;
738 c = *f++;
739 if (s)
741 if (c == '-' && s > 1)
742 s = 0;
743 else if ((w = ume_m[c]) == NOE)
745 s = 0;
746 *t++ = c;
748 else if (f >= (fe - 2))
750 f = p;
751 e = EINVAL;
752 break;
754 else
756 s = 2;
757 w = (w << 6) | ume_m[*f++];
758 w = (w << 6) | ume_m[*f++];
759 if (!(w & ~0xFF))
760 *t++ = w;
761 else if (t >= (te - 1))
763 f = p;
764 e = E2BIG;
765 break;
767 else
769 *t++ = (w >> 8) & 0xFF;
770 *t++ = w & 0xFF;
774 else if (c == '+')
775 s = 1;
776 else
777 *t++ = c;
779 *fn -= (char*)f - (*fb);
780 *fb = (char*)f;
781 *tn -= (n = (char*)t - (*tb));
782 *tb = (char*)t;
783 RETURN(e, n, fn);
787 * convert bin to utf-7
790 static size_t
791 bin2ume(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
793 register unsigned char* f;
794 register unsigned char* fe;
795 register unsigned char* t;
796 register unsigned char* te;
797 register int c;
798 register int s;
799 wchar_t w;
800 size_t n;
801 int e;
803 e = 0;
804 UMEINIT();
805 f = (unsigned char*)(*fb);
806 fe = f + (*fn);
807 t = (unsigned char*)(*tb);
808 te = t + (*tn);
809 s = 0;
810 while (f < fe && t < (te - s))
812 if (!mbwide())
814 c = 1;
815 w = *f;
817 else if ((c = (*_ast_info.mb_towc)(&w, (char*)f, fe - f)) < 0)
819 e = EINVAL;
820 break;
822 else if (!c)
823 c = 1;
824 if (!(w & ~0x7F) && ume_d[w])
826 if (s)
828 s = 0;
829 *t++ = '-';
831 *t++ = w;
833 else if (t >= (te - (4 + s)))
835 e = E2BIG;
836 break;
838 else
840 if (!s)
842 s = 1;
843 *t++ = '+';
845 *t++ = ume_M[(w >> 12) & 0x3F];
846 *t++ = ume_M[(w >> 6) & 0x3F];
847 *t++ = ume_M[w & 0x3F];
849 f += c;
851 if (s)
852 *t++ = '-';
853 *fn -= (n = (char*)f - (*fb));
854 *fb = (char*)f;
855 *tn -= (char*)t - (*tb);
856 *tb = (char*)t;
857 RETURN(e, n, fn);
861 * convert ucs-2 to bin with no byte swap
864 static size_t
865 ucs2bin(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
867 register unsigned char* f;
868 register unsigned char* fe;
869 register unsigned char* t;
870 register unsigned char* te;
871 register int w;
872 size_t n;
873 int e;
875 e = 0;
876 f = (unsigned char*)(*fb);
877 fe = f + (*fn);
878 t = (unsigned char*)(*tb);
879 te = t + (*tn);
880 while (f < (fe - 1) && t < te)
882 w = *f++;
883 w = (w << 8) | *f++;
884 if (!(w & ~0xFF))
885 *t++ = w;
886 else if (t >= (te - 1))
888 f -= 2;
889 e = E2BIG;
890 break;
892 else
894 *t++ = (w >> 8) & 0xFF;
895 *t++ = w & 0xFF;
898 *fn -= (char*)f - (*fb);
899 *fb = (char*)f;
900 *tn -= (n = (char*)t - (*tb));
901 *tb = (char*)t;
902 RETURN(e, n, fn);
906 * convert bin to ucs-2 with no byte swap
909 static size_t
910 bin2ucs(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
912 register unsigned char* f;
913 register unsigned char* fe;
914 register unsigned char* t;
915 register unsigned char* te;
916 register int c;
917 wchar_t w;
918 size_t n;
919 int e;
921 e = 0;
922 f = (unsigned char*)(*fb);
923 fe = f + (*fn);
924 t = (unsigned char*)(*tb);
925 te = t + (*tn);
926 while (f < fe && t < (te - 1))
928 if (!mbwide())
930 c = 1;
931 w = *f;
933 if ((c = (*_ast_info.mb_towc)(&w, (char*)f, fe - f)) < 0)
935 e = EINVAL;
936 break;
938 else if (!c)
939 c = 1;
940 *t++ = (w >> 8) & 0xFF;
941 *t++ = w & 0xFF;
942 f += c;
944 *fn -= (n = (char*)f - (*fb));
945 *fb = (char*)f;
946 *tn -= (char*)t - (*tb);
947 *tb = (char*)t;
948 RETURN(e, n, fn);
952 * convert ucs-2 to bin with byte swap
955 static size_t
956 scu2bin(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
958 register unsigned char* f;
959 register unsigned char* fe;
960 register unsigned char* t;
961 register unsigned char* te;
962 register int w;
963 size_t n;
964 int e;
966 e = 0;
967 f = (unsigned char*)(*fb);
968 fe = f + (*fn);
969 t = (unsigned char*)(*tb);
970 te = t + (*tn);
971 while (f < (fe - 1) && t < te)
973 w = *f++;
974 w = w | (*f++ << 8);
975 if (!(w & ~0xFF))
976 *t++ = w;
977 else if (t >= (te - 1))
979 f -= 2;
980 e = E2BIG;
981 break;
983 else
985 *t++ = (w >> 8) & 0xFF;
986 *t++ = w & 0xFF;
989 *fn -= (char*)f - (*fb);
990 *fb = (char*)f;
991 *tn -= (n = (char*)t - (*tb));
992 *tb = (char*)t;
993 RETURN(e, n, fn);
997 * convert bin to ucs-2 with byte swap
1000 static size_t
1001 bin2scu(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
1003 register unsigned char* f;
1004 register unsigned char* fe;
1005 register unsigned char* t;
1006 register unsigned char* te;
1007 register int c;
1008 wchar_t w;
1009 size_t n;
1010 int e;
1012 e = 0;
1013 f = (unsigned char*)(*fb);
1014 fe = f + (*fn);
1015 t = (unsigned char*)(*tb);
1016 te = t + (*tn);
1017 while (f < fe && t < (te - 1))
1019 if (!mbwide())
1021 c = 1;
1022 w = *f;
1024 else if ((c = (*_ast_info.mb_towc)(&w, (char*)f, fe - f)) < 0)
1026 e = EINVAL;
1027 break;
1029 else if (!c)
1030 c = 1;
1031 *t++ = w & 0xFF;
1032 *t++ = (w >> 8) & 0xFF;
1033 f += c;
1035 *fn -= (n = (char*)f - (*fb));
1036 *fb = (char*)f;
1037 *tn -= (char*)t - (*tb);
1038 *tb = (char*)t;
1039 RETURN(e, n, fn);
1043 * open a character code conversion map from f to t
1046 _ast_iconv_t
1047 _ast_iconv_open(const char* t, const char* f)
1049 register Conv_t* cc;
1050 int fc;
1051 int tc;
1052 int i;
1054 char fr[64];
1055 char to[64];
1057 #if DEBUG_TRACE
1058 error(DEBUG_TRACE, "AHA#%d _ast_iconv_open f=%s t=%s\n", __LINE__, f, t);
1059 #endif
1060 if (!t || !*t || *t == '-' && !*(t + 1) || !strcasecmp(t, name_local) || !strcasecmp(t, name_native))
1061 t = name_native;
1062 if (!f || !*f || *f == '-' && !*(f + 1) || !strcasecmp(t, name_local) || !strcasecmp(f, name_native))
1063 f = name_native;
1066 * the ast identify is always (iconv_t)(0)
1069 if (t == f)
1070 return (iconv_t)(0);
1071 fc = _ast_iconv_name(f, fr, sizeof(fr));
1072 tc = _ast_iconv_name(t, to, sizeof(to));
1073 #if DEBUG_TRACE
1074 error(DEBUG_TRACE, "AHA#%d _ast_iconv_open f=%s:%s:%d t=%s:%s:%d\n", __LINE__, f, fr, fc, t, to, tc);
1075 #endif
1076 if (fc != CC_ICONV && fc == tc || streq(fr, to))
1077 return (iconv_t)(0);
1080 * first check the free list
1083 for (i = 0; i < elementsof(freelist); i++)
1084 if ((cc = freelist[i]) && streq(to, cc->to.name) && streq(fr, cc->from.name))
1086 freelist[i] = 0;
1087 #if _lib_iconv_open
1089 * reset the shift state if any
1092 if (cc->cvt != (iconv_t)(-1))
1093 iconv(cc->cvt, NiL, NiL, NiL, NiL);
1094 #endif
1095 return cc;
1099 * allocate a new one
1102 if (!(cc = newof(0, Conv_t, 1, strlen(to) + strlen(fr) + 2)))
1103 return (iconv_t)(-1);
1104 cc->to.name = (char*)(cc + 1);
1105 cc->from.name = strcopy(cc->to.name, to) + 1;
1106 strcpy(cc->from.name, fr);
1107 cc->cvt = (iconv_t)(-1);
1110 * 8 bit maps are the easiest
1113 if (fc >= 0 && tc >= 0)
1114 cc->from.map = ccmap(fc, tc);
1115 #if _lib_iconv_open
1116 else if ((cc->cvt = iconv_open(to, fr)) != (iconv_t)(-1))
1117 cc->from.fun = (_ast_iconv_f)iconv;
1118 #endif
1119 #if _UWIN
1120 else if ((cc->cvt = _win_iconv_open(cc, to, fr)) != (_ast_iconv_t)(-1))
1121 cc->from.fun = (_ast_iconv_f)_win_iconv;
1122 #endif
1123 else
1125 switch (fc)
1127 case CC_UTF:
1128 cc->from.fun = utf2bin;
1129 break;
1130 case CC_UME:
1131 cc->from.fun = ume2bin;
1132 break;
1133 case CC_UCS:
1134 cc->from.fun = ucs2bin;
1135 break;
1136 case CC_SCU:
1137 cc->from.fun = scu2bin;
1138 break;
1139 case CC_ASCII:
1140 break;
1141 default:
1142 if (fc < 0)
1143 goto nope;
1144 cc->from.map = ccmap(fc, CC_ASCII);
1145 break;
1147 switch (tc)
1149 case CC_UTF:
1150 cc->to.fun = bin2utf;
1151 break;
1152 case CC_UME:
1153 cc->to.fun = bin2ume;
1154 break;
1155 case CC_UCS:
1156 cc->to.fun = bin2ucs;
1157 break;
1158 case CC_SCU:
1159 cc->to.fun = bin2scu;
1160 break;
1161 case CC_ASCII:
1162 break;
1163 default:
1164 if (tc < 0)
1165 goto nope;
1166 cc->to.map = ccmap(CC_ASCII, tc);
1167 break;
1170 return (iconv_t)cc;
1171 nope:
1172 return (iconv_t)(-1);
1176 * close a character code conversion map
1180 _ast_iconv_close(_ast_iconv_t cd)
1182 Conv_t* cc;
1183 Conv_t* oc;
1184 int i;
1185 int r = 0;
1187 if (cd == (_ast_iconv_t)(-1))
1188 return -1;
1189 if (!(cc = (Conv_t*)cd))
1190 return 0;
1193 * add to the free list
1196 i = freeindex;
1197 for (;;)
1199 if (++ i >= elementsof(freelist))
1200 i = 0;
1201 if (!freelist[i])
1202 break;
1203 if (i == freeindex)
1205 if (++ i >= elementsof(freelist))
1206 i = 0;
1209 * close the oldest
1212 if (oc = freelist[i])
1214 #if _lib_iconv_open
1215 if (oc->cvt != (iconv_t)(-1))
1216 r = iconv_close(oc->cvt);
1217 #endif
1218 free(oc->buf);
1219 free(oc);
1221 break;
1224 freelist[freeindex = i] = cc;
1225 return r;
1229 * copy *fb size *fn to *tb size *tn
1230 * fb,fn tb,tn updated on return
1233 size_t
1234 _ast_iconv(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
1236 Conv_t* cc = (Conv_t*)cd;
1237 register unsigned char* f;
1238 register unsigned char* t;
1239 register unsigned char* e;
1240 register const unsigned char* m;
1241 register size_t n;
1242 char* b;
1243 char* tfb;
1244 size_t tfn;
1245 size_t i;
1247 if (!fb || !*fb)
1249 /* TODO: reset to the initial state */
1250 if (!tb || !*tb)
1251 return 0;
1252 /* TODO: write the initial state shift sequence */
1253 return 0;
1255 n = *tn;
1256 if (cc)
1258 if (cc->from.fun)
1260 if (cc->to.fun)
1262 if (!cc->buf && !(cc->buf = oldof(0, char, cc->size = SF_BUFSIZE, 0)))
1264 errno = ENOMEM;
1265 return -1;
1267 b = cc->buf;
1268 i = cc->size;
1269 tfb = *fb;
1270 tfn = *fn;
1271 if ((*cc->from.fun)(cc->cvt, &tfb, &tfn, &b, &i) == (size_t)(-1))
1272 return -1;
1273 tfn = b - cc->buf;
1274 tfb = cc->buf;
1275 n = (*cc->to.fun)(cc->cvt, &tfb, &tfn, tb, tn);
1276 i = tfb - cc->buf;
1277 *fb += i;
1278 *fn -= i;
1279 return n;
1281 if ((*cc->from.fun)(cc->cvt, fb, fn, tb, tn) == (size_t)(-1))
1282 return -1;
1283 n -= *tn;
1284 if (m = cc->to.map)
1286 e = (unsigned char*)(*tb);
1287 for (t = e - n; t < e; t++)
1288 *t = m[*t];
1290 return n;
1292 else if (cc->to.fun)
1294 if (!(m = cc->from.map))
1295 return (*cc->to.fun)(cc->cvt, fb, fn, tb, tn);
1296 if (!cc->buf && !(cc->buf = oldof(0, char, cc->size = SF_BUFSIZE, 0)))
1298 errno = ENOMEM;
1299 return -1;
1301 if ((n = *fn) > cc->size)
1302 n = cc->size;
1303 f = (unsigned char*)(*fb);
1304 e = f + n;
1305 t = (unsigned char*)(b = cc->buf);
1306 while (f < e)
1307 *t++ = m[*f++];
1308 n = (*cc->to.fun)(cc->cvt, &b, fn, tb, tn);
1309 *fb += b - cc->buf;
1310 return n;
1313 if (n > *fn)
1314 n = *fn;
1315 if (cc && (m = cc->from.map))
1317 f = (unsigned char*)(*fb);
1318 e = f + n;
1319 t = (unsigned char*)(*tb);
1320 while (f < e)
1321 *t++ = m[*f++];
1323 else
1324 memcpy(*tb, *fb, n);
1325 *fb += n;
1326 *fn -= n;
1327 *tb += n;
1328 *tn -= n;
1329 return n;
1333 * write *fb size *fn to op
1334 * fb,fn updated on return
1335 * total bytes written to op returned
1338 ssize_t
1339 _ast_iconv_write(_ast_iconv_t cd, Sfio_t* op, char** fb, size_t* fn, size_t* e)
1341 char* tb;
1342 char* ts;
1343 size_t tn;
1344 size_t r;
1346 r = 0;
1347 tn = 0;
1348 while (*fn > 0)
1350 if (!(tb = (char*)sfreserve(op, -(tn + 1), SF_WRITE|SF_LOCKR)))
1351 return r ? r : -1;
1352 ts = tb;
1353 tn = sfvalue(op);
1354 #if DEBUG_TRACE
1355 error(DEBUG_TRACE, "AHA#%d iconv_write ts=%p tn=%d", __LINE__, ts, tn);
1356 for (;;)
1357 #else
1358 while (_ast_iconv(cd, fb, fn, &ts, &tn) == (size_t)(-1))
1359 #endif
1361 #if DEBUG_TRACE
1362 ssize_t _r;
1363 error(DEBUG_TRACE, "AHA#%d iconv_write %d => %d `%-.*s'", __LINE__, *fn, tn, *fn, *fb);
1364 _r = _ast_iconv(cd, fb, fn, &ts, &tn);
1365 error(DEBUG_TRACE, "AHA#%d iconv_write %d => %d [%d]", __LINE__, *fn, tn, _r);
1366 if (_r != (size_t)(-1))
1367 break;
1368 #endif
1369 if (errno == E2BIG)
1370 break;
1371 if (e)
1372 (*e)++;
1373 if (!tn)
1374 break;
1375 *ts++ = *(*fb)++;
1376 tn--;
1377 (*fn)--;
1379 #if DEBUG_TRACE
1380 error(DEBUG_TRACE, "AHA#%d iconv_write %d", __LINE__, ts - tb);
1381 #endif
1383 sfwrite(op, tb, ts - tb);
1384 r += ts - tb;
1386 return r;
1390 * move n bytes from ip to op
1393 ssize_t
1394 _ast_iconv_move(_ast_iconv_t cd, Sfio_t* ip, Sfio_t* op, size_t n, size_t* e)
1396 char* fb;
1397 char* fs;
1398 char* tb;
1399 char* ts;
1400 size_t fn;
1401 size_t fo;
1402 size_t tn;
1403 size_t i;
1404 ssize_t r = 0;
1405 int locked;
1407 fn = n;
1408 for (;;)
1410 if (fn != SF_UNBOUND)
1411 fn = -((ssize_t)(fn & (((size_t)(~0))>>1)));
1412 if (!(fb = (char*)sfreserve(ip, fn, locked = SF_LOCKR)) &&
1413 !(fb = (char*)sfreserve(ip, fn, locked = 0)))
1414 break;
1415 fs = fb;
1416 fn = fo = sfvalue(ip);
1417 if (!(tb = (char*)sfreserve(op, SF_UNBOUND, SF_WRITE|SF_LOCKR)))
1419 sfread(ip, fb, 0);
1420 return r ? r : -1;
1422 ts = tb;
1423 tn = sfvalue(op);
1424 while (_ast_iconv(cd, &fs, &fn, &ts, &tn) != (size_t)(-1) && fn > 0)
1426 if (tn > 0)
1428 *ts++ = '_';
1429 tn--;
1431 if (e)
1432 (*e)++;
1433 fs++;
1434 fn--;
1436 sfwrite(op, tb, ts - tb);
1437 r += ts - tb;
1438 if (locked)
1439 sfread(ip, fb, fs - fb);
1440 else
1441 for (i = fn; --i >= (fs - fb);)
1442 sfungetc(ip, fb[i]);
1443 if (n != SF_UNBOUND)
1445 if (n <= (fs - fb))
1446 break;
1447 n -= fs - fb;
1449 if (fn == fo)
1450 fn++;
1452 return r;
1456 * iconv_list_t iterator
1457 * call with arg 0 to start
1458 * prev return value is current arg
1461 _ast_iconv_list_t*
1462 _ast_iconv_list(_ast_iconv_list_t* cp)
1464 #if _UWIN
1465 struct dirent* ent;
1467 if (!cp)
1469 if (!(cp = newof(0, _ast_iconv_list_t, 1, 0)))
1470 return ccmaplist(NiL);
1471 if (!(cp->data = opendir(_win_maps)))
1473 free(cp);
1474 return ccmaplist(NiL);
1477 if (cp->data)
1479 if (ent = readdir((DIR*)cp->data))
1481 cp->name = cp->match = cp->desc = (const char*)ent->d_name;
1482 return cp;
1484 closedir((DIR*)cp->data);
1485 free(cp);
1486 return ccmaplist(NiL);
1488 #else
1489 if (!cp)
1490 return ccmaplist(NiL);
1491 #endif
1492 if (cp->ccode >= 0)
1493 return (cp = ccmaplist(cp)) ? cp : (_ast_iconv_list_t*)codes;
1494 return (++cp)->name ? cp : (_ast_iconv_list_t*)0;