lib/loop_wchar.h

   1 /*
   2  * Copyright (C) 2000-2001 Free Software Foundation, Inc.
   3  * This file is part of the GNU LIBICONV Library.
   4  *
   5  * The GNU LIBICONV Library is free software; you can redistribute it
   6  * and/or modify it under the terms of the GNU Library General Public
   7  * License as published by the Free Software Foundation; either version 2
   8  * of the License, or (at your option) any later version.
   9  *
  10  * The GNU LIBICONV Library is distributed in the hope that it will be
  11  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Library General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Library General Public
  16  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
  17  * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
  18  * Suite 330, Boston, MA 02111-1307, USA.
  19  */
  20
  21 /* This file defines three conversion loops:
  22      - from wchar_t to anything else,
  23      - from anything else to wchar_t,
  24      - from wchar_t to wchar_t.
  25  */
  26
  27 #if HAVE_WCRTOMB || HAVE_MBRTOWC
/* At least one of wcrtomb/mbrtowc is available: pull in <wchar.h> and set up
   the helpers used by the conversion loops below. */
  28 #include <wchar.h>
/* BUF_SIZE is the size of the temporary multibyte buffers (`buf`) that the
   conversion and reset loops below allocate on the stack. */
  29 #define BUF_SIZE 64  /* assume MB_LEN_MAX <= 64 */
  30 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
  31 extern size_t mbrtowc ();
/* NOTE(review): mbstate_t being a preprocessor macro presumably means the
   build configuration substituted a placeholder for a missing type -- confirm
   against the configure output.  In that case mbrtowc is always called with a
   null state pointer (the parenthesized name bypasses this macro and reaches
   the real function) and mbsinit() always reports the initial state. */
  32 #ifdef mbstate_t
  33 #define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
  34 #define mbsinit(ps) 1
  35 #endif
  36 #else
/* Neither function is available, so <wchar.h> is not included here.  Unless
   mbstate_t is already a macro, declare a placeholder type so that
   struct wchar_conv_struct below still has a valid member type. */
  37 #ifndef mbstate_t
  38 typedef int mbstate_t;
  39 #endif
  40 #endif
  41
  42 /*
  43  * The first two conversion loops have an extended conversion descriptor.
  44  */
/* Conversion descriptor used by the wchar_from_* and wchar_to_* loops.
   Those loops cast their iconv_t argument to a pointer to this struct. */
  45 struct wchar_conv_struct {
  46   struct conv_struct parent;  /* passed as &wcd->parent to unicode_loop_convert / unicode_loop_reset */
  47   mbstate_t state;            /* state handed to wcrtomb / mbrtowc; zeroed by the reset functions */
  48 };
  49
  50
  51 #if HAVE_WCRTOMB
  52
  53 /* From wchar_t to anything else. */
  54
  55 static size_t wchar_from_loop_convert (iconv_t icd,
  56                                        const char* * inbuf, size_t *inbytesleft,
  57                                        char* * outbuf, size_t *outbytesleft)
  58 {
  59   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
  60   size_t result = 0;
  61   while (*inbytesleft >= sizeof(wchar_t)) {
  62     const wchar_t * inptr = (const wchar_t *) *inbuf;
  63     size_t inleft = *inbytesleft;
  64     char buf[BUF_SIZE];
  65     mbstate_t state = wcd->state;
  66     size_t bufcount = 0;
  67     while (inleft >= sizeof(wchar_t)) {
  68       /* Convert one wchar_t to multibyte representation. */
  69       size_t count = wcrtomb(buf+bufcount,*inptr,&state);
  70       if (count == (size_t)(-1)) {
  71         /* Invalid input. */
  72         errno = EILSEQ;
  73         return -1;
  74       }
  75       inptr++;
  76       inleft -= sizeof(wchar_t);
  77       bufcount += count;
  78       if (count == 0) {
  79         /* Continue, append next wchar_t. */
  80       } else {
  81         /* Attempt to convert the accumulated multibyte representations
  82            to the target encoding. */
  83         const char* bufptr = buf;
  84         size_t bufleft = bufcount;
  85         char* outptr = *outbuf;
  86         size_t outleft = *outbytesleft;
  87         size_t res = unicode_loop_convert(&wcd->parent,
  88                                           &bufptr,&bufleft,
  89                                           &outptr,&outleft);
  90         if (res == (size_t)(-1)) {
  91           if (errno == EILSEQ)
  92             /* Invalid input. */
  93             return -1;
  94           else if (errno == E2BIG)
  95             /* Output buffer too small. */
  96             return -1;
  97           else if (errno == EINVAL) {
  98             /* Continue, append next wchar_t, but avoid buffer overrun. */
  99             if (bufcount + MB_CUR_MAX > BUF_SIZE)
 100               abort();
 101           } else
 102             abort();
 103         } else {
 104           /* Successful conversion. */
 105           wcd->state = state;
 106           *inbuf = (const char *) inptr;
 107           *inbytesleft = inleft;
 108           *outbuf = outptr;
 109           *outbytesleft = outleft;
 110           result += res;
 111           break;
 112         }
 113       }
 114     }
 115   }
 116   return result;
 117 }
 118
 119 static size_t wchar_from_loop_reset (iconv_t icd,
 120                                      char* * outbuf, size_t *outbytesleft)
 121 {
 122   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
 123   if (outbuf == NULL || *outbuf == NULL) {
 124     /* Reset the states. */
 125     memset(&wcd->state,'\0',sizeof(mbstate_t));
 126     return unicode_loop_reset(&wcd->parent,NULL,NULL);
 127   } else {
 128     if (!mbsinit(&wcd->state)) {
 129       mbstate_t state = wcd->state;
 130       char buf[BUF_SIZE];
 131       size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
 132       if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
 133         abort();
 134       else {
 135         const char* bufptr = buf;
 136         size_t bufleft = bufcount-1;
 137         char* outptr = *outbuf;
 138         size_t outleft = *outbytesleft;
 139         size_t res = unicode_loop_convert(&wcd->parent,
 140                                           &bufptr,&bufleft,
 141                                           &outptr,&outleft);
 142         if (res == (size_t)(-1)) {
 143           if (errno == E2BIG)
 144             return -1;
 145           else
 146             abort();
 147         } else {
 148           res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
 149           if (res == (size_t)(-1))
 150             return res;
 151           else {
 152             /* Successful. */
 153             wcd->state = state;
 154             *outbuf = outptr;
 155             *outbytesleft = outleft;
 156             return 0;
 157           }
 158         }
 159       }
 160     } else
 161       return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
 162   }
 163 }
 164
 165 #endif
 166
 167
 168 #if HAVE_MBRTOWC
 169
 170 /* From anything else to wchar_t. */
 171
 172 static size_t wchar_to_loop_convert (iconv_t icd,
 173                                      const char* * inbuf, size_t *inbytesleft,
 174                                      char* * outbuf, size_t *outbytesleft)
 175 {
 176   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
 177   size_t result = 0;
 178   while (*inbytesleft > 0) {
 179     size_t try;
 180     for (try = 1; try <= *inbytesleft; try++) {
 181       char buf[BUF_SIZE];
 182       const char* inptr = *inbuf;
 183       size_t inleft = try;
 184       char* bufptr = buf;
 185       size_t bufleft = BUF_SIZE;
 186       size_t res = unicode_loop_convert(&wcd->parent,
 187                                         &inptr,&inleft,
 188                                         &bufptr,&bufleft);
 189       if (res == (size_t)(-1)) {
 190         if (errno == EILSEQ)
 191           /* Invalid input. */
 192           return -1;
 193         else if (errno == EINVAL) {
 194           /* Incomplete input. Next try with one more input byte. */
 195         } else
 196           /* E2BIG shouldn't occur. */
 197           abort();
 198       } else {
 199         /* Successful conversion. */
 200         size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
 201         mbstate_t state = wcd->state;
 202         wchar_t wc;
 203         res = mbrtowc(&wc,buf,bufcount,&state);
 204         if (res == (size_t)(-2)) {
 205           /* Next try with one more input byte. */
 206         } else if (res == (size_t)(-1)) {
 207           /* Invalid input. */
 208           return -1;
 209         } else {
 210           if (*outbytesleft < sizeof(wchar_t)) {
 211             errno = E2BIG;
 212             return -1;
 213           }
 214           *(wchar_t*) *outbuf = wc;
 215           *outbuf += sizeof(wchar_t);
 216           *outbytesleft -= sizeof(wchar_t);
 217           *inbuf += try;
 218           *inbytesleft -= try;
 219           result += res;
 220           break;
 221         }
 222       }
 223     }
 224   }
 225   return result;
 226 }
 227
 228 static size_t wchar_to_loop_reset (iconv_t icd,
 229                                    char* * outbuf, size_t *outbytesleft)
 230 {
 231   struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
 232   size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
 233   if (res == (size_t)(-1))
 234     return res;
 235   memset(&wcd->state,0,sizeof(mbstate_t));
 236   return 0;
 237 }
 238
 239 #endif
 240
 241
 242 /* From wchar_t to wchar_t. */
 243
 244 static size_t wchar_id_loop_convert (iconv_t icd,
 245                                      const char* * inbuf, size_t *inbytesleft,
 246                                      char* * outbuf, size_t *outbytesleft)
 247 {
 248   const wchar_t* inptr = (const wchar_t*) *inbuf;
 249   size_t inleft = *inbytesleft / sizeof(wchar_t);
 250   wchar_t* outptr = (wchar_t*) *outbuf;
 251   size_t outleft = *outbytesleft / sizeof(wchar_t);
 252   size_t count = (inleft <= outleft ? inleft : outleft);
 253   if (count > 0) {
 254     *inbytesleft -= count * sizeof(wchar_t);
 255     *outbytesleft -= count * sizeof(wchar_t);
 256     do
 257       *outptr++ = *inptr++;
 258     while (--count > 0);
 259     *inbuf = (const char*) inptr;
 260     *outbuf = (char*) outptr;
 261   }
 262   return 0;
 263 }
 264
 265 static size_t wchar_id_loop_reset (iconv_t icd,
 266                                    char* * outbuf, size_t *outbytesleft)
 267 {
 268   return 0;
 269 }