lib/converters.h

   1 /*
   2  * Copyright (C) 1999-2001 Free Software Foundation, Inc.
   3  * This file is part of the GNU LIBICONV Library.
   4  *
   5  * The GNU LIBICONV Library is free software; you can redistribute it
   6  * and/or modify it under the terms of the GNU Library General Public
   7  * License as published by the Free Software Foundation; either version 2
   8  * of the License, or (at your option) any later version.
   9  *
  10  * The GNU LIBICONV Library is distributed in the hope that it will be
  11  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Library General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Library General Public
  16  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
  17  * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
  18  * Suite 330, Boston, MA 02111-1307, USA.
  19  */
  20
  21 /* This file defines all the converters. */
  22
  23
  /*
   * Our own notion of wide character: one UCS-4 code point, according to
   * ISO-10646-1. This is the type the converter entry points declared in
   * this file exchange.
   * NOTE(review): this relies on unsigned int being at least 32 bits wide;
   * the C standard itself only guarantees 16 - confirm for every target.
   */
  typedef unsigned int ucs4_t;

  /*
   * State used by a conversion. The value 0 always denotes the initial
   * state.
   */
  typedef unsigned int state_t;

  /*
   * iconv_t is an opaque type; this is the real type behind it: a pointer
   * to the conversion descriptor (struct conv_struct).
   */
  typedef struct conv_struct * conv_t;
  32
  33 /*
  34  * Data type for conversion multibyte -> unicode
  35  */
  36 struct mbtowc_funcs {
  37   int (*xxx_mbtowc) (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n);
  38   /*
  39    * int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n)
  40    * converts the byte sequence starting at s to a wide character. Up to n bytes
  41    * are available at s. n is >= 1.
  42    * Result is number of bytes consumed (if a wide character was read),
  43    * or 0 if invalid, or -1 if n too small, or -1-(number of bytes consumed)
  44    * if only a shift sequence was read.
  45    */
  46 };
  47
  48 /*
  49  * Data type for conversion unicode -> multibyte
  50  */
  51 struct wctomb_funcs {
  52   int (*xxx_wctomb) (conv_t conv, unsigned char *r, ucs4_t wc, int n);
  53   /*
  54    * int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
  55    * converts the wide character wc to the character set xxx, and stores the
  56    * result beginning at r. Up to n bytes may be written at r. n is >= 1.
  57    * Result is number of bytes written, or 0 if invalid, or -1 if n too small.
  58    */
  59   int (*xxx_reset) (conv_t conv, unsigned char *r, int n);
  60   /*
  61    * int xxx_reset (conv_t conv, unsigned char *r, int n)
  62    * stores a shift sequences returning to the initial state beginning at r.
  63    * Up to n bytes may be written at r. n is >= 0.
  64    * Result is number of bytes written, or -1 if n too small.
  65    */
  66 };
  67
  /* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */
  #define RET_ILSEQ      0
  /*
   * Return code if only a shift sequence of n bytes was read. (xxx_mbtowc)
   * RET_TOOFEW(0) is -1, the value xxx_mbtowc returns when n is too small.
   */
  #define RET_TOOFEW(n)  (-1-(n))
  /*
   * Return code if output buffer is too small. (xxx_wctomb, xxx_reset)
   * Parenthesized so that the expansion stays a single operand wherever
   * the macro is used.
   */
  #define RET_TOOSMALL   (-1)
  74
  75 /*
  76  * Contents of a conversion descriptor.
  77  */
  78 struct conv_struct {
  79   struct loop_funcs lfuncs;
  80   /* Input (conversion multibyte -> unicode) */
  81   int iindex;
  82   struct mbtowc_funcs ifuncs;
  83   state_t istate;
  84   /* Output (conversion unicode -> multibyte) */
  85   int oindex;
  86   struct wctomb_funcs ofuncs;
  87   int oflags;
  88   state_t ostate;
  89   /* Operation flags */
  90   int transliterate;
  91 };
  92
  93 /*
  94  * Include all the converters.
  95  */
  96
  97 #include "ascii.h"
  98
  99 /* General multi-byte encodings */
 100 #include "utf8.h"
 101 #include "ucs2.h"
 102 #include "ucs2be.h"
 103 #include "ucs2le.h"
 104 #include "ucs4.h"
 105 #include "ucs4be.h"
 106 #include "ucs4le.h"
 107 #include "utf16.h"
 108 #include "utf16be.h"
 109 #include "utf16le.h"
 110 #include "utf32.h"
 111 #include "utf32be.h"
 112 #include "utf32le.h"
 113 #include "utf7.h"
 114 #include "ucs2internal.h"
 115 #include "ucs2swapped.h"
 116 #include "ucs4internal.h"
 117 #include "ucs4swapped.h"
 118 #include "java.h"
 119
 120 /* 8-bit encodings */
 121 #include "iso8859_1.h"
 122 #include "iso8859_2.h"
 123 #include "iso8859_3.h"
 124 #include "iso8859_4.h"
 125 #include "iso8859_5.h"
 126 #include "iso8859_6.h"
 127 #include "iso8859_7.h"
 128 #include "iso8859_8.h"
 129 #include "iso8859_9.h"
 130 #include "iso8859_10.h"
 131 #include "iso8859_13.h"
 132 #include "iso8859_14.h"
 133 #include "iso8859_15.h"
 134 #include "iso8859_16.h"
 135 #include "koi8_r.h"
 136 #include "koi8_u.h"
 137 #include "koi8_ru.h"
 138 #include "cp1250.h"
 139 #include "cp1251.h"
 140 #include "cp1252.h"
 141 #include "cp1253.h"
 142 #include "cp1254.h"
 143 #include "cp1255.h"
 144 #include "cp1256.h"
 145 #include "cp1257.h"
 146 #include "cp1258.h"
 147 #include "cp850.h"
 148 #include "cp862.h"
 149 #include "cp866.h"
 150 #include "mac_roman.h"
 151 #include "mac_centraleurope.h"
 152 #include "mac_iceland.h"
 153 #include "mac_croatian.h"
 154 #include "mac_romania.h"
 155 #include "mac_cyrillic.h"
 156 #include "mac_ukraine.h"
 157 #include "mac_greek.h"
 158 #include "mac_turkish.h"
 159 #include "mac_hebrew.h"
 160 #include "mac_arabic.h"
 161 #include "mac_thai.h"
 162 #include "hp_roman8.h"
 163 #include "nextstep.h"
 164 #include "armscii_8.h"
 165 #include "georgian_academy.h"
 166 #include "georgian_ps.h"
 167 #include "mulelao.h"
 168 #include "cp1133.h"
 169 #include "tis620.h"
 170 #include "cp874.h"
 171 #include "viscii.h"
 172 #include "tcvn.h"
 173
 174 /* CJK character sets [CCS = coded character set] [CJKV.INF chapter 3] */
 175
 /*
  * Summary record pairing a position in a larger table with a bitmask that
  * says which entries are in use.
  * NOTE(review): presumably consumed by the CJK headers included right
  * after this definition - confirm.
  */
 typedef struct {
   unsigned short indx; /* index into big table */
   unsigned short used; /* bitmask of used entries */
 } Summary16;
 180
 181 #include "iso646_jp.h"
 182 #include "jisx0201.h"
 183 #include "jisx0208.h"
 184 #include "jisx0212.h"
 185
 186 #include "iso646_cn.h"
 187 #include "gb2312.h"
 188 #include "isoir165.h"
 189 /*#include "gb12345.h"*/
 190 #include "gbk.h"
 191 #include "cns11643.h"
 192 #include "big5.h"
 193
 194 #include "ksc5601.h"
 195 #include "johab_hangul.h"
 196
 197 /* CJK encodings [CES = character encoding scheme] [CJKV.INF chapter 4] */
 198
 199 #include "euc_jp.h"
 200 #include "sjis.h"
 201 #include "cp932.h"
 202 #include "iso2022_jp.h"
 203 #include "iso2022_jp1.h"
 204 #include "iso2022_jp2.h"
 205
 206 #include "euc_cn.h"
 207 #include "ces_gbk.h"
 208 #include "gb18030.h"
 209 #include "iso2022_cn.h"
 210 #include "iso2022_cnext.h"
 211 #include "hz.h"
 212 #include "euc_tw.h"
 213 #include "ces_big5.h"
 214 #include "cp950.h"
 215 #include "big5hkscs.h"
 216
 217 #include "euc_kr.h"
 218 #include "cp949.h"
 219 #include "johab.h"
 220 #include "iso2022_kr.h"
 221
 222 /* Encodings used by system dependent locales. */
 223
 224 #ifdef USE_AIX
 225 #include "cp856.h"
 226 #include "cp922.h"
 227 #include "cp943.h"
 228 #include "cp1046.h"
 229 #include "cp1124.h"
 230 #include "cp1129.h"
 231 #endif
 232
 233 #ifdef USE_OSF1
 234 #include "dec_kanji.h"
 235 #include "dec_hanyu.h"
 236 #endif
 237
 238 #ifdef USE_DOS
 239 #include "cp437.h"
 240 #include "cp775.h"
 241 #include "cp852.h"
 242 #include "cp855.h"
 243 #include "cp857.h"
 244 #include "cp861.h"
 245 #include "cp864.h"
 246 #include "cp865.h"
 247 #include "cp869.h"
 248 #endif
 249