Don't use 'return' where you should use 'PG_RETURN_xxx'.
[PostgreSQL.git] / src / backend / utils / mb / encnames.c
blob476418893d3a2f366f47dbe4ce6d7522cc427545
1 /*
2 * Encoding names and routines for working with them. Everything
3 * in this file is shared between FE and BE.
5 * $PostgreSQL$
6 */
7 #ifdef FRONTEND
8 #include "postgres_fe.h"
9 #define Assert(condition)
10 #else
11 #include "postgres.h"
12 #include "utils/builtins.h"
13 #endif
15 #include <ctype.h>
16 #include <unistd.h>
18 #include "mb/pg_wchar.h"
21 /* ----------
22 * All encoding names, sorted: *** A L P H A B E T I C ***
24 * All names must be without irrelevant chars, search routines use
25 * isalnum() chars only. It means ISO-8859-1, iso_8859-1 and Iso8859_1
26 * are always converted to 'iso88591'. All must be lower case.
28 * The table doesn't contain 'cs' aliases (like csISOLatin1). Are they needed?
30 * Karel Zak, Aug 2001
31 * ----------
/*
 * Alias lookup table: maps each normalized (lower-case, alphanumeric-only)
 * encoding name to its encoding id.  Entries must stay in strcmp() order
 * because pg_char_to_encname_struct() binary-searches this array.  The
 * final NULL entry is a terminator and is not counted in pg_encname_tbl_sz.
 */
33 pg_encname pg_encname_tbl[] =
36 "abc", PG_WIN1258
37 }, /* alias for WIN1258 */
39 "alt", PG_WIN866
40 }, /* IBM866 */
42 "big5", PG_BIG5
43 }, /* Big5; Chinese for Taiwan multibyte set */
45 "euccn", PG_EUC_CN
46 }, /* EUC-CN; Extended Unix Code for simplified
47 * Chinese */
49 "eucjis2004", PG_EUC_JIS_2004
50 }, /* EUC-JIS-2004; Extended UNIX Code fixed
51 * Width for Japanese, standard JIS X 0213 */
53 "eucjp", PG_EUC_JP
54 }, /* EUC-JP; Extended UNIX Code fixed Width for
55 * Japanese, standard OSF */
57 "euckr", PG_EUC_KR
58 }, /* EUC-KR; Extended Unix Code for Korean, KS
59 * X 1001 standard */
61 "euctw", PG_EUC_TW
62 }, /* EUC-TW; Extended Unix Code for
64 * traditional Chinese */
66 "gb18030", PG_GB18030
67 }, /* GB18030;GB18030 */
69 "gbk", PG_GBK
70 }, /* GBK; Chinese Windows CodePage 936
71 * simplified Chinese */
73 "iso88591", PG_LATIN1
74 }, /* ISO-8859-1; RFC1345,KXS2 */
76 "iso885910", PG_LATIN6
77 }, /* ISO-8859-10; RFC1345,KXS2 */
79 "iso885913", PG_LATIN7
80 }, /* ISO-8859-13; RFC1345,KXS2 */
82 "iso885914", PG_LATIN8
83 }, /* ISO-8859-14; RFC1345,KXS2 */
85 "iso885915", PG_LATIN9
86 }, /* ISO-8859-15; RFC1345,KXS2 */
88 "iso885916", PG_LATIN10
89 }, /* ISO-8859-16; RFC1345,KXS2 */
91 "iso88592", PG_LATIN2
92 }, /* ISO-8859-2; RFC1345,KXS2 */
94 "iso88593", PG_LATIN3
95 }, /* ISO-8859-3; RFC1345,KXS2 */
97 "iso88594", PG_LATIN4
98 }, /* ISO-8859-4; RFC1345,KXS2 */
100 "iso88595", PG_ISO_8859_5
101 }, /* ISO-8859-5; RFC1345,KXS2 */
103 "iso88596", PG_ISO_8859_6
104 }, /* ISO-8859-6; RFC1345,KXS2 */
106 "iso88597", PG_ISO_8859_7
107 }, /* ISO-8859-7; RFC1345,KXS2 */
109 "iso88598", PG_ISO_8859_8
110 }, /* ISO-8859-8; RFC1345,KXS2 */
112 "iso88599", PG_LATIN5
113 }, /* ISO-8859-9; RFC1345,KXS2 */
115 "johab", PG_JOHAB
116 }, /* JOHAB; Korean (Hangul) character
117 * encoding */
119 "koi8", PG_KOI8R
120 }, /* _dirty_ alias for KOI8-R (backward
121 * compatibility) */
123 "koi8r", PG_KOI8R
124 }, /* KOI8-R; RFC1489 */
126 "koi8u", PG_KOI8U
127 }, /* KOI8-U; RFC2319 */
129 "latin1", PG_LATIN1
130 }, /* alias for ISO-8859-1 */
132 "latin10", PG_LATIN10
133 }, /* alias for ISO-8859-16 */
135 "latin2", PG_LATIN2
136 }, /* alias for ISO-8859-2 */
138 "latin3", PG_LATIN3
139 }, /* alias for ISO-8859-3 */
141 "latin4", PG_LATIN4
142 }, /* alias for ISO-8859-4 */
144 "latin5", PG_LATIN5
145 }, /* alias for ISO-8859-9 */
147 "latin6", PG_LATIN6
148 }, /* alias for ISO-8859-10 */
150 "latin7", PG_LATIN7
151 }, /* alias for ISO-8859-13 */
153 "latin8", PG_LATIN8
154 }, /* alias for ISO-8859-14 */
156 "latin9", PG_LATIN9
157 }, /* alias for ISO-8859-15 */
159 "mskanji", PG_SJIS
160 }, /* alias for Shift_JIS */
162 "muleinternal", PG_MULE_INTERNAL /* MULE_INTERNAL */
165 "shiftjis", PG_SJIS
166 }, /* Shift_JIS; JIS X 0202-1991 */
169 "shiftjis2004", PG_SHIFT_JIS_2004
170 }, /* SHIFT-JIS-2004; Shift JIS for Japanese,
171 * standard JIS X 0213 */
173 "sjis", PG_SJIS
174 }, /* alias for Shift_JIS */
176 "sqlascii", PG_SQL_ASCII /* SQL_ASCII */
179 "tcvn", PG_WIN1258
180 }, /* alias for WIN1258 */
182 "tcvn5712", PG_WIN1258
183 }, /* alias for WIN1258 */
185 "uhc", PG_UHC
186 }, /* UHC; Korean Windows CodePage 949 */
188 "unicode", PG_UTF8
189 }, /* alias for UTF8 */
191 "utf8", PG_UTF8
192 }, /* alias for UTF8 */
194 "vscii", PG_WIN1258
195 }, /* alias for WIN1258 */
197 "win", PG_WIN1251
198 }, /* _dirty_ alias for windows-1251 (backward
199 * compatibility) */
201 "win1250", PG_WIN1250
202 }, /* alias for Windows-1250 */
204 "win1251", PG_WIN1251
205 }, /* alias for Windows-1251 */
207 "win1252", PG_WIN1252
208 }, /* alias for Windows-1252 */
210 "win1253", PG_WIN1253
211 }, /* alias for Windows-1253 */
213 "win1254", PG_WIN1254
214 }, /* alias for Windows-1254 */
216 "win1255", PG_WIN1255
217 }, /* alias for Windows-1255 */
219 "win1256", PG_WIN1256
220 }, /* alias for Windows-1256 */
222 "win1257", PG_WIN1257
223 }, /* alias for Windows-1257 */
225 "win1258", PG_WIN1258
226 }, /* alias for Windows-1258 */
228 "win866", PG_WIN866
229 }, /* IBM866 */
231 "win874", PG_WIN874
232 }, /* alias for Windows-874 */
234 "win932", PG_SJIS
235 }, /* alias for Shift_JIS */
237 "win936", PG_GBK
238 }, /* alias for GBK */
240 "win949", PG_UHC
241 }, /* alias for UHC */
243 "win950", PG_BIG5
244 }, /* alias for BIG5 */
246 "windows1250", PG_WIN1250
247 }, /* Windows-1250; Microsoft */
249 "windows1251", PG_WIN1251
250 }, /* Windows-1251; Microsoft */
252 "windows1252", PG_WIN1252
253 }, /* Windows-1252; Microsoft */
255 "windows1253", PG_WIN1253
256 }, /* Windows-1253; Microsoft */
258 "windows1254", PG_WIN1254
259 }, /* Windows-1254; Microsoft */
261 "windows1255", PG_WIN1255
262 }, /* Windows-1255; Microsoft */
264 "windows1256", PG_WIN1256
265 }, /* Windows-1256; Microsoft */
267 "windows1257", PG_WIN1257
268 }, /* Windows-1257; Microsoft */
270 "windows1258", PG_WIN1258
271 }, /* Windows-1258; Microsoft */
273 "windows866", PG_WIN866
274 }, /* IBM866 */
276 "windows874", PG_WIN874
277 }, /* Windows-874; Microsoft */
279 "windows932", PG_SJIS
280 }, /* alias for Shift_JIS */
282 "windows936", PG_GBK
283 }, /* alias for GBK */
285 "windows949", PG_UHC
286 }, /* alias for UHC */
288 "windows950", PG_BIG5
289 }, /* alias for BIG5 */
291 NULL, 0
292 } /* last */
295 unsigned int pg_encname_tbl_sz = \
296 sizeof(pg_encname_tbl) / sizeof(pg_encname_tbl[0]) - 1;
298 /* ----------
299 * These are "official" encoding names.
300 * XXX must be kept in the same order as enum pg_enc (in mb/pg_wchar.h)
301 * ----------
/*
 * Official name of each encoding.  pg_encoding_to_char() indexes this array
 * directly with the encoding id and asserts that the entry's encoding field
 * equals that id, so entry i must describe encoding id i.
 */
303 pg_enc2name pg_enc2name_tbl[] =
306 "SQL_ASCII", PG_SQL_ASCII
309 "EUC_JP", PG_EUC_JP
312 "EUC_CN", PG_EUC_CN
315 "EUC_KR", PG_EUC_KR
318 "EUC_TW", PG_EUC_TW
321 "EUC_JIS_2004", PG_EUC_JIS_2004
324 "UTF8", PG_UTF8
327 "MULE_INTERNAL", PG_MULE_INTERNAL
330 "LATIN1", PG_LATIN1
333 "LATIN2", PG_LATIN2
336 "LATIN3", PG_LATIN3
339 "LATIN4", PG_LATIN4
342 "LATIN5", PG_LATIN5
345 "LATIN6", PG_LATIN6
348 "LATIN7", PG_LATIN7
351 "LATIN8", PG_LATIN8
354 "LATIN9", PG_LATIN9
357 "LATIN10", PG_LATIN10
360 "WIN1256", PG_WIN1256
363 "WIN1258", PG_WIN1258
366 "WIN866", PG_WIN866
369 "WIN874", PG_WIN874
372 "KOI8R", PG_KOI8R
375 "WIN1251", PG_WIN1251
378 "WIN1252", PG_WIN1252
381 "ISO_8859_5", PG_ISO_8859_5
384 "ISO_8859_6", PG_ISO_8859_6
387 "ISO_8859_7", PG_ISO_8859_7
390 "ISO_8859_8", PG_ISO_8859_8
393 "WIN1250", PG_WIN1250
396 "WIN1253", PG_WIN1253
399 "WIN1254", PG_WIN1254
402 "WIN1255", PG_WIN1255
405 "WIN1257", PG_WIN1257
408 "KOI8U", PG_KOI8U
411 "SJIS", PG_SJIS
414 "BIG5", PG_BIG5
417 "GBK", PG_GBK
420 "UHC", PG_UHC
423 "GB18030", PG_GB18030
426 "JOHAB", PG_JOHAB
429 "SHIFT_JIS_2004", PG_SHIFT_JIS_2004
433 /* ----------
434 * These are encoding names for gettext.
435 * ----------
/*
 * Encoding id -> encoding name to use with gettext.  The list ends with a
 * {0, NULL} terminator entry.  Not every encoding id appears here, and
 * PG_EUC_JIS_2004 is mapped to the same name as PG_EUC_JP ("EUC-JP").
 */
437 pg_enc2gettext pg_enc2gettext_tbl[] =
439 {PG_UTF8, "UTF-8"},
440 {PG_LATIN1, "LATIN1"},
441 {PG_LATIN2, "LATIN2"},
442 {PG_LATIN3, "LATIN3"},
443 {PG_LATIN4, "LATIN4"},
444 {PG_ISO_8859_5, "ISO-8859-5"},
445 {PG_ISO_8859_6, "ISO_8859-6"},
446 {PG_ISO_8859_7, "ISO-8859-7"},
447 {PG_ISO_8859_8, "ISO-8859-8"},
448 {PG_LATIN5, "LATIN5"},
449 {PG_LATIN6, "LATIN6"},
450 {PG_LATIN7, "LATIN7"},
451 {PG_LATIN8, "LATIN8"},
452 {PG_LATIN9, "LATIN-9"},
453 {PG_LATIN10, "LATIN10"},
454 {PG_KOI8R, "KOI8-R"},
455 {PG_KOI8U, "KOI8-U"},
456 {PG_WIN1250, "CP1250"},
457 {PG_WIN1251, "CP1251"},
458 {PG_WIN1252, "CP1252"},
459 {PG_WIN1253, "CP1253"},
460 {PG_WIN1254, "CP1254"},
461 {PG_WIN1255, "CP1255"},
462 {PG_WIN1256, "CP1256"},
463 {PG_WIN1257, "CP1257"},
464 {PG_WIN1258, "CP1258"},
465 {PG_WIN866, "CP866"},
466 {PG_WIN874, "CP874"},
467 {PG_EUC_CN, "EUC-CN"},
468 {PG_EUC_JP, "EUC-JP"},
469 {PG_EUC_KR, "EUC-KR"},
470 {PG_EUC_TW, "EUC-TW"},
471 {PG_EUC_JIS_2004, "EUC-JP"},
472 {0, NULL}
/* ----------
 * Encoding checks: each returns the encoding id, or -1 on error.
 * ----------
 */

/*
 * Look up 'name' and return its encoding id if it passes
 * PG_VALID_FE_ENCODING; return -1 if the name is unknown or the encoding
 * fails that check.
 */
int
pg_valid_client_encoding(const char *name)
{
	int			enc = pg_char_to_encoding(name);

	if (enc < 0)
		return -1;				/* unknown encoding name */

	if (!PG_VALID_FE_ENCODING(enc))
		return -1;				/* known, but rejected by the FE check */

	return enc;
}
/*
 * Look up 'name' and return its encoding id if it passes
 * PG_VALID_BE_ENCODING; return -1 if the name is unknown or the encoding
 * fails that check.
 */
int
pg_valid_server_encoding(const char *name)
{
	int			enc = pg_char_to_encoding(name);

	if (enc < 0)
		return -1;				/* unknown encoding name */

	if (!PG_VALID_BE_ENCODING(enc))
		return -1;				/* known, but rejected by the BE check */

	return enc;
}
/*
 * Return the result of PG_VALID_BE_ENCODING for an encoding id
 * (nonzero when the id passes the check).
 */
int
pg_valid_server_encoding_id(int encoding)
{
	return PG_VALID_BE_ENCODING(encoding);
}
/* ----------
 * Remove irrelevant chars from encoding name
 *
 * Copies only the alphanumeric characters of 'key' into 'newkey', folding
 * ASCII upper-case letters to lower case, and NUL-terminates the result.
 * No bounds checking is done: 'newkey' must have room for strlen(key) + 1
 * bytes.  Returns 'newkey'.
 * ----------
 */
static char *
clean_encoding_name(const char *key, char *newkey)
{
	const char *src;
	char	   *dst = newkey;

	for (src = key; *src != '\0'; src++)
	{
		/* <ctype.h> functions need the value as an unsigned char */
		if (!isalnum((unsigned char) *src))
			continue;

		/* fold only ASCII A-Z; every other kept character is copied as is */
		if (*src >= 'A' && *src <= 'Z')
			*dst++ = *src + 'a' - 'A';
		else
			*dst++ = *src;
	}
	*dst = '\0';

	return newkey;
}
538 /* ----------
539 * Search encoding by encoding name
540 * ----------
542 pg_encname *
543 pg_char_to_encname_struct(const char *name)
545 unsigned int nel = pg_encname_tbl_sz;
546 pg_encname *base = pg_encname_tbl,
547 *last = base + nel - 1,
548 *position;
549 int result;
550 char buff[NAMEDATALEN],
551 *key;
553 if (name == NULL || *name == '\0')
554 return NULL;
556 if (strlen(name) >= NAMEDATALEN)
558 #ifdef FRONTEND
559 fprintf(stderr, "encoding name too long\n");
560 return NULL;
561 #else
562 ereport(ERROR,
563 (errcode(ERRCODE_NAME_TOO_LONG),
564 errmsg("encoding name too long")));
565 #endif
567 key = clean_encoding_name(name, buff);
569 while (last >= base)
571 position = base + ((last - base) >> 1);
572 result = key[0] - position->name[0];
574 if (result == 0)
576 result = strcmp(key, position->name);
577 if (result == 0)
578 return position;
580 if (result < 0)
581 last = position - 1;
582 else
583 base = position + 1;
585 return NULL;
589 * Returns encoding or -1 for error
592 pg_char_to_encoding(const char *name)
594 pg_encname *p;
596 if (!name)
597 return -1;
599 p = pg_char_to_encname_struct(name);
600 return p ? p->encoding : -1;
603 #ifndef FRONTEND
604 Datum
605 PG_char_to_encoding(PG_FUNCTION_ARGS)
607 Name s = PG_GETARG_NAME(0);
609 PG_RETURN_INT32(pg_char_to_encoding(NameStr(*s)));
611 #endif
613 const char *
614 pg_encoding_to_char(int encoding)
616 if (PG_VALID_ENCODING(encoding))
618 pg_enc2name *p = &pg_enc2name_tbl[encoding];
620 Assert(encoding == p->encoding);
621 return p->name;
623 return "";
626 #ifndef FRONTEND
627 Datum
628 PG_encoding_to_char(PG_FUNCTION_ARGS)
630 int32 encoding = PG_GETARG_INT32(0);
631 const char *encoding_name = pg_encoding_to_char(encoding);
633 return DirectFunctionCall1(namein, CStringGetDatum(encoding_name));
636 #endif