2 * Encoding names and routines for working with them. Everything
3 * in this file is shared between FE and BE.
8 #include "postgres_fe.h"
9 #define Assert(condition)
12 #include "utils/builtins.h"
18 #include "mb/pg_wchar.h"
22 * All encoding names, sorted: *** A L P H A B E T I C ***
24 * All names must be free of irrelevant chars; the search routines use
25 * isalnum() chars only. This means ISO-8859-1, iso_8859-1 and Iso8859_1
26 * are always converted to 'iso88591'. All must be lower case.
28 * The table doesn't contain 'cs' aliases (like csISOLatin1). Are they needed?
33 pg_encname pg_encname_tbl
[] =
37 }, /* alias for WIN1258 */
43 }, /* Big5; Chinese for Taiwan multibyte set */
46 }, /* EUC-CN; Extended Unix Code for simplified
49 "eucjis2004", PG_EUC_JIS_2004
50 }, /* EUC-JIS-2004; Extended UNIX Code fixed
51 * Width for Japanese, standard JIS X 0213 */
54 }, /* EUC-JP; Extended UNIX Code fixed Width for
55 * Japanese, standard OSF */
58 }, /* EUC-KR; Extended Unix Code for Korean, KS
62 }, /* EUC-TW; Extended Unix Code for
64 * traditional Chinese */
67 }, /* GB18030;GB18030 */
70 }, /* GBK; Chinese Windows CodePage 936
71 * simplified Chinese */
74 }, /* ISO-8859-1; RFC1345,KXS2 */
76 "iso885910", PG_LATIN6
77 }, /* ISO-8859-10; RFC1345,KXS2 */
79 "iso885913", PG_LATIN7
80 }, /* ISO-8859-13; RFC1345,KXS2 */
82 "iso885914", PG_LATIN8
83 }, /* ISO-8859-14; RFC1345,KXS2 */
85 "iso885915", PG_LATIN9
86 }, /* ISO-8859-15; RFC1345,KXS2 */
88 "iso885916", PG_LATIN10
89 }, /* ISO-8859-16; RFC1345,KXS2 */
92 }, /* ISO-8859-2; RFC1345,KXS2 */
95 }, /* ISO-8859-3; RFC1345,KXS2 */
98 }, /* ISO-8859-4; RFC1345,KXS2 */
100 "iso88595", PG_ISO_8859_5
101 }, /* ISO-8859-5; RFC1345,KXS2 */
103 "iso88596", PG_ISO_8859_6
104 }, /* ISO-8859-6; RFC1345,KXS2 */
106 "iso88597", PG_ISO_8859_7
107 }, /* ISO-8859-7; RFC1345,KXS2 */
109 "iso88598", PG_ISO_8859_8
110 }, /* ISO-8859-8; RFC1345,KXS2 */
112 "iso88599", PG_LATIN5
113 }, /* ISO-8859-9; RFC1345,KXS2 */
116 }, /* JOHAB; Extended Unix Code for simplified
120 }, /* _dirty_ alias for KOI8-R (backward
124 }, /* KOI8-R; RFC1489 */
127 }, /* KOI8-U; RFC2319 */
130 }, /* alias for ISO-8859-1 */
132 "latin10", PG_LATIN10
133 }, /* alias for ISO-8859-16 */
136 }, /* alias for ISO-8859-2 */
139 }, /* alias for ISO-8859-3 */
142 }, /* alias for ISO-8859-4 */
145 }, /* alias for ISO-8859-9 */
148 }, /* alias for ISO-8859-10 */
151 }, /* alias for ISO-8859-13 */
154 }, /* alias for ISO-8859-14 */
157 }, /* alias for ISO-8859-15 */
160 }, /* alias for Shift_JIS */
162 "muleinternal", PG_MULE_INTERNAL
166 }, /* Shift_JIS; JIS X 0202-1991 */
169 "shiftjis2004", PG_SHIFT_JIS_2004
170 }, /* SHIFT-JIS-2004; Shift JIS for Japanese,
171 * standard JIS X 0213 */
174 }, /* alias for Shift_JIS */
176 "sqlascii", PG_SQL_ASCII
180 }, /* alias for WIN1258 */
182 "tcvn5712", PG_WIN1258
183 }, /* alias for WIN1258 */
186 }, /* UHC; Korean Windows CodePage 949 */
189 }, /* alias for UTF8 */
192 }, /* alias for UTF8 */
195 }, /* alias for WIN1258 */
198 }, /* _dirty_ alias for windows-1251 (backward
201 "win1250", PG_WIN1250
202 }, /* alias for Windows-1250 */
204 "win1251", PG_WIN1251
205 }, /* alias for Windows-1251 */
207 "win1252", PG_WIN1252
208 }, /* alias for Windows-1252 */
210 "win1253", PG_WIN1253
211 }, /* alias for Windows-1253 */
213 "win1254", PG_WIN1254
214 }, /* alias for Windows-1254 */
216 "win1255", PG_WIN1255
217 }, /* alias for Windows-1255 */
219 "win1256", PG_WIN1256
220 }, /* alias for Windows-1256 */
222 "win1257", PG_WIN1257
223 }, /* alias for Windows-1257 */
225 "win1258", PG_WIN1258
226 }, /* alias for Windows-1258 */
232 }, /* alias for Windows-874 */
235 }, /* alias for Shift_JIS */
238 }, /* alias for GBK */
241 }, /* alias for UHC */
244 }, /* alias for BIG5 */
246 "windows1250", PG_WIN1250
247 }, /* Windows-1250; Microsoft */
249 "windows1251", PG_WIN1251
250 }, /* Windows-1251; Microsoft */
252 "windows1252", PG_WIN1252
253 }, /* Windows-1252; Microsoft */
255 "windows1253", PG_WIN1253
256 }, /* Windows-1253; Microsoft */
258 "windows1254", PG_WIN1254
259 }, /* Windows-1254; Microsoft */
261 "windows1255", PG_WIN1255
262 }, /* Windows-1255; Microsoft */
264 "windows1256", PG_WIN1256
265 }, /* Windows-1256; Microsoft */
267 "windows1257", PG_WIN1257
268 }, /* Windows-1257; Microsoft */
270 "windows1258", PG_WIN1258
271 }, /* Windows-1258; Microsoft */
273 "windows866", PG_WIN866
276 "windows874", PG_WIN874
277 }, /* Windows-874; Microsoft */
279 "windows932", PG_SJIS
280 }, /* alias for Shift_JIS */
283 }, /* alias for GBK */
286 }, /* alias for UHC */
288 "windows950", PG_BIG5
289 }, /* alias for BIG5 */
295 unsigned int pg_encname_tbl_sz
= \
296 sizeof(pg_encname_tbl
) / sizeof(pg_encname_tbl
[0]) - 1;
299 * These are "official" encoding names.
300 * XXX must be sorted in the same order as enum pg_enc (in mb/pg_wchar.h)
303 pg_enc2name pg_enc2name_tbl
[] =
306 "SQL_ASCII", PG_SQL_ASCII
321 "EUC_JIS_2004", PG_EUC_JIS_2004
327 "MULE_INTERNAL", PG_MULE_INTERNAL
357 "LATIN10", PG_LATIN10
360 "WIN1256", PG_WIN1256
363 "WIN1258", PG_WIN1258
375 "WIN1251", PG_WIN1251
378 "WIN1252", PG_WIN1252
381 "ISO_8859_5", PG_ISO_8859_5
384 "ISO_8859_6", PG_ISO_8859_6
387 "ISO_8859_7", PG_ISO_8859_7
390 "ISO_8859_8", PG_ISO_8859_8
393 "WIN1250", PG_WIN1250
396 "WIN1253", PG_WIN1253
399 "WIN1254", PG_WIN1254
402 "WIN1255", PG_WIN1255
405 "WIN1257", PG_WIN1257
423 "GB18030", PG_GB18030
429 "SHIFT_JIS_2004", PG_SHIFT_JIS_2004
434 * These are encoding names for gettext.
437 pg_enc2gettext pg_enc2gettext_tbl
[] =
440 {PG_LATIN1
, "LATIN1"},
441 {PG_LATIN2
, "LATIN2"},
442 {PG_LATIN3
, "LATIN3"},
443 {PG_LATIN4
, "LATIN4"},
444 {PG_ISO_8859_5
, "ISO-8859-5"},
445 {PG_ISO_8859_6
, "ISO_8859-6"},
446 {PG_ISO_8859_7
, "ISO-8859-7"},
447 {PG_ISO_8859_8
, "ISO-8859-8"},
448 {PG_LATIN5
, "LATIN5"},
449 {PG_LATIN6
, "LATIN6"},
450 {PG_LATIN7
, "LATIN7"},
451 {PG_LATIN8
, "LATIN8"},
452 {PG_LATIN9
, "LATIN-9"},
453 {PG_LATIN10
, "LATIN10"},
454 {PG_KOI8R
, "KOI8-R"},
455 {PG_KOI8U
, "KOI8-U"},
456 {PG_WIN1250
, "CP1250"},
457 {PG_WIN1251
, "CP1251"},
458 {PG_WIN1252
, "CP1252"},
459 {PG_WIN1253
, "CP1253"},
460 {PG_WIN1254
, "CP1254"},
461 {PG_WIN1255
, "CP1255"},
462 {PG_WIN1256
, "CP1256"},
463 {PG_WIN1257
, "CP1257"},
464 {PG_WIN1258
, "CP1258"},
465 {PG_WIN866
, "CP866"},
466 {PG_WIN874
, "CP874"},
467 {PG_EUC_CN
, "EUC-CN"},
468 {PG_EUC_JP
, "EUC-JP"},
469 {PG_EUC_KR
, "EUC-KR"},
470 {PG_EUC_TW
, "EUC-TW"},
471 {PG_EUC_JIS_2004
, "EUC-JP"},
477 * Encoding checks: return -1 on error, otherwise the encoding id
481 pg_valid_client_encoding(const char *name
)
485 if ((enc
= pg_char_to_encoding(name
)) < 0)
488 if (!PG_VALID_FE_ENCODING(enc
))
495 pg_valid_server_encoding(const char *name
)
499 if ((enc
= pg_char_to_encoding(name
)) < 0)
502 if (!PG_VALID_BE_ENCODING(enc
))
509 pg_valid_server_encoding_id(int encoding
)
511 return PG_VALID_BE_ENCODING(encoding
);
515 * Remove irrelevant chars from encoding name
519 clean_encoding_name(const char *key
, char *newkey
)
524 for (p
= key
, np
= newkey
; *p
!= '\0'; p
++)
526 if (isalnum((unsigned char) *p
))
528 if (*p
>= 'A' && *p
<= 'Z')
529 *np
++ = *p
+ 'a' - 'A';
539 * Search encoding by encoding name
543 pg_char_to_encname_struct(const char *name
)
545 unsigned int nel
= pg_encname_tbl_sz
;
546 pg_encname
*base
= pg_encname_tbl
,
547 *last
= base
+ nel
- 1,
550 char buff
[NAMEDATALEN
],
553 if (name
== NULL
|| *name
== '\0')
556 if (strlen(name
) >= NAMEDATALEN
)
559 fprintf(stderr
, "encoding name too long\n");
563 (errcode(ERRCODE_NAME_TOO_LONG
),
564 errmsg("encoding name too long")));
567 key
= clean_encoding_name(name
, buff
);
571 position
= base
+ ((last
- base
) >> 1);
572 result
= key
[0] - position
->name
[0];
576 result
= strcmp(key
, position
->name
);
589 * Returns encoding or -1 for error
592 pg_char_to_encoding(const char *name
)
599 p
= pg_char_to_encname_struct(name
);
600 return p
? p
->encoding
: -1;
605 PG_char_to_encoding(PG_FUNCTION_ARGS
)
607 Name s
= PG_GETARG_NAME(0);
609 PG_RETURN_INT32(pg_char_to_encoding(NameStr(*s
)));
614 pg_encoding_to_char(int encoding
)
616 if (PG_VALID_ENCODING(encoding
))
618 pg_enc2name
*p
= &pg_enc2name_tbl
[encoding
];
620 Assert(encoding
== p
->encoding
);
628 PG_encoding_to_char(PG_FUNCTION_ARGS
)
630 int32 encoding
= PG_GETARG_INT32(0);
631 const char *encoding_name
= pg_encoding_to_char(encoding
);
633 return DirectFunctionCall1(namein
, CStringGetDatum(encoding_name
));