Add support for user-defined I/O conversion casts.
[PostgreSQL.git] / src / backend / utils / mb / mbutils.c
blob9b41fb94eb320c2e17ab5eb9ab06189f43453985
1 /*
2 * This file contains public functions for conversion between
3 * client encoding and server internal encoding.
4 * (currently mule internal code (mic) is used)
5 * Tatsuo Ishii
7 * $PostgreSQL$
8 */
9 #include "postgres.h"
11 #include "access/xact.h"
12 #include "catalog/namespace.h"
13 #include "mb/pg_wchar.h"
14 #include "utils/builtins.h"
15 #include "utils/memutils.h"
16 #include "utils/pg_locale.h"
17 #include "utils/syscache.h"
20 * When converting strings between different encodings, we assume that space
21 * for converted result is 4-to-1 growth in the worst case. The growth rate for
22 * currently supported encoding pairs is within 3 (SJIS JIS X0201 half-width
23 * kana -> UTF8 is the worst case). So "4" should be enough for the moment.
25 * Note that this is not the same as the maximum character width in any
26 * particular encoding.
28 #define MAX_CONVERSION_GROWTH 4
31 * For the current FE and BE encodings we keep both the encoding identifier
32 * and the encoding name. This avoids having to search for and convert an
33 * encoding ID to its name in getdatabaseencoding() and other routines.
35 static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
36 static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
39 * Caches for conversion function info. These values are allocated in
40 * MbProcContext. That context is a child of TopMemoryContext,
41 * which allows these values to survive across transactions. See
42 * SetClientEncoding() for more details.
44 static MemoryContext MbProcContext = NULL;
45 static FmgrInfo *ToServerConvProc = NULL;
46 static FmgrInfo *ToClientConvProc = NULL;
49 * During backend startup we can't set client encoding because we (a)
50 * can't look up the conversion functions, and (b) may not know the database
51 * encoding yet either. So SetClientEncoding() just accepts anything and
52 * remembers it for InitializeClientEncoding() to apply later.
54 static bool backend_startup_complete = false;
55 static int pending_client_encoding = PG_SQL_ASCII;
58 /* Internal functions */
59 static char *perform_default_encoding_conversion(const char *src,
60 int len, bool is_client_to_server);
61 static int cliplen(const char *str, int len, int limit);
65 * Set the client encoding and save fmgrinfo for the conversion
66 * function if necessary. Returns 0 if okay, -1 if not (bad encoding
67 * or can't support conversion)
69 int
70 SetClientEncoding(int encoding, bool doit)
72 int current_server_encoding;
73 Oid to_server_proc,
74 to_client_proc;
75 FmgrInfo *to_server;
76 FmgrInfo *to_client;
77 MemoryContext oldcontext;
79 if (!PG_VALID_FE_ENCODING(encoding))
80 return -1;
82 /* Can't do anything during startup, per notes above */
83 if (!backend_startup_complete)
85 if (doit)
86 pending_client_encoding = encoding;
87 return 0;
90 current_server_encoding = GetDatabaseEncoding();
93 * Check for cases that require no conversion function.
95 if (current_server_encoding == encoding ||
96 current_server_encoding == PG_SQL_ASCII ||
97 encoding == PG_SQL_ASCII)
99 if (doit)
101 ClientEncoding = &pg_enc2name_tbl[encoding];
102 ToServerConvProc = NULL;
103 ToClientConvProc = NULL;
104 if (MbProcContext)
105 MemoryContextReset(MbProcContext);
107 return 0;
111 * If we're not inside a transaction then we can't do catalog lookups, so
112 * fail. After backend startup, this could only happen if we are
113 * re-reading postgresql.conf due to SIGHUP --- so basically this just
114 * constrains the ability to change client_encoding on the fly from
115 * postgresql.conf. Which would probably be a stupid thing to do anyway.
117 if (!IsTransactionState())
118 return -1;
121 * Look up the conversion functions.
123 to_server_proc = FindDefaultConversionProc(encoding,
124 current_server_encoding);
125 if (!OidIsValid(to_server_proc))
126 return -1;
127 to_client_proc = FindDefaultConversionProc(current_server_encoding,
128 encoding);
129 if (!OidIsValid(to_client_proc))
130 return -1;
133 * Done if not wanting to actually apply setting.
135 if (!doit)
136 return 0;
138 /* Before loading the new fmgr info, remove the old info, if any */
139 ToServerConvProc = NULL;
140 ToClientConvProc = NULL;
141 if (MbProcContext != NULL)
143 MemoryContextReset(MbProcContext);
145 else
148 * This is the first time through, so create the context. Make it a
149 * child of TopMemoryContext so that these values survive across
150 * transactions.
152 MbProcContext = AllocSetContextCreate(TopMemoryContext,
153 "MbProcContext",
154 ALLOCSET_SMALL_MINSIZE,
155 ALLOCSET_SMALL_INITSIZE,
156 ALLOCSET_SMALL_MAXSIZE);
159 /* Load the fmgr info into MbProcContext */
160 oldcontext = MemoryContextSwitchTo(MbProcContext);
161 to_server = palloc(sizeof(FmgrInfo));
162 to_client = palloc(sizeof(FmgrInfo));
163 fmgr_info(to_server_proc, to_server);
164 fmgr_info(to_client_proc, to_client);
165 MemoryContextSwitchTo(oldcontext);
167 ClientEncoding = &pg_enc2name_tbl[encoding];
168 ToServerConvProc = to_server;
169 ToClientConvProc = to_client;
171 return 0;
175 * Initialize client encoding if necessary.
176 * called from InitPostgres() once during backend starting up.
178 void
179 InitializeClientEncoding(void)
181 Assert(!backend_startup_complete);
182 backend_startup_complete = true;
184 if (SetClientEncoding(pending_client_encoding, true) < 0)
187 * Oops, the requested conversion is not available. We couldn't fail
188 * before, but we can now.
190 ereport(FATAL,
191 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
192 errmsg("conversion between %s and %s is not supported",
193 pg_enc2name_tbl[pending_client_encoding].name,
194 GetDatabaseEncodingName())));
199 * returns the current client encoding */
201 pg_get_client_encoding(void)
203 Assert(ClientEncoding);
204 return ClientEncoding->encoding;
208 * returns the current client encoding name
210 const char *
211 pg_get_client_encoding_name(void)
213 Assert(ClientEncoding);
214 return ClientEncoding->name;
218 * Apply encoding conversion on src and return it. The encoding
219 * conversion function is chosen from the pg_conversion system catalog
220 * marked as "default". If it is not found in the schema search path,
221 * it's taken from pg_catalog schema. If it even is not in the schema,
222 * warn and return src.
224 * In the case of no conversion, src is returned.
226 * Note: we try to avoid raising error, since that could get us into
227 * infinite recursion when this function is invoked during error message
228 * sending. It should be OK to raise error for overlength strings though,
229 * since the recursion will come with a shorter message.
231 unsigned char *
232 pg_do_encoding_conversion(unsigned char *src, int len,
233 int src_encoding, int dest_encoding)
235 unsigned char *result;
236 Oid proc;
238 if (!IsTransactionState())
239 return src;
241 if (src_encoding == dest_encoding)
242 return src;
244 if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
245 return src;
247 if (len <= 0)
248 return src;
250 proc = FindDefaultConversionProc(src_encoding, dest_encoding);
251 if (!OidIsValid(proc))
253 ereport(LOG,
254 (errcode(ERRCODE_UNDEFINED_FUNCTION),
255 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
256 pg_encoding_to_char(src_encoding),
257 pg_encoding_to_char(dest_encoding))));
258 return src;
262 * XXX we should avoid throwing errors in OidFunctionCall. Otherwise we
263 * are going into infinite loop! So we have to make sure that the
264 * function exists before calling OidFunctionCall.
266 if (!SearchSysCacheExists(PROCOID,
267 ObjectIdGetDatum(proc),
268 0, 0, 0))
270 elog(LOG, "cache lookup failed for function %u", proc);
271 return src;
275 * Allocate space for conversion result, being wary of integer overflow
277 if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
278 ereport(ERROR,
279 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
280 errmsg("out of memory"),
281 errdetail("String of %d bytes is too long for encoding conversion.",
282 len)));
284 result = palloc(len * MAX_CONVERSION_GROWTH + 1);
286 OidFunctionCall5(proc,
287 Int32GetDatum(src_encoding),
288 Int32GetDatum(dest_encoding),
289 CStringGetDatum(src),
290 CStringGetDatum(result),
291 Int32GetDatum(len));
292 return result;
296 * Convert string using encoding_name. The source
297 * encoding is the DB encoding.
299 * BYTEA convert_to(TEXT string, NAME encoding_name) */
300 Datum
301 pg_convert_to(PG_FUNCTION_ARGS)
303 Datum string = PG_GETARG_DATUM(0);
304 Datum dest_encoding_name = PG_GETARG_DATUM(1);
305 Datum src_encoding_name = DirectFunctionCall1(namein,
306 CStringGetDatum(DatabaseEncoding->name));
307 Datum result;
310 * pg_convert expects a bytea as its first argument. We're passing it a
311 * text argument here, relying on the fact that they are both in fact
312 * varlena types, and thus structurally identical.
314 result = DirectFunctionCall3(pg_convert, string,
315 src_encoding_name, dest_encoding_name);
317 PG_RETURN_DATUM(result);
321 * Convert string using encoding_name. The destination
322 * encoding is the DB encoding.
324 * TEXT convert_from(BYTEA string, NAME encoding_name) */
325 Datum
326 pg_convert_from(PG_FUNCTION_ARGS)
328 Datum string = PG_GETARG_DATUM(0);
329 Datum src_encoding_name = PG_GETARG_DATUM(1);
330 Datum dest_encoding_name = DirectFunctionCall1(namein,
331 CStringGetDatum(DatabaseEncoding->name));
332 Datum result;
334 result = DirectFunctionCall3(pg_convert, string,
335 src_encoding_name, dest_encoding_name);
338 * pg_convert returns a bytea, which we in turn return as text, relying on
339 * the fact that they are both in fact varlena types, and thus
340 * structurally identical. Although not all bytea values are valid text,
341 * in this case it will be because we've told pg_convert to return one
342 * that is valid as text in the current database encoding.
344 PG_RETURN_DATUM(result);
348 * Convert string using encoding_names.
350 * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
352 Datum
353 pg_convert(PG_FUNCTION_ARGS)
355 bytea *string = PG_GETARG_BYTEA_P(0);
356 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
357 int src_encoding = pg_char_to_encoding(src_encoding_name);
358 char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
359 int dest_encoding = pg_char_to_encoding(dest_encoding_name);
360 unsigned char *result;
361 bytea *retval;
362 unsigned char *str;
363 int len;
365 if (src_encoding < 0)
366 ereport(ERROR,
367 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
368 errmsg("invalid source encoding name \"%s\"",
369 src_encoding_name)));
370 if (dest_encoding < 0)
371 ereport(ERROR,
372 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
373 errmsg("invalid destination encoding name \"%s\"",
374 dest_encoding_name)));
376 /* make sure that source string is valid and null terminated */
377 len = VARSIZE(string) - VARHDRSZ;
378 pg_verify_mbstr(src_encoding, VARDATA(string), len, false);
379 str = palloc(len + 1);
380 memcpy(str, VARDATA(string), len);
381 *(str + len) = '\0';
383 result = pg_do_encoding_conversion(str, len, src_encoding, dest_encoding);
384 if (result == NULL)
385 elog(ERROR, "encoding conversion failed");
388 * build bytea data type structure.
390 len = strlen((char *) result) + VARHDRSZ;
391 retval = palloc(len);
392 SET_VARSIZE(retval, len);
393 memcpy(VARDATA(retval), result, len - VARHDRSZ);
395 if (result != str)
396 pfree(result);
397 pfree(str);
399 /* free memory if allocated by the toaster */
400 PG_FREE_IF_COPY(string, 0);
402 PG_RETURN_BYTEA_P(retval);
406 * get the length of the string considered as text in the specified
407 * encoding. Raises an error if the data is not valid in that
408 * encoding.
410 * INT4 length (BYTEA string, NAME src_encoding_name)
412 Datum
413 length_in_encoding(PG_FUNCTION_ARGS)
415 bytea *string = PG_GETARG_BYTEA_P(0);
416 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
417 int src_encoding = pg_char_to_encoding(src_encoding_name);
418 int len = VARSIZE(string) - VARHDRSZ;
419 int retval;
421 if (src_encoding < 0)
422 ereport(ERROR,
423 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
424 errmsg("invalid encoding name \"%s\"",
425 src_encoding_name)));
427 retval = pg_verify_mbstr_len(src_encoding, VARDATA(string), len, false);
428 PG_RETURN_INT32(retval);
433 * convert client encoding to server encoding.
435 char *
436 pg_client_to_server(const char *s, int len)
438 Assert(DatabaseEncoding);
439 Assert(ClientEncoding);
441 if (len <= 0)
442 return (char *) s;
444 if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
445 ClientEncoding->encoding == PG_SQL_ASCII)
448 * No conversion is needed, but we must still validate the data.
450 (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
451 return (char *) s;
454 if (DatabaseEncoding->encoding == PG_SQL_ASCII)
457 * No conversion is possible, but we must still validate the data,
458 * because the client-side code might have done string escaping using
459 * the selected client_encoding. If the client encoding is ASCII-safe
460 * then we just do a straight validation under that encoding. For an
461 * ASCII-unsafe encoding we have a problem: we dare not pass such data
462 * to the parser but we have no way to convert it. We compromise by
463 * rejecting the data if it contains any non-ASCII characters.
465 if (PG_VALID_BE_ENCODING(ClientEncoding->encoding))
466 (void) pg_verify_mbstr(ClientEncoding->encoding, s, len, false);
467 else
469 int i;
471 for (i = 0; i < len; i++)
473 if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
474 ereport(ERROR,
475 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
476 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
477 pg_enc2name_tbl[PG_SQL_ASCII].name,
478 (unsigned char) s[i])));
481 return (char *) s;
484 return perform_default_encoding_conversion(s, len, true);
488 * convert server encoding to client encoding.
490 char *
491 pg_server_to_client(const char *s, int len)
493 Assert(DatabaseEncoding);
494 Assert(ClientEncoding);
496 if (len <= 0)
497 return (char *) s;
499 if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
500 ClientEncoding->encoding == PG_SQL_ASCII ||
501 DatabaseEncoding->encoding == PG_SQL_ASCII)
502 return (char *) s; /* assume data is valid */
504 return perform_default_encoding_conversion(s, len, false);
508 * Perform default encoding conversion using cached FmgrInfo. Since
509 * this function does not access database at all, it is safe to call
510 * outside transactions. Explicit setting client encoding required
511 * before calling this function. Otherwise no conversion is
512 * performed.
514 static char *
515 perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
517 char *result;
518 int src_encoding,
519 dest_encoding;
520 FmgrInfo *flinfo;
522 if (is_client_to_server)
524 src_encoding = ClientEncoding->encoding;
525 dest_encoding = DatabaseEncoding->encoding;
526 flinfo = ToServerConvProc;
528 else
530 src_encoding = DatabaseEncoding->encoding;
531 dest_encoding = ClientEncoding->encoding;
532 flinfo = ToClientConvProc;
535 if (flinfo == NULL)
536 return (char *) src;
539 * Allocate space for conversion result, being wary of integer overflow
541 if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
542 ereport(ERROR,
543 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
544 errmsg("out of memory"),
545 errdetail("String of %d bytes is too long for encoding conversion.",
546 len)));
548 result = palloc(len * MAX_CONVERSION_GROWTH + 1);
550 FunctionCall5(flinfo,
551 Int32GetDatum(src_encoding),
552 Int32GetDatum(dest_encoding),
553 CStringGetDatum(src),
554 CStringGetDatum(result),
555 Int32GetDatum(len));
556 return result;
561 #ifdef USE_WIDE_UPPER_LOWER
/*
 * wchar2char --- convert wide characters to multibyte format
 *
 * Same API as the standard wcstombs() function: tolen is the maximum number
 * of bytes to store at *to, and *from must be zero-terminated.  The output
 * is zero-terminated iff there is room.
 */
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen)
{
	if (tolen == 0)
		return 0;

#ifdef WIN32
	/*
	 * On Windows, the "Unicode" locales assume UTF16 rather than UTF8
	 * encoding, and mbstowcs/wcstombs won't handle this for us, so the Win32
	 * conversion API is called directly.
	 */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		size_t		nbytes;

		nbytes = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
									 NULL, NULL);

		/* A zero return is failure */
		if (nbytes == 0)
			return (size_t) -1;

		Assert(nbytes <= tolen);
		/* Microsoft counts the zero terminator in the result */
		return nbytes - 1;
	}
#endif   /* WIN32 */

	return wcstombs(to, from, tolen);
}
605 * char2wchar --- convert multibyte characters to wide characters
607 * This has almost the API of mbstowcs(), except that *from need not be
608 * null-terminated; instead, the number of input bytes is specified as
609 * fromlen. Also, we ereport() rather than returning -1 for invalid
610 * input encoding. tolen is the maximum number of wchar_t's to store at *to.
611 * The output will be zero-terminated iff there is room.
613 size_t
614 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
616 size_t result;
618 if (tolen == 0)
619 return 0;
621 #ifdef WIN32
622 /* See WIN32 "Unicode" comment above */
623 if (GetDatabaseEncoding() == PG_UTF8)
625 /* Win32 API does not work for zero-length input */
626 if (fromlen == 0)
627 result = 0;
628 else
630 result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
631 /* A zero return is failure */
632 if (result == 0)
633 result = -1;
636 if (result != -1)
638 Assert(result < tolen);
639 /* Append trailing null wchar (MultiByteToWideChar() does not) */
640 to[result] = 0;
643 else
644 #endif /* WIN32 */
646 if (lc_ctype_is_c())
649 * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
650 * allocated with sufficient space
652 result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
654 else
656 /* mbstowcs requires ending '\0' */
657 char *str = pnstrdup(from, fromlen);
659 result = mbstowcs(to, str, tolen);
660 pfree(str);
664 if (result == -1)
667 * Invalid multibyte character encountered. We try to give a useful
668 * error message by letting pg_verifymbstr check the string. But it's
669 * possible that the string is OK to us, and not OK to mbstowcs ---
670 * this suggests that the LC_CTYPE locale is different from the
671 * database encoding. Give a generic error message if verifymbstr
672 * can't find anything wrong.
674 pg_verifymbstr(from, fromlen, false); /* might not return */
675 /* but if it does ... */
676 ereport(ERROR,
677 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
678 errmsg("invalid multibyte character for locale"),
679 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
682 return result;
685 #endif
687 /* convert a multibyte string to a wchar */
689 pg_mb2wchar(const char *from, pg_wchar *to)
691 return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) ((const unsigned char *) from, to, strlen(from));
694 /* convert a multibyte string to a wchar with a limited length */
696 pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
698 return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
701 /* same, with any encoding */
703 pg_encoding_mb2wchar_with_len(int encoding,
704 const char *from, pg_wchar *to, int len)
706 return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
709 /* returns the byte length of a multibyte word */
711 pg_mblen(const char *mbstr)
713 return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) ((const unsigned char *) mbstr));
716 /* returns the display length of a multibyte word */
718 pg_dsplen(const char *mbstr)
720 return ((*pg_wchar_table[DatabaseEncoding->encoding].dsplen) ((const unsigned char *) mbstr));
/*
 * pg_mbstrlen
 *
 * Return the length, counted in characters, of a null-terminated multibyte
 * string.
 */
int
pg_mbstrlen(const char *mbstr)
{
	int			nchars;

	/* With a single-byte encoding, bytes and characters coincide */
	if (pg_database_encoding_max_length() == 1)
		return strlen(mbstr);

	for (nchars = 0; *mbstr != '\0'; nchars++)
		mbstr += pg_mblen(mbstr);

	return nchars;
}
/*
 * pg_mbstrlen_with_len
 *
 * Return the length, counted in characters, of a multibyte string that is
 * not necessarily null-terminated; "limit" is its length in bytes.
 */
int
pg_mbstrlen_with_len(const char *mbstr, int limit)
{
	int			nchars;

	/* With a single-byte encoding, the byte count is returned directly */
	if (pg_database_encoding_max_length() == 1)
		return limit;

	for (nchars = 0; limit > 0 && *mbstr != '\0'; nchars++)
	{
		int			chlen = pg_mblen(mbstr);

		mbstr += chlen;
		limit -= chlen;
	}

	return nchars;
}
/*
 * pg_mbcliplen
 *
 * Return the byte length of the longest prefix of a multibyte string (not
 * necessarily null-terminated, "len" bytes long) that is no longer than
 * "limit" bytes.  A multibyte character is never split.
 */
int
pg_mbcliplen(const char *mbstr, int len, int limit)
{
	int			clipped = 0;

	/* With a single-byte encoding, a plain byte-wise clip is enough */
	if (pg_database_encoding_max_length() == 1)
		return cliplen(mbstr, len, limit);

	while (len > 0 && *mbstr != '\0')
	{
		int			chlen = pg_mblen(mbstr);

		/* Stop before a character that would overshoot the limit */
		if (clipped + chlen > limit)
			return clipped;

		clipped += chlen;

		/* Stop as soon as the limit is hit exactly */
		if (clipped == limit)
			return clipped;

		len -= chlen;
		mbstr += chlen;
	}

	return clipped;
}
/*
 * pg_mbcharcliplen
 *
 * Similar to pg_mbcliplen, except that "limit" is a length in characters
 * rather than in bytes.
 */
int
pg_mbcharcliplen(const char *mbstr, int len, int limit)
{
	int			clipped = 0;
	int			nchars;

	/* With a single-byte encoding, a plain byte-wise clip is enough */
	if (pg_database_encoding_max_length() == 1)
		return cliplen(mbstr, len, limit);

	for (nchars = 0; len > 0 && *mbstr != '\0'; nchars++)
	{
		int			chlen = pg_mblen(mbstr);

		/* Accepting this character would exceed the character limit */
		if (nchars >= limit)
			break;

		clipped += chlen;
		len -= chlen;
		mbstr += chlen;
	}

	return clipped;
}
821 void
822 SetDatabaseEncoding(int encoding)
824 if (!PG_VALID_BE_ENCODING(encoding))
825 elog(ERROR, "invalid database encoding: %d", encoding);
827 DatabaseEncoding = &pg_enc2name_tbl[encoding];
828 Assert(DatabaseEncoding->encoding == encoding);
831 * On Windows, we allow UTF-8 database encoding to be used with any
832 * locale setting, because UTF-8 requires special handling anyway.
833 * But this means that gettext() might be misled about what output
834 * encoding it should use, so we have to tell it explicitly.
836 * In future we might want to call bind_textdomain_codeset
837 * unconditionally, but that requires knowing how to spell the codeset
838 * name properly for all encodings on all platforms, which might be
839 * problematic.
841 * This is presently unnecessary, but harmless, on non-Windows platforms.
843 #ifdef ENABLE_NLS
844 if (encoding == PG_UTF8)
845 if (bind_textdomain_codeset("postgres", "UTF-8") == NULL)
846 elog(LOG, "bind_textdomain_codeset failed");
847 #endif
850 void
851 SetDefaultClientEncoding(void)
853 ClientEncoding = &pg_enc2name_tbl[GetDatabaseEncoding()];
857 GetDatabaseEncoding(void)
859 Assert(DatabaseEncoding);
860 return DatabaseEncoding->encoding;
863 const char *
864 GetDatabaseEncodingName(void)
866 Assert(DatabaseEncoding);
867 return DatabaseEncoding->name;
870 Datum
871 getdatabaseencoding(PG_FUNCTION_ARGS)
873 Assert(DatabaseEncoding);
874 return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name));
877 Datum
878 pg_client_encoding(PG_FUNCTION_ARGS)
880 Assert(ClientEncoding);
881 return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
/*
 * cliplen
 *
 * Byte-wise clip for single-byte encodings: return the number of leading
 * bytes of "str" that precede the first NUL byte, capped at both "len"
 * (the number of bytes available at str) and "limit".
 *
 * "str" need not be null-terminated.  The bound is checked before each byte
 * is examined, so no byte at index >= len is ever read.  Returns 0 when
 * either len or limit is <= 0.
 */
static int
cliplen(const char *str, int len, int limit)
{
	int			bound = (len < limit) ? len : limit;
	int			n = 0;

	/* Test the bound first: str[bound] may lie outside the buffer */
	while (n < bound && str[n] != '\0')
		n++;

	return n;
}