Don't use 'return' where you should use 'PG_RETURN_xxx'.
[PostgreSQL.git] / src / backend / utils / mb / mbutils.c
blobfa4ec39dc167da4214bcb132869f3b15ed0539ec
1 /*
2 * This file contains public functions for conversion between
3 * client encoding and server (database) encoding.
5 * Tatsuo Ishii
7 * $PostgreSQL$
8 */
9 #include "postgres.h"
11 #include "access/xact.h"
12 #include "catalog/namespace.h"
13 #include "mb/pg_wchar.h"
14 #include "utils/builtins.h"
15 #include "utils/memutils.h"
16 #include "utils/pg_locale.h"
17 #include "utils/syscache.h"
/*
 * Worst-case growth factor assumed when sizing the output buffer of an
 * encoding conversion: the result gets room for four bytes per input byte.
 * All currently supported encoding pairs stay within a factor of 3 (SJIS
 * JIS X0201 half-width kana -> UTF8 is the worst case), so 4 should be
 * enough for the moment.
 *
 * Note that this is not the same as the maximum character width in any
 * particular encoding.
 */
#define MAX_CONVERSION_GROWTH	4
31 * We maintain a simple linked list caching the fmgr lookup info for the
32 * currently selected conversion functions, as well as any that have been
33 * selected previously in the current session. (We remember previous
34 * settings because we must be able to restore a previous setting during
35 * transaction rollback, without doing any fresh catalog accesses.)
37 * Since we'll never release this data, we just keep it in TopMemoryContext.
39 typedef struct ConvProcInfo
41 int s_encoding; /* server and client encoding IDs */
42 int c_encoding;
43 FmgrInfo to_server_info; /* lookup info for conversion procs */
44 FmgrInfo to_client_info;
45 } ConvProcInfo;
47 static List *ConvProcList = NIL; /* List of ConvProcInfo */
50 * These variables point to the currently active conversion functions,
51 * or are NULL when no conversion is needed.
53 static FmgrInfo *ToServerConvProc = NULL;
54 static FmgrInfo *ToClientConvProc = NULL;
57 * These variables track the currently selected FE and BE encodings.
59 static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
60 static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
63 * During backend startup we can't set client encoding because we (a)
64 * can't look up the conversion functions, and (b) may not know the database
65 * encoding yet either. So SetClientEncoding() just accepts anything and
66 * remembers it for InitializeClientEncoding() to apply later.
68 static bool backend_startup_complete = false;
69 static int pending_client_encoding = PG_SQL_ASCII;
72 /* Internal functions */
73 static char *perform_default_encoding_conversion(const char *src,
74 int len, bool is_client_to_server);
75 static int cliplen(const char *str, int len, int limit);
79 * Set the client encoding and save fmgrinfo for the conversion
80 * function if necessary. Returns 0 if okay, -1 if not (bad encoding
81 * or can't support conversion)
83 int
84 SetClientEncoding(int encoding, bool doit)
86 int current_server_encoding;
87 ListCell *lc;
89 if (!PG_VALID_FE_ENCODING(encoding))
90 return -1;
92 /* Can't do anything during startup, per notes above */
93 if (!backend_startup_complete)
95 if (doit)
96 pending_client_encoding = encoding;
97 return 0;
100 current_server_encoding = GetDatabaseEncoding();
103 * Check for cases that require no conversion function.
105 if (current_server_encoding == encoding ||
106 current_server_encoding == PG_SQL_ASCII ||
107 encoding == PG_SQL_ASCII)
109 if (doit)
111 ClientEncoding = &pg_enc2name_tbl[encoding];
112 ToServerConvProc = NULL;
113 ToClientConvProc = NULL;
115 return 0;
118 if (IsTransactionState())
121 * If we're in a live transaction, it's safe to access the catalogs,
122 * so look up the functions. We repeat the lookup even if the info is
123 * already cached, so that we can react to changes in the contents of
124 * pg_conversion.
126 Oid to_server_proc,
127 to_client_proc;
128 ConvProcInfo *convinfo;
129 MemoryContext oldcontext;
131 to_server_proc = FindDefaultConversionProc(encoding,
132 current_server_encoding);
133 if (!OidIsValid(to_server_proc))
134 return -1;
135 to_client_proc = FindDefaultConversionProc(current_server_encoding,
136 encoding);
137 if (!OidIsValid(to_client_proc))
138 return -1;
141 * Done if not wanting to actually apply setting.
143 if (!doit)
144 return 0;
147 * Load the fmgr info into TopMemoryContext (could still fail here)
149 convinfo = (ConvProcInfo *) MemoryContextAlloc(TopMemoryContext,
150 sizeof(ConvProcInfo));
151 convinfo->s_encoding = current_server_encoding;
152 convinfo->c_encoding = encoding;
153 fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
154 TopMemoryContext);
155 fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
156 TopMemoryContext);
158 /* Attach new info to head of list */
159 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
160 ConvProcList = lcons(convinfo, ConvProcList);
161 MemoryContextSwitchTo(oldcontext);
164 * Everything is okay, so apply the setting.
166 ClientEncoding = &pg_enc2name_tbl[encoding];
167 ToServerConvProc = &convinfo->to_server_info;
168 ToClientConvProc = &convinfo->to_client_info;
171 * Remove any older entry for the same encoding pair (this is just to
172 * avoid memory leakage).
174 foreach(lc, ConvProcList)
176 ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
178 if (oldinfo == convinfo)
179 continue;
180 if (oldinfo->s_encoding == convinfo->s_encoding &&
181 oldinfo->c_encoding == convinfo->c_encoding)
183 ConvProcList = list_delete_ptr(ConvProcList, oldinfo);
184 pfree(oldinfo);
185 break; /* need not look further */
189 return 0; /* success */
191 else
194 * If we're not in a live transaction, the only thing we can do is
195 * restore a previous setting using the cache. This covers all
196 * transaction-rollback cases. The only case it might not work for is
197 * trying to change client_encoding on the fly by editing
198 * postgresql.conf and SIGHUP'ing. Which would probably be a stupid
199 * thing to do anyway.
201 foreach(lc, ConvProcList)
203 ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
205 if (oldinfo->s_encoding == current_server_encoding &&
206 oldinfo->c_encoding == encoding)
208 if (doit)
210 ClientEncoding = &pg_enc2name_tbl[encoding];
211 ToServerConvProc = &oldinfo->to_server_info;
212 ToClientConvProc = &oldinfo->to_client_info;
214 return 0;
218 return -1; /* it's not cached, so fail */
223 * Initialize client encoding if necessary.
224 * called from InitPostgres() once during backend startup.
226 void
227 InitializeClientEncoding(void)
229 Assert(!backend_startup_complete);
230 backend_startup_complete = true;
232 if (SetClientEncoding(pending_client_encoding, true) < 0)
235 * Oops, the requested conversion is not available. We couldn't fail
236 * before, but we can now.
238 ereport(FATAL,
239 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
240 errmsg("conversion between %s and %s is not supported",
241 pg_enc2name_tbl[pending_client_encoding].name,
242 GetDatabaseEncodingName())));
247 * returns the current client encoding
250 pg_get_client_encoding(void)
252 Assert(ClientEncoding);
253 return ClientEncoding->encoding;
257 * returns the current client encoding name
259 const char *
260 pg_get_client_encoding_name(void)
262 Assert(ClientEncoding);
263 return ClientEncoding->name;
267 * Apply encoding conversion on src and return it. The encoding
268 * conversion function is chosen from the pg_conversion system catalog
269 * marked as "default". If it is not found in the schema search path,
270 * it's taken from pg_catalog schema. If it even is not in the schema,
271 * warn and return src.
273 * If conversion occurs, a palloc'd null-terminated string is returned.
274 * In the case of no conversion, src is returned.
276 * CAUTION: although the presence of a length argument means that callers
277 * can pass non-null-terminated strings, care is required because the same
278 * string will be passed back if no conversion occurs. Such callers *must*
279 * check whether result == src and handle that case differently.
281 * Note: we try to avoid raising error, since that could get us into
282 * infinite recursion when this function is invoked during error message
283 * sending. It should be OK to raise error for overlength strings though,
284 * since the recursion will come with a shorter message.
286 unsigned char *
287 pg_do_encoding_conversion(unsigned char *src, int len,
288 int src_encoding, int dest_encoding)
290 unsigned char *result;
291 Oid proc;
293 if (!IsTransactionState())
294 return src;
296 if (src_encoding == dest_encoding)
297 return src;
299 if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
300 return src;
302 if (len <= 0)
303 return src;
305 proc = FindDefaultConversionProc(src_encoding, dest_encoding);
306 if (!OidIsValid(proc))
308 ereport(LOG,
309 (errcode(ERRCODE_UNDEFINED_FUNCTION),
310 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
311 pg_encoding_to_char(src_encoding),
312 pg_encoding_to_char(dest_encoding))));
313 return src;
317 * XXX we should avoid throwing errors in OidFunctionCall. Otherwise we
318 * are going into infinite loop! So we have to make sure that the
319 * function exists before calling OidFunctionCall.
321 if (!SearchSysCacheExists(PROCOID,
322 ObjectIdGetDatum(proc),
323 0, 0, 0))
325 elog(LOG, "cache lookup failed for function %u", proc);
326 return src;
330 * Allocate space for conversion result, being wary of integer overflow
332 if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
333 ereport(ERROR,
334 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
335 errmsg("out of memory"),
336 errdetail("String of %d bytes is too long for encoding conversion.",
337 len)));
339 result = palloc(len * MAX_CONVERSION_GROWTH + 1);
341 OidFunctionCall5(proc,
342 Int32GetDatum(src_encoding),
343 Int32GetDatum(dest_encoding),
344 CStringGetDatum(src),
345 CStringGetDatum(result),
346 Int32GetDatum(len));
347 return result;
351 * Convert string using encoding_name. The source
352 * encoding is the DB encoding.
354 * BYTEA convert_to(TEXT string, NAME encoding_name) */
355 Datum
356 pg_convert_to(PG_FUNCTION_ARGS)
358 Datum string = PG_GETARG_DATUM(0);
359 Datum dest_encoding_name = PG_GETARG_DATUM(1);
360 Datum src_encoding_name = DirectFunctionCall1(namein,
361 CStringGetDatum(DatabaseEncoding->name));
362 Datum result;
365 * pg_convert expects a bytea as its first argument. We're passing it a
366 * text argument here, relying on the fact that they are both in fact
367 * varlena types, and thus structurally identical.
369 result = DirectFunctionCall3(pg_convert, string,
370 src_encoding_name, dest_encoding_name);
372 PG_RETURN_DATUM(result);
376 * Convert string using encoding_name. The destination
377 * encoding is the DB encoding.
379 * TEXT convert_from(BYTEA string, NAME encoding_name) */
380 Datum
381 pg_convert_from(PG_FUNCTION_ARGS)
383 Datum string = PG_GETARG_DATUM(0);
384 Datum src_encoding_name = PG_GETARG_DATUM(1);
385 Datum dest_encoding_name = DirectFunctionCall1(namein,
386 CStringGetDatum(DatabaseEncoding->name));
387 Datum result;
389 result = DirectFunctionCall3(pg_convert, string,
390 src_encoding_name, dest_encoding_name);
393 * pg_convert returns a bytea, which we in turn return as text, relying on
394 * the fact that they are both in fact varlena types, and thus
395 * structurally identical. Although not all bytea values are valid text,
396 * in this case it will be because we've told pg_convert to return one
397 * that is valid as text in the current database encoding.
399 PG_RETURN_DATUM(result);
403 * Convert string using encoding_names.
405 * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
407 Datum
408 pg_convert(PG_FUNCTION_ARGS)
410 bytea *string = PG_GETARG_BYTEA_P(0);
411 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
412 int src_encoding = pg_char_to_encoding(src_encoding_name);
413 char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
414 int dest_encoding = pg_char_to_encoding(dest_encoding_name);
415 unsigned char *result;
416 bytea *retval;
417 unsigned char *str;
418 int len;
420 if (src_encoding < 0)
421 ereport(ERROR,
422 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
423 errmsg("invalid source encoding name \"%s\"",
424 src_encoding_name)));
425 if (dest_encoding < 0)
426 ereport(ERROR,
427 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
428 errmsg("invalid destination encoding name \"%s\"",
429 dest_encoding_name)));
431 /* make sure that source string is valid and null terminated */
432 len = VARSIZE(string) - VARHDRSZ;
433 pg_verify_mbstr(src_encoding, VARDATA(string), len, false);
434 str = palloc(len + 1);
435 memcpy(str, VARDATA(string), len);
436 *(str + len) = '\0';
438 result = pg_do_encoding_conversion(str, len, src_encoding, dest_encoding);
441 * build bytea data type structure.
443 len = strlen((char *) result) + VARHDRSZ;
444 retval = palloc(len);
445 SET_VARSIZE(retval, len);
446 memcpy(VARDATA(retval), result, len - VARHDRSZ);
448 if (result != str)
449 pfree(result);
450 pfree(str);
452 /* free memory if allocated by the toaster */
453 PG_FREE_IF_COPY(string, 0);
455 PG_RETURN_BYTEA_P(retval);
459 * get the length of the string considered as text in the specified
460 * encoding. Raises an error if the data is not valid in that
461 * encoding.
463 * INT4 length (BYTEA string, NAME src_encoding_name)
465 Datum
466 length_in_encoding(PG_FUNCTION_ARGS)
468 bytea *string = PG_GETARG_BYTEA_P(0);
469 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
470 int src_encoding = pg_char_to_encoding(src_encoding_name);
471 int len = VARSIZE(string) - VARHDRSZ;
472 int retval;
474 if (src_encoding < 0)
475 ereport(ERROR,
476 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
477 errmsg("invalid encoding name \"%s\"",
478 src_encoding_name)));
480 retval = pg_verify_mbstr_len(src_encoding, VARDATA(string), len, false);
481 PG_RETURN_INT32(retval);
485 Datum
486 pg_encoding_max_length_sql(PG_FUNCTION_ARGS)
488 int encoding = PG_GETARG_INT32(0);
490 if (PG_VALID_ENCODING(encoding))
491 PG_RETURN_INT32(pg_wchar_table[encoding].maxmblen);
492 else
493 PG_RETURN_NULL();
497 * convert client encoding to server encoding.
499 char *
500 pg_client_to_server(const char *s, int len)
502 Assert(DatabaseEncoding);
503 Assert(ClientEncoding);
505 if (len <= 0)
506 return (char *) s;
508 if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
509 ClientEncoding->encoding == PG_SQL_ASCII)
512 * No conversion is needed, but we must still validate the data.
514 (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
515 return (char *) s;
518 if (DatabaseEncoding->encoding == PG_SQL_ASCII)
521 * No conversion is possible, but we must still validate the data,
522 * because the client-side code might have done string escaping using
523 * the selected client_encoding. If the client encoding is ASCII-safe
524 * then we just do a straight validation under that encoding. For an
525 * ASCII-unsafe encoding we have a problem: we dare not pass such data
526 * to the parser but we have no way to convert it. We compromise by
527 * rejecting the data if it contains any non-ASCII characters.
529 if (PG_VALID_BE_ENCODING(ClientEncoding->encoding))
530 (void) pg_verify_mbstr(ClientEncoding->encoding, s, len, false);
531 else
533 int i;
535 for (i = 0; i < len; i++)
537 if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
538 ereport(ERROR,
539 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
540 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
541 pg_enc2name_tbl[PG_SQL_ASCII].name,
542 (unsigned char) s[i])));
545 return (char *) s;
548 return perform_default_encoding_conversion(s, len, true);
552 * convert server encoding to client encoding.
554 char *
555 pg_server_to_client(const char *s, int len)
557 Assert(DatabaseEncoding);
558 Assert(ClientEncoding);
560 if (len <= 0)
561 return (char *) s;
563 if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
564 ClientEncoding->encoding == PG_SQL_ASCII ||
565 DatabaseEncoding->encoding == PG_SQL_ASCII)
566 return (char *) s; /* assume data is valid */
568 return perform_default_encoding_conversion(s, len, false);
572 * Perform default encoding conversion using cached FmgrInfo. Since
573 * this function does not access database at all, it is safe to call
574 * outside transactions. If the conversion has not been set up by
575 * SetClientEncoding(), no conversion is performed.
577 static char *
578 perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
580 char *result;
581 int src_encoding,
582 dest_encoding;
583 FmgrInfo *flinfo;
585 if (is_client_to_server)
587 src_encoding = ClientEncoding->encoding;
588 dest_encoding = DatabaseEncoding->encoding;
589 flinfo = ToServerConvProc;
591 else
593 src_encoding = DatabaseEncoding->encoding;
594 dest_encoding = ClientEncoding->encoding;
595 flinfo = ToClientConvProc;
598 if (flinfo == NULL)
599 return (char *) src;
602 * Allocate space for conversion result, being wary of integer overflow
604 if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
605 ereport(ERROR,
606 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
607 errmsg("out of memory"),
608 errdetail("String of %d bytes is too long for encoding conversion.",
609 len)));
611 result = palloc(len * MAX_CONVERSION_GROWTH + 1);
613 FunctionCall5(flinfo,
614 Int32GetDatum(src_encoding),
615 Int32GetDatum(dest_encoding),
616 CStringGetDatum(src),
617 CStringGetDatum(result),
618 Int32GetDatum(len));
619 return result;
624 #ifdef USE_WIDE_UPPER_LOWER
/*
 * wchar2char --- convert wide characters to multibyte format
 *
 * This has the same API as the standard wcstombs() function; in particular,
 * tolen is the maximum number of bytes to store at *to, and *from must be
 * zero-terminated.  The output will be zero-terminated iff there is room.
 */
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen)
{
	size_t		result;

	if (tolen == 0)
		return 0;

#ifdef WIN32

	/*
	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
	 * for some reason mbstowcs and wcstombs won't do this for us, so
	 * WideCharToMultiByte() is used for a UTF8 database.
	 */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
									 NULL, NULL);

		/* A zero return is failure */
		if (result == 0)
			return (size_t) -1;

		Assert(result <= tolen);

		/* Microsoft counts the zero terminator in the result */
		return result - 1;
	}
#endif   /* WIN32 */

	Assert(!lc_ctype_is_c());
	result = wcstombs(to, from, tolen);

	return result;
}
672 * char2wchar --- convert multibyte characters to wide characters
674 * This has almost the API of mbstowcs(), except that *from need not be
675 * null-terminated; instead, the number of input bytes is specified as
676 * fromlen. Also, we ereport() rather than returning -1 for invalid
677 * input encoding. tolen is the maximum number of wchar_t's to store at *to.
678 * The output will be zero-terminated iff there is room.
680 size_t
681 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
683 size_t result;
685 if (tolen == 0)
686 return 0;
688 #ifdef WIN32
689 /* See WIN32 "Unicode" comment above */
690 if (GetDatabaseEncoding() == PG_UTF8)
692 /* Win32 API does not work for zero-length input */
693 if (fromlen == 0)
694 result = 0;
695 else
697 result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
698 /* A zero return is failure */
699 if (result == 0)
700 result = -1;
703 if (result != -1)
705 Assert(result < tolen);
706 /* Append trailing null wchar (MultiByteToWideChar() does not) */
707 to[result] = 0;
710 else
711 #endif /* WIN32 */
713 /* mbstowcs requires ending '\0' */
714 char *str = pnstrdup(from, fromlen);
716 Assert(!lc_ctype_is_c());
717 result = mbstowcs(to, str, tolen);
718 pfree(str);
721 if (result == -1)
724 * Invalid multibyte character encountered. We try to give a useful
725 * error message by letting pg_verifymbstr check the string. But it's
726 * possible that the string is OK to us, and not OK to mbstowcs ---
727 * this suggests that the LC_CTYPE locale is different from the
728 * database encoding. Give a generic error message if verifymbstr
729 * can't find anything wrong.
731 pg_verifymbstr(from, fromlen, false); /* might not return */
732 /* but if it does ... */
733 ereport(ERROR,
734 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
735 errmsg("invalid multibyte character for locale"),
736 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
739 return result;
741 #endif
743 /* convert a multibyte string to a wchar */
745 pg_mb2wchar(const char *from, pg_wchar *to)
747 return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) ((const unsigned char *) from, to, strlen(from));
750 /* convert a multibyte string to a wchar with a limited length */
752 pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
754 return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
757 /* same, with any encoding */
759 pg_encoding_mb2wchar_with_len(int encoding,
760 const char *from, pg_wchar *to, int len)
762 return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
765 /* returns the byte length of a multibyte character */
767 pg_mblen(const char *mbstr)
769 return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) ((const unsigned char *) mbstr));
772 /* returns the display length of a multibyte character */
774 pg_dsplen(const char *mbstr)
776 return ((*pg_wchar_table[DatabaseEncoding->encoding].dsplen) ((const unsigned char *) mbstr));
/*
 * Returns the length (counted in wchars) of a null-terminated multibyte
 * string in the database encoding.
 */
int
pg_mbstrlen(const char *mbstr)
{
	int			nchars;

	/* optimization for single byte encoding */
	if (pg_database_encoding_max_length() == 1)
		return strlen(mbstr);

	/* step over one character per iteration until the terminator */
	for (nchars = 0; *mbstr; nchars++)
		mbstr += pg_mblen(mbstr);

	return nchars;
}
/*
 * Returns the length (counted in wchars) of a multibyte string that is not
 * necessarily null terminated; limit is the number of bytes available.
 *
 * For a single-byte database encoding, limit itself is returned.
 * Otherwise counting stops when limit is used up or a zero byte is found.
 */
int
pg_mbstrlen_with_len(const char *mbstr, int limit)
{
	int			nchars;

	/* optimization for single byte encoding */
	if (pg_database_encoding_max_length() == 1)
		return limit;

	for (nchars = 0; limit > 0 && *mbstr; nchars++)
	{
		int			charlen = pg_mblen(mbstr);

		mbstr += charlen;
		limit -= charlen;
	}

	return nchars;
}
821 * returns the byte length of a multibyte string
822 * (not necessarily NULL terminated)
823 * that is no longer than limit.
824 * this function does not break multibyte character boundary.
827 pg_mbcliplen(const char *mbstr, int len, int limit)
829 return pg_encoding_mbcliplen(DatabaseEncoding->encoding, mbstr,
830 len, limit);
834 * pg_mbcliplen with specified encoding
837 pg_encoding_mbcliplen(int encoding, const char *mbstr,
838 int len, int limit)
840 mblen_converter mblen_fn;
841 int clen = 0;
842 int l;
844 /* optimization for single byte encoding */
845 if (pg_encoding_max_length(encoding) == 1)
846 return cliplen(mbstr, len, limit);
848 mblen_fn = pg_wchar_table[encoding].mblen;
850 while (len > 0 && *mbstr)
852 l = (*mblen_fn) ((const unsigned char *) mbstr);
853 if ((clen + l) > limit)
854 break;
855 clen += l;
856 if (clen == limit)
857 break;
858 len -= l;
859 mbstr += l;
861 return clen;
/*
 * Similar to pg_mbcliplen except the limit parameter specifies the
 * character length, not the byte length.  The result is still a byte
 * count.
 */
int
pg_mbcharcliplen(const char *mbstr, int len, int limit)
{
	int			clipped = 0;
	int			nchars = 0;

	/* optimization for single byte encoding */
	if (pg_database_encoding_max_length() == 1)
		return cliplen(mbstr, len, limit);

	while (len > 0 && *mbstr)
	{
		int			charlen = pg_mblen(mbstr);

		/* count this character; stop if it exceeds the character limit */
		if (++nchars > limit)
			break;

		clipped += charlen;
		mbstr += charlen;
		len -= charlen;
	}

	return clipped;
}
/*
 * mbcliplen for any single-byte encoding
 *
 * Returns the number of bytes of str that lie before the first zero byte,
 * capped at the smaller of len and limit.
 */
static int
cliplen(const char *str, int len, int limit)
{
	int			bound = (len < limit) ? len : limit;
	int			n;

	for (n = 0; n < bound && str[n] != '\0'; n++)
		;

	return n;
}
904 void
905 SetDatabaseEncoding(int encoding)
907 if (!PG_VALID_BE_ENCODING(encoding))
908 elog(ERROR, "invalid database encoding: %d", encoding);
910 DatabaseEncoding = &pg_enc2name_tbl[encoding];
911 Assert(DatabaseEncoding->encoding == encoding);
/*
 * Bind gettext to the codeset equivalent with the database encoding.
 *
 * Does nothing unless ENABLE_NLS is defined.
 */
void
pg_bind_textdomain_codeset(const char *domainname)
{
#if defined(ENABLE_NLS)
	int			encoding = GetDatabaseEncoding();
	int			i;

	/*
	 * gettext() uses the codeset specified by LC_CTYPE by default, so when
	 * that matches the database encoding there is nothing to do.  CREATE
	 * DATABASE enforces or trusts that the locale's codeset matches the
	 * database encoding, except for the C locale.  In the C (or POSIX)
	 * locale, gettext() is bound explicitly to the right codeset.
	 *
	 * On Windows, though, gettext() tends to get confused so it is always
	 * bound.
	 */
#ifndef WIN32
	const char *ctype = setlocale(LC_CTYPE, NULL);

	if (!(pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0))
		return;
#endif

	/* find the gettext codeset name for the database encoding and bind it */
	for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++)
	{
		if (pg_enc2gettext_tbl[i].encoding != encoding)
			continue;

		if (bind_textdomain_codeset(domainname,
									pg_enc2gettext_tbl[i].name) == NULL)
			elog(LOG, "bind_textdomain_codeset failed");
		break;
	}
#endif
}
955 GetDatabaseEncoding(void)
957 Assert(DatabaseEncoding);
958 return DatabaseEncoding->encoding;
961 const char *
962 GetDatabaseEncodingName(void)
964 Assert(DatabaseEncoding);
965 return DatabaseEncoding->name;
968 Datum
969 getdatabaseencoding(PG_FUNCTION_ARGS)
971 Assert(DatabaseEncoding);
972 return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name));
975 Datum
976 pg_client_encoding(PG_FUNCTION_ARGS)
978 Assert(ClientEncoding);
979 return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));