1 /*-------------------------------------------------------------------------
4 * collation-related commands support code
6 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/commands/collationcmds.c
13 *-------------------------------------------------------------------------
17 #include "access/htup_details.h"
18 #include "access/table.h"
19 #include "access/xact.h"
20 #include "catalog/dependency.h"
21 #include "catalog/indexing.h"
22 #include "catalog/namespace.h"
23 #include "catalog/objectaccess.h"
24 #include "catalog/pg_collation.h"
25 #include "commands/alter.h"
26 #include "commands/collationcmds.h"
27 #include "commands/comment.h"
28 #include "commands/dbcommands.h"
29 #include "commands/defrem.h"
30 #include "common/string.h"
31 #include "mb/pg_wchar.h"
32 #include "miscadmin.h"
33 #include "utils/acl.h"
34 #include "utils/builtins.h"
35 #include "utils/lsyscache.h"
36 #include "utils/pg_locale.h"
37 #include "utils/rel.h"
38 #include "utils/syscache.h"
/*
 * CollAliasData: one pending alias for a libc locale.
 *
 * pg_import_system_collations() accumulates these in an expansible array
 * while reading the "locale -a" output, then sorts them with cmpaliases()
 * and creates the alias collations afterwards.  The string members are
 * filled with pstrdup'd copies by that function.
 */
typedef struct
{
	char	   *localename;		/* name of locale, as per "locale -a" */
	char	   *alias;			/* shortened alias for same */
	int			enc;			/* encoding */
} CollAliasData;
53 DefineCollation(ParseState
*pstate
, List
*names
, List
*parameters
, bool if_not_exists
)
59 DefElem
*fromEl
= NULL
;
60 DefElem
*localeEl
= NULL
;
61 DefElem
*lccollateEl
= NULL
;
62 DefElem
*lcctypeEl
= NULL
;
63 DefElem
*providerEl
= NULL
;
64 DefElem
*deterministicEl
= NULL
;
65 DefElem
*versionEl
= NULL
;
69 bool collisdeterministic
;
72 char *collversion
= NULL
;
74 ObjectAddress address
;
76 collNamespace
= QualifiedNameGetCreationNamespace(names
, &collName
);
78 aclresult
= pg_namespace_aclcheck(collNamespace
, GetUserId(), ACL_CREATE
);
79 if (aclresult
!= ACLCHECK_OK
)
80 aclcheck_error(aclresult
, OBJECT_SCHEMA
,
81 get_namespace_name(collNamespace
));
83 foreach(pl
, parameters
)
85 DefElem
*defel
= lfirst_node(DefElem
, pl
);
88 if (strcmp(defel
->defname
, "from") == 0)
90 else if (strcmp(defel
->defname
, "locale") == 0)
92 else if (strcmp(defel
->defname
, "lc_collate") == 0)
93 defelp
= &lccollateEl
;
94 else if (strcmp(defel
->defname
, "lc_ctype") == 0)
96 else if (strcmp(defel
->defname
, "provider") == 0)
98 else if (strcmp(defel
->defname
, "deterministic") == 0)
99 defelp
= &deterministicEl
;
100 else if (strcmp(defel
->defname
, "version") == 0)
105 (errcode(ERRCODE_SYNTAX_ERROR
),
106 errmsg("collation attribute \"%s\" not recognized",
108 parser_errposition(pstate
, defel
->location
)));
112 errorConflictingDefElem(defel
, pstate
);
116 if (localeEl
&& (lccollateEl
|| lcctypeEl
))
118 errcode(ERRCODE_SYNTAX_ERROR
),
119 errmsg("conflicting or redundant options"),
120 errdetail("LOCALE cannot be specified together with LC_COLLATE or LC_CTYPE."));
122 if (fromEl
&& list_length(parameters
) != 1)
124 errcode(ERRCODE_SYNTAX_ERROR
),
125 errmsg("conflicting or redundant options"),
126 errdetail("FROM cannot be specified together with any other options."));
135 collid
= get_collation_oid(defGetQualifiedName(fromEl
), false);
136 tp
= SearchSysCache1(COLLOID
, ObjectIdGetDatum(collid
));
137 if (!HeapTupleIsValid(tp
))
138 elog(ERROR
, "cache lookup failed for collation %u", collid
);
140 collprovider
= ((Form_pg_collation
) GETSTRUCT(tp
))->collprovider
;
141 collisdeterministic
= ((Form_pg_collation
) GETSTRUCT(tp
))->collisdeterministic
;
142 collencoding
= ((Form_pg_collation
) GETSTRUCT(tp
))->collencoding
;
144 datum
= SysCacheGetAttr(COLLOID
, tp
, Anum_pg_collation_collcollate
, &isnull
);
146 collcollate
= TextDatumGetCString(datum
);
150 datum
= SysCacheGetAttr(COLLOID
, tp
, Anum_pg_collation_collctype
, &isnull
);
152 collctype
= TextDatumGetCString(datum
);
156 datum
= SysCacheGetAttr(COLLOID
, tp
, Anum_pg_collation_colliculocale
, &isnull
);
158 colliculocale
= TextDatumGetCString(datum
);
160 colliculocale
= NULL
;
165 * Copying the "default" collation is not allowed because most code
166 * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT,
167 * and so having a second collation with COLLPROVIDER_DEFAULT would
168 * not work and potentially confuse or crash some code. This could be
169 * fixed with some legwork.
171 if (collprovider
== COLLPROVIDER_DEFAULT
)
173 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION
),
174 errmsg("collation \"default\" cannot be copied")));
178 char *collproviderstr
= NULL
;
182 colliculocale
= NULL
;
185 collproviderstr
= defGetString(providerEl
);
188 collisdeterministic
= defGetBoolean(deterministicEl
);
190 collisdeterministic
= true;
193 collversion
= defGetString(versionEl
);
197 if (pg_strcasecmp(collproviderstr
, "icu") == 0)
198 collprovider
= COLLPROVIDER_ICU
;
199 else if (pg_strcasecmp(collproviderstr
, "libc") == 0)
200 collprovider
= COLLPROVIDER_LIBC
;
203 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION
),
204 errmsg("unrecognized collation provider: %s",
208 collprovider
= COLLPROVIDER_LIBC
;
212 if (collprovider
== COLLPROVIDER_LIBC
)
214 collcollate
= defGetString(localeEl
);
215 collctype
= defGetString(localeEl
);
218 colliculocale
= defGetString(localeEl
);
222 collcollate
= defGetString(lccollateEl
);
225 collctype
= defGetString(lcctypeEl
);
227 if (collprovider
== COLLPROVIDER_LIBC
)
231 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION
),
232 errmsg("parameter \"lc_collate\" must be specified")));
236 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION
),
237 errmsg("parameter \"lc_ctype\" must be specified")));
239 else if (collprovider
== COLLPROVIDER_ICU
)
243 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION
),
244 errmsg("parameter \"locale\" must be specified")));
248 * Nondeterministic collations are currently only supported with ICU
249 * because that's the only case where it can actually make a
250 * difference. So we can save writing the code for the other
253 if (!collisdeterministic
&& collprovider
!= COLLPROVIDER_ICU
)
255 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
256 errmsg("nondeterministic collations not supported with this provider")));
258 if (collprovider
== COLLPROVIDER_ICU
)
262 * We could create ICU collations with collencoding == database
263 * encoding, but it seems better to use -1 so that it matches the
264 * way initdb would create ICU collations. However, only allow
265 * one to be created when the current database's encoding is
266 * supported. Otherwise the collation is useless, plus we get
267 * surprising behaviors like not being able to drop the collation.
269 * Skip this test when !USE_ICU, because the error we want to
270 * throw for that isn't thrown till later.
272 if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
274 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
275 errmsg("current database's encoding is not supported with this provider")));
281 collencoding
= GetDatabaseEncoding();
282 check_encoding_locale_matches(collencoding
, collcollate
, collctype
);
287 collversion
= get_collation_actual_version(collprovider
, collprovider
== COLLPROVIDER_ICU
? colliculocale
: collcollate
);
289 newoid
= CollationCreate(collName
,
300 false); /* not quiet */
302 if (!OidIsValid(newoid
))
303 return InvalidObjectAddress
;
306 * Check that the locales can be loaded. NB: pg_newlocale_from_collation
307 * is only supposed to be called on non-C-equivalent locales.
309 CommandCounterIncrement();
310 if (!lc_collate_is_c(newoid
) || !lc_ctype_is_c(newoid
))
311 (void) pg_newlocale_from_collation(newoid
);
313 ObjectAddressSet(address
, CollationRelationId
, newoid
);
319 * Subroutine for ALTER COLLATION SET SCHEMA and RENAME
321 * Is there a collation with the same name as the given collation already in
322 * the given namespace? If so, raise an appropriate error message.
325 IsThereCollationInNamespace(const char *collname
, Oid nspOid
)
327 /* make sure the name doesn't already exist in new schema */
328 if (SearchSysCacheExists3(COLLNAMEENCNSP
,
329 CStringGetDatum(collname
),
330 Int32GetDatum(GetDatabaseEncoding()),
331 ObjectIdGetDatum(nspOid
)))
333 (errcode(ERRCODE_DUPLICATE_OBJECT
),
334 errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"",
335 collname
, GetDatabaseEncodingName(),
336 get_namespace_name(nspOid
))));
338 /* mustn't match an any-encoding entry, either */
339 if (SearchSysCacheExists3(COLLNAMEENCNSP
,
340 CStringGetDatum(collname
),
342 ObjectIdGetDatum(nspOid
)))
344 (errcode(ERRCODE_DUPLICATE_OBJECT
),
345 errmsg("collation \"%s\" already exists in schema \"%s\"",
346 collname
, get_namespace_name(nspOid
))));
353 AlterCollation(AlterCollationStmt
*stmt
)
358 Form_pg_collation collForm
;
363 ObjectAddress address
;
365 rel
= table_open(CollationRelationId
, RowExclusiveLock
);
366 collOid
= get_collation_oid(stmt
->collname
, false);
368 if (!pg_collation_ownercheck(collOid
, GetUserId()))
369 aclcheck_error(ACLCHECK_NOT_OWNER
, OBJECT_COLLATION
,
370 NameListToString(stmt
->collname
));
372 tup
= SearchSysCacheCopy1(COLLOID
, ObjectIdGetDatum(collOid
));
373 if (!HeapTupleIsValid(tup
))
374 elog(ERROR
, "cache lookup failed for collation %u", collOid
);
376 collForm
= (Form_pg_collation
) GETSTRUCT(tup
);
377 datum
= SysCacheGetAttr(COLLOID
, tup
, Anum_pg_collation_collversion
, &isnull
);
378 oldversion
= isnull
? NULL
: TextDatumGetCString(datum
);
380 datum
= SysCacheGetAttr(COLLOID
, tup
, collForm
->collprovider
== COLLPROVIDER_ICU
? Anum_pg_collation_colliculocale
: Anum_pg_collation_collcollate
, &isnull
);
382 elog(ERROR
, "unexpected null in pg_collation");
383 newversion
= get_collation_actual_version(collForm
->collprovider
, TextDatumGetCString(datum
));
385 /* cannot change from NULL to non-NULL or vice versa */
386 if ((!oldversion
&& newversion
) || (oldversion
&& !newversion
))
387 elog(ERROR
, "invalid collation version change");
388 else if (oldversion
&& newversion
&& strcmp(newversion
, oldversion
) != 0)
390 bool nulls
[Natts_pg_collation
];
391 bool replaces
[Natts_pg_collation
];
392 Datum values
[Natts_pg_collation
];
395 (errmsg("changing version from %s to %s",
396 oldversion
, newversion
)));
398 memset(values
, 0, sizeof(values
));
399 memset(nulls
, false, sizeof(nulls
));
400 memset(replaces
, false, sizeof(replaces
));
402 values
[Anum_pg_collation_collversion
- 1] = CStringGetTextDatum(newversion
);
403 replaces
[Anum_pg_collation_collversion
- 1] = true;
405 tup
= heap_modify_tuple(tup
, RelationGetDescr(rel
),
406 values
, nulls
, replaces
);
410 (errmsg("version has not changed")));
412 CatalogTupleUpdate(rel
, &tup
->t_self
, tup
);
414 InvokeObjectPostAlterHook(CollationRelationId
, collOid
, 0);
416 ObjectAddressSet(address
, CollationRelationId
, collOid
);
419 table_close(rel
, NoLock
);
426 pg_collation_actual_version(PG_FUNCTION_ARGS
)
428 Oid collid
= PG_GETARG_OID(0);
435 tp
= SearchSysCache1(COLLOID
, ObjectIdGetDatum(collid
));
436 if (!HeapTupleIsValid(tp
))
438 (errcode(ERRCODE_UNDEFINED_OBJECT
),
439 errmsg("collation with OID %u does not exist", collid
)));
441 collprovider
= ((Form_pg_collation
) GETSTRUCT(tp
))->collprovider
;
443 if (collprovider
!= COLLPROVIDER_DEFAULT
)
445 datum
= SysCacheGetAttr(COLLOID
, tp
, collprovider
== COLLPROVIDER_ICU
? Anum_pg_collation_colliculocale
: Anum_pg_collation_collcollate
, &isnull
);
447 elog(ERROR
, "unexpected null in pg_collation");
448 version
= get_collation_actual_version(collprovider
, TextDatumGetCString(datum
));
456 PG_RETURN_TEXT_P(cstring_to_text(version
));
462 /* will we use "locale -a" in pg_import_system_collations? */
463 #if defined(HAVE_LOCALE_T) && !defined(WIN32)
464 #define READ_LOCALE_A_OUTPUT
467 #ifdef READ_LOCALE_A_OUTPUT
469 * "Normalize" a libc locale name, stripping off encoding tags such as
470 * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
471 * -> "br_FR@euro"). Return true if a new, different name was generated.
475 normalize_libc_locale_name(char *new, const char *old
)
479 bool changed
= false;
485 /* skip over encoding tag such as ".utf8" or ".UTF-8" */
487 while ((*o
>= 'A' && *o
<= 'Z')
488 || (*o
>= 'a' && *o
<= 'z')
489 || (*o
>= '0' && *o
<= '9')
503 * qsort comparator for CollAliasData items
506 cmpaliases(const void *a
, const void *b
)
508 const CollAliasData
*ca
= (const CollAliasData
*) a
;
509 const CollAliasData
*cb
= (const CollAliasData
*) b
;
511 /* comparing localename is enough because other fields are derived */
512 return strcmp(ca
->localename
, cb
->localename
);
514 #endif /* READ_LOCALE_A_OUTPUT */
519 * Get the ICU language tag for a locale name.
520 * The result is a palloc'd string.
523 get_icu_language_tag(const char *localename
)
525 char buf
[ULOC_FULLNAME_CAPACITY
];
528 status
= U_ZERO_ERROR
;
529 uloc_toLanguageTag(localename
, buf
, sizeof(buf
), true, &status
);
530 if (U_FAILURE(status
))
532 (errmsg("could not convert locale name \"%s\" to language tag: %s",
533 localename
, u_errorName(status
))));
539 * Get a comment (specifically, the display name) for an ICU locale.
540 * The result is a palloc'd string, or NULL if we can't get a comment
541 * or find that it's not all ASCII. (We can *not* accept non-ASCII
542 * comments, because the contents of template0 must be encoding-agnostic.)
545 get_icu_locale_comment(const char *localename
)
548 UChar displayname
[128];
553 status
= U_ZERO_ERROR
;
554 len_uchar
= uloc_getDisplayName(localename
, "en",
555 displayname
, lengthof(displayname
),
557 if (U_FAILURE(status
))
558 return NULL
; /* no good reason to raise an error */
560 /* Check for non-ASCII comment (can't use pg_is_ascii for this) */
561 for (i
= 0; i
< len_uchar
; i
++)
563 if (displayname
[i
] > 127)
568 result
= palloc(len_uchar
+ 1);
569 for (i
= 0; i
< len_uchar
; i
++)
570 result
[i
] = displayname
[i
];
571 result
[len_uchar
] = '\0';
579 * pg_import_system_collations: add known system collations to pg_collation
582 pg_import_system_collations(PG_FUNCTION_ARGS
)
584 Oid nspid
= PG_GETARG_OID(0);
589 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE
),
590 errmsg("must be superuser to import system collations")));
592 if (!SearchSysCacheExists1(NAMESPACEOID
, ObjectIdGetDatum(nspid
)))
594 (errcode(ERRCODE_UNDEFINED_SCHEMA
),
595 errmsg("schema with OID %u does not exist", nspid
)));
597 /* Load collations known to libc, using "locale -a" to enumerate them */
598 #ifdef READ_LOCALE_A_OUTPUT
600 FILE *locale_a_handle
;
601 char localebuf
[LOCALE_NAME_BUFLEN
];
604 CollAliasData
*aliases
;
609 /* expansible array of aliases */
611 aliases
= (CollAliasData
*) palloc(maxaliases
* sizeof(CollAliasData
));
614 locale_a_handle
= OpenPipeStream("locale -a", "r");
615 if (locale_a_handle
== NULL
)
617 (errcode_for_file_access(),
618 errmsg("could not execute command \"%s\": %m",
621 while (fgets(localebuf
, sizeof(localebuf
), locale_a_handle
))
625 char alias
[LOCALE_NAME_BUFLEN
];
627 len
= strlen(localebuf
);
629 if (len
== 0 || localebuf
[len
- 1] != '\n')
631 elog(DEBUG1
, "skipping locale with too-long name: \"%s\"", localebuf
);
634 localebuf
[len
- 1] = '\0';
637 * Some systems have locale names that don't consist entirely of
638 * ASCII letters (such as "bokmål" or "français").
639 * This is pretty silly, since we need the locale itself to
640 * interpret the non-ASCII characters. We can't do much with
641 * those, so we filter them out.
643 if (!pg_is_ascii(localebuf
))
645 elog(DEBUG1
, "skipping locale with non-ASCII name: \"%s\"", localebuf
);
649 enc
= pg_get_encoding_from_locale(localebuf
, false);
652 elog(DEBUG1
, "skipping locale with unrecognized encoding: \"%s\"",
656 if (!PG_VALID_BE_ENCODING(enc
))
658 elog(DEBUG1
, "skipping locale with client-only encoding: \"%s\"", localebuf
);
661 if (enc
== PG_SQL_ASCII
)
662 continue; /* C/POSIX are already in the catalog */
664 /* count valid locales found in operating system */
668 * Create a collation named the same as the locale, but quietly
669 * doing nothing if it already exists. This is the behavior we
670 * need even at initdb time, because some versions of "locale -a"
671 * can report the same locale name more than once. And it's
672 * convenient for later import runs, too, since you just about
673 * always want to add on new locales without a lot of chatter
674 * about existing ones.
676 collid
= CollationCreate(localebuf
, nspid
, GetUserId(),
677 COLLPROVIDER_LIBC
, true, enc
,
678 localebuf
, localebuf
, NULL
,
679 get_collation_actual_version(COLLPROVIDER_LIBC
, localebuf
),
681 if (OidIsValid(collid
))
685 /* Must do CCI between inserts to handle duplicates correctly */
686 CommandCounterIncrement();
690 * Generate aliases such as "en_US" in addition to "en_US.utf8"
691 * for ease of use. Note that collation names are unique per
692 * encoding only, so this doesn't clash with "en_US" for LATIN1,
695 * However, it might conflict with a name we'll see later in the
696 * "locale -a" output. So save up the aliases and try to add them
697 * after we've read all the output.
699 if (normalize_libc_locale_name(alias
, localebuf
))
701 if (naliases
>= maxaliases
)
704 aliases
= (CollAliasData
*)
705 repalloc(aliases
, maxaliases
* sizeof(CollAliasData
));
707 aliases
[naliases
].localename
= pstrdup(localebuf
);
708 aliases
[naliases
].alias
= pstrdup(alias
);
709 aliases
[naliases
].enc
= enc
;
714 ClosePipeStream(locale_a_handle
);
717 * Before processing the aliases, sort them by locale name. The point
718 * here is that if "locale -a" gives us multiple locale names with the
719 * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we
720 * want to pick a deterministic one of them. First in ASCII sort
721 * order is a good enough rule. (Before PG 10, the code corresponding
722 * to this logic in initdb.c had an additional ordering rule, to
723 * prefer the locale name exactly matching the alias, if any. We
724 * don't need to consider that here, because we would have already
725 * created such a pg_collation entry above, and that one will win.)
728 qsort((void *) aliases
, naliases
, sizeof(CollAliasData
), cmpaliases
);
730 /* Now add aliases, ignoring any that match pre-existing entries */
731 for (i
= 0; i
< naliases
; i
++)
733 char *locale
= aliases
[i
].localename
;
734 char *alias
= aliases
[i
].alias
;
735 int enc
= aliases
[i
].enc
;
737 collid
= CollationCreate(alias
, nspid
, GetUserId(),
738 COLLPROVIDER_LIBC
, true, enc
,
739 locale
, locale
, NULL
,
740 get_collation_actual_version(COLLPROVIDER_LIBC
, locale
),
742 if (OidIsValid(collid
))
746 CommandCounterIncrement();
750 /* Give a warning if "locale -a" seems to be malfunctioning */
753 (errmsg("no usable system locales were found")));
755 #endif /* READ_LOCALE_A_OUTPUT */
758 * Load collations known to ICU
760 * We use uloc_countAvailable()/uloc_getAvailable() rather than
761 * ucol_countAvailable()/ucol_getAvailable(). The former returns a full
762 * set of language+region combinations, whereas the latter only returns
763 * language+region combinations if they are distinct from the language's
764 * base collation. So there might not be a de-DE or en-GB, which would be
772 * Start the loop at -1 to sneak in the root locale without too much
775 for (i
= -1; i
< uloc_countAvailable(); i
++)
780 const char *iculocstr
;
784 name
= ""; /* ICU root locale */
786 name
= uloc_getAvailable(i
);
788 langtag
= get_icu_language_tag(name
);
789 iculocstr
= U_ICU_VERSION_MAJOR_NUM
>= 54 ? langtag
: name
;
792 * Be paranoid about not allowing any non-ASCII strings into
795 if (!pg_is_ascii(langtag
) || !pg_is_ascii(iculocstr
))
798 collid
= CollationCreate(psprintf("%s-x-icu", langtag
),
800 COLLPROVIDER_ICU
, true, -1,
801 NULL
, NULL
, iculocstr
,
802 get_collation_actual_version(COLLPROVIDER_ICU
, iculocstr
),
804 if (OidIsValid(collid
))
808 CommandCounterIncrement();
810 icucomment
= get_icu_locale_comment(name
);
812 CreateComments(collid
, CollationRelationId
, 0,
819 PG_RETURN_INT32(ncreated
);