/*
   Copyright (C) Andrew Tridgell  2004

     ** NOTE! The following LGPL license applies to the ldb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
/*
 *  Component: ldb utf8 handling
 *
 *  Description: case folding and case comparison for UTF8 strings
 *
 *  Author: Andrew Tridgell
 */
34 #include "ldb_private.h"
35 #include "system/locale.h"
38 * Set functions for comparing and case-folding case-insensitive ldb val
41 void ldb_set_utf8_functions(struct ldb_context
*ldb
,
43 char *(*casefold
)(void *, void *, const char *, size_t),
44 int (*casecmp
)(void *ctx
,
45 const struct ldb_val
*v1
,
46 const struct ldb_val
*v2
))
49 ldb
->utf8_fns
.context
= context
;
52 ldb
->utf8_fns
.casefold
= casefold
;
55 ldb
->utf8_fns
.casecmp
= casecmp
;
/*
  this allows the user to pass in a case-folding function
  to handle utf8 caseless comparisons

  Only the context pointer and the casefold callback are passed on; the
  comparison callback argument is NULL.
 */
void ldb_set_utf8_fns(struct ldb_context *ldb,
		      void *context,
		      char *(*casefold)(void *, void *, const char *, size_t))
{
	ldb_set_utf8_functions(ldb, context, casefold, NULL);
}
72 a simple case folding function
73 NOTE: does not handle UTF8
75 char *ldb_casefold_default(void *context
, TALLOC_CTX
*mem_ctx
, const char *s
, size_t n
)
78 char *ret
= talloc_strndup(mem_ctx
, s
, n
);
83 for (i
=0;ret
[i
];i
++) {
84 ret
[i
] = ldb_ascii_toupper(ret
[i
]);
91 * The default comparison fold function only knows ASCII. Multiple
92 * spaces (0x20) are collapsed into one, and [a-z] map to [A-Z]. All
93 * other bytes are compared without casefolding.
95 * Note that as well as not handling UTF-8, this function does not exactly
96 * implement RFC 4518 (2.6.1. Insignificant Space Handling and Appendix B).
99 int ldb_comparison_fold_ascii(void *ignored
,
100 const struct ldb_val
*v1
,
101 const struct ldb_val
*v2
)
103 const uint8_t *s1
= v1
->data
;
104 const uint8_t *s2
= v2
->data
;
105 size_t n1
= v1
->length
, n2
= v2
->length
;
107 while (n1
&& *s1
== ' ') { s1
++; n1
--; };
108 while (n2
&& *s2
== ' ') { s2
++; n2
--; };
110 while (n1
&& n2
&& *s1
&& *s2
) {
111 if (ldb_ascii_toupper(*s1
) != ldb_ascii_toupper(*s2
)) {
115 while (n1
> 1 && s1
[0] == s1
[1]) { s1
++; n1
--; }
116 while (n2
> 1 && s2
[0] == s2
[1]) { s2
++; n2
--; }
122 /* check for trailing spaces only if the other pointers has
123 * reached the end of the strings otherwise we can
124 * mistakenly match. ex. "domain users" <->
127 if (n1
&& *s1
== ' ' && (!n2
|| !*s2
)) {
128 while (n1
&& *s1
== ' ') { s1
++; n1
--; }
130 if (n2
&& *s2
== ' ' && (!n1
|| !*s1
)) {
131 while (n2
&& *s2
== ' ') { s2
++; n2
--; }
133 if (n1
== 0 && n2
!= 0) {
136 if (n2
== 0 && n1
!= 0) {
139 if (n1
== 0 && n2
== 0) {
142 return NUMERIC_CMP(*s1
, *s2
);
145 void ldb_set_utf8_default(struct ldb_context
*ldb
)
147 ldb_set_utf8_functions(ldb
, NULL
,
148 ldb_casefold_default
,
149 ldb_comparison_fold_ascii
);
152 char *ldb_casefold(struct ldb_context
*ldb
, TALLOC_CTX
*mem_ctx
, const char *s
, size_t n
)
154 return ldb
->utf8_fns
.casefold(ldb
->utf8_fns
.context
, mem_ctx
, s
, n
);
/*
  check the attribute name is valid according to rfc2251
  returns 1 if the name is ok, 0 if it is not

  Accepted names are the single wildcard "*", or a name that starts
  with an ASCII letter (or our special '@' identifier) and continues
  with ASCII letters, digits and '-'.  NULL and the empty string are
  rejected.
*/
int ldb_valid_attr_name(const char *s)
{
	size_t i;

	if (s == NULL || s[0] == '\0') {
		return 0;
	}

	/* handle special ldb_tdb wildcard */
	if (strcmp(s, "*") == 0) {
		return 1;
	}

	for (i = 0; s[i] != '\0'; i++) {
		/*
		 * Work on the byte as unsigned char: that is what the
		 * <ctype.h> classifiers expect, and it turns the 7-bit
		 * ASCII check into a plain range comparison.
		 */
		const unsigned char c = (unsigned char)s[i];

		if (c > 0x7f) {
			return 0;
		}

		if (i == 0) {
			/* first char must be an alpha (or our special '@' identifier) */
			if (!(isalpha(c) || c == '@')) {
				return 0;
			}
		} else {
			if (!(isalnum(c) || c == '-')) {
				return 0;
			}
		}
	}

	return 1;
}
189 char *ldb_attr_casefold(TALLOC_CTX
*mem_ctx
, const char *s
)
192 char *ret
= talloc_strdup(mem_ctx
, s
);
197 for (i
= 0; ret
[i
]; i
++) {
198 ret
[i
] = ldb_ascii_toupper(ret
[i
]);
/*
  we accept either 'dn' or 'distinguishedName' for a distinguishedName

  returns 0 if attr matches one of those names under ldb_attr_cmp(),
  -1 otherwise

  NOTE(review): the return statements were lost from the copy of the
  file this was rebuilt from; 0 / -1 is inferred (0 mirroring the
  ldb_attr_cmp() == 0 "match" convention) - confirm against upstream.
*/
int ldb_attr_dn(const char *attr)
{
	if (ldb_attr_cmp(attr, "dn") == 0 ||
	    ldb_attr_cmp(attr, "distinguishedName") == 0) {
		return 0;
	}
	return -1;
}
/*
 * Upper-case a single byte using ASCII rules only: 'a'..'z' map to
 * 'A'..'Z', every other value is returned unchanged.  The result does
 * not depend on the current locale.
 */
_PRIVATE_ char ldb_ascii_toupper(char c) {
	/*
	 * We are aiming for a 1970s C-locale toupper(), when all letters
	 * were 7-bit and behaved with true American spirit.
	 *
	 * For example, we don't want the "i" in "<guid=" to be upper-cased to
	 * "İ" as would happen in some locales, or we won't be able to parse
	 * that properly. This is unfortunate for cases where we are dealing
	 * with real text; a search for the name "Ali" would need to be
	 * written "Alİ" to match.
	 */
	/* ASCII upper and lower case letters differ only in bit 0x20 */
	return ('a' <= c && c <= 'z') ? (char)(c ^ 0x20) : c;
}