4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
26 * Copyright (c) 2017 by Delphix. All rights reserved.
30 #include <sys/types.h>
31 #include <sys/sunddi.h>
38 #include <sys/u8_textprep.h>
39 #include <smbsrv/alloc.h>
40 #include <sys/errno.h>
41 #include <smbsrv/string.h>
42 #include <smbsrv/cp_usascii.h>
43 #include <smbsrv/cp_unicode.h>
45 #define UNICODE_N_ENTRIES (sizeof (a_unicode) / sizeof (a_unicode[0]))
48 * Global pointer to the current codepage: defaults to ASCII,
49 * and a flag indicating whether the codepage is Unicode or ASCII.
/* Case table indexed by the character classification/conversion routines. */
51 static const smb_codepage_t
*current_codepage
= usascii_codepage
;
/* Selects the index mask used on lookups: 0xffff when set, 0xff otherwise. */
52 static boolean_t is_unicode
= B_FALSE
;
/*
 * Table returned by smb_unicode_init(); kept separately so that
 * smb_codepage_fini() can free it. NULL when no such table is held.
 */
54 static smb_codepage_t
*unicode_codepage
= NULL
;
/* Forward declaration: allocates and fills the 65536-entry Unicode table. */
56 static smb_codepage_t
*smb_unicode_init(void);
61 * Scan a string replacing all occurrences of orgchar with newchar.
62 * Returns a pointer to s, or NULL if s is NULL.
65 strsubst(char *s
, char orgchar
, char newchar
)
84 * Normalize a string by reducing all the repeated characters in
85 * buf as defined by class. For example:
87 * char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt");
88 * strcanon(buf, "/\\");
90 * Would result in buf containing the following string:
94 * This function modifies the contents of buf in place and returns
98 strcanon(char *buf
, const char *class)
107 if ((r
= strchr(class, *p
)) != 0) {
119 smb_codepage_init(void)
126 if ((cp
= smb_unicode_init()) != NULL
) {
127 current_codepage
= cp
;
128 unicode_codepage
= cp
;
131 current_codepage
= usascii_codepage
;
132 is_unicode
= B_FALSE
;
137 smb_codepage_fini(void)
139 if (unicode_codepage
!= NULL
) {
140 MEM_FREE("unicode", unicode_codepage
);
141 unicode_codepage
= NULL
;
142 current_codepage
= NULL
;
147 * Determine whether or not a character is an uppercase character.
148 * This function operates on the current codepage table. Returns
149 * non-zero if the character is uppercase. Otherwise returns zero.
154 uint16_t mask
= is_unicode
? 0xffff : 0xff;
156 return (current_codepage
[c
& mask
].ctype
& CODEPAGE_ISUPPER
);
160 * Determine whether or not a character is a lowercase character.
161 * This function operates on the current codepage table. Returns
162 * non-zero if the character is lowercase. Otherwise returns zero.
167 uint16_t mask
= is_unicode
? 0xffff : 0xff;
169 return (current_codepage
[c
& mask
].ctype
& CODEPAGE_ISLOWER
);
173 * Convert individual characters to their uppercase equivalent value.
174 * If the specified character is lowercase, the uppercase value will
175 * be returned. Otherwise the original value will be returned.
180 uint16_t mask
= is_unicode
? 0xffff : 0xff;
182 return (current_codepage
[c
& mask
].upper
);
186 * Convert individual characters to their lowercase equivalent value.
187 * If the specified character is uppercase, the lowercase value will
188 * be returned. Otherwise the original value will be returned.
193 uint16_t mask
= is_unicode
? 0xffff : 0xff;
195 return (current_codepage
[c
& mask
].lower
);
199 * Convert a string to uppercase using the appropriate codepage. The
200 * string is converted in place. A pointer to the string is returned.
201 * There is an assumption here that the uppercase and lowercase forms
202 * of a character always encode to the same length.
211 if (smb_isascii(*p
)) {
212 *p
= smb_toupper(*p
);
215 if (smb_mbtowc(&c
, p
, MTS_MB_CHAR_MAX
) < 0)
222 p
+= smb_wctomb(p
, c
);
230 * Convert a string to lowercase using the appropriate codepage. The
231 * string is converted in place. A pointer to the string is returned.
232 * There is an assumption here that the uppercase and lowercase forms
233 * of a character always encode to the same length.
242 if (smb_isascii(*p
)) {
243 *p
= smb_tolower(*p
);
246 if (smb_mbtowc(&c
, p
, MTS_MB_CHAR_MAX
) < 0)
253 p
+= smb_wctomb(p
, c
);
261 * Returns 1 if string contains NO uppercase chars, 0 otherwise. However,
262 * -1 is returned if "s" is not a valid multi-byte string.
265 smb_isstrlwr(const char *s
)
272 if (smb_isascii(*p
) && smb_isupper(*p
))
275 if ((n
= smb_mbtowc(&c
, p
, MTS_MB_CHAR_MAX
)) < 0)
292 * Returns 1 if string contains NO lowercase chars, 0 otherwise. However,
293 * -1 is returned if "s" is not a valid multi-byte string.
296 smb_isstrupr(const char *s
)
303 if (smb_isascii(*p
) && smb_islower(*p
))
306 if ((n
= smb_mbtowc(&c
, p
, MTS_MB_CHAR_MAX
)) < 0)
323 * Compare the null-terminated strings s1 and s2 and return an integer
324 * greater than, equal to or less than 0 dependent on whether s1 is
325 * lexicographically greater than, equal to or less than s2 after
326 * translation of each character to lowercase. The original strings
329 * If n is non-zero, at most n bytes are compared. Otherwise, the strings
330 * are compared until a null terminator is encountered.
332 * Out: 0 if strings are equal
333 * < 0 if first string < second string
334 * > 0 if first string > second string
337 smb_strcasecmp(const char *s1
, const char *s2
, size_t n
)
342 rc
= u8_strcmp(s1
, s2
, n
, U8_STRCMP_CI_LOWER
, U8_UNICODE_LATEST
, &err
);
349 * First build a codepage based on cp_unicode.h. Then build the unicode
350 * codepage from this interim codepage by copying the entries over while
351 * fixing them and filling in the gaps.
353 static smb_codepage_t
*
354 smb_unicode_init(void)
356 smb_codepage_t
*unicode
;
360 unicode
= MEM_ZALLOC("unicode", sizeof (smb_codepage_t
) << 16);
364 while (b
!= 0xffff) {
366 * If there is a gap in the standard,
367 * fill in the gap with no-case entries.
369 if (UNICODE_N_ENTRIES
<= a
|| a_unicode
[a
].val
> b
) {
370 unicode
[b
].ctype
= CODEPAGE_ISNONE
;
371 unicode
[b
].upper
= (smb_wchar_t
)b
;
372 unicode
[b
].lower
= (smb_wchar_t
)b
;
378 * Copy the entry and fixup as required.
380 switch (a_unicode
[a
].ctype
) {
381 case CODEPAGE_ISNONE
:
383 * Replace 0xffff in upper/lower fields with its val.
385 unicode
[b
].ctype
= CODEPAGE_ISNONE
;
386 unicode
[b
].upper
= (smb_wchar_t
)b
;
387 unicode
[b
].lower
= (smb_wchar_t
)b
;
389 case CODEPAGE_ISUPPER
:
391 * Some characters may have case yet not have
392 * case conversion. Treat them as no-case.
394 if (a_unicode
[a
].lower
== 0xffff) {
395 unicode
[b
].ctype
= CODEPAGE_ISNONE
;
396 unicode
[b
].upper
= (smb_wchar_t
)b
;
397 unicode
[b
].lower
= (smb_wchar_t
)b
;
399 unicode
[b
].ctype
= CODEPAGE_ISUPPER
;
400 unicode
[b
].upper
= (smb_wchar_t
)b
;
401 unicode
[b
].lower
= a_unicode
[a
].lower
;
404 case CODEPAGE_ISLOWER
:
406 * Some characters may have case yet not have
407 * case conversion. Treat them as no-case.
409 if (a_unicode
[a
].upper
== 0xffff) {
410 unicode
[b
].ctype
= CODEPAGE_ISNONE
;
411 unicode
[b
].upper
= (smb_wchar_t
)b
;
412 unicode
[b
].lower
= (smb_wchar_t
)b
;
414 unicode
[b
].ctype
= CODEPAGE_ISLOWER
;
415 unicode
[b
].upper
= a_unicode
[a
].upper
;
416 unicode
[b
].lower
= (smb_wchar_t
)b
;
420 MEM_FREE("unicode", unicode
);
432 * Parse a UNC path (\\server\share\path) into its components.
433 * Although a standard UNC path starts with two '\', in DFS
434 * all UNC paths start with one '\'. So, this function only
437 * A valid UNC must contain at least two components, i.e. server
438 * and share. The path is parsed to:
440 * unc_server server or domain name with no leading/trailing '\'
441 * unc_share share name with no leading/trailing '\'
442 * unc_path relative path to the share with no leading/trailing '\'
443 * it is valid for unc_path to be NULL.
445 * Upon successful return of this function, smb_unc_free()
446 * MUST be called when returned 'unc' is no longer needed.
448 * Returns 0 on success, otherwise returns an errno code.
451 smb_unc_init(const char *path
, smb_unc_t
*unc
)
455 if (path
== NULL
|| unc
== NULL
|| (*path
!= '\\' && *path
!= '/'))
458 bzero(unc
, sizeof (smb_unc_t
));
461 unc
->unc_buf
= smb_mem_strdup(path
);
463 if ((unc
->unc_buf
= strdup(path
)) == NULL
)
467 (void) strsubst(unc
->unc_buf
, '\\', '/');
468 (void) strcanon(unc
->unc_buf
, "/");
470 unc
->unc_server
= unc
->unc_buf
+ 1;
471 if (*unc
->unc_server
== '\0') {
476 if ((p
= strchr(unc
->unc_server
, '/')) == NULL
) {
484 if (*unc
->unc_share
== '\0') {
489 unc
->unc_path
= strchr(unc
->unc_share
, '/');
490 if ((p
= unc
->unc_path
) == NULL
)
496 /* remove the last '/' if any */
497 if ((p
= strchr(unc
->unc_path
, '\0')) != NULL
) {
506 smb_unc_free(smb_unc_t
*unc
)
512 smb_mem_free(unc
->unc_buf
);