1 // SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/hfsplus/unicode.c
 *
 * Brad Boyer (flar@allandria.com)
 * (C) 2003 Ardis Technologies <roman@ardistech.com>
 *
 * Handler routines for unicode strings
 */
12 #include <linux/types.h>
13 #include <linux/nls.h>
14 #include "hfsplus_fs.h"
15 #include "hfsplus_raw.h"
17 /* Fold the case of a unicode char, given the 16 bit value */
18 /* Returns folded char, or 0 if ignorable */
/*
 * The fold is a two-step lookup in hfsplus_case_fold_table: the high byte
 * of c selects a first-level entry, and that entry plus the low byte of c
 * selects the second-level entry.
 *
 * NOTE(review): this listing is an extraction fragment -- the embedded
 * original line numbers jump (19, 23, 25), so the braces, the declaration
 * of tmp, any branch between the two lookups and the return statement are
 * not visible here. Confirm against the full file.
 */
19 static inline u16
case_fold(u16 c
)
/* First-level lookup, indexed by the high byte of c. */
23 tmp
= hfsplus_case_fold_table
[c
>> 8];
/* Second-level lookup: first-level result plus the low byte of c. */
25 tmp
= hfsplus_case_fold_table
[tmp
+ (c
& 0xff)];
31 /* Compare unicode strings, return values like normal strcmp */
/*
 * hfsplus_strcasecmp - compare two HFS+ unicode strings after case folding.
 * @s1: first string; its length field is read via be16_to_cpu()
 * @s2: second string; its length field is read via be16_to_cpu()
 *
 * Each 16-bit unit is converted with be16_to_cpu() and passed through
 * case_fold() before it is compared. The one return statement visible
 * here yields -1 when the folded unit of s1 is smaller and 1 otherwise.
 *
 * NOTE(review): this listing is an extraction fragment (the embedded line
 * numbers jump from 39 to 47, 52 and 58), so the loops, the setup of
 * p1/p2 and the return for equal strings are not visible -- confirm
 * against the full file.
 */
32 int hfsplus_strcasecmp(const struct hfsplus_unistr
*s1
,
33 const struct hfsplus_unistr
*s2
)
35 u16 len1
, len2
, c1
, c2
;
36 const hfsplus_unichr
*p1
, *p2
;
/* Lengths are converted to CPU byte order before use. */
38 len1
= be16_to_cpu(s1
->length
);
39 len2
= be16_to_cpu(s2
->length
);
/* Folded unit taken from s1. */
47 c1
= case_fold(be16_to_cpu(*p1
));
/* Folded unit taken from s2. */
52 c2
= case_fold(be16_to_cpu(*p2
));
/* Ordering result for differing units (the guarding test is not visible). */
58 return (c1
< c2
) ? -1 : 1;
64 /* Compare names as a sequence of 16-bit unsigned integers */
/*
 * hfsplus_strcmp - compare two HFS+ unicode strings unit by unit.
 * @s1: first string; length and units are read via be16_to_cpu()
 * @s2: second string; length and units are read via be16_to_cpu()
 *
 * No case folding is applied here: the raw 16-bit values are compared.
 * The loop covers the shorter of the two lengths; the trailing return
 * then starts a comparison of len1 against len2.
 *
 * NOTE(review): this listing is an extraction fragment -- the declaration
 * of len, the setup and advancing of p1/p2, the condition guarding the
 * in-loop return and the tail of the final return expression are not
 * visible here. Confirm against the full file.
 */
65 int hfsplus_strcmp(const struct hfsplus_unistr
*s1
,
66 const struct hfsplus_unistr
*s2
)
68 u16 len1
, len2
, c1
, c2
;
69 const hfsplus_unichr
*p1
, *p2
;
72 len1
= be16_to_cpu(s1
->length
);
73 len2
= be16_to_cpu(s2
->length
);
/* Only the common prefix (the smaller of both lengths) is walked. */
77 for (len
= min(len1
, len2
); len
> 0; len
--) {
78 c1
= be16_to_cpu(*p1
);
79 c2
= be16_to_cpu(*p2
);
/* Visible mismatch result: -1 if the unit from s1 is smaller, else 1. */
81 return c1
< c2
? -1 : 1;
/*
 * Length comparison: -1 when s1 is the shorter string. The remainder of
 * this expression is cut off in this listing.
 */
86 return len1
< len2
? -1 :
/*
 * Constants for Hangul syllable composition and decomposition.
 *
 * Hangul_SBase and Hangul_SCount delimit the code point range that
 * hfsplus_try_decompose_hangul() in this file accepts as precomposed
 * Hangul. Hangul_LBase, Hangul_VBase and Hangul_TBase are the bases that
 * function adds to the three component indices it derives.
 *
 * NOTE(review): presumably L, V and T stand for the leading consonant,
 * vowel and trailing consonant of Unicode Standard Annex #15 -- confirm.
 */
91 #define Hangul_SBase 0xac00
92 #define Hangul_LBase 0x1100
93 #define Hangul_VBase 0x1161
94 #define Hangul_TBase 0x11a7
95 #define Hangul_SCount 11172
96 #define Hangul_LCount 19
97 #define Hangul_VCount 21
98 #define Hangul_TCount 28
/* Number of (V, T) combinations per L index: 21 * 28 = 588. */
99 #define Hangul_NCount (Hangul_VCount * Hangul_TCount)
/*
 * hfsplus_compose_lookup - search a table of u16 pairs for @cc.
 * @p: table of pairs; even slots (p[n * 2]) are compared against @cc and
 *     odd slots (p[n * 2 + 1]) are used as an offset into
 *     hfsplus_compose_table
 * @cc: value to look up
 *
 * The visible success path returns hfsplus_compose_table + p[i * 2 + 1].
 *
 * NOTE(review): this listing is an extraction fragment -- the declarations
 * of s, e and i, the search loop and the not-found return are missing.
 * The "else if (cc < p[i * 2])" branch suggests a binary search between
 * s and e; confirm against the full file.
 */
102 static u16
*hfsplus_compose_lookup(u16
*p
, u16 cc
)
/* Early check: e is zero, or cc lies outside the keys at slots s and e. */
108 if (!e
|| cc
< p
[s
* 2] || cc
> p
[e
* 2])
/* cc is below the key at slot i. */
114 else if (cc
< p
[i
* 2])
/* Hit: the odd slot of pair i is an offset into hfsplus_compose_table. */
117 return hfsplus_compose_table
+ p
[i
* 2 + 1];
/*
 * hfsplus_uni2asc - convert an HFS+ unicode string into bytes using the
 * NLS table attached to the superblock.
 * @sb: superblock; supplies HFSPLUS_SB(sb)->nls and the flags word
 * @ustr: source string; length and units are read via be16_to_cpu()
 * @astr: start of the output buffer
 * @len_p: on the visible path, receives the distance between the output
 *         cursor op and @astr, i.e. the number of bytes produced
 *
 * "compose" is true unless HFSPLUS_SB_NODECOMPOSE is set in the superblock
 * flags. Units are matched against hfsplus_compose_table through
 * hfsplus_compose_lookup(), a possibly decomposed Hangul character is
 * combined arithmetically from the Hangul_* constants, and characters are
 * written with nls->uni2char(), whose result is tested against
 * -ENAMETOOLONG.
 *
 * NOTE(review): this listing is an extraction fragment -- the embedded
 * line numbers skip many lines, so the declarations of c0, c1, cc, ce1,
 * ce2 and op, the initialisation of ip, op and len, the places where
 * "compose" is consulted, all branch bodies and the return statements are
 * not visible here. Confirm every detail against the full file.
 */
122 int hfsplus_uni2asc(struct super_block
*sb
,
123 const struct hfsplus_unistr
*ustr
,
124 char *astr
, int *len_p
)
126 const hfsplus_unichr
*ip
;
127 struct nls_table
*nls
= HFSPLUS_SB(sb
)->nls
;
131 int i
, len
, ustrlen
, res
, compose
;
135 ustrlen
= be16_to_cpu(ustr
->length
);
/* compose is true unless the HFSPLUS_SB_NODECOMPOSE flag is set. */
138 compose
= !test_bit(HFSPLUS_SB_NODECOMPOSE
, &HFSPLUS_SB(sb
)->flags
);
140 while (ustrlen
> 0) {
/* Fetch the next unit and advance the input cursor. */
141 c0
= be16_to_cpu(*ip
++);
143 /* search for single decomposed char */
145 ce1
= hfsplus_compose_lookup(hfsplus_compose_table
, c0
);
151 /* start of a possibly decomposed Hangul char */
/* Distance of the following unit from Hangul_VBase (input not advanced). */
156 c1
= be16_to_cpu(*ip
) - Hangul_VBase
;
157 if (c1
< Hangul_VCount
) {
158 /* compose the Hangul char */
/*
 * cc = ((c0 - Hangul_LBase) * Hangul_VCount + c1) * Hangul_TCount; adding
 * a base value to cc is not visible in this listing.
 */
159 cc
= (c0
- Hangul_LBase
) * Hangul_VCount
;
160 cc
= (cc
+ c1
) * Hangul_TCount
;
/* Distance of the following unit from Hangul_TBase. */
166 c1
= be16_to_cpu(*ip
) - Hangul_TBase
;
/* Accepted only when strictly between 0 and Hangul_TCount. */
167 if (c1
> 0 && c1
< Hangul_TCount
) {
176 /* main loop for common case of not composed chars */
179 c1
= be16_to_cpu(*ip
);
181 ce1
= hfsplus_compose_lookup(
182 hfsplus_compose_table
, c1
);
/* Emit c0 through the NLS table; len is passed as the third argument. */
193 res
= nls
->uni2char(c0
, op
, len
);
195 if (res
== -ENAMETOOLONG
)
/* Continue the lookup in the table returned for the previous unit. */
206 ce2
= hfsplus_compose_lookup(ce1
, c0
);
209 while (i
< ustrlen
) {
210 ce1
= hfsplus_compose_lookup(ce2
,
/* Emit the value held in cc through the NLS table. */
236 res
= nls
->uni2char(cc
, op
, len
);
238 if (res
== -ENAMETOOLONG
)
/* Report how far the output cursor moved from the start of astr. */
248 *len_p
= (char *)op
- astr
;
/*
 * Convert one or more ASCII characters into a single unicode character.
 * Returns the number of ASCII characters corresponding to the unicode char.
 */
/*
 * asc2unichar - decode one character from @astr with the NLS table of @sb.
 * @sb: superblock whose HFSPLUS_SB(sb)->nls->char2uni() does the decoding
 * @astr: input bytes
 * @len: passed through to char2uni() (presumably the remaining input
 *       length -- confirm)
 *
 * NOTE(review): this listing is an extraction fragment -- the final
 * parameter (the "uc" handed to char2uni()), any handling of size and the
 * return statement are not visible here.
 */
256 static inline int asc2unichar(struct super_block
*sb
, const char *astr
, int len
,
/* size holds whatever char2uni() returned for this input. */
259 int size
= HFSPLUS_SB(sb
)->nls
->char2uni(astr
, len
, uc
);
275 /* Decomposes a non-Hangul unicode character. */
/*
 * @uc: character to look up
 * @size: output parameter (its writes are not visible in this listing)
 *
 * The lookup is four chained reads of hfsplus_decompose_table. Each read
 * consumes one 4-bit nibble of @uc, from bits 12-15 down to bits 0-3, and
 * uses the previous result as the base offset. The visible return value
 * is a pointer into hfsplus_decompose_table at off / 4.
 *
 * NOTE(review): this listing is an extraction fragment -- the declaration
 * of off, the outcome of the 0 / 0xffff test, any checks between the
 * later lookups and the assignment to *size are missing. Confirm against
 * the full file.
 */
276 static u16
*hfsplus_decompose_nonhangul(wchar_t uc
, int *size
)
/* Level 1: nibble in bits 12-15. */
280 off
= hfsplus_decompose_table
[(uc
>> 12) & 0xf];
/* 0 and 0xffff are special first-level values (the action is not visible). */
281 if (off
== 0 || off
== 0xffff)
/* Level 2: nibble in bits 8-11. */
284 off
= hfsplus_decompose_table
[off
+ ((uc
>> 8) & 0xf)];
/* Level 3: nibble in bits 4-7. */
288 off
= hfsplus_decompose_table
[off
+ ((uc
>> 4) & 0xf)];
/* Level 4: nibble in bits 0-3. */
292 off
= hfsplus_decompose_table
[off
+ (uc
& 0xf)];
296 return hfsplus_decompose_table
+ (off
/ 4);
/*
 * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
 * precomposed Hangul, otherwise return the length of the decomposition.
 *
 * This function was adapted from sample code from the Unicode Standard
 * Annex #15: Unicode Normalization Forms, version 3.2.0.
 *
 * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
 * under the Terms of Use in http://www.unicode.org/copyright.html.
 */
/*
 * hfsplus_try_decompose_hangul - split a precomposed Hangul code point
 * into component code points.
 * @uc: code point to examine
 * @result: output buffer (its writes are not visible in this listing)
 *
 * index is the offset of @uc from Hangul_SBase and is range-checked
 * against [0, Hangul_SCount). The values l, v and t are then derived from
 * index using Hangul_NCount and Hangul_TCount and offset by their bases.
 *
 * NOTE(review): this listing is an extraction fragment -- the declarations
 * of index, l, v and t, the action taken when the range check fails, the
 * stores into @result and the return statements are missing. Confirm
 * against the full file.
 */
309 static int hfsplus_try_decompose_hangul(wchar_t uc
, u16
*result
)
314 index
= uc
- Hangul_SBase
;
/* Range check for the precomposed block (the action is not visible). */
315 if (index
< 0 || index
>= Hangul_SCount
)
/* One l value per group of Hangul_NCount consecutive indices. */
318 l
= Hangul_LBase
+ index
/ Hangul_NCount
;
/* Position within that group, divided into runs of Hangul_TCount. */
319 v
= Hangul_VBase
+ (index
% Hangul_NCount
) / Hangul_TCount
;
/* Position within a run of Hangul_TCount. */
320 t
= Hangul_TBase
+ index
% Hangul_TCount
;
/* t equal to Hangul_TBase means index % Hangul_TCount was zero. */
324 if (t
!= Hangul_TBase
) {
331 /* Decomposes a single unicode character. */
/*
 * @uc: character to decompose
 * @size: receives the result of hfsplus_try_decompose_hangul(), or is
 *        passed on to hfsplus_decompose_nonhangul()
 * @hangul_buffer: caller-provided storage used as the result buffer for
 *                 the Hangul attempt
 *
 * NOTE(review): this listing is an extraction fragment -- the declaration
 * of result, the condition that selects the non-Hangul path and the
 * return statement are missing. Confirm against the full file.
 */
332 static u16
*decompose_unichar(wchar_t uc
, int *size
, u16
*hangul_buffer
)
336 /* Hangul is handled separately */
337 result
= hangul_buffer
;
338 *size
= hfsplus_try_decompose_hangul(uc
, result
);
/* Table-driven path for non-Hangul characters. */
340 result
= hfsplus_decompose_nonhangul(uc
, size
);
/*
 * hfsplus_asc2uni - convert a byte string into an HFS+ unicode string.
 * @sb: superblock; passed to asc2unichar() and read for the
 *      HFSPLUS_SB_NODECOMPOSE flag
 * @ustr: output string; ->unicode[] receives units via cpu_to_be16() and
 *        ->length receives the unit count via cpu_to_be16()
 * @max_unistr_len: upper bound on the number of units written
 * @astr: input bytes
 * @len: input length; the loop runs while it is greater than zero
 *
 * Each input character is decoded with asc2unichar(). The result is stored
 * either as the units returned by decompose_unichar() or as the single
 * value c. A return of -ENAMETOOLONG is visible at the end of the listing.
 *
 * NOTE(review): this listing is an extraction fragment -- the declarations
 * of c and dhangul, the advancing of astr and len, the test of
 * "decompose", the action taken when the decomposition does not fit and
 * the condition guarding the -ENAMETOOLONG return are missing. Confirm
 * against the full file.
 */
344 int hfsplus_asc2uni(struct super_block
*sb
,
345 struct hfsplus_unistr
*ustr
, int max_unistr_len
,
346 const char *astr
, int len
)
348 int size
, dsize
, decompose
;
349 u16
*dstr
, outlen
= 0;
/* decompose is true unless the HFSPLUS_SB_NODECOMPOSE flag is set. */
353 decompose
= !test_bit(HFSPLUS_SB_NODECOMPOSE
, &HFSPLUS_SB(sb
)->flags
);
/* Stop when the output is full or the input is exhausted. */
354 while (outlen
< max_unistr_len
&& len
> 0) {
355 size
= asc2unichar(sb
, astr
, len
, &c
);
358 dstr
= decompose_unichar(c
, &dsize
, dhangul
);
/* Does the whole decomposition fit? (The action is not visible here.) */
362 if (outlen
+ dsize
> max_unistr_len
)
/* Copy the decomposed units, converting each with cpu_to_be16(). */
365 ustr
->unicode
[outlen
++] = cpu_to_be16(*dstr
++);
366 } while (--dsize
> 0);
/* Store c itself as a single unit. */
368 ustr
->unicode
[outlen
++] = cpu_to_be16(c
);
/* Record the number of units written. */
373 ustr
->length
= cpu_to_be16(outlen
);
375 return -ENAMETOOLONG
;
/*
 * Hash a string to an integer as appropriate for the HFS+ filesystem.
 * Composed unicode characters are decomposed and case-folding is performed
 * if the appropriate bits are (un)set on the superblock.
 */
/*
 * hfsplus_hash_dentry - compute the name hash stored in @str->hash.
 * @dentry: supplies the superblock (dentry->d_sb) and seeds the hash via
 *          init_name_hash()
 * @str: name to hash; its ->hash field is written with end_name_hash()
 *
 * casefold mirrors the HFSPLUS_SB_CASEFOLD flag and decompose is the
 * inverse of HFSPLUS_SB_NODECOMPOSE. Characters are decoded with
 * asc2unichar() and optionally expanded with decompose_unichar(); the
 * value c2 is fed to partial_name_hash() for each unit.
 *
 * NOTE(review): this listing is an extraction fragment -- the declarations
 * of hash, astr, c, c2, dsize, dstr and dhangul, the loop over the input,
 * the way c2 is derived (presumably via case_fold() when casefold is set
 * -- confirm) and the return statement are missing.
 */
384 int hfsplus_hash_dentry(const struct dentry
*dentry
, struct qstr
*str
)
386 struct super_block
*sb
= dentry
->d_sb
;
389 int casefold
, decompose
, size
, len
;
395 casefold
= test_bit(HFSPLUS_SB_CASEFOLD
, &HFSPLUS_SB(sb
)->flags
);
396 decompose
= !test_bit(HFSPLUS_SB_NODECOMPOSE
, &HFSPLUS_SB(sb
)->flags
);
/* Start a fresh hash seeded from the dentry. */
397 hash
= init_name_hash(dentry
);
402 size
= asc2unichar(sb
, astr
, len
, &c
);
407 dstr
= decompose_unichar(c
, &dsize
, dhangul
);
/* Mix one unit of the decomposition into the hash. */
416 hash
= partial_name_hash(c2
, hash
);
417 } while (--dsize
> 0);
/* Mix a single unit into the hash. */
423 hash
= partial_name_hash(c2
, hash
);
/* Finalise and publish the hash on the qstr. */
426 str
->hash
= end_name_hash(hash
);
/*
 * Compare strings with HFS+ filename ordering.
 * Composed unicode characters are decomposed and case-folding is performed
 * if the appropriate bits are (un)set on the superblock.
 */
436 int hfsplus_compare_dentry(const struct dentry
*dentry
,
437 unsigned int len
, const char *str
, const struct qstr
*name
)
439 struct super_block
*sb
= dentry
->d_sb
;
440 int casefold
, decompose
, size
;
441 int dsize1
, dsize2
, len1
, len2
;
442 const u16
*dstr1
, *dstr2
;
443 const char *astr1
, *astr2
;
446 u16 dhangul_1
[3], dhangul_2
[3];
448 casefold
= test_bit(HFSPLUS_SB_CASEFOLD
, &HFSPLUS_SB(sb
)->flags
);
449 decompose
= !test_bit(HFSPLUS_SB_NODECOMPOSE
, &HFSPLUS_SB(sb
)->flags
);
455 dstr1
= dstr2
= NULL
;
457 while (len1
> 0 && len2
> 0) {
459 size
= asc2unichar(sb
, astr1
, len1
, &c
);
464 dstr1
= decompose_unichar(c
, &dsize1
,
466 if (!decompose
|| !dstr1
) {
474 size
= asc2unichar(sb
, astr2
, len2
, &c
);
479 dstr2
= decompose_unichar(c
, &dsize2
,
481 if (!decompose
|| !dstr2
) {