1 // SPDX-License-Identifier: GPL-2.0
3 * linux/fs/hfsplus/unicode.c
6 * Brad Boyer (flar@allandria.com)
7 * (C) 2003 Ardis Technologies <roman@ardistech.com>
9 * Handler routines for unicode strings
12 #include <linux/types.h>
13 #include <linux/nls.h>
14 #include "hfsplus_fs.h"
15 #include "hfsplus_raw.h"
17 /* Fold the case of a unicode char, given the 16 bit value */
18 /* Returns folded char, or 0 if ignorable */
/*
 * case_fold - case-fold one 16-bit unicode value with a two-stage lookup in
 * hfsplus_case_fold_table.
 * @c: the 16-bit value to fold (already in CPU byte order at the call sites
 *     visible in this file, which pass be16_to_cpu() results).
 *
 * NOTE(review): this listing has dropped lines (the embedded line numbers
 * jump 19 -> 23 -> 25 -> 31), so the local declaration, any branch on the
 * first-stage value and the return statement are not visible here.
 */
19 static inline u16
case_fold(u16 c
)
/* First stage: the high byte of c selects an entry in the table. */
23 tmp
= hfsplus_case_fold_table
[c
>> 8];
/* Second stage: first-stage value plus the low byte of c indexes the same table. */
25 tmp
= hfsplus_case_fold_table
[tmp
+ (c
& 0xff)];
31 /* Compare unicode strings, return values like normal strcmp */
/*
 * hfsplus_strcasecmp - compare two HFS+ unicode strings after case folding.
 * @s1: first string (length field and characters are stored big-endian).
 * @s2: second string, same layout.
 *
 * The visible code converts each character with be16_to_cpu(), folds it
 * with case_fold() and orders the strings by the folded values.
 *
 * NOTE(review): the loop structure, the skipping of ignorable characters
 * (case_fold() is documented to return 0 for those) and the equal-result
 * return are on lines missing from this listing.
 */
32 int hfsplus_strcasecmp(const struct hfsplus_unistr
*s1
,
33 const struct hfsplus_unistr
*s2
)
35 u16 len1
, len2
, c1
, c2
;
36 const hfsplus_unichr
*p1
, *p2
;
/*
 * Lengths are read straight from the string headers.
 * NOTE(review): no bound check on these values is visible here; confirm they
 * cannot exceed the capacity of the unicode[] array before they are used to
 * walk p1/p2.
 */
38 len1
= be16_to_cpu(s1
->length
);
39 len2
= be16_to_cpu(s2
->length
);
/* Fetch the next character of s1 in CPU order and fold its case. */
47 c1
= case_fold(be16_to_cpu(*p1
));
/* Same for s2. */
52 c2
= case_fold(be16_to_cpu(*p2
));
/*
 * Only -1 or 1 is produced by this statement.
 * NOTE(review): presumably guarded by a c1 != c2 test on a missing line.
 */
58 return (c1
< c2
) ? -1 : 1;
64 /* Compare names as a sequence of 16-bit unsigned integers */
/*
 * hfsplus_strcmp - case-sensitive comparison of two HFS+ unicode strings.
 * @s1: first string (length field and characters are stored big-endian).
 * @s2: second string, same layout.
 *
 * Characters are compared as plain 16-bit unsigned values over the common
 * prefix (min(len1, len2) characters); the lengths are compared afterwards.
 *
 * NOTE(review): 'len' is used by the loop below but its declaration is not
 * among the lines visible in this listing.
 */
65 int hfsplus_strcmp(const struct hfsplus_unistr
*s1
,
66 const struct hfsplus_unistr
*s2
)
68 u16 len1
, len2
, c1
, c2
;
69 const hfsplus_unichr
*p1
, *p2
;
/*
 * NOTE(review): lengths come directly from the string headers; no bound
 * check against the capacity of unicode[] is visible here - confirm.
 */
72 len1
= be16_to_cpu(s1
->length
);
73 len2
= be16_to_cpu(s2
->length
);
/* Walk only the part both strings have in common. */
77 for (len
= min(len1
, len2
); len
> 0; len
--) {
78 c1
= be16_to_cpu(*p1
);
79 c2
= be16_to_cpu(*p2
);
/* NOTE(review): presumably reached only when c1 != c2; the test is not visible. */
81 return c1
< c2
? -1 : 1;
/*
 * Tie-break on length: the shorter string sorts first. The rest of this
 * expression is cut off in this listing.
 */
86 return len1
< len2
? -1 :
/*
 * Constants used by hfsplus_uni2asc() to compose a Hangul syllable
 * arithmetically from its decomposed parts.
 * NOTE(review): the S/L/V/T naming presumably follows the Unicode Standard's
 * Hangul composition algorithm (syllable, leading consonant, vowel, trailing
 * consonant) - confirm against the standard.
 */
/* Base code points of the syllable, L, V and T ranges. */
91 #define Hangul_SBase 0xac00
92 #define Hangul_LBase 0x1100
93 #define Hangul_VBase 0x1161
/* T index 0 is rejected by the "c1 > 0" test in hfsplus_uni2asc(). */
94 #define Hangul_TBase 0x11a7
/* 11172 == Hangul_LCount * Hangul_VCount * Hangul_TCount (19 * 21 * 28). */
95 #define Hangul_SCount 11172
96 #define Hangul_LCount 19
97 #define Hangul_VCount 21
98 #define Hangul_TCount 28
/* Number of V/T combinations available for one L value (21 * 28 = 588). */
99 #define Hangul_NCount (Hangul_VCount * Hangul_TCount)
/*
 * hfsplus_compose_lookup - find the entry for character @cc in one node of
 * the composition table.
 * @p:  array of u16 pairs; the even slot (p[k * 2]) is compared against @cc
 *      and the odd slot (p[k * 2 + 1]) is used as an offset into
 *      hfsplus_compose_table.
 * @cc: character to look up.
 *
 * Returns hfsplus_compose_table + p[i * 2 + 1] for the matching pair.
 *
 * NOTE(review): the initialisation of s/e/i, the search loop and the
 * not-found return are on lines missing from this listing. The range test
 * against p[s * 2] and p[e * 2] suggests the keys are sorted and searched by
 * bisection - confirm.
 */
102 static u16
*hfsplus_compose_lookup(u16
*p
, u16 cc
)
/* Reject an empty node (e == 0) or a cc outside the [first, last] key range. */
108 if (!e
|| cc
< p
[s
* 2] || cc
> p
[e
* 2])
/* cc is below the probed key. */
114 else if (cc
< p
[i
* 2])
/* Match: the paired value is an offset into the global compose table. */
117 return hfsplus_compose_table
+ p
[i
* 2 + 1];
/*
 * hfsplus_uni2asc - convert an HFS+ unicode string to the mount's NLS
 * character set.
 * @sb:    super block; supplies the NLS table and the NODECOMPOSE flag.
 * @ustr:  source string (big-endian length and characters).
 * @astr:  output buffer.
 * @len_p: on exit receives the number of bytes written, computed as
 *         (char *)op - astr. NOTE(review): presumably also carries the
 *         buffer size on entry - that read is not visible in this listing.
 *
 * When composition is enabled, decomposed sequences (including Hangul
 * L/V/T jamo) are recombined before each character is handed to
 * nls->uni2char().
 *
 * NOTE(review): many lines of this function are missing from this listing
 * (declarations of op/c0/c1/cc/ce1/ce2, most branches, the labels and the
 * final return), so the comments below describe only the visible fragments.
 */
122 int hfsplus_uni2asc(struct super_block
*sb
,
123 const struct hfsplus_unistr
*ustr
,
124 char *astr
, int *len_p
)
126 const hfsplus_unichr
*ip
;
127 struct nls_table
*nls
= HFSPLUS_SB(sb
)->nls
;
131 int i
, len
, ustrlen
, res
, compose
;
/*
 * NOTE(review): ustrlen is taken from the string header and then bounds the
 * reads through ip; no check against the capacity of unicode[] is visible
 * here - confirm.
 */
135 ustrlen
= be16_to_cpu(ustr
->length
);
/* Composition is on unless the NODECOMPOSE flag is set on the super block. */
138 compose
= !test_bit(HFSPLUS_SB_NODECOMPOSE
, &HFSPLUS_SB(sb
)->flags
);
140 while (ustrlen
> 0) {
/* Consume the next source character in CPU byte order. */
141 c0
= be16_to_cpu(*ip
++);
143 /* search for single decomposed char */
/* Look c0 up in the root of the compose table. */
145 ce1
= hfsplus_compose_lookup(hfsplus_compose_table
, c0
);
151 /* start of a possibly decomposed Hangul char */
/*
 * Peek at the following character as a vowel index.
 * NOTE(review): the "c1 < Hangul_VCount" test works as a range check only if
 * c1 is unsigned (its declaration is not visible) - confirm.
 */
156 c1
= be16_to_cpu(*ip
) - Hangul_VBase
;
157 if (c1
< Hangul_VCount
) {
158 /* compose the Hangul char */
/* cc = ((L index * VCount) + V index) * TCount */
159 cc
= (c0
- Hangul_LBase
) * Hangul_VCount
;
160 cc
= (cc
+ c1
) * Hangul_TCount
;
/* Peek again, this time as a trailing-consonant index. */
166 c1
= be16_to_cpu(*ip
) - Hangul_TBase
;
/* Index 0 is excluded, so only 1 .. Hangul_TCount - 1 is accepted. */
167 if (c1
> 0 && c1
< Hangul_TCount
) {
176 /* main loop for common case of not composed chars */
179 c1
= be16_to_cpu(*ip
);
/* Check whether the upcoming character could itself start a composition. */
181 ce1
= hfsplus_compose_lookup(
182 hfsplus_compose_table
, c1
);
/*
 * Emit c0 through the NLS table.
 * NOTE(review): len is presumably the space left in the output buffer.
 */
193 res
= nls
->uni2char(c0
, op
, len
);
195 if (res
== -ENAMETOOLONG
)
/* Descend one level in the compose table: look c0 up under the node ce1. */
206 ce2
= hfsplus_compose_lookup(ce1
, c0
);
/* Try to extend the composition with further source characters. */
209 while (i
< ustrlen
) {
210 ce1
= hfsplus_compose_lookup(ce2
,
/* Emit the composed character cc through the NLS table. */
236 res
= nls
->uni2char(cc
, op
, len
);
238 if (res
== -ENAMETOOLONG
)
/* Report how many bytes were written to astr. */
248 *len_p
= (char *)op
- astr
;
253 * Convert one or more ASCII characters into a single unicode character.
254 * Returns the number of ASCII characters corresponding to the unicode char.
/*
 * asc2unichar - decode the leading character of @astr via the mount's NLS
 * table.
 * @sb:   super block whose nls->char2uni() method does the conversion.
 * @astr: input bytes.
 * @len:  number of bytes available at @astr.
 *
 * NOTE(review): the rest of the parameter list (the 'uc' output argument
 * passed to char2uni() below) and everything after the char2uni() call,
 * including error handling and the return, are on lines missing from this
 * listing.
 */
256 static inline int asc2unichar(struct super_block
*sb
, const char *astr
, int len
,
/* size holds whatever char2uni() returned for this character. */
259 int size
= HFSPLUS_SB(sb
)->nls
->char2uni(astr
, len
, uc
);
275 /* Decomposes a single unicode character. */
/*
 * decompose_unichar - look up the decomposition of @uc in
 * hfsplus_decompose_table.
 * @uc:   character to decompose. Only bits 0..15 take part in the lookup,
 *        one 4-bit nibble per table level, most significant nibble first.
 * @size: output parameter. NOTE(review): the store to *size is not visible
 *        in this listing; callers use the value as the decomposition length.
 *
 * Returns hfsplus_decompose_table + (off / 4) for the final table value.
 *
 * NOTE(review): the early returns for characters without a decomposition
 * are on lines missing from this listing; callers test the result for NULL.
 */
276 static inline u16
*decompose_unichar(wchar_t uc
, int *size
)
/* Level 1: bits 12..15. */
280 off
= hfsplus_decompose_table
[(uc
>> 12) & 0xf];
/* Both 0 and 0xffff are special-cased at the first level. */
281 if (off
== 0 || off
== 0xffff)
/* Level 2: bits 8..11. */
284 off
= hfsplus_decompose_table
[off
+ ((uc
>> 8) & 0xf)];
/* Level 3: bits 4..7. */
288 off
= hfsplus_decompose_table
[off
+ ((uc
>> 4) & 0xf)];
/* Level 4: bits 0..3. */
292 off
= hfsplus_decompose_table
[off
+ (uc
& 0xf)];
/*
 * The final value divided by 4 is the offset of the decomposed sequence.
 * NOTE(review): presumably the low two bits encode the length - confirm.
 */
296 return hfsplus_decompose_table
+ (off
/ 4);
/*
 * hfsplus_asc2uni - convert a string in the mount's NLS character set to an
 * HFS+ unicode string.
 * @sb:             super block; supplies the NLS table and NODECOMPOSE flag.
 * @ustr:           destination; unicode[] and length are written big-endian.
 * @max_unistr_len: maximum number of 16-bit units to store in @ustr.
 * @astr:           input bytes.
 * @len:            number of input bytes.
 *
 * Each input character is decoded with asc2unichar() and, when
 * decomposition is enabled and a decomposition exists, replaced by its
 * decomposed sequence.
 *
 * Returns -ENAMETOOLONG on the visible error path. NOTE(review): the
 * condition guarding that return and the success return are on lines
 * missing from this listing, as is the declaration of 'c'.
 */
299 int hfsplus_asc2uni(struct super_block
*sb
,
300 struct hfsplus_unistr
*ustr
, int max_unistr_len
,
301 const char *astr
, int len
)
303 int size
, dsize
, decompose
;
304 u16
*dstr
, outlen
= 0;
/* Decomposition is on unless the NODECOMPOSE flag is set on the super block. */
307 decompose
= !test_bit(HFSPLUS_SB_NODECOMPOSE
, &HFSPLUS_SB(sb
)->flags
);
/* Stop when the output is full or the input is exhausted. */
308 while (outlen
< max_unistr_len
&& len
> 0) {
309 size
= asc2unichar(sb
, astr
, len
, &c
);
/* dsize receives the length of the decomposed sequence. */
312 dstr
= decompose_unichar(c
, &dsize
);
/* Never store a partial decomposition: the whole sequence must fit. */
316 if (outlen
+ dsize
> max_unistr_len
)
/* Copy the decomposed sequence, converting each unit to big-endian. */
319 ustr
->unicode
[outlen
++] = cpu_to_be16(*dstr
++);
320 } while (--dsize
> 0);
/* No decomposition applied: store the character itself. */
322 ustr
->unicode
[outlen
++] = cpu_to_be16(c
);
/* Record the number of units actually stored. */
327 ustr
->length
= cpu_to_be16(outlen
);
329 return -ENAMETOOLONG
;
334 * Hash a string to an integer as appropriate for the HFS+ filesystem.
335 * Composed unicode characters are decomposed and case-folding is performed
336 * if the appropriate bits are (un)set on the superblock.
/*
 * hfsplus_hash_dentry - compute the dcache hash of a name.
 * @dentry: dentry whose super block supplies the CASEFOLD and NODECOMPOSE
 *          flags; also passed to init_name_hash().
 * @str:    name to hash; the result is stored in str->hash.
 *
 * The name is decoded character by character with asc2unichar(),
 * optionally decomposed, and the resulting 16-bit values (c2) are fed to
 * partial_name_hash().
 *
 * NOTE(review): declarations of hash/astr/c/c2/dstr, the loop header, the
 * case-folding step that produces c2 and the return statement are on lines
 * missing from this listing.
 */
338 int hfsplus_hash_dentry(const struct dentry
*dentry
, struct qstr
*str
)
340 struct super_block
*sb
= dentry
->d_sb
;
343 int casefold
, decompose
, size
, len
;
/* Folding is opt-in via CASEFOLD; decomposition is on unless NODECOMPOSE is set. */
348 casefold
= test_bit(HFSPLUS_SB_CASEFOLD
, &HFSPLUS_SB(sb
)->flags
);
349 decompose
= !test_bit(HFSPLUS_SB_NODECOMPOSE
, &HFSPLUS_SB(sb
)->flags
);
350 hash
= init_name_hash(dentry
);
/*
 * NOTE(review): uninitialized_var() is used to declare dsize here; it was
 * removed from newer kernel trees - confirm whether a plain declaration
 * should replace it.
 */
354 int uninitialized_var(dsize
);
355 size
= asc2unichar(sb
, astr
, len
, &c
);
/* dsize receives the length of the decomposed sequence. */
360 dstr
= decompose_unichar(c
, &dsize
);
/* Decomposed path: mix in every unit of the decomposition. */
369 hash
= partial_name_hash(c2
, hash
);
370 } while (--dsize
> 0);
/* Undecomposed path: mix in the single character. */
376 hash
= partial_name_hash(c2
, hash
);
/* Finalise and publish the hash. */
379 str
->hash
= end_name_hash(hash
);
385 * Compare strings with HFS+ filename ordering.
386 * Composed unicode characters are decomposed and case-folding is performed
387 * if the appropriate bits are (un)set on the superblock.
389 int hfsplus_compare_dentry(const struct dentry
*dentry
,
390 unsigned int len
, const char *str
, const struct qstr
*name
)
392 struct super_block
*sb
= dentry
->d_sb
;
393 int casefold
, decompose
, size
;
394 int dsize1
, dsize2
, len1
, len2
;
395 const u16
*dstr1
, *dstr2
;
396 const char *astr1
, *astr2
;
400 casefold
= test_bit(HFSPLUS_SB_CASEFOLD
, &HFSPLUS_SB(sb
)->flags
);
401 decompose
= !test_bit(HFSPLUS_SB_NODECOMPOSE
, &HFSPLUS_SB(sb
)->flags
);
407 dstr1
= dstr2
= NULL
;
409 while (len1
> 0 && len2
> 0) {
411 size
= asc2unichar(sb
, astr1
, len1
, &c
);
416 dstr1
= decompose_unichar(c
, &dsize1
);
417 if (!decompose
|| !dstr1
) {
425 size
= asc2unichar(sb
, astr2
, len2
, &c
);
430 dstr2
= decompose_unichar(c
, &dsize2
);
431 if (!decompose
|| !dstr2
) {