5 * Routines for converting between UTF-8 and OSTA Compressed Unicode.
6 * Also handles filename mangling
9 * OSTA Compressed Unicode is explained in the OSTA UDF specification.
10 * http://www.osta.org/
11 * UTF-8 is explained in IETF RFC 3629.
12 * https://www.rfc-editor.org/rfc/rfc3629.txt
15 * This file is distributed under the terms of the GNU General Public
16 * License (GPL). Copies of the GPL can be obtained from:
17 * ftp://prep.ai.mit.edu/pub/gnu/GPL
18 * Each contributing author retains all rights to their own work.
23 #include <linux/kernel.h>
24 #include <linux/string.h> /* for memset */
25 #include <linux/nls.h>
26 #include <linux/crc-itu-t.h>
27 #include <linux/slab.h>
/*
 * Mask and value used to detect surrogate code units: a value in the
 * range 0xd800..0xdfff satisfies (uni & SURROGATE_MASK) == SURROGATE_PAIR.
 */
31 #define SURROGATE_MASK 0xfffff800
32 #define SURROGATE_PAIR 0x0000d800
/*
 * udf_uni2char_utf8 - encode one Unicode value as UTF-8.
 *
 * Writes the UTF-8 encoding of @uni into out[], incrementing u_len once per
 * byte stored. Three encodings are emitted below: a single byte holding the
 * value unchanged, a two-byte sequence for values below 0x800, and a
 * three-byte sequence with lead byte 0xe0 | (uni >> 12).
 *
 * NOTE(review): the remaining parameters, the output-size checks and the
 * return statements are not shown here; presumably the function returns the
 * number of bytes written or a negative errno - confirm.
 */
34 static int udf_uni2char_utf8(wchar_t uni
,
/* Values in the surrogate range 0xd800..0xdfff are singled out here. */
43 if ((uni
& SURROGATE_MASK
) == SURROGATE_PAIR
)
/* One-byte form: the value is stored as-is. */
47 out
[u_len
++] = (unsigned char)uni
;
48 } else if (uni
< 0x800) {
/* Two-byte form: 110xxxxx 10xxxxxx. */
51 out
[u_len
++] = (unsigned char)(0xc0 | (uni
>> 6));
52 out
[u_len
++] = (unsigned char)(0x80 | (uni
& 0x3f));
/* Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx. */
56 out
[u_len
++] = (unsigned char)(0xe0 | (uni
>> 12));
57 out
[u_len
++] = (unsigned char)(0x80 | ((uni
>> 6) & 0x3f));
58 out
[u_len
++] = (unsigned char)(0x80 | (uni
& 0x3f));
/*
 * udf_char2uni_utf8 - decode one UTF-8 character from a byte buffer.
 *
 * Scans the input while u_len < boundlen. Continuation bytes contribute
 * their low six bits to utf_char; lead bytes are classified by their
 * high-bit pattern; plain single-byte characters take a separate path.
 *
 * NOTE(review): the remaining parameters, the body of each lead-byte branch
 * and the return statements are not shown here - confirm what is stored to
 * the caller and what is returned.
 */
63 static int udf_char2uni_utf8(const unsigned char *in
,
67 unsigned int utf_char
;
/* u_len is not advanced in the for-header itself. */
73 for (u_len
= 0; u_len
< boundlen
;) {
76 /* Complete a multi-byte UTF-8 character */
/* Shift in the six payload bits of the continuation byte. */
78 utf_char
= (utf_char
<< 6) | (c
& 0x3f);
82 /* Check for a multi-byte UTF-8 character */
84 /* Start a multi-byte UTF-8 character */
/* Lead byte 110xxxxx */
85 if ((c
& 0xe0) == 0xc0) {
/* Lead byte 1110xxxx */
88 } else if ((c
& 0xf0) == 0xe0) {
/* Lead byte 11110xxx */
91 } else if ((c
& 0xf8) == 0xf0) {
/* Lead byte 111110xx */
94 } else if ((c
& 0xfc) == 0xf8) {
/* Lead byte 1111110x */
97 } else if ((c
& 0xfe) == 0xfc) {
106 /* Single byte UTF-8 character (most common) */
/*
 * Replacement character that udf_name_conv_char() assigns to the character
 * being converted. NOTE(review): presumably used for characters rejected
 * during translation ('/' and NUL are tested there) - confirm.
 */
120 #define ILLEGAL_CHAR_MARK '_'
124 /* Number of chars we need to store generated CRC to make filename unique */
/*
 * udf_name_conv_char - convert characters from a compressed-Unicode input
 * buffer into the output buffer via @conv_f.
 *
 * The input position *str_i_idx advances by @u_ch per iteration, and the
 * loop runs until a character has been obtained (gotch) or the input is
 * exhausted. Each character is read from @str_i, widened with a second byte,
 * checked against '/' and NUL when translate is set, and handed to @conv_f
 * together with the space remaining in @str_o.
 *
 * NOTE(review): the str_o_idx/str_i_idx/translate parameters, the use of
 * @needsCRC and the return value are not shown here - confirm.
 */
127 static int udf_name_conv_char(uint8_t *str_o
, int str_o_max_len
,
129 const uint8_t *str_i
, int str_i_max_len
,
131 int u_ch
, int *needsCRC
,
132 int (*conv_f
)(wchar_t, unsigned char *, int),
139 for (; (!gotch
) && (*str_i_idx
< str_i_max_len
); *str_i_idx
+= u_ch
) {
/* Output buffer already full? */
140 if (*str_o_idx
>= str_o_max_len
) {
145 /* Expand OSTA compressed Unicode to Unicode */
146 c
= str_i
[*str_i_idx
];
/* Combine with the following byte; NOTE(review): presumably only when u_ch > 1 - confirm. */
148 c
= (c
<< 8) | str_i
[*str_i_idx
+ 1];
150 if (translate
&& (c
== '/' || c
== 0))
159 c
= ILLEGAL_CHAR_MARK
;
/* Convert into the free space that remains in str_o. */
163 len
= conv_f(c
, &str_o
[*str_o_idx
], str_o_max_len
- *str_o_idx
);
164 /* Valid character? */
167 else if (len
== -ENAMETOOLONG
) {
/* A '?' is stored in the output; NOTE(review): presumably for an unconvertible character - confirm. */
171 str_o
[(*str_o_idx
)++] = '?';
/*
 * udf_name_from_CS0 - build an output name in @str_o from the compressed
 * Unicode name in @ocu, using @conv_f for per-character conversion.
 *
 * Steps shown below:
 *  - guard on @str_max_len and zero the output buffer;
 *  - reject compression IDs other than 8 and 16, and input lengths that
 *    are not a multiple of u_ch, logging each via pr_err();
 *  - scan backwards over at most EXT_SIZE characters for an extension and
 *    convert it into the local ext[] buffer, which starts with EXT_MARK;
 *  - convert the name itself with udf_name_conv_char();
 *  - test for the names "." and "..";
 *  - compute crc_itu_t() over the input, hex-encode it into crc[1..4] and
 *    copy as much of crc[] as fits, then append the extension.
 *
 * NOTE(review): the translate parameter, the conditions selecting the CRC
 * path and the return value are not shown here - confirm.
 */
178 static int udf_name_from_CS0(uint8_t *str_o
, int str_max_len
,
179 const uint8_t *ocu
, int ocu_len
,
180 int (*conv_f
)(wchar_t, unsigned char *, int),
188 int ext_i_len
, ext_max_len
;
189 int str_o_len
= 0; /* Length of resulting output */
190 int ext_o_len
= 0; /* Extension output length */
191 int ext_crc_len
= 0; /* Extension output length if used with CRC */
192 int i_ext
= -1; /* Extension position in input buffer */
193 int o_crc
= 0; /* Rightmost possible output pos for CRC+ext */
194 unsigned short valueCRC
;
195 uint8_t ext
[EXT_SIZE
* NLS_MAX_CHARSET_SIZE
+ 1];
196 uint8_t crc
[CRC_LEN
];
/* Guard against a non-positive output size. */
198 if (str_max_len
<= 0)
202 memset(str_o
, 0, str_max_len
);
/* Only compression IDs 8 and 16 are accepted; anything else is logged. */
207 if (cmp_id
!= 8 && cmp_id
!= 16) {
208 memset(str_o
, 0, str_max_len
);
209 pr_err("unknown compression code (%u)\n", cmp_id
);
/* The input length must be a whole number of u_ch-sized units. */
217 if (ocu_len
% u_ch
) {
218 pr_err("incorrect filename length (%d)\n", ocu_len
+ 1);
223 /* Look for extension */
/* Walk backwards from the last character, at most EXT_SIZE characters. */
224 for (idx
= ocu_len
- u_ch
, ext_i_len
= 0;
225 (idx
>= 0) && (ext_i_len
< EXT_SIZE
);
226 idx
-= u_ch
, ext_i_len
++) {
229 c
= (c
<< 8) | ocu
[idx
+ 1];
238 /* Convert extension */
/* The extension may use neither more than ext[] nor more than the output. */
239 ext_max_len
= min_t(int, sizeof(ext
), str_max_len
);
240 ext
[ext_o_len
++] = EXT_MARK
;
242 while (udf_name_conv_char(ext
, ext_max_len
, &ext_o_len
,
245 conv_f
, translate
)) {
/* Record the extension length while it plus CRC_LEN stays below str_max_len. */
246 if ((ext_o_len
+ CRC_LEN
) < str_max_len
)
247 ext_crc_len
= ext_o_len
;
/* Input position has reached the extension found above. */
254 if (translate
&& (idx
== i_ext
)) {
255 if (str_o_len
> (str_max_len
- ext_o_len
))
260 if (!udf_name_conv_char(str_o
, str_max_len
, &str_o_len
,
262 u_ch
, &needsCRC
, conv_f
, translate
))
266 (str_o_len
<= (str_max_len
- ext_o_len
- CRC_LEN
)))
/* Matches an output name of "." or "..". */
271 if (str_o_len
<= 2 && str_o
[0] == '.' &&
272 (str_o_len
== 1 || str_o
[1] == '.'))
/* CRC-ITU-T over the whole input name, seeded with 0. */
276 valueCRC
= crc_itu_t(0, ocu
, ocu_len
);
/* The four hex digits of the CRC, high byte first, go into crc[1..4]. */
278 crc
[1] = hex_asc_upper_hi(valueCRC
>> 8);
279 crc
[2] = hex_asc_upper_lo(valueCRC
>> 8);
280 crc
[3] = hex_asc_upper_hi(valueCRC
);
281 crc
[4] = hex_asc_upper_lo(valueCRC
);
/* Copy no more of crc[] than fits in the remaining output space. */
282 len
= min_t(int, CRC_LEN
, str_max_len
- str_o_len
);
283 memcpy(&str_o
[str_o_len
], crc
, len
);
/* Switch to the extension length recorded for use together with a CRC. */
285 ext_o_len
= ext_crc_len
;
/* Append the converted extension to the output. */
288 memcpy(&str_o
[str_o_len
], ext
, ext_o_len
);
289 str_o_len
+= ext_o_len
;
/*
 * udf_name_to_CS0 - encode the name in @str_i into the buffer @ocu.
 *
 * Each input character is decoded with @conv_f into uni_char, compared
 * against max_val, and written to @ocu as one byte, or as two bytes (high
 * byte first) when max_val is 0xffff.
 *
 * NOTE(review): the initial values of max_val and u_ch, the handling when
 * uni_char exceeds max_val, and the return value are not shown here -
 * confirm.
 */
296 static int udf_name_to_CS0(uint8_t *ocu
, int ocu_max_len
,
297 const uint8_t *str_i
, int str_len
,
298 int (*conv_f
)(const unsigned char *, int, wchar_t *))
301 unsigned int max_val
;
/* Guard against a non-positive output size. */
305 if (ocu_max_len
<= 0)
308 memset(ocu
, 0, ocu_max_len
);
315 for (i
= 0; i
< str_len
; i
++) {
316 /* Name didn't fit? */
317 if (u_len
+ u_ch
> ocu_max_len
)
/* Decode the next character from the rest of the input. */
319 len
= conv_f(&str_i
[i
], str_len
- i
, &uni_char
);
322 /* Invalid character, deal with it */
/* Character does not fit the current maximum value. */
328 if (uni_char
> max_val
) {
/* With a 16-bit maximum the high byte is written first. */
335 if (max_val
== 0xffff)
336 ocu
[u_len
++] = (uint8_t)(uni_char
>> 8);
337 ocu
[u_len
++] = (uint8_t)(uni_char
& 0xff);
/*
 * udf_dstrCS0toUTF8 - convert a dstring field to UTF-8.
 *
 * The used length is read from the last byte of the @i_len-byte field. A
 * length that is not smaller than @i_len is reported via pr_err().
 * Conversion is delegated to udf_name_from_CS0() with udf_uni2char_utf8 as
 * the converter and 0 as the final argument.
 *
 * NOTE(review): any guard on @i_len before the ocu_i[i_len - 1] read, and
 * the error return, are not shown here - confirm.
 */
344 int udf_dstrCS0toUTF8(uint8_t *utf_o
, int o_len
,
345 const uint8_t *ocu_i
, int i_len
)
/* The last byte of the field holds the used length. */
350 s_len
= ocu_i
[i_len
- 1];
351 if (s_len
>= i_len
) {
352 pr_err("incorrect dstring lengths (%d/%d)\n",
358 return udf_name_from_CS0(utf_o
, o_len
, ocu_i
, s_len
,
359 udf_uni2char_utf8
, 0);
/*
 * udf_get_filename - convert the name @sname (@slen bytes) into @dname
 * (@dlen bytes) according to the superblock's character-set flags.
 *
 * With UDF_FLAG_UTF8 set the UTF-8 encoder is used; with UDF_FLAG_NLS_MAP
 * set the uni2char routine of the superblock's NLS map is used. Conversion
 * is delegated to udf_name_from_CS0() with 1 as the final argument.
 *
 * NOTE(review): the case where neither flag is set, and the final return
 * path, are not shown here - confirm.
 */
362 int udf_get_filename(struct super_block
*sb
, const uint8_t *sname
, int slen
,
363 uint8_t *dname
, int dlen
)
365 int (*conv_f
)(wchar_t, unsigned char *, int);
/* Pick the Unicode-to-bytes converter from the mount flags. */
374 if (UDF_QUERY_FLAG(sb
, UDF_FLAG_UTF8
)) {
375 conv_f
= udf_uni2char_utf8
;
376 } else if (UDF_QUERY_FLAG(sb
, UDF_FLAG_NLS_MAP
)) {
377 conv_f
= UDF_SB(sb
)->s_nls_map
->uni2char
;
381 ret
= udf_name_from_CS0(dname
, dlen
, sname
, slen
, conv_f
, 1);
382 /* Zero length filename isn't valid... */
/*
 * udf_put_filename - encode the name @sname (@slen bytes) into @dname
 * (@dlen bytes) according to the superblock's character-set flags.
 *
 * With UDF_FLAG_UTF8 set the UTF-8 decoder is used; with UDF_FLAG_NLS_MAP
 * set the char2uni routine of the superblock's NLS map is used. The result
 * of udf_name_to_CS0() is returned directly.
 *
 * NOTE(review): the case where neither flag is set is not shown here -
 * confirm.
 */
388 int udf_put_filename(struct super_block
*sb
, const uint8_t *sname
, int slen
,
389 uint8_t *dname
, int dlen
)
391 int (*conv_f
)(const unsigned char *, int, wchar_t *);
/* Pick the bytes-to-Unicode converter from the mount flags. */
393 if (UDF_QUERY_FLAG(sb
, UDF_FLAG_UTF8
)) {
394 conv_f
= udf_char2uni_utf8
;
395 } else if (UDF_QUERY_FLAG(sb
, UDF_FLAG_NLS_MAP
)) {
396 conv_f
= UDF_SB(sb
)->s_nls_map
->char2uni
;
400 return udf_name_to_CS0(dname
, dlen
, sname
, slen
, conv_f
);