5 * Routines for converting between UTF-8 and OSTA Compressed Unicode.
6 * Also handles filename mangling
9 * OSTA Compressed Unicode is explained in the OSTA UDF specification.
10 * http://www.osta.org/
11 * UTF-8 is explained in IETF RFC 3629.
12 * https://www.rfc-editor.org/rfc/rfc3629.txt
15 * This file is distributed under the terms of the GNU General Public
16 * License (GPL). Copies of the GPL can be obtained from:
17 * ftp://prep.ai.mit.edu/pub/gnu/GPL
18 * Each contributing author retains all rights to their own work.
23 #include <linux/kernel.h>
24 #include <linux/string.h> /* for memset */
25 #include <linux/nls.h>
26 #include <linux/crc-itu-t.h>
27 #include <linux/slab.h>
/*
 * udf_uni2char_utf8 - store the wide character uni as UTF-8 bytes in out[].
 *
 * The visible branches write one byte (the value itself), two bytes
 * (when uni < 0x800) or three bytes, advancing u_len once per byte stored.
 *
 * NOTE(review): only part of this function is visible in this view -- the
 * rest of the parameter list, the condition of the first branch, any
 * output-size checks and the return statement are not shown; confirm
 * against the full source before relying on this description.
 */
31 static int udf_uni2char_utf8(wchar_t uni
,
/* One-byte form: the value is stored unchanged. */
41 out
[u_len
++] = (unsigned char)uni
;
42 } else if (uni
< 0x800) {
/* Two-byte form: 0xc0 lead byte carrying the bits above the low six ... */
45 out
[u_len
++] = (unsigned char)(0xc0 | (uni
>> 6));
/* ... then one 0x80-tagged continuation byte with the low six bits. */
46 out
[u_len
++] = (unsigned char)(0x80 | (uni
& 0x3f));
/* Three-byte form: 0xe0 lead byte, then two continuation bytes of six bits each. */
50 out
[u_len
++] = (unsigned char)(0xe0 | (uni
>> 12));
51 out
[u_len
++] = (unsigned char)(0x80 | ((uni
>> 6) & 0x3f));
52 out
[u_len
++] = (unsigned char)(0x80 | (uni
& 0x3f));
/*
 * udf_char2uni_utf8 - decode UTF-8 bytes read from in.
 *
 * The visible code loops while u_len < boundlen, accumulates the decoded
 * value in utf_char, and classifies each byte c by its high bits.
 *
 * NOTE(review): the rest of the parameter list, the loop increment, the
 * bodies of the lead-byte branches, error handling and the return value are
 * not visible in this view; confirm against the full source.
 */
57 static int udf_char2uni_utf8(const unsigned char *in
,
61 unsigned int utf_char
;
/* No increment clause in the loop header; u_len must be advanced in the body (not visible here). */
67 for (u_len
= 0; u_len
< boundlen
;) {
70 /* Complete a multi-byte UTF-8 character */
/* Each continuation byte contributes its low six bits to the accumulated value. */
72 utf_char
= (utf_char
<< 6) | (c
& 0x3f);
76 /* Check for a multi-byte UTF-8 character */
78 /* Start a multi-byte UTF-8 character */
/*
 * Lead-byte tests, from the shortest prefix (110xxxxx) to the longest
 * (1111110x). What each branch does is not visible in this view.
 */
79 if ((c
& 0xe0) == 0xc0) {
82 } else if ((c
& 0xf0) == 0xe0) {
85 } else if ((c
& 0xf8) == 0xf0) {
88 } else if ((c
& 0xfc) == 0xf8) {
91 } else if ((c
& 0xfe) == 0xfc) {
100 /* Single byte UTF-8 character (most common) */
/*
 * Substitute character: udf_name_conv_char() assigns it to the character
 * being converted (the full guarding condition is not visible in this view).
 */
114 #define ILLEGAL_CHAR_MARK '_'
118 /* Number of chars we need to store generated CRC to make filename unique */
/*
 * udf_name_conv_char - convert input characters from str_i into str_o by
 * calling conv_f.
 *
 * The input position *str_i_idx advances by u_ch per loop iteration and the
 * output position *str_o_idx advances as bytes are written. The loop ends
 * once gotch is set or the input is exhausted.
 *
 * NOTE(review): several parameters (str_o_idx, str_i_idx, translate), the
 * handling of a successful conversion, the use of needsCRC and the return
 * value are not visible in this view; confirm against the full source.
 */
121 static int udf_name_conv_char(uint8_t *str_o
, int str_o_max_len
,
123 const uint8_t *str_i
, int str_i_max_len
,
125 int u_ch
, int *needsCRC
,
126 int (*conv_f
)(wchar_t, unsigned char *, int),
133 for (; (!gotch
) && (*str_i_idx
< str_i_max_len
); *str_i_idx
+= u_ch
) {
/* Output buffer already full; the action taken is not visible in this view. */
134 if (*str_o_idx
>= str_o_max_len
) {
139 /* Expand OSTA compressed Unicode to Unicode */
140 c
= str_i
[*str_i_idx
];
/* Combine with the following byte into a 16-bit value (presumably only when u_ch is 2 -- the guard is not visible; confirm). */
142 c
= (c
<< 8) | str_i
[*str_i_idx
+ 1];
/* In translate mode, '/' and NUL are singled out. */
144 if (translate
&& (c
== '/' || c
== 0))
153 c
= ILLEGAL_CHAR_MARK
;
/* Convert c into the space remaining in str_o; len receives conv_f's result. */
157 len
= conv_f(c
, &str_o
[*str_o_idx
], str_o_max_len
- *str_o_idx
);
158 /* Valid character? */
161 else if (len
== -ENAMETOOLONG
) {
/* Emit '?' as a one-byte placeholder (the branch this belongs to is not visible in this view). */
165 str_o
[(*str_o_idx
)++] = '?';
/*
 * udf_name_from_CS0 - build an output name in str_o (bounded by str_max_len)
 * from the input bytes ocu/ocu_len, converting characters through conv_f.
 *
 * Steps that are visible in this view, in source order:
 *  - str_max_len <= 0 is special-cased, and str_o is zero-filled on at
 *    least two paths (one of them next to the unknown-compression error);
 *  - a cmp_id other than 8 or 16 is reported with pr_err();
 *  - an ocu_len that is not a multiple of u_ch is reported with pr_err();
 *  - the input is scanned backwards, at most EXT_SIZE characters, looking
 *    for an extension;
 *  - the extension is converted into ext[] behind an EXT_MARK byte;
 *  - the name itself is converted into str_o with udf_name_conv_char();
 *  - a CRC of the whole input is formatted as hex into crc[] and copied
 *    into str_o, followed by the converted extension.
 *
 * NOTE(review): many lines of this function are missing from this view
 * (the last parameter(s), how cmp_id/u_ch/needsCRC are set, most branch
 * bodies, crc[0], and the return value). The conditions under which the CRC
 * is appended are only partly visible; confirm against the full source.
 */
172 static int udf_name_from_CS0(uint8_t *str_o
, int str_max_len
,
173 const uint8_t *ocu
, int ocu_len
,
174 int (*conv_f
)(wchar_t, unsigned char *, int),
182 int ext_i_len
, ext_max_len
;
183 int str_o_len
= 0; /* Length of resulting output */
184 int ext_o_len
= 0; /* Extension output length */
185 int ext_crc_len
= 0; /* Extension output length if used with CRC */
186 int i_ext
= -1; /* Extension position in input buffer */
187 int o_crc
= 0; /* Rightmost possible output pos for CRC+ext */
188 unsigned short valueCRC
;
189 uint8_t ext
[EXT_SIZE
* NLS_MAX_CHARSET_SIZE
+ 1];
190 uint8_t crc
[CRC_LEN
];
192 if (str_max_len
<= 0)
196 memset(str_o
, 0, str_max_len
);
/* Only compression codes 8 and 16 are accepted. */
201 if (cmp_id
!= 8 && cmp_id
!= 16) {
202 memset(str_o
, 0, str_max_len
);
203 pr_err("unknown compression code (%u)\n", cmp_id
);
/* The input length must be a whole number of u_ch-sized characters. */
211 if (ocu_len
% u_ch
) {
212 pr_err("incorrect filename length (%d)\n", ocu_len
+ 1);
217 /* Look for extension */
/* Walk backwards from the last character, examining at most EXT_SIZE characters. */
218 for (idx
= ocu_len
- u_ch
, ext_i_len
= 0;
219 (idx
>= 0) && (ext_i_len
< EXT_SIZE
);
220 idx
-= u_ch
, ext_i_len
++) {
223 c
= (c
<< 8) | ocu
[idx
+ 1];
232 /* Convert extension */
/* The extension may use neither more than ext[] holds nor more than the caller's buffer. */
233 ext_max_len
= min_t(int, sizeof(ext
), str_max_len
);
234 ext
[ext_o_len
++] = EXT_MARK
;
236 while (udf_name_conv_char(ext
, ext_max_len
, &ext_o_len
,
239 conv_f
, translate
)) {
/* Remember the longest extension length that still leaves room for CRC_LEN bytes. */
240 if ((ext_o_len
+ CRC_LEN
) < str_max_len
)
241 ext_crc_len
= ext_o_len
;
/* In translate mode, reaching the extension position is handled specially (body not visible). */
248 if (translate
&& (idx
== i_ext
)) {
249 if (str_o_len
> (str_max_len
- ext_o_len
))
254 if (!udf_name_conv_char(str_o
, str_max_len
, &str_o_len
,
256 u_ch
, &needsCRC
, conv_f
, translate
))
260 (str_o_len
<= (str_max_len
- ext_o_len
- CRC_LEN
)))
/* Matches an output name of exactly "." or "..". */
265 if (str_o_len
<= 2 && str_o
[0] == '.' &&
266 (str_o_len
== 1 || str_o
[1] == '.'))
/* CRC is computed over the original input bytes, not over the converted output. */
270 valueCRC
= crc_itu_t(0, ocu
, ocu_len
);
/* crc[1..4]: hex_asc_upper_hi/lo of the high byte, then of the low byte (crc[0] is set on a line not visible here). */
272 crc
[1] = hex_asc_upper_hi(valueCRC
>> 8);
273 crc
[2] = hex_asc_upper_lo(valueCRC
>> 8);
274 crc
[3] = hex_asc_upper_hi(valueCRC
);
275 crc
[4] = hex_asc_upper_lo(valueCRC
);
/* Copy no more of crc[] than still fits in str_o. */
276 len
= min_t(int, CRC_LEN
, str_max_len
- str_o_len
);
277 memcpy(&str_o
[str_o_len
], crc
, len
);
/* Switch to the extension length recorded for use together with the CRC. */
279 ext_o_len
= ext_crc_len
;
/* Append the converted extension and account for it in the output length. */
282 memcpy(&str_o
[str_o_len
], ext
, ext_o_len
);
283 str_o_len
+= ext_o_len
;
/*
 * udf_name_to_CS0 - encode the string str_i/str_len into the buffer ocu
 * (bounded by ocu_max_len), converting each input character to a Unicode
 * value with conv_f.
 *
 * Each converted character is stored as one byte, or as two bytes (high
 * byte first) when max_val is 0xffff.
 *
 * NOTE(review): how max_val, u_ch and u_len are initialised, what happens
 * when a character exceeds max_val or is invalid, how the loop index is
 * advanced by len, and the return value are not visible in this view;
 * confirm against the full source.
 */
290 static int udf_name_to_CS0(uint8_t *ocu
, int ocu_max_len
,
291 const uint8_t *str_i
, int str_len
,
292 int (*conv_f
)(const unsigned char *, int, wchar_t *))
295 unsigned int max_val
;
299 if (ocu_max_len
<= 0)
302 memset(ocu
, 0, ocu_max_len
);
309 for (i
= 0; i
< str_len
; i
++) {
310 /* Name didn't fit? */
311 if (u_len
+ u_ch
> ocu_max_len
)
/* Decode one character from the remaining input; len receives conv_f's result. */
313 len
= conv_f(&str_i
[i
], str_len
- i
, &uni_char
);
316 /* Invalid character, deal with it */
/* Character is not representable at the current width (handling not visible here). */
322 if (uni_char
> max_val
) {
/* Two-byte output: emit the high byte first ... */
329 if (max_val
== 0xffff)
330 ocu
[u_len
++] = (uint8_t)(uni_char
>> 8);
/* ... the low byte is emitted for either width. */
331 ocu
[u_len
++] = (uint8_t)(uni_char
& 0xff);
/*
 * udf_dstrCS0toUTF8 - convert the dstring ocu_i/i_len to UTF-8 in utf_o
 * (bounded by o_len).
 *
 * The used length s_len is read from the last byte of the input field and
 * the conversion is delegated to udf_name_from_CS0() with the UTF-8
 * converter; the result of that call is returned.
 *
 * NOTE(review): the handling of i_len <= 0 and what follows the pr_err()
 * on a bad length are not visible in this view. The final argument 0 is
 * presumably the "translate" flag of udf_name_from_CS0() -- confirm.
 */
338 int udf_dstrCS0toUTF8(uint8_t *utf_o
, int o_len
,
339 const uint8_t *ocu_i
, int i_len
)
/* The last byte of the field holds the length in use. */
344 s_len
= ocu_i
[i_len
- 1];
/* A stored length that does not fit inside the field is reported. */
345 if (s_len
>= i_len
) {
346 pr_err("incorrect dstring lengths (%d/%d)\n",
352 return udf_name_from_CS0(utf_o
, o_len
, ocu_i
, s_len
,
353 udf_uni2char_utf8
, 0);
/*
 * udf_get_filename - convert the name sname/slen into dname (bounded by
 * dlen) using the character conversion selected by the superblock flags.
 *
 * With UDF_FLAG_UTF8 set the UTF-8 converter is used; otherwise, with
 * UDF_FLAG_NLS_MAP set, the uni2char method of the superblock's NLS map is
 * used. The conversion itself is done by udf_name_from_CS0(), called with a
 * final argument of 1.
 *
 * NOTE(review): the fallback when neither flag is set, the checks made
 * before the conversion, and the handling of ret (including the
 * zero-length case mentioned below) are not visible in this view.
 */
356 int udf_get_filename(struct super_block
*sb
, const uint8_t *sname
, int slen
,
357 uint8_t *dname
, int dlen
)
359 int (*conv_f
)(wchar_t, unsigned char *, int);
368 if (UDF_QUERY_FLAG(sb
, UDF_FLAG_UTF8
)) {
369 conv_f
= udf_uni2char_utf8
;
370 } else if (UDF_QUERY_FLAG(sb
, UDF_FLAG_NLS_MAP
)) {
371 conv_f
= UDF_SB(sb
)->s_nls_map
->uni2char
;
375 ret
= udf_name_from_CS0(dname
, dlen
, sname
, slen
, conv_f
, 1);
376 /* Zero length filename isn't valid... */
/*
 * udf_put_filename - encode the name sname/slen into dname (bounded by
 * dlen) using the character conversion selected by the superblock flags.
 *
 * With UDF_FLAG_UTF8 set the UTF-8 decoder is used; otherwise, with
 * UDF_FLAG_NLS_MAP set, the char2uni method of the superblock's NLS map is
 * used. The result of udf_name_to_CS0() is returned unchanged.
 *
 * NOTE(review): the fallback when neither flag is set is not visible in
 * this view; confirm against the full source.
 */
382 int udf_put_filename(struct super_block
*sb
, const uint8_t *sname
, int slen
,
383 uint8_t *dname
, int dlen
)
385 int (*conv_f
)(const unsigned char *, int, wchar_t *);
387 if (UDF_QUERY_FLAG(sb
, UDF_FLAG_UTF8
)) {
388 conv_f
= udf_char2uni_utf8
;
389 } else if (UDF_QUERY_FLAG(sb
, UDF_FLAG_NLS_MAP
)) {
390 conv_f
= UDF_SB(sb
)->s_nls_map
->char2uni
;
394 return udf_name_to_CS0(dname
, dlen
, sname
, slen
, conv_f
);