2 * linux/fs/umsdos/mangle.c
4 * Written 1993 by Jacques Gelinas
6 * Control the mangling of file name to fit msdos name space.
7 * Many optimisations by GLU == dglaude@is1.vub.ac.be (Glaude David)
10 #include <linux/errno.h>
11 #include <linux/string.h>
12 #include <linux/kernel.h>
13 #include <linux/umsdos_fs.h>
15 /* (This file is used outside of the kernel) */
21 * Complete the mangling of the MSDOS fake name
22 * based on the position of the entry in the EMD file.
24 * Simply complete the job of umsdos_parse; fill the extension.
26 * Beware that info->f_pos must be set.
28 void umsdos_manglename (struct umsdos_info
*info
)
30 if (info
->msdos_reject
) {
31 /* #Specification: file name / non MSDOS conforming / mangling
32 * Each non MSDOS conforming file has a special extension
33 * build from the entry position in the EMD file.
35 * This number is then transform in a base 32 number, where
36 * each digit is expressed like hexadecimal number, using
37 * digit and letter, except it uses 22 letters from 'a' to 'v'.
38 * The number 32 comes from 2**5. It is faster to split a binary
39 * number using a base which is a power of two. And I was 32
40 * when I started this project. Pick your answer :-) .
42 * If the result is '0', it is replace with '_', simply
45 * This is true for the first two character of the extension.
46 * The last one is taken from a list of odd character, which
51 * With this scheme, we can produce 9216 ( 9* 32 * 32)
52 * different extensions which should not clash with any useful
53 * extension already popular or meaningful. Since most directory
54 * have much less than 32 * 32 files in it, the first character
55 * of the extension of any mangled name will be {.
57 * Here are the reason to do this (this kind of mangling).
59 * -The mangling is deterministic. Just by the extension, we
60 * are able to locate the entry in the EMD file.
62 * -By keeping to beginning of the file name almost unchanged,
63 * we are helping the MSDOS user.
65 * -The mangling produces names not too ugly, so an msdos user
66 * may live with it (remember it, type it, etc...).
68 * -The mangling produces names ugly enough so no one will
69 * ever think of using such a name in real life. This is not
70 * fool proof. I don't think there is a total solution to this.
73 char *pt
= info
->fake
.fname
+ info
->fake
.len
;
74 /* lookup for encoding the last character of the extension
75 * It contains valid character after the ugly one to make sure
76 * even if someone overflows the 32 * 32 * 9 limit, it still
79 #define SPECIAL_MANGLING '{','}','(',')','!','`','^','&','@'
80 static char lookup3
[] =
83 /* This is the start of lookup12 */
84 '_', '1', '2', '3', '4', '5', '6', '7', '8', '9',
85 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
86 'p', 'q', 'r', 's', 't', 'u', 'v'
89 #define lookup12 (lookup3+9)
90 entry_num
= info
->f_pos
/ UMSDOS_REC_SIZE
;
91 if (entry_num
> (9* 32 * 32)){
92 printk (KERN_WARNING
"UMSDOS: more than 9216 files in a directory.\n"
93 "This may break the mangling strategy.\n"
94 "Not a killer problem. See doc.\n");
97 *pt
++ = lookup3
[(entry_num
>> 10) & 31];
98 *pt
++ = lookup12
[(entry_num
>> 5) & 31];
99 *pt
++ = lookup12
[entry_num
& 31];
100 *pt
= '\0'; /* help doing printk */
102 info
->msdos_reject
= 0; /* Avoid mangling twice */
107 * Evaluate the record size needed to store of name of len character.
108 * The value returned is a multiple of UMSDOS_REC_SIZE.
110 int umsdos_evalrecsize (int len
)
112 struct umsdos_dirent dirent
;
113 int nbrec
= 1 + ((len
- 1 + (dirent
.name
- (char *) &dirent
))
116 return nbrec
* UMSDOS_REC_SIZE
;
118 * GLU This should be inlined or something to speed it up to the max.
119 * GLU nbrec is absolutely not needed to return the value.
123 int umsdos_evalrecsize_old (int len
)
125 struct umsdos_dirent dirent
;
126 int size
= len
+ (dirent
.name
- (char *) &dirent
);
127 int nbrec
= size
/ UMSDOS_REC_SIZE
;
128 int extra
= size
% UMSDOS_REC_SIZE
;
132 return nbrec
* UMSDOS_REC_SIZE
;
138 * Fill the struct info with the full and msdos name of a file
139 * Return 0 if all is OK, a negative error code otherwise.
144 struct umsdos_info
*info
)
146 int ret
= -ENAMETOOLONG
;
148 /* #Specification: file name / too long
149 * If a file name exceeds the UMSDOS maximum, the file name is silently
150 * truncated. This makes it conformant with the other file systems
151 * of Linux (minix and ext2 at least).
153 if (len
> UMSDOS_MAXNAME
)
154 len
= UMSDOS_MAXNAME
;
156 const char *firstpt
= NULL
; /* First place we saw a "." in fname */
158 /* #Specification: file name / non MSDOS conforming / base length 0
159 * file names beginning with a period '.' are invalid for MS-DOS.
160 * It needs absolutely a base name. So the file name is mangled
162 int ivldchar
= fname
[0] == '.'; /* At least one invalid character */
167 * cardinal_per_size tells if there exists at least one
168 * DOS pseudo device on length n. See the test below.
170 static const char cardinal_per_size
[9] =
172 0, 0, 0, 1, 1, 0, 1, 0, 1
176 * lkp translates every character to an acceptable character (for DOS).
177 * When lkp[n] == n, it means also it is an acceptable one.
178 * So it serves both as a flag and as a translator.
180 static char lkp
[256];
185 * Initialisation of the array is easier and less error
189 static const char *spc
= "\"*+,/:;<=>?[\\]|~";
192 for (i
= 0; i
<= 32; i
++)
194 for (i
= 33; i
< 'A'; i
++)
196 for (i
= 'A'; i
<= 'Z'; i
++)
197 lkp
[i
] = (char) (i
+ ('a' - 'A'));
198 for (i
= 'Z' + 1; i
< 127; i
++)
200 for (i
= 128; i
< 256; i
++)
205 lkp
[(unsigned char) (*spc
++)] = '#';
208 * File names longer than 8+'.'+3 are invalid for MS-DOS,
209 * so the file name is to be mangled--no further test is needed.
210 * This speeds up handling of long names.
211 * The position of the last point is no more necessary anyway.
213 if (len
<= (8 + 1 + 3)) {
214 const char *pt
= fname
;
215 const char *endpt
= fname
+ len
;
219 if (firstpt
!= NULL
) {
220 /* 2 . in a file name. Reject */
224 int extlen
= (int) (endpt
- pt
);
227 if (firstpt
- fname
> 8) {
228 /* base name longer than 8: reject */
231 } else if (extlen
> 4) {
232 /* Extension longer than 4 (including .): reject */
235 } else if (extlen
== 1) {
236 /* #Specification: file name / non MSDOS conforming / last char == .
237 * If the last character of a file name is
238 * a period, mangling is applied. MS-DOS does
239 * not support those file names.
243 } else if (extlen
== 4) {
244 /* #Specification: file name / non MSDOS conforming / mangling clash
245 * To avoid clash with the umsdos mangling, any file
246 * with a special character as the first character
247 * of the extension will be mangled. This solves the
252 * # FILE is invalid for DOS, so mangling is applied
253 * # file.{_1 is created in the DOS directory
255 * # Two UMSDOS files point to a single DOS entry.
256 * # So file.{_1 has to be mangled.
259 static char special
[] =
261 SPECIAL_MANGLING
, '\0'
264 if (strchr (special
, firstpt
[1]) != NULL
) {
270 } else if (lkp
[(unsigned char) (*pt
)] != *pt
) {
280 || (firstpt
== NULL
&& len
> 8)
281 || (len
== UMSDOS_EMD_NAMELEN
282 && memcmp (fname
, UMSDOS_EMD_FILE
, UMSDOS_EMD_NAMELEN
) == 0)) {
283 /* #Specification: file name / --linux-.---
284 * The name of the EMD file --linux-.--- is mapped to a mangled
285 * name. So UMSDOS does not restrict its use.
287 /* #Specification: file name / non MSDOS conforming / mangling
288 * Non MSDOS conforming file names must use some alias to fit
289 * in the MSDOS name space.
291 * The strategy is simple. The name is simply truncated to
292 * 8 characters. Periods are replaced with underscores and a
293 * number is given as an extension. This number corresponds
294 * to the entry number in the EMD file. The EMD file
295 * only needs to carry the real name.
297 * Upper case is also converted to lower case.
298 * Control characters are converted to #.
299 * Spaces are converted to #.
300 * The following characters are also converted to #.
302 * " * + , / : ; < = > ? [ \ ] | ~
305 * Sometimes the problem is not in MS-DOS itself but in
309 char *pt
= info
->fake
.fname
;
311 base_len
= msdos_len
= (msdos_len
> 8) ? 8 : msdos_len
;
313 * There is no '.' any more so we know for a fact that
314 * the base length is the length.
316 memcpy (info
->fake
.fname
, fname
, msdos_len
);
317 for (i
= 0; i
< msdos_len
; i
++, pt
++)
318 *pt
= lkp
[(unsigned char) (*pt
)];
319 *pt
= '\0'; /* GLU We force null termination. */
320 info
->msdos_reject
= 1;
322 * The numeric extension is added only when we know
323 * the position in the EMD file, in umsdos_newentry(),
324 * umsdos_delentry(), and umsdos_findentry().
325 * See umsdos_manglename().
328 /* Conforming MSDOS file name */
329 strncpy (info
->fake
.fname
, fname
, len
);
330 info
->msdos_reject
= 0;
331 base_len
= firstpt
!= NULL
? (int) (firstpt
- fname
) : len
;
333 if (cardinal_per_size
[base_len
]) {
334 /* #Specification: file name / MSDOS devices / mangling
335 * To avoid files unreachable from MS-DOS, any MS-DOS conforming
336 * file with a basename equal to one of the MS-DOS pseudo
337 * devices will be mangled.
339 * If a file such as "prn" was created, it would be unreachable
340 * under MS-DOS because "prn" is assumed to be the printer, even
341 * if the file does have an extension.
343 * Since the extension is unimportant to MS-DOS, we must patch
344 * the basename also. We simply insert a minus '-'. To avoid
345 * conflict with valid file with a minus in front (such as
346 * "-prn"), we add a mangled extension like any other
349 * Here is the list of DOS pseudo devices:
352 * "prn","con","aux","nul",
353 * "lpt1","lpt2","lpt3","lpt4",
354 * "com1","com2","com3","com4",
358 * and some standard ones for common DOS programs
360 * "emmxxxx0","xmsxxxx0","setverxx"
362 * (Thanks to Chris Hall <cah17@phoenix.cambridge.ac.uk>
363 * for pointing these out to me).
365 * Is there one missing?
367 /* This table must be ordered by length */
368 static const char *tbdev
[] =
370 "prn", "con", "aux", "nul",
371 "lpt1", "lpt2", "lpt3", "lpt4",
372 "com1", "com2", "com3", "com4",
374 "emmxxxx0", "xmsxxxx0", "setverxx"
377 /* Tell where to find in tbdev[], the first name of */
378 /* a certain length */
379 static const char start_ind_dev
[9] =
381 0, 0, 0, 4, 12, 12, 13, 13, 16
386 for (i
= start_ind_dev
[base_len
- 1]; i
< start_ind_dev
[base_len
]; i
++) {
387 if (memcmp (info
->fake
.fname
, tbdev
[i
], base_len
) == 0) {
388 memcpy (basen
, info
->fake
.fname
, base_len
);
389 basen
[base_len
] = '\0'; /* GLU We force null termination. */
391 * GLU We do that only if necessary; we try to do the
392 * GLU simple thing in the usual circumstance.
394 info
->fake
.fname
[0] = '-';
395 strcpy (info
->fake
.fname
+ 1, basen
); /* GLU We already guaranteed a null would be at the end. */
396 msdos_len
= (base_len
== 8) ? 8 : base_len
+ 1;
397 info
->msdos_reject
= 1;
402 info
->fake
.fname
[msdos_len
] = '\0'; /* Help doing printk */
403 /* GLU This zero should (always?) be there already. */
404 info
->fake
.len
= msdos_len
;
405 /* Why not use info->fake.len everywhere? Is it longer?
407 memcpy (info
->entry
.name
, fname
, len
);
408 info
->entry
.name
[len
] = '\0'; /* for printk */
409 info
->entry
.name_len
= len
;
413 * Evaluate how many records are needed to store this entry.
415 info
->recsize
= umsdos_evalrecsize (len
);
422 char *fname
; /* Name to validate */
423 int msdos_reject
; /* Expected msdos_reject flag */
424 char *msname
; /* Expected msdos name */
427 struct MANG_TEST tb
[] =
430 "hello.1", 0, "hello.1",
431 "hello.1_", 0, "hello.1_",
436 "Hello.1", 1, "hello.1",
437 "Hello.c", 1, "hello.c",
440 * I find the three examples below very unfortunate. I propose to
441 * convert them to lower case in a quick preliminary pass, then test
442 * whether there are other troublesome characters. I have not made
443 * this change, because it is not easy, but I wanted to mention the
444 * principle. Obviously something like that would increase the chance
445 * of collisions, for example between "HELLO" and "Hello", but these
446 * can be treated elsewhere along with the other collisions.
450 "Hello.1", 1, "hello_1",
451 "Hello.c", 1, "hello_c",
454 "hello.{_1", 1, "hello_{_",
455 "hello\t", 1, "hello#",
456 "hello.1.1", 1, "hello_1_",
457 "hel,lo", 1, "hel#lo",
458 "Salut.Tu.vas.bien?", 1, "salut_tu",
459 ".profile", 1, "_profile",
462 "clock$.x", 1, "-clock$",
463 "emmxxxx0", 1, "-emmxxxx",
464 "emmxxxx0.abcd", 1, "-emmxxxx",
467 "prn.abc", 1, "-prn",
470 * GLU WARNING: the results of these are different with my version
471 * GLU of mangling compared to the original one.
472 * GLU CAUSE: the manner of calculating the baselen variable.
473 * GLU For you they are always 3.
474 * GLU For me they are respectively 7, 8, and 8.
477 "PRN.abc", 1, "prn_abc",
478 "Prn.abcd", 1, "prn_abcd",
479 "prn.abcd", 1, "prn_abcd",
480 "Prn.abcdefghij", 1, "prn_abcd"
483 int main (int argc
, char *argv
[])
487 printf ("Testing the umsdos_parse.\n");
488 for (i
= 0; i
< sizeof (tb
) / sizeof (tb
[0]); i
++) {
489 struct MANG_TEST
*pttb
= tb
+ i
;
490 struct umsdos_info info
;
491 int ok
= umsdos_parse (pttb
->fname
, strlen (pttb
->fname
), &info
);
493 if (strcmp (info
.fake
.fname
, pttb
->msname
) != 0) {
494 printf ("**** %s -> ", pttb
->fname
);
495 printf ("%s <> %s\n", info
.fake
.fname
, pttb
->msname
);
496 } else if (info
.msdos_reject
!= pttb
->msdos_reject
) {
497 printf ("**** %s -> %s ", pttb
->fname
, pttb
->msname
);
498 printf ("%d <> %d\n", info
.msdos_reject
, pttb
->msdos_reject
);
500 printf (" %s -> %s %d\n", pttb
->fname
, pttb
->msname
501 ,pttb
->msdos_reject
);
504 printf ("Testing the new umsdos_evalrecsize.");
505 for (i
= 0; i
< UMSDOS_MAXNAME
; i
++) {
506 rnew
= umsdos_evalrecsize (i
);
507 rold
= umsdos_evalrecsize_old (i
);
508 if (!(i
% UMSDOS_REC_SIZE
)) {
509 printf ("\n%d:\t", i
);
512 printf ("**** %d newres: %d != %d \n", i
, rnew
, rold
);
517 printf ("\nEnd of Testing.\n");