/*
 * Miscellaneous support
 *
 * Copyright (C) 1997,1999 Martin von Löwis
 * Copyright (C) 1997 Régis Duchesne
 *
 * The utf8 routines are copied from the Python wstrop module.
 */
11 #include "ntfstypes.h"
15 #include <linux/string.h>
16 #include <linux/errno.h>
17 /* FreeBSD doesn't seem to have EILSEQ in errno.h */
19 # define EILSEQ EINVAL
23 /* Converts a single wide character to a sequence of utf8 bytes.
24 * The character is represented in host byte order.
25 * Returns the number of bytes, or 0 on error.
28 to_utf8(ntfs_u16 c
,unsigned char* buf
)
31 return 0; /* No support for embedded 0 runes */
33 if(buf
)buf
[0]=(unsigned char)c
;
38 buf
[0] = 0xc0 | (c
>>6);
39 buf
[1] = 0x80 | (c
& 0x3f);
45 buf
[0] = 0xe0 | (c
>>12);
46 buf
[1] = 0x80 | ((c
>>6) & 0x3f);
47 buf
[2] = 0x80 | (c
& 0x3f);
51 /* We don't support characters above 0xFFFF in NTFS */
55 /* Decodes a sequence of utf8 bytes into a single wide character.
56 * The character is returned in host byte order.
57 * Returns the number of bytes consumed, or 0 on error.
60 from_utf8(const unsigned char* str
,ntfs_u16
*c
)
68 if(*str
<0xc0) /* lead byte must not be 10xxxxxx */
69 return 0; /* is c0 a possible lead byte? */
70 if(*str
<0xe0){ /* 110xxxxx */
73 }else if(*str
<0xf0){ /* 1110xxxx */
76 }else if(*str
<0xf8){ /* 11110xxx */
79 }else /* We don't support characters above 0xFFFF in NTFS */
84 /* all other bytes must be 10xxxxxx */
85 if((str
[i
] & 0xc0) != 0x80)
93 /* Converts wide string to UTF-8. Expects two in- and two out-parameters.
94 * Returns 0 on success, or error code.
95 * The caller has to free the result string.
96 * There is no support for UTF-16, yet
98 static int ntfs_dupuni2utf8(ntfs_u16
* in
, int in_len
,char **out
,int *out_len
)
102 unsigned char *result
;
104 ntfs_debug(DEBUG_NAME1
,"converting l=%d\n",in_len
);
105 /* count the length of the resulting UTF-8 */
106 for(i
=len8
=0;i
<in_len
;i
++){
107 tmp
=to_utf8(NTFS_GETU16(in
+i
),0);
109 /* invalid character */
113 *out
=result
=ntfs_malloc(len8
+1); /* allow for zero-termination */
119 for(i
=len8
=0;i
<in_len
;i
++)
120 len8
+=to_utf8(NTFS_GETU16(in
+i
),result
+len8
);
121 ntfs_debug(DEBUG_NAME1
,"result %p:%s\n",result
,result
);
125 /* Converts an UTF-8 sequence to a wide string. Same conventions as the
128 static int ntfs_duputf82uni(unsigned char* in
, int in_len
,ntfs_u16
** out
,int *out_len
)
135 for(i
=len16
=0;i
<in_len
;i
+=tmp
,len16
++){
136 tmp
=from_utf8(in
+i
,&wtmp
);
140 *out
=result
=ntfs_malloc(2*(len16
+1));
145 for(i
=len16
=0;i
<in_len
;i
+=tmp
,len16
++)
147 tmp
=from_utf8(in
+i
, &wtmp
);
148 NTFS_PUTU16(result
+len16
, wtmp
);
153 /* See above. Produces ISO-8859-1 from wide strings */
154 static int ntfs_dupuni288591(ntfs_u16
* in
,int in_len
,char** out
,int *out_len
)
159 /* check for characters out of range */
160 for(i
=0;i
<in_len
;i
++)
161 if(NTFS_GETU16(in
+i
)>=256)
163 *out
=result
=ntfs_malloc(in_len
+1);
168 for(i
=0;i
<in_len
;i
++)
169 result
[i
]=(unsigned char)NTFS_GETU16(in
+i
);
174 static int ntfs_dup885912uni(unsigned char* in
,int in_len
,ntfs_u16
**out
,int *out_len
)
179 *out
=result
=ntfs_malloc(2*in_len
);
183 for(i
=0;i
<in_len
;i
++)
184 NTFS_PUTU16(result
+i
,in
[i
]);
188 /* Encodings dispatcher */
189 int ntfs_encodeuni(ntfs_volume
*vol
,ntfs_u16
*in
, int in_len
,
190 char **out
, int *out_len
)
192 if(vol
->nct
& nct_utf8
)
193 return ntfs_dupuni2utf8(in
,in_len
,out
,out_len
);
194 else if(vol
->nct
& nct_iso8859_1
)
195 return ntfs_dupuni288591(in
,in_len
,out
,out_len
);
196 else if(vol
->nct
& (nct_map
|nct_uni_xlate
))
197 /* uni_xlate is handled inside map */
198 return ntfs_dupuni2map(vol
,in
,in_len
,out
,out_len
);
200 return EINVAL
; /* unknown encoding */
203 int ntfs_decodeuni(ntfs_volume
*vol
,char *in
, int in_len
,
204 ntfs_u16
**out
, int *out_len
)
206 if(vol
->nct
& nct_utf8
)
207 return ntfs_duputf82uni(in
,in_len
,out
,out_len
);
208 else if(vol
->nct
& nct_iso8859_1
)
209 return ntfs_dup885912uni(in
,in_len
,out
,out_len
);
210 else if(vol
->nct
& (nct_map
| nct_uni_xlate
))
211 return ntfs_dupmap2uni(vol
,in
,in_len
,out
,out_len
);
216 /* Same address space copies */
217 void ntfs_put(ntfs_io
*dest
,void *src
,ntfs_size_t n
)
219 ntfs_memcpy(dest
->param
,src
,n
);
220 ((char*)dest
->param
)+=n
;
223 void ntfs_get(void* dest
,ntfs_io
*src
,ntfs_size_t n
)
225 ntfs_memcpy(dest
,src
->param
,n
);
226 ((char*)src
->param
)+=n
;
/* Allocates size bytes with ntfs_malloc and clears them with ntfs_bzero.
 * Returns the zeroed block, or NULL if the allocation failed.
 */
void *ntfs_calloc(int size)
{
	void *result = ntfs_malloc(size);

	/* only clear the block when the allocation actually succeeded */
	if (result)
		ntfs_bzero(result, size);
	return result;
}
238 /* Copy len Unicode characters from `from` to `to`.
 * to:   destination buffer, declared as char *.
 * from: source buffer holding the Unicode characters, declared as char *.
 * len:  number of characters to copy.
 * NOTE(review): how each character is narrowed, and whether `to` gets a
 * terminating NUL (which would need len+1 bytes), is not established
 * here -- confirm against the function body.
 */
239 void ntfs_uni2ascii(char *to
,char *from
,int len
)
249 /* Copy len ASCII characters from `from` to `to`.
 * to:   destination buffer of 16-bit elements (short int *).
 * from: source ASCII bytes.
 * len:  number of characters to copy.
 * NOTE(review): the byte order used for the stored elements, and whether
 * a terminating zero element is appended, is not established here --
 * confirm against the function body.
 */
250 void ntfs_ascii2uni(short int *to
,char *from
,int len
)
259 /* strncmp for Unicode strings.
 * a, b: the two strings of 16-bit characters to compare.
 * n:    maximum number of characters to compare.
 * NOTE(review): presumably returns a negative, zero or positive value in
 * the manner of strncmp; whether the comparison stops early at a zero
 * character is not established here -- confirm against the function body.
 */
260 int ntfs_uni_strncmp(short int* a
,short int *b
,int n
)
/* strncmp between Unicode and ASCII strings.
 *
 * a: string of 16-bit characters, read through NTFS_GETU16.
 * b: string of single-byte characters.
 * n: number of characters to compare.
 *
 * Returns -1 if the first differing character of a is smaller than that of
 * b, 1 if it is larger, and 0 if the first n characters are equal.
 */
int ntfs_ua_strncmp(short int *a, char *b, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		unsigned int wide = NTFS_GETU16(a + i);
		/*
		 * Compare the byte as an unsigned value. Where plain char
		 * is signed, bytes >= 0x80 would otherwise turn negative
		 * and sort below every 16-bit character.
		 */
		unsigned int narrow = (unsigned char)b[i];

		if (wide < narrow)
			return -1;
		if (narrow < wide)
			return 1;
	}
	return 0;
}
289 /* Convert the NT UTC (based 1.1.1601, in hundred nanosecond units)
290 * into Unix UTC (based 1.1.1970, in seconds)
292 ntfs_time_t
ntfs_ntutc2unixutc(ntfs_time64_t ntutc
)
295 * This is very gross because
296 * 1: We must do 64-bit division on a 32-bit machine
297 * 2: We can't use libgcc for long long operations in the kernel
298 * 3: Floating point math in the kernel would corrupt user data
300 const unsigned int D
= 10000000;
301 unsigned int H
= (unsigned int)(ntutc
>> 32);
302 unsigned int L
= (unsigned int)ntutc
;
303 unsigned int numerator2
;
304 unsigned int lowseconds
;
307 /* It is best to subtract 0x019db1ded53e8000 first. */
308 /* Then the 1601-based date becomes a 1970-based date. */
309 if(L
< (unsigned)0xd53e8000) H
--;
310 L
-= (unsigned)0xd53e8000;
311 H
-= (unsigned)0x019db1de;
314 * Now divide 64-bit numbers on a 32-bit machine :-)
315 * With the subtraction already done, the result fits in 32 bits.
316 * The numerator fits in 56 bits and the denominator fits
317 * in 24 bits, so we can shift by 8 bits to make this work.
320 numerator2
= (H
<<8) | (L
>>24);
321 result
= (numerator2
/ D
); /* shifted 24 right!! */
322 lowseconds
= result
<< 24;
324 numerator2
= ((numerator2
-result
*D
)<<8) | ((L
>>16)&0xff);
325 result
= (numerator2
/ D
); /* shifted 16 right!! */
326 lowseconds
|= result
<< 16;
328 numerator2
= ((numerator2
-result
*D
)<<8) | ((L
>>8)&0xff);
329 result
= (numerator2
/ D
); /* shifted 8 right!! */
330 lowseconds
|= result
<< 8;
332 numerator2
= ((numerator2
-result
*D
)<<8) | (L
&0xff);
333 result
= (numerator2
/ D
); /* not shifted */
334 lowseconds
|= result
;
339 /* Convert the Unix UTC into NT UTC */
340 ntfs_time64_t
ntfs_unixutc2ntutc(ntfs_time_t t
)
342 return ((t
+ (ntfs_time64_t
)(369*365+89)*24*3600) * 10000000);
345 /* Fill index name.
 * buf:  output buffer; characters are stored through *buf++, so the
 *       pointer advances as the name is written.
 * type: integer whose 4-bit groups are turned into uppercase hexadecimal
 *       digits via the hex[] lookup table.
 * The for loop walks index from 24 downwards in steps of 4 and tests
 * whether the 4-bit group of type at that bit position is non-zero.
 * NOTE(review): presumably the scan stops at the most significant
 * non-zero group so that leading zero digits are skipped, and the caller
 * must supply a buffer large enough for the result -- confirm.
 */
348 ntfs_indexname(char *buf
, int type
)
350 char hex
[]="0123456789ABCDEF";
354 for (index
=24; index
>0; index
-=4)
355 if((0xF << index
) & type
)
358 *buf
++ = hex
[(type
>> index
) & 0xF];
366 * c-file-style: "linux"