libntfs-3g/unistr.c

   1 /**
   2  * unistr.c - Unicode string handling. Originated from the Linux-NTFS project.
   3  *
   4  * Copyright (c) 2000-2004 Anton Altaparmakov
   5  * Copyright (c) 2002-2009 Szabolcs Szakacsits
   6  * Copyright (c) 2008-2015 Jean-Pierre Andre
   7  * Copyright (c) 2008      Bernhard Kaindl
   8  *
   9  * This program/include file is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU General Public License as published
  11  * by the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program/include file is distributed in the hope that it will be
  15  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
  16  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program (in the main directory of the NTFS-3G
  21  * distribution in the file COPYING); if not, write to the Free Software
  22  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  23  */
  24
  25 #ifdef HAVE_CONFIG_H
  26 #include "config.h"
  27 #endif
  28
  29 #ifdef HAVE_STDIO_H
  30 #include <stdio.h>
  31 #endif
  32 #ifdef HAVE_STDLIB_H
  33 #include <stdlib.h>
  34 #endif
  35 #ifdef HAVE_WCHAR_H
  36 #include <wchar.h>
  37 #endif
  38 #ifdef HAVE_STRING_H
  39 #include <string.h>
  40 #endif
  41 #ifdef HAVE_ERRNO_H
  42 #include <errno.h>
  43 #endif
  44 #ifdef HAVE_LOCALE_H
  45 #include <locale.h>
  46 #endif
  47
  48 #if defined(__APPLE__) || defined(__DARWIN__)
  49 #ifdef ENABLE_NFCONV
  50 #include <CoreFoundation/CoreFoundation.h>
  51 #endif /* ENABLE_NFCONV */
  52 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
  53
  54 #include "compat.h"
  55 #include "attrib.h"
  56 #include "types.h"
  57 #include "unistr.h"
  58 #include "debug.h"
  59 #include "logging.h"
  60 #include "misc.h"
  61
  62 #ifndef ALLOW_BROKEN_UNICODE
/* Erik: broken UTF-16 surrogate pairs, U+FFFE and U+FFFF are allowed by
 * default; this choice is open to debate. */
  65 #define ALLOW_BROKEN_UNICODE 1
  66 #endif /* !defined(ALLOW_BROKEN_UNICODE) */
  67
  68 /*
  69  * IMPORTANT
  70  * =========
  71  *
  72  * All these routines assume that the Unicode characters are in little endian
  73  * encoding inside the strings!!!
  74  */
  75
  76 static int use_utf8 = 1; /* use UTF-8 encoding for file names */
  77
  78 #if defined(__APPLE__) || defined(__DARWIN__)
  79 #ifdef ENABLE_NFCONV
  80 /**
  81  * This variable controls whether or not automatic normalization form conversion
  82  * should be performed when translating NTFS unicode file names to UTF-8.
  83  * Defaults to on, but can be controlled from the outside using the function
  84  *   int ntfs_macosx_normalize_filenames(int normalize);
  85  */
  86 static int nfconvert_utf8 = 1;
  87 #endif /* ENABLE_NFCONV */
  88 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
  89
  90 /*
  91  * This is used by the name collation functions to quickly determine what
  92  * characters are (in)valid.
  93  */
  94 #if 0
  95 static const u8 legal_ansi_char_array[0x40] = {
  96         0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
  97         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
  98
  99         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 100         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 101
 102         0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
 103         0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
 104
 105         0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
 106         0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
 107 };
 108 #endif
 109
 110 /**
 111  * ntfs_names_are_equal - compare two Unicode names for equality
 112  * @s1:                 name to compare to @s2
 113  * @s1_len:             length in Unicode characters of @s1
 114  * @s2:                 name to compare to @s1
 115  * @s2_len:             length in Unicode characters of @s2
 116  * @ic:                 ignore case bool
 117  * @upcase:             upcase table (only if @ic == IGNORE_CASE)
 118  * @upcase_size:        length in Unicode characters of @upcase (if present)
 119  *
 120  * Compare the names @s1 and @s2 and return TRUE (1) if the names are
 121  * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE,
 122  * the @upcase table is used to perform a case insensitive comparison.
 123  */
 124 BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len,
 125                 const ntfschar *s2, size_t s2_len,
 126                 const IGNORE_CASE_BOOL ic,
 127                 const ntfschar *upcase, const u32 upcase_size)
 128 {
 129         if (s1_len != s2_len)
 130                 return FALSE;
 131         if (!s1_len)
 132                 return TRUE;
 133         if (ic == CASE_SENSITIVE)
 134                 return ntfs_ucsncmp(s1, s2, s1_len) ? FALSE: TRUE;
 135         return ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size) ? FALSE:
 136                                                                        TRUE;
 137 }
 138
 139 /*
 140  * ntfs_names_full_collate() fully collate two Unicode names
 141  *
 142  * @name1:      first Unicode name to compare
 143  * @name1_len:  length of first Unicode name to compare
 144  * @name2:      second Unicode name to compare
 145  * @name2_len:  length of second Unicode name to compare
 146  * @ic:         either CASE_SENSITIVE or IGNORE_CASE (see below)
 147  * @upcase:     upcase table
 148  * @upcase_len: upcase table size
 149  *
 150  * If @ic is CASE_SENSITIVE, then the names are compared primarily ignoring
 151  * case, but if the names are equal ignoring case, then they are compared
 152  * case-sensitively.  As an example, "abc" would collate before "BCD" (since
 153  * "abc" and "BCD" differ ignoring case and 'A' < 'B') but after "ABC" (since
 154  * "ABC" and "abc" are equal ignoring case and 'A' < 'a').  This matches the
 155  * collation order of filenames as indexed in NTFS directories.
 156  *
 157  * If @ic is IGNORE_CASE, then the names are only compared case-insensitively
 158  * and are considered to match if and only if they are equal ignoring case.
 159  *
 160  * Returns:
 161  *  -1 if the first name collates before the second one,
 162  *   0 if the names match, or
 163  *   1 if the second name collates before the first one
 164  */
 165 int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len,
 166                 const ntfschar *name2, const u32 name2_len,
 167                 const IGNORE_CASE_BOOL ic, const ntfschar *upcase,
 168                 const u32 upcase_len)
 169 {
 170         u32 cnt;
 171         u16 c1, c2;
 172         u16 u1, u2;
 173
 174 #ifdef DEBUG
 175         if (!name1 || !name2 || !upcase || !upcase_len) {
 176                 ntfs_log_debug("ntfs_names_collate received NULL pointer!\n");
 177                 exit(1);
 178         }
 179 #endif
 180         cnt = min(name1_len, name2_len);
 181         if (cnt > 0) {
 182                 if (ic == CASE_SENSITIVE) {
 183                         while (--cnt && (*name1 == *name2)) {
 184                                 name1++;
 185                                 name2++;
 186                         }
 187                         u1 = c1 = le16_to_cpu(*name1);
 188                         u2 = c2 = le16_to_cpu(*name2);
 189                         if (u1 < upcase_len)
 190                                 u1 = le16_to_cpu(upcase[u1]);
 191                         if (u2 < upcase_len)
 192                                 u2 = le16_to_cpu(upcase[u2]);
 193                         if ((u1 == u2) && cnt)
 194                                 do {
 195                                         name1++;
 196                                         u1 = le16_to_cpu(*name1);
 197                                         name2++;
 198                                         u2 = le16_to_cpu(*name2);
 199                                         if (u1 < upcase_len)
 200                                                 u1 = le16_to_cpu(upcase[u1]);
 201                                         if (u2 < upcase_len)
 202                                                 u2 = le16_to_cpu(upcase[u2]);
 203                                 } while ((u1 == u2) && --cnt);
 204                         if (u1 < u2)
 205                                 return -1;
 206                         if (u1 > u2)
 207                                 return 1;
 208                         if (name1_len < name2_len)
 209                                 return -1;
 210                         if (name1_len > name2_len)
 211                                 return 1;
 212                         if (c1 < c2)
 213                                 return -1;
 214                         if (c1 > c2)
 215                                 return 1;
 216                 } else {
 217                         do {
 218                                 u1 = le16_to_cpu(*name1);
 219                                 name1++;
 220                                 u2 = le16_to_cpu(*name2);
 221                                 name2++;
 222                                 if (u1 < upcase_len)
 223                                         u1 = le16_to_cpu(upcase[u1]);
 224                                 if (u2 < upcase_len)
 225                                         u2 = le16_to_cpu(upcase[u2]);
 226                         } while ((u1 == u2) && --cnt);
 227                         if (u1 < u2)
 228                                 return -1;
 229                         if (u1 > u2)
 230                                 return 1;
 231                         if (name1_len < name2_len)
 232                                 return -1;
 233                         if (name1_len > name2_len)
 234                                 return 1;
 235                 }
 236         } else {
 237                 if (name1_len < name2_len)
 238                         return -1;
 239                 if (name1_len > name2_len)
 240                         return 1;
 241         }
 242         return 0;
 243 }
 244
 245 /**
 246  * ntfs_ucsncmp - compare two little endian Unicode strings
 247  * @s1:         first string
 248  * @s2:         second string
 249  * @n:          maximum unicode characters to compare
 250  *
 251  * Compare the first @n characters of the Unicode strings @s1 and @s2,
 252  * The strings in little endian format and appropriate le16_to_cpu()
 253  * conversion is performed on non-little endian machines.
 254  *
 255  * The function returns an integer less than, equal to, or greater than zero
 256  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
 257  * to be less than, to match, or be greater than @s2.
 258  */
 259 int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n)
 260 {
 261         u16 c1, c2;
 262         size_t i;
 263
 264 #ifdef DEBUG
 265         if (!s1 || !s2) {
 266                 ntfs_log_debug("ntfs_wcsncmp() received NULL pointer!\n");
 267                 exit(1);
 268         }
 269 #endif
 270         for (i = 0; i < n; ++i) {
 271                 c1 = le16_to_cpu(s1[i]);
 272                 c2 = le16_to_cpu(s2[i]);
 273                 if (c1 < c2)
 274                         return -1;
 275                 if (c1 > c2)
 276                         return 1;
 277                 if (!c1)
 278                         break;
 279         }
 280         return 0;
 281 }
 282
 283 /**
 284  * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case
 285  * @s1:                 first string
 286  * @s2:                 second string
 287  * @n:                  maximum unicode characters to compare
 288  * @upcase:             upcase table
 289  * @upcase_size:        upcase table size in Unicode characters
 290  *
 291  * Compare the first @n characters of the Unicode strings @s1 and @s2,
 292  * ignoring case. The strings in little endian format and appropriate
 293  * le16_to_cpu() conversion is performed on non-little endian machines.
 294  *
 295  * Each character is uppercased using the @upcase table before the comparison.
 296  *
 297  * The function returns an integer less than, equal to, or greater than zero
 298  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
 299  * to be less than, to match, or be greater than @s2.
 300  */
 301 int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n,
 302                 const ntfschar *upcase, const u32 upcase_size)
 303 {
 304         u16 c1, c2;
 305         size_t i;
 306
 307 #ifdef DEBUG
 308         if (!s1 || !s2 || !upcase) {
 309                 ntfs_log_debug("ntfs_wcsncasecmp() received NULL pointer!\n");
 310                 exit(1);
 311         }
 312 #endif
 313         for (i = 0; i < n; ++i) {
 314                 if ((c1 = le16_to_cpu(s1[i])) < upcase_size)
 315                         c1 = le16_to_cpu(upcase[c1]);
 316                 if ((c2 = le16_to_cpu(s2[i])) < upcase_size)
 317                         c2 = le16_to_cpu(upcase[c2]);
 318                 if (c1 < c2)
 319                         return -1;
 320                 if (c1 > c2)
 321                         return 1;
 322                 if (!c1)
 323                         break;
 324         }
 325         return 0;
 326 }
 327
 328 /**
 329  * ntfs_ucsnlen - determine the length of a little endian Unicode string
 330  * @s:          pointer to Unicode string
 331  * @maxlen:     maximum length of string @s
 332  *
 333  * Return the number of Unicode characters in the little endian Unicode
 334  * string @s up to a maximum of maxlen Unicode characters, not including
 335  * the terminating (ntfschar)'\0'. If there is no (ntfschar)'\0' between @s
 336  * and @s + @maxlen, @maxlen is returned.
 337  *
 338  * This function never looks beyond @s + @maxlen.
 339  */
 340 u32 ntfs_ucsnlen(const ntfschar *s, u32 maxlen)
 341 {
 342         u32 i;
 343
 344         for (i = 0; i < maxlen; i++) {
 345                 if (!le16_to_cpu(s[i]))
 346                         break;
 347         }
 348         return i;
 349 }
 350
 351 /**
 352  * ntfs_ucsndup - duplicate little endian Unicode string
 353  * @s:          pointer to Unicode string
 354  * @maxlen:     maximum length of string @s
 355  *
 356  * Return a pointer to a new little endian Unicode string which is a duplicate
 357  * of the string s.  Memory for the new string is obtained with ntfs_malloc(3),
 358  * and can be freed with free(3).
 359  *
 360  * A maximum of @maxlen Unicode characters are copied and a terminating
 361  * (ntfschar)'\0' little endian Unicode character is added.
 362  *
 363  * This function never looks beyond @s + @maxlen.
 364  *
 365  * Return a pointer to the new little endian Unicode string on success and NULL
 366  * on failure with errno set to the error code.
 367  */
 368 ntfschar *ntfs_ucsndup(const ntfschar *s, u32 maxlen)
 369 {
 370         ntfschar *dst;
 371         u32 len;
 372
 373         len = ntfs_ucsnlen(s, maxlen);
 374         dst = ntfs_malloc((len + 1) * sizeof(ntfschar));
 375         if (dst) {
 376                 memcpy(dst, s, len * sizeof(ntfschar));
 377                 dst[len] = const_cpu_to_le16(L'\0');
 378         }
 379         return dst;
 380 }
 381
 382 /**
 383  * ntfs_name_upcase - Map an Unicode name to its uppercase equivalent
 384  * @name:
 385  * @name_len:
 386  * @upcase:
 387  * @upcase_len:
 388  *
 389  * Description...
 390  *
 391  * Returns:
 392  */
 393 void ntfs_name_upcase(ntfschar *name, u32 name_len, const ntfschar *upcase,
 394                 const u32 upcase_len)
 395 {
 396         u32 i;
 397         u16 u;
 398
 399         for (i = 0; i < name_len; i++)
 400                 if ((u = le16_to_cpu(name[i])) < upcase_len)
 401                         name[i] = upcase[u];
 402 }
 403
 404 /**
 405  * ntfs_name_locase - Map a Unicode name to its lowercase equivalent
 406  */
 407 void ntfs_name_locase(ntfschar *name, u32 name_len, const ntfschar *locase,
 408                 const u32 locase_len)
 409 {
 410         u32 i;
 411         u16 u;
 412
 413         if (locase)
 414                 for (i = 0; i < name_len; i++)
 415                         if ((u = le16_to_cpu(name[i])) < locase_len)
 416                                 name[i] = locase[u];
 417 }
 418
 419 /**
 420  * ntfs_file_value_upcase - Convert a filename to upper case
 421  * @file_name_attr:
 422  * @upcase:
 423  * @upcase_len:
 424  *
 425  * Description...
 426  *
 427  * Returns:
 428  */
 429 void ntfs_file_value_upcase(FILE_NAME_ATTR *file_name_attr,
 430                 const ntfschar *upcase, const u32 upcase_len)
 431 {
 432         ntfs_name_upcase((ntfschar*)&file_name_attr->file_name,
 433                         file_name_attr->file_name_length, upcase, upcase_len);
 434 }
 435
/*
   NTFS uses Unicode (UTF-16LE [NTFS-3G uses UCS-2LE, which is enough
   for now]) for path names, but the Unicode code points need to be
   converted before a path can be accessed under NTFS. For 7-bit ASCII/ANSI,
   glibc does this even without a locale, in a hard-coded fashion, which
   is easy because the low 7-bit ASCII range appears to be available in
   all charsets. However, it does not convert anything else if there was
   an error with the locale setup, or if no locale was set up at all, as
   happens when mount is called during early boot, where (by policy)
   locales are not used (and may not even be available if /usr is not yet
   mounted). This patch fixes the resulting issues for systems which use
   UTF-8; for the others, specifying the locale in fstab brings them
   the encoding which they want.

   If no locale is defined, or there was a problem with setting one
   up, and whenever nl_langinfo(CODESET) returns a string starting with
   "ANSI", use an internal UCS-2LE <-> UTF-8 codeset converter. This fixes
   the bug where NTFS-3G does not show any path names which include
   international characters (and also fails to create them) as a result.

   Author: Bernhard Kaindl <bk@suse.de>
   Jean-Pierre Andre made it compliant with RFC3629/RFC2781.
*/
 459
 460 /*
 461  * Return the number of bytes in UTF-8 needed (without the terminating null) to
 462  * store the given UTF-16LE string.
 463  *
 464  * On error, -1 is returned, and errno is set to the error code. The following
 465  * error codes can be expected:
 466  *      EILSEQ          The input string is not valid UTF-16LE (only possible
 467  *                      if compiled without ALLOW_BROKEN_UNICODE).
 468  *      ENAMETOOLONG    The length of the UTF-8 string in bytes (without the
 469  *                      terminating null) would exceed @outs_len.
 470  */
 471 static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_len)
 472 {
 473         int i, ret = -1;
 474         int count = 0;
 475         BOOL surrog;
 476
 477         surrog = FALSE;
 478         for (i = 0; i < ins_len && ins[i] && count <= outs_len; i++) {
 479                 unsigned short c = le16_to_cpu(ins[i]);
 480                 if (surrog) {
 481                         if ((c >= 0xdc00) && (c < 0xe000)) {
 482                                 surrog = FALSE;
 483                                 count += 4;
 484                         } else {
 485 #if ALLOW_BROKEN_UNICODE
 486                                 /* The first UTF-16 unit of a surrogate pair has
 487                                  * a value between 0xd800 and 0xdc00. It can be
 488                                  * encoded as an individual UTF-8 sequence if we
 489                                  * cannot combine it with the next UTF-16 unit
 490                                  * unit as a surrogate pair. */
 491                                 surrog = FALSE;
 492                                 count += 3;
 493
 494                                 --i;
 495                                 continue;
 496 #else
 497                                 goto fail;
 498 #endif /* ALLOW_BROKEN_UNICODE */
 499                         }
 500                 } else
 501                         if (c < 0x80)
 502                                 count++;
 503                         else if (c < 0x800)
 504                                 count += 2;
 505                         else if (c < 0xd800)
 506                                 count += 3;
 507                         else if (c < 0xdc00)
 508                                 surrog = TRUE;
 509 #if ALLOW_BROKEN_UNICODE
 510                         else if (c < 0xe000)
 511                                 count += 3;
 512                         else if (c >= 0xe000)
 513 #else
 514                         else if ((c >= 0xe000) && (c < 0xfffe))
 515 #endif /* ALLOW_BROKEN_UNICODE */
 516                                 count += 3;
 517                         else
 518                                 goto fail;
 519         }
 520
 521         if (surrog && count <= outs_len) {
 522 #if ALLOW_BROKEN_UNICODE
 523                 count += 3; /* ending with a single surrogate */
 524 #else
 525                 goto fail;
 526 #endif /* ALLOW_BROKEN_UNICODE */
 527         }
 528
 529         if (count > outs_len) {
 530                 errno = ENAMETOOLONG;
 531                 goto out;
 532         }
 533
 534         ret = count;
 535 out:
 536         return ret;
 537 fail:
 538         errno = EILSEQ;
 539         goto out;
 540 }
 541
 542 /*
 543  * ntfs_utf16_to_utf8 - convert a little endian UTF16LE string to an UTF-8 string
 544  * @ins:        input utf16 string buffer
 545  * @ins_len:    length of input string in utf16 characters
 546  * @outs:       on return contains the (allocated) output multibyte string
 547  * @outs_len:   length of output buffer in bytes (ignored if *@outs is NULL)
 548  *
 549  * Return -1 with errno set if string has invalid byte sequence or too long.
 550  */
 551 static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
 552                               char **outs, int outs_len)
 553 {
 554 #if defined(__APPLE__) || defined(__DARWIN__)
 555 #ifdef ENABLE_NFCONV
 556         char *original_outs_value = *outs;
 557         int original_outs_len = outs_len;
 558 #endif /* ENABLE_NFCONV */
 559 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
 560
 561         char *t;
 562         int i, size, ret = -1;
 563         int halfpair;
 564
 565         halfpair = 0;
 566         if (!*outs) {
 567                 /* If no output buffer was provided, we will allocate one and
 568                  * limit its length to PATH_MAX.  Note: we follow the standard
 569                  * convention of PATH_MAX including the terminating null. */
 570                 outs_len = PATH_MAX;
 571         }
 572
 573         /* The size *with* the terminating null is limited to @outs_len,
 574          * so the size *without* the terminating null is limited to one less. */
 575         size = utf16_to_utf8_size(ins, ins_len, outs_len - 1);
 576
 577         if (size < 0)
 578                 goto out;
 579
 580         if (!*outs) {
 581                 outs_len = size + 1;
 582                 *outs = ntfs_malloc(outs_len);
 583                 if (!*outs)
 584                         goto out;
 585         }
 586
 587         t = *outs;
 588
 589         for (i = 0; i < ins_len && ins[i]; i++) {
 590             unsigned short c = le16_to_cpu(ins[i]);
 591                         /* size not double-checked */
 592                 if (halfpair) {
 593                         if ((c >= 0xdc00) && (c < 0xe000)) {
 594                                 *t++ = 0xf0 + (((halfpair + 64) >> 8) & 7);
 595                                 *t++ = 0x80 + (((halfpair + 64) >> 2) & 63);
 596                                 *t++ = 0x80 + ((c >> 6) & 15) + ((halfpair & 3) << 4);
 597                                 *t++ = 0x80 + (c & 63);
 598                                 halfpair = 0;
 599                         } else {
 600 #if ALLOW_BROKEN_UNICODE
 601                                 /* The first UTF-16 unit of a surrogate pair has
 602                                  * a value between 0xd800 and 0xdc00. It can be
 603                                  * encoded as an individual UTF-8 sequence if we
 604                                  * cannot combine it with the next UTF-16 unit
 605                                  * unit as a surrogate pair. */
 606                                 *t++ = 0xe0 | (halfpair >> 12);
 607                                 *t++ = 0x80 | ((halfpair >> 6) & 0x3f);
 608                                 *t++ = 0x80 | (halfpair & 0x3f);
 609                                 halfpair = 0;
 610
 611                                 --i;
 612                                 continue;
 613 #else
 614                                 goto fail;
 615 #endif /* ALLOW_BROKEN_UNICODE */
 616                         }
 617                 } else if (c < 0x80) {
 618                         *t++ = c;
 619                 } else {
 620                         if (c < 0x800) {
 621                                 *t++ = (0xc0 | ((c >> 6) & 0x3f));
 622                                 *t++ = 0x80 | (c & 0x3f);
 623                         } else if (c < 0xd800) {
 624                                 *t++ = 0xe0 | (c >> 12);
 625                                 *t++ = 0x80 | ((c >> 6) & 0x3f);
 626                                 *t++ = 0x80 | (c & 0x3f);
 627                         } else if (c < 0xdc00)
 628                                 halfpair = c;
 629 #if ALLOW_BROKEN_UNICODE
 630                         else if (c < 0xe000) {
 631                                 *t++ = 0xe0 | (c >> 12);
 632                                 *t++ = 0x80 | ((c >> 6) & 0x3f);
 633                                 *t++ = 0x80 | (c & 0x3f);
 634                         }
 635 #endif /* ALLOW_BROKEN_UNICODE */
 636                         else if (c >= 0xe000) {
 637                                 *t++ = 0xe0 | (c >> 12);
 638                                 *t++ = 0x80 | ((c >> 6) & 0x3f);
 639                                 *t++ = 0x80 | (c & 0x3f);
 640                         } else
 641                                 goto fail;
 642                 }
 643         }
 644 #if ALLOW_BROKEN_UNICODE
 645         if (halfpair) { /* ending with a single surrogate */
 646                 *t++ = 0xe0 | (halfpair >> 12);
 647                 *t++ = 0x80 | ((halfpair >> 6) & 0x3f);
 648                 *t++ = 0x80 | (halfpair & 0x3f);
 649         }
 650 #endif /* ALLOW_BROKEN_UNICODE */
 651         *t = '\0';
 652
 653 #if defined(__APPLE__) || defined(__DARWIN__)
 654 #ifdef ENABLE_NFCONV
 655         if(nfconvert_utf8 && (t - *outs) > 0) {
 656                 char *new_outs = NULL;
 657                 int new_outs_len = ntfs_macosx_normalize_utf8(*outs, &new_outs, 0); // Normalize to decomposed form
 658                 if(new_outs_len >= 0 && new_outs != NULL) {
 659                         if(original_outs_value != *outs) {
 660                                 // We have allocated outs ourselves.
 661                                 free(*outs);
 662                                 *outs = new_outs;
 663                                 t = *outs + new_outs_len;
 664                         }
 665                         else {
 666                                 // We need to copy new_outs into the fixed outs buffer.
 667                                 memset(*outs, 0, original_outs_len);
 668                                 strncpy(*outs, new_outs, original_outs_len-1);
 669                                 t = *outs + original_outs_len;
 670                                 free(new_outs);
 671                         }
 672                 }
 673                 else {
 674                         ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFD: %s\n", *outs);
 675                         ntfs_log_error("  new_outs=0x%p\n", new_outs);
 676                         ntfs_log_error("  new_outs_len=%d\n", new_outs_len);
 677                 }
 678         }
 679 #endif /* ENABLE_NFCONV */
 680 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
 681
 682         ret = t - *outs;
 683 out:
 684         return ret;
 685 fail:
 686         errno = EILSEQ;
 687         goto out;
 688 }
 689
 690 /*
 691  * Return the amount of 16-bit elements in UTF-16LE needed
 692  * (without the terminating null) to store given UTF-8 string.
 693  *
 694  * Return -1 with errno set if it's longer than PATH_MAX or string is invalid.
 695  *
 696  * Note: This does not check whether the input sequence is a valid utf8 string,
 697  *       and should be used only in context where such check is made!
 698  */
 699 static int utf8_to_utf16_size(const char *s)
 700 {
 701         int ret = -1;
 702         unsigned int byte;
 703         size_t count = 0;
 704
 705         while ((byte = *((const unsigned char *)s++))) {
 706                 if (++count >= PATH_MAX)
 707                         goto fail;
 708                 if (byte >= 0xc0) {
 709                         if (byte >= 0xF5) {
 710                                 errno = EILSEQ;
 711                                 goto out;
 712                         }
 713                         if (!*s)
 714                                 break;
 715                         if (byte >= 0xC0)
 716                                 s++;
 717                         if (!*s)
 718                                 break;
 719                         if (byte >= 0xE0)
 720                                 s++;
 721                         if (!*s)
 722                                 break;
 723                         if (byte >= 0xF0) {
 724                                 s++;
 725                                 if (++count >= PATH_MAX)
 726                                         goto fail;
 727                         }
 728                 }
 729         }
 730         ret = count;
 731 out:
 732         return ret;
 733 fail:
 734         errno = ENAMETOOLONG;
 735         goto out;
 736 }
 737 /*
 738  * This converts one UTF-8 sequence to cpu-endian Unicode value
 739  * within range U+0 .. U+10ffff and excluding U+D800 .. U+DFFF
 740  *
 741  * Return the number of used utf8 bytes or -1 with errno set
 742  * if sequence is invalid.
 743  */
 744 static int utf8_to_unicode(u32 *wc, const char *s)
 745 {
 746         unsigned int byte = *((const unsigned char *)s);
 747
 748                                         /* single byte */
 749         if (byte == 0) {
 750                 *wc = (u32) 0;
 751                 return 0;
 752         } else if (byte < 0x80) {
 753                 *wc = (u32) byte;
 754                 return 1;
 755                                         /* double byte */
 756         } else if (byte < 0xc2) {
 757                 goto fail;
 758         } else if (byte < 0xE0) {
 759                 if ((s[1] & 0xC0) == 0x80) {
 760                         *wc = ((u32)(byte & 0x1F) << 6)
 761                             | ((u32)(s[1] & 0x3F));
 762                         return 2;
 763                 } else
 764                         goto fail;
 765                                         /* three-byte */
 766         } else if (byte < 0xF0) {
 767                 if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)) {
 768                         *wc = ((u32)(byte & 0x0F) << 12)
 769                             | ((u32)(s[1] & 0x3F) << 6)
 770                             | ((u32)(s[2] & 0x3F));
 771                         /* Check valid ranges */
 772 #if ALLOW_BROKEN_UNICODE
 773                         if (((*wc >= 0x800) && (*wc <= 0xD7FF))
 774                           || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
 775                           || ((*wc >= 0xe000) && (*wc <= 0xFFFF)))
 776                                 return 3;
 777 #else
 778                         if (((*wc >= 0x800) && (*wc <= 0xD7FF))
 779                           || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
 780                                 return 3;
 781 #endif /* ALLOW_BROKEN_UNICODE */
 782                 }
 783                 goto fail;
 784                                         /* four-byte */
 785         } else if (byte < 0xF5) {
 786                 if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)
 787                   && ((s[3] & 0xC0) == 0x80)) {
 788                         *wc = ((u32)(byte & 0x07) << 18)
 789                             | ((u32)(s[1] & 0x3F) << 12)
 790                             | ((u32)(s[2] & 0x3F) << 6)
 791                             | ((u32)(s[3] & 0x3F));
 792                         /* Check valid ranges */
 793                         if ((*wc <= 0x10ffff) && (*wc >= 0x10000))
 794                                 return 4;
 795                 }
 796                 goto fail;
 797         }
 798 fail:
 799         errno = EILSEQ;
 800         return -1;
 801 }
 802
 803 /**
 804  * ntfs_utf8_to_utf16 - convert a UTF-8 string to a UTF-16LE string
 805  * @ins:        input multibyte string buffer
 806  * @outs:       on return contains the (allocated) output utf16 string
 807  * @outs_len:   length of output buffer in utf16 characters
 808  *
 809  * Return -1 with errno set.
 810  */
 811 static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
 812 {
 813 #if defined(__APPLE__) || defined(__DARWIN__)
 814 #ifdef ENABLE_NFCONV
 815         char *new_ins = NULL;
 816         if(nfconvert_utf8) {
 817                 int new_ins_len;
 818                 new_ins_len = ntfs_macosx_normalize_utf8(ins, &new_ins, 1); // Normalize to composed form
 819                 if(new_ins_len >= 0)
 820                         ins = new_ins;
 821                 else
 822                         ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFC: %s\n", ins);
 823         }
 824 #endif /* ENABLE_NFCONV */
 825 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
 826         const char *t = ins;
 827         u32 wc;
 828         BOOL allocated;
 829         ntfschar *outpos;
 830         int shorts, ret = -1;
 831
 832         shorts = utf8_to_utf16_size(ins);
 833         if (shorts < 0)
 834                 goto fail;
 835
 836         allocated = FALSE;
 837         if (!*outs) {
 838                 *outs = ntfs_malloc((shorts + 1) * sizeof(ntfschar));
 839                 if (!*outs)
 840                         goto fail;
 841                 allocated = TRUE;
 842         }
 843
 844         outpos = *outs;
 845
 846         while(1) {
 847                 int m  = utf8_to_unicode(&wc, t);
 848                 if (m <= 0) {
 849                         if (m < 0) {
 850                                 /* do not leave space allocated if failed */
 851                                 if (allocated) {
 852                                         free(*outs);
 853                                         *outs = (ntfschar*)NULL;
 854                                 }
 855                                 goto fail;
 856                         }
 857                         *outpos++ = const_cpu_to_le16(0);
 858                         break;
 859                 }
 860                 if (wc < 0x10000)
 861                         *outpos++ = cpu_to_le16(wc);
 862                 else {
 863                         wc -= 0x10000;
 864                         *outpos++ = cpu_to_le16((wc >> 10) + 0xd800);
 865                         *outpos++ = cpu_to_le16((wc & 0x3ff) + 0xdc00);
 866                 }
 867                 t += m;
 868         }
 869
 870         ret = --outpos - *outs;
 871 fail:
 872 #if defined(__APPLE__) || defined(__DARWIN__)
 873 #ifdef ENABLE_NFCONV
 874         if(new_ins != NULL)
 875                 free(new_ins);
 876 #endif /* ENABLE_NFCONV */
 877 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
 878         return ret;
 879 }
 880
 881 /**
 882  * ntfs_ucstombs - convert a little endian Unicode string to a multibyte string
 883  * @ins:        input Unicode string buffer
 884  * @ins_len:    length of input string in Unicode characters
 885  * @outs:       on return contains the (allocated) output multibyte string
 886  * @outs_len:   length of output buffer in bytes (ignored if *@outs is NULL)
 887  *
 888  * Convert the input little endian, 2-byte Unicode string @ins, of length
 889  * @ins_len into the multibyte string format dictated by the current locale.
 890  *
 891  * If *@outs is NULL, the function allocates the string and the caller is
 892  * responsible for calling free(*@outs); when finished with it.
 893  *
 894  * On success the function returns the number of bytes written to the output
 895  * string *@outs (>= 0), not counting the terminating NULL byte. If the output
 896  * string buffer was allocated, *@outs is set to it.
 897  *
 898  * On error, -1 is returned, and errno is set to the error code. The following
 899  * error codes can be expected:
 900  *      EINVAL          Invalid arguments (e.g. @ins or @outs is NULL).
 901  *      EILSEQ          The input string cannot be represented as a multibyte
 902  *                      sequence according to the current locale.
 903  *      ENAMETOOLONG    Destination buffer is too small for input string.
 904  *      ENOMEM          Not enough memory to allocate destination buffer.
 905  */
 906 int ntfs_ucstombs(const ntfschar *ins, const int ins_len, char **outs,
 907                 int outs_len)
 908 {
 909         char *mbs;
 910         int mbs_len;
 911 #ifdef MB_CUR_MAX
 912         wchar_t wc;
 913         int i, o;
 914         int cnt = 0;
 915 #ifdef HAVE_MBSINIT
 916         mbstate_t mbstate;
 917 #endif
 918 #endif /* MB_CUR_MAX */
 919
 920         if (!ins || !outs) {
 921                 errno = EINVAL;
 922                 return -1;
 923         }
 924         mbs = *outs;
 925         mbs_len = outs_len;
 926         if (mbs && !mbs_len) {
 927                 errno = ENAMETOOLONG;
 928                 return -1;
 929         }
 930         if (use_utf8)
 931                 return ntfs_utf16_to_utf8(ins, ins_len, outs, outs_len);
 932 #ifdef MB_CUR_MAX
 933         if (!mbs) {
 934                 mbs_len = (ins_len + 1) * MB_CUR_MAX;
 935                 mbs = ntfs_malloc(mbs_len);
 936                 if (!mbs)
 937                         return -1;
 938         }
 939 #ifdef HAVE_MBSINIT
 940         memset(&mbstate, 0, sizeof(mbstate));
 941 #else
 942         wctomb(NULL, 0);
 943 #endif
 944         for (i = o = 0; i < ins_len; i++) {
 945                 /* Reallocate memory if necessary or abort. */
 946                 if ((int)(o + MB_CUR_MAX) > mbs_len) {
 947                         char *tc;
 948                         if (mbs == *outs) {
 949                                 errno = ENAMETOOLONG;
 950                                 return -1;
 951                         }
 952                         tc = ntfs_malloc((mbs_len + 64) & ~63);
 953                         if (!tc)
 954                                 goto err_out;
 955                         memcpy(tc, mbs, mbs_len);
 956                         mbs_len = (mbs_len + 64) & ~63;
 957                         free(mbs);
 958                         mbs = tc;
 959                 }
 960                 /* Convert the LE Unicode character to a CPU wide character. */
 961                 wc = (wchar_t)le16_to_cpu(ins[i]);
 962                 if (!wc)
 963                         break;
 964                 /* Convert the CPU endian wide character to multibyte. */
 965 #ifdef HAVE_MBSINIT
 966                 cnt = wcrtomb(mbs + o, wc, &mbstate);
 967 #else
 968                 cnt = wctomb(mbs + o, wc);
 969 #endif
 970                 if (cnt == -1)
 971                         goto err_out;
 972                 if (cnt <= 0) {
 973                         ntfs_log_debug("Eeek. cnt <= 0, cnt = %i\n", cnt);
 974                         errno = EINVAL;
 975                         goto err_out;
 976                 }
 977                 o += cnt;
 978         }
 979 #ifdef HAVE_MBSINIT
 980         /* Make sure we are back in the initial state. */
 981         if (!mbsinit(&mbstate)) {
 982                 ntfs_log_debug("Eeek. mbstate not in initial state!\n");
 983                 errno = EILSEQ;
 984                 goto err_out;
 985         }
 986 #endif
 987         /* Now write the NULL character. */
 988         mbs[o] = '\0';
 989         if (*outs != mbs)
 990                 *outs = mbs;
 991         return o;
 992 err_out:
 993         if (mbs != *outs) {
 994                 int eo = errno;
 995                 free(mbs);
 996                 errno = eo;
 997         }
 998 #else /* MB_CUR_MAX */
 999         errno = EILSEQ;
1000 #endif /* MB_CUR_MAX */
1001         return -1;
1002 }
1003
1004 /**
1005  * ntfs_mbstoucs - convert a multibyte string to a little endian Unicode string
1006  * @ins:        input multibyte string buffer
1007  * @outs:       on return contains the (allocated) output Unicode string
1008  *
1009  * Convert the input multibyte string @ins, from the current locale into the
1010  * corresponding little endian, 2-byte Unicode string.
1011  *
1012  * The function allocates the string and the caller is responsible for calling
1013  * free(*@outs); when finished with it.
1014  *
1015  * On success the function returns the number of Unicode characters written to
1016  * the output string *@outs (>= 0), not counting the terminating Unicode NULL
1017  * character.
1018  *
1019  * On error, -1 is returned, and errno is set to the error code. The following
1020  * error codes can be expected:
1021  *      EINVAL          Invalid arguments (e.g. @ins or @outs is NULL).
1022  *      EILSEQ          The input string cannot be represented as a Unicode
1023  *                      string according to the current locale.
1024  *      ENAMETOOLONG    Destination buffer is too small for input string.
1025  *      ENOMEM          Not enough memory to allocate destination buffer.
1026  */
1027 int ntfs_mbstoucs(const char *ins, ntfschar **outs)
1028 {
1029 #ifdef MB_CUR_MAX
1030         ntfschar *ucs;
1031         const char *s;
1032         wchar_t wc;
1033         int i, o, cnt, ins_len, ucs_len, ins_size;
1034 #ifdef HAVE_MBSINIT
1035         mbstate_t mbstate;
1036 #endif
1037 #endif /* MB_CUR_MAX */
1038
1039         if (!ins || !outs) {
1040                 errno = EINVAL;
1041                 return -1;
1042         }
1043
1044         if (use_utf8)
1045                 return ntfs_utf8_to_utf16(ins, outs);
1046
1047 #ifdef MB_CUR_MAX
1048         /* Determine the size of the multi-byte string in bytes. */
1049         ins_size = strlen(ins);
1050         /* Determine the length of the multi-byte string. */
1051         s = ins;
1052 #if defined(HAVE_MBSINIT)
1053         memset(&mbstate, 0, sizeof(mbstate));
1054         ins_len = mbsrtowcs(NULL, (const char **)&s, 0, &mbstate);
1055 #ifdef __CYGWIN32__
1056         if (!ins_len && *ins) {
1057                 /* Older Cygwin had broken mbsrtowcs() implementation. */
1058                 ins_len = strlen(ins);
1059         }
1060 #endif
1061 #elif !defined(DJGPP)
1062         ins_len = mbstowcs(NULL, s, 0);
1063 #else
1064         /* Eeek!!! DJGPP has broken mbstowcs() implementation!!! */
1065         ins_len = strlen(ins);
1066 #endif
1067         if (ins_len == -1)
1068                 return ins_len;
1069 #ifdef HAVE_MBSINIT
1070         if ((s != ins) || !mbsinit(&mbstate)) {
1071 #else
1072         if (s != ins) {
1073 #endif
1074                 errno = EILSEQ;
1075                 return -1;
1076         }
1077         /* Add the NULL terminator. */
1078         ins_len++;
1079         ucs_len = ins_len;
1080         ucs = ntfs_malloc(ucs_len * sizeof(ntfschar));
1081         if (!ucs)
1082                 return -1;
1083 #ifdef HAVE_MBSINIT
1084         memset(&mbstate, 0, sizeof(mbstate));
1085 #else
1086         mbtowc(NULL, NULL, 0);
1087 #endif
1088         for (i = o = cnt = 0; i < ins_size; i += cnt, o++) {
1089                 /* Reallocate memory if necessary. */
1090                 if (o >= ucs_len) {
1091                         ntfschar *tc;
1092                         ucs_len = (ucs_len * sizeof(ntfschar) + 64) & ~63;
1093                         tc = realloc(ucs, ucs_len);
1094                         if (!tc)
1095                                 goto err_out;
1096                         ucs = tc;
1097                         ucs_len /= sizeof(ntfschar);
1098                 }
1099                 /* Convert the multibyte character to a wide character. */
1100 #ifdef HAVE_MBSINIT
1101                 cnt = mbrtowc(&wc, ins + i, ins_size - i, &mbstate);
1102 #else
1103                 cnt = mbtowc(&wc, ins + i, ins_size - i);
1104 #endif
1105                 if (!cnt)
1106                         break;
1107                 if (cnt == -1)
1108                         goto err_out;
1109                 if (cnt < -1) {
1110                         ntfs_log_trace("Eeek. cnt = %i\n", cnt);
1111                         errno = EINVAL;
1112                         goto err_out;
1113                 }
1114                 /* Make sure we are not overflowing the NTFS Unicode set. */
1115                 if ((unsigned long)wc >= (unsigned long)(1 <<
1116                                 (8 * sizeof(ntfschar)))) {
1117                         errno = EILSEQ;
1118                         goto err_out;
1119                 }
1120                 /* Convert the CPU wide character to a LE Unicode character. */
1121                 ucs[o] = cpu_to_le16(wc);
1122         }
1123 #ifdef HAVE_MBSINIT
1124         /* Make sure we are back in the initial state. */
1125         if (!mbsinit(&mbstate)) {
1126                 ntfs_log_trace("Eeek. mbstate not in initial state!\n");
1127                 errno = EILSEQ;
1128                 goto err_out;
1129         }
1130 #endif
1131         /* Now write the NULL character. */
1132         ucs[o] = const_cpu_to_le16(L'\0');
1133         *outs = ucs;
1134         return o;
1135 err_out:
1136         free(ucs);
1137 #else /* MB_CUR_MAX */
1138         errno = EILSEQ;
1139 #endif /* MB_CUR_MAX */
1140         return -1;
1141 }
1142
1143 /*
1144  *              Turn a UTF8 name uppercase
1145  *
1146  *      Returns an allocated uppercase name which has to be freed by caller
1147  *      or NULL if there is an error (described by errno)
1148  */
1149
1150 char *ntfs_uppercase_mbs(const char *low,
1151                         const ntfschar *upcase, u32 upcase_size)
1152 {
1153         int size;
1154         char *upp;
1155         u32 wc;
1156         int n;
1157         const char *s;
1158         char *t;
1159
1160         size = strlen(low);
1161         upp = (char*)ntfs_malloc(3*size + 1);
1162         if (upp) {
1163                 s = low;
1164                 t = upp;
1165                 do {
1166                         n = utf8_to_unicode(&wc, s);
1167                         if (n > 0) {
1168                                 if (wc < upcase_size)
1169                                         wc = le16_to_cpu(upcase[wc]);
1170                                 if (wc < 0x80)
1171                                         *t++ = wc;
1172                                 else if (wc < 0x800) {
1173                                         *t++ = (0xc0 | ((wc >> 6) & 0x3f));
1174                                         *t++ = 0x80 | (wc & 0x3f);
1175                                 } else if (wc < 0x10000) {
1176                                         *t++ = 0xe0 | (wc >> 12);
1177                                         *t++ = 0x80 | ((wc >> 6) & 0x3f);
1178                                         *t++ = 0x80 | (wc & 0x3f);
1179                                 } else {
1180                                         *t++ = 0xf0 | ((wc >> 18) & 7);
1181                                         *t++ = 0x80 | ((wc >> 12) & 63);
1182                                         *t++ = 0x80 | ((wc >> 6) & 0x3f);
1183                                         *t++ = 0x80 | (wc & 0x3f);
1184                                 }
1185                         s += n;
1186                         }
1187                 } while (n > 0);
1188                 if (n < 0) {
1189                         free(upp);
1190                         upp = (char*)NULL;
1191                         errno = EILSEQ;
1192                 }
1193                 *t = 0;
1194         }
1195         return (upp);
1196 }
1197
1198 /**
1199  * ntfs_upcase_table_build - build the default upcase table for NTFS
1200  * @uc:         destination buffer where to store the built table
1201  * @uc_len:     size of destination buffer in bytes
1202  *
1203  * ntfs_upcase_table_build() builds the default upcase table for NTFS and
1204  * stores it in the caller supplied buffer @uc of size @uc_len.
1205  *
1206  * Note, @uc_len must be at least 128kiB in size or bad things will happen!
1207  */
1208 void ntfs_upcase_table_build(ntfschar *uc, u32 uc_len)
1209 {
1210         struct NEWUPPERCASE {
1211                 unsigned short first;
1212                 unsigned short last;
1213                 short diff;
1214                 unsigned char step;
1215                 unsigned char osmajor;
1216                 unsigned char osminor;
1217         } ;
1218
1219         /*
1220          *      This is the table as defined by Windows XP
1221          */
1222         static int uc_run_table[][3] = { /* Start, End, Add */
1223         {0x0061, 0x007B,  -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72,  74},
1224         {0x00E0, 0x00F7,  -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76,  86},
1225         {0x00F8, 0x00FF,  -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100},
1226         {0x0256, 0x0258, -205}, {0x1F00, 0x1F08,   8}, {0x1F78, 0x1F7A, 128},
1227         {0x028A, 0x028C, -217}, {0x1F10, 0x1F16,   8}, {0x1F7A, 0x1F7C, 112},
1228         {0x03AC, 0x03AD,  -38}, {0x1F20, 0x1F28,   8}, {0x1F7C, 0x1F7E, 126},
1229         {0x03AD, 0x03B0,  -37}, {0x1F30, 0x1F38,   8}, {0x1FB0, 0x1FB2,   8},
1230         {0x03B1, 0x03C2,  -32}, {0x1F40, 0x1F46,   8}, {0x1FD0, 0x1FD2,   8},
1231         {0x03C2, 0x03C3,  -31}, {0x1F51, 0x1F52,   8}, {0x1FE0, 0x1FE2,   8},
1232         {0x03C3, 0x03CC,  -32}, {0x1F53, 0x1F54,   8}, {0x1FE5, 0x1FE6,   7},
1233         {0x03CC, 0x03CD,  -64}, {0x1F55, 0x1F56,   8}, {0x2170, 0x2180, -16},
1234         {0x03CD, 0x03CF,  -63}, {0x1F57, 0x1F58,   8}, {0x24D0, 0x24EA, -26},
1235         {0x0430, 0x0450,  -32}, {0x1F60, 0x1F68,   8}, {0xFF41, 0xFF5B, -32},
1236         {0}
1237         };
1238         static int uc_dup_table[][2] = { /* Start, End */
1239         {0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC},
1240         {0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB},
1241         {0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5},
1242         {0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9},
1243         {0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95},
1244         {0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9},
1245         {0}
1246         };
1247         static int uc_byte_table[][2] = { /* Offset, Value */
1248         {0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196},
1249         {0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C},
1250         {0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D},
1251         {0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F},
1252         {0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9},
1253         {0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE},
1254         {0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7},
1255         {0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197},
1256         {0}
1257         };
1258
1259 /*
1260  *              Changes which were applied to later Windows versions
1261  *
1262  *   md5 for $UpCase from Winxp : 6fa3db2468275286210751e869d36373
1263  *                        Vista : 2f03b5a69d486ff3864cecbd07f24440
1264  *                        Win8 :  7ff498a44e45e77374cc7c962b1b92f2
1265  */
1266         static const struct NEWUPPERCASE newuppercase[] = {
1267                                                 /* from Windows 6.0 (Vista) */
1268                 { 0x37b, 0x37d, 0x82, 1, 6, 0 },
1269                 { 0x1f80, 0x1f87, 0x8, 1, 6, 0 },
1270                 { 0x1f90, 0x1f97, 0x8, 1, 6, 0 },
1271                 { 0x1fa0, 0x1fa7, 0x8, 1, 6, 0 },
1272                 { 0x2c30, 0x2c5e, -0x30, 1, 6, 0 },
1273                 { 0x2d00, 0x2d25, -0x1c60, 1, 6, 0 },
1274                 { 0x2c68, 0x2c6c, -0x1, 2, 6, 0 },
1275                 { 0x219, 0x21f, -0x1, 2, 6, 0 },
1276                 { 0x223, 0x233, -0x1, 2, 6, 0 },
1277                 { 0x247, 0x24f, -0x1, 2, 6, 0 },
1278                 { 0x3d9, 0x3e1, -0x1, 2, 6, 0 },
1279                 { 0x48b, 0x48f, -0x1, 2, 6, 0 },
1280                 { 0x4fb, 0x513, -0x1, 2, 6, 0 },
1281                 { 0x2c81, 0x2ce3, -0x1, 2, 6, 0 },
1282                 { 0x3f8, 0x3fb, -0x1, 3, 6, 0 },
1283                 { 0x4c6, 0x4ce, -0x1, 4, 6, 0 },
1284                 { 0x23c, 0x242, -0x1, 6, 6, 0 },
1285                 { 0x4ed, 0x4f7, -0x1, 10, 6, 0 },
1286                 { 0x450, 0x45d, -0x50, 13, 6, 0 },
1287                 { 0x2c61, 0x2c76, -0x1, 21, 6, 0 },
1288                 { 0x1fcc, 0x1ffc, -0x9, 48, 6, 0 },
1289                 { 0x180, 0x180, 0xc3, 1, 6, 0 },
1290                 { 0x195, 0x195, 0x61, 1, 6, 0 },
1291                 { 0x19a, 0x19a, 0xa3, 1, 6, 0 },
1292                 { 0x19e, 0x19e, 0x82, 1, 6, 0 },
1293                 { 0x1bf, 0x1bf, 0x38, 1, 6, 0 },
1294                 { 0x1f9, 0x1f9, -0x1, 1, 6, 0 },
1295                 { 0x23a, 0x23a, 0x2a2b, 1, 6, 0 },
1296                 { 0x23e, 0x23e, 0x2a28, 1, 6, 0 },
1297                 { 0x26b, 0x26b, 0x29f7, 1, 6, 0 },
1298                 { 0x27d, 0x27d, 0x29e7, 1, 6, 0 },
1299                 { 0x280, 0x280, -0xda, 1, 6, 0 },
1300                 { 0x289, 0x289, -0x45, 1, 6, 0 },
1301                 { 0x28c, 0x28c, -0x47, 1, 6, 0 },
1302                 { 0x3f2, 0x3f2, 0x7, 1, 6, 0 },
1303                 { 0x4cf, 0x4cf, -0xf, 1, 6, 0 },
1304                 { 0x1d7d, 0x1d7d, 0xee6, 1, 6, 0 },
1305                 { 0x1fb3, 0x1fb3, 0x9, 1, 6, 0 },
1306                 { 0x214e, 0x214e, -0x1c, 1, 6, 0 },
1307                 { 0x2184, 0x2184, -0x1, 1, 6, 0 },
1308                                                 /* from Windows 6.1 (Win7) */
1309                 { 0x23a, 0x23e,  0x0, 4, 6, 1 },
1310                 { 0x250, 0x250,  0x2a1f, 2, 6, 1 },
1311                 { 0x251, 0x251,  0x2a1c, 2, 6, 1 },
1312                 { 0x271, 0x271,  0x29fd, 2, 6, 1 },
1313                 { 0x371, 0x373, -0x1, 2, 6, 1 },
1314                 { 0x377, 0x377, -0x1, 2, 6, 1 },
1315                 { 0x3c2, 0x3c2,  0x0, 2, 6, 1 },
1316                 { 0x3d7, 0x3d7, -0x8, 2, 6, 1 },
1317                 { 0x515, 0x523, -0x1, 2, 6, 1 },
1318                         /* below, -0x75fc stands for 0x8a04 and truncation */
1319                 { 0x1d79, 0x1d79, -0x75fc, 2, 6, 1 },
1320                 { 0x1efb, 0x1eff, -0x1, 2, 6, 1 },
1321                 { 0x1fc3, 0x1ff3,  0x9, 48, 6, 1 },
1322                 { 0x1fcc, 0x1ffc,  0x0, 48, 6, 1 },
1323                 { 0x2c65, 0x2c65, -0x2a2b, 2, 6, 1 },
1324                 { 0x2c66, 0x2c66, -0x2a28, 2, 6, 1 },
1325                 { 0x2c73, 0x2c73, -0x1, 2, 6, 1 },
1326                 { 0xa641, 0xa65f, -0x1, 2, 6, 1 },
1327                 { 0xa663, 0xa66d, -0x1, 2, 6, 1 },
1328                 { 0xa681, 0xa697, -0x1, 2, 6, 1 },
1329                 { 0xa723, 0xa72f, -0x1, 2, 6, 1 },
1330                 { 0xa733, 0xa76f, -0x1, 2, 6, 1 },
1331                 { 0xa77a, 0xa77c, -0x1, 2, 6, 1 },
1332                 { 0xa77f, 0xa787, -0x1, 2, 6, 1 },
1333                 { 0xa78c, 0xa78c, -0x1, 2, 6, 1 },
1334                                                         /* end mark */
1335                 { 0 }
1336         } ;
1337
1338         int i, r;
1339         int k, off;
1340         const struct NEWUPPERCASE *puc;
1341
1342         memset((char*)uc, 0, uc_len);
1343         uc_len >>= 1;
1344         if (uc_len > 65536)
1345                 uc_len = 65536;
1346         for (i = 0; (u32)i < uc_len; i++)
1347                 uc[i] = cpu_to_le16(i);
1348         for (r = 0; uc_run_table[r][0]; r++) {
1349                 off = uc_run_table[r][2];
1350                 for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
1351                         uc[i] = cpu_to_le16(i + off);
1352         }
1353         for (r = 0; uc_dup_table[r][0]; r++)
1354                 for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
1355                         uc[i + 1] = cpu_to_le16(i);
1356         for (r = 0; uc_byte_table[r][0]; r++) {
1357                 k = uc_byte_table[r][1];
1358                 uc[uc_byte_table[r][0]] = cpu_to_le16(k);
1359         }
1360         for (r=0; newuppercase[r].first; r++) {
1361                 puc = &newuppercase[r];
1362                 if ((puc->osmajor < UPCASE_MAJOR)
1363                   || ((puc->osmajor == UPCASE_MAJOR)
1364                      && (puc->osminor <= UPCASE_MINOR))) {
1365                         off = puc->diff;
1366                         for (i = puc->first; i <= puc->last; i += puc->step)
1367                                 uc[i] = cpu_to_le16(i + off);
1368                 }
1369         }
1370 }
1371
1372 /*
1373  *              Allocate and build the default upcase table
1374  *
1375  *      Returns the number of entries
1376  *              0 if failed
1377  */
1378
1379 #define UPCASE_LEN 65536 /* default number of entries in upcase */
1380
1381 u32 ntfs_upcase_build_default(ntfschar **upcase)
1382 {
1383         u32 upcase_len = 0;
1384
1385         *upcase = (ntfschar*)ntfs_malloc(UPCASE_LEN*2);
1386         if (*upcase) {
1387                 ntfs_upcase_table_build(*upcase, UPCASE_LEN*2);
1388                 upcase_len = UPCASE_LEN;
1389         }
1390         return (upcase_len);
1391 }
1392
1393 /*
1394  *              Build a table for converting to lower case
1395  *
1396  *      This is only meaningful when there is a single lower case
1397  *      character leading to an upper case one, and currently the
1398  *      only exception is the greek letter sigma which has a single
1399  *      upper case glyph (code U+03A3), but two lower case glyphs
1400  *      (code U+03C3 and U+03C2, the latter to be used at the end
1401  *      of a word). In the following implementation the upper case
1402  *      sigma will be lowercased as U+03C3.
1403  */
1404
1405 ntfschar *ntfs_locase_table_build(const ntfschar *uc, u32 uc_cnt)
1406 {
1407         ntfschar *lc;
1408         u32 upp;
1409         u32 i;
1410
1411         lc = (ntfschar*)ntfs_malloc(uc_cnt*sizeof(ntfschar));
1412         if (lc) {
1413                 for (i=0; i<uc_cnt; i++)
1414                         lc[i] = cpu_to_le16(i);
1415                 for (i=0; i<uc_cnt; i++) {
1416                         upp = le16_to_cpu(uc[i]);
1417                         if ((upp != i) && (upp < uc_cnt))
1418                                 lc[upp] = cpu_to_le16(i);
1419                 }
1420         } else
1421                 ntfs_log_error("Could not build the locase table\n");
1422         return (lc);
1423 }
1424
1425 /**
1426  * ntfs_str2ucs - convert a string to a valid NTFS file name
1427  * @s:          input string
1428  * @len:        length of output buffer in Unicode characters
1429  *
1430  * Convert the input @s string into the corresponding little endian,
1431  * 2-byte Unicode string. The length of the converted string is less
1432  * or equal to the maximum length allowed by the NTFS format (255).
1433  *
1434  * If @s is NULL then return AT_UNNAMED.
1435  *
1436  * On success the function returns the Unicode string in an allocated
1437  * buffer and the caller is responsible to free it when it's not needed
1438  * anymore.
1439  *
1440  * On error NULL is returned and errno is set to the error code.
1441  */
1442 ntfschar *ntfs_str2ucs(const char *s, int *len)
1443 {
1444         ntfschar *ucs = NULL;
1445
1446         if (s && ((*len = ntfs_mbstoucs(s, &ucs)) == -1)) {
1447                 ntfs_log_perror("Couldn't convert '%s' to Unicode", s);
1448                 return NULL;
1449         }
1450         if (*len > NTFS_MAX_NAME_LEN) {
1451                 free(ucs);
1452                 errno = ENAMETOOLONG;
1453                 return NULL;
1454         }
1455         if (!ucs || !*len) {
1456                 ucs  = AT_UNNAMED;
1457                 *len = 0;
1458         }
1459         return ucs;
1460 }
1461
1462 /**
1463  * ntfs_ucsfree - free memory allocated by ntfs_str2ucs()
1464  * @ucs         input string to be freed
1465  *
1466  * Free memory at @ucs and which was allocated by ntfs_str2ucs.
1467  *
1468  * Return value: none.
1469  */
1470 void ntfs_ucsfree(ntfschar *ucs)
1471 {
1472         if (ucs && (ucs != AT_UNNAMED))
1473                 free(ucs);
1474 }
1475
1476 /*
1477  *              Check whether a name contains no chars forbidden
1478  *      for DOS or Win32 use
1479  *
1480  *      If @strict is TRUE, then trailing dots and spaces are forbidden.
1481  *      These names are technically allowed in the Win32 namespace, but
1482  *      they can be problematic.  See comment for FILE_NAME_WIN32.
1483  *
1484  *      If there is a bad char, errno is set to EINVAL
1485  */
1486
1487 BOOL ntfs_forbidden_chars(const ntfschar *name, int len, BOOL strict)
1488 {
1489         BOOL forbidden;
1490         int ch;
1491         int i;
1492         static const u32 mainset = (1L << ('\"' - 0x20))
1493                         | (1L << ('*' - 0x20))
1494                         | (1L << ('/' - 0x20))
1495                         | (1L << (':' - 0x20))
1496                         | (1L << ('<' - 0x20))
1497                         | (1L << ('>' - 0x20))
1498                         | (1L << ('?' - 0x20));
1499
1500         forbidden = (len == 0) ||
1501                     (strict && (name[len-1] == const_cpu_to_le16(' ') ||
1502                                 name[len-1] == const_cpu_to_le16('.')));
1503         for (i=0; i<len; i++) {
1504                 ch = le16_to_cpu(name[i]);
1505                 if ((ch < 0x20)
1506                     || ((ch < 0x40)
1507                         && ((1L << (ch - 0x20)) & mainset))
1508                     || (ch == '\\')
1509                     || (ch == '|'))
1510                         forbidden = TRUE;
1511         }
1512         if (forbidden)
1513                 errno = EINVAL;
1514         return (forbidden);
1515 }
1516
1517 /*
1518  *              Check whether a name contains no forbidden chars and
1519  *      is not a reserved name for DOS or Win32 use
1520  *
1521  *      The reserved names are CON, PRN, AUX, NUL, COM1..COM9, LPT1..LPT9
1522  *      with no suffix or any suffix.
1523  *
1524  *      If @strict is TRUE, then trailing dots and spaces are forbidden.
1525  *      These names are technically allowed in the Win32 namespace, but
1526  *      they can be problematic.  See comment for FILE_NAME_WIN32.
1527  *
1528  *      If the name is forbidden, errno is set to EINVAL
1529  */
1530
1531 BOOL ntfs_forbidden_names(ntfs_volume *vol, const ntfschar *name, int len,
1532                           BOOL strict)
1533 {
1534         BOOL forbidden;
1535         int h;
1536         static const ntfschar dot = const_cpu_to_le16('.');
1537         static const ntfschar con[] = { const_cpu_to_le16('c'),
1538                         const_cpu_to_le16('o'), const_cpu_to_le16('n') };
1539         static const ntfschar prn[] = { const_cpu_to_le16('p'),
1540                         const_cpu_to_le16('r'), const_cpu_to_le16('n') };
1541         static const ntfschar aux[] = { const_cpu_to_le16('a'),
1542                         const_cpu_to_le16('u'), const_cpu_to_le16('x') };
1543         static const ntfschar nul[] = { const_cpu_to_le16('n'),
1544                         const_cpu_to_le16('u'), const_cpu_to_le16('l') };
1545         static const ntfschar com[] = { const_cpu_to_le16('c'),
1546                         const_cpu_to_le16('o'), const_cpu_to_le16('m') };
1547         static const ntfschar lpt[] = { const_cpu_to_le16('l'),
1548                         const_cpu_to_le16('p'), const_cpu_to_le16('t') };
1549
1550         forbidden = ntfs_forbidden_chars(name, len, strict);
1551         if (!forbidden && (len >= 3)) {
1552                 /*
1553                  * Rough hash check to tell whether the first couple of chars
1554                  * may be one of CO PR AU NU LP or lowercase variants.
1555                  */
1556                 h = ((le16_to_cpu(name[0]) & 31)*48)
1557                                 ^ ((le16_to_cpu(name[1]) & 31)*165);
1558                 if ((h % 23) == 17) {
1559                         /* do a full check, depending on the third char */
1560                         switch (le16_to_cpu(name[2]) & ~0x20) {
1561                         case 'N' :
1562                                 if (((len == 3) || (name[3] == dot))
1563                                     && (!ntfs_ucsncasecmp(name, con, 3,
1564                                                 vol->upcase, vol->upcase_len)
1565                                         || !ntfs_ucsncasecmp(name, prn, 3,
1566                                                 vol->upcase, vol->upcase_len)))
1567                                         forbidden = TRUE;
1568                                 break;
1569                         case 'X' :
1570                                 if (((len == 3) || (name[3] == dot))
1571                                     && !ntfs_ucsncasecmp(name, aux, 3,
1572                                                 vol->upcase, vol->upcase_len))
1573                                         forbidden = TRUE;
1574                                 break;
1575                         case 'L' :
1576                                 if (((len == 3) || (name[3] == dot))
1577                                     && !ntfs_ucsncasecmp(name, nul, 3,
1578                                                 vol->upcase, vol->upcase_len))
1579                                         forbidden = TRUE;
1580                                 break;
1581                         case 'M' :
1582                                 if ((len > 3)
1583                                     && (le16_to_cpu(name[3]) >= '1')
1584                                     && (le16_to_cpu(name[3]) <= '9')
1585                                     && ((len == 4) || (name[4] == dot))
1586                                     && !ntfs_ucsncasecmp(name, com, 3,
1587                                                 vol->upcase, vol->upcase_len))
1588                                         forbidden = TRUE;
1589                                 break;
1590                         case 'T' :
1591                                 if ((len > 3)
1592                                     && (le16_to_cpu(name[3]) >= '1')
1593                                     && (le16_to_cpu(name[3]) <= '9')
1594                                     && ((len == 4) || (name[4] == dot))
1595                                     && !ntfs_ucsncasecmp(name, lpt, 3,
1596                                                 vol->upcase, vol->upcase_len))
1597                                         forbidden = TRUE;
1598                                 break;
1599                         }
1600                 }
1601         }
1602
1603         if (forbidden)
1604                 errno = EINVAL;
1605         return (forbidden);
1606 }
1607
1608 /*
1609  *              Check whether the same name can be used as a DOS and
1610  *      a Win32 name
1611  *
1612  *      The names must be the same, or the short name the uppercase
1613  *      variant of the long name
1614  */
1615
1616 BOOL ntfs_collapsible_chars(ntfs_volume *vol,
1617                         const ntfschar *shortname, int shortlen,
1618                         const ntfschar *longname, int longlen)
1619 {
1620         BOOL collapsible;
1621         unsigned int ch;
1622         unsigned int cs;
1623         int i;
1624
1625         collapsible = shortlen == longlen;
1626         for (i=0; collapsible && (i<shortlen); i++) {
1627                 ch = le16_to_cpu(longname[i]);
1628                 cs = le16_to_cpu(shortname[i]);
1629                 if ((cs != ch)
1630                     && ((ch >= vol->upcase_len)
1631                         || (cs >= vol->upcase_len)
1632                         || (vol->upcase[cs] != vol->upcase[ch])))
1633                                 collapsible = FALSE;
1634         }
1635         return (collapsible);
1636 }
1637
1638 /*
1639  * Define the character encoding to be used.
1640  * Use UTF-8 unless specified otherwise.
1641  */
1642
1643 int ntfs_set_char_encoding(const char *locale)
1644 {
1645         use_utf8 = 0;
1646         if (!locale || strstr(locale,"utf8") || strstr(locale,"UTF8")
1647             || strstr(locale,"utf-8") || strstr(locale,"UTF-8"))
1648                 use_utf8 = 1;
1649         else
1650                 if (setlocale(LC_ALL, locale))
1651                         use_utf8 = 0;
1652                 else {
1653                         ntfs_log_error("Invalid locale, encoding to UTF-8\n");
1654                         use_utf8 = 1;
1655                 }
1656         return 0; /* always successful */
1657 }
1658
1659 #if defined(__APPLE__) || defined(__DARWIN__)
1660
/*
 *		Enable or disable the normalization of file names
 *
 *	When built with ENABLE_NFCONV, @normalize is recorded into
 *	nfconvert_utf8 if it is 0 or 1, and 0 is returned. Any other
 *	value is refused and -1 is returned.
 *
 *	When built without ENABLE_NFCONV, -1 is always returned.
 */
int ntfs_macosx_normalize_filenames(int normalize) {
#ifdef ENABLE_NFCONV
	if ((normalize != 0) && (normalize != 1))
		return -1;

	nfconvert_utf8 = normalize;
	return 0;
#else
	return -1;
#endif /* ENABLE_NFCONV */
}
1674
/*
 *		Normalize a UTF-8 string to a composed or decomposed form
 *
 *	@utf8_string is the '\0'-terminated UTF-8 string to normalize,
 *	@composed selects the composed form (NFC) when non-zero, and
 *	the decomposed form (NFD) when zero.
 *
 *	On success, *@target is set to a newly allocated '\0'-terminated
 *	buffer holding the normalized string, and the length of that
 *	string in bytes (terminator excluded) is returned. An empty
 *	input yields an empty string and a length of 0. The buffer is
 *	obtained from ntfs_calloc() and is released with free() on the
 *	internal error path, so the caller presumably has to free() it.
 *
 *	On failure, *@target is left unchanged and a negative value is
 *	returned :
 *		-2 if the CFString could not be created from @utf8_string
 *		-3 if the mutable copy could not be created
 *		-1 for any other failure
 *
 *	When built without ENABLE_NFCONV, -1 is always returned.
 */
int ntfs_macosx_normalize_utf8(const char *utf8_string, char **target,
		int composed)
{
#ifdef ENABLE_NFCONV
	/* For this code to compile, the CoreFoundation framework must be fed to
	 * the linker. */
	CFStringRef cfSourceString;
	CFMutableStringRef cfMutableString;
	CFRange rangeToProcess;
	CFIndex requiredBufferLength = 0;
	char *result = NULL;
	int resultLength = -1;

	/* Convert the UTF-8 string to a CFString. */
	cfSourceString = CFStringCreateWithCString(kCFAllocatorDefault,
		utf8_string, kCFStringEncodingUTF8);
	if (cfSourceString == NULL) {
		ntfs_log_error("CFStringCreateWithCString failed!\n");
		return -2;
	}

	/* Create a mutable string from cfSourceString that we are free to
	 * modify. */
	cfMutableString = CFStringCreateMutableCopy(kCFAllocatorDefault, 0,
		cfSourceString);
	CFRelease(cfSourceString); /* End-of-life. */
	if (cfMutableString == NULL) {
		ntfs_log_error("CFStringCreateMutableCopy failed!\n");
		return -3;
	}

	/* Normalize the mutable string to the desired normalization form. */
	CFStringNormalize(cfMutableString, (composed != 0 ?
		kCFStringNormalizationFormC : kCFStringNormalizationFormD));

	/* Store the resulting string in a '\0'-terminated UTF-8 encoded char*
	 * buffer.
	 * CFStringGetBytes() returns the number of characters converted,
	 * which is zero for an empty string : an empty string is thus
	 * handled without calling it, instead of being taken as a
	 * failure. requiredBufferLength is then still zero. */
	rangeToProcess = CFRangeMake(0, CFStringGetLength(cfMutableString));
	if ((rangeToProcess.length == 0)
	    || (CFStringGetBytes(cfMutableString, rangeToProcess,
		kCFStringEncodingUTF8, 0, false, NULL, 0,
		&requiredBufferLength) > 0))
	{
		/* One more byte for the terminator, which is expected to be
		 * provided by ntfs_calloc() returning zeroed memory. */
		resultLength = sizeof(char) * (requiredBufferLength + 1);
		result = ntfs_calloc(resultLength);

		if (result == NULL) {
			ntfs_log_error("Could not perform a ntfs_calloc of %d "
				"bytes for char *result.\n", resultLength);
		}
		else if ((rangeToProcess.length != 0)
		    && (CFStringGetBytes(cfMutableString, rangeToProcess,
			kCFStringEncodingUTF8, 0, false,
			(UInt8*) result, resultLength - 1,
			&requiredBufferLength) <= 0))
		{
			ntfs_log_error("Could not perform UTF-8 "
				"conversion of normalized "
				"CFMutableString.\n");
			free(result);
			result = NULL;
		}
	}
	else {
		ntfs_log_error("Could not perform check for required length of "
			"UTF-8 conversion of normalized CFMutableString.\n");
	}

	CFRelease(cfMutableString);

	if (result != NULL) {
		*target = result;
		return resultLength - 1;
	}
	else {
		return -1;
	}
#else
	return -1;
#endif /* ENABLE_NFCONV */
}
1756 #endif /* defined(__APPLE__) || defined(__DARWIN__) */