fs/cifs/cifs_unicode.c

   1 /*
   2  *   fs/cifs/cifs_unicode.c
   3  *
   4  *   Copyright (c) International Business Machines  Corp., 2000,2009
   5  *   Modified by Steve French (sfrench@us.ibm.com)
   6  *
   7  *   This program is free software;  you can redistribute it and/or modify
   8  *   it under the terms of the GNU General Public License as published by
   9  *   the Free Software Foundation; either version 2 of the License, or
  10  *   (at your option) any later version.
  11  *
  12  *   This program is distributed in the hope that it will be useful,
  13  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  15  *   the GNU General Public License for more details.
  16  *
  17  *   You should have received a copy of the GNU General Public License
  18  *   along with this program;  if not, write to the Free Software
  19  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20  */
  21 #include <linux/fs.h>
  22 #include <linux/slab.h>
  23 #include "cifs_fs_sb.h"
  24 #include "cifs_unicode.h"
  25 #include "cifs_uniupr.h"
  26 #include "cifspdu.h"
  27 #include "cifsglob.h"
  28 #include "cifs_debug.h"
  29
  30 /*
  31  * cifs_utf16_bytes - how long will a string be after conversion?
  32  * @utf16 - pointer to input string
  33  * @maxbytes - don't go past this many bytes of input string
  34  * @codepage - destination codepage
  35  *
  36  * Walk a utf16le string and return the number of bytes that the string will
  37  * be after being converted to the given charset, not including any null
  38  * termination required. Don't walk past maxbytes in the source buffer.
  39  */
  40 int
  41 cifs_utf16_bytes(const __le16 *from, int maxbytes,
  42                 const struct nls_table *codepage)
  43 {
  44         int i;
  45         int charlen, outlen = 0;
  46         int maxwords = maxbytes / 2;
  47         char tmp[NLS_MAX_CHARSET_SIZE];
  48         __u16 ftmp;
  49
  50         for (i = 0; i < maxwords; i++) {
  51                 ftmp = get_unaligned_le16(&from[i]);
  52                 if (ftmp == 0)
  53                         break;
  54
  55                 charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
  56                 if (charlen > 0)
  57                         outlen += charlen;
  58                 else
  59                         outlen++;
  60         }
  61
  62         return outlen;
  63 }
  64
  65 int cifs_remap(struct cifs_sb_info *cifs_sb)
  66 {
  67         int map_type;
  68
  69         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR)
  70                 map_type = SFM_MAP_UNI_RSVD;
  71         else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
  72                 map_type = SFU_MAP_UNI_RSVD;
  73         else
  74                 map_type = NO_MAP_UNI_RSVD;
  75
  76         return map_type;
  77 }
  78
  79 /* Convert character using the SFU - "Services for Unix" remapping range */
  80 static bool
  81 convert_sfu_char(const __u16 src_char, char *target)
  82 {
  83         /*
  84          * BB: Cannot handle remapping UNI_SLASH until all the calls to
  85          *     build_path_from_dentry are modified, as they use slash as
  86          *     separator.
  87          */
  88         switch (src_char) {
  89         case UNI_COLON:
  90                 *target = ':';
  91                 break;
  92         case UNI_ASTERISK:
  93                 *target = '*';
  94                 break;
  95         case UNI_QUESTION:
  96                 *target = '?';
  97                 break;
  98         case UNI_PIPE:
  99                 *target = '|';
 100                 break;
 101         case UNI_GRTRTHAN:
 102                 *target = '>';
 103                 break;
 104         case UNI_LESSTHAN:
 105                 *target = '<';
 106                 break;
 107         default:
 108                 return false;
 109         }
 110         return true;
 111 }
 112
 113 /* Convert character using the SFM - "Services for Mac" remapping range */
 114 static bool
 115 convert_sfm_char(const __u16 src_char, char *target)
 116 {
 117         switch (src_char) {
 118         case SFM_COLON:
 119                 *target = ':';
 120                 break;
 121         case SFM_ASTERISK:
 122                 *target = '*';
 123                 break;
 124         case SFM_QUESTION:
 125                 *target = '?';
 126                 break;
 127         case SFM_PIPE:
 128                 *target = '|';
 129                 break;
 130         case SFM_GRTRTHAN:
 131                 *target = '>';
 132                 break;
 133         case SFM_LESSTHAN:
 134                 *target = '<';
 135                 break;
 136         case SFM_SLASH:
 137                 *target = '\\';
 138                 break;
 139         default:
 140                 return false;
 141         }
 142         return true;
 143 }
 144
 145
 146 /*
 147  * cifs_mapchar - convert a host-endian char to proper char in codepage
 148  * @target - where converted character should be copied
 149  * @src_char - 2 byte host-endian source character
 150  * @cp - codepage to which character should be converted
 151  * @map_type - How should the 7 NTFS/SMB reserved characters be mapped to UCS2?
 152  *
 153  * This function handles the conversion of a single character. It is the
 154  * responsibility of the caller to ensure that the target buffer is large
 155  * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
 156  */
 157 static int
 158 cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
 159              int maptype)
 160 {
 161         int len = 1;
 162
 163         if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target))
 164                 return len;
 165         else if ((maptype == SFU_MAP_UNI_RSVD) &&
 166                   convert_sfu_char(src_char, target))
 167                 return len;
 168
 169         /* if character not one of seven in special remap set */
 170         len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
 171         if (len <= 0) {
 172                 *target = '?';
 173                 len = 1;
 174         }
 175         return len;
 176 }
 177
 178 /*
 179  * cifs_from_utf16 - convert utf16le string to local charset
 180  * @to - destination buffer
 181  * @from - source buffer
 182  * @tolen - destination buffer size (in bytes)
 183  * @fromlen - source buffer size (in bytes)
 184  * @codepage - codepage to which characters should be converted
 185  * @mapchar - should characters be remapped according to the mapchars option?
 186  *
 187  * Convert a little-endian utf16le string (as sent by the server) to a string
 188  * in the provided codepage. The tolen and fromlen parameters are to ensure
 189  * that the code doesn't walk off of the end of the buffer (which is always
 190  * a danger if the alignment of the source buffer is off). The destination
 191  * string is always properly null terminated and fits in the destination
 192  * buffer. Returns the length of the destination string in bytes (including
 193  * null terminator).
 194  *
 195  * Note that some windows versions actually send multiword UTF-16 characters
 196  * instead of straight UTF16-2. The linux nls routines however aren't able to
 197  * deal with those characters properly. In the event that we get some of
 198  * those characters, they won't be translated properly.
 199  */
 200 int
 201 cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
 202                 const struct nls_table *codepage, int map_type)
 203 {
 204         int i, charlen, safelen;
 205         int outlen = 0;
 206         int nullsize = nls_nullsize(codepage);
 207         int fromwords = fromlen / 2;
 208         char tmp[NLS_MAX_CHARSET_SIZE];
 209         __u16 ftmp;
 210
 211         /*
 212          * because the chars can be of varying widths, we need to take care
 213          * not to overflow the destination buffer when we get close to the
 214          * end of it. Until we get to this offset, we don't need to check
 215          * for overflow however.
 216          */
 217         safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
 218
 219         for (i = 0; i < fromwords; i++) {
 220                 ftmp = get_unaligned_le16(&from[i]);
 221                 if (ftmp == 0)
 222                         break;
 223
 224                 /*
 225                  * check to see if converting this character might make the
 226                  * conversion bleed into the null terminator
 227                  */
 228                 if (outlen >= safelen) {
 229                         charlen = cifs_mapchar(tmp, ftmp, codepage, map_type);
 230                         if ((outlen + charlen) > (tolen - nullsize))
 231                                 break;
 232                 }
 233
 234                 /* put converted char into 'to' buffer */
 235                 charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type);
 236                 outlen += charlen;
 237         }
 238
 239         /* properly null-terminate string */
 240         for (i = 0; i < nullsize; i++)
 241                 to[outlen++] = 0;
 242
 243         return outlen;
 244 }
 245
 246 /*
 247  * NAME:        cifs_strtoUTF16()
 248  *
 249  * FUNCTION:    Convert character string to unicode string
 250  *
 251  */
 252 int
 253 cifs_strtoUTF16(__le16 *to, const char *from, int len,
 254               const struct nls_table *codepage)
 255 {
 256         int charlen;
 257         int i;
 258         wchar_t wchar_to; /* needed to quiet sparse */
 259
 260         /* special case for utf8 to handle no plane0 chars */
 261         if (!strcmp(codepage->charset, "utf8")) {
 262                 /*
 263                  * convert utf8 -> utf16, we assume we have enough space
 264                  * as caller should have assumed conversion does not overflow
 265                  * in destination len is length in wchar_t units (16bits)
 266                  */
 267                 i  = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
 268                                        (wchar_t *) to, len);
 269
 270                 /* if success terminate and exit */
 271                 if (i >= 0)
 272                         goto success;
 273                 /*
 274                  * if fails fall back to UCS encoding as this
 275                  * function should not return negative values
 276                  * currently can fail only if source contains
 277                  * invalid encoded characters
 278                  */
 279         }
 280
 281         for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
 282                 charlen = codepage->char2uni(from, len, &wchar_to);
 283                 if (charlen < 1) {
 284                         cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n",
 285                                  *from, charlen);
 286                         /* A question mark */
 287                         wchar_to = 0x003f;
 288                         charlen = 1;
 289                 }
 290                 put_unaligned_le16(wchar_to, &to[i]);
 291         }
 292
 293 success:
 294         put_unaligned_le16(0, &to[i]);
 295         return i;
 296 }
 297
 298 /*
 299  * cifs_strndup_from_utf16 - copy a string from wire format to the local
 300  * codepage
 301  * @src - source string
 302  * @maxlen - don't walk past this many bytes in the source string
 303  * @is_unicode - is this a unicode string?
 304  * @codepage - destination codepage
 305  *
 306  * Take a string given by the server, convert it to the local codepage and
 307  * put it in a new buffer. Returns a pointer to the new string or NULL on
 308  * error.
 309  */
 310 char *
 311 cifs_strndup_from_utf16(const char *src, const int maxlen,
 312                         const bool is_unicode, const struct nls_table *codepage)
 313 {
 314         int len;
 315         char *dst;
 316
 317         if (is_unicode) {
 318                 len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage);
 319                 len += nls_nullsize(codepage);
 320                 dst = kmalloc(len, GFP_KERNEL);
 321                 if (!dst)
 322                         return NULL;
 323                 cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
 324                                NO_MAP_UNI_RSVD);
 325         } else {
 326                 len = strnlen(src, maxlen);
 327                 len++;
 328                 dst = kmalloc(len, GFP_KERNEL);
 329                 if (!dst)
 330                         return NULL;
 331                 strlcpy(dst, src, len);
 332         }
 333
 334         return dst;
 335 }
 336
 337 static __le16 convert_to_sfu_char(char src_char)
 338 {
 339         __le16 dest_char;
 340
 341         switch (src_char) {
 342         case ':':
 343                 dest_char = cpu_to_le16(UNI_COLON);
 344                 break;
 345         case '*':
 346                 dest_char = cpu_to_le16(UNI_ASTERISK);
 347                 break;
 348         case '?':
 349                 dest_char = cpu_to_le16(UNI_QUESTION);
 350                 break;
 351         case '<':
 352                 dest_char = cpu_to_le16(UNI_LESSTHAN);
 353                 break;
 354         case '>':
 355                 dest_char = cpu_to_le16(UNI_GRTRTHAN);
 356                 break;
 357         case '|':
 358                 dest_char = cpu_to_le16(UNI_PIPE);
 359                 break;
 360         default:
 361                 dest_char = 0;
 362         }
 363
 364         return dest_char;
 365 }
 366
 367 static __le16 convert_to_sfm_char(char src_char)
 368 {
 369         __le16 dest_char;
 370
 371         switch (src_char) {
 372         case ':':
 373                 dest_char = cpu_to_le16(SFM_COLON);
 374                 break;
 375         case '*':
 376                 dest_char = cpu_to_le16(SFM_ASTERISK);
 377                 break;
 378         case '?':
 379                 dest_char = cpu_to_le16(SFM_QUESTION);
 380                 break;
 381         case '<':
 382                 dest_char = cpu_to_le16(SFM_LESSTHAN);
 383                 break;
 384         case '>':
 385                 dest_char = cpu_to_le16(SFM_GRTRTHAN);
 386                 break;
 387         case '|':
 388                 dest_char = cpu_to_le16(SFM_PIPE);
 389                 break;
 390         default:
 391                 dest_char = 0;
 392         }
 393
 394         return dest_char;
 395 }
 396
 397 /*
 398  * Convert 16 bit Unicode pathname to wire format from string in current code
 399  * page. Conversion may involve remapping up the six characters that are
 400  * only legal in POSIX-like OS (if they are present in the string). Path
 401  * names are little endian 16 bit Unicode on the wire
 402  */
 403 int
 404 cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
 405                  const struct nls_table *cp, int map_chars)
 406 {
 407         int i, charlen;
 408         int j = 0;
 409         char src_char;
 410         __le16 dst_char;
 411         wchar_t tmp;
 412
 413         if (map_chars == NO_MAP_UNI_RSVD)
 414                 return cifs_strtoUTF16(target, source, PATH_MAX, cp);
 415
 416         for (i = 0; i < srclen; j++) {
 417                 src_char = source[i];
 418                 charlen = 1;
 419
 420                 /* check if end of string */
 421                 if (src_char == 0)
 422                         goto ctoUTF16_out;
 423
 424                 /* see if we must remap this char */
 425                 if (map_chars == SFU_MAP_UNI_RSVD)
 426                         dst_char = convert_to_sfu_char(src_char);
 427                 else if (map_chars == SFM_MAP_UNI_RSVD)
 428                         dst_char = convert_to_sfm_char(src_char);
 429                 else
 430                         dst_char = 0;
 431                 /*
 432                  * FIXME: We can not handle remapping backslash (UNI_SLASH)
 433                  * until all the calls to build_path_from_dentry are modified,
 434                  * as they use backslash as separator.
 435                  */
 436                 if (dst_char == 0) {
 437                         charlen = cp->char2uni(source + i, srclen - i, &tmp);
 438                         dst_char = cpu_to_le16(tmp);
 439
 440                         /*
 441                          * if no match, use question mark, which at least in
 442                          * some cases serves as wild card
 443                          */
 444                         if (charlen < 1) {
 445                                 dst_char = cpu_to_le16(0x003f);
 446                                 charlen = 1;
 447                         }
 448                 }
 449                 /*
 450                  * character may take more than one byte in the source string,
 451                  * but will take exactly two bytes in the target string
 452                  */
 453                 i += charlen;
 454                 put_unaligned(dst_char, &target[j]);
 455         }
 456
 457 ctoUTF16_out:
 458         put_unaligned(0, &target[j]); /* Null terminate target unicode string */
 459         return j;
 460 }
 461
 462 #ifdef CONFIG_CIFS_SMB2
 463 /*
 464  * cifs_local_to_utf16_bytes - how long will a string be after conversion?
 465  * @from - pointer to input string
 466  * @maxbytes - don't go past this many bytes of input string
 467  * @codepage - source codepage
 468  *
 469  * Walk a string and return the number of bytes that the string will
 470  * be after being converted to the given charset, not including any null
 471  * termination required. Don't walk past maxbytes in the source buffer.
 472  */
 473
 474 static int
 475 cifs_local_to_utf16_bytes(const char *from, int len,
 476                           const struct nls_table *codepage)
 477 {
 478         int charlen;
 479         int i;
 480         wchar_t wchar_to;
 481
 482         for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
 483                 charlen = codepage->char2uni(from, len, &wchar_to);
 484                 /* Failed conversion defaults to a question mark */
 485                 if (charlen < 1)
 486                         charlen = 1;
 487         }
 488         return 2 * i; /* UTF16 characters are two bytes */
 489 }
 490
 491 /*
 492  * cifs_strndup_to_utf16 - copy a string to wire format from the local codepage
 493  * @src - source string
 494  * @maxlen - don't walk past this many bytes in the source string
 495  * @utf16_len - the length of the allocated string in bytes (including null)
 496  * @cp - source codepage
 497  * @remap - map special chars
 498  *
 499  * Take a string convert it from the local codepage to UTF16 and
 500  * put it in a new buffer. Returns a pointer to the new string or NULL on
 501  * error.
 502  */
 503 __le16 *
 504 cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len,
 505                       const struct nls_table *cp, int remap)
 506 {
 507         int len;
 508         __le16 *dst;
 509
 510         len = cifs_local_to_utf16_bytes(src, maxlen, cp);
 511         len += 2; /* NULL */
 512         dst = kmalloc(len, GFP_KERNEL);
 513         if (!dst) {
 514                 *utf16_len = 0;
 515                 return NULL;
 516         }
 517         cifsConvertToUTF16(dst, src, strlen(src), cp, remap);
 518         *utf16_len = len;
 519         return dst;
 520 }
 521 #endif /* CONFIG_CIFS_SMB2 */