lib/util/charset/util_unistr.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Samba utility functions
   4    Copyright (C) Andrew Tridgell 1992-2001
   5    Copyright (C) Simo Sorce 2001
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 #include "replace.h"
  22 #include "system/locale.h"
  23 #include "charset.h"
  24 #include "lib/util/byteorder.h"
  25 #include "lib/util/fault.h"
  26 #include "lib/util/tsort.h"
  27
  28 /**
  29  String replace.
  30  NOTE: oldc and newc must be 7 bit characters
  31 **/
  32 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
  33 {
  34         struct smb_iconv_handle *ic = get_iconv_handle();
  35         while (s && *s) {
  36                 size_t size;
  37                 codepoint_t c = next_codepoint_handle(ic, s, &size);
  38                 if (c == oldc) {
  39                         *s = newc;
  40                 }
  41                 s += size;
  42         }
  43 }
  44
  45 /**
  46  Convert a string to lower case, allocated with talloc
  47 **/
  48 _PUBLIC_ char *strlower_talloc_handle(struct smb_iconv_handle *iconv_handle,
  49                                       TALLOC_CTX *ctx, const char *src)
  50 {
  51         size_t size=0;
  52         char *dest;
  53
  54         if(src == NULL) {
  55                 return NULL;
  56         }
  57
  58         /* this takes advantage of the fact that upper/lower can't
  59            change the length of a character by more than 1 byte */
  60         dest = talloc_array(ctx, char, 2*(strlen(src))+1);
  61         if (dest == NULL) {
  62                 return NULL;
  63         }
  64
  65         while (*src) {
  66                 size_t c_size;
  67                 codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
  68                 src += c_size;
  69
  70                 c = tolower_m(c);
  71
  72                 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
  73                 if (c_size == -1) {
  74                         talloc_free(dest);
  75                         return NULL;
  76                 }
  77                 size += c_size;
  78         }
  79
  80         dest[size] = 0;
  81
  82         /* trim it so talloc_append_string() works */
  83         dest = talloc_realloc(ctx, dest, char, size+1);
  84
  85         talloc_set_name_const(dest, dest);
  86
  87         return dest;
  88 }
  89
  90 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
  91 {
  92         struct smb_iconv_handle *iconv_handle = get_iconv_handle();
  93         return strlower_talloc_handle(iconv_handle, ctx, src);
  94 }
  95
  96 /**
  97  Convert a string to UPPER case, allocated with talloc
  98  source length limited to n bytes, iconv handle supplied
  99 **/
 100 _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
 101                                         TALLOC_CTX *ctx, const char *src, size_t n)
 102 {
 103         size_t size=0;
 104         char *dest;
 105
 106         if (!src) {
 107                 return NULL;
 108         }
 109
 110         /* this takes advantage of the fact that upper/lower can't
 111            change the length of a character by more than 1 byte */
 112         dest = talloc_array(ctx, char, 2*(n+1));
 113         if (dest == NULL) {
 114                 return NULL;
 115         }
 116
 117         while (n && *src) {
 118                 size_t c_size;
 119                 codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
 120                                                           CH_UNIX, &c_size);
 121                 src += c_size;
 122                 n -= c_size;
 123
 124                 c = toupper_m(c);
 125
 126                 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
 127                 if (c_size == -1) {
 128                         talloc_free(dest);
 129                         return NULL;
 130                 }
 131                 size += c_size;
 132         }
 133
 134         dest[size] = 0;
 135
 136         /* trim it so talloc_append_string() works */
 137         dest = talloc_realloc(ctx, dest, char, size+1);
 138
 139         talloc_set_name_const(dest, dest);
 140
 141         return dest;
 142 }
 143
 144 /**
 145  Convert a string to UPPER case, allocated with talloc
 146  source length limited to n bytes
 147 **/
 148 _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
 149 {
 150         struct smb_iconv_handle *iconv_handle = get_iconv_handle();
 151         return strupper_talloc_n_handle(iconv_handle, ctx, src, n);
 152 }
 153 /**
 154  Convert a string to UPPER case, allocated with talloc
 155 **/
 156 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
 157 {
 158         return strupper_talloc_n(ctx, src, src?strlen(src):0);
 159 }
 160
 161 /**
 162  talloc_strdup() a unix string to upper case.
 163 **/
 164 _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
 165 {
 166         return strupper_talloc(ctx, src);
 167 }
 168
 169
 170 /*
 * strncasecmp_ldb() works a *bit* like strncasecmp, with various
 * tricks to suit the way LDB compares strings. The differences are:
 *
 * 0. each string has its own length.
 175  *
 176  * 1. consecutive spaces are collapsed down to one space, so that
 177  *    "a  b" equals "a b". (this is why each string needs its own
 178  *    length). Leading and trailing spaces are removed altogether.
 179  *
 180  * 2. Comparisons are done in UPPER CASE, as Windows does, not in
 181  *    lowercase as POSIX would have it.
 182  *
 183  * 3. An invalid byte compares higher than any real character. For example,
 184  *    "hello\xc2\xff" would sort higher than "hello\xcd\xb6", because CD
 185  *    B6 is a valid sequence and C2 FF is not.
 186  *
 187  * 4. If two strings become invalid on the same character, the rest
 188  *    of the string is compared via ldb ASCII case fold rules.
 189  *
 *    For example, "hellō\xC2\xFFworld" < " hElLŌ\xFE ", because the
 *    strings are equal up to 'ō' by utf-8 casefold, but the "\xc2\xff" and
 *    "\xfe" are invalid sequences. At that point, we skip to the byte-by-byte
 *    (but space-eating, casefolding) comparison, and 0xc2 < 0xfe.
 194  */
 195
 196 #define EAT_SPACE(s, len, ends_in_space)                         \
 197         do {                                                     \
 198                 while (len) {                                    \
 199                         if (*s != ' ') {                         \
 200                                 break;                           \
 201                         }                                        \
 202                         s++;                                     \
 203                         len--;                                   \
 204                 }                                                \
 205                 ends_in_space = (len == 0 || *s == '\0');        \
 206         } while(0)
 207
 208
 209 _PUBLIC_ int strncasecmp_ldb(const char *s1,
 210                              size_t len1,
 211                              const char *s2,
 212                              size_t len2)
 213 {
 214         struct smb_iconv_handle *iconv_handle = get_iconv_handle();
 215         codepoint_t c1, c2;
 216         size_t cs1, cs2;
 217         bool ends_in_space1, ends_in_space2;
 218         int ret;
 219         bool end1, end2;
 220
 221         EAT_SPACE(s1, len1, ends_in_space1);
 222         EAT_SPACE(s2, len2, ends_in_space2);
 223         /*
 224          * if ends_in_space was set, the string was empty or only
 225          * spaces (which we treat as equivalent).
 226          */
 227         if (ends_in_space1 && ends_in_space2) {
 228                 return 0;
 229         }
 230         if (ends_in_space1) {
 231                 return -1;
 232         }
 233         if (ends_in_space2) {
 234                 return 1;
 235         }
 236
 237         while (true) {
 238                 /*
 239                  * If the next byte is a space, we eat all the spaces,
 240                  * and say we found a single codepoint. If the spaces
 241                  * were at the end of the string, the codepoint is 0,
 242                  * as if there were no spaces. Otherwise it is 0x20,
 243                  * as if there was one space.
 244                  *
 245                  * Setting the codepoint to 0 will break the loop, but
 246                  * only after codepoints have been found in both strings.
 247                  */
 248                 if (len1 == 0 || *s1 == 0) {
 249                         c1 = 0;
 250                 } else if (*s1 == ' ') {
 251                         EAT_SPACE(s1, len1, ends_in_space1);
 252                         c1 = ends_in_space1 ? 0 : ' ';
 253                 } else if ((*s1 & 0x80) == 0) {
 254                         c1 = *s1;
 255                         s1++;
 256                         len1--;
 257                 } else {
 258                         c1 = next_codepoint_handle_ext(iconv_handle, s1, len1,
 259                                                        CH_UNIX, &cs1);
 260                         if (c1 != INVALID_CODEPOINT) {
 261                                 s1 += cs1;
 262                                 len1 -= cs1;
 263                         }
 264                 }
 265
 266                 if (len2 == 0 || *s2 == 0) {
 267                         c2 = 0;
 268                 } else if (*s2 == ' ') {
 269                         EAT_SPACE(s2, len2, ends_in_space2);
 270                         c2 = ends_in_space2 ? 0 : ' ';
 271                 } else if ((*s2 & 0x80) == 0) {
 272                         c2 = *s2;
 273                         s2++;
 274                         len2--;
 275                 } else {
 276                         c2 = next_codepoint_handle_ext(iconv_handle, s2, len2,
 277                                                        CH_UNIX, &cs2);
 278                         if (c2 != INVALID_CODEPOINT) {
 279                                 s2 += cs2;
 280                                 len2 -= cs2;
 281                         }
 282                 }
 283
 284                 if (c1 == 0 || c2 == 0 ||
 285                     c1 == INVALID_CODEPOINT || c2 == INVALID_CODEPOINT) {
 286                         break;
 287                 }
 288
 289                 if (c1 == c2) {
 290                         continue;
 291                 }
 292                 c1 = toupper_m(c1);
 293                 c2 = toupper_m(c2);
 294                 if (c1 != c2) {
 295                         break;
 296                 }
 297         }
 298
 299         /*
 300          * Either a difference has been found, or one or both strings have
 301          * ended or hit invalid codepoints.
 302          */
 303         ret = NUMERIC_CMP(c1, c2);
 304
 305         if (ret != 0) {
 306                 return ret;
 307         }
 308         /*
 309          * the strings are equal up to here, but one might be longer.
 310          */
 311         end1 = len1 == 0 || *s1 == 0;
 312         end2 = len2 == 0 || *s2 == 0;
 313
 314         if (end1 && end2) {
 315                 return 0;
 316         }
 317         if (end1) {
 318                 return -1;
 319         }
 320         if (end2) {
 321                 return -1;
 322         }
 323
 324         /*
 325          * By elimination, if we got here, we have INVALID_CODEPOINT on both
 326          * sides.
 327          *
 328          * THere is no perfect option, but what we choose to do is continue on
 329          * with ascii case fold (as if calling ldb_comparison_fold_ascii()
 330          * which is private to ldb, so we can't just defer to it).
 331          */
 332         while (true) {
 333                 if (len1 == 0 || *s1 == 0) {
 334                         c1 = 0;
 335                 } else if (*s1 == ' ') {
 336                         EAT_SPACE(s1, len1, ends_in_space1);
 337                         c1 = ends_in_space1 ? 0 : ' ';
 338                 } else {
 339                         c1 = *s1;
 340                         s1++;
 341                         len1--;
 342                         c1 = ('a' <= c1 && c1 <= 'z') ? c1 ^ 0x20 : c1;
 343                 }
 344
 345                 if (len2 == 0 || *s2 == 0) {
 346                         c2 = 0;
 347                 } else if (*s2 == ' ') {
 348                         EAT_SPACE(s2, len2, ends_in_space2);
 349                         c2 = ends_in_space2 ? 0 : ' ';
 350                 } else {
 351                         c2 = *s2;
 352                         s2++;
 353                         len2--;
 354                         c2 = ('a' <= c2 && c2 <= 'z') ? c2 ^ 0x20 : c2;
 355                 }
 356
 357                 if (c1 == 0 || c2 == 0 || c1 != c2) {
 358                         break;
 359                 }
 360         }
 361         return NUMERIC_CMP(c1, c2);
 362 }
 363
 364 #undef EAT_SPACE
 365
 366
 367 /**
 368  Find the number of 'c' chars in a string
 369 **/
 370 _PUBLIC_ size_t count_chars_m(const char *s, char c)
 371 {
 372         struct smb_iconv_handle *ic = get_iconv_handle();
 373         size_t count = 0;
 374
 375         while (*s) {
 376                 size_t size;
 377                 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
 378                 if (c2 == c) count++;
 379                 s += size;
 380         }
 381
 382         return count;
 383 }
 384
 385 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 386 {
 387         if (flags & (STR_NOALIGN|STR_ASCII)) {
 388                 return 0;
 389         }
 390         return PTR_DIFF(p, base_ptr) & 1;
 391 }
 392
 393 /**
 394 return the number of bytes occupied by a buffer in CH_UTF16 format
 395 **/
 396 size_t utf16_len(const void *buf)
 397 {
 398         size_t len;
 399
 400         for (len = 0; PULL_LE_U16(buf,len); len += 2) ;
 401
 402         return len;
 403 }
 404
 405 /**
 406 return the number of bytes occupied by a buffer in CH_UTF16 format
 407 the result includes the null termination
 408 **/
 409 size_t utf16_null_terminated_len(const void *buf)
 410 {
 411         return utf16_len(buf) + 2;
 412 }
 413
 414 /**
 415 return the number of bytes occupied by a buffer in CH_UTF16 format
 416 limited by 'n' bytes
 417 **/
 418 size_t utf16_len_n(const void *src, size_t n)
 419 {
 420         size_t len;
 421
 422         for (len = 0; (len+2 <= n) && PULL_LE_U16(src, len); len += 2) ;
 423
 424         return len;
 425 }
 426
 427 /**
 428 return the number of bytes occupied by a buffer in CH_UTF16 format
 429 the result includes the null termination
 430 limited by 'n' bytes
 431 **/
 432 size_t utf16_null_terminated_len_n(const void *src, size_t n)
 433 {
 434         size_t len;
 435
 436         len = utf16_len_n(src, n);
 437
 438         if (len+2 <= n) {
 439                 len += 2;
 440         }
 441
 442         return len;
 443 }
 444
 445 unsigned char *talloc_utf16_strlendup(TALLOC_CTX *mem_ctx, const char *str, size_t len)
 446 {
 447         unsigned char *new_str = NULL;
 448
 449         /* Check for overflow. */
 450         if (len > SIZE_MAX - 2) {
 451                 return NULL;
 452         }
 453
 454         /*
 455          * Allocate the new string, including space for the
 456          * UTF‐16 null terminator.
 457          */
 458         new_str = talloc_size(mem_ctx, len + 2);
 459         if (new_str == NULL) {
 460                 return NULL;
 461         }
 462
 463         memcpy(new_str, str, len);
 464
 465         /*
 466          * Ensure that the UTF‐16 string is
 467          * null‐terminated.
 468          */
 469         new_str[len] = '\0';
 470         new_str[len + 1] = '\0';
 471
 472         return new_str;
 473 }
 474
 475 unsigned char *talloc_utf16_strdup(TALLOC_CTX *mem_ctx, const char *str)
 476 {
 477         if (str == NULL) {
 478                 return NULL;
 479         }
 480         return talloc_utf16_strlendup(mem_ctx, str, utf16_len(str));
 481 }
 482
 483 unsigned char *talloc_utf16_strndup(TALLOC_CTX *mem_ctx, const char *str, size_t n)
 484 {
 485         if (str == NULL) {
 486                 return NULL;
 487         }
 488         return talloc_utf16_strlendup(mem_ctx, str, utf16_len_n(str, n));
 489 }
 490
 491 /**
 492  * Determine the length and validity of a utf-8 string.
 493  *
 494  * @param input the string pointer
 495  * @param maxlen maximum size of the string
 496  * @param byte_len receives the length of the valid section
 497  * @param char_len receives the number of unicode characters in the valid section
 498  * @param utf16_len receives the number of bytes the string would need in UTF16 encoding.
 499  *
 500  * @return true if the input is valid up to maxlen, or a '\0' byte, otherwise false.
 501  */
 502 bool utf8_check(const char *input, size_t maxlen,
 503                 size_t *byte_len,
 504                 size_t *char_len,
 505                 size_t *utf16_len)
 506 {
 507         const uint8_t *s = (const uint8_t *)input;
 508         size_t i;
 509         size_t chars = 0;
 510         size_t long_chars = 0;
 511         uint32_t codepoint;
 512         uint8_t a, b, c, d;
 513         for (i = 0; i < maxlen; i++, chars++) {
 514                 if (s[i] == 0) {
 515                         break;
 516                 }
 517                 if (s[i] < 0x80) {
 518                         continue;
 519                 }
 520                 if ((s[i] & 0xe0) == 0xc0) {
 521                         /* 110xxxxx 10xxxxxx */
 522                         a = s[i];
 523                         if (maxlen - i < 2) {
 524                                 goto error;
 525                         }
 526                         b = s[i + 1];
 527                         if ((b & 0xc0) != 0x80) {
 528                                 goto error;
 529                         }
 530                         codepoint = (a & 31) << 6 | (b & 63);
 531                         if (codepoint < 0x80) {
 532                                 goto error;
 533                         }
 534                         i++;
 535                         continue;
 536                 }
 537                 if ((s[i] & 0xf0) == 0xe0) {
 538                         /* 1110xxxx 10xxxxxx 10xxxxxx */
 539                         if (maxlen - i < 3) {
 540                                 goto error;
 541                         }
 542                         a = s[i];
 543                         b = s[i + 1];
 544                         c = s[i + 2];
 545                         if ((b & 0xc0) != 0x80 || (c & 0xc0) != 0x80) {
 546                                 goto error;
 547                         }
 548                         codepoint = (c & 63) | (b & 63) << 6 | (a & 15) << 12;
 549
 550                         if (codepoint < 0x800) {
 551                                 goto error;
 552                         }
 553                         if (codepoint >= 0xd800 && codepoint <= 0xdfff) {
 554                                 /*
 555                                  * This is an invalid codepoint, per
 556                                  * RFC3629, as it encodes part of a
 557                                  * UTF-16 surrogate pair for a
 558                                  * character over U+10000, which ought
 559                                  * to have been encoded as a four byte
 560                                  * utf-8 sequence.
 561                                  */
 562                                 goto error;
 563                         }
 564                         i += 2;
 565                         continue;
 566                 }
 567
 568                 if ((s[i] & 0xf8) == 0xf0) {
 569                         /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
 570                         if (maxlen - i < 4) {
 571                                 goto error;
 572                         }
 573                         a = s[i];
 574                         b = s[i + 1];
 575                         c = s[i + 2];
 576                         d = s[i + 3];
 577
 578                         if ((b & 0xc0) != 0x80 ||
 579                             (c & 0xc0) != 0x80 ||
 580                             (d & 0xc0) != 0x80) {
 581                                 goto error;
 582                         }
 583                         codepoint = (d & 63) | (c & 63) << 6 | (b & 63) << 12 | (a & 7) << 18;
 584
 585                         if (codepoint < 0x10000 || codepoint > 0x10ffff) {
 586                                 goto error;
 587                         }
 588                         /* this one will need two UTF16 characters */
 589                         long_chars++;
 590                         i += 3;
 591                         continue;
 592                 }
 593                 /*
 594                  * If it wasn't handled yet, it's wrong.
 595                  */
 596                 goto error;
 597         }
 598         *byte_len = i;
 599         *char_len = chars;
 600         *utf16_len = chars + long_chars;
 601         return true;
 602
 603 error:
 604         *byte_len = i;
 605         *char_len = chars;
 606         *utf16_len = chars + long_chars;
 607         return false;
 608 }
 609
 610
 611 /**
 612  * Copy a string from a char* unix src to a dos codepage string destination.
 613  *
 614  * @converted_size the number of bytes occupied by the string in the destination.
 615  * @return bool true if success.
 616  *
 617  * @param flags can include
 618  * <dl>
 619  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 620  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 621  * </dl>
 622  *
 623  * @param dest_len the maximum length in bytes allowed in the
 624  * destination.  If @p dest_len is -1 then no maximum is used.
 625  **/
 626 static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
 627 {
 628         size_t src_len;
 629         bool ret;
 630
 631         if (flags & STR_UPPER) {
 632                 char *tmpbuf = strupper_talloc(NULL, src);
 633                 if (tmpbuf == NULL) {
 634                         return false;
 635                 }
 636                 ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
 637                 talloc_free(tmpbuf);
 638                 return ret;
 639         }
 640
 641         src_len = strlen(src);
 642
 643         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 644                 src_len++;
 645
 646         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, converted_size);
 647 }
 648
 649 /**
 650  * Copy a string from a dos codepage source to a unix char* destination.
 651  *
 652  * The resulting string in "dest" is always null terminated.
 653  *
 654  * @param flags can have:
 655  * <dl>
 656  * <dt>STR_TERMINATE</dt>
 657  * <dd>STR_TERMINATE means the string in @p src
 658  * is null terminated, and src_len is ignored.</dd>
 659  * </dl>
 660  *
 661  * @param src_len is the length of the source area in bytes.
 662  * @returns the number of bytes occupied by the string in @p src.
 663  **/
 664 static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 665 {
 666         size_t size = 0;
 667
 668         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
 669                 if (src_len == (size_t)-1) {
 670                         src_len = strlen((const char *)src) + 1;
 671                 } else {
 672                         size_t len = strnlen((const char *)src, src_len);
 673                         if (len < src_len)
 674                                 len++;
 675                         src_len = len;
 676                 }
 677         }
 678
 679         /* We're ignoring the return here.. */
 680         (void)convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
 681
 682         if (dest_len)
 683                 dest[MIN(size, dest_len-1)] = 0;
 684
 685         return src_len;
 686 }
 687
 688 /**
 689  * Copy a string from a char* src to a unicode destination.
 690  *
 691  * @returns the number of bytes occupied by the string in the destination.
 692  *
 693  * @param flags can have:
 694  *
 695  * <dl>
 696  * <dt>STR_TERMINATE <dd>means include the null termination.
 697  * <dt>STR_UPPER     <dd>means uppercase in the destination.
 698  * <dt>STR_NOALIGN   <dd>means don't do alignment.
 699  * </dl>
 700  *
 701  * @param dest_len is the maximum length allowed in the
 702  * destination. If dest_len is -1 then no maximum is used.
 703  **/
 704 static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
 705 {
 706         size_t len=0;
 707         size_t src_len = strlen(src);
 708         size_t size = 0;
 709         bool ret;
 710
 711         if (flags & STR_UPPER) {
 712                 char *tmpbuf = strupper_talloc(NULL, src);
 713                 ssize_t retval;
 714                 if (tmpbuf == NULL) {
 715                         return -1;
 716                 }
 717                 retval = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
 718                 talloc_free(tmpbuf);
 719                 return retval;
 720         }
 721
 722         if (flags & STR_TERMINATE)
 723                 src_len++;
 724
 725         if (ucs2_align(NULL, dest, flags)) {
 726                 *(char *)dest = 0;
 727                 dest = (void *)((char *)dest + 1);
 728                 if (dest_len) dest_len--;
 729                 len++;
 730         }
 731
 732         /* ucs2 is always a multiple of 2 bytes */
 733         dest_len &= ~1;
 734
 735         ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, &size);
 736         if (ret == false) {
 737                 return 0;
 738         }
 739
 740         len += size;
 741
 742         return (ssize_t)len;
 743 }
 744
 745
 746 /**
 747  Copy a string from a ucs2 source to a unix char* destination.
 748  Flags can have:
 749   STR_TERMINATE means the string in src is null terminated.
 750   STR_NOALIGN   means don't try to align.
 751  if STR_TERMINATE is set then src_len is ignored if it is -1.
 752  src_len is the length of the source area in bytes
 753  Return the number of bytes occupied by the string in src.
 754  The resulting string in "dest" is always null terminated.
 755 **/
 756
 757 static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 758 {
 759         size_t size = 0;
 760
 761         if (ucs2_align(NULL, src, flags)) {
 762                 src = (const void *)((const char *)src + 1);
 763                 if (src_len > 0)
 764                         src_len--;
 765         }
 766
 767         if (flags & STR_TERMINATE) {
 768                 if (src_len == (size_t)-1) {
 769                         src_len = utf16_null_terminated_len(src);
 770                 } else {
 771                         src_len = utf16_null_terminated_len_n(src, src_len);
 772                 }
 773         }
 774
 775         /* ucs2 is always a multiple of 2 bytes */
 776         if (src_len != (size_t)-1)
 777                 src_len &= ~1;
 778
 779         /* We're ignoring the return here.. */
 780         (void)convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, &size);
 781         if (dest_len)
 782                 dest[MIN(size, dest_len-1)] = 0;
 783
 784         return src_len;
 785 }
 786
 787 /**
 788  Copy a string from a char* src to a unicode or ascii
 789  dos codepage destination choosing unicode or ascii based on the
 790  flags in the SMB buffer starting at base_ptr.
 791  Return the number of bytes occupied by the string in the destination.
 792  flags can have:
 793   STR_TERMINATE means include the null termination.
 794   STR_UPPER     means uppercase in the destination.
 795   STR_ASCII     use ascii even with unicode packet.
 796   STR_NOALIGN   means don't do alignment.
 797  dest_len is the maximum length allowed in the destination. If dest_len
 798  is -1 then no maximum is used.
 799 **/
 800
 801 _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
 802 {
 803         if (flags & STR_ASCII) {
 804                 size_t size = 0;
 805                 if (push_ascii_string(dest, src, dest_len, flags, &size)) {
 806                         return (ssize_t)size;
 807                 } else {
 808                         return (ssize_t)-1;
 809                 }
 810         } else if (flags & STR_UNICODE) {
 811                 return push_ucs2(dest, src, dest_len, flags);
 812         } else {
 813                 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
 814                 return -1;
 815         }
 816 }
 817
 818
 819 /**
 820  Copy a string from a unicode or ascii source (depending on
 821  the packet flags) to a char* destination.
 822  Flags can have:
 823   STR_TERMINATE means the string in src is null terminated.
 824   STR_UNICODE   means to force as unicode.
 825   STR_ASCII     use ascii even with unicode packet.
 826   STR_NOALIGN   means don't do alignment.
 827  if STR_TERMINATE is set then src_len is ignored is it is -1
 828  src_len is the length of the source area in bytes.
 829  Return the number of bytes occupied by the string in src.
 830  The resulting string in "dest" is always null terminated.
 831 **/
 832
 833 _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 834 {
 835         if (flags & STR_ASCII) {
 836                 return pull_ascii_string(dest, src, dest_len, src_len, flags);
 837         } else if (flags & STR_UNICODE) {
 838                 return pull_ucs2(dest, src, dest_len, src_len, flags);
 839         } else {
 840                 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
 841                 return -1;
 842         }
 843 }