source3/lib/util_str.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Samba utility functions
   4
   5    Copyright (C) Andrew Tridgell 1992-2001
   6    Copyright (C) Simo Sorce      2001-2002
   7    Copyright (C) Martin Pool     2003
   8    Copyright (C) James Peach     2006
   9    Copyright (C) Jeremy Allison  1992-2007
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 3 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  23 */
  24
  25 #include "includes.h"
  26 #include "lib/param/loadparm.h"
  27 #include "lib/util/smb_strtox.h"
  28
  29 static const char toupper_ascii_fast_table[128] = {
  30         0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
  31         0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  32         0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  33         0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
  34         0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  35         0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
  36         0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  37         0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f
  38 };
  39
  40 /**
  41  * Compare 2 strings up to and including the nth char.
  42  *
  43  * @note The comparison is case-insensitive.
  44  **/
  45 bool strnequal(const char *s1,const char *s2,size_t n)
  46 {
  47         if (s1 == s2)
  48                 return(true);
  49         if (!s1 || !s2 || !n)
  50                 return(false);
  51
  52         return(strncasecmp_m(s1,s2,n)==0);
  53 }
  54
  55 /**
  56  Skip past a string in a buffer. Buffer may not be
  57  null terminated. end_ptr points to the first byte after
  58  then end of the buffer.
  59 **/
  60
  61 char *skip_string(const char *base, size_t len, char *buf)
  62 {
  63         const char *end_ptr = base + len;
  64
  65         if (end_ptr < base || !base || !buf || buf >= end_ptr) {
  66                 return NULL;
  67         }
  68
  69         /* Skip the string */
  70         while (*buf) {
  71                 buf++;
  72                 if (buf >= end_ptr) {
  73                         return NULL;
  74                 }
  75         }
  76         /* Skip the '\0' */
  77         buf++;
  78         return buf;
  79 }
  80
  81 /**
  82  Count the number of characters in a string. Normally this will
  83  be the same as the number of bytes in a string for single byte strings,
  84  but will be different for multibyte.
  85 **/
  86
  87 size_t str_charnum(const char *s)
  88 {
  89         size_t ret, converted_size;
  90         smb_ucs2_t *tmpbuf2 = NULL;
  91         if (!push_ucs2_talloc(talloc_tos(), &tmpbuf2, s, &converted_size)) {
  92                 return 0;
  93         }
  94         ret = strlen_w(tmpbuf2);
  95         TALLOC_FREE(tmpbuf2);
  96         return ret;
  97 }
  98
  99 bool trim_char(char *s,char cfront,char cback)
 100 {
 101         bool ret = false;
 102         char *ep;
 103         char *fp = s;
 104
 105         /* Ignore null or empty strings. */
 106         if (!s || (s[0] == '\0'))
 107                 return false;
 108
 109         if (cfront) {
 110                 while (*fp && *fp == cfront)
 111                         fp++;
 112                 if (!*fp) {
 113                         /* We ate the string. */
 114                         s[0] = '\0';
 115                         return true;
 116                 }
 117                 if (fp != s)
 118                         ret = true;
 119         }
 120
 121         ep = fp + strlen(fp) - 1;
 122         if (cback) {
 123                 /* Attempt ascii only. Bail for mb strings. */
 124                 while ((ep >= fp) && (*ep == cback)) {
 125                         ret = true;
 126                         if ((ep > fp) && (((unsigned char)ep[-1]) & 0x80)) {
 127                                 /* Could be mb... bail back to trim_string. */
 128                                 char fs[2], bs[2];
 129                                 if (cfront) {
 130                                         fs[0] = cfront;
 131                                         fs[1] = '\0';
 132                                 }
 133                                 bs[0] = cback;
 134                                 bs[1] = '\0';
 135                                 return trim_string(s, cfront ? fs : NULL, bs);
 136                         } else {
 137                                 ep--;
 138                         }
 139                 }
 140                 if (ep < fp) {
 141                         /* We ate the string. */
 142                         s[0] = '\0';
 143                         return true;
 144                 }
 145         }
 146
 147         ep[1] = '\0';
 148         memmove(s, fp, ep-fp+2);
 149         return ret;
 150 }
 151
 152 /**
 153  Check if a string is part of a list.
 154 **/
 155
 156 bool in_list(const char *s, const char *list, bool casesensitive)
 157 {
 158         char *tok = NULL;
 159         bool ret = false;
 160         TALLOC_CTX *frame;
 161
 162         if (!list) {
 163                 return false;
 164         }
 165
 166         frame = talloc_stackframe();
 167         while (next_token_talloc(frame, &list, &tok,LIST_SEP)) {
 168                 if (casesensitive) {
 169                         if (strcmp(tok,s) == 0) {
 170                                 ret = true;
 171                                 break;
 172                         }
 173                 } else {
 174                         if (strcasecmp_m(tok,s) == 0) {
 175                                 ret = true;
 176                                 break;
 177                         }
 178                 }
 179         }
 180         TALLOC_FREE(frame);
 181         return ret;
 182 }
 183
 184 /**
 185  Truncate a string at a specified length.
 186 **/
 187
 188 char *string_truncate(char *s, unsigned int length)
 189 {
 190         if (s && strlen(s) > length)
 191                 s[length] = 0;
 192         return s;
 193 }
 194
 195 static bool unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 196 {
 197         size_t size;
 198         smb_ucs2_t *buffer = NULL;
 199         bool ret;
 200
 201         if (!convert_string_talloc(talloc_tos(), CH_UNIX, CH_UTF16LE, src, srclen,
 202                                    (void **)(void *)&buffer, &size))
 203         {
 204                 return false;
 205         }
 206         if (!strlower_w(buffer) && (dest == src)) {
 207                 TALLOC_FREE(buffer);
 208                 return true;
 209         }
 210         ret = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, &size);
 211         TALLOC_FREE(buffer);
 212         return ret;
 213 }
 214
#if 0 /* Alternate function that avoids talloc calls for both ASCII and non-ASCII input */

/**
 Convert a string to lower case.

 NOTE: this variant is compiled out. It is declared as returning void,
 whereas the live strlower_m() in this file returns bool, so it cannot
 be re-enabled as-is.
**/
_PUBLIC_ void strlower_m(char *s)
{
        /* d is the write cursor; s is the read cursor over the same buffer. */
        char *d;
        struct smb_iconv_handle *iconv_handle;

        iconv_handle = get_iconv_handle();

        d = s;

        while (*s) {
                /* c_size: bytes read for this codepoint; c_size2: bytes written. */
                size_t c_size, c_size2;
                codepoint_t c = next_codepoint_handle(iconv_handle, s, &c_size);
                c_size2 = push_codepoint_handle(iconv_handle, d, tolower_m(c));
                if (c_size2 > c_size) {
                        /*
                         * The conversion is done in place, so a lower-cased
                         * codepoint that is longer than the original is
                         * treated as fatal.
                         */
                        DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
                                 c, tolower_m(c), (int)c_size, (int)c_size2));
                        smb_panic("codepoint expansion in strlower_m\n");
                }
                s += c_size;
                d += c_size2;
        }
        /* Terminate at the write cursor. */
        *d = 0;
}

#endif
 245
 246 /**
 247  Convert a string to lower case.
 248 **/
 249
 250 bool strlower_m(char *s)
 251 {
 252         size_t len;
 253         int errno_save;
 254         bool ret = false;
 255
 256         /* this is quite a common operation, so we want it to be
 257            fast. We optimise for the ascii case, knowing that all our
 258            supported multi-byte character sets are ascii-compatible
 259            (ie. they match for the first 128 chars) */
 260
 261         while (*s && !(((unsigned char)s[0]) & 0x80)) {
 262                 *s = tolower_m((unsigned char)*s);
 263                 s++;
 264         }
 265
 266         if (!*s)
 267                 return true;
 268
 269         /* I assume that lowercased string takes the same number of bytes
 270          * as source string even in UTF-8 encoding. (VIV) */
 271         len = strlen(s) + 1;
 272         errno_save = errno;
 273         errno = 0;
 274         ret = unix_strlower(s,len,s,len);
 275         /* Catch mb conversion errors that may not terminate. */
 276         if (errno) {
 277                 s[len-1] = '\0';
 278         }
 279         errno = errno_save;
 280         return ret;
 281 }
 282
 283 static bool unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 284 {
 285         size_t size;
 286         smb_ucs2_t *buffer;
 287         bool ret;
 288
 289         if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &size)) {
 290                 return false;
 291         }
 292
 293         if (!strupper_w(buffer) && (dest == src)) {
 294                 TALLOC_FREE(buffer);
 295                 return true;
 296         }
 297
 298         ret = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, &size);
 299         TALLOC_FREE(buffer);
 300         return ret;
 301 }
 302
#if 0 /* Alternate function that avoids talloc calls for both ASCII and non-ASCII input */

/**
 Convert a string to UPPER case.

 NOTE: this variant is compiled out. It is declared as returning void,
 whereas the live strupper_m() in this file returns bool, so it cannot
 be re-enabled as-is.
**/
_PUBLIC_ void strupper_m(char *s)
{
        /* d is the write cursor; s is the read cursor over the same buffer. */
        char *d;
        struct smb_iconv_handle *iconv_handle;

        iconv_handle = get_iconv_handle();

        d = s;

        while (*s) {
                /* c_size: bytes read for this codepoint; c_size2: bytes written. */
                size_t c_size, c_size2;
                codepoint_t c = next_codepoint_handle(iconv_handle, s, &c_size);
                c_size2 = push_codepoint_handle(iconv_handle, d, toupper_m(c));
                if (c_size2 > c_size) {
                        /*
                         * The conversion is done in place, so an upper-cased
                         * codepoint that is longer than the original is
                         * treated as fatal.
                         */
                        DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
                                 c, toupper_m(c), (int)c_size, (int)c_size2));
                        smb_panic("codepoint expansion in strupper_m\n");
                }
                s += c_size;
                d += c_size2;
        }
        /* Terminate at the write cursor. */
        *d = 0;
}

#endif
 333
 334 /**
 335  Convert a string to upper case.
 336 **/
 337
 338 bool strupper_m(char *s)
 339 {
 340         size_t len;
 341         bool ret = false;
 342
 343         /* this is quite a common operation, so we want it to be
 344            fast. We optimise for the ascii case, knowing that all our
 345            supported multi-byte character sets are ascii-compatible
 346            (ie. they match for the first 128 chars) */
 347
 348         while (*s && !(((unsigned char)s[0]) & 0x80)) {
 349                 *s = toupper_ascii_fast_table[(unsigned char)s[0]];
 350                 s++;
 351         }
 352
 353         if (!*s)
 354                 return true;
 355
 356         /* I assume that uppercased string takes the same number of bytes
 357          * as source string even in multibyte encoding. (VIV) */
 358         len = strlen(s) + 1;
 359         ret = unix_strupper(s,len,s,len);
 360         /* Catch mb conversion errors that may not terminate. */
 361         if (!ret) {
 362                 s[len-1] = '\0';
 363         }
 364         return ret;
 365 }
 366
 367 /**
 368  Just a typesafety wrapper for snprintf into a fstring.
 369 **/
 370
 371 int fstr_sprintf(fstring s, const char *fmt, ...)
 372 {
 373         va_list ap;
 374         int ret;
 375
 376         va_start(ap, fmt);
 377         ret = vsnprintf(s, FSTRING_LEN, fmt, ap);
 378         va_end(ap);
 379         return ret;
 380 }
 381
 382 /* read a SMB_BIG_UINT from a string */
 383 uint64_t STR_TO_SMB_BIG_UINT(const char *nptr, const char **entptr)
 384 {
 385
 386         uint64_t val = (uint64_t)-1;
 387         const char *p = nptr;
 388
 389         if (!p) {
 390                 if (entptr) {
 391                         *entptr = p;
 392                 }
 393                 return val;
 394         }
 395
 396         while (*p && isspace(*p))
 397                 p++;
 398
 399         sscanf(p,"%"SCNu64,&val);
 400         if (entptr) {
 401                 while (*p && isdigit(*p))
 402                         p++;
 403                 *entptr = p;
 404         }
 405
 406         return val;
 407 }
 408
 409 /* Convert a size specification to a count of bytes. We accept the following
 410  * suffixes:
 411  *          bytes if there is no suffix
 412  *      kK  kibibytes
 413  *      mM  mebibytes
 414  *      gG  gibibytes
 415  *      tT  tibibytes
 416  *      pP  whatever the ISO name for petabytes is
 417  *
 418  *  Returns 0 if the string can't be converted.
 419  */
 420 uint64_t conv_str_size(const char * str)
 421 {
 422         uint64_t lval;
 423         char *end;
 424         int error = 0;
 425
 426         if (str == NULL || *str == '\0') {
 427                 return 0;
 428         }
 429
 430         lval = smb_strtoull(str, &end, 10, &error, SMB_STR_STANDARD);
 431
 432         if (error != 0) {
 433                 return 0;
 434         }
 435
 436         if (*end == '\0') {
 437                 return lval;
 438         }
 439
 440         if (strwicmp(end, "K") == 0) {
 441                 lval *= 1024ULL;
 442         } else if (strwicmp(end, "M") == 0) {
 443                 lval *= (1024ULL * 1024ULL);
 444         } else if (strwicmp(end, "G") == 0) {
 445                 lval *= (1024ULL * 1024ULL *
 446                          1024ULL);
 447         } else if (strwicmp(end, "T") == 0) {
 448                 lval *= (1024ULL * 1024ULL *
 449                          1024ULL * 1024ULL);
 450         } else if (strwicmp(end, "P") == 0) {
 451                 lval *= (1024ULL * 1024ULL *
 452                          1024ULL * 1024ULL *
 453                          1024ULL);
 454         } else {
 455                 return 0;
 456         }
 457
 458         return lval;
 459 }
 460
 461 char *talloc_asprintf_strupper_m(TALLOC_CTX *t, const char *fmt, ...)
 462 {
 463         va_list ap;
 464         char *ret;
 465
 466         va_start(ap, fmt);
 467         ret = talloc_vasprintf(t, fmt, ap);
 468         va_end(ap);
 469
 470         if (ret == NULL) {
 471                 return NULL;
 472         }
 473         if (!strupper_m(ret)) {
 474                 TALLOC_FREE(ret);
 475                 return NULL;
 476         }
 477         return ret;
 478 }
 479
 480 char *talloc_asprintf_strlower_m(TALLOC_CTX *t, const char *fmt, ...)
 481 {
 482         va_list ap;
 483         char *ret;
 484
 485         va_start(ap, fmt);
 486         ret = talloc_vasprintf(t, fmt, ap);
 487         va_end(ap);
 488
 489         if (ret == NULL) {
 490                 return NULL;
 491         }
 492         if (!strlower_m(ret)) {
 493                 TALLOC_FREE(ret);
 494                 return NULL;
 495         }
 496         return ret;
 497 }
 498
 499
 500 /********************************************************************
 501  Check a string for any occurrences of a specified list of invalid
 502  characters.
 503 ********************************************************************/
 504
 505 bool validate_net_name( const char *name,
 506                 const char *invalid_chars,
 507                 int max_len)
 508 {
 509         int i;
 510
 511         if (!name) {
 512                 return false;
 513         }
 514
 515         for ( i=0; i<max_len && name[i]; i++ ) {
 516                 /* fail if strchr_m() finds one of the invalid characters */
 517                 if ( name[i] && strchr_m( invalid_chars, name[i] ) ) {
 518                         return false;
 519                 }
 520         }
 521
 522         return true;
 523 }
 524
 525
/*******************************************************************
 Add a shell escape character '\' to any character not in a known list
 of characters. UNIX charset format.

 The input is scanned one codepoint at a time while tracking three
 states: "next character is backslash-escaped", "inside single quotes"
 and "inside double quotes". Multi-byte codepoints are always copied
 unchanged.

 Returns a newly allocated string (allocated with SMB_MALLOC_ARRAY;
 presumably the caller frees it - confirm against callers), or NULL
 if the allocation fails or the input contains an invalid codepoint.
*******************************************************************/

/* Characters that are copied without an escape outside of any quotes. */
#define INCLUDE_LIST "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_/ \t.,"
/* Characters that a '\' escapes (or that need escaping) inside double quotes. */
#define INSIDE_DQUOTE_LIST "$`\n\"\\"

char *escape_shell_string(const char *src)
{
        size_t srclen = strlen(src);
        /*
         * Every source byte produces at most two output bytes (an added
         * '\' plus the byte itself), plus one byte for the terminator.
         */
        char *ret = SMB_MALLOC_ARRAY(char, (srclen * 2) + 1);
        char *dest = ret;
        bool in_s_quote = false;
        bool in_d_quote = false;
        bool next_escaped = false;

        if (!ret) {
                return NULL;
        }

        while (*src) {
                size_t c_size;
                codepoint_t c = next_codepoint(src, &c_size);

                if (c == INVALID_CODEPOINT) {
                        SAFE_FREE(ret);
                        return NULL;
                }

                /*
                 * Multi-byte characters are copied verbatim, and they
                 * also end any pending backslash-escaped state.
                 */
                if (c_size > 1) {
                        memcpy(dest, src, c_size);
                        src += c_size;
                        dest += c_size;
                        next_escaped = false;
                        continue;
                }

                /*
                 * Deal with backslash escaped state.
                 * This only lasts for one character.
                 */

                if (next_escaped) {
                        *dest++ = *src++;
                        next_escaped = false;
                        continue;
                }

                /*
                 * Deal with single quote state. The
                 * only thing we care about is exiting
                 * this state.
                 */

                if (in_s_quote) {
                        if (*src == '\'') {
                                in_s_quote = false;
                        }
                        *dest++ = *src++;
                        continue;
                }

                /*
                 * Deal with double quote state. The most
                 * complex state. We must cope with \, meaning
                 * possibly escape next char (depending what it
                 * is), ", meaning exit this state, and possibly
                 * add an \ escape to any unprotected character
                 * (listed in INSIDE_DQUOTE_LIST).
                 */

                if (in_d_quote) {
                        if (*src == '\\') {
                                /*
                                 * Next character might be escaped.
                                 * We have to peek. Inside double
                                 * quotes only INSIDE_DQUOTE_LIST
                                 * characters are escaped by a \.
                                 */

                                char nextchar;

                                c = next_codepoint(&src[1], &c_size);
                                if (c == INVALID_CODEPOINT) {
                                        SAFE_FREE(ret);
                                        return NULL;
                                }
                                if (c_size > 1) {
                                        /*
                                         * Don't escape the next char.
                                         * Just copy the \.
                                         */
                                        *dest++ = *src++;
                                        continue;
                                }

                                nextchar = src[1];

                                /*
                                 * Only flag the next character as escaped
                                 * when it exists and is one a '\' can
                                 * escape inside double quotes.
                                 */
                                if (nextchar && strchr(INSIDE_DQUOTE_LIST,
                                                        (int)nextchar)) {
                                        next_escaped = true;
                                }
                                *dest++ = *src++;
                                continue;
                        }

                        if (*src == '\"') {
                                /* Exit double quote state. */
                                in_d_quote = false;
                                *dest++ = *src++;
                                continue;
                        }

                        /*
                         * We know the character isn't \ or ",
                         * so escape it if it's any of the other
                         * possible unprotected characters.
                         */

                        if (strchr(INSIDE_DQUOTE_LIST, (int)*src)) {
                                *dest++ = '\\';
                        }
                        *dest++ = *src++;
                        continue;
                }

                /*
                 * From here to the end of the loop we're
                 * not in the single or double quote state.
                 */

                if (*src == '\\') {
                        /* Next character must be escaped. */
                        next_escaped = true;
                        *dest++ = *src++;
                        continue;
                }

                if (*src == '\'') {
                        /* Go into single quote state. */
                        in_s_quote = true;
                        *dest++ = *src++;
                        continue;
                }

                if (*src == '\"') {
                        /* Go into double quote state. */
                        in_d_quote = true;
                        *dest++ = *src++;
                        continue;
                }

                /*
                 * Check if we need to escape the character: anything
                 * not in INCLUDE_LIST gets a '\' in front of it.
                 */

                if (!strchr(INCLUDE_LIST, (int)*src)) {
                        *dest++ = '\\';
                }
                *dest++ = *src++;
        }
        /* Terminate the output. */
        *dest++ = '\0';
        return ret;
}
 689
 690 /*
 691  * This routine improves performance for operations temporarily acting on a
 692  * full path. It is equivalent to the much more expensive
 693  *
 694  * talloc_asprintf(talloc_tos(), "%s/%s", dir, name)
 695  *
 696  * This actually does make a difference in metadata-heavy workloads (i.e. the
 697  * "standard" client.txt nbench run.
 698  */
 699
 700 ssize_t full_path_tos(const char *dir, const char *name,
 701                       char *tmpbuf, size_t tmpbuf_len,
 702                       char **pdst, char **to_free)
 703 {
 704         size_t dirlen, namelen, len;
 705         char *dst;
 706
 707         dirlen = strlen(dir);
 708         namelen = strlen(name);
 709         len = dirlen + namelen + 1;
 710
 711         if (len < tmpbuf_len) {
 712                 dst = tmpbuf;
 713                 *to_free = NULL;
 714         } else {
 715                 dst = talloc_array(talloc_tos(), char, len+1);
 716                 if (dst == NULL) {
 717                         return -1;
 718                 }
 719                 *to_free = dst;
 720         }
 721
 722         memcpy(dst, dir, dirlen);
 723         dst[dirlen] = '/';
 724         memcpy(dst+dirlen+1, name, namelen+1);
 725         *pdst = dst;
 726         return len;
 727 }