src/util/conv.c

   1 /** Conversion functions
   2  * @file */
   3
   4 #ifdef HAVE_CONFIG_H
   5 #include "config.h"
   6 #endif
   7
   8 #include <ctype.h>
   9 #include <errno.h>
  10 #ifdef HAVE_LIMITS_H
  11 #include <limits.h>
  12 #endif
  13 #include <stdlib.h>
  14 #include <string.h>
  15
  16 #include "elinks.h"
  17
  18 #include "intl/charsets.h" /* NBSP_CHAR */
  19 #include "util/conv.h"
  20 #include "util/error.h"
  21 #include "util/string.h"
  22 #include "util/time.h"
  23
  24
  25
  26 /** This function takes string @a s and stores the @a number (of a
  27  * result width @a width) in string format there, starting at position
  28  * [*@a slen]. If the number would take more space than @a width, it
  29  * is truncated and only the _last_ digits of it are inserted to the
  30  * string. If the number takes less space than @a width, it is padded
  31  * by @a fillchar from left.
  32  * @a base defined which base should be used (10, 16, 8, 2, ...)
  33  * @a upper selects either hexa uppercased chars or lowercased chars.
  34  *
  35  * A NUL char is always added at the end of the string. @a s must point
  36  * to a sufficiently large memory space, at least *@a slen + @a width + 1.
  37  *
  38  * Examples:
  39  *
  40  * @code
  41  * elinks_ulongcat(s, NULL, 12345, 4, 0, 10, 0) : s = "2345"
  42  * elinks_ulongcat(s, NULL, 255, 4, '*', 16, 1) : s = "**FF"
  43  * elinks_ulongcat(s, NULL, 123, 5, '0', 10, 0) : s = "00123"
  44  * @endcode
  45  *
  46  * Note that this function exists to provide a fast and efficient, however
  47  * still quite powerful alternative to sprintf(). It is optimized for speed and
  48  * is *MUCH* faster than sprintf(). If you can use it, use it ;-). But do not
  49  * get too enthusiastic, do not use it in cases where it would break i18n.
  50  *
  51  * @returns 0 if OK or width needed for the whole number to fit there,
  52  * if it had to be truncated. A negative value signs an error. */
  53 NONSTATIC_INLINE int
  54 elinks_ulongcat(unsigned char *s, unsigned int *slen,
  55                 unsigned long long number, unsigned int width,
  56                 unsigned char fillchar, unsigned int base,
  57                 unsigned int upper)
  58 {
  59         static const unsigned char unum[]= "0123456789ABCDEF";
  60         static const unsigned char lnum[]= "0123456789abcdef";
  61         const unsigned char *to_num = (upper ? unum : lnum);
  62         unsigned int start = slen ? *slen : 0;
  63         unsigned int nlen = 1; /* '0' is one char, we can't have less. */
  64         unsigned int pos = start; /* starting position of the number */
  65         unsigned long long q = number;
  66         int ret = 0;
  67
  68         if (width < 1 || !s || base < 2 || base > 16) return -1;
  69
  70         /* Count the length of the number in chars. */
  71         while (q > (base - 1)) {
  72                 nlen++;
  73                 q /= base;
  74         }
  75
  76         /* If max. width attained, truncate. */
  77         if (nlen > width) {
  78                 ret = nlen;
  79                 nlen = width;
  80         }
  81
  82         if (slen) *slen += nlen;
  83
  84         /* Fill left space with fillchar. */
  85         if (fillchar) {
  86                 /* ie. width = 4 nlen = 2 -> pad = 2 */
  87                 unsigned int pad = width - nlen;
  88
  89                 if (pad > 0) {
  90                         /* Relocate the start of number. */
  91                         if (slen) *slen += pad;
  92                         pos += pad;
  93
  94                         /* Pad. */
  95                         while (pad > 0) s[--pad + start] = fillchar;
  96                 }
  97         }
  98
  99         s[pos + nlen] = '\0';
 100
 101         /* Now write number starting from end. */
 102         while (nlen > 0) {
 103                 s[--nlen + pos] = to_num[(number % base)];
 104                 number /= base;
 105         }
 106
 107         return ret;
 108 }
 109
 110 /** Similar to elinks_ulongcat() but for @c long number. */
 111 NONSTATIC_INLINE int
 112 elinks_longcat(unsigned char *s, unsigned int *slen,
 113                long long number, unsigned int width,
 114                unsigned char fillchar, unsigned int base,
 115                unsigned int upper)
 116 {
 117         unsigned char *p = s;
 118
 119         if (number < 0 && width > 0) {
 120                 if (slen) p[(*slen)++] = '-';
 121                 else *(p++) = '-';
 122                 number = -number;
 123                 width--;
 124         }
 125
 126         return elinks_ulongcat(p, slen, number, width, fillchar, base, upper);
 127 }
 128
 129
 130 /** @relates string */
 131 struct string *
 132 add_long_to_string(struct string *string, long long number)
 133 {
 134         unsigned char buffer[64];
 135         int length = 0;
 136         int width;
 137
 138         assert(string);
 139         if_assert_failed { return NULL; }
 140
 141         width = longcat(buffer, &length, number, sizeof(buffer) - 1, 0);
 142         if (width < 0 || !length) return NULL;
 143
 144         return add_bytes_to_string(string, buffer, length);
 145 }
 146
 147 /** @relates string */
 148 struct string *
 149 add_knum_to_string(struct string *string, long long num)
 150 {
 151         int ret;
 152         unsigned char t[64];
 153         int tlen = 0;
 154
 155         if (num && (num / (1024 * 1024)) * (1024 * 1024) == num) {
 156                 ret = longcat(&t, &tlen, num / (1024 * 1024), sizeof(t) - 2, 0);
 157                 t[tlen++] = 'M';
 158                 t[tlen] = '\0';
 159         } else if (num && (num / 1024) * 1024 == num) {
 160                 ret = longcat(&t, &tlen, num / 1024, sizeof(t) - 2, 0);
 161                 t[tlen++] = 'k';
 162                 t[tlen] = '\0';
 163         } else {
 164                 ret = longcat(&t, &tlen, num, sizeof(t) - 1, 0);
 165         }
 166
 167         if (ret < 0 || !tlen) return NULL;
 168
 169         add_bytes_to_string(string, t, tlen);
 170
 171         return string;
 172 }
 173
 174 /** @relates string */
 175 struct string *
 176 add_xnum_to_string(struct string *string, long long xnum)
 177 {
 178         unsigned char suff[3] = "\0i";
 179         off_t d = -1;
 180
 181         /* XXX: I don't completely like the computation of d here. --pasky */
 182         /* Mebi (Mi), 2^20 */
 183         if (xnum >= 1024 * 1024) {
 184                 suff[0] = 'M';
 185                 d = (xnum * (int) 10 / (int) ((int) (1024 * 1024))) % 10;
 186                 xnum /= 1024*1024;
 187         /* Kibi (Ki), 2^10 */
 188         } else if (xnum >= 1024) {
 189                 suff[0] = 'K';
 190                 d = (xnum * (int) 10 / (int) 1024) % 10;
 191                 xnum /= 1024;
 192         }
 193
 194         add_long_to_string(string, xnum);
 195
 196         if (d != -1) {
 197                 add_char_to_string(string, '.');
 198                 add_long_to_string(string, d);
 199         }
 200         add_char_to_string(string, ' ');
 201
 202         if (suff[0]) add_to_string(string, suff);
 203         add_char_to_string(string, 'B');
 204         return string;
 205 }
 206
 207 /** @relates string */
 208 struct string *
 209 add_duration_to_string(struct string *string, long seconds)
 210 {
 211         unsigned char q[64];
 212         int qlen = 0;
 213
 214         if (seconds < 0) seconds = 0;
 215
 216         /* Days */
 217         if (seconds >= (24 * 3600)) {
 218                 ulongcat(q, &qlen, (seconds / (24 * 3600)), 5, 0);
 219                 q[qlen++] = 'd';
 220                 q[qlen++] = ' ';
 221         }
 222
 223         /* Hours and minutes */
 224         if (seconds >= 3600) {
 225                 seconds %= (24 * 3600);
 226                 ulongcat(q, &qlen, (seconds / 3600), 4, 0);
 227                 q[qlen++] = ':';
 228                 ulongcat(q, &qlen, ((seconds / 60) % 60), 2, '0');
 229         } else {
 230                 /* Only minutes */
 231                 ulongcat(q, &qlen, (seconds / 60), 2, 0);
 232         }
 233
 234         /* Seconds */
 235         q[qlen++] = ':';
 236         ulongcat(q, &qlen, (seconds % 60), 2, '0');
 237
 238         add_to_string(string, q);
 239         return string;
 240 }
 241
 242 /** @relates string */
 243 struct string *
 244 add_timeval_to_string(struct string *string, timeval_T *timeval)
 245 {
 246         return add_duration_to_string(string, timeval_to_seconds(timeval));
 247 }
 248
 249 #ifdef HAVE_STRFTIME
 250 struct string *
 251 add_date_to_string(struct string *string, const unsigned char *fmt,
 252                    const time_t *date)
 253 {
 254         unsigned char buffer[MAX_STR_LEN];
 255         time_t when_time = date ? *date : time(NULL);
 256         struct tm *when_local = localtime(&when_time);
 257
 258         if (strftime(buffer, sizeof(buffer), fmt, when_local) <= 0)
 259                 return NULL;
 260
 261         return add_to_string(string, buffer);
 262 }
 263 #endif
 264
 265 /* Encoders and string changers */
 266
 267 struct string *
 268 add_string_replace(struct string *string, unsigned char *src, int len,
 269                    unsigned char replaceable, unsigned char replacement)
 270 {
 271         int oldlength = string->length;
 272
 273         if (!add_bytes_to_string(string, src, len))
 274                 return NULL;
 275
 276         for (src = string->source + oldlength; len; len--, src++)
 277                 if (*src == replaceable)
 278                         *src = replacement;
 279
 280         return string;
 281 }
 282
 283 struct string *
 284 add_html_to_string(struct string *string, const unsigned char *src, int len)
 285 {
 286         for (; len; len--, src++) {
 287                 if (*src < 0x20
 288                     || *src == '<' || *src == '>' || *src == '&'
 289                     || *src == '\"' || *src == '\'') {
 290                         int rollback_length = string->length;
 291
 292                         if (!add_bytes_to_string(string, "&#", 2)
 293                             || !add_long_to_string(string, (long long)*src)
 294                             || !add_char_to_string(string, ';')) {
 295                                 string->length = rollback_length;
 296                                 string->source[rollback_length] = '\0';
 297                                 return NULL;
 298                         }
 299                 } else {
 300                         if (!add_char_to_string(string, *src))
 301                                 return NULL;
 302                 }
 303         }
 304
 305         return string;
 306 }
 307
 308 struct string *
 309 add_cp_html_to_string(struct string *string, int src_codepage,
 310                       const unsigned char *src, int len)
 311 {
 312         const unsigned char *const end = src + len;
 313         unicode_val_T unicode;
 314
 315         for (;;) {
 316                 unicode = cp_to_unicode(src_codepage,
 317                                         (unsigned char **) &src, end);
 318                 if (unicode == UCS_NO_CHAR)
 319                         break;
 320
 321                 if (unicode < 0x20 || unicode >= 0x7F
 322                     || unicode == '<' || unicode == '>' || unicode == '&'
 323                     || unicode == '\"' || unicode == '\'') {
 324                         int rollback_length = string->length;
 325
 326                         if (!add_bytes_to_string(string, "&#", 2)
 327                             || !add_long_to_string(string, unicode)
 328                             || !add_char_to_string(string, ';')) {
 329                                 string->length = rollback_length;
 330                                 string->source[rollback_length] = '\0';
 331                                 return NULL;
 332                         }
 333                 } else {
 334                         if (!add_char_to_string(string, unicode))
 335                                 return NULL;
 336                 }
 337         }
 338
 339         return string;
 340 }
 341
 342 /* TODO Optimize later --pasky */
 343 struct string *
 344 add_quoted_to_string(struct string *string, const unsigned char *src, int len)
 345 {
 346         for (; len; len--, src++) {
 347                 if (isquote(*src) || *src == '\\')
 348                         add_char_to_string(string, '\\');
 349                 add_char_to_string(string, *src);
 350         }
 351
 352         return string;
 353 }
 354
 355 struct string *
 356 add_shell_quoted_to_string(struct string *string, unsigned char *src, int len)
 357 {
 358         add_char_to_string(string, '\'');
 359         for (; len; len--, ++src)
 360                 if (*src == '\'')
 361                         add_to_string(string, "'\\''");
 362                 else
 363                         add_char_to_string(string, *src);
 364         add_char_to_string(string, '\'');
 365
 366         return string;
 367 }
 368
 369 struct string *
 370 add_shell_safe_to_string(struct string *string, unsigned char *cmd, int cmdlen)
 371 {
 372         int prev_safe = 0;
 373
 374         for (; cmdlen; cmdlen--, cmd++) {
 375                 if ((*cmd == '-' && prev_safe) ||
 376                     (prev_safe = is_safe_in_shell(*cmd))) {
 377                         add_char_to_string(string, *cmd);
 378                 } else {
 379                         /* XXX: Not all programs we might exec are capable of
 380                          * decoding these.  For some, we should just report
 381                          * an error rather than exec with an encoded string. */
 382                         add_char_to_string(string, '%');
 383                         add_char_to_string(string, hx((*cmd & 0xf0) >> 4));
 384                         add_char_to_string(string, hx(*cmd & 0x0f));
 385                 }
 386         }
 387
 388         return string;
 389 }
 390
 391
 392 long
 393 strtolx(unsigned char *str, unsigned char **end)
 394 {
 395         long num;
 396         unsigned char postfix;
 397
 398         errno = 0;
 399         num = strtol(str, (char **) end, 10);
 400         if (errno) return 0;
 401         if (!*end) return num;
 402
 403         postfix = c_toupper(**end);
 404         if (postfix == 'K') {
 405                 (*end)++;
 406                 if (num < -INT_MAX / 1024) return -INT_MAX;
 407                 if (num > INT_MAX / 1024) return INT_MAX;
 408                 return num * 1024;
 409         }
 410
 411         if (postfix == 'M') {
 412                 (*end)++;
 413                 if (num < -INT_MAX / (1024 * 1024)) return -INT_MAX;
 414                 if (num > INT_MAX / (1024 * 1024)) return INT_MAX;
 415                 return num * (1024 * 1024);
 416         }
 417
 418         return num;
 419 }
 420
 421 int
 422 month2num(const unsigned char *str)
 423 {
 424         unsigned char month[3] = { str[0]|32, str[1]|32, str[2]|32 };
 425
 426         switch (month[0]) {
 427         case 'j': /* jan, jun, jul */
 428                 if (month[1] == 'a') {
 429                         if (month[2] == 'n') return 0; /* jan */
 430                         return -1;
 431                 }
 432                 if (month[1] == 'u') {
 433                         if (month[2] == 'n') return 5; /* jun */
 434                         if (month[2] == 'l') return 6; /* jul */
 435                 }
 436                 return -1;
 437         case 'm': /* mar, may */
 438                 if (month[1] == 'a') {
 439                         if (month[2] == 'r') return 2; /* mar */
 440                         if (month[2] == 'y') return 4; /* may */
 441                 }
 442                 return -1;
 443         case 'a': /* apr, aug */
 444                 if (month[1] == 'p') {
 445                         if (month[2] == 'r') return 3; /* apr */
 446                         return -1;
 447                 }
 448                 if (month[1] == 'u' && month[2] == 'g') return 7; /* aug */
 449                 return -1;
 450         case 's':
 451                 if (month[1] == 'e' && month[2] == 'p') return 8; /* sep */
 452                 return -1;
 453         case 'o':
 454                 if (month[1] == 'c' && month[2] == 't') return 9; /* oct */
 455                 return -1;
 456         case 'n':
 457                 if (month[1] == 'o' && month[2] == 'v') return 10; /* nov */
 458                 return -1;
 459         case 'd':
 460                 if (month[1] == 'e' && month[2] == 'c') return 11; /* dec */
 461                 return -1;
 462         case 'f':
 463                 if (month[1] == 'e' && month[2] == 'b') return 1; /* feb */
 464                 return -1;
 465         default:
 466                 return -1;
 467         }
 468 }
 469
 470 /** This function drops control chars, nbsp char and limit the number
 471  * of consecutive space chars to one. It modifies its argument. */
 472 void
 473 clr_spaces(unsigned char *str)
 474 {
 475         unsigned char *s;
 476         unsigned char *dest = str;
 477
 478         assert(str);
 479
 480         for (s = str; *s; s++)
 481                 if (*s < ' ' || *s == NBSP_CHAR) *s = ' ';
 482
 483         for (s = str; *s; s++) {
 484                 if (*s == ' ' && (dest == str || s[1] == ' ' || !s[1]))
 485                         continue;
 486
 487                 *dest++ = *s;
 488         }
 489
 490         *dest = '\0';
 491 }
 492
 493 /** Replace invalid chars in @a title with ' ' and trim all starting/ending
 494  * spaces.
 495  *
 496  * update_bookmark() assumes this function does not switch translation
 497  * tables.  */
 498 void
 499 sanitize_title(unsigned char *title)
 500 {
 501         int len = strlen(title);
 502
 503         if (!len) return;
 504
 505         while (len--) {
 506                 if (title[len] < ' ' || title[len] == NBSP_CHAR)
 507                         title[len] = ' ';
 508         }
 509         trim_chars(title, ' ', NULL);
 510 }
 511
 512 /** Returns 0 if @a url contains invalid chars, 1 if ok.
 513  * It trims starting/ending spaces. */
 514 int
 515 sanitize_url(unsigned char *url)
 516 {
 517         int len = strlen(url);
 518
 519         if (!len) return 1;
 520
 521         while (len--) {
 522                 if (url[len] < ' ')
 523                         return 0;
 524         }
 525         trim_chars(url, ' ', NULL);
 526         return 1;
 527 }
 528
 529
 530 int c_tolower(int c) {
 531         switch (c)
 532         {
 533                 case 'A': return 'a';
 534                 case 'B': return 'b';
 535                 case 'C': return 'c';
 536                 case 'D': return 'd';
 537                 case 'E': return 'e';
 538                 case 'F': return 'f';
 539                 case 'G': return 'g';
 540                 case 'H': return 'h';
 541                 case 'I': return 'i';
 542                 case 'J': return 'j';
 543                 case 'K': return 'k';
 544                 case 'L': return 'l';
 545                 case 'M': return 'm';
 546                 case 'N': return 'n';
 547                 case 'O': return 'o';
 548                 case 'P': return 'p';
 549                 case 'Q': return 'q';
 550                 case 'R': return 'r';
 551                 case 'S': return 's';
 552                 case 'T': return 't';
 553                 case 'U': return 'u';
 554                 case 'V': return 'v';
 555                 case 'W': return 'w';
 556                 case 'X': return 'x';
 557                 case 'Y': return 'y';
 558                 case 'Z': return 'z';
 559                 default: return c;
 560         }
 561 }
 562
 563 int c_toupper(int c) {
 564         switch (c) {
 565                 case 'a': return 'A';
 566                 case 'b': return 'B';
 567                 case 'c': return 'C';
 568                 case 'd': return 'D';
 569                 case 'e': return 'E';
 570                 case 'f': return 'F';
 571                 case 'g': return 'G';
 572                 case 'h': return 'H';
 573                 case 'i': return 'I';
 574                 case 'j': return 'J';
 575                 case 'k': return 'K';
 576                 case 'l': return 'L';
 577                 case 'm': return 'M';
 578                 case 'n': return 'N';
 579                 case 'o': return 'O';
 580                 case 'p': return 'P';
 581                 case 'q': return 'Q';
 582                 case 'r': return 'R';
 583                 case 's': return 'S';
 584                 case 't': return 'T';
 585                 case 'u': return 'U';
 586                 case 'v': return 'V';
 587                 case 'w': return 'W';
 588                 case 'x': return 'X';
 589                 case 'y': return 'Y';
 590                 case 'z': return 'Z';
 591                 default: return c;
 592         }
 593 }
 594
 595 int c_isupper (int c)
 596 {
 597         switch (c)
 598         {
 599                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 600                 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 601                 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 602                 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 603                 case 'Y': case 'Z':
 604                         return 1;
 605                 default:
 606                         return 0;
 607         }
 608 }
 609
 610 int c_islower (int c)
 611 {
 612         switch (c)
 613         {
 614                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 615                 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 616                 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 617                 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 618                 case 'y': case 'z':
 619                         return 1;
 620                 default:
 621                         return 0;
 622         }
 623 }
 624