src/deark-ucstring.c

   1 // This file is part of Deark.
   2 // Copyright (C) 2016 Jason Summers
   3 // See the file COPYING for terms of use.
   4
   5 // Implementation of de_ucstring Unicode string object
   6
   7 #define DE_NOT_IN_MODULE
   8 #include "deark-config.h"
   9
  10 #include "deark-private.h"
  11
  12 // de_ucstring is a Unicode (utf-32) string object.
  13 de_ucstring *ucstring_create(deark *c)
  14 {
  15         de_ucstring *s;
  16         s = de_malloc(c, sizeof(de_ucstring));
  17         s->c = c;
  18         return s;
  19 }
  20
  21 void ucstring_empty(de_ucstring *s)
  22 {
  23         ucstring_truncate(s, 0);
  24 }
  25
  26 // Reduce the string's length to newlen, by deleting the characters after
  27 // that point.
  28 // 'newlen' is expected to be no larger than the string's current length.
  29 void ucstring_truncate(de_ucstring *s, i64 newlen)
  30 {
  31         if(!s) return;
  32         if(newlen<0) newlen=0;
  33         if(newlen<s->len) s->len = newlen;
  34
  35         if(s->tmp_string) {
  36                 // There's no requirement to free tmp_string here, but it's no
  37                 // longer needed, and maybe it's nice to have a way to do it.
  38                 de_free(s->c, s->tmp_string);
  39                 s->tmp_string = NULL;
  40         }
  41 }
  42
  43 // Delete the first U+0000 character, and everything after it.
  44 void ucstring_truncate_at_NUL(de_ucstring *s)
  45 {
  46         i64 i;
  47
  48         for(i=0; i<s->len; i++) {
  49                 if(s->str[i]==0x0000) {
  50                         ucstring_truncate(s, i);
  51                         return;
  52                 }
  53         }
  54 }
  55
  56 // If the string ends with U+0000, delete that character.
  57 // If not, do nothing.
  58 void ucstring_strip_trailing_NUL(de_ucstring *s)
  59 {
  60         if(s->len>=1 && s->str[s->len-1]==0x0000) {
  61                 ucstring_truncate(s, s->len-1);
  62         }
  63 }
  64
  65 void ucstring_strip_trailing_spaces(de_ucstring *s)
  66 {
  67         while(s->len>=1 && s->str[s->len-1]==' ') {
  68                 ucstring_truncate(s, s->len-1);
  69         }
  70 }
  71
  72 // Append s2 to s1
  73 void ucstring_append_ucstring(de_ucstring *s1, const de_ucstring *s2)
  74 {
  75         i64 i;
  76
  77         if(!s2) return;
  78         // TODO: This could be done more efficiently.
  79         for(i=0; i<s2->len; i++) {
  80                 ucstring_append_char(s1, s2->str[i]);
  81         }
  82 }
  83
  84 void ucstring_vprintf(de_ucstring *s, de_ext_encoding ee, const char *fmt, va_list ap)
  85 {
  86         char buf[1024];
  87         de_vsnprintf(buf, sizeof(buf), fmt, ap);
  88         ucstring_append_sz(s, buf, ee);
  89 }
  90
  91 // Appends a formatted C-style string.
  92 // (Unfortunately, there is no format specifier for a ucstring.)
  93 // There is a limit to how many characters will be appended.
  94 void ucstring_printf(de_ucstring *s, de_ext_encoding ee, const char *fmt, ...)
  95 {
  96         va_list ap;
  97
  98         va_start(ap, fmt);
  99         ucstring_vprintf(s, ee, fmt, ap);
 100         va_end(ap);
 101 }
 102
 103 de_ucstring *ucstring_clone(const de_ucstring *src)
 104 {
 105         de_ucstring *dst;
 106
 107         if(!src) return NULL;
 108         dst = ucstring_create(src->c);
 109         ucstring_append_ucstring(dst, src);
 110         return dst;
 111 }
 112
 113 void ucstring_destroy(de_ucstring *s)
 114 {
 115         deark *c;
 116         if(s) {
 117                 c = s->c;
 118                 de_free(c, s->str);
 119                 de_free(c, s->tmp_string);
 120                 de_free(c, s);
 121         }
 122 }
 123
 124 int ucstring_isempty(const de_ucstring *s)
 125 {
 126         if(!s) return 1;
 127         return (s->len == 0);
 128 }
 129
 130 int ucstring_isnonempty(const de_ucstring *s)
 131 {
 132         return (s && (s->len > 0));
 133 }
 134
 135 void ucstring_append_char(de_ucstring *s, de_rune ch)
 136 {
 137         i64 new_len;
 138         i64 new_alloc;
 139
 140         if(s->len >= 100000000) {
 141                 return;
 142         }
 143         new_len = s->len + 1;
 144         if(new_len > s->alloc) {
 145                 new_alloc = s->alloc * 2;
 146                 if(new_alloc<32) new_alloc=32;
 147                 s->str = de_reallocarray(s->c, s->str, s->alloc, sizeof(i32), new_alloc);
 148                 s->alloc = new_alloc;
 149         }
 150
 151         s->str[s->len] = ch;
 152         s->len++;
 153 }
 154
 155 static void handle_invalid_byte(de_ucstring *s, u8 n)
 156 {
 157         i32 ch;
 158
 159         ch = DE_CODEPOINT_BYTE00 + (i32)n;
 160         ucstring_append_char(s, ch);
 161 }
 162
 163 static void handle_invalid_bytes(de_ucstring *s, const u8 *buf, UI buflen)
 164 {
 165         UI k;
 166
 167         for(k=0; k<buflen; k++) {
 168                 handle_invalid_byte(s, buf[k]);
 169         }
 170 }
 171
 172 // This function does not validate the "control" bits of the UTF-8 sequence.
 173 // Out-of-range codepoints are return as U+FFFD.
 174 static de_rune decode_utf8_sequence(const u8 *b, UI seqlen)
 175 {
 176         UI ch;
 177
 178         if(seqlen==2) { // 2-byte
 179                 ch = b[0] & 0x1f;
 180                 ch = (ch<<6) | (b[1] & 0x3f);
 181                 if(ch<0x80) ch = 0xfffd;
 182         }
 183         else if(seqlen==3) { // 3-byte
 184                 ch = b[0] & 0x0f;
 185                 ch = (ch<<6) | (b[1] & 0x3f);
 186                 ch = (ch<<6) | (b[2] & 0x3f);
 187                 if(ch<0x800) ch = 0xfffd;
 188         }
 189         else if(seqlen==4) {
 190                 ch = b[0] & 0x07;
 191                 ch = (ch<<6) | (b[1] & 0x3f);
 192                 ch = (ch<<6) | (b[2] & 0x3f);
 193                 ch = (ch<<6) | (b[3] & 0x3f);
 194                 if(ch<0x10000 || ch>0x10ffff) ch = 0xfffd;
 195         }
 196         else {
 197                 ch = b[0] & 0x7f;
 198         }
 199         return (de_rune)ch;
 200 }
 201
 202 #define UTF8_NBYTES_EXPECTED (es->buf[7])
 203 #define UTF8_NBYTES_SAVED    (es->buf[6])
 204
 205 static void append_bytes_utf8(de_ucstring *s, const u8 *cbuf, i64 buflen,
 206         unsigned int conv_flags, struct de_encconv_state *es)
 207 {
 208         i64 pos;
 209
 210         for(pos=0; pos<buflen; pos++) {
 211                 u8 n = cbuf[pos];
 212
 213                 if(n>=0x80 && n<=0xbf) { // continuation byte
 214                         if(UTF8_NBYTES_EXPECTED==0) {
 215                                 handle_invalid_byte(s, n);
 216                         }
 217                         else { // valid continuation byte
 218                                 es->buf[UTF8_NBYTES_SAVED] = n;
 219                                 UTF8_NBYTES_SAVED++;
 220                                 UTF8_NBYTES_EXPECTED--;
 221                                 if(UTF8_NBYTES_EXPECTED==0) {
 222                                         ucstring_append_char(s,
 223                                                 decode_utf8_sequence(es->buf, (UI)UTF8_NBYTES_SAVED));
 224                                 }
 225                         }
 226                 }
 227                 else { // not a continuation byte
 228                         // Should not be any pending bytes. If there are, they're invalid.
 229                         if(UTF8_NBYTES_EXPECTED != 0) {
 230                                 handle_invalid_bytes(s, es->buf, (UI)UTF8_NBYTES_SAVED);
 231                                 UTF8_NBYTES_EXPECTED = 0;
 232                         }
 233
 234                         if(n<=0x7f) {
 235                                 ucstring_append_char(s, (i32)n);
 236                                 UTF8_NBYTES_EXPECTED = 0; // number of additional continuation bytes expected
 237                         }
 238                         else if(n<=0xdf) { // 2-byte UTF-8 char
 239                                 es->buf[0] = n; // save this byte for later
 240                                 UTF8_NBYTES_SAVED = 1; // number of bytes saved in buf[0]...buf[3]
 241                                 UTF8_NBYTES_EXPECTED = 1; // 1 more byte expected in this sequence
 242                         }
 243                         else if(n<=0xef) { // 3-byte UTF-8 char
 244                                 es->buf[0] = n;
 245                                 UTF8_NBYTES_SAVED = 1;
 246                                 UTF8_NBYTES_EXPECTED = 2;
 247                         }
 248                         else if(n<=0xf7) { // 4-byte UTF-8 char
 249                                 es->buf[0] = n;
 250                                 UTF8_NBYTES_SAVED = 1;
 251                                 UTF8_NBYTES_EXPECTED = 3;
 252                         }
 253                         else {
 254                                 handle_invalid_byte(s, n);
 255                                 UTF8_NBYTES_EXPECTED = 0;
 256                         }
 257                 }
 258         }
 259
 260         // Check if there are unprocessed bytes at the end of the string, when there
 261         // shouldn't be.
 262         if(UTF8_NBYTES_EXPECTED!=0 && !(conv_flags & DE_CONVFLAG_PARTIAL_DATA)) {
 263                 handle_invalid_bytes(s, es->buf, (UI)UTF8_NBYTES_SAVED);
 264                 UTF8_NBYTES_EXPECTED = 0;
 265         }
 266 }
 267
 268 #undef UTF8_NBYTES_EXPECTED
 269 #undef UTF8_NBYTES_SAVED
 270
 271 #define UTF16_NBYTES_SAVED    (es->buf[7])
 272
 273 static i64 getu16x_direct(const u8 *m, int is_le)
 274 {
 275         if(is_le)
 276                 return de_getu16le_direct(m);
 277         return de_getu16be_direct(m);
 278 }
 279
 280 static void append_bytes_utf16(de_ucstring *s, const u8 *cbuf, i64 buflen,
 281         unsigned int conv_flags, struct de_encconv_state *es, int is_le)
 282 {
 283         i64 pos = 0;
 284         i32 ch;
 285
 286         for(pos=0; pos<buflen; pos++) {
 287                 u8 n = cbuf[pos];
 288
 289                 es->buf[UTF16_NBYTES_SAVED] = n;
 290                 UTF16_NBYTES_SAVED++;
 291
 292                 if(UTF16_NBYTES_SAVED==2) {
 293                         ch = (i32)getu16x_direct(es->buf, is_le);
 294                         if(ch>=0xd800 && ch<0xdc00) {
 295                                 ; // lead surrogate; do nothing
 296                         }
 297                         else if(ch>=0xdc00 && ch<0xe000) {
 298                                 // trail surrogate, shouldn't be here
 299                                 ucstring_append_char(s, 0xfffd);
 300                                 UTF16_NBYTES_SAVED = 0;
 301                         }
 302                         else { // non-surrogate
 303                                 ucstring_append_char(s, ch);
 304                                 UTF16_NBYTES_SAVED = 0;
 305                         }
 306                 }
 307                 else if(UTF16_NBYTES_SAVED>=4) { // >4 is impossible
 308                         i32 ch2;
 309
 310                         ch = (i32)getu16x_direct(es->buf, is_le);
 311                         ch2 = (i32)getu16x_direct(&es->buf[2], is_le);
 312
 313                         if(ch2>=0xd800 && ch2<0xdc00) {
 314                                 ; // lead surrogate immediately following another lead surrogate
 315                                 ucstring_append_char(s, 0xfffd);
 316                                 es->buf[0] = es->buf[2];
 317                                 es->buf[1] = es->buf[3];
 318                                 UTF16_NBYTES_SAVED = 2;
 319                         }
 320                         else if(ch2>=0xdc00 && ch2<0xe000) {
 321                                 // well-formed surrogate pair
 322                                 ucstring_append_char(s, 0x10000 + (((ch-0xd800)<<10) | (ch2-0xdc00)));
 323                                 UTF16_NBYTES_SAVED = 0;
 324                         }
 325                         else { // non-surrogate immediately following a lead surrogate
 326                                 ucstring_append_char(s, 0xfffd);
 327                                 ucstring_append_char(s, ch2);
 328                                 UTF16_NBYTES_SAVED = 0;
 329                         }
 330
 331                 }
 332         }
 333
 334         // Check if there are unprocessed bytes at the end of the string, when there
 335         // shouldn't be.
 336         if(UTF16_NBYTES_SAVED!=0 && !(conv_flags & DE_CONVFLAG_PARTIAL_DATA)) {
 337                 ucstring_append_char(s, 0xfffd);
 338                 UTF16_NBYTES_SAVED = 0;
 339         }
 340 }
 341
 342 #undef  UTF16_NBYTES_SAVED
 343
 344 // conv_flags:
 345 //  DE_CONVFLAG_PARTIAL_DATA: There might be more data after this; if 'buf' ends
 346 //   in a way it shouldn't, it's not necessarily an error.
 347 void ucstring_append_bytes_ex(de_ucstring *s, const u8 *buf, i64 buflen,
 348         unsigned int conv_flags, struct de_encconv_state *es)
 349 {
 350         de_encoding encoding = DE_EXTENC_GET_BASE(es->ee);
 351
 352         // Adjust buflen if necessary.
 353         if(conv_flags & DE_CONVFLAG_STOP_AT_NUL) {
 354                 char *tmpp;
 355                 tmpp = de_memchr(buf, 0, (size_t)buflen);
 356                 if(tmpp) {
 357                         buflen = (const u8*)tmpp - buf;
 358                 }
 359         }
 360
 361         if(encoding==DE_ENCODING_UTF8) {
 362                 append_bytes_utf8(s, buf, buflen, conv_flags, es);
 363         }
 364         else if(encoding==DE_ENCODING_UTF16LE || encoding==DE_ENCODING_UTF16BE) {
 365                 append_bytes_utf16(s, buf, buflen, conv_flags, es, (encoding==DE_ENCODING_UTF16LE));
 366         }
 367         else {
 368                 i64 pos;
 369
 370                 for(pos=0; pos<buflen; pos++) {
 371                         i32 ch;
 372
 373                         ch = de_char_to_unicode_ex(buf[pos], es);
 374                         if(ch==DE_CODEPOINT_INVALID) {
 375                                 handle_invalid_byte(s, buf[pos]);
 376                         }
 377                         else {
 378                                 ucstring_append_char(s, ch);
 379                         }
 380                 }
 381         }
 382 }
 383
 384 void ucstring_append_bytes(de_ucstring *s, const u8 *buf, i64 buflen,
 385         unsigned int conv_flags, de_ext_encoding ee)
 386 {
 387         struct de_encconv_state es;
 388
 389         de_encconv_init(&es, ee);
 390         ucstring_append_bytes_ex(s, buf, buflen, conv_flags, &es);
 391 }
 392
 393 void ucstring_append_sz(de_ucstring *s, const char *sz, de_ext_encoding ee)
 394 {
 395         i64 len;
 396
 397         if(ee==DE_ENCODING_LATIN1) { // Fast path for this common case
 398                 const u8 *sz_u8 = (const u8*)sz;
 399
 400                 while(*sz_u8) {
 401                         ucstring_append_char(s, (de_rune)(*sz_u8));
 402                         sz_u8++;
 403                 }
 404                 return;
 405         }
 406
 407         len = (i64)de_strlen(sz);
 408         ucstring_append_bytes(s, (const u8*)sz, len, 0, ee);
 409 }
 410
 411 static int ucstring_is_ascii(const de_ucstring *s)
 412 {
 413         i64 i;
 414         for(i=0; i<s->len; i++) {
 415                 if(s->str[i]<0 || s->str[i]>=0x80)
 416                         return 0;
 417         }
 418         return 1;
 419 }
 420
 421 i64 ucstring_count_utf8_bytes(de_ucstring *s)
 422 {
 423         i64 i;
 424         i64 n = 0;
 425         if(!s) return n;
 426         for(i=0; i<s->len; i++) {
 427                 if(s->str[i]<0 || s->str[i]>0xffff) n+=4;
 428                 else if(s->str[i]<=0x7f) n+=1;
 429                 else if(s->str[i]<=0x7ff) n+=2;
 430                 else n+=3;
 431         }
 432         return n;
 433 }
 434
 435 // If add_bom_if_needed is set, we'll prepend a BOM if the global c->write_bom
 436 // option is enabled, and 's' has any non-ASCII characters, and 's' doesn't already
 437 // start with a BOM.
 438 void ucstring_write_as_utf8(deark *c, de_ucstring *s, dbuf *outf, int add_bom_if_needed)
 439 {
 440         i64 i;
 441
 442         if(add_bom_if_needed &&
 443                 c->write_bom &&
 444                 (s->len>0 && s->str[0]!=0xfeff) &&
 445                 !ucstring_is_ascii(s))
 446         {
 447                 // Write a BOM
 448                 dbuf_write_uchar_as_utf8(outf, 0xfeff);
 449         }
 450
 451         for(i=0; i<s->len; i++) {
 452                 dbuf_write_uchar_as_utf8(outf, s->str[i]);
 453         }
 454 }
 455
 456 // Note: This function is similar to de_finfo_set_name_from_ucstring().
 457 // Maybe they should be consolidated.
 458 // TODO: Should we remove the 'encoding' param, and always assume UTF-8?
 459 void ucstring_to_sz(de_ucstring *s, char *szbuf, size_t szbuf_len,
 460         unsigned int flags, de_ext_encoding ee)
 461 {
 462         i64 i;
 463         i64 szpos = 0;
 464         i32 ch;
 465         i64 charcodelen;
 466         de_encoding encoding = DE_EXTENC_GET_BASE(ee);
 467         static const char *sc1 = "\x01<"; // DE_CODEPOINT_HL in UTF-8
 468         static const char *sc2 = ">\x02"; // DE_CODEPOINT_UNHL
 469         u8 charcodebuf[32];
 470
 471         if(szbuf_len<1) return;
 472
 473         for(i=0; i<s->len; i++) {
 474                 ch = s->str[i];
 475                 if(encoding==DE_ENCODING_UTF8) {
 476                         de_uchar_to_utf8(ch, charcodebuf, &charcodelen);
 477                 }
 478                 else { // DE_ENCODING_LATIN1 or DE_ENCODING_ASCII
 479                         // TODO: This may not work right if DE_CONVFLAG_MAKE_PRINTABLE is used,
 480                         // but currently that never happens.
 481                         if(ch>=0 && ch<=(encoding==DE_ENCODING_LATIN1?255:127))
 482                                 charcodebuf[0] = (u8)ch;
 483                         else
 484                                 charcodebuf[0] = '_';
 485                         charcodelen = 1;
 486                 }
 487
 488                 if(flags & DE_CONVFLAG_MAKE_PRINTABLE) {
 489                         // TODO: This is slightly inefficient, because we're overwriting the
 490                         // conversion we already did.
 491                         if(!de_is_printable_uchar(ch)) {
 492                                 if(ch==0x0a) {
 493                                         de_snprintf((char*)charcodebuf, sizeof(charcodebuf),
 494                                                 "%s\\n%s", sc1, sc2);
 495                                 }
 496                                 else if(ch==0x0d) {
 497                                         de_snprintf((char*)charcodebuf, sizeof(charcodebuf),
 498                                                 "%s\\r%s", sc1, sc2);
 499                                 }
 500                                 else if(ch==0x09) {
 501                                         de_snprintf((char*)charcodebuf, sizeof(charcodebuf),
 502                                                 "%s\\t%s", sc1, sc2);
 503                                 }
 504                                 else if(ch==0x00) {
 505                                         de_snprintf((char*)charcodebuf, sizeof(charcodebuf),
 506                                                 "%s\\0%s", sc1, sc2);
 507                                 }
 508                                 else if(ch>=DE_CODEPOINT_BYTE00 && ch<=DE_CODEPOINT_BYTEFF) {
 509                                         de_snprintf((char*)charcodebuf, sizeof(charcodebuf), "%s%02X%s",
 510                                                 sc1, (int)(ch-DE_CODEPOINT_BYTE00), sc2);
 511                                 }
 512                                 else {
 513                                         de_snprintf((char*)charcodebuf, sizeof(charcodebuf),
 514                                                 "%sU+%04X%s", sc1, (unsigned int)ch, sc2);
 515                                 }
 516                                 charcodelen = (i64)de_strlen((const char*)charcodebuf);
 517                         }
 518                 }
 519
 520                 if(szpos + charcodelen + 1 > (i64)szbuf_len) break;
 521                 if(charcodelen==1) {
 522                         szbuf[szpos] = charcodebuf[0];
 523                 }
 524                 else {
 525                         de_memcpy(&szbuf[szpos], charcodebuf, (size_t)charcodelen);
 526                 }
 527                 szpos += charcodelen;
 528         }
 529
 530         szbuf[szpos] = '\0';
 531 }
 532
 533 // Try to determine if a Unicode codepoint (presumed to be from an untrusted source)
 534 // is "safe" to print to a terminal.
 535 // We try to ban control characters, formatting characters, private-use characters,
 536 // and noncharacters.
 537 // It would be good to also ban incorrectly-used "combining" and other context-
 538 // sensitive characters, but that's too difficult.
 539 int de_is_printable_uchar(de_rune ch)
 540 {
 541         struct pr_range { i32 n1, n2; };
 542         static const struct pr_range ranges[] = {
 543                 { 0x0020, 0x007e },
 544                 { 0x00a0, 0x200d },
 545                 { 0x2010, 0x2027 },
 546                 { 0x202f, 0x2065 },
 547                 { 0x2070, 0xd7ff },
 548                 { 0xf900, 0xfdcf },
 549                 { 0xfdf0, 0xfdff },
 550                 { 0xfe10, 0xfefe },
 551                 { 0xff00, 0xffef },
 552                 { 0xfffd, 0xfffd },
 553                 { 0x10000, 0x101ff },
 554                 { 0x1f000, 0x1fbff }
 555                 // TODO: Whitelist more codepoints
 556         };
 557         size_t i;
 558         const size_t num_ranges = DE_ARRAYCOUNT(ranges);
 559
 560         for(i=0; i<num_ranges; i++) {
 561                 if(ch>=ranges[i].n1 && ch<=ranges[i].n2) return 1;
 562         }
 563         return 0;
 564 }
 565
 566 static const char *ucstring_getpsz_internal(de_ucstring *s,
 567         int has_max, i64 max_bytes)
 568 {
 569         i64 allocsize;
 570
 571         if(!s) {
 572                 if(has_max && max_bytes<6) return "";
 573                 return "(null)";
 574         }
 575
 576         if(has_max) {
 577                 allocsize = max_bytes + 1;
 578         }
 579         else {
 580                 // TODO: Calculating the proper allocsize could be difficult,
 581                 // depending on how DE_CONVFLAG_MAKE_PRINTABLE is implemented.
 582                 allocsize = s->len * 4 + 1 + 100;
 583         }
 584
 585         if(s->tmp_string)
 586                 de_free(s->c, s->tmp_string);
 587         s->tmp_string = de_malloc(s->c, allocsize);
 588
 589         ucstring_to_sz(s, s->tmp_string, (size_t)allocsize, DE_CONVFLAG_MAKE_PRINTABLE, DE_ENCODING_UTF8);
 590
 591         return s->tmp_string;
 592 }
 593
 594 const char *ucstring_getpsz(de_ucstring *s)
 595 {
 596         return ucstring_getpsz_internal(s, 0, 0);
 597 }
 598
 599 // It might make more sense to limit the number of visible characters, instead
 600 // of the number of bytes in the encoded string, but that's too difficult.
 601 const char *ucstring_getpsz_n(de_ucstring *s, i64 max_bytes)
 602 {
 603         return ucstring_getpsz_internal(s, 1, max_bytes);
 604 }
 605
 606 const char *ucstring_getpsz_d(de_ucstring *s)
 607 {
 608         return ucstring_getpsz_internal(s, 1, DE_DBG_MAX_STRLEN);
 609 }
 610
 611 void ucstring_append_flags_item(de_ucstring *s, const char *str)
 612 {
 613         ucstring_printf(s, DE_ENCODING_UTF8, "%s%s", (s->len>0)?" | ":"", str);
 614 }
 615
 616 void ucstring_append_flags_itemf(de_ucstring *s, const char *fmt, ...)
 617 {
 618         va_list ap;
 619         char buf[1024];
 620
 621         va_start(ap, fmt);
 622         de_vsnprintf(buf, sizeof(buf), fmt, ap);
 623         ucstring_append_flags_item(s, buf);
 624         va_end(ap);
 625 }
 626
// strarray: A mini library, intended mainly to help manage directory paths.
// It is a stack of ucstrings: de_strarray_push() adds a copy of a string to
// the end, and de_strarray_pop() removes the last one.

struct de_strarray {
        deark *c; // context used for all allocations
        size_t max_elems; // de_strarray_push() fails once 'count' reaches this
        size_t count; // number of strings currently stored
        size_t num_alloc; // current capacity of 'ss', in elements
        de_ucstring **ss; // array of 'num_alloc' ucstring pointers
};
 636
 637 struct de_strarray *de_strarray_create(deark *c, size_t max_elems)
 638 {
 639         struct de_strarray *sa;
 640         sa = de_malloc(c, sizeof(struct de_strarray));
 641         sa->c = c;
 642         sa->max_elems = max_elems;
 643         return sa;
 644 }
 645
 646 void de_strarray_destroy(struct de_strarray *sa)
 647 {
 648         deark *c;
 649
 650         if(!sa) return;
 651         c = sa->c;
 652         while(sa->count>0) {
 653                 de_strarray_pop(sa);
 654         }
 655         de_free(c, sa->ss);
 656         de_free(c, sa);
 657 }
 658
 659 // This makes a copy of 's'. The caller still owns 's'.
 660 int de_strarray_push(struct de_strarray *sa, de_ucstring *s)
 661 {
 662         deark *c = sa->c;
 663         size_t newidx = sa->count;
 664
 665         if(sa->count >= sa->max_elems) return 0;
 666         if(newidx >= sa->num_alloc) {
 667                 size_t old_num_alloc = sa->num_alloc;
 668                 sa->num_alloc *= 2;
 669                 if(sa->num_alloc<8) sa->num_alloc=8;
 670                 sa->ss = de_reallocarray(c, sa->ss, old_num_alloc, sizeof(de_ucstring*),
 671                         sa->num_alloc);
 672         }
 673         sa->ss[newidx] = ucstring_clone(s);
 674         sa->count++;
 675         return 1;
 676 }
 677
 678 int de_strarray_pop(struct de_strarray *sa)
 679 {
 680         if(sa->count<1) return 0;
 681         ucstring_destroy(sa->ss[sa->count-1]);
 682         sa->ss[sa->count-1] = NULL;
 683         sa->count--;
 684         return 1;
 685 }
 686
 687 // Replace slashes in a string, starting at the given position.
 688 static void mp_squash_slashes(de_ucstring *s, i64 pos1)
 689 {
 690         i64 i;
 691
 692         for(i=pos1; i<s->len; i++) {
 693                 if(s->str[i]=='/') {
 694                         s->str[i] = '_';
 695                 }
 696         }
 697 }
 698
 699 // Caller allocates 'path' to receive the path.
 700 void de_strarray_make_path(struct de_strarray *sa, de_ucstring *path, unsigned int flags)
 701 {
 702         size_t i;
 703
 704         for(i=0; i<sa->count; i++) {
 705                 i64 oldlen = path->len;
 706
 707                 if(ucstring_isnonempty(sa->ss[i])) {
 708                         ucstring_append_ucstring(path, sa->ss[i]);
 709                 }
 710                 else {
 711                         ucstring_append_char(path, '_');
 712                 }
 713
 714                 mp_squash_slashes(path, oldlen);
 715                 if((i+1 < sa->count) || !(flags & DE_MPFLAG_NOTRAILINGSLASH)) {
 716                         ucstring_append_char(path, '/');
 717                 }
 718         }
 719 }