lua/isocline/src/stringbuf.c

   1 /* ----------------------------------------------------------------------------
   2   Copyright (c) 2021, Daan Leijen
   3   This is free software; you can redistribute it and/or modify it
   4   under the terms of the MIT License. A copy of the license can be
   5   found in the "LICENSE" file at the root of this distribution.
   6 -----------------------------------------------------------------------------*/
   7
   8 // get `wcwidth` for the column width of unicode characters
   9 // note: for now the OS provided one is unused as we see quite a bit of variation
  10 // among platforms and including our own seems more reliable.
  11 /*
  12 #if defined(__linux__) || defined(__freebsd__)
  13 // use the system supplied one
  14 #if !defined(_XOPEN_SOURCE)
  15 #define  _XOPEN_SOURCE  700    // so wcwidth is visible
  16 #endif
  17 #include <wchar.h>
  18 #else
  19 */
  20 // use our own (also on APPLE as that fails within vscode)
  21 #define  wcwidth(c)  mk_wcwidth(c)
  22 #include "wcwidth.c"
  23 // #endif
  24
  25 #include <stdio.h>
  26 #include <string.h>
  27 #include <inttypes.h>
  28
  29 #include "common.h"
  30 #include "stringbuf.h"
  31
  32 //-------------------------------------------------------------
  33 // In place growable utf-8 strings
  34 //-------------------------------------------------------------
  35
// A growable, zero-terminated character buffer.
struct stringbuf_s {
  char*     buf;      // character data; NULL until the first allocation (see `sbuf_init`)
  ssize_t   buflen;   // capacity in bytes; the allocation holds one extra byte for the terminating zero
  ssize_t   count;    // number of bytes in use; `buf[count]` is kept zero once `buf` is allocated
  alloc_t*  mem;      // allocator used for `buf` and, in `sbuf_new`/`sbuf_free`, for the struct itself
};
  42
  43
  44 //-------------------------------------------------------------
  45 // String column width
  46 //-------------------------------------------------------------
  47
  48 // column width of a utf8 single character sequence.
  49 static ssize_t utf8_char_width( const char* s, ssize_t n ) {
  50   if (n <= 0) { return 0; }
  51
  52   uint8_t b = (uint8_t)s[0];
  53   int32_t c;
  54   if (b < ' ') {
  55     return 0;
  56   }
  57   else if (b <= 0x7F) {
  58     return 1;
  59   }
  60   else if (b <= 0xC1) { // invalid continuation byte or invalid 0xC0, 0xC1 (check is strictly not necessary as we don't validate..)
  61     return 1;
  62   }
  63   else if (b <= 0xDF && n >= 2) { // b >= 0xC2  // 2 bytes
  64     c = (((b & 0x1F) << 6) | (s[1] & 0x3F));
  65     assert(c < 0xD800 || c > 0xDFFF);
  66     int w = wcwidth(c);
  67     return w;
  68   }
  69   else if (b <= 0xEF && n >= 3) { // b >= 0xE0  // 3 bytes
  70     c = (((b & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F));
  71     return wcwidth(c);
  72   }
  73   else if (b <= 0xF4 && n >= 4) { // b >= 0xF0  // 4 bytes
  74     c = (((b & 0x07) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F));
  75     return wcwidth(c);
  76   }
  77   else {
  78     // failed
  79     return 1;
  80   }
  81 }
  82
  83
  84 // The column width of a codepoint (0, 1, or 2)
  85 static ssize_t char_column_width( const char* s, ssize_t n ) {
  86   if (s == NULL || n <= 0) { return 0; }
  87   else if ((uint8_t)(*s) < ' ') { return 0; }   // also for CSI escape sequences
  88   else {
  89     ssize_t w = utf8_char_width(s, n);
  90     #ifdef _WIN32
  91     return (w <= 0 ? 1 : w); // windows console seems to use at least one column
  92     #else
  93     return w;
  94     #endif
  95   }
  96 }
  97
  98 static ssize_t str_column_width_n( const char* s, ssize_t len ) {
  99   if (s == NULL || len <= 0) { return 0; }
 100   ssize_t pos = 0;
 101   ssize_t cwidth = 0;
 102   ssize_t cw;
 103   ssize_t ofs;
 104   while (s[pos] != 0 && (ofs = str_next_ofs(s, len, pos, &cw)) > 0) {
 105     cwidth += cw;
 106     pos += ofs;
 107   }
 108   return cwidth;
 109 }
 110
 111 ic_private ssize_t str_column_width( const char* s ) {
 112   return str_column_width_n( s, ic_strlen(s) );
 113 }
 114
 115 ic_private ssize_t str_skip_until_fit( const char* s, ssize_t max_width ) {
 116   if (s == NULL) { return 0; }
 117   ssize_t cwidth = str_column_width(s);
 118   ssize_t len    = ic_strlen(s);
 119   ssize_t pos = 0;
 120   ssize_t next;
 121   ssize_t cw;
 122   while (cwidth > max_width && (next = str_next_ofs(s, len, pos, &cw)) > 0) {
 123     cwidth -= cw;
 124     pos += next;
 125   }
 126   return pos;
 127 }
 128
 129 ic_private ssize_t str_take_while_fit( const char* s, ssize_t max_width) {
 130   if (s == NULL) { return 0; }
 131   const ssize_t len = ic_strlen(s);
 132   ssize_t pos = 0;
 133   ssize_t next;
 134   ssize_t cw;
 135   ssize_t cwidth = 0;
 136   while ((next = str_next_ofs(s, len, pos, &cw)) > 0) {
 137     if (cwidth + cw > max_width) { break; }
 138     cwidth += cw;
 139     pos += next;
 140   }
 141   return pos;
 142 }
 143
 144
 145 //-------------------------------------------------------------
 146 // String navigation
 147 //-------------------------------------------------------------
 148
 149 // get offset of the previous codepoint. does not skip back over CSI sequences.
 150 ic_private ssize_t str_prev_ofs( const char* s, ssize_t pos, ssize_t* width ) {
 151   ssize_t ofs = 0;
 152   if (s != NULL && pos > 0) {
 153     ofs = 1;
 154     while (pos > ofs) {
 155       uint8_t u = (uint8_t)s[pos - ofs];
 156       if (u < 0x80 || u > 0xBF) { break; }  // continue while follower
 157       ofs++;
 158     }
 159   }
 160   if (width != NULL) { *width = char_column_width( s+(pos-ofs), ofs ); }
 161   return ofs;
 162 }
 163
 164 // skip an escape sequence
 165 // <https://www.xfree86.org/current/ctlseqs.html>
 166 ic_private bool skip_esc( const char* s, ssize_t len, ssize_t* esclen ) {
 167   if (s == NULL || len <= 1 || s[0] != '\x1B') { return false; }
 168   if (esclen != NULL) { *esclen = 0; }
 169   if (strchr("[PX^_]",s[1]) != NULL) {
 170     // CSI (ESC [), DCS (ESC P), SOS (ESC X), PM (ESC ^), APC (ESC _), and OSC (ESC ]): terminated with a special sequence
 171     bool finalCSI = (s[1] == '[');  // CSI terminates with 0x40-0x7F; otherwise ST (bell or ESC \)
 172     ssize_t n = 2;
 173     while (len > n) {
 174       char c = s[n++];
 175       if ((finalCSI && (uint8_t)c >= 0x40 && (uint8_t)c <= 0x7F) ||  // terminating byte: @A–Z[\]^_`a–z{|}~
 176           (!finalCSI && c == '\x07') ||   // bell
 177           (c == '\x02'))                  // STX terminates as well
 178       {
 179         if (esclen != NULL) { *esclen = n; }
 180         return true;
 181       }
 182       else if (!finalCSI && c == '\x1B' && len > n && s[n] == '\\') {  // ST (ESC \)
 183         n++;
 184         if (esclen != NULL) { *esclen = n; }
 185         return true;
 186       }
 187     }
 188   }
 189   if (strchr(" #%()*+",s[1]) != NULL) {
 190     // assume escape sequence of length 3 (like ESC % G)
 191     if (esclen != NULL) { *esclen = 2; }
 192     return true;
 193   }
 194   else {
 195     // assume single character escape code (like ESC 7)
 196     if (esclen != NULL) { *esclen = 2; }
 197     return true;
 198   }
 199   return false;
 200 }
 201
 202 // Offset to the next codepoint, treats CSI escape sequences as a single code point.
 203 ic_private ssize_t str_next_ofs( const char* s, ssize_t len, ssize_t pos, ssize_t* cwidth ) {
 204   ssize_t ofs = 0;
 205   if (s != NULL && len > pos) {
 206     if (skip_esc(s+pos,len-pos,&ofs)) {
 207       // skip escape sequence
 208     }
 209     else {
 210       ofs = 1;
 211       // utf8 extended character?
 212       while(len > pos + ofs) {
 213         uint8_t u = (uint8_t)s[pos + ofs];
 214         if (u < 0x80 || u > 0xBF) { break; }  // break if not a follower
 215         ofs++;
 216       }
 217     }
 218   }
 219   if (cwidth != NULL) { *cwidth = char_column_width( s+pos, ofs ); }
 220   return ofs;
 221 }
 222
 223 static ssize_t str_limit_to_length( const char* s, ssize_t n ) {
 224   ssize_t i;
 225   for(i = 0; i < n && s[i] != 0; i++) { /* nothing */ }
 226   return i;
 227 }
 228
 229
 230 //-------------------------------------------------------------
 231 // String searching prev/next word, line, ws_word
 232 //-------------------------------------------------------------
 233
 234
 235 static ssize_t str_find_backward( const char* s, ssize_t len, ssize_t pos, ic_is_char_class_fun_t* match, bool skip_immediate_matches ) {
 236   if (pos > len) { pos = len; }
 237   if (pos < 0) { pos = 0; }
 238   ssize_t i = pos;
 239   // skip matching first (say, whitespace in case of the previous start-of-word)
 240   if (skip_immediate_matches) {
 241     do {
 242       ssize_t prev = str_prev_ofs(s, i, NULL);
 243       if (prev <= 0) { break; }
 244       assert(i - prev >= 0);
 245       if (!match(s + i - prev, (long)prev)) { break; }
 246       i -= prev;
 247     } while (i > 0);
 248   }
 249   // find match
 250   do {
 251     ssize_t prev = str_prev_ofs(s, i, NULL);
 252     if (prev <= 0) { break; }
 253     assert(i - prev >= 0);
 254     if (match(s + i - prev, (long)prev)) {
 255       return i;  // found;
 256     }
 257     i -= prev;
 258   } while (i > 0);
 259   return -1; // not found
 260 }
 261
 262 static ssize_t str_find_forward( const char* s, ssize_t len, ssize_t pos, ic_is_char_class_fun_t* match, bool skip_immediate_matches ) {
 263   if (s == NULL || len < 0) { return -1; }
 264   if (pos > len) { pos = len; }
 265   if (pos < 0) { pos = 0; }
 266   ssize_t i = pos;
 267   ssize_t next;
 268   // skip matching first (say, whitespace in case of the next end-of-word)
 269   if (skip_immediate_matches) {
 270     do {
 271       next = str_next_ofs(s, len, i, NULL);
 272       if (next <= 0) { break; }
 273       assert( i + next <= len);
 274       if (!match(s + i, (long)next)) { break; }
 275       i += next;
 276     } while (i < len);
 277   }
 278   // and then look
 279   do {
 280     next = str_next_ofs(s, len, i, NULL);
 281     if (next <= 0) { break; }
 282     assert( i + next <= len);
 283     if (match(s + i, (long)next)) {
 284       return i; // found
 285     }
 286     i += next;
 287   } while (i < len);
 288   return -1;
 289 }
 290
 291 static bool char_is_linefeed( const char* s, long n ) {
 292   return (n == 1 && (*s == '\n' || *s == 0));
 293 }
 294
 295 static ssize_t str_find_line_start( const char* s, ssize_t len, ssize_t pos) {
 296   ssize_t start = str_find_backward(s,len,pos,&char_is_linefeed,false /* don't skip immediate matches */);
 297   return (start < 0 ? 0 : start);
 298 }
 299
 300 static ssize_t str_find_line_end( const char* s, ssize_t len, ssize_t pos) {
 301   ssize_t end = str_find_forward(s,len,pos, &char_is_linefeed, false);
 302   return (end < 0 ? len : end);
 303 }
 304
 305 static ssize_t str_find_word_start( const char* s, ssize_t len, ssize_t pos) {
 306   ssize_t start = str_find_backward(s,len,pos, &ic_char_is_idletter,true /* skip immediate matches */);
 307   return (start < 0 ? 0 : start);
 308 }
 309
 310 static ssize_t str_find_word_end( const char* s, ssize_t len, ssize_t pos) {
 311   ssize_t end = str_find_forward(s,len,pos,&ic_char_is_idletter,true /* skip immediate matches */);
 312   return (end < 0 ? len : end);
 313 }
 314
 315 static ssize_t str_find_ws_word_start( const char* s, ssize_t len, ssize_t pos) {
 316   ssize_t start = str_find_backward(s,len,pos,&ic_char_is_white,true /* skip immediate matches */);
 317   return (start < 0 ? 0 : start);
 318 }
 319
 320 static ssize_t str_find_ws_word_end( const char* s, ssize_t len, ssize_t pos) {
 321   ssize_t end = str_find_forward(s,len,pos,&ic_char_is_white,true /* skip immediate matches */);
 322   return (end < 0 ? len : end);
 323 }
 324
 325
 326 //-------------------------------------------------------------
 327 // String row/column iteration
 328 //-------------------------------------------------------------
 329
 330 // invoke a function for each terminal row; returns total row count.
 331 static ssize_t str_for_each_row( const char* s, ssize_t len, ssize_t termw, ssize_t promptw, ssize_t cpromptw,
 332                                  row_fun_t* fun, const void* arg, void* res )
 333 {
 334   if (s == NULL) { s = ""; }
 335   ssize_t i;
 336   ssize_t rcount = 0;
 337   ssize_t rcol = 0;
 338   ssize_t rstart = 0;
 339   ssize_t startw  = promptw;
 340   for(i = 0; i < len; ) {
 341     ssize_t w;
 342     ssize_t next = str_next_ofs(s, len, i, &w);
 343     if (next <= 0) {
 344       debug_msg("str: foreach row: next<=0: len %" PRIz "d, i %" PRIz "d, w %" PRIz "d, buf %s\n", len, i, w, s );
 345       assert(false);
 346       break;
 347     }
 348     startw = (rcount == 0 ? promptw : cpromptw);
 349     ssize_t termcol = rcol + w + startw + 1 /* for the cursor */;
 350     if (termw != 0 && i != 0 && termcol >= termw) {
 351       // wrap
 352       if (fun != NULL) {
 353         if (fun(s,rcount,rstart,i - rstart,startw,true,arg,res)) { return rcount; }
 354       }
 355       rcount++;
 356       rstart = i;
 357       rcol   = 0;
 358     }
 359     if (s[i] == '\n') {
 360       // newline
 361       if (fun != NULL) {
 362         if (fun(s,rcount,rstart,i - rstart,startw,false,arg,res)) { return rcount; }
 363       }
 364       rcount++;
 365       rstart = i+1;
 366       rcol = 0;
 367     }
 368     assert (s[i] != 0);
 369     i += next;
 370     rcol += w;
 371   }
 372   if (fun != NULL) {
 373     if (fun(s,rcount,rstart,i - rstart,startw,false,arg,res)) { return rcount; }
 374   }
 375   return rcount+1;
 376 }
 377
 378 //-------------------------------------------------------------
 379 // String: get row/column position
 380 //-------------------------------------------------------------
 381
 382
 383 static bool str_get_current_pos_iter(
 384     const char* s,
 385     ssize_t row, ssize_t row_start, ssize_t row_len,
 386     ssize_t startw, bool is_wrap, const void* arg, void* res)
 387 {
 388   ic_unused(is_wrap); ic_unused(startw);
 389   rowcol_t* rc = (rowcol_t*)res;
 390   ssize_t pos = *((ssize_t*)arg);
 391
 392   if (pos >= row_start && pos <= (row_start + row_len)) {
 393     // found the cursor row
 394     rc->row_start = row_start;
 395     rc->row_len   = row_len;
 396     rc->row = row;
 397     rc->col = str_column_width_n( s + row_start, pos - row_start );
 398     rc->first_on_row = (pos == row_start);
 399     if (is_wrap) {
 400       // if wrapped, we check if the next character is at row_len
 401       ssize_t next = str_next_ofs(s, row_start + row_len, pos, NULL);
 402       rc->last_on_row = (pos + next >= row_start + row_len);
 403     }
 404     else {
 405       // normal last position is right after the last character
 406       rc->last_on_row = (pos >= row_start + row_len);
 407     }
 408     // debug_msg("edit; pos iter: pos: %" PRIz "d (%c), row_start: %" PRIz "d, rowlen: %" PRIz "d\n", pos, s[pos], row_start, row_len);
 409   }
 410   return false; // always continue to count all rows
 411 }
 412
 413 static ssize_t str_get_rc_at_pos(const char* s, ssize_t len, ssize_t termw, ssize_t promptw, ssize_t cpromptw, ssize_t pos, rowcol_t* rc) {
 414   memset(rc, 0, sizeof(*rc));
 415   ssize_t rows = str_for_each_row(s, len, termw, promptw, cpromptw, &str_get_current_pos_iter, &pos, rc);
 416   // debug_msg("edit: current pos: (%d, %d) %s %s\n", rc->row, rc->col, rc->first_on_row ? "first" : "", rc->last_on_row ? "last" : "");
 417   return rows;
 418 }
 419
 420
 421
 422 //-------------------------------------------------------------
 423 // String: get row/column position for a resized terminal
 424 // with potentially "hard-wrapped" rows
 425 //-------------------------------------------------------------
// Input for `str_get_current_wrapped_pos_iter`.
typedef struct wrapped_arg_s {
  ssize_t  pos;       // byte position to look up
  ssize_t  newtermw;  // terminal width after the resize; rows wider than this are hard-wrapped
} wrapped_arg_t;
 430
// Result accumulated by `str_get_current_wrapped_pos_iter`.
typedef struct wrowcol_s {
  rowcol_t rc;     // row/column information for the looked-up position
  ssize_t  hrows;  // count of hard-wrapped extra rows
} wrowcol_t;
 435
 436 static bool str_get_current_wrapped_pos_iter(
 437     const char* s,
 438     ssize_t row, ssize_t row_start, ssize_t row_len,
 439     ssize_t startw, bool is_wrap, const void* arg, void* res)
 440 {
 441   ic_unused(is_wrap);
 442   wrowcol_t*     wrc = (wrowcol_t*)res;
 443   const wrapped_arg_t* warg = (const wrapped_arg_t*)arg;
 444
 445   // iterate through the row and record the postion and hard-wraps
 446   ssize_t hwidth = startw;
 447   ssize_t i = 0;
 448   while( i <= row_len ) {  // include rowlen as the cursor position can be just after the last character
 449     // get next position and column width
 450     ssize_t cw;
 451     ssize_t next;
 452     bool is_cursor = (warg->pos == row_start+i);
 453     if (i < row_len) {
 454       next = str_next_ofs(s + row_start, row_len, i, &cw);
 455     }
 456     else {
 457       // end of row: take wrap or cursor into account
 458       // (wrap has width 2 as it displays a back-arrow but also has an invisible newline that wraps)
 459       cw = (is_wrap ? 2 : (is_cursor ? 1 : 0));
 460       next = 1;
 461     }
 462
 463     if (next > 0) {
 464       if (hwidth + cw > warg->newtermw) {
 465         // hardwrap
 466         hwidth = 0;
 467         wrc->hrows++;
 468         debug_msg("str: found hardwrap: row: %" PRIz "d, hrows: %" PRIz "d\n", row, wrc->hrows);
 469       }
 470     }
 471     else {
 472       next++; // ensure we terminate (as we go up to rowlen)
 473     }
 474
 475     // did we find our position?
 476     if (is_cursor) {
 477       debug_msg("str: found position: row: %" PRIz "d, hrows: %" PRIz "d\n", row, wrc->hrows);
 478       wrc->rc.row_start = row_start;
 479       wrc->rc.row_len   = row_len;
 480       wrc->rc.row       = wrc->hrows + row;
 481       wrc->rc.col       = hwidth;
 482       wrc->rc.first_on_row = (i == 0);
 483       wrc->rc.last_on_row  = (i+next >= row_len - (is_wrap ? 1 : 0));
 484     }
 485
 486     // advance
 487     hwidth += cw;
 488     i += next;
 489   }
 490   return false; // always continue to count all rows
 491 }
 492
 493
 494 static ssize_t str_get_wrapped_rc_at_pos(const char* s, ssize_t len, ssize_t termw, ssize_t newtermw, ssize_t promptw, ssize_t cpromptw, ssize_t pos, rowcol_t* rc) {
 495   wrapped_arg_t warg;
 496   warg.pos = pos;
 497   warg.newtermw = newtermw;
 498   wrowcol_t wrc;
 499   memset(&wrc,0,sizeof(wrc));
 500   ssize_t rows = str_for_each_row(s, len, termw, promptw, cpromptw, &str_get_current_wrapped_pos_iter, &warg, &wrc);
 501   debug_msg("edit: wrapped pos: (%" PRIz "d,%" PRIz "d) rows %" PRIz "d %s %s, hrows: %" PRIz "d\n", wrc.rc.row, wrc.rc.col, rows, wrc.rc.first_on_row ? "first" : "", wrc.rc.last_on_row ? "last" : "", wrc.hrows);
 502   *rc = wrc.rc;
 503   return (rows + wrc.hrows);
 504 }
 505
 506
 507 //-------------------------------------------------------------
 508 // Set position
 509 //-------------------------------------------------------------
 510
 511 static bool str_set_pos_iter(
 512     const char* s,
 513     ssize_t row, ssize_t row_start, ssize_t row_len,
 514     ssize_t startw, bool is_wrap, const void* arg, void* res)
 515 {
 516   ic_unused(arg); ic_unused(is_wrap); ic_unused(startw);
 517   rowcol_t* rc = (rowcol_t*)arg;
 518   if (rc->row != row) { return false; } // keep searching
 519   // we found our row
 520   ssize_t col = 0;
 521   ssize_t i   = row_start;
 522   ssize_t end = row_start + row_len;
 523   while (col < rc->col && i < end) {
 524     ssize_t cw;
 525     ssize_t next = str_next_ofs(s, row_start + row_len, i, &cw);
 526     if (next <= 0) { break; }
 527     i   += next;
 528     col += cw;
 529   }
 530   *((ssize_t*)res) = i;
 531   return true; // stop iteration
 532 }
 533
 534 static ssize_t str_get_pos_at_rc(const char* s, ssize_t len, ssize_t termw, ssize_t promptw, ssize_t cpromptw, ssize_t row, ssize_t col /* without prompt */) {
 535   rowcol_t rc;
 536   memset(&rc,0,ssizeof(rc));
 537   rc.row = row;
 538   rc.col = col;
 539   ssize_t pos = -1;
 540   str_for_each_row(s,len,termw,promptw,cpromptw,&str_set_pos_iter,&rc,&pos);
 541   return pos;
 542 }
 543
 544
 545 //-------------------------------------------------------------
 546 // String buffer
 547 //-------------------------------------------------------------
 548 static bool sbuf_ensure_extra(stringbuf_t* s, ssize_t extra)
 549 {
 550   if (s->buflen >= s->count + extra) { return true; }
 551   // reallocate; pick good initial size and multiples to increase reuse on allocation
 552   ssize_t newlen = (s->buflen <= 0 ? 120 : (s->buflen > 1000 ? s->buflen + 1000 : 2*s->buflen));
 553   if (newlen < s->count + extra) { newlen = s->count + extra; }
 554   if (s->buflen > 0) {
 555     debug_msg("stringbuf: reallocate: old %" PRIz "d, new %" PRIz "d\n", s->buflen, newlen);
 556   }
 557   char* newbuf = mem_realloc_tp(s->mem, char, s->buf, newlen+1); // one more for terminating zero
 558   if (newbuf == NULL) {
 559     assert(false);
 560     return false;
 561   }
 562   s->buf = newbuf;
 563   s->buflen = newlen;
 564   s->buf[s->count] = s->buf[s->buflen] = 0;
 565   assert(s->buflen >= s->count + extra);
 566   return true;
 567 }
 568
 569 static void sbuf_init( stringbuf_t* sbuf, alloc_t* mem ) {
 570   sbuf->mem = mem;
 571   sbuf->buf = NULL;
 572   sbuf->buflen = 0;
 573   sbuf->count = 0;
 574 }
 575
 576 static void sbuf_done( stringbuf_t* sbuf ) {
 577   mem_free( sbuf->mem, sbuf->buf );
 578   sbuf->buf = NULL;
 579   sbuf->buflen = 0;
 580   sbuf->count = 0;
 581 }
 582
 583
 584 ic_private void sbuf_free( stringbuf_t* sbuf ) {
 585   if (sbuf == NULL) { return; }
 586   sbuf_done(sbuf);
 587   mem_free(sbuf->mem, sbuf);
 588 }
 589
 590 ic_private stringbuf_t*  sbuf_new( alloc_t* mem ) {
 591   stringbuf_t* sbuf = mem_zalloc_tp(mem,stringbuf_t);
 592   if (sbuf == NULL) { return NULL; }
 593   sbuf_init(sbuf,mem);
 594   return sbuf;
 595 }
 596
 597 // free the sbuf and return the current string buffer as the result
 598 ic_private char* sbuf_free_dup(stringbuf_t* sbuf) {
 599   if (sbuf == NULL) { return NULL; }
 600   char* s = NULL;
 601   if (sbuf->buf != NULL) {
 602     s = mem_realloc_tp(sbuf->mem, char, sbuf->buf, sbuf_len(sbuf)+1);
 603     if (s == NULL) { s = sbuf->buf; }
 604     sbuf->buf = 0;
 605     sbuf->buflen = 0;
 606     sbuf->count = 0;
 607   }
 608   sbuf_free(sbuf);
 609   return s;
 610 }
 611
 612 ic_private const char* sbuf_string_at( stringbuf_t* sbuf, ssize_t pos ) {
 613   if (pos < 0 || sbuf->count < pos) { return NULL; }
 614   if (sbuf->buf == NULL) { return ""; }
 615   assert(sbuf->buf[sbuf->count] == 0);
 616   return sbuf->buf + pos;
 617 }
 618
 619 ic_private const char* sbuf_string( stringbuf_t* sbuf ) {
 620   return sbuf_string_at( sbuf, 0 );
 621 }
 622
 623 ic_private char sbuf_char_at(stringbuf_t* sbuf, ssize_t pos) {
 624   if (sbuf->buf == NULL || pos < 0 || sbuf->count < pos) { return 0; }
 625   return sbuf->buf[pos];
 626 }
 627
 628 ic_private char* sbuf_strdup_at( stringbuf_t* sbuf, ssize_t pos ) {
 629   return mem_strdup(sbuf->mem, sbuf_string_at(sbuf,pos));
 630 }
 631
 632 ic_private char* sbuf_strdup( stringbuf_t* sbuf ) {
 633   return mem_strdup(sbuf->mem, sbuf_string(sbuf));
 634 }
 635
 636 ic_private ssize_t sbuf_len(const stringbuf_t* s) {
 637   if (s == NULL) { return 0; }
 638   return s->count;
 639 }
 640
 641 ic_private ssize_t sbuf_append_vprintf(stringbuf_t* sb, const char* fmt, va_list args) {
 642   const ssize_t min_needed = ic_strlen(fmt);
 643   if (!sbuf_ensure_extra(sb,min_needed + 16)) { return sb->count; }
 644   ssize_t avail = sb->buflen - sb->count;
 645   va_list args0;
 646   va_copy(args0, args);
 647   ssize_t needed = vsnprintf(sb->buf + sb->count, to_size_t(avail), fmt, args0);
 648   if (needed > avail) {
 649     sb->buf[sb->count] = 0;
 650     if (!sbuf_ensure_extra(sb, needed)) { return sb->count; }
 651     avail = sb->buflen - sb->count;
 652     needed = vsnprintf(sb->buf + sb->count, to_size_t(avail), fmt, args);
 653   }
 654   assert(needed <= avail);
 655   sb->count += (needed > avail ? avail : (needed >= 0 ? needed : 0));
 656   assert(sb->count <= sb->buflen);
 657   sb->buf[sb->count] = 0;
 658   return sb->count;
 659 }
 660
 661 ic_private ssize_t sbuf_appendf(stringbuf_t* sb, const char* fmt, ...) {
 662   va_list args;
 663   va_start( args, fmt);
 664   ssize_t res = sbuf_append_vprintf( sb, fmt, args );
 665   va_end(args);
 666   return res;
 667 }
 668
 669
 670 ic_private ssize_t sbuf_insert_at_n(stringbuf_t* sbuf, const char* s, ssize_t n, ssize_t pos ) {
 671   if (pos < 0 || pos > sbuf->count || s == NULL) { return pos; }
 672   n = str_limit_to_length(s,n);
 673   if (n <= 0 || !sbuf_ensure_extra(sbuf,n)) { return pos; }
 674   ic_memmove(sbuf->buf + pos + n, sbuf->buf + pos, sbuf->count - pos);
 675   ic_memcpy(sbuf->buf + pos, s, n);
 676   sbuf->count += n;
 677   sbuf->buf[sbuf->count] = 0;
 678   return (pos + n);
 679 }
 680
 681 ic_private stringbuf_t* sbuf_split_at( stringbuf_t* sb, ssize_t pos ) {
 682   stringbuf_t* res = sbuf_new(sb->mem);
 683   if (res == NULL || pos < 0) { return NULL; }
 684   if (pos < sb->count) {
 685     sbuf_append_n(res, sb->buf + pos, sb->count - pos);
 686     sb->count = pos;
 687   }
 688   return res;
 689 }
 690
 691 ic_private ssize_t sbuf_insert_at(stringbuf_t* sbuf, const char* s, ssize_t pos ) {
 692   return sbuf_insert_at_n( sbuf, s, ic_strlen(s), pos );
 693 }
 694
 695 ic_private ssize_t sbuf_insert_char_at(stringbuf_t* sbuf, char c, ssize_t pos ) {
 696   char s[2];
 697   s[0] = c;
 698   s[1] = 0;
 699   return sbuf_insert_at_n( sbuf, s, 1, pos);
 700 }
 701
 702 ic_private ssize_t sbuf_insert_unicode_at(stringbuf_t* sbuf, unicode_t u, ssize_t pos) {
 703   uint8_t s[5];
 704   unicode_to_qutf8(u, s);
 705   return sbuf_insert_at(sbuf, (const char*)s, pos);
 706 }
 707
 708
 709
 710 ic_private void sbuf_delete_at( stringbuf_t* sbuf, ssize_t pos, ssize_t count ) {
 711   if (pos < 0 || pos >= sbuf->count) { return; }
 712   if (pos + count > sbuf->count) { count = sbuf->count - pos; }
 713   ic_memmove(sbuf->buf + pos, sbuf->buf + pos + count, sbuf->count - pos - count);
 714   sbuf->count -= count;
 715   sbuf->buf[sbuf->count] = 0;
 716 }
 717
 718 ic_private void sbuf_delete_from_to( stringbuf_t* sbuf, ssize_t pos, ssize_t end ) {
 719   if (end <= pos) { return; }
 720   sbuf_delete_at( sbuf, pos, end - pos);
 721 }
 722
 723 ic_private void  sbuf_delete_from(stringbuf_t* sbuf, ssize_t pos ) {
 724   sbuf_delete_at(sbuf, pos, sbuf_len(sbuf) - pos );
 725 }
 726
 727
 728 ic_private void sbuf_clear( stringbuf_t* sbuf ) {
 729   sbuf_delete_at(sbuf, 0, sbuf_len(sbuf));
 730 }
 731
 732 ic_private ssize_t sbuf_append_n( stringbuf_t* sbuf, const char* s, ssize_t n ) {
 733   return sbuf_insert_at_n( sbuf, s, n, sbuf_len(sbuf));
 734 }
 735
 736 ic_private ssize_t sbuf_append( stringbuf_t* sbuf, const char* s ) {
 737   return sbuf_insert_at( sbuf, s, sbuf_len(sbuf));
 738 }
 739
 740 ic_private ssize_t sbuf_append_char( stringbuf_t* sbuf, char c ) {
 741   char buf[2];
 742   buf[0] = c;
 743   buf[1] = 0;
 744   return sbuf_append( sbuf, buf );
 745 }
 746
 747 ic_private void sbuf_replace(stringbuf_t* sbuf, const char* s) {
 748   sbuf_clear(sbuf);
 749   sbuf_append(sbuf,s);
 750 }
 751
 752 ic_private ssize_t sbuf_next_ofs( stringbuf_t* sbuf, ssize_t pos, ssize_t* cwidth ) {
 753   return str_next_ofs( sbuf->buf, sbuf->count, pos, cwidth);
 754 }
 755
 756 ic_private ssize_t sbuf_prev_ofs( stringbuf_t* sbuf, ssize_t pos, ssize_t* cwidth ) {
 757   return str_prev_ofs( sbuf->buf, pos, cwidth);
 758 }
 759
 760 ic_private ssize_t sbuf_next( stringbuf_t* sbuf, ssize_t pos, ssize_t* cwidth) {
 761   ssize_t ofs = sbuf_next_ofs(sbuf,pos,cwidth);
 762   if (ofs <= 0) { return -1; }
 763   assert(pos + ofs <= sbuf->count);
 764   return pos + ofs;
 765 }
 766
 767 ic_private ssize_t sbuf_prev( stringbuf_t* sbuf, ssize_t pos, ssize_t* cwidth) {
 768   ssize_t ofs = sbuf_prev_ofs(sbuf,pos,cwidth);
 769   if (ofs <= 0) { return -1; }
 770   assert(pos - ofs >= 0);
 771   return pos - ofs;
 772 }
 773
 774 ic_private ssize_t sbuf_delete_char_before( stringbuf_t* sbuf, ssize_t pos ) {
 775   ssize_t n = sbuf_prev_ofs(sbuf, pos, NULL);
 776   if (n <= 0) { return 0; }
 777   assert( pos - n >= 0 );
 778   sbuf_delete_at(sbuf, pos - n, n);
 779   return pos - n;
 780 }
 781
 782 ic_private void sbuf_delete_char_at( stringbuf_t* sbuf, ssize_t pos ) {
 783   ssize_t n = sbuf_next_ofs(sbuf, pos, NULL);
 784   if (n <= 0) { return; }
 785   assert( pos + n <= sbuf->count );
 786   sbuf_delete_at(sbuf, pos, n);
 787   return;
 788 }
 789
 790 ic_private ssize_t sbuf_swap_char( stringbuf_t* sbuf, ssize_t pos ) {
 791   ssize_t next = sbuf_next_ofs(sbuf, pos, NULL);
 792   if (next <= 0) { return 0; }
 793   ssize_t prev = sbuf_prev_ofs(sbuf, pos, NULL);
 794   if (prev <= 0) { return 0; }
 795   char buf[64];
 796   if (prev >= 63) { return 0; }
 797   ic_memcpy(buf, sbuf->buf + pos - prev, prev );
 798   ic_memmove(sbuf->buf + pos - prev, sbuf->buf + pos, next);
 799   ic_memmove(sbuf->buf + pos - prev + next, buf, prev);
 800   return pos - prev;
 801 }
 802
 803 ic_private ssize_t sbuf_find_line_start( stringbuf_t* sbuf, ssize_t pos ) {
 804   return str_find_line_start( sbuf->buf, sbuf->count, pos);
 805 }
 806
 807 ic_private ssize_t sbuf_find_line_end( stringbuf_t* sbuf, ssize_t pos ) {
 808   return str_find_line_end( sbuf->buf, sbuf->count, pos);
 809 }
 810
 811 ic_private ssize_t sbuf_find_word_start( stringbuf_t* sbuf, ssize_t pos ) {
 812   return str_find_word_start( sbuf->buf, sbuf->count, pos);
 813 }
 814
 815 ic_private ssize_t sbuf_find_word_end( stringbuf_t* sbuf, ssize_t pos ) {
 816   return str_find_word_end( sbuf->buf, sbuf->count, pos);
 817 }
 818
 819 ic_private ssize_t sbuf_find_ws_word_start( stringbuf_t* sbuf, ssize_t pos ) {
 820   return str_find_ws_word_start( sbuf->buf, sbuf->count, pos);
 821 }
 822
 823 ic_private ssize_t sbuf_find_ws_word_end( stringbuf_t* sbuf, ssize_t pos ) {
 824   return str_find_ws_word_end( sbuf->buf, sbuf->count, pos);
 825 }
 826
 827 // find row/col position
 828 ic_private ssize_t sbuf_get_pos_at_rc( stringbuf_t* sbuf, ssize_t termw, ssize_t promptw, ssize_t cpromptw, ssize_t row, ssize_t col ) {
 829   return str_get_pos_at_rc( sbuf->buf, sbuf->count, termw, promptw, cpromptw, row, col);
 830 }
 831
 832 // get row/col for a given position
 833 ic_private ssize_t sbuf_get_rc_at_pos( stringbuf_t* sbuf, ssize_t termw, ssize_t promptw, ssize_t cpromptw, ssize_t pos, rowcol_t* rc ) {
 834   return str_get_rc_at_pos( sbuf->buf, sbuf->count, termw, promptw, cpromptw, pos, rc);
 835 }
 836
 837 ic_private ssize_t sbuf_get_wrapped_rc_at_pos( stringbuf_t* sbuf, ssize_t termw, ssize_t newtermw, ssize_t promptw, ssize_t cpromptw, ssize_t pos, rowcol_t* rc ) {
 838   return str_get_wrapped_rc_at_pos( sbuf->buf, sbuf->count, termw, newtermw, promptw, cpromptw, pos, rc);
 839 }
 840
 841 ic_private ssize_t sbuf_for_each_row( stringbuf_t* sbuf, ssize_t termw, ssize_t promptw, ssize_t cpromptw, row_fun_t* fun, void* arg, void* res ) {
 842   if (sbuf == NULL) { return 0; }
 843   return str_for_each_row( sbuf->buf, sbuf->count, termw, promptw, cpromptw, fun, arg, res);
 844 }
 845
 846
 847 // Duplicate and decode from utf-8 (for non-utf8 terminals)
 848 ic_private char* sbuf_strdup_from_utf8(stringbuf_t* sbuf) {
 849   ssize_t len = sbuf_len(sbuf);
 850   if (sbuf == NULL || len <= 0) { return NULL; }
 851   char* s = mem_zalloc_tp_n(sbuf->mem, char, len);
 852   if (s == NULL) { return NULL; }
 853   ssize_t dest = 0;
 854   for (ssize_t i = 0; i < len; ) {
 855     ssize_t ofs = sbuf_next_ofs(sbuf, i, NULL);
 856     if (ofs <= 0) {
 857       // invalid input
 858       break;
 859     }
 860     else if (ofs == 1) {
 861       // regular character
 862       s[dest++] = sbuf->buf[i];
 863     }
 864     else if (sbuf->buf[i] == '\x1B') {
 865       // skip escape sequences
 866     }
 867     else {
 868       // decode unicode
 869       ssize_t nread;
 870       unicode_t uchr = unicode_from_qutf8( (const uint8_t*)(sbuf->buf + i), ofs, &nread);
 871       uint8_t c;
 872       if (unicode_is_raw(uchr, &c)) {
 873         // raw byte, output as is (this will take care of locale specific input)
 874         s[dest++] = (char)c;
 875       }
 876       else if (uchr <= 0x7F) {
 877         // allow ascii
 878         s[dest++] = (char)uchr;
 879       }
 880       else {
 881         // skip unknown unicode characters..
 882         // todo: convert according to locale?
 883       }
 884     }
 885     i += ofs;
 886   }
 887   assert(dest <= len);
 888   s[dest] = 0;
 889   return s;
 890 }
 891
 892 //-------------------------------------------------------------
 893 // String helpers
 894 //-------------------------------------------------------------
 895
 896 ic_public long ic_prev_char( const char* s, long pos ) {
 897   ssize_t len = ic_strlen(s);
 898   if (pos < 0 || pos > len) { return -1; }
 899   ssize_t ofs = str_prev_ofs( s, pos, NULL );
 900   if (ofs <= 0) { return -1; }
 901   return (long)(pos - ofs);
 902 }
 903
 904 ic_public long ic_next_char( const char* s, long pos ) {
 905   ssize_t len = ic_strlen(s);
 906   if (pos < 0 || pos > len) { return -1; }
 907   ssize_t ofs = str_next_ofs( s, len, pos, NULL );
 908   if (ofs <= 0) { return -1; }
 909   return (long)(pos + ofs);
 910 }
 911
 912
 913 // parse a decimal (leave pi unchanged on error)
 914 ic_private bool ic_atoz(const char* s, ssize_t* pi) {
 915   return (sscanf(s, "%" PRIz "d", pi) == 1);
 916 }
 917
 918 // parse two decimals separated by a semicolon
 919 ic_private bool ic_atoz2(const char* s, ssize_t* pi, ssize_t* pj) {
 920   return (sscanf(s, "%" PRIz "d;%" PRIz "d", pi, pj) == 2);
 921 }
 922
 923 // parse unsigned 32-bit (leave pu unchanged on error)
 924 ic_private bool ic_atou32(const char* s, uint32_t* pu) {
 925   return (sscanf(s, "%" SCNu32, pu) == 1);
 926 }
 927
 928
 929 // Convenience: character class for whitespace `[ \t\r\n]`.
 930 ic_public bool ic_char_is_white(const char* s, long len) {
 931   if (s == NULL || len != 1) { return false; }
 932   const char c = *s;
 933   return (c == ' ' || c == '\t' || c == '\n' || c == '\r');
 934 }
 935
 936 // Convenience: character class for non-whitespace `[^ \t\r\n]`.
 937 ic_public bool ic_char_is_nonwhite(const char* s, long len) {
 938   return !ic_char_is_white(s, len);
 939 }
 940
 941 // Convenience: character class for separators `[ \t\r\n,.;:/\\\(\)\{\}\[\]]`.
 942 ic_public bool ic_char_is_separator(const char* s, long len) {
 943   if (s == NULL || len != 1) { return false; }
 944   const char c = *s;
 945   return (strchr(" \t\r\n,.;:/\\(){}[]", c) != NULL);
 946 }
 947
 948 // Convenience: character class for non-separators.
 949 ic_public bool ic_char_is_nonseparator(const char* s, long len) {
 950   return !ic_char_is_separator(s, len);
 951 }
 952
 953
 954 // Convenience: character class for digits (`[0-9]`).
 955 ic_public bool ic_char_is_digit(const char* s, long len) {
 956   if (s == NULL || len != 1) { return false; }
 957   const char c = *s;
 958   return (c >= '0' && c <= '9');
 959 }
 960
 961 // Convenience: character class for hexadecimal digits (`[A-Fa-f0-9]`).
 962 ic_public bool ic_char_is_hexdigit(const char* s, long len) {
 963   if (s == NULL || len != 1) { return false; }
 964   const char c = *s;
 965   return ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
 966 }
 967
 968 // Convenience: character class for letters (`[A-Za-z]` and any unicode > 0x80).
 969 ic_public bool ic_char_is_letter(const char* s, long len) {
 970   if (s == NULL || len <= 0) { return false; }
 971   const char c = *s;
 972   return ((uint8_t)c >= 0x80 || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'));
 973 }
 974
 975 // Convenience: character class for identifier letters (`[A-Za-z0-9_-]` and any unicode > 0x80).
 976 ic_public bool ic_char_is_idletter(const char* s, long len) {
 977   if (s == NULL || len <= 0) { return false; }
 978   const char c = *s;
 979   return ((uint8_t)c >= 0x80 || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c == '_') || (c == '-'));
 980 }
 981
 982 // Convenience: character class for filename letters (`[^ \t\r\n`@$><=;|&{(]`).
 983 ic_public bool ic_char_is_filename_letter(const char* s, long len) {
 984   if (s == NULL || len <= 0) { return false; }
 985   const char c = *s;
 986   return ((uint8_t)c >= 0x80 || (strchr(" \t\r\n`@$><=;|&{}()[]", c) == NULL));
 987 }
 988
 989 // Convenience: If this is a token start, returns the length (or <= 0 if not found).
 990 ic_public long ic_is_token(const char* s, long pos, ic_is_char_class_fun_t* is_token_char) {
 991   if (s == NULL || pos < 0 || is_token_char == NULL) { return -1; }
 992   ssize_t len = ic_strlen(s);
 993   if (pos >= len) { return -1; }
 994   if (pos > 0 && is_token_char(s + pos -1, 1)) { return -1; } // token start?
 995   ssize_t i = pos;
 996   while ( i < len ) {
 997     ssize_t next = str_next_ofs(s, len, i, NULL);
 998     if (next <= 0) { return -1; }
 999     if (!is_token_char(s + i, (long)next)) { break; }
1000     i += next;
1001   }
1002   return (long)(i - pos);
1003 }
1004
1005
// `strncmp` taking a signed count; the count is converted with `to_size_t` first.
static int ic_strncmp(const char* s1, const char* s2, ssize_t n) {
  const size_t limit = to_size_t(n);
  return strncmp(s1, s2, limit);
}
1009
1010 // Convenience: Does this match the specified token?
1011 // Ensures not to match prefixes or suffixes, and returns the length of the match (in bytes).
1012 // E.g. `ic_match_token("function",0,&ic_char_is_letter,"fun")` returns 0.
1013 ic_public long ic_match_token(const char* s, long pos, ic_is_char_class_fun_t* is_token_char, const char* token) {
1014   long n = ic_is_token(s, pos, is_token_char);
1015   if (n > 0 && token != NULL && n == ic_strlen(token) && ic_strncmp(s + pos, token, n) == 0) {
1016     return n;
1017   }
1018   else {
1019     return 0;
1020   }
1021 }
1022
1023
1024 // Convenience: Do any of the specified tokens match?
1025 // Ensures not to match prefixes or suffixes, and returns the length of the match (in bytes).
1026 // Ensures not to match prefixes or suffixes.
1027 // E.g. `ic_match_any_token("function",0,&ic_char_is_letter,{"fun","func",NULL})` returns 0.
1028 ic_public long ic_match_any_token(const char* s, long pos, ic_is_char_class_fun_t* is_token_char, const char** tokens) {
1029   long n = ic_is_token(s, pos, is_token_char);
1030   if (n <= 0 || tokens == NULL) { return 0; }
1031   for (const char** token = tokens; *token != NULL; token++) {
1032     if (n == ic_strlen(*token) && ic_strncmp(s + pos, *token, n) == 0) {
1033       return n;
1034     }
1035   }
1036   return 0;
1037 }