bump copyright year to 2025
[liba.git] / lua / isocline / src / stringbuf.c
blobc6ae2e06b259db36ce098c101be5a60c5c718376
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2021, Daan Leijen
3 This is free software; you can redistribute it and/or modify it
4 under the terms of the MIT License. A copy of the license can be
5 found in the "LICENSE" file at the root of this distribution.
6 -----------------------------------------------------------------------------*/
8 // get `wcwidth` for the column width of unicode characters
9 // note: for now the OS provided one is unused as we see quite a bit of variation
10 // among platforms and including our own seems more reliable.
12 #if defined(__linux__) || defined(__freebsd__)
13 // use the system supplied one
14 #if !defined(_XOPEN_SOURCE)
15 #define _XOPEN_SOURCE 700 // so wcwidth is visible
16 #endif
17 #include <wchar.h>
18 #else
20 // use our own (also on APPLE as that fails within vscode)
21 #define wcwidth(c) mk_wcwidth(c)
22 #include "wcwidth.c"
23 // #endif
25 #include <stdio.h>
26 #include <string.h>
27 #include <inttypes.h>
29 #include "common.h"
30 #include "stringbuf.h"
32 //-------------------------------------------------------------
33 // In place growable utf-8 strings
34 //-------------------------------------------------------------
36 struct stringbuf_s {
37 char* buf;
38 ssize_t buflen;
39 ssize_t count;
40 alloc_t* mem;
44 //-------------------------------------------------------------
45 // String column width
46 //-------------------------------------------------------------
// Column width of one utf-8 encoded character sequence `s` of `n` bytes.
// The bytes are decoded without validation and the code point is handed to
// `wcwidth`; a sequence that cannot be decoded counts as one column.
static ssize_t utf8_char_width( const char* s, ssize_t n ) {
  if (n <= 0) { return 0; }

  const uint8_t lead = (uint8_t)s[0];
  if (lead < ' ')   { return 0; }   // control characters
  if (lead <= 0x7F) { return 1; }   // ascii
  if (lead <= 0xC1) { return 1; }   // continuation byte or invalid 0xC0, 0xC1 (we do not validate)

  if (lead <= 0xDF && n >= 2) {     // lead >= 0xC2: 2 bytes
    const int32_t cp = (((lead & 0x1F) << 6) | (s[1] & 0x3F));
    assert(cp < 0xD800 || cp > 0xDFFF);
    return wcwidth(cp);
  }
  if (lead <= 0xEF && n >= 3) {     // lead >= 0xE0: 3 bytes
    const int32_t cp = (((lead & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F));
    return wcwidth(cp);
  }
  if (lead <= 0xF4 && n >= 4) {     // lead >= 0xF0: 4 bytes
    const int32_t cp = (((lead & 0x07) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F));
    return wcwidth(cp);
  }
  // truncated sequence or invalid lead byte
  return 1;
}
84 // The column width of a codepoint (0, 1, or 2)
85 static ssize_t char_column_width( const char* s, ssize_t n ) {
86 if (s == NULL || n <= 0) { return 0; }
87 else if ((uint8_t)(*s) < ' ') { return 0; } // also for CSI escape sequences
88 else {
89 ssize_t w = utf8_char_width(s, n);
90 #ifdef _WIN32
91 return (w <= 0 ? 1 : w); // windows console seems to use at least one column
92 #else
93 return w;
94 #endif
98 static ssize_t str_column_width_n( const char* s, ssize_t len ) {
99 if (s == NULL || len <= 0) { return 0; }
100 ssize_t pos = 0;
101 ssize_t cwidth = 0;
102 ssize_t cw;
103 ssize_t ofs;
104 while (s[pos] != 0 && (ofs = str_next_ofs(s, len, pos, &cw)) > 0) {
105 cwidth += cw;
106 pos += ofs;
108 return cwidth;
111 ic_private ssize_t str_column_width( const char* s ) {
112 return str_column_width_n( s, ic_strlen(s) );
115 ic_private ssize_t str_skip_until_fit( const char* s, ssize_t max_width ) {
116 if (s == NULL) { return 0; }
117 ssize_t cwidth = str_column_width(s);
118 ssize_t len = ic_strlen(s);
119 ssize_t pos = 0;
120 ssize_t next;
121 ssize_t cw;
122 while (cwidth > max_width && (next = str_next_ofs(s, len, pos, &cw)) > 0) {
123 cwidth -= cw;
124 pos += next;
126 return pos;
129 ic_private ssize_t str_take_while_fit( const char* s, ssize_t max_width) {
130 if (s == NULL) { return 0; }
131 const ssize_t len = ic_strlen(s);
132 ssize_t pos = 0;
133 ssize_t next;
134 ssize_t cw;
135 ssize_t cwidth = 0;
136 while ((next = str_next_ofs(s, len, pos, &cw)) > 0) {
137 if (cwidth + cw > max_width) { break; }
138 cwidth += cw;
139 pos += next;
141 return pos;
145 //-------------------------------------------------------------
146 // String navigation
147 //-------------------------------------------------------------
149 // get offset of the previous codepoint. does not skip back over CSI sequences.
150 ic_private ssize_t str_prev_ofs( const char* s, ssize_t pos, ssize_t* width ) {
151 ssize_t ofs = 0;
152 if (s != NULL && pos > 0) {
153 ofs = 1;
154 while (pos > ofs) {
155 uint8_t u = (uint8_t)s[pos - ofs];
156 if (u < 0x80 || u > 0xBF) { break; } // continue while follower
157 ofs++;
160 if (width != NULL) { *width = char_column_width( s+(pos-ofs), ofs ); }
161 return ofs;
164 // skip an escape sequence
165 // <https://www.xfree86.org/current/ctlseqs.html>
166 ic_private bool skip_esc( const char* s, ssize_t len, ssize_t* esclen ) {
167 if (s == NULL || len <= 1 || s[0] != '\x1B') { return false; }
168 if (esclen != NULL) { *esclen = 0; }
169 if (strchr("[PX^_]",s[1]) != NULL) {
170 // CSI (ESC [), DCS (ESC P), SOS (ESC X), PM (ESC ^), APC (ESC _), and OSC (ESC ]): terminated with a special sequence
171 bool finalCSI = (s[1] == '['); // CSI terminates with 0x40-0x7F; otherwise ST (bell or ESC \)
172 ssize_t n = 2;
173 while (len > n) {
174 char c = s[n++];
175 if ((finalCSI && (uint8_t)c >= 0x40 && (uint8_t)c <= 0x7F) || // terminating byte: @A–Z[\]^_`a–z{|}~
176 (!finalCSI && c == '\x07') || // bell
177 (c == '\x02')) // STX terminates as well
179 if (esclen != NULL) { *esclen = n; }
180 return true;
182 else if (!finalCSI && c == '\x1B' && len > n && s[n] == '\\') { // ST (ESC \)
183 n++;
184 if (esclen != NULL) { *esclen = n; }
185 return true;
189 if (strchr(" #%()*+",s[1]) != NULL) {
190 // assume escape sequence of length 3 (like ESC % G)
191 if (esclen != NULL) { *esclen = 2; }
192 return true;
194 else {
195 // assume single character escape code (like ESC 7)
196 if (esclen != NULL) { *esclen = 2; }
197 return true;
199 return false;
202 // Offset to the next codepoint, treats CSI escape sequences as a single code point.
203 ic_private ssize_t str_next_ofs( const char* s, ssize_t len, ssize_t pos, ssize_t* cwidth ) {
204 ssize_t ofs = 0;
205 if (s != NULL && len > pos) {
206 if (skip_esc(s+pos,len-pos,&ofs)) {
207 // skip escape sequence
209 else {
210 ofs = 1;
211 // utf8 extended character?
212 while(len > pos + ofs) {
213 uint8_t u = (uint8_t)s[pos + ofs];
214 if (u < 0x80 || u > 0xBF) { break; } // break if not a follower
215 ofs++;
219 if (cwidth != NULL) { *cwidth = char_column_width( s+pos, ofs ); }
220 return ofs;
// Length of `s` in bytes, capped at `n`: counts bytes until either `n` bytes
// were seen or a zero byte is reached.
static ssize_t str_limit_to_length( const char* s, ssize_t n ) {
  ssize_t len = 0;
  while (len < n && s[len] != 0) {
    len++;
  }
  return len;
}
230 //-------------------------------------------------------------
231 // String searching prev/next word, line, ws_word
232 //-------------------------------------------------------------
235 static ssize_t str_find_backward( const char* s, ssize_t len, ssize_t pos, ic_is_char_class_fun_t* match, bool skip_immediate_matches ) {
236 if (pos > len) { pos = len; }
237 if (pos < 0) { pos = 0; }
238 ssize_t i = pos;
239 // skip matching first (say, whitespace in case of the previous start-of-word)
240 if (skip_immediate_matches) {
241 do {
242 ssize_t prev = str_prev_ofs(s, i, NULL);
243 if (prev <= 0) { break; }
244 assert(i - prev >= 0);
245 if (!match(s + i - prev, (long)prev)) { break; }
246 i -= prev;
247 } while (i > 0);
249 // find match
250 do {
251 ssize_t prev = str_prev_ofs(s, i, NULL);
252 if (prev <= 0) { break; }
253 assert(i - prev >= 0);
254 if (match(s + i - prev, (long)prev)) {
255 return i; // found;
257 i -= prev;
258 } while (i > 0);
259 return -1; // not found
262 static ssize_t str_find_forward( const char* s, ssize_t len, ssize_t pos, ic_is_char_class_fun_t* match, bool skip_immediate_matches ) {
263 if (s == NULL || len < 0) { return -1; }
264 if (pos > len) { pos = len; }
265 if (pos < 0) { pos = 0; }
266 ssize_t i = pos;
267 ssize_t next;
268 // skip matching first (say, whitespace in case of the next end-of-word)
269 if (skip_immediate_matches) {
270 do {
271 next = str_next_ofs(s, len, i, NULL);
272 if (next <= 0) { break; }
273 assert( i + next <= len);
274 if (!match(s + i, (long)next)) { break; }
275 i += next;
276 } while (i < len);
278 // and then look
279 do {
280 next = str_next_ofs(s, len, i, NULL);
281 if (next <= 0) { break; }
282 assert( i + next <= len);
283 if (match(s + i, (long)next)) {
284 return i; // found
286 i += next;
287 } while (i < len);
288 return -1;
// Character class: a single byte that is either a line feed or a zero byte.
static bool char_is_linefeed( const char* s, long n ) {
  if (n != 1) { return false; }
  const char c = s[0];
  return (c == '\n' || c == '\0');
}
295 static ssize_t str_find_line_start( const char* s, ssize_t len, ssize_t pos) {
296 ssize_t start = str_find_backward(s,len,pos,&char_is_linefeed,false /* don't skip immediate matches */);
297 return (start < 0 ? 0 : start);
300 static ssize_t str_find_line_end( const char* s, ssize_t len, ssize_t pos) {
301 ssize_t end = str_find_forward(s,len,pos, &char_is_linefeed, false);
302 return (end < 0 ? len : end);
305 static ssize_t str_find_word_start( const char* s, ssize_t len, ssize_t pos) {
306 ssize_t start = str_find_backward(s,len,pos, &ic_char_is_idletter,true /* skip immediate matches */);
307 return (start < 0 ? 0 : start);
310 static ssize_t str_find_word_end( const char* s, ssize_t len, ssize_t pos) {
311 ssize_t end = str_find_forward(s,len,pos,&ic_char_is_idletter,true /* skip immediate matches */);
312 return (end < 0 ? len : end);
315 static ssize_t str_find_ws_word_start( const char* s, ssize_t len, ssize_t pos) {
316 ssize_t start = str_find_backward(s,len,pos,&ic_char_is_white,true /* skip immediate matches */);
317 return (start < 0 ? 0 : start);
320 static ssize_t str_find_ws_word_end( const char* s, ssize_t len, ssize_t pos) {
321 ssize_t end = str_find_forward(s,len,pos,&ic_char_is_white,true /* skip immediate matches */);
322 return (end < 0 ? len : end);
326 //-------------------------------------------------------------
327 // String row/column iteration
328 //-------------------------------------------------------------
330 // invoke a function for each terminal row; returns total row count.
331 static ssize_t str_for_each_row( const char* s, ssize_t len, ssize_t termw, ssize_t promptw, ssize_t cpromptw,
332 row_fun_t* fun, const void* arg, void* res )
334 if (s == NULL) { s = ""; }
335 ssize_t i;
336 ssize_t rcount = 0;
337 ssize_t rcol = 0;
338 ssize_t rstart = 0;
339 ssize_t startw = promptw;
340 for(i = 0; i < len; ) {
341 ssize_t w;
342 ssize_t next = str_next_ofs(s, len, i, &w);
343 if (next <= 0) {
344 debug_msg("str: foreach row: next<=0: len %" PRIz "d, i %" PRIz "d, w %" PRIz "d, buf %s\n", len, i, w, s );
345 assert(false);
346 break;
348 startw = (rcount == 0 ? promptw : cpromptw);
349 ssize_t termcol = rcol + w + startw + 1 /* for the cursor */;
350 if (termw != 0 && i != 0 && termcol >= termw) {
351 // wrap
352 if (fun != NULL) {
353 if (fun(s,rcount,rstart,i - rstart,startw,true,arg,res)) { return rcount; }
355 rcount++;
356 rstart = i;
357 rcol = 0;
359 if (s[i] == '\n') {
360 // newline
361 if (fun != NULL) {
362 if (fun(s,rcount,rstart,i - rstart,startw,false,arg,res)) { return rcount; }
364 rcount++;
365 rstart = i+1;
366 rcol = 0;
368 assert (s[i] != 0);
369 i += next;
370 rcol += w;
372 if (fun != NULL) {
373 if (fun(s,rcount,rstart,i - rstart,startw,false,arg,res)) { return rcount; }
375 return rcount+1;
378 //-------------------------------------------------------------
379 // String: get row/column position
380 //-------------------------------------------------------------
383 static bool str_get_current_pos_iter(
384 const char* s,
385 ssize_t row, ssize_t row_start, ssize_t row_len,
386 ssize_t startw, bool is_wrap, const void* arg, void* res)
388 ic_unused(is_wrap); ic_unused(startw);
389 rowcol_t* rc = (rowcol_t*)res;
390 ssize_t pos = *((ssize_t*)arg);
392 if (pos >= row_start && pos <= (row_start + row_len)) {
393 // found the cursor row
394 rc->row_start = row_start;
395 rc->row_len = row_len;
396 rc->row = row;
397 rc->col = str_column_width_n( s + row_start, pos - row_start );
398 rc->first_on_row = (pos == row_start);
399 if (is_wrap) {
400 // if wrapped, we check if the next character is at row_len
401 ssize_t next = str_next_ofs(s, row_start + row_len, pos, NULL);
402 rc->last_on_row = (pos + next >= row_start + row_len);
404 else {
405 // normal last position is right after the last character
406 rc->last_on_row = (pos >= row_start + row_len);
408 // debug_msg("edit; pos iter: pos: %" PRIz "d (%c), row_start: %" PRIz "d, rowlen: %" PRIz "d\n", pos, s[pos], row_start, row_len);
410 return false; // always continue to count all rows
413 static ssize_t str_get_rc_at_pos(const char* s, ssize_t len, ssize_t termw, ssize_t promptw, ssize_t cpromptw, ssize_t pos, rowcol_t* rc) {
414 memset(rc, 0, sizeof(*rc));
415 ssize_t rows = str_for_each_row(s, len, termw, promptw, cpromptw, &str_get_current_pos_iter, &pos, rc);
416 // debug_msg("edit: current pos: (%d, %d) %s %s\n", rc->row, rc->col, rc->first_on_row ? "first" : "", rc->last_on_row ? "last" : "");
417 return rows;
422 //-------------------------------------------------------------
423 // String: get row/column position for a resized terminal
424 // with potentially "hard-wrapped" rows
425 //-------------------------------------------------------------
// Input of the hard-wrap position callback.
typedef struct wrapped_arg_s {
  ssize_t pos;       // byte position to locate
  ssize_t newtermw;  // terminal width that is compared against to detect hard-wraps
} wrapped_arg_t;
431 typedef struct wrowcol_s {
432 rowcol_t rc;
433 ssize_t hrows; // count of hard-wrapped extra rows
434 } wrowcol_t;
436 static bool str_get_current_wrapped_pos_iter(
437 const char* s,
438 ssize_t row, ssize_t row_start, ssize_t row_len,
439 ssize_t startw, bool is_wrap, const void* arg, void* res)
441 ic_unused(is_wrap);
442 wrowcol_t* wrc = (wrowcol_t*)res;
443 const wrapped_arg_t* warg = (const wrapped_arg_t*)arg;
445 // iterate through the row and record the postion and hard-wraps
446 ssize_t hwidth = startw;
447 ssize_t i = 0;
448 while( i <= row_len ) { // include rowlen as the cursor position can be just after the last character
449 // get next position and column width
450 ssize_t cw;
451 ssize_t next;
452 bool is_cursor = (warg->pos == row_start+i);
453 if (i < row_len) {
454 next = str_next_ofs(s + row_start, row_len, i, &cw);
456 else {
457 // end of row: take wrap or cursor into account
458 // (wrap has width 2 as it displays a back-arrow but also has an invisible newline that wraps)
459 cw = (is_wrap ? 2 : (is_cursor ? 1 : 0));
460 next = 1;
463 if (next > 0) {
464 if (hwidth + cw > warg->newtermw) {
465 // hardwrap
466 hwidth = 0;
467 wrc->hrows++;
468 debug_msg("str: found hardwrap: row: %" PRIz "d, hrows: %" PRIz "d\n", row, wrc->hrows);
471 else {
472 next++; // ensure we terminate (as we go up to rowlen)
475 // did we find our position?
476 if (is_cursor) {
477 debug_msg("str: found position: row: %" PRIz "d, hrows: %" PRIz "d\n", row, wrc->hrows);
478 wrc->rc.row_start = row_start;
479 wrc->rc.row_len = row_len;
480 wrc->rc.row = wrc->hrows + row;
481 wrc->rc.col = hwidth;
482 wrc->rc.first_on_row = (i == 0);
483 wrc->rc.last_on_row = (i+next >= row_len - (is_wrap ? 1 : 0));
486 // advance
487 hwidth += cw;
488 i += next;
490 return false; // always continue to count all rows
494 static ssize_t str_get_wrapped_rc_at_pos(const char* s, ssize_t len, ssize_t termw, ssize_t newtermw, ssize_t promptw, ssize_t cpromptw, ssize_t pos, rowcol_t* rc) {
495 wrapped_arg_t warg;
496 warg.pos = pos;
497 warg.newtermw = newtermw;
498 wrowcol_t wrc;
499 memset(&wrc,0,sizeof(wrc));
500 ssize_t rows = str_for_each_row(s, len, termw, promptw, cpromptw, &str_get_current_wrapped_pos_iter, &warg, &wrc);
501 debug_msg("edit: wrapped pos: (%" PRIz "d,%" PRIz "d) rows %" PRIz "d %s %s, hrows: %" PRIz "d\n", wrc.rc.row, wrc.rc.col, rows, wrc.rc.first_on_row ? "first" : "", wrc.rc.last_on_row ? "last" : "", wrc.hrows);
502 *rc = wrc.rc;
503 return (rows + wrc.hrows);
507 //-------------------------------------------------------------
508 // Set position
509 //-------------------------------------------------------------
511 static bool str_set_pos_iter(
512 const char* s,
513 ssize_t row, ssize_t row_start, ssize_t row_len,
514 ssize_t startw, bool is_wrap, const void* arg, void* res)
516 ic_unused(arg); ic_unused(is_wrap); ic_unused(startw);
517 rowcol_t* rc = (rowcol_t*)arg;
518 if (rc->row != row) { return false; } // keep searching
519 // we found our row
520 ssize_t col = 0;
521 ssize_t i = row_start;
522 ssize_t end = row_start + row_len;
523 while (col < rc->col && i < end) {
524 ssize_t cw;
525 ssize_t next = str_next_ofs(s, row_start + row_len, i, &cw);
526 if (next <= 0) { break; }
527 i += next;
528 col += cw;
530 *((ssize_t*)res) = i;
531 return true; // stop iteration
534 static ssize_t str_get_pos_at_rc(const char* s, ssize_t len, ssize_t termw, ssize_t promptw, ssize_t cpromptw, ssize_t row, ssize_t col /* without prompt */) {
535 rowcol_t rc;
536 memset(&rc,0,ssizeof(rc));
537 rc.row = row;
538 rc.col = col;
539 ssize_t pos = -1;
540 str_for_each_row(s,len,termw,promptw,cpromptw,&str_set_pos_iter,&rc,&pos);
541 return pos;
545 //-------------------------------------------------------------
546 // String buffer
547 //-------------------------------------------------------------
548 static bool sbuf_ensure_extra(stringbuf_t* s, ssize_t extra)
550 if (s->buflen >= s->count + extra) { return true; }
551 // reallocate; pick good initial size and multiples to increase reuse on allocation
552 ssize_t newlen = (s->buflen <= 0 ? 120 : (s->buflen > 1000 ? s->buflen + 1000 : 2*s->buflen));
553 if (newlen < s->count + extra) { newlen = s->count + extra; }
554 if (s->buflen > 0) {
555 debug_msg("stringbuf: reallocate: old %" PRIz "d, new %" PRIz "d\n", s->buflen, newlen);
557 char* newbuf = mem_realloc_tp(s->mem, char, s->buf, newlen+1); // one more for terminating zero
558 if (newbuf == NULL) {
559 assert(false);
560 return false;
562 s->buf = newbuf;
563 s->buflen = newlen;
564 s->buf[s->count] = s->buf[s->buflen] = 0;
565 assert(s->buflen >= s->count + extra);
566 return true;
569 static void sbuf_init( stringbuf_t* sbuf, alloc_t* mem ) {
570 sbuf->mem = mem;
571 sbuf->buf = NULL;
572 sbuf->buflen = 0;
573 sbuf->count = 0;
576 static void sbuf_done( stringbuf_t* sbuf ) {
577 mem_free( sbuf->mem, sbuf->buf );
578 sbuf->buf = NULL;
579 sbuf->buflen = 0;
580 sbuf->count = 0;
584 ic_private void sbuf_free( stringbuf_t* sbuf ) {
585 if (sbuf == NULL) { return; }
586 sbuf_done(sbuf);
587 mem_free(sbuf->mem, sbuf);
590 ic_private stringbuf_t* sbuf_new( alloc_t* mem ) {
591 stringbuf_t* sbuf = mem_zalloc_tp(mem,stringbuf_t);
592 if (sbuf == NULL) { return NULL; }
593 sbuf_init(sbuf,mem);
594 return sbuf;
597 // free the sbuf and return the current string buffer as the result
598 ic_private char* sbuf_free_dup(stringbuf_t* sbuf) {
599 if (sbuf == NULL) { return NULL; }
600 char* s = NULL;
601 if (sbuf->buf != NULL) {
602 s = mem_realloc_tp(sbuf->mem, char, sbuf->buf, sbuf_len(sbuf)+1);
603 if (s == NULL) { s = sbuf->buf; }
604 sbuf->buf = 0;
605 sbuf->buflen = 0;
606 sbuf->count = 0;
608 sbuf_free(sbuf);
609 return s;
612 ic_private const char* sbuf_string_at( stringbuf_t* sbuf, ssize_t pos ) {
613 if (pos < 0 || sbuf->count < pos) { return NULL; }
614 if (sbuf->buf == NULL) { return ""; }
615 assert(sbuf->buf[sbuf->count] == 0);
616 return sbuf->buf + pos;
619 ic_private const char* sbuf_string( stringbuf_t* sbuf ) {
620 return sbuf_string_at( sbuf, 0 );
623 ic_private char sbuf_char_at(stringbuf_t* sbuf, ssize_t pos) {
624 if (sbuf->buf == NULL || pos < 0 || sbuf->count < pos) { return 0; }
625 return sbuf->buf[pos];
628 ic_private char* sbuf_strdup_at( stringbuf_t* sbuf, ssize_t pos ) {
629 return mem_strdup(sbuf->mem, sbuf_string_at(sbuf,pos));
632 ic_private char* sbuf_strdup( stringbuf_t* sbuf ) {
633 return mem_strdup(sbuf->mem, sbuf_string(sbuf));
636 ic_private ssize_t sbuf_len(const stringbuf_t* s) {
637 if (s == NULL) { return 0; }
638 return s->count;
641 ic_private ssize_t sbuf_append_vprintf(stringbuf_t* sb, const char* fmt, va_list args) {
642 const ssize_t min_needed = ic_strlen(fmt);
643 if (!sbuf_ensure_extra(sb,min_needed + 16)) { return sb->count; }
644 ssize_t avail = sb->buflen - sb->count;
645 va_list args0;
646 va_copy(args0, args);
647 ssize_t needed = vsnprintf(sb->buf + sb->count, to_size_t(avail), fmt, args0);
648 if (needed > avail) {
649 sb->buf[sb->count] = 0;
650 if (!sbuf_ensure_extra(sb, needed)) { return sb->count; }
651 avail = sb->buflen - sb->count;
652 needed = vsnprintf(sb->buf + sb->count, to_size_t(avail), fmt, args);
654 assert(needed <= avail);
655 sb->count += (needed > avail ? avail : (needed >= 0 ? needed : 0));
656 assert(sb->count <= sb->buflen);
657 sb->buf[sb->count] = 0;
658 return sb->count;
661 ic_private ssize_t sbuf_appendf(stringbuf_t* sb, const char* fmt, ...) {
662 va_list args;
663 va_start( args, fmt);
664 ssize_t res = sbuf_append_vprintf( sb, fmt, args );
665 va_end(args);
666 return res;
670 ic_private ssize_t sbuf_insert_at_n(stringbuf_t* sbuf, const char* s, ssize_t n, ssize_t pos ) {
671 if (pos < 0 || pos > sbuf->count || s == NULL) { return pos; }
672 n = str_limit_to_length(s,n);
673 if (n <= 0 || !sbuf_ensure_extra(sbuf,n)) { return pos; }
674 ic_memmove(sbuf->buf + pos + n, sbuf->buf + pos, sbuf->count - pos);
675 ic_memcpy(sbuf->buf + pos, s, n);
676 sbuf->count += n;
677 sbuf->buf[sbuf->count] = 0;
678 return (pos + n);
681 ic_private stringbuf_t* sbuf_split_at( stringbuf_t* sb, ssize_t pos ) {
682 stringbuf_t* res = sbuf_new(sb->mem);
683 if (res == NULL || pos < 0) { return NULL; }
684 if (pos < sb->count) {
685 sbuf_append_n(res, sb->buf + pos, sb->count - pos);
686 sb->count = pos;
688 return res;
691 ic_private ssize_t sbuf_insert_at(stringbuf_t* sbuf, const char* s, ssize_t pos ) {
692 return sbuf_insert_at_n( sbuf, s, ic_strlen(s), pos );
695 ic_private ssize_t sbuf_insert_char_at(stringbuf_t* sbuf, char c, ssize_t pos ) {
696 char s[2];
697 s[0] = c;
698 s[1] = 0;
699 return sbuf_insert_at_n( sbuf, s, 1, pos);
702 ic_private ssize_t sbuf_insert_unicode_at(stringbuf_t* sbuf, unicode_t u, ssize_t pos) {
703 uint8_t s[5];
704 unicode_to_qutf8(u, s);
705 return sbuf_insert_at(sbuf, (const char*)s, pos);
710 ic_private void sbuf_delete_at( stringbuf_t* sbuf, ssize_t pos, ssize_t count ) {
711 if (pos < 0 || pos >= sbuf->count) { return; }
712 if (pos + count > sbuf->count) { count = sbuf->count - pos; }
713 ic_memmove(sbuf->buf + pos, sbuf->buf + pos + count, sbuf->count - pos - count);
714 sbuf->count -= count;
715 sbuf->buf[sbuf->count] = 0;
718 ic_private void sbuf_delete_from_to( stringbuf_t* sbuf, ssize_t pos, ssize_t end ) {
719 if (end <= pos) { return; }
720 sbuf_delete_at( sbuf, pos, end - pos);
723 ic_private void sbuf_delete_from(stringbuf_t* sbuf, ssize_t pos ) {
724 sbuf_delete_at(sbuf, pos, sbuf_len(sbuf) - pos );
728 ic_private void sbuf_clear( stringbuf_t* sbuf ) {
729 sbuf_delete_at(sbuf, 0, sbuf_len(sbuf));
732 ic_private ssize_t sbuf_append_n( stringbuf_t* sbuf, const char* s, ssize_t n ) {
733 return sbuf_insert_at_n( sbuf, s, n, sbuf_len(sbuf));
736 ic_private ssize_t sbuf_append( stringbuf_t* sbuf, const char* s ) {
737 return sbuf_insert_at( sbuf, s, sbuf_len(sbuf));
740 ic_private ssize_t sbuf_append_char( stringbuf_t* sbuf, char c ) {
741 char buf[2];
742 buf[0] = c;
743 buf[1] = 0;
744 return sbuf_append( sbuf, buf );
747 ic_private void sbuf_replace(stringbuf_t* sbuf, const char* s) {
748 sbuf_clear(sbuf);
749 sbuf_append(sbuf,s);
752 ic_private ssize_t sbuf_next_ofs( stringbuf_t* sbuf, ssize_t pos, ssize_t* cwidth ) {
753 return str_next_ofs( sbuf->buf, sbuf->count, pos, cwidth);
756 ic_private ssize_t sbuf_prev_ofs( stringbuf_t* sbuf, ssize_t pos, ssize_t* cwidth ) {
757 return str_prev_ofs( sbuf->buf, pos, cwidth);
760 ic_private ssize_t sbuf_next( stringbuf_t* sbuf, ssize_t pos, ssize_t* cwidth) {
761 ssize_t ofs = sbuf_next_ofs(sbuf,pos,cwidth);
762 if (ofs <= 0) { return -1; }
763 assert(pos + ofs <= sbuf->count);
764 return pos + ofs;
767 ic_private ssize_t sbuf_prev( stringbuf_t* sbuf, ssize_t pos, ssize_t* cwidth) {
768 ssize_t ofs = sbuf_prev_ofs(sbuf,pos,cwidth);
769 if (ofs <= 0) { return -1; }
770 assert(pos - ofs >= 0);
771 return pos - ofs;
774 ic_private ssize_t sbuf_delete_char_before( stringbuf_t* sbuf, ssize_t pos ) {
775 ssize_t n = sbuf_prev_ofs(sbuf, pos, NULL);
776 if (n <= 0) { return 0; }
777 assert( pos - n >= 0 );
778 sbuf_delete_at(sbuf, pos - n, n);
779 return pos - n;
782 ic_private void sbuf_delete_char_at( stringbuf_t* sbuf, ssize_t pos ) {
783 ssize_t n = sbuf_next_ofs(sbuf, pos, NULL);
784 if (n <= 0) { return; }
785 assert( pos + n <= sbuf->count );
786 sbuf_delete_at(sbuf, pos, n);
787 return;
790 ic_private ssize_t sbuf_swap_char( stringbuf_t* sbuf, ssize_t pos ) {
791 ssize_t next = sbuf_next_ofs(sbuf, pos, NULL);
792 if (next <= 0) { return 0; }
793 ssize_t prev = sbuf_prev_ofs(sbuf, pos, NULL);
794 if (prev <= 0) { return 0; }
795 char buf[64];
796 if (prev >= 63) { return 0; }
797 ic_memcpy(buf, sbuf->buf + pos - prev, prev );
798 ic_memmove(sbuf->buf + pos - prev, sbuf->buf + pos, next);
799 ic_memmove(sbuf->buf + pos - prev + next, buf, prev);
800 return pos - prev;
803 ic_private ssize_t sbuf_find_line_start( stringbuf_t* sbuf, ssize_t pos ) {
804 return str_find_line_start( sbuf->buf, sbuf->count, pos);
807 ic_private ssize_t sbuf_find_line_end( stringbuf_t* sbuf, ssize_t pos ) {
808 return str_find_line_end( sbuf->buf, sbuf->count, pos);
811 ic_private ssize_t sbuf_find_word_start( stringbuf_t* sbuf, ssize_t pos ) {
812 return str_find_word_start( sbuf->buf, sbuf->count, pos);
815 ic_private ssize_t sbuf_find_word_end( stringbuf_t* sbuf, ssize_t pos ) {
816 return str_find_word_end( sbuf->buf, sbuf->count, pos);
819 ic_private ssize_t sbuf_find_ws_word_start( stringbuf_t* sbuf, ssize_t pos ) {
820 return str_find_ws_word_start( sbuf->buf, sbuf->count, pos);
823 ic_private ssize_t sbuf_find_ws_word_end( stringbuf_t* sbuf, ssize_t pos ) {
824 return str_find_ws_word_end( sbuf->buf, sbuf->count, pos);
827 // find row/col position
828 ic_private ssize_t sbuf_get_pos_at_rc( stringbuf_t* sbuf, ssize_t termw, ssize_t promptw, ssize_t cpromptw, ssize_t row, ssize_t col ) {
829 return str_get_pos_at_rc( sbuf->buf, sbuf->count, termw, promptw, cpromptw, row, col);
832 // get row/col for a given position
833 ic_private ssize_t sbuf_get_rc_at_pos( stringbuf_t* sbuf, ssize_t termw, ssize_t promptw, ssize_t cpromptw, ssize_t pos, rowcol_t* rc ) {
834 return str_get_rc_at_pos( sbuf->buf, sbuf->count, termw, promptw, cpromptw, pos, rc);
837 ic_private ssize_t sbuf_get_wrapped_rc_at_pos( stringbuf_t* sbuf, ssize_t termw, ssize_t newtermw, ssize_t promptw, ssize_t cpromptw, ssize_t pos, rowcol_t* rc ) {
838 return str_get_wrapped_rc_at_pos( sbuf->buf, sbuf->count, termw, newtermw, promptw, cpromptw, pos, rc);
841 ic_private ssize_t sbuf_for_each_row( stringbuf_t* sbuf, ssize_t termw, ssize_t promptw, ssize_t cpromptw, row_fun_t* fun, void* arg, void* res ) {
842 if (sbuf == NULL) { return 0; }
843 return str_for_each_row( sbuf->buf, sbuf->count, termw, promptw, cpromptw, fun, arg, res);
847 // Duplicate and decode from utf-8 (for non-utf8 terminals)
848 ic_private char* sbuf_strdup_from_utf8(stringbuf_t* sbuf) {
849 ssize_t len = sbuf_len(sbuf);
850 if (sbuf == NULL || len <= 0) { return NULL; }
851 char* s = mem_zalloc_tp_n(sbuf->mem, char, len);
852 if (s == NULL) { return NULL; }
853 ssize_t dest = 0;
854 for (ssize_t i = 0; i < len; ) {
855 ssize_t ofs = sbuf_next_ofs(sbuf, i, NULL);
856 if (ofs <= 0) {
857 // invalid input
858 break;
860 else if (ofs == 1) {
861 // regular character
862 s[dest++] = sbuf->buf[i];
864 else if (sbuf->buf[i] == '\x1B') {
865 // skip escape sequences
867 else {
868 // decode unicode
869 ssize_t nread;
870 unicode_t uchr = unicode_from_qutf8( (const uint8_t*)(sbuf->buf + i), ofs, &nread);
871 uint8_t c;
872 if (unicode_is_raw(uchr, &c)) {
873 // raw byte, output as is (this will take care of locale specific input)
874 s[dest++] = (char)c;
876 else if (uchr <= 0x7F) {
877 // allow ascii
878 s[dest++] = (char)uchr;
880 else {
881 // skip unknown unicode characters..
882 // todo: convert according to locale?
885 i += ofs;
887 assert(dest <= len);
888 s[dest] = 0;
889 return s;
892 //-------------------------------------------------------------
893 // String helpers
894 //-------------------------------------------------------------
896 ic_public long ic_prev_char( const char* s, long pos ) {
897 ssize_t len = ic_strlen(s);
898 if (pos < 0 || pos > len) { return -1; }
899 ssize_t ofs = str_prev_ofs( s, pos, NULL );
900 if (ofs <= 0) { return -1; }
901 return (long)(pos - ofs);
904 ic_public long ic_next_char( const char* s, long pos ) {
905 ssize_t len = ic_strlen(s);
906 if (pos < 0 || pos > len) { return -1; }
907 ssize_t ofs = str_next_ofs( s, len, pos, NULL );
908 if (ofs <= 0) { return -1; }
909 return (long)(pos + ofs);
913 // parse a decimal (leave pi unchanged on error)
914 ic_private bool ic_atoz(const char* s, ssize_t* pi) {
915 return (sscanf(s, "%" PRIz "d", pi) == 1);
918 // parse two decimals separated by a semicolon
919 ic_private bool ic_atoz2(const char* s, ssize_t* pi, ssize_t* pj) {
920 return (sscanf(s, "%" PRIz "d;%" PRIz "d", pi, pj) == 2);
923 // parse unsigned 32-bit (leave pu unchanged on error)
924 ic_private bool ic_atou32(const char* s, uint32_t* pu) {
925 return (sscanf(s, "%" SCNu32, pu) == 1);
929 // Convenience: character class for whitespace `[ \t\r\n]`.
930 ic_public bool ic_char_is_white(const char* s, long len) {
931 if (s == NULL || len != 1) { return false; }
932 const char c = *s;
933 return (c == ' ' || c == '\t' || c == '\n' || c == '\r');
936 // Convenience: character class for non-whitespace `[^ \t\r\n]`.
937 ic_public bool ic_char_is_nonwhite(const char* s, long len) {
938 return !ic_char_is_white(s, len);
941 // Convenience: character class for separators `[ \t\r\n,.;:/\\\(\)\{\}\[\]]`.
942 ic_public bool ic_char_is_separator(const char* s, long len) {
943 if (s == NULL || len != 1) { return false; }
944 const char c = *s;
945 return (strchr(" \t\r\n,.;:/\\(){}[]", c) != NULL);
948 // Convenience: character class for non-separators.
949 ic_public bool ic_char_is_nonseparator(const char* s, long len) {
950 return !ic_char_is_separator(s, len);
954 // Convenience: character class for digits (`[0-9]`).
955 ic_public bool ic_char_is_digit(const char* s, long len) {
956 if (s == NULL || len != 1) { return false; }
957 const char c = *s;
958 return (c >= '0' && c <= '9');
961 // Convenience: character class for hexadecimal digits (`[A-Fa-f0-9]`).
962 ic_public bool ic_char_is_hexdigit(const char* s, long len) {
963 if (s == NULL || len != 1) { return false; }
964 const char c = *s;
965 return ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
968 // Convenience: character class for letters (`[A-Za-z]` and any unicode > 0x80).
969 ic_public bool ic_char_is_letter(const char* s, long len) {
970 if (s == NULL || len <= 0) { return false; }
971 const char c = *s;
972 return ((uint8_t)c >= 0x80 || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'));
975 // Convenience: character class for identifier letters (`[A-Za-z0-9_-]` and any unicode > 0x80).
976 ic_public bool ic_char_is_idletter(const char* s, long len) {
977 if (s == NULL || len <= 0) { return false; }
978 const char c = *s;
979 return ((uint8_t)c >= 0x80 || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c == '_') || (c == '-'));
982 // Convenience: character class for filename letters (`[^ \t\r\n`@$><=;|&{(]`).
983 ic_public bool ic_char_is_filename_letter(const char* s, long len) {
984 if (s == NULL || len <= 0) { return false; }
985 const char c = *s;
986 return ((uint8_t)c >= 0x80 || (strchr(" \t\r\n`@$><=;|&{}()[]", c) == NULL));
989 // Convenience: If this is a token start, returns the length (or <= 0 if not found).
990 ic_public long ic_is_token(const char* s, long pos, ic_is_char_class_fun_t* is_token_char) {
991 if (s == NULL || pos < 0 || is_token_char == NULL) { return -1; }
992 ssize_t len = ic_strlen(s);
993 if (pos >= len) { return -1; }
994 if (pos > 0 && is_token_char(s + pos -1, 1)) { return -1; } // token start?
995 ssize_t i = pos;
996 while ( i < len ) {
997 ssize_t next = str_next_ofs(s, len, i, NULL);
998 if (next <= 0) { return -1; }
999 if (!is_token_char(s + i, (long)next)) { break; }
1000 i += next;
1002 return (long)(i - pos);
// `strncmp` with a signed length; the length is converted with `to_size_t`.
static int ic_strncmp(const char* s1, const char* s2, ssize_t n) {
  const size_t count = to_size_t(n);
  return strncmp(s1, s2, count);
}
1010 // Convenience: Does this match the specified token?
1011 // Ensures not to match prefixes or suffixes, and returns the length of the match (in bytes).
1012 // E.g. `ic_match_token("function",0,&ic_char_is_letter,"fun")` returns 0.
1013 ic_public long ic_match_token(const char* s, long pos, ic_is_char_class_fun_t* is_token_char, const char* token) {
1014 long n = ic_is_token(s, pos, is_token_char);
1015 if (n > 0 && token != NULL && n == ic_strlen(token) && ic_strncmp(s + pos, token, n) == 0) {
1016 return n;
1018 else {
1019 return 0;
1024 // Convenience: Do any of the specified tokens match?
1025 // Ensures not to match prefixes or suffixes, and returns the length of the match (in bytes).
1026 // Ensures not to match prefixes or suffixes.
1027 // E.g. `ic_match_any_token("function",0,&ic_char_is_letter,{"fun","func",NULL})` returns 0.
1028 ic_public long ic_match_any_token(const char* s, long pos, ic_is_char_class_fun_t* is_token_char, const char** tokens) {
1029 long n = ic_is_token(s, pos, is_token_char);
1030 if (n <= 0 || tokens == NULL) { return 0; }
1031 for (const char** token = tokens; *token != NULL; token++) {
1032 if (n == ic_strlen(*token) && ic_strncmp(s + pos, *token, n) == 0) {
1033 return n;
1036 return 0;