1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2021, Daan Leijen
3 This is free software; you can redistribute it and/or modify it
4 under the terms of the MIT License. A copy of the license can be
5 found in the "LICENSE" file at the root of this distribution.
6 -----------------------------------------------------------------------------*/
8 // get `wcwidth` for the column width of unicode characters
9 // note: for now the OS provided one is unused as we see quite a bit of variation
10 // among platforms and including our own seems more reliable.
12 #if defined(__linux__) || defined(__freebsd__)
13 // use the system supplied one
14 #if !defined(_XOPEN_SOURCE)
15 #define _XOPEN_SOURCE 700 // so wcwidth is visible
20 // use our own (also on APPLE as that fails within vscode)
21 #define wcwidth(c) mk_wcwidth(c)
30 #include "stringbuf.h"
32 //-------------------------------------------------------------
33 // In place growable utf-8 strings
34 //-------------------------------------------------------------
44 //-------------------------------------------------------------
45 // String column width
46 //-------------------------------------------------------------
48 // column width of a utf8 single character sequence.
49 static ssize_t
utf8_char_width( const char* s
, ssize_t n
) {
50 if (n
<= 0) { return 0; }
52 uint8_t b
= (uint8_t)s
[0];
60 else if (b
<= 0xC1) { // invalid continuation byte or invalid 0xC0, 0xC1 (check is strictly not necessary as we don't validate..)
63 else if (b
<= 0xDF && n
>= 2) { // b >= 0xC2 // 2 bytes
64 c
= (((b
& 0x1F) << 6) | (s
[1] & 0x3F));
65 assert(c
< 0xD800 || c
> 0xDFFF);
69 else if (b
<= 0xEF && n
>= 3) { // b >= 0xE0 // 3 bytes
70 c
= (((b
& 0x0F) << 12) | ((s
[1] & 0x3F) << 6) | (s
[2] & 0x3F));
73 else if (b
<= 0xF4 && n
>= 4) { // b >= 0xF0 // 4 bytes
74 c
= (((b
& 0x07) << 18) | ((s
[1] & 0x3F) << 12) | ((s
[2] & 0x3F) << 6) | (s
[3] & 0x3F));
84 // The column width of a codepoint (0, 1, or 2)
85 static ssize_t
char_column_width( const char* s
, ssize_t n
) {
86 if (s
== NULL
|| n
<= 0) { return 0; }
87 else if ((uint8_t)(*s
) < ' ') { return 0; } // also for CSI escape sequences
89 ssize_t w
= utf8_char_width(s
, n
);
91 return (w
<= 0 ? 1 : w
); // windows console seems to use at least one column
98 static ssize_t
str_column_width_n( const char* s
, ssize_t len
) {
99 if (s
== NULL
|| len
<= 0) { return 0; }
104 while (s
[pos
] != 0 && (ofs
= str_next_ofs(s
, len
, pos
, &cw
)) > 0) {
111 ic_private ssize_t
str_column_width( const char* s
) {
112 return str_column_width_n( s
, ic_strlen(s
) );
115 ic_private ssize_t
str_skip_until_fit( const char* s
, ssize_t max_width
) {
116 if (s
== NULL
) { return 0; }
117 ssize_t cwidth
= str_column_width(s
);
118 ssize_t len
= ic_strlen(s
);
122 while (cwidth
> max_width
&& (next
= str_next_ofs(s
, len
, pos
, &cw
)) > 0) {
129 ic_private ssize_t
str_take_while_fit( const char* s
, ssize_t max_width
) {
130 if (s
== NULL
) { return 0; }
131 const ssize_t len
= ic_strlen(s
);
136 while ((next
= str_next_ofs(s
, len
, pos
, &cw
)) > 0) {
137 if (cwidth
+ cw
> max_width
) { break; }
145 //-------------------------------------------------------------
147 //-------------------------------------------------------------
149 // get offset of the previous codepoint. does not skip back over CSI sequences.
150 ic_private ssize_t
str_prev_ofs( const char* s
, ssize_t pos
, ssize_t
* width
) {
152 if (s
!= NULL
&& pos
> 0) {
155 uint8_t u
= (uint8_t)s
[pos
- ofs
];
156 if (u
< 0x80 || u
> 0xBF) { break; } // continue while follower
160 if (width
!= NULL
) { *width
= char_column_width( s
+(pos
-ofs
), ofs
); }
164 // skip an escape sequence
165 // <https://www.xfree86.org/current/ctlseqs.html>
166 ic_private
bool skip_esc( const char* s
, ssize_t len
, ssize_t
* esclen
) {
167 if (s
== NULL
|| len
<= 1 || s
[0] != '\x1B') { return false; }
168 if (esclen
!= NULL
) { *esclen
= 0; }
169 if (strchr("[PX^_]",s
[1]) != NULL
) {
170 // CSI (ESC [), DCS (ESC P), SOS (ESC X), PM (ESC ^), APC (ESC _), and OSC (ESC ]): terminated with a special sequence
171 bool finalCSI
= (s
[1] == '['); // CSI terminates with 0x40-0x7F; otherwise ST (bell or ESC \)
175 if ((finalCSI
&& (uint8_t)c
>= 0x40 && (uint8_t)c
<= 0x7F) || // terminating byte: @A–Z[\]^_`a–z{|}~
176 (!finalCSI
&& c
== '\x07') || // bell
177 (c
== '\x02')) // STX terminates as well
179 if (esclen
!= NULL
) { *esclen
= n
; }
182 else if (!finalCSI
&& c
== '\x1B' && len
> n
&& s
[n
] == '\\') { // ST (ESC \)
184 if (esclen
!= NULL
) { *esclen
= n
; }
189 if (strchr(" #%()*+",s
[1]) != NULL
) {
190 // assume escape sequence of length 3 (like ESC % G)
191 if (esclen
!= NULL
) { *esclen
= 2; }
195 // assume single character escape code (like ESC 7)
196 if (esclen
!= NULL
) { *esclen
= 2; }
202 // Offset to the next codepoint, treats CSI escape sequences as a single code point.
203 ic_private ssize_t
str_next_ofs( const char* s
, ssize_t len
, ssize_t pos
, ssize_t
* cwidth
) {
205 if (s
!= NULL
&& len
> pos
) {
206 if (skip_esc(s
+pos
,len
-pos
,&ofs
)) {
207 // skip escape sequence
211 // utf8 extended character?
212 while(len
> pos
+ ofs
) {
213 uint8_t u
= (uint8_t)s
[pos
+ ofs
];
214 if (u
< 0x80 || u
> 0xBF) { break; } // break if not a follower
219 if (cwidth
!= NULL
) { *cwidth
= char_column_width( s
+pos
, ofs
); }
223 static ssize_t
str_limit_to_length( const char* s
, ssize_t n
) {
225 for(i
= 0; i
< n
&& s
[i
] != 0; i
++) { /* nothing */ }
230 //-------------------------------------------------------------
231 // String searching prev/next word, line, ws_word
232 //-------------------------------------------------------------
235 static ssize_t
str_find_backward( const char* s
, ssize_t len
, ssize_t pos
, ic_is_char_class_fun_t
* match
, bool skip_immediate_matches
) {
236 if (pos
> len
) { pos
= len
; }
237 if (pos
< 0) { pos
= 0; }
239 // skip matching first (say, whitespace in case of the previous start-of-word)
240 if (skip_immediate_matches
) {
242 ssize_t prev
= str_prev_ofs(s
, i
, NULL
);
243 if (prev
<= 0) { break; }
244 assert(i
- prev
>= 0);
245 if (!match(s
+ i
- prev
, (long)prev
)) { break; }
251 ssize_t prev
= str_prev_ofs(s
, i
, NULL
);
252 if (prev
<= 0) { break; }
253 assert(i
- prev
>= 0);
254 if (match(s
+ i
- prev
, (long)prev
)) {
259 return -1; // not found
262 static ssize_t
str_find_forward( const char* s
, ssize_t len
, ssize_t pos
, ic_is_char_class_fun_t
* match
, bool skip_immediate_matches
) {
263 if (s
== NULL
|| len
< 0) { return -1; }
264 if (pos
> len
) { pos
= len
; }
265 if (pos
< 0) { pos
= 0; }
268 // skip matching first (say, whitespace in case of the next end-of-word)
269 if (skip_immediate_matches
) {
271 next
= str_next_ofs(s
, len
, i
, NULL
);
272 if (next
<= 0) { break; }
273 assert( i
+ next
<= len
);
274 if (!match(s
+ i
, (long)next
)) { break; }
280 next
= str_next_ofs(s
, len
, i
, NULL
);
281 if (next
<= 0) { break; }
282 assert( i
+ next
<= len
);
283 if (match(s
+ i
, (long)next
)) {
// Character class used for line searches: matches a single-byte sequence
// (`n == 1`) that is either a newline or the zero terminator.
// `s` is only dereferenced when `n == 1`.
static bool char_is_linefeed( const char* s, long n ) {
  if (n != 1) { return false; }
  const char ch = *s;
  return (ch == '\n' || ch == 0);
}
295 static ssize_t
str_find_line_start( const char* s
, ssize_t len
, ssize_t pos
) {
296 ssize_t start
= str_find_backward(s
,len
,pos
,&char_is_linefeed
,false /* don't skip immediate matches */);
297 return (start
< 0 ? 0 : start
);
300 static ssize_t
str_find_line_end( const char* s
, ssize_t len
, ssize_t pos
) {
301 ssize_t end
= str_find_forward(s
,len
,pos
, &char_is_linefeed
, false);
302 return (end
< 0 ? len
: end
);
305 static ssize_t
str_find_word_start( const char* s
, ssize_t len
, ssize_t pos
) {
306 ssize_t start
= str_find_backward(s
,len
,pos
, &ic_char_is_idletter
,true /* skip immediate matches */);
307 return (start
< 0 ? 0 : start
);
310 static ssize_t
str_find_word_end( const char* s
, ssize_t len
, ssize_t pos
) {
311 ssize_t end
= str_find_forward(s
,len
,pos
,&ic_char_is_idletter
,true /* skip immediate matches */);
312 return (end
< 0 ? len
: end
);
315 static ssize_t
str_find_ws_word_start( const char* s
, ssize_t len
, ssize_t pos
) {
316 ssize_t start
= str_find_backward(s
,len
,pos
,&ic_char_is_white
,true /* skip immediate matches */);
317 return (start
< 0 ? 0 : start
);
320 static ssize_t
str_find_ws_word_end( const char* s
, ssize_t len
, ssize_t pos
) {
321 ssize_t end
= str_find_forward(s
,len
,pos
,&ic_char_is_white
,true /* skip immediate matches */);
322 return (end
< 0 ? len
: end
);
326 //-------------------------------------------------------------
327 // String row/column iteration
328 //-------------------------------------------------------------
330 // invoke a function for each terminal row; returns total row count.
331 static ssize_t
str_for_each_row( const char* s
, ssize_t len
, ssize_t termw
, ssize_t promptw
, ssize_t cpromptw
,
332 row_fun_t
* fun
, const void* arg
, void* res
)
334 if (s
== NULL
) { s
= ""; }
339 ssize_t startw
= promptw
;
340 for(i
= 0; i
< len
; ) {
342 ssize_t next
= str_next_ofs(s
, len
, i
, &w
);
344 debug_msg("str: foreach row: next<=0: len %" PRIz
"d, i %" PRIz
"d, w %" PRIz
"d, buf %s\n", len
, i
, w
, s
);
348 startw
= (rcount
== 0 ? promptw
: cpromptw
);
349 ssize_t termcol
= rcol
+ w
+ startw
+ 1 /* for the cursor */;
350 if (termw
!= 0 && i
!= 0 && termcol
>= termw
) {
353 if (fun(s
,rcount
,rstart
,i
- rstart
,startw
,true,arg
,res
)) { return rcount
; }
362 if (fun(s
,rcount
,rstart
,i
- rstart
,startw
,false,arg
,res
)) { return rcount
; }
373 if (fun(s
,rcount
,rstart
,i
- rstart
,startw
,false,arg
,res
)) { return rcount
; }
378 //-------------------------------------------------------------
379 // String: get row/column position
380 //-------------------------------------------------------------
383 static bool str_get_current_pos_iter(
385 ssize_t row
, ssize_t row_start
, ssize_t row_len
,
386 ssize_t startw
, bool is_wrap
, const void* arg
, void* res
)
388 ic_unused(is_wrap
); ic_unused(startw
);
389 rowcol_t
* rc
= (rowcol_t
*)res
;
390 ssize_t pos
= *((ssize_t
*)arg
);
392 if (pos
>= row_start
&& pos
<= (row_start
+ row_len
)) {
393 // found the cursor row
394 rc
->row_start
= row_start
;
395 rc
->row_len
= row_len
;
397 rc
->col
= str_column_width_n( s
+ row_start
, pos
- row_start
);
398 rc
->first_on_row
= (pos
== row_start
);
400 // if wrapped, we check if the next character is at row_len
401 ssize_t next
= str_next_ofs(s
, row_start
+ row_len
, pos
, NULL
);
402 rc
->last_on_row
= (pos
+ next
>= row_start
+ row_len
);
405 // normal last position is right after the last character
406 rc
->last_on_row
= (pos
>= row_start
+ row_len
);
408 // debug_msg("edit; pos iter: pos: %" PRIz "d (%c), row_start: %" PRIz "d, rowlen: %" PRIz "d\n", pos, s[pos], row_start, row_len);
410 return false; // always continue to count all rows
413 static ssize_t
str_get_rc_at_pos(const char* s
, ssize_t len
, ssize_t termw
, ssize_t promptw
, ssize_t cpromptw
, ssize_t pos
, rowcol_t
* rc
) {
414 memset(rc
, 0, sizeof(*rc
));
415 ssize_t rows
= str_for_each_row(s
, len
, termw
, promptw
, cpromptw
, &str_get_current_pos_iter
, &pos
, rc
);
416 // debug_msg("edit: current pos: (%d, %d) %s %s\n", rc->row, rc->col, rc->first_on_row ? "first" : "", rc->last_on_row ? "last" : "");
422 //-------------------------------------------------------------
423 // String: get row/column position for a resized terminal
424 // with potentially "hard-wrapped" rows
425 //-------------------------------------------------------------
426 typedef struct wrapped_arg_s
{
431 typedef struct wrowcol_s
{
433 ssize_t hrows
; // count of hard-wrapped extra rows
436 static bool str_get_current_wrapped_pos_iter(
438 ssize_t row
, ssize_t row_start
, ssize_t row_len
,
439 ssize_t startw
, bool is_wrap
, const void* arg
, void* res
)
442 wrowcol_t
* wrc
= (wrowcol_t
*)res
;
443 const wrapped_arg_t
* warg
= (const wrapped_arg_t
*)arg
;
445 // iterate through the row and record the position and hard-wraps
446 ssize_t hwidth
= startw
;
448 while( i
<= row_len
) { // include rowlen as the cursor position can be just after the last character
449 // get next position and column width
452 bool is_cursor
= (warg
->pos
== row_start
+i
);
454 next
= str_next_ofs(s
+ row_start
, row_len
, i
, &cw
);
457 // end of row: take wrap or cursor into account
458 // (wrap has width 2 as it displays a back-arrow but also has an invisible newline that wraps)
459 cw
= (is_wrap
? 2 : (is_cursor
? 1 : 0));
464 if (hwidth
+ cw
> warg
->newtermw
) {
468 debug_msg("str: found hardwrap: row: %" PRIz
"d, hrows: %" PRIz
"d\n", row
, wrc
->hrows
);
472 next
++; // ensure we terminate (as we go up to rowlen)
475 // did we find our position?
477 debug_msg("str: found position: row: %" PRIz
"d, hrows: %" PRIz
"d\n", row
, wrc
->hrows
);
478 wrc
->rc
.row_start
= row_start
;
479 wrc
->rc
.row_len
= row_len
;
480 wrc
->rc
.row
= wrc
->hrows
+ row
;
481 wrc
->rc
.col
= hwidth
;
482 wrc
->rc
.first_on_row
= (i
== 0);
483 wrc
->rc
.last_on_row
= (i
+next
>= row_len
- (is_wrap
? 1 : 0));
490 return false; // always continue to count all rows
494 static ssize_t
str_get_wrapped_rc_at_pos(const char* s
, ssize_t len
, ssize_t termw
, ssize_t newtermw
, ssize_t promptw
, ssize_t cpromptw
, ssize_t pos
, rowcol_t
* rc
) {
497 warg
.newtermw
= newtermw
;
499 memset(&wrc
,0,sizeof(wrc
));
500 ssize_t rows
= str_for_each_row(s
, len
, termw
, promptw
, cpromptw
, &str_get_current_wrapped_pos_iter
, &warg
, &wrc
);
501 debug_msg("edit: wrapped pos: (%" PRIz
"d,%" PRIz
"d) rows %" PRIz
"d %s %s, hrows: %" PRIz
"d\n", wrc
.rc
.row
, wrc
.rc
.col
, rows
, wrc
.rc
.first_on_row
? "first" : "", wrc
.rc
.last_on_row
? "last" : "", wrc
.hrows
);
503 return (rows
+ wrc
.hrows
);
507 //-------------------------------------------------------------
509 //-------------------------------------------------------------
511 static bool str_set_pos_iter(
513 ssize_t row
, ssize_t row_start
, ssize_t row_len
,
514 ssize_t startw
, bool is_wrap
, const void* arg
, void* res
)
516 ic_unused(arg
); ic_unused(is_wrap
); ic_unused(startw
);
517 rowcol_t
* rc
= (rowcol_t
*)arg
;
518 if (rc
->row
!= row
) { return false; } // keep searching
521 ssize_t i
= row_start
;
522 ssize_t end
= row_start
+ row_len
;
523 while (col
< rc
->col
&& i
< end
) {
525 ssize_t next
= str_next_ofs(s
, row_start
+ row_len
, i
, &cw
);
526 if (next
<= 0) { break; }
530 *((ssize_t
*)res
) = i
;
531 return true; // stop iteration
534 static ssize_t
str_get_pos_at_rc(const char* s
, ssize_t len
, ssize_t termw
, ssize_t promptw
, ssize_t cpromptw
, ssize_t row
, ssize_t col
/* without prompt */) {
536 memset(&rc
,0,ssizeof(rc
));
540 str_for_each_row(s
,len
,termw
,promptw
,cpromptw
,&str_set_pos_iter
,&rc
,&pos
);
545 //-------------------------------------------------------------
547 //-------------------------------------------------------------
548 static bool sbuf_ensure_extra(stringbuf_t
* s
, ssize_t extra
)
550 if (s
->buflen
>= s
->count
+ extra
) { return true; }
551 // reallocate; pick good initial size and multiples to increase reuse on allocation
552 ssize_t newlen
= (s
->buflen
<= 0 ? 120 : (s
->buflen
> 1000 ? s
->buflen
+ 1000 : 2*s
->buflen
));
553 if (newlen
< s
->count
+ extra
) { newlen
= s
->count
+ extra
; }
555 debug_msg("stringbuf: reallocate: old %" PRIz
"d, new %" PRIz
"d\n", s
->buflen
, newlen
);
557 char* newbuf
= mem_realloc_tp(s
->mem
, char, s
->buf
, newlen
+1); // one more for terminating zero
558 if (newbuf
== NULL
) {
564 s
->buf
[s
->count
] = s
->buf
[s
->buflen
] = 0;
565 assert(s
->buflen
>= s
->count
+ extra
);
569 static void sbuf_init( stringbuf_t
* sbuf
, alloc_t
* mem
) {
576 static void sbuf_done( stringbuf_t
* sbuf
) {
577 mem_free( sbuf
->mem
, sbuf
->buf
);
584 ic_private
void sbuf_free( stringbuf_t
* sbuf
) {
585 if (sbuf
== NULL
) { return; }
587 mem_free(sbuf
->mem
, sbuf
);
590 ic_private stringbuf_t
* sbuf_new( alloc_t
* mem
) {
591 stringbuf_t
* sbuf
= mem_zalloc_tp(mem
,stringbuf_t
);
592 if (sbuf
== NULL
) { return NULL
; }
597 // free the sbuf and return the current string buffer as the result
598 ic_private
char* sbuf_free_dup(stringbuf_t
* sbuf
) {
599 if (sbuf
== NULL
) { return NULL
; }
601 if (sbuf
->buf
!= NULL
) {
602 s
= mem_realloc_tp(sbuf
->mem
, char, sbuf
->buf
, sbuf_len(sbuf
)+1);
603 if (s
== NULL
) { s
= sbuf
->buf
; }
612 ic_private
const char* sbuf_string_at( stringbuf_t
* sbuf
, ssize_t pos
) {
613 if (pos
< 0 || sbuf
->count
< pos
) { return NULL
; }
614 if (sbuf
->buf
== NULL
) { return ""; }
615 assert(sbuf
->buf
[sbuf
->count
] == 0);
616 return sbuf
->buf
+ pos
;
619 ic_private
const char* sbuf_string( stringbuf_t
* sbuf
) {
620 return sbuf_string_at( sbuf
, 0 );
623 ic_private
char sbuf_char_at(stringbuf_t
* sbuf
, ssize_t pos
) {
624 if (sbuf
->buf
== NULL
|| pos
< 0 || sbuf
->count
< pos
) { return 0; }
625 return sbuf
->buf
[pos
];
628 ic_private
char* sbuf_strdup_at( stringbuf_t
* sbuf
, ssize_t pos
) {
629 return mem_strdup(sbuf
->mem
, sbuf_string_at(sbuf
,pos
));
632 ic_private
char* sbuf_strdup( stringbuf_t
* sbuf
) {
633 return mem_strdup(sbuf
->mem
, sbuf_string(sbuf
));
636 ic_private ssize_t
sbuf_len(const stringbuf_t
* s
) {
637 if (s
== NULL
) { return 0; }
641 ic_private ssize_t
sbuf_append_vprintf(stringbuf_t
* sb
, const char* fmt
, va_list args
) {
642 const ssize_t min_needed
= ic_strlen(fmt
);
643 if (!sbuf_ensure_extra(sb
,min_needed
+ 16)) { return sb
->count
; }
644 ssize_t avail
= sb
->buflen
- sb
->count
;
646 va_copy(args0
, args
);
647 ssize_t needed
= vsnprintf(sb
->buf
+ sb
->count
, to_size_t(avail
), fmt
, args0
);
648 if (needed
> avail
) {
649 sb
->buf
[sb
->count
] = 0;
650 if (!sbuf_ensure_extra(sb
, needed
)) { return sb
->count
; }
651 avail
= sb
->buflen
- sb
->count
;
652 needed
= vsnprintf(sb
->buf
+ sb
->count
, to_size_t(avail
), fmt
, args
);
654 assert(needed
<= avail
);
655 sb
->count
+= (needed
> avail
? avail
: (needed
>= 0 ? needed
: 0));
656 assert(sb
->count
<= sb
->buflen
);
657 sb
->buf
[sb
->count
] = 0;
661 ic_private ssize_t
sbuf_appendf(stringbuf_t
* sb
, const char* fmt
, ...) {
663 va_start( args
, fmt
);
664 ssize_t res
= sbuf_append_vprintf( sb
, fmt
, args
);
670 ic_private ssize_t
sbuf_insert_at_n(stringbuf_t
* sbuf
, const char* s
, ssize_t n
, ssize_t pos
) {
671 if (pos
< 0 || pos
> sbuf
->count
|| s
== NULL
) { return pos
; }
672 n
= str_limit_to_length(s
,n
);
673 if (n
<= 0 || !sbuf_ensure_extra(sbuf
,n
)) { return pos
; }
674 ic_memmove(sbuf
->buf
+ pos
+ n
, sbuf
->buf
+ pos
, sbuf
->count
- pos
);
675 ic_memcpy(sbuf
->buf
+ pos
, s
, n
);
677 sbuf
->buf
[sbuf
->count
] = 0;
681 ic_private stringbuf_t
* sbuf_split_at( stringbuf_t
* sb
, ssize_t pos
) {
682 stringbuf_t
* res
= sbuf_new(sb
->mem
);
683 if (res
== NULL
|| pos
< 0) { return NULL
; }
684 if (pos
< sb
->count
) {
685 sbuf_append_n(res
, sb
->buf
+ pos
, sb
->count
- pos
);
691 ic_private ssize_t
sbuf_insert_at(stringbuf_t
* sbuf
, const char* s
, ssize_t pos
) {
692 return sbuf_insert_at_n( sbuf
, s
, ic_strlen(s
), pos
);
695 ic_private ssize_t
sbuf_insert_char_at(stringbuf_t
* sbuf
, char c
, ssize_t pos
) {
699 return sbuf_insert_at_n( sbuf
, s
, 1, pos
);
702 ic_private ssize_t
sbuf_insert_unicode_at(stringbuf_t
* sbuf
, unicode_t u
, ssize_t pos
) {
704 unicode_to_qutf8(u
, s
);
705 return sbuf_insert_at(sbuf
, (const char*)s
, pos
);
710 ic_private
void sbuf_delete_at( stringbuf_t
* sbuf
, ssize_t pos
, ssize_t count
) {
711 if (pos
< 0 || pos
>= sbuf
->count
) { return; }
712 if (pos
+ count
> sbuf
->count
) { count
= sbuf
->count
- pos
; }
713 ic_memmove(sbuf
->buf
+ pos
, sbuf
->buf
+ pos
+ count
, sbuf
->count
- pos
- count
);
714 sbuf
->count
-= count
;
715 sbuf
->buf
[sbuf
->count
] = 0;
718 ic_private
void sbuf_delete_from_to( stringbuf_t
* sbuf
, ssize_t pos
, ssize_t end
) {
719 if (end
<= pos
) { return; }
720 sbuf_delete_at( sbuf
, pos
, end
- pos
);
723 ic_private
void sbuf_delete_from(stringbuf_t
* sbuf
, ssize_t pos
) {
724 sbuf_delete_at(sbuf
, pos
, sbuf_len(sbuf
) - pos
);
728 ic_private
void sbuf_clear( stringbuf_t
* sbuf
) {
729 sbuf_delete_at(sbuf
, 0, sbuf_len(sbuf
));
732 ic_private ssize_t
sbuf_append_n( stringbuf_t
* sbuf
, const char* s
, ssize_t n
) {
733 return sbuf_insert_at_n( sbuf
, s
, n
, sbuf_len(sbuf
));
736 ic_private ssize_t
sbuf_append( stringbuf_t
* sbuf
, const char* s
) {
737 return sbuf_insert_at( sbuf
, s
, sbuf_len(sbuf
));
740 ic_private ssize_t
sbuf_append_char( stringbuf_t
* sbuf
, char c
) {
744 return sbuf_append( sbuf
, buf
);
747 ic_private
void sbuf_replace(stringbuf_t
* sbuf
, const char* s
) {
752 ic_private ssize_t
sbuf_next_ofs( stringbuf_t
* sbuf
, ssize_t pos
, ssize_t
* cwidth
) {
753 return str_next_ofs( sbuf
->buf
, sbuf
->count
, pos
, cwidth
);
756 ic_private ssize_t
sbuf_prev_ofs( stringbuf_t
* sbuf
, ssize_t pos
, ssize_t
* cwidth
) {
757 return str_prev_ofs( sbuf
->buf
, pos
, cwidth
);
760 ic_private ssize_t
sbuf_next( stringbuf_t
* sbuf
, ssize_t pos
, ssize_t
* cwidth
) {
761 ssize_t ofs
= sbuf_next_ofs(sbuf
,pos
,cwidth
);
762 if (ofs
<= 0) { return -1; }
763 assert(pos
+ ofs
<= sbuf
->count
);
767 ic_private ssize_t
sbuf_prev( stringbuf_t
* sbuf
, ssize_t pos
, ssize_t
* cwidth
) {
768 ssize_t ofs
= sbuf_prev_ofs(sbuf
,pos
,cwidth
);
769 if (ofs
<= 0) { return -1; }
770 assert(pos
- ofs
>= 0);
774 ic_private ssize_t
sbuf_delete_char_before( stringbuf_t
* sbuf
, ssize_t pos
) {
775 ssize_t n
= sbuf_prev_ofs(sbuf
, pos
, NULL
);
776 if (n
<= 0) { return 0; }
777 assert( pos
- n
>= 0 );
778 sbuf_delete_at(sbuf
, pos
- n
, n
);
782 ic_private
void sbuf_delete_char_at( stringbuf_t
* sbuf
, ssize_t pos
) {
783 ssize_t n
= sbuf_next_ofs(sbuf
, pos
, NULL
);
784 if (n
<= 0) { return; }
785 assert( pos
+ n
<= sbuf
->count
);
786 sbuf_delete_at(sbuf
, pos
, n
);
790 ic_private ssize_t
sbuf_swap_char( stringbuf_t
* sbuf
, ssize_t pos
) {
791 ssize_t next
= sbuf_next_ofs(sbuf
, pos
, NULL
);
792 if (next
<= 0) { return 0; }
793 ssize_t prev
= sbuf_prev_ofs(sbuf
, pos
, NULL
);
794 if (prev
<= 0) { return 0; }
796 if (prev
>= 63) { return 0; }
797 ic_memcpy(buf
, sbuf
->buf
+ pos
- prev
, prev
);
798 ic_memmove(sbuf
->buf
+ pos
- prev
, sbuf
->buf
+ pos
, next
);
799 ic_memmove(sbuf
->buf
+ pos
- prev
+ next
, buf
, prev
);
803 ic_private ssize_t
sbuf_find_line_start( stringbuf_t
* sbuf
, ssize_t pos
) {
804 return str_find_line_start( sbuf
->buf
, sbuf
->count
, pos
);
807 ic_private ssize_t
sbuf_find_line_end( stringbuf_t
* sbuf
, ssize_t pos
) {
808 return str_find_line_end( sbuf
->buf
, sbuf
->count
, pos
);
811 ic_private ssize_t
sbuf_find_word_start( stringbuf_t
* sbuf
, ssize_t pos
) {
812 return str_find_word_start( sbuf
->buf
, sbuf
->count
, pos
);
815 ic_private ssize_t
sbuf_find_word_end( stringbuf_t
* sbuf
, ssize_t pos
) {
816 return str_find_word_end( sbuf
->buf
, sbuf
->count
, pos
);
819 ic_private ssize_t
sbuf_find_ws_word_start( stringbuf_t
* sbuf
, ssize_t pos
) {
820 return str_find_ws_word_start( sbuf
->buf
, sbuf
->count
, pos
);
823 ic_private ssize_t
sbuf_find_ws_word_end( stringbuf_t
* sbuf
, ssize_t pos
) {
824 return str_find_ws_word_end( sbuf
->buf
, sbuf
->count
, pos
);
827 // find row/col position
828 ic_private ssize_t
sbuf_get_pos_at_rc( stringbuf_t
* sbuf
, ssize_t termw
, ssize_t promptw
, ssize_t cpromptw
, ssize_t row
, ssize_t col
) {
829 return str_get_pos_at_rc( sbuf
->buf
, sbuf
->count
, termw
, promptw
, cpromptw
, row
, col
);
832 // get row/col for a given position
833 ic_private ssize_t
sbuf_get_rc_at_pos( stringbuf_t
* sbuf
, ssize_t termw
, ssize_t promptw
, ssize_t cpromptw
, ssize_t pos
, rowcol_t
* rc
) {
834 return str_get_rc_at_pos( sbuf
->buf
, sbuf
->count
, termw
, promptw
, cpromptw
, pos
, rc
);
837 ic_private ssize_t
sbuf_get_wrapped_rc_at_pos( stringbuf_t
* sbuf
, ssize_t termw
, ssize_t newtermw
, ssize_t promptw
, ssize_t cpromptw
, ssize_t pos
, rowcol_t
* rc
) {
838 return str_get_wrapped_rc_at_pos( sbuf
->buf
, sbuf
->count
, termw
, newtermw
, promptw
, cpromptw
, pos
, rc
);
841 ic_private ssize_t
sbuf_for_each_row( stringbuf_t
* sbuf
, ssize_t termw
, ssize_t promptw
, ssize_t cpromptw
, row_fun_t
* fun
, void* arg
, void* res
) {
842 if (sbuf
== NULL
) { return 0; }
843 return str_for_each_row( sbuf
->buf
, sbuf
->count
, termw
, promptw
, cpromptw
, fun
, arg
, res
);
847 // Duplicate and decode from utf-8 (for non-utf8 terminals)
848 ic_private
char* sbuf_strdup_from_utf8(stringbuf_t
* sbuf
) {
849 ssize_t len
= sbuf_len(sbuf
);
850 if (sbuf
== NULL
|| len
<= 0) { return NULL
; }
851 char* s
= mem_zalloc_tp_n(sbuf
->mem
, char, len
);
852 if (s
== NULL
) { return NULL
; }
854 for (ssize_t i
= 0; i
< len
; ) {
855 ssize_t ofs
= sbuf_next_ofs(sbuf
, i
, NULL
);
862 s
[dest
++] = sbuf
->buf
[i
];
864 else if (sbuf
->buf
[i
] == '\x1B') {
865 // skip escape sequences
870 unicode_t uchr
= unicode_from_qutf8( (const uint8_t*)(sbuf
->buf
+ i
), ofs
, &nread
);
872 if (unicode_is_raw(uchr
, &c
)) {
873 // raw byte, output as is (this will take care of locale specific input)
876 else if (uchr
<= 0x7F) {
878 s
[dest
++] = (char)uchr
;
881 // skip unknown unicode characters..
882 // todo: convert according to locale?
892 //-------------------------------------------------------------
894 //-------------------------------------------------------------
896 ic_public
long ic_prev_char( const char* s
, long pos
) {
897 ssize_t len
= ic_strlen(s
);
898 if (pos
< 0 || pos
> len
) { return -1; }
899 ssize_t ofs
= str_prev_ofs( s
, pos
, NULL
);
900 if (ofs
<= 0) { return -1; }
901 return (long)(pos
- ofs
);
904 ic_public
long ic_next_char( const char* s
, long pos
) {
905 ssize_t len
= ic_strlen(s
);
906 if (pos
< 0 || pos
> len
) { return -1; }
907 ssize_t ofs
= str_next_ofs( s
, len
, pos
, NULL
);
908 if (ofs
<= 0) { return -1; }
909 return (long)(pos
+ ofs
);
913 // parse a decimal (leave pi unchanged on error)
914 ic_private
bool ic_atoz(const char* s
, ssize_t
* pi
) {
915 return (sscanf(s
, "%" PRIz
"d", pi
) == 1);
918 // parse two decimals separated by a semicolon
919 ic_private
bool ic_atoz2(const char* s
, ssize_t
* pi
, ssize_t
* pj
) {
920 return (sscanf(s
, "%" PRIz
"d;%" PRIz
"d", pi
, pj
) == 2);
923 // parse unsigned 32-bit (leave pu unchanged on error)
924 ic_private
bool ic_atou32(const char* s
, uint32_t* pu
) {
925 return (sscanf(s
, "%" SCNu32
, pu
) == 1);
929 // Convenience: character class for whitespace `[ \t\r\n]`.
930 ic_public
bool ic_char_is_white(const char* s
, long len
) {
931 if (s
== NULL
|| len
!= 1) { return false; }
933 return (c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r');
936 // Convenience: character class for non-whitespace `[^ \t\r\n]`.
937 ic_public
bool ic_char_is_nonwhite(const char* s
, long len
) {
938 return !ic_char_is_white(s
, len
);
941 // Convenience: character class for separators `[ \t\r\n,.;:/\\\(\)\{\}\[\]]`.
942 ic_public
bool ic_char_is_separator(const char* s
, long len
) {
943 if (s
== NULL
|| len
!= 1) { return false; }
945 return (strchr(" \t\r\n,.;:/\\(){}[]", c
) != NULL
);
948 // Convenience: character class for non-separators.
949 ic_public
bool ic_char_is_nonseparator(const char* s
, long len
) {
950 return !ic_char_is_separator(s
, len
);
954 // Convenience: character class for digits (`[0-9]`).
955 ic_public
bool ic_char_is_digit(const char* s
, long len
) {
956 if (s
== NULL
|| len
!= 1) { return false; }
958 return (c
>= '0' && c
<= '9');
961 // Convenience: character class for hexadecimal digits (`[A-Fa-f0-9]`).
962 ic_public
bool ic_char_is_hexdigit(const char* s
, long len
) {
963 if (s
== NULL
|| len
!= 1) { return false; }
965 return ((c
>= '0' && c
<= '9') || (c
>= 'a' && c
<= 'f') || (c
>= 'A' && c
<= 'F'));
968 // Convenience: character class for letters (`[A-Za-z]` and any non-ASCII character, i.e. first byte >= 0x80).
969 ic_public
bool ic_char_is_letter(const char* s
, long len
) {
970 if (s
== NULL
|| len
<= 0) { return false; }
972 return ((uint8_t)c
>= 0x80 || (c
>= 'A' && c
<= 'Z') || (c
>= 'a' && c
<= 'z'));
975 // Convenience: character class for identifier letters (`[A-Za-z0-9_-]` and any non-ASCII character, i.e. first byte >= 0x80).
976 ic_public
bool ic_char_is_idletter(const char* s
, long len
) {
977 if (s
== NULL
|| len
<= 0) { return false; }
979 return ((uint8_t)c
>= 0x80 || (c
>= 'A' && c
<= 'Z') || (c
>= 'a' && c
<= 'z') || (c
>= '0' && c
<= '9') || (c
== '_') || (c
== '-'));
982 // Convenience: character class for filename letters (any first byte >= 0x80, or any character other than space, \t, \r, \n and `` `@$><=;|&{}()[] ``).
983 ic_public
bool ic_char_is_filename_letter(const char* s
, long len
) {
984 if (s
== NULL
|| len
<= 0) { return false; }
986 return ((uint8_t)c
>= 0x80 || (strchr(" \t\r\n`@$><=;|&{}()[]", c
) == NULL
));
989 // Convenience: If this is a token start, returns the length (or <= 0 if not found).
990 ic_public
long ic_is_token(const char* s
, long pos
, ic_is_char_class_fun_t
* is_token_char
) {
991 if (s
== NULL
|| pos
< 0 || is_token_char
== NULL
) { return -1; }
992 ssize_t len
= ic_strlen(s
);
993 if (pos
>= len
) { return -1; }
994 if (pos
> 0 && is_token_char(s
+ pos
-1, 1)) { return -1; } // token start?
997 ssize_t next
= str_next_ofs(s
, len
, i
, NULL
);
998 if (next
<= 0) { return -1; }
999 if (!is_token_char(s
+ i
, (long)next
)) { break; }
1002 return (long)(i
- pos
);
// `strncmp` taking a signed length; `n` is converted with `to_size_t`
// before the comparison.
static int ic_strncmp(const char* s1, const char* s2, ssize_t n) {
  const size_t limit = to_size_t(n);
  return strncmp(s1, s2, limit);
}
1010 // Convenience: Does this match the specified token?
1011 // Ensures not to match prefixes or suffixes, and returns the length of the match (in bytes).
1012 // E.g. `ic_match_token("function",0,&ic_char_is_letter,"fun")` returns 0.
1013 ic_public
long ic_match_token(const char* s
, long pos
, ic_is_char_class_fun_t
* is_token_char
, const char* token
) {
1014 long n
= ic_is_token(s
, pos
, is_token_char
);
1015 if (n
> 0 && token
!= NULL
&& n
== ic_strlen(token
) && ic_strncmp(s
+ pos
, token
, n
) == 0) {
1024 // Convenience: Do any of the specified tokens match?
1025 // Ensures not to match prefixes or suffixes, and returns the length of the match (in bytes).
1026 // Ensures not to match prefixes or suffixes.
1027 // E.g. `ic_match_any_token("function",0,&ic_char_is_letter,{"fun","func",NULL})` returns 0.
1028 ic_public
long ic_match_any_token(const char* s
, long pos
, ic_is_char_class_fun_t
* is_token_char
, const char** tokens
) {
1029 long n
= ic_is_token(s
, pos
, is_token_char
);
1030 if (n
<= 0 || tokens
== NULL
) { return 0; }
1031 for (const char** token
= tokens
; *token
!= NULL
; token
++) {
1032 if (n
== ic_strlen(*token
) && ic_strncmp(s
+ pos
, *token
, n
) == 0) {