build: provide install-strip make target
[vis.git] / text-motions.c
blob21967e50f9eb333453cad7ad24f994e41622a4f1
1 #include <ctype.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #include <wchar.h>
5 #include <errno.h>
6 #include <limits.h>
7 #include "text-motions.h"
8 #include "text-util.h"
9 #include "util.h"
10 #include "text-objects.h"
/*
 * Character classification helpers used by the motions in this file.
 *
 * blank(c)    - true for a space or a horizontal tab.
 * space(c)    - isspace() applied to the value converted to unsigned char,
 *               so that bytes >= 0x80 are never passed as negative ints.
 * boundary(c) - calls whatever identifier named `isboundary` is in scope at
 *               the point of use (the function-pointer parameter of the
 *               text_customword_* functions), again on an unsigned char.
 *
 * The argument is fully parenthesized so the cast applies to the whole
 * expression and not only to its first operand.
 */
#define blank(c) ((c) == ' ' || (c) == '\t')
#define space(c) (isspace((unsigned char)(c)))
#define boundary(c) (isboundary((unsigned char)(c)))
/* TODO: specify this per file type? */
/*
 * Word-boundary predicate.
 *
 * Returns 1 when c satisfies ISASCII() and is neither a decimal digit,
 * an ASCII letter nor an underscore; returns 0 otherwise.
 */
int is_word_boundary(int c) {
	if (!ISASCII(c))
		return 0;
	const int digit = '0' <= c && c <= '9';
	const int lower = 'a' <= c && c <= 'z';
	const int upper = 'A' <= c && c <= 'Z';
	return !(digit || lower || upper || c == '_');
}
23 size_t text_begin(Text *txt, size_t pos) {
24 return 0;
27 size_t text_end(Text *txt, size_t pos) {
28 return text_size(txt);
31 size_t text_char_next(Text *txt, size_t pos) {
32 Iterator it = text_iterator_get(txt, pos);
33 text_iterator_char_next(&it, NULL);
34 return it.pos;
37 size_t text_char_prev(Text *txt, size_t pos) {
38 Iterator it = text_iterator_get(txt, pos);
39 text_iterator_char_prev(&it, NULL);
40 return it.pos;
43 size_t text_codepoint_next(Text *txt, size_t pos) {
44 Iterator it = text_iterator_get(txt, pos);
45 text_iterator_codepoint_next(&it, NULL);
46 return it.pos;
49 size_t text_codepoint_prev(Text *txt, size_t pos) {
50 Iterator it = text_iterator_get(txt, pos);
51 text_iterator_codepoint_prev(&it, NULL);
52 return it.pos;
55 static size_t find_next(Text *txt, size_t pos, const char *s, bool line) {
56 if (!s)
57 return pos;
58 size_t len = strlen(s), matched = 0;
59 Iterator it = text_iterator_get(txt, pos), sit;
60 for (char c; matched < len && text_iterator_byte_get(&it, &c); ) {
61 if (c == s[matched]) {
62 if (matched == 0)
63 sit = it;
64 matched++;
65 } else if (matched > 0) {
66 it = sit;
67 matched = 0;
69 text_iterator_byte_next(&it, NULL);
70 if (line && c == '\n')
71 break;
73 return matched == len ? it.pos - len : pos;
76 size_t text_find_next(Text *txt, size_t pos, const char *s) {
77 return find_next(txt, pos, s, false);
80 size_t text_line_find_next(Text *txt, size_t pos, const char *s) {
81 return find_next(txt, pos, s, true);
84 static size_t find_prev(Text *txt, size_t pos, const char *s, bool line) {
85 if (!s)
86 return pos;
87 size_t len = strlen(s), matched = len - 1;
88 Iterator it = text_iterator_get(txt, pos), sit;
89 if (len == 0)
90 return pos;
91 for (char c; text_iterator_byte_prev(&it, &c); ) {
92 if (c == s[matched]) {
93 if (matched == 0)
94 return it.pos;
95 if (matched == len - 1)
96 sit = it;
97 matched--;
98 } else if (matched < len - 1) {
99 it = sit;
100 matched = len - 1;
102 if (line && c == '\n')
103 break;
105 return pos;
108 size_t text_find_prev(Text *txt, size_t pos, const char *s) {
109 return find_prev(txt, pos, s, false);
112 size_t text_line_find_prev(Text *txt, size_t pos, const char *s) {
113 return find_prev(txt, pos, s, true);
116 size_t text_line_prev(Text *txt, size_t pos) {
117 Iterator it = text_iterator_get(txt, pos);
118 text_iterator_byte_find_prev(&it, '\n');
119 return it.pos;
122 size_t text_line_begin(Text *txt, size_t pos) {
123 Iterator it = text_iterator_get(txt, pos);
124 return text_iterator_byte_find_prev(&it, '\n') ? it.pos+1 : it.pos;
127 size_t text_line_start(Text *txt, size_t pos) {
128 char c;
129 Iterator it = text_iterator_get(txt, text_line_begin(txt, pos));
130 while (text_iterator_byte_get(&it, &c) && blank(c))
131 text_iterator_byte_next(&it, NULL);
132 return it.pos;
135 size_t text_line_finish(Text *txt, size_t pos) {
136 char c;
137 size_t end = text_line_end(txt, pos);
138 Iterator it = text_iterator_get(txt, end);
139 if (!text_iterator_byte_prev(&it, &c) || c == '\n')
140 return end;
141 while (blank(c) && text_iterator_byte_prev(&it, &c));
142 return it.pos + (c == '\n');
145 size_t text_line_end(Text *txt, size_t pos) {
146 Iterator it = text_iterator_get(txt, pos);
147 text_iterator_byte_find_next(&it, '\n');
148 return it.pos;
151 size_t text_line_next(Text *txt, size_t pos) {
152 Iterator it = text_iterator_get(txt, pos);
153 if (text_iterator_byte_find_next(&it, '\n'))
154 text_iterator_byte_next(&it, NULL);
155 return it.pos;
158 size_t text_line_offset(Text *txt, size_t pos, size_t off) {
159 char c;
160 size_t bol = text_line_begin(txt, pos);
161 Iterator it = text_iterator_get(txt, bol);
162 while (off-- > 0 && text_iterator_byte_get(&it, &c) && c != '\n')
163 text_iterator_byte_next(&it, NULL);
164 return it.pos;
167 size_t text_line_char_set(Text *txt, size_t pos, int count) {
168 char c;
169 size_t bol = text_line_begin(txt, pos);
170 Iterator it = text_iterator_get(txt, bol);
171 if (text_iterator_byte_get(&it, &c) && c != '\n')
172 while (count-- > 0 && text_iterator_char_next(&it, &c) && c != '\n');
173 return it.pos;
176 int text_line_char_get(Text *txt, size_t pos) {
177 char c;
178 int count = 0;
179 size_t bol = text_line_begin(txt, pos);
180 Iterator it = text_iterator_get(txt, bol);
181 if (text_iterator_byte_get(&it, &c) && c != '\n') {
182 while (it.pos < pos && c != '\n' && text_iterator_char_next(&it, &c))
183 count++;
185 return count;
188 int text_line_width_get(Text *txt, size_t pos) {
189 int width = 0;
190 mbstate_t ps = { 0 };
191 size_t bol = text_line_begin(txt, pos);
192 Iterator it = text_iterator_get(txt, bol);
194 while (it.pos < pos) {
195 char buf[MB_LEN_MAX];
196 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
197 if (len == 0 || buf[0] == '\n')
198 break;
199 wchar_t wc;
200 size_t wclen = mbrtowc(&wc, buf, len, &ps);
201 if (wclen == (size_t)-1 && errno == EILSEQ) {
202 ps = (mbstate_t){0};
203 /* assume a replacement symbol will be displayed */
204 width++;
205 } else if (wclen == (size_t)-2) {
206 /* do nothing, advance to next character */
207 } else if (wclen == 0) {
208 /* assume NUL byte will be displayed as ^@ */
209 width += 2;
210 } else if (buf[0] == '\t') {
211 width++;
212 } else {
213 int w = wcwidth(wc);
214 if (w == -1)
215 w = 2; /* assume non-printable will be displayed as ^{char} */
216 width += w;
219 if (!text_iterator_codepoint_next(&it, NULL))
220 break;
223 return width;
226 size_t text_line_width_set(Text *txt, size_t pos, int width) {
227 int cur_width = 0;
228 mbstate_t ps = { 0 };
229 size_t bol = text_line_begin(txt, pos);
230 Iterator it = text_iterator_get(txt, bol);
232 for (;;) {
233 char buf[MB_LEN_MAX];
234 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
235 if (len == 0 || buf[0] == '\n')
236 break;
237 wchar_t wc;
238 size_t wclen = mbrtowc(&wc, buf, len, &ps);
239 if (wclen == (size_t)-1 && errno == EILSEQ) {
240 ps = (mbstate_t){0};
241 /* assume a replacement symbol will be displayed */
242 cur_width++;
243 } else if (wclen == (size_t)-2) {
244 /* do nothing, advance to next character */
245 } else if (wclen == 0) {
246 /* assume NUL byte will be displayed as ^@ */
247 cur_width += 2;
248 } else if (buf[0] == '\t') {
249 cur_width++;
250 } else {
251 int w = wcwidth(wc);
252 if (w == -1)
253 w = 2; /* assume non-printable will be displayed as ^{char} */
254 cur_width += w;
257 if (cur_width >= width || !text_iterator_codepoint_next(&it, NULL))
258 break;
261 return it.pos;
264 size_t text_line_char_next(Text *txt, size_t pos) {
265 char c;
266 Iterator it = text_iterator_get(txt, pos);
267 if (!text_iterator_byte_get(&it, &c) || c == '\n')
268 return pos;
269 text_iterator_char_next(&it, NULL);
270 return it.pos;
273 size_t text_line_char_prev(Text *txt, size_t pos) {
274 char c;
275 Iterator it = text_iterator_get(txt, pos);
276 if (!text_iterator_char_prev(&it, &c) || c == '\n')
277 return pos;
278 return it.pos;
281 size_t text_line_up(Text *txt, size_t pos) {
282 int width = text_line_width_get(txt, pos);
283 size_t prev = text_line_prev(txt, pos);
284 return text_line_width_set(txt, prev, width);
287 size_t text_line_down(Text *txt, size_t pos) {
288 int width = text_line_width_get(txt, pos);
289 size_t next = text_line_next(txt, pos);
290 return text_line_width_set(txt, next, width);
293 size_t text_range_line_first(Text *txt, Filerange *r) {
294 if (!text_range_valid(r))
295 return EPOS;
296 return r->start;
299 size_t text_range_line_last(Text *txt, Filerange *r) {
300 if (!text_range_valid(r))
301 return EPOS;
302 size_t pos = text_line_begin(txt, r->end);
303 if (pos == r->end) {
304 /* range ends at a begin of a line, skip last line ending */
305 pos = text_line_prev(txt, pos);
306 pos = text_line_begin(txt, pos);
308 return r->start <= pos ? pos : r->start;
311 size_t text_range_line_next(Text *txt, Filerange *r, size_t pos) {
312 if (!text_range_contains(r, pos))
313 return EPOS;
314 size_t newpos = text_line_next(txt, pos);
315 return newpos != pos && newpos < r->end ? newpos : EPOS;
318 size_t text_range_line_prev(Text *txt, Filerange *r, size_t pos) {
319 if (!text_range_contains(r, pos))
320 return EPOS;
321 size_t newpos = text_line_begin(txt, text_line_prev(txt, pos));
322 return newpos != pos && r->start <= newpos ? newpos : EPOS;
325 size_t text_customword_start_next(Text *txt, size_t pos, int (*isboundary)(int)) {
326 char c;
327 Iterator it = text_iterator_get(txt, pos);
328 if (!text_iterator_byte_get(&it, &c))
329 return pos;
330 if (boundary(c))
331 while (boundary(c) && !space(c) && text_iterator_char_next(&it, &c));
332 else
333 while (!boundary(c) && text_iterator_char_next(&it, &c));
334 while (space(c) && text_iterator_char_next(&it, &c));
335 return it.pos;
338 size_t text_customword_start_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
339 char c;
340 Iterator it = text_iterator_get(txt, pos);
341 while (text_iterator_char_prev(&it, &c) && space(c));
342 if (boundary(c))
343 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && boundary(c) && !space(c));
344 else
345 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && !boundary(c));
346 return pos;
349 size_t text_customword_end_next(Text *txt, size_t pos, int (*isboundary)(int)) {
350 char c;
351 Iterator it = text_iterator_get(txt, pos);
352 while (text_iterator_char_next(&it, &c) && space(c));
353 if (boundary(c))
354 do pos = it.pos; while (text_iterator_char_next(&it, &c) && boundary(c) && !space(c));
355 else
356 do pos = it.pos; while (text_iterator_char_next(&it, &c) && !isboundary(c));
357 return pos;
360 size_t text_customword_end_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
361 char c;
362 Iterator it = text_iterator_get(txt, pos);
363 if (!text_iterator_byte_get(&it, &c))
364 return pos;
365 if (boundary(c))
366 while (boundary(c) && !space(c) && text_iterator_char_prev(&it, &c));
367 else
368 while (!boundary(c) && text_iterator_char_prev(&it, &c));
369 while (space(c) && text_iterator_char_prev(&it, &c));
370 return it.pos;
373 size_t text_longword_end_next(Text *txt, size_t pos) {
374 return text_customword_end_next(txt, pos, isspace);
377 size_t text_longword_end_prev(Text *txt, size_t pos) {
378 return text_customword_end_prev(txt, pos, isspace);
381 size_t text_longword_start_next(Text *txt, size_t pos) {
382 return text_customword_start_next(txt, pos, isspace);
385 size_t text_longword_start_prev(Text *txt, size_t pos) {
386 return text_customword_start_prev(txt, pos, isspace);
389 size_t text_word_end_next(Text *txt, size_t pos) {
390 return text_customword_end_next(txt, pos, is_word_boundary);
393 size_t text_word_end_prev(Text *txt, size_t pos) {
394 return text_customword_end_prev(txt, pos, is_word_boundary);
397 size_t text_word_start_next(Text *txt, size_t pos) {
398 return text_customword_start_next(txt, pos, is_word_boundary);
401 size_t text_word_start_prev(Text *txt, size_t pos) {
402 return text_customword_start_prev(txt, pos, is_word_boundary);
405 size_t text_sentence_next(Text *txt, size_t pos) {
406 char c, prev = 'X';
407 Iterator it = text_iterator_get(txt, pos), rev = it;
409 if (!text_iterator_byte_get(&it, &c))
410 return pos;
412 while (text_iterator_byte_get(&rev, &prev) && space(prev))
413 text_iterator_byte_prev(&rev, NULL);
414 prev = rev.pos == 0 ? '.' : prev; /* simulate punctuation at BOF */
416 do {
417 if ((prev == '.' || prev == '?' || prev == '!') && space(c)) {
418 do text_iterator_byte_next(&it, NULL);
419 while (text_iterator_byte_get(&it, &c) && space(c));
420 return it.pos;
422 prev = c;
423 } while (text_iterator_byte_next(&it, &c));
424 return it.pos;
427 size_t text_sentence_prev(Text *txt, size_t pos) {
428 char c, prev = 'X';
429 bool content = false;
430 Iterator it = text_iterator_get(txt, pos);
432 while (it.pos != 0 && text_iterator_byte_prev(&it, &c)) {
433 if (content && space(prev) && (c == '.' || c == '?' || c == '!')) {
434 do text_iterator_byte_next(&it, NULL);
435 while (text_iterator_byte_get(&it, &c) && space(c));
436 return it.pos;
438 content |= !space(c);
439 prev = c;
440 } /* The loop only ends on hitting BOF or error */
441 if (content) /* starting pos was after first sentence in file => find that sentences start */
442 while (text_iterator_byte_get(&it, &c) && space(c))
443 text_iterator_byte_next(&it, NULL);
444 return it.pos;
447 size_t text_paragraph_next(Text *txt, size_t pos) {
448 char c;
449 Iterator it = text_iterator_get(txt, pos);
451 while (text_iterator_byte_get(&it, &c) && (c == '\n' || blank(c)))
452 text_iterator_char_next(&it, NULL);
453 return text_line_blank_next(txt, it.pos);
456 size_t text_paragraph_prev(Text *txt, size_t pos) {
457 char c;
458 Iterator it = text_iterator_get(txt, pos);
460 while (text_iterator_byte_prev(&it, &c) && (c == '\n' || blank(c)));
461 return text_line_blank_prev(txt, it.pos);
464 size_t text_line_empty_next(Text *txt, size_t pos) {
465 char c;
466 Iterator it = text_iterator_get(txt, pos);
467 while (text_iterator_byte_find_next(&it, '\n')) {
468 if (text_iterator_byte_next(&it, &c) && c == '\n')
469 return it.pos;
471 return it.pos;
474 size_t text_line_empty_prev(Text *txt, size_t pos) {
475 char c;
476 Iterator it = text_iterator_get(txt, pos);
477 while (text_iterator_byte_find_prev(&it, '\n')) {
478 if (text_iterator_byte_prev(&it, &c) && c == '\n')
479 return it.pos + 1;
481 return it.pos;
484 size_t text_line_blank_next(Text *txt, size_t pos) {
485 char c;
486 Iterator it = text_iterator_get(txt, pos);
487 while (text_iterator_byte_find_next(&it, '\n')) {
488 size_t n = it.pos;
489 while (text_iterator_byte_next(&it, &c) && blank(c));
490 if (c == '\n')
491 return n + 1;
493 return it.pos;
496 size_t text_line_blank_prev(Text *txt, size_t pos) {
497 char c;
498 Iterator it = text_iterator_get(txt, pos);
499 while (text_iterator_byte_find_prev(&it, '\n')) {
500 while (text_iterator_byte_prev(&it, &c) && blank(c));
501 if (c == '\n')
502 return it.pos + 1;
504 return it.pos;
507 size_t text_block_start(Text *txt, size_t pos) {
508 Filerange r = text_object_curly_bracket(txt, pos-1);
509 return text_range_valid(&r) ? r.start-1 : pos;
512 size_t text_block_end(Text *txt, size_t pos) {
513 Filerange r = text_object_curly_bracket(txt, pos+1);
514 return text_range_valid(&r) ? r.end : pos;
517 size_t text_parenthesis_start(Text *txt, size_t pos) {
518 Filerange r = text_object_parenthesis(txt, pos-1);
519 return text_range_valid(&r) ? r.start-1 : pos;
522 size_t text_parenthesis_end(Text *txt, size_t pos) {
523 Filerange r = text_object_parenthesis(txt, pos+1);
524 return text_range_valid(&r) ? r.end : pos;
527 size_t text_bracket_match(Text *txt, size_t pos, const Filerange *limits) {
528 return text_bracket_match_symbol(txt, pos, NULL, limits);
531 static size_t match_symbol(Text *txt, size_t pos, char search, int direction, const Filerange *limits) {
532 char c, current;
533 int count = 1;
534 bool instring = false;
535 Iterator it = text_iterator_get(txt, pos);
536 if (!text_iterator_byte_get(&it, &current))
537 return pos;
538 if (direction >= 0) { /* forward search */
539 while (text_iterator_byte_next(&it, &c)) {
540 if (limits && it.pos >= limits->end)
541 break;
542 if (c != current && c == '"')
543 instring = !instring;
544 if (!instring) {
545 if (c == search && --count == 0)
546 return it.pos;
547 else if (c == current)
548 count++;
551 } else { /* backwards */
552 while (text_iterator_byte_prev(&it, &c)) {
553 if (limits && it.pos < limits->start)
554 break;
555 if (c != current && c == '"')
556 instring = !instring;
557 if (!instring) {
558 if (c == search && --count == 0)
559 return it.pos;
560 else if (c == current)
561 count++;
566 return pos; /* no match found */
569 size_t text_bracket_match_symbol(Text *txt, size_t pos, const char *symbols, const Filerange *limits) {
570 int direction;
571 char search, current, c;
572 Iterator it = text_iterator_get(txt, pos);
573 if (!text_iterator_byte_get(&it, &current))
574 return pos;
575 if (symbols && !memchr(symbols, current, strlen(symbols)))
576 return pos;
577 switch (current) {
578 case '(': search = ')'; direction = 1; break;
579 case ')': search = '('; direction = -1; break;
580 case '{': search = '}'; direction = 1; break;
581 case '}': search = '{'; direction = -1; break;
582 case '[': search = ']'; direction = 1; break;
583 case ']': search = '['; direction = -1; break;
584 case '<': search = '>'; direction = 1; break;
585 case '>': search = '<'; direction = -1; break;
586 case '"':
587 case '`':
588 case '\'':
590 /* prefer matches on the same line */
591 size_t fw = match_symbol(txt, pos, current, +1, limits);
592 size_t bw = match_symbol(txt, pos, current, -1, limits);
593 if (fw == pos)
594 return bw;
595 if (bw == pos)
596 return fw;
597 size_t line = text_lineno_by_pos(txt, pos);
598 size_t line_fw = text_lineno_by_pos(txt, fw);
599 size_t line_bw = text_lineno_by_pos(txt, bw);
600 if (line != line_fw)
601 return bw;
602 if (line != line_bw)
603 return fw;
604 direction = +1;
605 if (text_iterator_byte_next(&it, &c)) {
606 /* if a single or double quote is followed by
607 * a special character, search backwards */
608 char special[] = " \t\n)}]>.,:;";
609 if (memchr(special, c, sizeof(special)))
610 direction = -1;
612 return direction >= 0 ? fw : bw;
614 default:
615 return pos;
618 return match_symbol(txt, pos, search, direction, limits);
621 size_t text_search_forward(Text *txt, size_t pos, Regex *regex) {
622 size_t start = pos + 1;
623 size_t end = text_size(txt);
624 RegexMatch match[1];
625 char c;
626 int flags = text_byte_get(txt, pos, &c) && c == '\n' ? 0 : REG_NOTBOL;
627 bool found = start < end && !text_search_range_forward(txt, start, end - start, regex, 1, match, flags);
629 if (!found) {
630 start = 0;
631 found = !text_search_range_forward(txt, start, end - start, regex, 1, match, 0);
634 return found ? match[0].start : pos;
637 size_t text_search_backward(Text *txt, size_t pos, Regex *regex) {
638 size_t start = 0;
639 size_t end = pos;
640 RegexMatch match[1];
641 bool found = !text_search_range_backward(txt, start, end, regex, 1, match, REG_NOTEOL);
643 if (!found) {
644 end = text_size(txt);
645 found = !text_search_range_backward(txt, start, end - start, regex, 1, match, 0);
648 return found ? match[0].start : pos;