build: skip -pie for static build
[vis.git] / text-motions.c
blob e48dd9abbde3a13ec9f1789739f95898f9cf4c34
1 #include <ctype.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #include <wchar.h>
5 #include <errno.h>
6 #include "text-motions.h"
7 #include "text-util.h"
8 #include "util.h"
9 #include "text-objects.h"
/* Character classification helpers.
 * The argument is converted to unsigned char before it reaches the
 * classifier because the <ctype.h> functions are only defined for values
 * representable as unsigned char (or EOF), while plain char may be signed.
 * The argument is parenthesized so the cast applies to the whole
 * expression, not just its first operand.
 * boundary() calls whatever function or function pointer named
 * `isboundary` is in scope at the point of use. */
#define space(c) (isspace((unsigned char)(c)))
#define boundary(c) (isboundary((unsigned char)(c)))
// TODO: specify this per file type?
/* Report whether `c` separates words: any value accepted by ISASCII that
 * is not a digit, a letter or an underscore counts as a boundary.
 * Values rejected by ISASCII are never boundaries.
 * Returns 1 for a boundary, 0 otherwise. */
int is_word_boundary(int c) {
	if (!ISASCII(c))
		return 0;
	int word_char = ('0' <= c && c <= '9') ||
	                ('a' <= c && c <= 'z') ||
	                ('A' <= c && c <= 'Z') ||
	                c == '_';
	return !word_char;
}
21 size_t text_begin(Text *txt, size_t pos) {
22 return 0;
25 size_t text_end(Text *txt, size_t pos) {
26 return text_size(txt);
29 size_t text_char_next(Text *txt, size_t pos) {
30 Iterator it = text_iterator_get(txt, pos);
31 text_iterator_char_next(&it, NULL);
32 return it.pos;
35 size_t text_char_prev(Text *txt, size_t pos) {
36 Iterator it = text_iterator_get(txt, pos);
37 text_iterator_char_prev(&it, NULL);
38 return it.pos;
41 static size_t find_next(Text *txt, size_t pos, const char *s, bool line) {
42 if (!s)
43 return pos;
44 size_t len = strlen(s), matched = 0;
45 Iterator it = text_iterator_get(txt, pos), sit;
46 for (char c; matched < len && text_iterator_byte_get(&it, &c); ) {
47 if (c == s[matched]) {
48 if (matched == 0)
49 sit = it;
50 matched++;
51 } else if (matched > 0) {
52 it = sit;
53 matched = 0;
55 text_iterator_byte_next(&it, NULL);
56 if (line && c == '\n')
57 break;
59 return matched == len ? it.pos - len : pos;
62 size_t text_find_next(Text *txt, size_t pos, const char *s) {
63 return find_next(txt, pos, s, false);
66 size_t text_line_find_next(Text *txt, size_t pos, const char *s) {
67 return find_next(txt, pos, s, true);
70 static size_t find_prev(Text *txt, size_t pos, const char *s, bool line) {
71 if (!s)
72 return pos;
73 size_t len = strlen(s), matched = len - 1;
74 Iterator it = text_iterator_get(txt, pos), sit;
75 if (len == 0)
76 return pos;
77 for (char c; text_iterator_byte_prev(&it, &c); ) {
78 if (c == s[matched]) {
79 if (matched == 0)
80 return it.pos;
81 if (matched == len - 1)
82 sit = it;
83 matched--;
84 } else if (matched < len - 1) {
85 it = sit;
86 matched = len - 1;
88 if (line && c == '\n')
89 break;
91 return pos;
94 size_t text_find_prev(Text *txt, size_t pos, const char *s) {
95 return find_prev(txt, pos, s, false);
98 size_t text_line_find_prev(Text *txt, size_t pos, const char *s) {
99 return find_prev(txt, pos, s, true);
102 size_t text_line_prev(Text *txt, size_t pos) {
103 char c;
104 Iterator it = text_iterator_get(txt, pos);
105 if (!text_iterator_byte_get(&it, &c))
106 return pos;
107 if (c == '\n')
108 text_iterator_byte_prev(&it, &c);
109 if (c == '\r')
110 text_iterator_byte_prev(&it, &c);
111 while (text_iterator_byte_get(&it, &c) && c != '\n')
112 text_iterator_byte_prev(&it, NULL);
113 if (text_iterator_byte_prev(&it, &c) && c != '\r')
114 text_iterator_byte_next(&it, &c);
115 return it.pos;
118 size_t text_line_begin(Text *txt, size_t pos) {
119 char c;
120 Iterator it = text_iterator_get(txt, pos);
121 if (!text_iterator_byte_get(&it, &c))
122 return pos;
123 if (c == '\n')
124 text_iterator_byte_prev(&it, &c);
125 if (c == '\r')
126 text_iterator_byte_prev(&it, &c);
127 while (text_iterator_byte_get(&it, &c)) {
128 if (c == '\n') {
129 it.pos++;
130 break;
132 text_iterator_byte_prev(&it, NULL);
134 return it.pos;
137 size_t text_line_start(Text *txt, size_t pos) {
138 char c;
139 Iterator it = text_iterator_get(txt, text_line_begin(txt, pos));
140 while (text_iterator_byte_get(&it, &c) && c != '\n' && space(c))
141 text_iterator_byte_next(&it, NULL);
142 return it.pos;
145 size_t text_line_finish(Text *txt, size_t pos) {
146 char c;
147 Iterator it = text_iterator_get(txt, text_line_end(txt, pos));
148 do text_iterator_char_prev(&it, NULL);
149 while (text_iterator_byte_get(&it, &c) && c != '\n' && space(c));
150 return it.pos;
153 size_t text_line_lastchar(Text *txt, size_t pos) {
154 char c;
155 Iterator it = text_iterator_get(txt, text_line_end(txt, pos));
156 if (text_iterator_char_prev(&it, &c) && c == '\n')
157 text_iterator_byte_next(&it, NULL);
158 return it.pos;
161 size_t text_line_end(Text *txt, size_t pos) {
162 char c;
163 Iterator it = text_iterator_get(txt, pos);
164 while (text_iterator_byte_get(&it, &c) && c != '\r' && c != '\n')
165 text_iterator_byte_next(&it, NULL);
166 return it.pos;
169 size_t text_line_next(Text *txt, size_t pos) {
170 char c;
171 Iterator it = text_iterator_get(txt, pos);
172 while (text_iterator_byte_get(&it, &c) && c != '\n')
173 text_iterator_byte_next(&it, NULL);
174 text_iterator_byte_next(&it, NULL);
175 return it.pos;
178 size_t text_line_offset(Text *txt, size_t pos, size_t off) {
179 char c;
180 size_t bol = text_line_begin(txt, pos);
181 Iterator it = text_iterator_get(txt, bol);
182 while (off-- > 0 && text_iterator_byte_get(&it, &c) && c != '\r' && c != '\n')
183 text_iterator_byte_next(&it, NULL);
184 return it.pos;
187 size_t text_line_char_set(Text *txt, size_t pos, int count) {
188 char c;
189 size_t bol = text_line_begin(txt, pos);
190 Iterator it = text_iterator_get(txt, bol);
191 while (count-- > 0 && text_iterator_byte_get(&it, &c) && c != '\r' && c != '\n')
192 text_iterator_char_next(&it, NULL);
193 return it.pos;
196 int text_line_char_get(Text *txt, size_t pos) {
197 char c;
198 int count = 0;
199 size_t bol = text_line_begin(txt, pos);
200 Iterator it = text_iterator_get(txt, bol);
201 while (text_iterator_byte_get(&it, &c) && it.pos < pos && c != '\r' && c != '\n') {
202 text_iterator_char_next(&it, NULL);
203 count++;
205 return count;
208 int text_line_width_get(Text *txt, size_t pos) {
209 int width = 0;
210 mbstate_t ps = { 0 };
211 size_t bol = text_line_begin(txt, pos);
212 Iterator it = text_iterator_get(txt, bol);
214 while (it.pos < pos) {
215 char buf[MB_CUR_MAX];
216 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
217 if (len == 0 || buf[0] == '\r' || buf[0] == '\n')
218 break;
219 wchar_t wc;
220 size_t wclen = mbrtowc(&wc, buf, len, &ps);
221 if (wclen == (size_t)-1 && errno == EILSEQ) {
222 /* assume a replacement symbol will be displayed */
223 width++;
224 } else if (wclen == (size_t)-2) {
225 /* do nothing, advance to next character */
226 } else if (wclen == 0) {
227 /* assume NUL byte will be displayed as ^@ */
228 width += 2;
229 } else if (buf[0] == '\t') {
230 width++;
231 } else {
232 int w = wcwidth(wc);
233 if (w == -1)
234 w = 2; /* assume non-printable will be displayed as ^{char} */
235 width += w;
238 if (!text_iterator_codepoint_next(&it, NULL))
239 break;
242 return width;
245 size_t text_line_width_set(Text *txt, size_t pos, int width) {
246 int cur_width = 0;
247 mbstate_t ps = { 0 };
248 size_t bol = text_line_begin(txt, pos);
249 Iterator it = text_iterator_get(txt, bol);
251 for (;;) {
252 char buf[MB_CUR_MAX];
253 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
254 if (len == 0 || buf[0] == '\r' || buf[0] == '\n')
255 break;
256 wchar_t wc;
257 size_t wclen = mbrtowc(&wc, buf, len, &ps);
258 if (wclen == (size_t)-1 && errno == EILSEQ) {
259 /* assume a replacement symbol will be displayed */
260 cur_width++;
261 } else if (wclen == (size_t)-2) {
262 /* do nothing, advance to next character */
263 } else if (wclen == 0) {
264 /* assume NUL byte will be displayed as ^@ */
265 cur_width += 2;
266 } else if (buf[0] == '\t') {
267 cur_width++;
268 } else {
269 int w = wcwidth(wc);
270 if (w == -1)
271 w = 2; /* assume non-printable will be displayed as ^{char} */
272 cur_width += w;
275 if (cur_width >= width || !text_iterator_codepoint_next(&it, NULL))
276 break;
279 return it.pos;
282 size_t text_line_char_next(Text *txt, size_t pos) {
283 char c;
284 Iterator it = text_iterator_get(txt, pos);
285 if (!text_iterator_byte_get(&it, &c) || c == '\r' || c == '\n')
286 return pos;
287 if (!text_iterator_char_next(&it, &c) || c == '\r' || c == '\n')
288 return pos;
289 return it.pos;
292 size_t text_line_char_prev(Text *txt, size_t pos) {
293 char c;
294 Iterator it = text_iterator_get(txt, pos);
295 if (!text_iterator_char_prev(&it, &c) || c == '\n')
296 return pos;
297 return it.pos;
300 size_t text_line_up(Text *txt, size_t pos) {
301 int width = text_line_width_get(txt, pos);
302 size_t prev = text_line_prev(txt, pos);
303 return text_line_width_set(txt, prev, width);
306 size_t text_line_down(Text *txt, size_t pos) {
307 int width = text_line_width_get(txt, pos);
308 size_t next = text_line_next(txt, pos);
309 return text_line_width_set(txt, next, width);
312 size_t text_range_line_first(Text *txt, Filerange *r) {
313 if (!text_range_valid(r))
314 return EPOS;
315 return r->start;
318 size_t text_range_line_last(Text *txt, Filerange *r) {
319 if (!text_range_valid(r))
320 return EPOS;
321 size_t pos = text_line_begin(txt, r->end);
322 if (pos == r->end) {
323 /* range ends at a begin of a line, skip last line ending */
324 pos = text_line_prev(txt, pos);
325 pos = text_line_begin(txt, pos);
327 return r->start <= pos ? pos : r->start;
330 size_t text_range_line_next(Text *txt, Filerange *r, size_t pos) {
331 if (!text_range_contains(r, pos))
332 return EPOS;
333 size_t newpos = text_line_next(txt, pos);
334 return newpos != pos && newpos < r->end ? newpos : EPOS;
337 size_t text_range_line_prev(Text *txt, Filerange *r, size_t pos) {
338 if (!text_range_contains(r, pos))
339 return EPOS;
340 size_t newpos = text_line_begin(txt, text_line_prev(txt, pos));
341 return newpos != pos && r->start <= newpos ? newpos : EPOS;
344 size_t text_customword_start_next(Text *txt, size_t pos, int (*isboundary)(int)) {
345 char c;
346 Iterator it = text_iterator_get(txt, pos);
347 if (!text_iterator_byte_get(&it, &c))
348 return pos;
349 if (boundary(c))
350 while (boundary(c) && !space(c) && text_iterator_char_next(&it, &c));
351 else
352 while (!boundary(c) && text_iterator_char_next(&it, &c));
353 while (space(c) && text_iterator_char_next(&it, &c));
354 return it.pos;
357 size_t text_customword_start_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
358 char c;
359 Iterator it = text_iterator_get(txt, pos);
360 while (text_iterator_char_prev(&it, &c) && space(c));
361 if (boundary(c))
362 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && boundary(c) && !space(c));
363 else
364 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && !boundary(c));
365 return pos;
368 size_t text_customword_end_next(Text *txt, size_t pos, int (*isboundary)(int)) {
369 char c;
370 Iterator it = text_iterator_get(txt, pos);
371 while (text_iterator_char_next(&it, &c) && space(c));
372 if (boundary(c))
373 do pos = it.pos; while (text_iterator_char_next(&it, &c) && boundary(c) && !space(c));
374 else
375 do pos = it.pos; while (text_iterator_char_next(&it, &c) && !isboundary(c));
376 return pos;
379 size_t text_customword_end_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
380 char c;
381 Iterator it = text_iterator_get(txt, pos);
382 if (!text_iterator_byte_get(&it, &c))
383 return pos;
384 if (boundary(c))
385 while (boundary(c) && !space(c) && text_iterator_char_prev(&it, &c));
386 else
387 while (!boundary(c) && text_iterator_char_prev(&it, &c));
388 while (space(c) && text_iterator_char_prev(&it, &c));
389 return it.pos;
392 size_t text_longword_end_next(Text *txt, size_t pos) {
393 return text_customword_end_next(txt, pos, isspace);
396 size_t text_longword_end_prev(Text *txt, size_t pos) {
397 return text_customword_end_prev(txt, pos, isspace);
400 size_t text_longword_start_next(Text *txt, size_t pos) {
401 return text_customword_start_next(txt, pos, isspace);
404 size_t text_longword_start_prev(Text *txt, size_t pos) {
405 return text_customword_start_prev(txt, pos, isspace);
408 size_t text_word_end_next(Text *txt, size_t pos) {
409 return text_customword_end_next(txt, pos, is_word_boundary);
412 size_t text_word_end_prev(Text *txt, size_t pos) {
413 return text_customword_end_prev(txt, pos, is_word_boundary);
416 size_t text_word_start_next(Text *txt, size_t pos) {
417 return text_customword_start_next(txt, pos, is_word_boundary);
420 size_t text_word_start_prev(Text *txt, size_t pos) {
421 return text_customword_start_prev(txt, pos, is_word_boundary);
424 size_t text_sentence_next(Text *txt, size_t pos) {
425 char c, prev = 'X';
426 Iterator it = text_iterator_get(txt, pos), rev = text_iterator_get(txt, pos);
428 if (!text_iterator_byte_get(&it, &c))
429 return pos;
431 while (text_iterator_byte_get(&rev, &prev) && space(prev))
432 text_iterator_byte_prev(&rev, NULL);
433 prev = rev.pos == 0 ? '.' : prev; /* simulate punctuation at BOF */
435 do {
436 if ((prev == '.' || prev == '?' || prev == '!') && space(c)) {
437 do text_iterator_byte_next(&it, NULL);
438 while (text_iterator_byte_get(&it, &c) && space(c));
439 return it.pos;
441 prev = c;
442 } while (text_iterator_byte_next(&it, &c));
443 return it.pos;
446 size_t text_sentence_prev(Text *txt, size_t pos) {
447 char c, prev = 'X';
448 bool content = false;
449 Iterator it = text_iterator_get(txt, pos);
451 while (it.pos != 0 && text_iterator_byte_prev(&it, &c)) {
452 if (content && space(prev) && (c == '.' || c == '?' || c == '!')) {
453 do text_iterator_byte_next(&it, NULL);
454 while (text_iterator_byte_get(&it, &c) && space(c));
455 return it.pos;
457 content |= !space(c);
458 prev = c;
459 } /* The loop only ends on hitting BOF or error */
460 if (content) /* starting pos was after first sentence in file => find that sentences start */
461 while (text_iterator_byte_get(&it, &c) && space(c))
462 text_iterator_byte_next(&it, NULL);
463 return it.pos;
466 size_t text_paragraph_next(Text *txt, size_t pos) {
467 char c;
468 Iterator it = text_iterator_get(txt, pos);
470 while (text_iterator_byte_get(&it, &c) && (c == '\n' || c == '\r'))
471 text_iterator_byte_next(&it, NULL);
472 return text_line_empty_next(txt, it.pos);
475 size_t text_paragraph_prev(Text *txt, size_t pos) {
476 char c;
477 Iterator it = text_iterator_get(txt, pos);
479 /* c == \0 catches starting the search at EOF */
480 while (text_iterator_byte_get(&it, &c) && (c == '\n' || c == '\r' || c == '\0'))
481 text_iterator_byte_prev(&it, NULL);
482 return text_line_empty_prev(txt, it.pos);
485 size_t text_line_empty_next(Text *txt, size_t pos) {
486 char c;
487 Iterator it = text_iterator_get(txt, pos);
488 while (text_iterator_byte_get(&it, &c)) {
489 if (c == '\n' && text_iterator_byte_next(&it, &c)) {
490 size_t match = it.pos;
491 if (c == '\r')
492 text_iterator_byte_next(&it, &c);
493 if (c == '\n')
494 return match;
496 text_iterator_byte_next(&it, NULL);
498 return it.pos;
501 size_t text_line_empty_prev(Text *txt, size_t pos) {
502 char c;
503 Iterator it = text_iterator_get(txt, pos);
504 while (text_iterator_byte_prev(&it, &c)) {
505 if (c == '\n' && text_iterator_byte_prev(&it, &c)) {
506 if (c == '\r')
507 text_iterator_byte_prev(&it, &c);
508 if (c == '\n')
509 return it.pos + 1;
512 return it.pos;
515 size_t text_function_start_next(Text *txt, size_t pos) {
516 size_t a = text_function_end_next(txt, pos);
517 size_t b = a;
518 char c;
519 if (a != pos) {
520 Iterator it = text_iterator_get(txt, a);
521 while (text_iterator_byte_next(&it, &c) && (c == '\r' || c == '\n'));
522 a = it.pos;
524 if (b != pos) {
525 size_t match = text_bracket_match(txt, b);
526 b = match != b ? text_line_next(txt, text_line_empty_prev(txt, match)) : pos;
528 if (a <= pos && b <= pos)
529 return pos;
530 else if (a <= pos)
531 return b;
532 else if (b <= pos)
533 return a;
534 else
535 return MIN(a, b);
538 size_t text_function_start_prev(Text *txt, size_t pos) {
539 char c;
540 size_t apos = text_byte_get(txt, pos, &c) && c == '}' && pos > 0 ? pos - 1 : pos;
541 size_t a = text_function_end_next(txt, apos);
542 size_t b = text_function_end_prev(txt, pos);
543 if (a != apos) {
544 size_t match = text_bracket_match(txt, a);
545 a = match != a ? text_line_next(txt, text_line_empty_prev(txt, match)) : pos;
547 if (b != pos) {
548 size_t match = text_bracket_match(txt, b);
549 b = match != b ? text_line_next(txt, text_line_empty_prev(txt, match)) : pos;
551 if (a >= pos && b >= pos)
552 return pos;
553 else if (a >= pos)
554 return b;
555 else if (b >= pos)
556 return a;
557 else
558 return MAX(a, b);
561 static size_t text_function_end_direction(Text *txt, size_t pos, int direction) {
562 size_t start = pos, match;
563 if (direction < 0 && pos > 0)
564 pos--;
565 for (;;) {
566 char c[3];
567 if (direction > 0)
568 match = text_find_next(txt, pos, "\n}");
569 else
570 match = text_find_prev(txt, pos, "\n}");
571 if (text_bytes_get(txt, match, sizeof c, c) != 3 || c[0] != '\n' || c[1] != '}')
572 break;
573 if (c[2] == '\r' || c[2] == '\n')
574 return match+1;
575 if (match == pos)
576 match += direction;
577 pos = match;
579 return start;
582 size_t text_function_end_next(Text *txt, size_t pos) {
583 return text_function_end_direction(txt, pos, +1);
586 size_t text_function_end_prev(Text *txt, size_t pos) {
587 return text_function_end_direction(txt, pos, -1);
590 size_t text_block_start(Text *txt, size_t pos) {
591 Filerange r = text_object_curly_bracket(txt, pos-1);
592 return text_range_valid(&r) ? r.start-1 : pos;
595 size_t text_block_end(Text *txt, size_t pos) {
596 Filerange r = text_object_curly_bracket(txt, pos+1);
597 return text_range_valid(&r) ? r.end : pos;
600 size_t text_parenthese_start(Text *txt, size_t pos) {
601 Filerange r = text_object_paranthese(txt, pos-1);
602 return text_range_valid(&r) ? r.start-1 : pos;
605 size_t text_parenthese_end(Text *txt, size_t pos) {
606 Filerange r = text_object_paranthese(txt, pos+1);
607 return text_range_valid(&r) ? r.end : pos;
610 size_t text_bracket_match(Text *txt, size_t pos) {
611 return text_bracket_match_symbol(txt, pos, NULL);
614 size_t text_bracket_match_symbol(Text *txt, size_t pos, const char *symbols) {
615 int direction, count = 1;
616 char search, current, c;
617 bool instring = false;
618 Iterator it = text_iterator_get(txt, pos);
619 if (!text_iterator_byte_get(&it, &current))
620 return pos;
621 if (symbols && !memchr(symbols, current, strlen(symbols)))
622 return pos;
623 switch (current) {
624 case '(': search = ')'; direction = 1; break;
625 case ')': search = '('; direction = -1; break;
626 case '{': search = '}'; direction = 1; break;
627 case '}': search = '{'; direction = -1; break;
628 case '[': search = ']'; direction = 1; break;
629 case ']': search = '['; direction = -1; break;
630 case '<': search = '>'; direction = 1; break;
631 case '>': search = '<'; direction = -1; break;
632 case '"':
633 case '`':
634 case '\'': {
635 char special[] = " \n)}]>.,:;";
636 search = current;
637 direction = 1;
638 if (text_iterator_byte_next(&it, &c)) {
639 /* if a single or double quote is followed by
640 * a special character, search backwards */
641 if (memchr(special, c, sizeof(special)))
642 direction = -1;
643 text_iterator_byte_prev(&it, NULL);
645 break;
647 default: return pos;
650 if (direction >= 0) { /* forward search */
651 while (text_iterator_byte_next(&it, &c)) {
652 if (c != current && c == '"')
653 instring = !instring;
654 if (!instring) {
655 if (c == search && --count == 0)
656 return it.pos;
657 else if (c == current)
658 count++;
661 } else { /* backwards */
662 while (text_iterator_byte_prev(&it, &c)) {
663 if (c != current && c == '"')
664 instring = !instring;
665 if (!instring) {
666 if (c == search && --count == 0)
667 return it.pos;
668 else if (c == current)
669 count++;
674 return pos; /* no match found */
677 size_t text_search_forward(Text *txt, size_t pos, Regex *regex) {
678 size_t start = pos + 1;
679 size_t end = text_size(txt);
680 RegexMatch match[1];
681 bool found = start < end && !text_search_range_forward(txt, start, end - start, regex, 1, match, 0);
683 if (!found) {
684 start = 0;
685 end = pos;
686 found = !text_search_range_forward(txt, start, end, regex, 1, match, 0);
689 return found ? match[0].start : pos;
692 size_t text_search_backward(Text *txt, size_t pos, Regex *regex) {
693 size_t start = 0;
694 size_t end = pos;
695 RegexMatch match[1];
696 bool found = !text_search_range_backward(txt, start, end, regex, 1, match, 0);
698 if (!found) {
699 start = pos + 1;
700 end = text_size(txt);
701 found = start < end && !text_search_range_backward(txt, start, end - start, regex, 1, match, 0);
704 return found ? match[0].start : pos;