lexer: assume .h is ANSI C, not C++
[vis.git] / text-motions.c
blob1bf661117639da0c082e2dee528f08515c307900
1 #include <ctype.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #include <wchar.h>
5 #include <errno.h>
6 #include "text-motions.h"
7 #include "text-util.h"
8 #include "util.h"
9 #include "text-objects.h"
/* Whitespace test for a byte taken from the text. The argument is converted
 * to unsigned char as a whole (hence the parentheses) because the <ctype.h>
 * classifiers are undefined for negative values other than EOF. */
#define space(c) (isspace((unsigned char)(c)))
/* Calls the predicate named `isboundary` that is in scope where the macro is
 * expanded (the customword motions take it as a parameter), again on the
 * unsigned char value of the whole argument expression. */
#define boundary(c) (isboundary((unsigned char)(c)))
// TODO: specify this per file type?
/* Returns 1 when c passes ISASCII() and is not a digit, an ASCII letter or
 * an underscore; returns 0 otherwise. */
int is_word_boundary(int c) {
	if (!ISASCII(c))
		return 0;
	int is_digit = '0' <= c && c <= '9';
	int is_lower = 'a' <= c && c <= 'z';
	int is_upper = 'A' <= c && c <= 'Z';
	return !(is_digit || is_lower || is_upper || c == '_');
}
21 size_t text_begin(Text *txt, size_t pos) {
22 return 0;
25 size_t text_end(Text *txt, size_t pos) {
26 return text_size(txt);
29 size_t text_char_next(Text *txt, size_t pos) {
30 Iterator it = text_iterator_get(txt, pos);
31 text_iterator_char_next(&it, NULL);
32 return it.pos;
35 size_t text_char_prev(Text *txt, size_t pos) {
36 Iterator it = text_iterator_get(txt, pos);
37 text_iterator_char_prev(&it, NULL);
38 return it.pos;
41 static size_t find_next(Text *txt, size_t pos, const char *s, bool line) {
42 if (!s)
43 return pos;
44 size_t len = strlen(s), matched = 0;
45 Iterator it = text_iterator_get(txt, pos), sit;
46 for (char c; matched < len && text_iterator_byte_get(&it, &c); ) {
47 if (c == s[matched]) {
48 if (matched == 0)
49 sit = it;
50 matched++;
51 } else if (matched > 0) {
52 it = sit;
53 matched = 0;
55 text_iterator_byte_next(&it, NULL);
56 if (line && c == '\n')
57 break;
59 return matched == len ? it.pos - len : pos;
62 size_t text_find_next(Text *txt, size_t pos, const char *s) {
63 return find_next(txt, pos, s, false);
66 size_t text_line_find_next(Text *txt, size_t pos, const char *s) {
67 return find_next(txt, pos, s, true);
70 static size_t find_prev(Text *txt, size_t pos, const char *s, bool line) {
71 if (!s)
72 return pos;
73 size_t len = strlen(s), matched = len - 1;
74 Iterator it = text_iterator_get(txt, pos), sit;
75 if (len == 0)
76 return pos;
77 for (char c; text_iterator_byte_prev(&it, &c); ) {
78 if (c == s[matched]) {
79 if (matched == 0)
80 return it.pos;
81 if (matched == len - 1)
82 sit = it;
83 matched--;
84 } else if (matched < len - 1) {
85 it = sit;
86 matched = len - 1;
88 if (line && c == '\n')
89 break;
91 return pos;
94 size_t text_find_prev(Text *txt, size_t pos, const char *s) {
95 return find_prev(txt, pos, s, false);
98 size_t text_line_find_prev(Text *txt, size_t pos, const char *s) {
99 return find_prev(txt, pos, s, true);
102 size_t text_line_prev(Text *txt, size_t pos) {
103 char c;
104 Iterator it = text_iterator_get(txt, pos);
105 if (!text_iterator_byte_get(&it, &c))
106 return pos;
107 if (c == '\n')
108 text_iterator_byte_prev(&it, &c);
109 if (c == '\r')
110 text_iterator_byte_prev(&it, &c);
111 while (text_iterator_byte_get(&it, &c) && c != '\n')
112 text_iterator_byte_prev(&it, NULL);
113 if (text_iterator_byte_prev(&it, &c) && c != '\r')
114 text_iterator_byte_next(&it, &c);
115 return it.pos;
118 size_t text_line_begin(Text *txt, size_t pos) {
119 char c;
120 Iterator it = text_iterator_get(txt, pos);
121 if (!text_iterator_byte_get(&it, &c))
122 return pos;
123 if (c == '\n')
124 text_iterator_byte_prev(&it, &c);
125 if (c == '\r')
126 text_iterator_byte_prev(&it, &c);
127 while (text_iterator_byte_get(&it, &c)) {
128 if (c == '\n') {
129 it.pos++;
130 break;
132 text_iterator_byte_prev(&it, NULL);
134 return it.pos;
137 size_t text_line_start(Text *txt, size_t pos) {
138 char c;
139 Iterator it = text_iterator_get(txt, text_line_begin(txt, pos));
140 while (text_iterator_byte_get(&it, &c) && c != '\n' && space(c))
141 text_iterator_byte_next(&it, NULL);
142 return it.pos;
145 size_t text_line_finish(Text *txt, size_t pos) {
146 char c;
147 Iterator it = text_iterator_get(txt, text_line_end(txt, pos));
148 do text_iterator_char_prev(&it, NULL);
149 while (text_iterator_byte_get(&it, &c) && c != '\n' && space(c));
150 return it.pos;
153 size_t text_line_lastchar(Text *txt, size_t pos) {
154 char c;
155 Iterator it = text_iterator_get(txt, text_line_end(txt, pos));
156 if (text_iterator_char_prev(&it, &c) && c == '\n')
157 text_iterator_byte_next(&it, NULL);
158 return it.pos;
161 size_t text_line_end(Text *txt, size_t pos) {
162 char c;
163 Iterator it = text_iterator_get(txt, pos);
164 while (text_iterator_byte_get(&it, &c) && c != '\r' && c != '\n')
165 text_iterator_byte_next(&it, NULL);
166 return it.pos;
169 size_t text_line_next(Text *txt, size_t pos) {
170 char c;
171 Iterator it = text_iterator_get(txt, pos);
172 while (text_iterator_byte_get(&it, &c) && c != '\n')
173 text_iterator_byte_next(&it, NULL);
174 text_iterator_byte_next(&it, NULL);
175 return it.pos;
178 size_t text_line_offset(Text *txt, size_t pos, size_t off) {
179 char c;
180 size_t bol = text_line_begin(txt, pos);
181 Iterator it = text_iterator_get(txt, bol);
182 while (off-- > 0 && text_iterator_byte_get(&it, &c) && c != '\r' && c != '\n')
183 text_iterator_byte_next(&it, NULL);
184 return it.pos;
187 size_t text_line_char_set(Text *txt, size_t pos, int count) {
188 char c;
189 size_t bol = text_line_begin(txt, pos);
190 Iterator it = text_iterator_get(txt, bol);
191 while (count-- > 0 && text_iterator_byte_get(&it, &c) && c != '\r' && c != '\n')
192 text_iterator_char_next(&it, NULL);
193 return it.pos;
196 int text_line_char_get(Text *txt, size_t pos) {
197 char c;
198 int count = 0;
199 size_t bol = text_line_begin(txt, pos);
200 Iterator it = text_iterator_get(txt, bol);
201 while (text_iterator_byte_get(&it, &c) && it.pos < pos && c != '\r' && c != '\n') {
202 text_iterator_char_next(&it, NULL);
203 count++;
205 return count;
208 int text_line_width_get(Text *txt, size_t pos) {
209 int width = 0;
210 mbstate_t ps = { 0 };
211 size_t bol = text_line_begin(txt, pos);
212 Iterator it = text_iterator_get(txt, bol);
214 while (it.pos < pos) {
215 char buf[MB_CUR_MAX];
216 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
217 if (len == 0 || buf[0] == '\r' || buf[0] == '\n')
218 break;
219 wchar_t wc;
220 size_t wclen = mbrtowc(&wc, buf, len, &ps);
221 if (wclen == (size_t)-1 && errno == EILSEQ) {
222 /* assume a replacement symbol will be displayed */
223 width++;
224 } else if (wclen == (size_t)-2) {
225 /* do nothing, advance to next character */
226 } else if (wclen == 0) {
227 /* assume NUL byte will be displayed as ^@ */
228 width += 2;
229 } else if (buf[0] == '\t') {
230 width++;
231 } else {
232 int w = wcwidth(wc);
233 if (w == -1)
234 w = 2; /* assume non-printable will be displayed as ^{char} */
235 width += w;
238 if (!text_iterator_codepoint_next(&it, NULL))
239 break;
242 return width;
245 size_t text_line_width_set(Text *txt, size_t pos, int width) {
246 int cur_width = 0;
247 mbstate_t ps = { 0 };
248 size_t bol = text_line_begin(txt, pos);
249 Iterator it = text_iterator_get(txt, bol);
251 for (;;) {
252 char buf[MB_CUR_MAX];
253 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
254 if (len == 0 || buf[0] == '\r' || buf[0] == '\n')
255 break;
256 wchar_t wc;
257 size_t wclen = mbrtowc(&wc, buf, len, &ps);
258 if (wclen == (size_t)-1 && errno == EILSEQ) {
259 /* assume a replacement symbol will be displayed */
260 cur_width++;
261 } else if (wclen == (size_t)-2) {
262 /* do nothing, advance to next character */
263 } else if (wclen == 0) {
264 /* assume NUL byte will be displayed as ^@ */
265 cur_width += 2;
266 } else if (buf[0] == '\t') {
267 cur_width++;
268 } else {
269 int w = wcwidth(wc);
270 if (w == -1)
271 w = 2; /* assume non-printable will be displayed as ^{char} */
272 cur_width += w;
275 if (cur_width >= width || !text_iterator_codepoint_next(&it, NULL))
276 break;
279 return it.pos;
282 size_t text_line_char_next(Text *txt, size_t pos) {
283 char c;
284 Iterator it = text_iterator_get(txt, pos);
285 if (!text_iterator_byte_get(&it, &c) || c == '\r' || c == '\n')
286 return pos;
287 text_iterator_char_next(&it, NULL);
288 return it.pos;
291 size_t text_line_char_prev(Text *txt, size_t pos) {
292 char c;
293 Iterator it = text_iterator_get(txt, pos);
294 if (!text_iterator_char_prev(&it, &c) || c == '\n')
295 return pos;
296 return it.pos;
299 size_t text_line_up(Text *txt, size_t pos) {
300 int width = text_line_width_get(txt, pos);
301 size_t prev = text_line_prev(txt, pos);
302 return text_line_width_set(txt, prev, width);
305 size_t text_line_down(Text *txt, size_t pos) {
306 int width = text_line_width_get(txt, pos);
307 size_t next = text_line_next(txt, pos);
308 return text_line_width_set(txt, next, width);
311 size_t text_range_line_first(Text *txt, Filerange *r) {
312 if (!text_range_valid(r))
313 return EPOS;
314 return r->start;
317 size_t text_range_line_last(Text *txt, Filerange *r) {
318 if (!text_range_valid(r))
319 return EPOS;
320 size_t pos = text_line_begin(txt, r->end);
321 if (pos == r->end) {
322 /* range ends at a begin of a line, skip last line ending */
323 pos = text_line_prev(txt, pos);
324 pos = text_line_begin(txt, pos);
326 return r->start <= pos ? pos : r->start;
329 size_t text_range_line_next(Text *txt, Filerange *r, size_t pos) {
330 if (!text_range_contains(r, pos))
331 return EPOS;
332 size_t newpos = text_line_next(txt, pos);
333 return newpos != pos && newpos < r->end ? newpos : EPOS;
336 size_t text_range_line_prev(Text *txt, Filerange *r, size_t pos) {
337 if (!text_range_contains(r, pos))
338 return EPOS;
339 size_t newpos = text_line_begin(txt, text_line_prev(txt, pos));
340 return newpos != pos && r->start <= newpos ? newpos : EPOS;
343 size_t text_customword_start_next(Text *txt, size_t pos, int (*isboundary)(int)) {
344 char c;
345 Iterator it = text_iterator_get(txt, pos);
346 if (!text_iterator_byte_get(&it, &c))
347 return pos;
348 if (boundary(c))
349 while (boundary(c) && !space(c) && text_iterator_char_next(&it, &c));
350 else
351 while (!boundary(c) && text_iterator_char_next(&it, &c));
352 while (space(c) && text_iterator_char_next(&it, &c));
353 return it.pos;
356 size_t text_customword_start_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
357 char c;
358 Iterator it = text_iterator_get(txt, pos);
359 while (text_iterator_char_prev(&it, &c) && space(c));
360 if (boundary(c))
361 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && boundary(c) && !space(c));
362 else
363 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && !boundary(c));
364 return pos;
367 size_t text_customword_end_next(Text *txt, size_t pos, int (*isboundary)(int)) {
368 char c;
369 Iterator it = text_iterator_get(txt, pos);
370 while (text_iterator_char_next(&it, &c) && space(c));
371 if (boundary(c))
372 do pos = it.pos; while (text_iterator_char_next(&it, &c) && boundary(c) && !space(c));
373 else
374 do pos = it.pos; while (text_iterator_char_next(&it, &c) && !isboundary(c));
375 return pos;
378 size_t text_customword_end_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
379 char c;
380 Iterator it = text_iterator_get(txt, pos);
381 if (!text_iterator_byte_get(&it, &c))
382 return pos;
383 if (boundary(c))
384 while (boundary(c) && !space(c) && text_iterator_char_prev(&it, &c));
385 else
386 while (!boundary(c) && text_iterator_char_prev(&it, &c));
387 while (space(c) && text_iterator_char_prev(&it, &c));
388 return it.pos;
391 size_t text_longword_end_next(Text *txt, size_t pos) {
392 return text_customword_end_next(txt, pos, isspace);
395 size_t text_longword_end_prev(Text *txt, size_t pos) {
396 return text_customword_end_prev(txt, pos, isspace);
399 size_t text_longword_start_next(Text *txt, size_t pos) {
400 return text_customword_start_next(txt, pos, isspace);
403 size_t text_longword_start_prev(Text *txt, size_t pos) {
404 return text_customword_start_prev(txt, pos, isspace);
407 size_t text_word_end_next(Text *txt, size_t pos) {
408 return text_customword_end_next(txt, pos, is_word_boundary);
411 size_t text_word_end_prev(Text *txt, size_t pos) {
412 return text_customword_end_prev(txt, pos, is_word_boundary);
415 size_t text_word_start_next(Text *txt, size_t pos) {
416 return text_customword_start_next(txt, pos, is_word_boundary);
419 size_t text_word_start_prev(Text *txt, size_t pos) {
420 return text_customword_start_prev(txt, pos, is_word_boundary);
423 size_t text_sentence_next(Text *txt, size_t pos) {
424 char c, prev = 'X';
425 Iterator it = text_iterator_get(txt, pos), rev = text_iterator_get(txt, pos);
427 if (!text_iterator_byte_get(&it, &c))
428 return pos;
430 while (text_iterator_byte_get(&rev, &prev) && space(prev))
431 text_iterator_byte_prev(&rev, NULL);
432 prev = rev.pos == 0 ? '.' : prev; /* simulate punctuation at BOF */
434 do {
435 if ((prev == '.' || prev == '?' || prev == '!') && space(c)) {
436 do text_iterator_byte_next(&it, NULL);
437 while (text_iterator_byte_get(&it, &c) && space(c));
438 return it.pos;
440 prev = c;
441 } while (text_iterator_byte_next(&it, &c));
442 return it.pos;
445 size_t text_sentence_prev(Text *txt, size_t pos) {
446 char c, prev = 'X';
447 bool content = false;
448 Iterator it = text_iterator_get(txt, pos);
450 while (it.pos != 0 && text_iterator_byte_prev(&it, &c)) {
451 if (content && space(prev) && (c == '.' || c == '?' || c == '!')) {
452 do text_iterator_byte_next(&it, NULL);
453 while (text_iterator_byte_get(&it, &c) && space(c));
454 return it.pos;
456 content |= !space(c);
457 prev = c;
458 } /* The loop only ends on hitting BOF or error */
459 if (content) /* starting pos was after first sentence in file => find that sentences start */
460 while (text_iterator_byte_get(&it, &c) && space(c))
461 text_iterator_byte_next(&it, NULL);
462 return it.pos;
465 size_t text_paragraph_next(Text *txt, size_t pos) {
466 char c;
467 Iterator it = text_iterator_get(txt, pos);
469 while (text_iterator_byte_get(&it, &c) && (c == '\n' || c == '\r'))
470 text_iterator_byte_next(&it, NULL);
471 return text_line_empty_next(txt, it.pos);
474 size_t text_paragraph_prev(Text *txt, size_t pos) {
475 char c;
476 Iterator it = text_iterator_get(txt, pos);
478 /* c == \0 catches starting the search at EOF */
479 while (text_iterator_byte_get(&it, &c) && (c == '\n' || c == '\r' || c == '\0'))
480 text_iterator_byte_prev(&it, NULL);
481 return text_line_empty_prev(txt, it.pos);
484 size_t text_line_empty_next(Text *txt, size_t pos) {
485 char c;
486 Iterator it = text_iterator_get(txt, pos);
487 while (text_iterator_byte_get(&it, &c)) {
488 if (c == '\n' && text_iterator_byte_next(&it, &c)) {
489 size_t match = it.pos;
490 if (c == '\r')
491 text_iterator_byte_next(&it, &c);
492 if (c == '\n')
493 return match;
495 text_iterator_byte_next(&it, NULL);
497 return it.pos;
500 size_t text_line_empty_prev(Text *txt, size_t pos) {
501 char c;
502 Iterator it = text_iterator_get(txt, pos);
503 while (text_iterator_byte_prev(&it, &c)) {
504 if (c == '\n' && text_iterator_byte_prev(&it, &c)) {
505 if (c == '\r')
506 text_iterator_byte_prev(&it, &c);
507 if (c == '\n')
508 return it.pos + 1;
511 return it.pos;
514 size_t text_function_start_next(Text *txt, size_t pos) {
515 size_t a = text_function_end_next(txt, pos);
516 size_t b = a;
517 char c;
518 if (a != pos) {
519 Iterator it = text_iterator_get(txt, a);
520 while (text_iterator_byte_next(&it, &c) && (c == '\r' || c == '\n'));
521 a = it.pos;
523 if (b != pos) {
524 size_t match = text_bracket_match(txt, b);
525 b = match != b ? text_line_next(txt, text_line_empty_prev(txt, match)) : pos;
527 if (a <= pos && b <= pos)
528 return pos;
529 else if (a <= pos)
530 return b;
531 else if (b <= pos)
532 return a;
533 else
534 return MIN(a, b);
537 size_t text_function_start_prev(Text *txt, size_t pos) {
538 char c;
539 size_t apos = text_byte_get(txt, pos, &c) && c == '}' && pos > 0 ? pos - 1 : pos;
540 size_t a = text_function_end_next(txt, apos);
541 size_t b = text_function_end_prev(txt, pos);
542 if (a != apos) {
543 size_t match = text_bracket_match(txt, a);
544 a = match != a ? text_line_next(txt, text_line_empty_prev(txt, match)) : pos;
546 if (b != pos) {
547 size_t match = text_bracket_match(txt, b);
548 b = match != b ? text_line_next(txt, text_line_empty_prev(txt, match)) : pos;
550 if (a >= pos && b >= pos)
551 return pos;
552 else if (a >= pos)
553 return b;
554 else if (b >= pos)
555 return a;
556 else
557 return MAX(a, b);
560 static size_t text_function_end_direction(Text *txt, size_t pos, int direction) {
561 size_t start = pos, match;
562 if (direction < 0 && pos > 0)
563 pos--;
564 for (;;) {
565 char c[3];
566 if (direction > 0)
567 match = text_find_next(txt, pos, "\n}");
568 else
569 match = text_find_prev(txt, pos, "\n}");
570 if (text_bytes_get(txt, match, sizeof c, c) != 3 || c[0] != '\n' || c[1] != '}')
571 break;
572 if (c[2] == '\r' || c[2] == '\n')
573 return match+1;
574 if (match == pos)
575 match += direction;
576 pos = match;
578 return start;
581 size_t text_function_end_next(Text *txt, size_t pos) {
582 return text_function_end_direction(txt, pos, +1);
585 size_t text_function_end_prev(Text *txt, size_t pos) {
586 return text_function_end_direction(txt, pos, -1);
589 size_t text_block_start(Text *txt, size_t pos) {
590 Filerange r = text_object_curly_bracket(txt, pos-1);
591 return text_range_valid(&r) ? r.start-1 : pos;
594 size_t text_block_end(Text *txt, size_t pos) {
595 Filerange r = text_object_curly_bracket(txt, pos+1);
596 return text_range_valid(&r) ? r.end : pos;
599 size_t text_parenthese_start(Text *txt, size_t pos) {
600 Filerange r = text_object_paranthese(txt, pos-1);
601 return text_range_valid(&r) ? r.start-1 : pos;
604 size_t text_parenthese_end(Text *txt, size_t pos) {
605 Filerange r = text_object_paranthese(txt, pos+1);
606 return text_range_valid(&r) ? r.end : pos;
609 size_t text_bracket_match(Text *txt, size_t pos) {
610 return text_bracket_match_symbol(txt, pos, NULL);
613 size_t text_bracket_match_symbol(Text *txt, size_t pos, const char *symbols) {
614 int direction, count = 1;
615 char search, current, c;
616 bool instring = false;
617 Iterator it = text_iterator_get(txt, pos);
618 if (!text_iterator_byte_get(&it, &current))
619 return pos;
620 if (symbols && !memchr(symbols, current, strlen(symbols)))
621 return pos;
622 switch (current) {
623 case '(': search = ')'; direction = 1; break;
624 case ')': search = '('; direction = -1; break;
625 case '{': search = '}'; direction = 1; break;
626 case '}': search = '{'; direction = -1; break;
627 case '[': search = ']'; direction = 1; break;
628 case ']': search = '['; direction = -1; break;
629 case '<': search = '>'; direction = 1; break;
630 case '>': search = '<'; direction = -1; break;
631 case '"':
632 case '`':
633 case '\'': {
634 char special[] = " \n)}]>.,:;";
635 search = current;
636 direction = 1;
637 if (text_iterator_byte_next(&it, &c)) {
638 /* if a single or double quote is followed by
639 * a special character, search backwards */
640 if (memchr(special, c, sizeof(special)))
641 direction = -1;
642 text_iterator_byte_prev(&it, NULL);
644 break;
646 default: return pos;
649 if (direction >= 0) { /* forward search */
650 while (text_iterator_byte_next(&it, &c)) {
651 if (c != current && c == '"')
652 instring = !instring;
653 if (!instring) {
654 if (c == search && --count == 0)
655 return it.pos;
656 else if (c == current)
657 count++;
660 } else { /* backwards */
661 while (text_iterator_byte_prev(&it, &c)) {
662 if (c != current && c == '"')
663 instring = !instring;
664 if (!instring) {
665 if (c == search && --count == 0)
666 return it.pos;
667 else if (c == current)
668 count++;
673 return pos; /* no match found */
676 size_t text_search_forward(Text *txt, size_t pos, Regex *regex) {
677 size_t start = pos + 1;
678 size_t end = text_size(txt);
679 RegexMatch match[1];
680 bool found = start < end && !text_search_range_forward(txt, start, end - start, regex, 1, match, 0);
682 if (!found) {
683 start = 0;
684 end = pos;
685 found = !text_search_range_forward(txt, start, end, regex, 1, match, 0);
688 return found ? match[0].start : pos;
691 size_t text_search_backward(Text *txt, size_t pos, Regex *regex) {
692 size_t start = 0;
693 size_t end = pos;
694 RegexMatch match[1];
695 bool found = !text_search_range_backward(txt, start, end, regex, 1, match, 0);
697 if (!found) {
698 start = pos + 1;
699 end = text_size(txt);
700 found = start < end && !text_search_range_backward(txt, start, end - start, regex, 1, match, 0);
703 return found ? match[0].start : pos;