Make f, F, t, T motion work when replaying a macro
[vis.git] / text-motions.c
blob298d4acc5c85f29eaa98f8bfb7dabf6cfa2e96c5
1 /*
2 * Copyright (c) 2014-2015 Marc André Tanner <mat at brain-dump.org>
4 * Permission to use, copy, modify, and/or distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 #include <ctype.h>
17 #include <string.h>
18 #include <stdlib.h>
19 #include <wchar.h>
20 #include <errno.h>
21 #include "text-motions.h"
22 #include "text-util.h"
23 #include "util.h"
/* Non-zero if byte `c` is whitespace. The argument is parenthesized so that
 * the unsigned char cast applies to the whole expression, keeping negative
 * char values out of isspace(3). */
#define space(c) (isspace((unsigned char)(c)))
/* Applies whatever predicate named `isboundary` is in scope at the point of
 * use (the word motions receive it as a parameter) to byte `c`. */
#define boundary(c) (isboundary((unsigned char)(c)))
// TODO: specify this per file type?
/* Word boundary predicate: returns 1 for every ASCII value that is not a
 * digit, a letter or an underscore, and 0 otherwise (including non-ASCII). */
int is_word_boundry(int c) {
	if (!ISASCII(c))
		return 0;
	int digit = '0' <= c && c <= '9';
	int lower = 'a' <= c && c <= 'z';
	int upper = 'A' <= c && c <= 'Z';
	return !(digit || lower || upper || c == '_');
}
35 size_t text_begin(Text *txt, size_t pos) {
36 return 0;
39 size_t text_end(Text *txt, size_t pos) {
40 return text_size(txt);
43 size_t text_char_next(Text *txt, size_t pos) {
44 Iterator it = text_iterator_get(txt, pos);
45 text_iterator_char_next(&it, NULL);
46 return it.pos;
49 size_t text_char_prev(Text *txt, size_t pos) {
50 Iterator it = text_iterator_get(txt, pos);
51 text_iterator_char_prev(&it, NULL);
52 return it.pos;
55 static size_t find_next(Text *txt, size_t pos, const char *s, bool line) {
56 if (!s)
57 return pos;
58 size_t len = strlen(s), matched = 0;
59 Iterator it = text_iterator_get(txt, pos), sit;
60 for (char c; matched < len && text_iterator_byte_get(&it, &c); ) {
61 if (c == s[matched]) {
62 if (matched == 0)
63 sit = it;
64 matched++;
65 } else if (matched > 0) {
66 it = sit;
67 matched = 0;
69 text_iterator_byte_next(&it, NULL);
70 if (line && c == '\n')
71 break;
73 return matched == len ? it.pos - len : pos;
76 size_t text_find_next(Text *txt, size_t pos, const char *s) {
77 return find_next(txt, pos, s, false);
80 size_t text_line_find_next(Text *txt, size_t pos, const char *s) {
81 return find_next(txt, pos, s, true);
84 static size_t find_prev(Text *txt, size_t pos, const char *s, bool line) {
85 if (!s)
86 return pos;
87 size_t len = strlen(s), matched = len - 1;
88 Iterator it, sit;
89 if (len == 0)
90 return pos;
91 pos += len;
92 it = text_iterator_get(txt, pos);
93 for (char c; text_iterator_byte_prev(&it, &c); ) {
94 if (c == s[matched]) {
95 if (matched == 0)
96 return it.pos;
97 if (matched == len - 1)
98 sit = it;
99 matched--;
100 } else if (matched < len - 1) {
101 it = sit;
102 matched = len - 1;
104 if (line && c == '\n')
105 break;
107 return pos;
110 size_t text_find_prev(Text *txt, size_t pos, const char *s) {
111 return find_prev(txt, pos, s, false);
114 size_t text_line_find_prev(Text *txt, size_t pos, const char *s) {
115 return find_prev(txt, pos, s, true);
118 size_t text_line_prev(Text *txt, size_t pos) {
119 char c;
120 Iterator it = text_iterator_get(txt, pos);
121 if (!text_iterator_byte_get(&it, &c))
122 return pos;
123 if (c == '\n')
124 text_iterator_byte_prev(&it, &c);
125 if (c == '\r')
126 text_iterator_byte_prev(&it, &c);
127 while (text_iterator_byte_get(&it, &c) && c != '\n')
128 text_iterator_byte_prev(&it, NULL);
129 if (text_iterator_byte_prev(&it, &c) && c != '\r')
130 text_iterator_byte_next(&it, &c);
131 return it.pos;
134 size_t text_line_begin(Text *txt, size_t pos) {
135 char c;
136 Iterator it = text_iterator_get(txt, pos);
137 if (!text_iterator_byte_get(&it, &c))
138 return pos;
139 if (c == '\n')
140 text_iterator_byte_prev(&it, &c);
141 if (c == '\r')
142 text_iterator_byte_prev(&it, &c);
143 while (text_iterator_byte_get(&it, &c)) {
144 if (c == '\n') {
145 it.pos++;
146 break;
148 text_iterator_byte_prev(&it, NULL);
150 return it.pos;
153 size_t text_line_start(Text *txt, size_t pos) {
154 char c;
155 Iterator it = text_iterator_get(txt, text_line_begin(txt, pos));
156 while (text_iterator_byte_get(&it, &c) && c != '\n' && space(c))
157 text_iterator_byte_next(&it, NULL);
158 return it.pos;
161 size_t text_line_finish(Text *txt, size_t pos) {
162 char c;
163 Iterator it = text_iterator_get(txt, text_line_end(txt, pos));
164 do text_iterator_char_prev(&it, NULL);
165 while (text_iterator_byte_get(&it, &c) && c != '\n' && space(c));
166 return it.pos;
169 size_t text_line_lastchar(Text *txt, size_t pos) {
170 char c;
171 Iterator it = text_iterator_get(txt, text_line_end(txt, pos));
172 if (text_iterator_char_prev(&it, &c) && c == '\n')
173 text_iterator_byte_next(&it, NULL);
174 return it.pos;
177 size_t text_line_end(Text *txt, size_t pos) {
178 char c;
179 Iterator it = text_iterator_get(txt, pos);
180 while (text_iterator_byte_get(&it, &c) && c != '\r' && c != '\n')
181 text_iterator_byte_next(&it, NULL);
182 return it.pos;
185 size_t text_line_next(Text *txt, size_t pos) {
186 char c;
187 Iterator it = text_iterator_get(txt, pos);
188 while (text_iterator_byte_get(&it, &c) && c != '\n')
189 text_iterator_byte_next(&it, NULL);
190 text_iterator_byte_next(&it, NULL);
191 return it.pos;
194 size_t text_line_offset(Text *txt, size_t pos, size_t off) {
195 char c;
196 size_t bol = text_line_begin(txt, pos);
197 Iterator it = text_iterator_get(txt, bol);
198 while (off-- > 0 && text_iterator_byte_get(&it, &c) && c != '\r' && c != '\n')
199 text_iterator_byte_next(&it, NULL);
200 return it.pos;
203 size_t text_line_char_set(Text *txt, size_t pos, int count) {
204 char c;
205 size_t bol = text_line_begin(txt, pos);
206 Iterator it = text_iterator_get(txt, bol);
207 while (count-- > 0 && text_iterator_byte_get(&it, &c) && c != '\r' && c != '\n')
208 text_iterator_char_next(&it, NULL);
209 return it.pos;
212 int text_line_char_get(Text *txt, size_t pos) {
213 char c;
214 int count = 0;
215 size_t bol = text_line_begin(txt, pos);
216 Iterator it = text_iterator_get(txt, bol);
217 while (text_iterator_byte_get(&it, &c) && it.pos < pos && c != '\r' && c != '\n') {
218 text_iterator_char_next(&it, NULL);
219 count++;
221 return count;
224 int text_line_width_get(Text *txt, size_t pos) {
225 int width = 0;
226 mbstate_t ps = { 0 };
227 size_t bol = text_line_begin(txt, pos);
228 Iterator it = text_iterator_get(txt, bol);
230 while (it.pos < pos) {
231 char buf[MB_CUR_MAX];
232 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
233 if (len == 0 || buf[0] == '\r' || buf[0] == '\n')
234 break;
235 wchar_t wc;
236 size_t wclen = mbrtowc(&wc, buf, len, &ps);
237 if (wclen == (size_t)-1 && errno == EILSEQ) {
238 /* assume a replacement symbol will be displayed */
239 width++;
240 } else if (wclen == (size_t)-2) {
241 /* do nothing, advance to next character */
242 } else if (wclen == 0) {
243 /* assume NUL byte will be displayed as ^@ */
244 width += 2;
245 } else if (buf[0] == '\t') {
246 width++;
247 } else {
248 int w = wcwidth(wc);
249 if (w == -1)
250 w = 2; /* assume non-printable will be displayed as ^{char} */
251 width += w;
254 if (!text_iterator_codepoint_next(&it, NULL))
255 break;
258 return width;
261 size_t text_line_width_set(Text *txt, size_t pos, int width) {
262 int cur_width = 0;
263 mbstate_t ps = { 0 };
264 size_t bol = text_line_begin(txt, pos);
265 Iterator it = text_iterator_get(txt, bol);
267 for (;;) {
268 char buf[MB_CUR_MAX];
269 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
270 if (len == 0 || buf[0] == '\r' || buf[0] == '\n')
271 break;
272 wchar_t wc;
273 size_t wclen = mbrtowc(&wc, buf, len, &ps);
274 if (wclen == (size_t)-1 && errno == EILSEQ) {
275 /* assume a replacement symbol will be displayed */
276 cur_width++;
277 } else if (wclen == (size_t)-2) {
278 /* do nothing, advance to next character */
279 } else if (wclen == 0) {
280 /* assume NUL byte will be displayed as ^@ */
281 cur_width += 2;
282 } else if (buf[0] == '\t') {
283 cur_width++;
284 } else {
285 int w = wcwidth(wc);
286 if (w == -1)
287 w = 2; /* assume non-printable will be displayed as ^{char} */
288 cur_width += w;
291 if (cur_width >= width || !text_iterator_codepoint_next(&it, NULL))
292 break;
295 return it.pos;
298 size_t text_line_char_next(Text *txt, size_t pos) {
299 char c;
300 Iterator it = text_iterator_get(txt, pos);
301 if (!text_iterator_byte_get(&it, &c) || c == '\r' || c == '\n')
302 return pos;
303 if (!text_iterator_char_next(&it, &c) || c == '\r' || c == '\n')
304 return pos;
305 return it.pos;
308 size_t text_line_char_prev(Text *txt, size_t pos) {
309 char c;
310 Iterator it = text_iterator_get(txt, pos);
311 if (!text_iterator_char_prev(&it, &c) || c == '\n')
312 return pos;
313 return it.pos;
316 size_t text_line_up(Text *txt, size_t pos) {
317 int width = text_line_width_get(txt, pos);
318 size_t prev = text_line_prev(txt, pos);
319 return text_line_width_set(txt, prev, width);
322 size_t text_line_down(Text *txt, size_t pos) {
323 int width = text_line_width_get(txt, pos);
324 size_t next = text_line_next(txt, pos);
325 return text_line_width_set(txt, next, width);
328 size_t text_range_line_first(Text *txt, Filerange *r) {
329 if (!text_range_valid(r))
330 return EPOS;
331 return r->start;
334 size_t text_range_line_last(Text *txt, Filerange *r) {
335 if (!text_range_valid(r))
336 return EPOS;
337 size_t pos = text_line_begin(txt, r->end);
338 if (pos == r->end) {
339 /* range ends at a begin of a line, skip last line ending */
340 pos = text_line_prev(txt, pos);
341 pos = text_line_begin(txt, pos);
343 return r->start <= pos ? pos : r->start;
346 size_t text_range_line_next(Text *txt, Filerange *r, size_t pos) {
347 if (!text_range_contains(r, pos))
348 return EPOS;
349 size_t newpos = text_line_next(txt, pos);
350 return newpos != pos && newpos < r->end ? newpos : EPOS;
353 size_t text_range_line_prev(Text *txt, Filerange *r, size_t pos) {
354 if (!text_range_contains(r, pos))
355 return EPOS;
356 size_t newpos = text_line_begin(txt, text_line_prev(txt, pos));
357 return newpos != pos && r->start <= newpos ? newpos : EPOS;
360 size_t text_customword_start_next(Text *txt, size_t pos, int (*isboundary)(int)) {
361 char c;
362 Iterator it = text_iterator_get(txt, pos);
363 if (!text_iterator_byte_get(&it, &c))
364 return pos;
365 if (boundary(c))
366 while (boundary(c) && !space(c) && text_iterator_char_next(&it, &c));
367 else
368 while (!boundary(c) && text_iterator_char_next(&it, &c));
369 while (space(c) && text_iterator_char_next(&it, &c));
370 return it.pos;
373 size_t text_customword_start_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
374 char c;
375 Iterator it = text_iterator_get(txt, pos);
376 while (text_iterator_char_prev(&it, &c) && space(c));
377 if (boundary(c))
378 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && boundary(c) && !space(c));
379 else
380 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && !boundary(c));
381 return pos;
384 size_t text_customword_end_next(Text *txt, size_t pos, int (*isboundary)(int)) {
385 char c;
386 Iterator it = text_iterator_get(txt, pos);
387 while (text_iterator_char_next(&it, &c) && space(c));
388 if (boundary(c))
389 do pos = it.pos; while (text_iterator_char_next(&it, &c) && boundary(c) && !space(c));
390 else
391 do pos = it.pos; while (text_iterator_char_next(&it, &c) && !isboundary(c));
392 return pos;
395 size_t text_customword_end_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
396 char c;
397 Iterator it = text_iterator_get(txt, pos);
398 if (!text_iterator_byte_get(&it, &c))
399 return pos;
400 if (boundary(c))
401 while (boundary(c) && !space(c) && text_iterator_char_prev(&it, &c));
402 else
403 while (!boundary(c) && text_iterator_char_prev(&it, &c));
404 while (space(c) && text_iterator_char_prev(&it, &c));
405 return it.pos;
408 size_t text_longword_end_next(Text *txt, size_t pos) {
409 return text_customword_end_next(txt, pos, isspace);
412 size_t text_longword_end_prev(Text *txt, size_t pos) {
413 return text_customword_end_prev(txt, pos, isspace);
416 size_t text_longword_start_next(Text *txt, size_t pos) {
417 return text_customword_start_next(txt, pos, isspace);
420 size_t text_longword_start_prev(Text *txt, size_t pos) {
421 return text_customword_start_prev(txt, pos, isspace);
424 size_t text_word_end_next(Text *txt, size_t pos) {
425 return text_customword_end_next(txt, pos, is_word_boundry);
428 size_t text_word_end_prev(Text *txt, size_t pos) {
429 return text_customword_end_prev(txt, pos, is_word_boundry);
432 size_t text_word_start_next(Text *txt, size_t pos) {
433 return text_customword_start_next(txt, pos, is_word_boundry);
436 size_t text_word_start_prev(Text *txt, size_t pos) {
437 return text_customword_start_prev(txt, pos, is_word_boundry);
440 size_t text_sentence_next(Text *txt, size_t pos) {
441 char c, prev = 'X';
442 Iterator it = text_iterator_get(txt, pos), rev = text_iterator_get(txt, pos);
444 if (!text_iterator_byte_get(&it, &c))
445 return pos;
447 while (text_iterator_byte_get(&rev, &prev) && space(prev))
448 text_iterator_byte_prev(&rev, NULL);
449 prev = rev.pos == 0 ? '.' : prev; /* simulate punctuation at BOF */
451 do {
452 if ((prev == '.' || prev == '?' || prev == '!') && space(c)) {
453 do text_iterator_byte_next(&it, NULL);
454 while (text_iterator_byte_get(&it, &c) && space(c));
455 return it.pos;
457 prev = c;
458 } while (text_iterator_byte_next(&it, &c));
459 return it.pos;
462 size_t text_sentence_prev(Text *txt, size_t pos) {
463 char c, prev = 'X';
464 bool content = false;
465 Iterator it = text_iterator_get(txt, pos);
467 while (it.pos != 0 && text_iterator_byte_prev(&it, &c)) {
468 if (content && space(prev) && (c == '.' || c == '?' || c == '!')) {
469 do text_iterator_byte_next(&it, NULL);
470 while (text_iterator_byte_get(&it, &c) && space(c));
471 return it.pos;
473 content |= !space(c);
474 prev = c;
475 } /* The loop only ends on hitting BOF or error */
476 if (content) /* starting pos was after first sentence in file => find that sentences start */
477 while (text_iterator_byte_get(&it, &c) && space(c))
478 text_iterator_byte_next(&it, NULL);
479 return it.pos;
482 size_t text_paragraph_next(Text *txt, size_t pos) {
483 char c;
484 Iterator it = text_iterator_get(txt, pos);
486 while (text_iterator_byte_get(&it, &c) && (c == '\n' || c == '\r'))
487 text_iterator_byte_next(&it, NULL);
488 return text_line_empty_next(txt, it.pos);
491 size_t text_paragraph_prev(Text *txt, size_t pos) {
492 char c;
493 Iterator it = text_iterator_get(txt, pos);
495 /* c == \0 catches starting the search at EOF */
496 while (text_iterator_byte_get(&it, &c) && (c == '\n' || c == '\r' || c == '\0'))
497 text_iterator_byte_prev(&it, NULL);
498 return text_line_empty_prev(txt, it.pos);
501 size_t text_line_empty_next(Text *txt, size_t pos) {
502 char c;
503 Iterator it = text_iterator_get(txt, pos);
504 while (text_iterator_byte_get(&it, &c)) {
505 if (c == '\n' && text_iterator_byte_next(&it, &c)) {
506 size_t match = it.pos;
507 if (c == '\r')
508 text_iterator_byte_next(&it, &c);
509 if (c == '\n')
510 return match;
512 text_iterator_byte_next(&it, NULL);
514 return it.pos;
517 size_t text_line_empty_prev(Text *txt, size_t pos) {
518 char c;
519 Iterator it = text_iterator_get(txt, pos);
520 while (text_iterator_byte_prev(&it, &c)) {
521 if (c == '\n' && text_iterator_byte_prev(&it, &c)) {
522 if (c == '\r')
523 text_iterator_byte_prev(&it, &c);
524 if (c == '\n')
525 return it.pos + 1;
528 return it.pos;
531 size_t text_function_start_next(Text *txt, size_t pos) {
532 size_t a = text_function_end_next(txt, pos);
533 size_t b = a;
534 char c;
535 if (a != pos) {
536 Iterator it = text_iterator_get(txt, a);
537 while (text_iterator_byte_next(&it, &c) && (c == '\r' || c == '\n'));
538 a = it.pos;
540 if (b != pos) {
541 size_t match = text_bracket_match(txt, b);
542 b = match != b ? text_line_next(txt, text_line_empty_prev(txt, match)) : pos;
544 if (a <= pos && b <= pos)
545 return pos;
546 else if (a <= pos)
547 return b;
548 else if (b <= pos)
549 return a;
550 else
551 return MIN(a, b);
554 size_t text_function_start_prev(Text *txt, size_t pos) {
555 char c;
556 size_t apos = text_byte_get(txt, pos, &c) && c == '}' && pos > 0 ? pos - 1 : pos;
557 size_t a = text_function_end_next(txt, apos);
558 size_t b = text_function_end_prev(txt, pos);
559 if (a != apos) {
560 size_t match = text_bracket_match(txt, a);
561 a = match != a ? text_line_next(txt, text_line_empty_prev(txt, match)) : pos;
563 if (b != pos) {
564 size_t match = text_bracket_match(txt, b);
565 b = match != b ? text_line_next(txt, text_line_empty_prev(txt, match)) : pos;
567 if (a >= pos && b >= pos)
568 return pos;
569 else if (a >= pos)
570 return b;
571 else if (b >= pos)
572 return a;
573 else
574 return MAX(a, b);
577 static size_t text_function_end_direction(Text *txt, size_t pos, int direction) {
578 size_t start = pos, match;
579 if (direction < 0 && pos > 0)
580 pos--;
581 for (;;) {
582 char c[3];
583 if (direction > 0)
584 match = text_find_next(txt, pos, "\n}");
585 else
586 match = text_find_prev(txt, pos, "\n}");
587 if (text_bytes_get(txt, match, sizeof c, c) != 3 || c[0] != '\n' || c[1] != '}')
588 break;
589 if (c[2] == '\r' || c[2] == '\n')
590 return match+1;
591 if (match == pos)
592 match += direction;
593 pos = match;
595 return start;
598 size_t text_function_end_next(Text *txt, size_t pos) {
599 return text_function_end_direction(txt, pos, +1);
602 size_t text_function_end_prev(Text *txt, size_t pos) {
603 return text_function_end_direction(txt, pos, -1);
606 size_t text_bracket_match(Text *txt, size_t pos) {
607 return text_bracket_match_symbol(txt, pos, NULL);
610 size_t text_bracket_match_symbol(Text *txt, size_t pos, const char *symbols) {
611 int direction, count = 1;
612 char search, current, c;
613 bool instring = false;
614 Iterator it = text_iterator_get(txt, pos);
615 if (!text_iterator_byte_get(&it, &current))
616 return pos;
617 if (symbols && !memchr(symbols, current, strlen(symbols)))
618 return pos;
619 switch (current) {
620 case '(': search = ')'; direction = 1; break;
621 case ')': search = '('; direction = -1; break;
622 case '{': search = '}'; direction = 1; break;
623 case '}': search = '{'; direction = -1; break;
624 case '[': search = ']'; direction = 1; break;
625 case ']': search = '['; direction = -1; break;
626 case '<': search = '>'; direction = 1; break;
627 case '>': search = '<'; direction = -1; break;
628 case '"':
629 case '`':
630 case '\'': {
631 char special[] = " \n)}]>.,:;";
632 search = current;
633 direction = 1;
634 if (text_iterator_byte_next(&it, &c)) {
635 /* if a single or double quote is followed by
636 * a special character, search backwards */
637 if (memchr(special, c, sizeof(special)))
638 direction = -1;
639 text_iterator_byte_prev(&it, NULL);
641 break;
643 default: return pos;
646 if (direction >= 0) { /* forward search */
647 while (text_iterator_byte_next(&it, &c)) {
648 if (c != current && c == '"')
649 instring = !instring;
650 if (!instring) {
651 if (c == search && --count == 0)
652 return it.pos;
653 else if (c == current)
654 count++;
657 } else { /* backwards */
658 while (text_iterator_byte_prev(&it, &c)) {
659 if (c != current && c == '"')
660 instring = !instring;
661 if (!instring) {
662 if (c == search && --count == 0)
663 return it.pos;
664 else if (c == current)
665 count++;
670 return pos; /* no match found */
673 size_t text_search_forward(Text *txt, size_t pos, Regex *regex) {
674 size_t start = pos + 1;
675 size_t end = text_size(txt);
676 RegexMatch match[1];
677 bool found = start < end && !text_search_range_forward(txt, start, end - start, regex, 1, match, 0);
679 if (!found) {
680 start = 0;
681 end = pos;
682 found = !text_search_range_forward(txt, start, end, regex, 1, match, 0);
685 return found ? match[0].start : pos;
688 size_t text_search_backward(Text *txt, size_t pos, Regex *regex) {
689 size_t start = 0;
690 size_t end = pos;
691 RegexMatch match[1];
692 bool found = !text_search_range_backward(txt, start, end, regex, 1, match, 0);
694 if (!found) {
695 start = pos + 1;
696 end = text_size(txt);
697 found = start < end && !text_search_range_backward(txt, start, end - start, regex, 1, match, 0);
700 return found ? match[0].start : pos;