vis: let '^ mark point to top of jump list
[vis.git] / text-motions.c
blob4dd7b89a6c0098cf00d7298b6f97765f847e7d23
1 #include <ctype.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #include <wchar.h>
5 #include <errno.h>
6 #include <limits.h>
7 #include "text-motions.h"
8 #include "text-util.h"
9 #include "util.h"
10 #include "text-objects.h"
/* Byte classification helpers.
 * The argument is parenthesized before the cast to unsigned char so that
 * an expression argument (e.g. space(a + b)) is converted as a whole
 * instead of only its first operand. */
#define blank(c) ((c) == ' ' || (c) == '\t')
#define space(c) (isspace((unsigned char)(c)))
/* Requires a predicate called `isboundary` to be in scope where it is used. */
#define boundary(c) (isboundary((unsigned char)(c)))
// TODO: specify this per file type?
/* Default word boundary predicate.
 * Returns 1 when ISASCII(c) holds and c is none of [0-9], [a-z], [A-Z]
 * or '_'; returns 0 otherwise (including when ISASCII(c) is false). */
int is_word_boundary(int c) {
	if (!ISASCII(c))
		return 0;
	int is_digit = '0' <= c && c <= '9';
	int is_lower = 'a' <= c && c <= 'z';
	int is_upper = 'A' <= c && c <= 'Z';
	if (is_digit || is_lower || is_upper || c == '_')
		return 0;
	return 1;
}
23 size_t text_begin(Text *txt, size_t pos) {
24 return 0;
27 size_t text_end(Text *txt, size_t pos) {
28 return text_size(txt);
31 size_t text_char_next(Text *txt, size_t pos) {
32 Iterator it = text_iterator_get(txt, pos);
33 text_iterator_char_next(&it, NULL);
34 return it.pos;
37 size_t text_char_prev(Text *txt, size_t pos) {
38 Iterator it = text_iterator_get(txt, pos);
39 text_iterator_char_prev(&it, NULL);
40 return it.pos;
43 size_t text_codepoint_next(Text *txt, size_t pos) {
44 Iterator it = text_iterator_get(txt, pos);
45 text_iterator_codepoint_next(&it, NULL);
46 return it.pos;
49 size_t text_codepoint_prev(Text *txt, size_t pos) {
50 Iterator it = text_iterator_get(txt, pos);
51 text_iterator_codepoint_prev(&it, NULL);
52 return it.pos;
55 static size_t find_next(Text *txt, size_t pos, const char *s, bool line) {
56 if (!s)
57 return pos;
58 size_t len = strlen(s), matched = 0;
59 Iterator it = text_iterator_get(txt, pos), sit;
60 for (char c; matched < len && text_iterator_byte_get(&it, &c); ) {
61 if (c == s[matched]) {
62 if (matched == 0)
63 sit = it;
64 matched++;
65 } else if (matched > 0) {
66 it = sit;
67 matched = 0;
69 text_iterator_byte_next(&it, NULL);
70 if (line && c == '\n')
71 break;
73 return matched == len ? it.pos - len : pos;
76 size_t text_find_next(Text *txt, size_t pos, const char *s) {
77 return find_next(txt, pos, s, false);
80 size_t text_line_find_next(Text *txt, size_t pos, const char *s) {
81 return find_next(txt, pos, s, true);
84 static size_t find_prev(Text *txt, size_t pos, const char *s, bool line) {
85 if (!s)
86 return pos;
87 size_t len = strlen(s), matched = len - 1;
88 Iterator it = text_iterator_get(txt, pos), sit;
89 if (len == 0)
90 return pos;
91 for (char c; text_iterator_byte_prev(&it, &c); ) {
92 if (c == s[matched]) {
93 if (matched == 0)
94 return it.pos;
95 if (matched == len - 1)
96 sit = it;
97 matched--;
98 } else if (matched < len - 1) {
99 it = sit;
100 matched = len - 1;
102 if (line && c == '\n')
103 break;
105 return pos;
108 size_t text_find_prev(Text *txt, size_t pos, const char *s) {
109 return find_prev(txt, pos, s, false);
112 size_t text_line_find_prev(Text *txt, size_t pos, const char *s) {
113 return find_prev(txt, pos, s, true);
116 size_t text_line_prev(Text *txt, size_t pos) {
117 Iterator it = text_iterator_get(txt, pos);
118 text_iterator_byte_find_prev(&it, '\n');
119 return it.pos;
122 size_t text_line_begin(Text *txt, size_t pos) {
123 Iterator it = text_iterator_get(txt, pos);
124 return text_iterator_byte_find_prev(&it, '\n') ? it.pos+1 : it.pos;
127 size_t text_line_start(Text *txt, size_t pos) {
128 char c;
129 Iterator it = text_iterator_get(txt, text_line_begin(txt, pos));
130 while (text_iterator_byte_get(&it, &c) && blank(c))
131 text_iterator_byte_next(&it, NULL);
132 return it.pos;
135 size_t text_line_finish(Text *txt, size_t pos) {
136 char c;
137 size_t end = text_line_end(txt, pos);
138 Iterator it = text_iterator_get(txt, end);
139 if (!text_iterator_byte_prev(&it, &c) || c == '\n')
140 return end;
141 while (blank(c) && text_iterator_byte_prev(&it, &c));
142 return it.pos + (c == '\n');
145 size_t text_line_end(Text *txt, size_t pos) {
146 Iterator it = text_iterator_get(txt, pos);
147 text_iterator_byte_find_next(&it, '\n');
148 return it.pos;
151 size_t text_line_next(Text *txt, size_t pos) {
152 Iterator it = text_iterator_get(txt, pos);
153 if (text_iterator_byte_find_next(&it, '\n'))
154 text_iterator_byte_next(&it, NULL);
155 return it.pos;
158 size_t text_line_offset(Text *txt, size_t pos, size_t off) {
159 char c;
160 size_t bol = text_line_begin(txt, pos);
161 Iterator it = text_iterator_get(txt, bol);
162 while (off-- > 0 && text_iterator_byte_get(&it, &c) && c != '\n')
163 text_iterator_byte_next(&it, NULL);
164 return it.pos;
167 size_t text_line_char_set(Text *txt, size_t pos, int count) {
168 char c;
169 size_t bol = text_line_begin(txt, pos);
170 Iterator it = text_iterator_get(txt, bol);
171 if (text_iterator_byte_get(&it, &c) && c != '\n')
172 while (count-- > 0 && text_iterator_char_next(&it, &c) && c != '\n');
173 return it.pos;
176 int text_line_char_get(Text *txt, size_t pos) {
177 char c;
178 int count = 0;
179 size_t bol = text_line_begin(txt, pos);
180 Iterator it = text_iterator_get(txt, bol);
181 if (text_iterator_byte_get(&it, &c) && c != '\n') {
182 while (it.pos < pos && c != '\n' && text_iterator_char_next(&it, &c))
183 count++;
185 return count;
188 int text_line_width_get(Text *txt, size_t pos) {
189 int width = 0;
190 mbstate_t ps = { 0 };
191 size_t bol = text_line_begin(txt, pos);
192 Iterator it = text_iterator_get(txt, bol);
194 while (it.pos < pos) {
195 char buf[MB_LEN_MAX];
196 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
197 if (len == 0 || buf[0] == '\n')
198 break;
199 wchar_t wc;
200 size_t wclen = mbrtowc(&wc, buf, len, &ps);
201 if (wclen == (size_t)-1 && errno == EILSEQ) {
202 /* assume a replacement symbol will be displayed */
203 width++;
204 } else if (wclen == (size_t)-2) {
205 /* do nothing, advance to next character */
206 } else if (wclen == 0) {
207 /* assume NUL byte will be displayed as ^@ */
208 width += 2;
209 } else if (buf[0] == '\t') {
210 width++;
211 } else {
212 int w = wcwidth(wc);
213 if (w == -1)
214 w = 2; /* assume non-printable will be displayed as ^{char} */
215 width += w;
218 if (!text_iterator_codepoint_next(&it, NULL))
219 break;
222 return width;
225 size_t text_line_width_set(Text *txt, size_t pos, int width) {
226 int cur_width = 0;
227 mbstate_t ps = { 0 };
228 size_t bol = text_line_begin(txt, pos);
229 Iterator it = text_iterator_get(txt, bol);
231 for (;;) {
232 char buf[MB_LEN_MAX];
233 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
234 if (len == 0 || buf[0] == '\n')
235 break;
236 wchar_t wc;
237 size_t wclen = mbrtowc(&wc, buf, len, &ps);
238 if (wclen == (size_t)-1 && errno == EILSEQ) {
239 /* assume a replacement symbol will be displayed */
240 cur_width++;
241 } else if (wclen == (size_t)-2) {
242 /* do nothing, advance to next character */
243 } else if (wclen == 0) {
244 /* assume NUL byte will be displayed as ^@ */
245 cur_width += 2;
246 } else if (buf[0] == '\t') {
247 cur_width++;
248 } else {
249 int w = wcwidth(wc);
250 if (w == -1)
251 w = 2; /* assume non-printable will be displayed as ^{char} */
252 cur_width += w;
255 if (cur_width >= width || !text_iterator_codepoint_next(&it, NULL))
256 break;
259 return it.pos;
262 size_t text_line_char_next(Text *txt, size_t pos) {
263 char c;
264 Iterator it = text_iterator_get(txt, pos);
265 if (!text_iterator_byte_get(&it, &c) || c == '\n')
266 return pos;
267 text_iterator_char_next(&it, NULL);
268 return it.pos;
271 size_t text_line_char_prev(Text *txt, size_t pos) {
272 char c;
273 Iterator it = text_iterator_get(txt, pos);
274 if (!text_iterator_char_prev(&it, &c) || c == '\n')
275 return pos;
276 return it.pos;
279 size_t text_line_up(Text *txt, size_t pos) {
280 int width = text_line_width_get(txt, pos);
281 size_t prev = text_line_prev(txt, pos);
282 return text_line_width_set(txt, prev, width);
285 size_t text_line_down(Text *txt, size_t pos) {
286 int width = text_line_width_get(txt, pos);
287 size_t next = text_line_next(txt, pos);
288 return text_line_width_set(txt, next, width);
291 size_t text_range_line_first(Text *txt, Filerange *r) {
292 if (!text_range_valid(r))
293 return EPOS;
294 return r->start;
297 size_t text_range_line_last(Text *txt, Filerange *r) {
298 if (!text_range_valid(r))
299 return EPOS;
300 size_t pos = text_line_begin(txt, r->end);
301 if (pos == r->end) {
302 /* range ends at a begin of a line, skip last line ending */
303 pos = text_line_prev(txt, pos);
304 pos = text_line_begin(txt, pos);
306 return r->start <= pos ? pos : r->start;
309 size_t text_range_line_next(Text *txt, Filerange *r, size_t pos) {
310 if (!text_range_contains(r, pos))
311 return EPOS;
312 size_t newpos = text_line_next(txt, pos);
313 return newpos != pos && newpos < r->end ? newpos : EPOS;
316 size_t text_range_line_prev(Text *txt, Filerange *r, size_t pos) {
317 if (!text_range_contains(r, pos))
318 return EPOS;
319 size_t newpos = text_line_begin(txt, text_line_prev(txt, pos));
320 return newpos != pos && r->start <= newpos ? newpos : EPOS;
323 size_t text_customword_start_next(Text *txt, size_t pos, int (*isboundary)(int)) {
324 char c;
325 Iterator it = text_iterator_get(txt, pos);
326 if (!text_iterator_byte_get(&it, &c))
327 return pos;
328 if (boundary(c))
329 while (boundary(c) && !space(c) && text_iterator_char_next(&it, &c));
330 else
331 while (!boundary(c) && text_iterator_char_next(&it, &c));
332 while (space(c) && text_iterator_char_next(&it, &c));
333 return it.pos;
336 size_t text_customword_start_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
337 char c;
338 Iterator it = text_iterator_get(txt, pos);
339 while (text_iterator_char_prev(&it, &c) && space(c));
340 if (boundary(c))
341 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && boundary(c) && !space(c));
342 else
343 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && !boundary(c));
344 return pos;
347 size_t text_customword_end_next(Text *txt, size_t pos, int (*isboundary)(int)) {
348 char c;
349 Iterator it = text_iterator_get(txt, pos);
350 while (text_iterator_char_next(&it, &c) && space(c));
351 if (boundary(c))
352 do pos = it.pos; while (text_iterator_char_next(&it, &c) && boundary(c) && !space(c));
353 else
354 do pos = it.pos; while (text_iterator_char_next(&it, &c) && !isboundary(c));
355 return pos;
358 size_t text_customword_end_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
359 char c;
360 Iterator it = text_iterator_get(txt, pos);
361 if (!text_iterator_byte_get(&it, &c))
362 return pos;
363 if (boundary(c))
364 while (boundary(c) && !space(c) && text_iterator_char_prev(&it, &c));
365 else
366 while (!boundary(c) && text_iterator_char_prev(&it, &c));
367 while (space(c) && text_iterator_char_prev(&it, &c));
368 return it.pos;
371 size_t text_longword_end_next(Text *txt, size_t pos) {
372 return text_customword_end_next(txt, pos, isspace);
375 size_t text_longword_end_prev(Text *txt, size_t pos) {
376 return text_customword_end_prev(txt, pos, isspace);
379 size_t text_longword_start_next(Text *txt, size_t pos) {
380 return text_customword_start_next(txt, pos, isspace);
383 size_t text_longword_start_prev(Text *txt, size_t pos) {
384 return text_customword_start_prev(txt, pos, isspace);
387 size_t text_word_end_next(Text *txt, size_t pos) {
388 return text_customword_end_next(txt, pos, is_word_boundary);
391 size_t text_word_end_prev(Text *txt, size_t pos) {
392 return text_customword_end_prev(txt, pos, is_word_boundary);
395 size_t text_word_start_next(Text *txt, size_t pos) {
396 return text_customword_start_next(txt, pos, is_word_boundary);
399 size_t text_word_start_prev(Text *txt, size_t pos) {
400 return text_customword_start_prev(txt, pos, is_word_boundary);
403 size_t text_sentence_next(Text *txt, size_t pos) {
404 char c, prev = 'X';
405 Iterator it = text_iterator_get(txt, pos), rev = it;
407 if (!text_iterator_byte_get(&it, &c))
408 return pos;
410 while (text_iterator_byte_get(&rev, &prev) && space(prev))
411 text_iterator_byte_prev(&rev, NULL);
412 prev = rev.pos == 0 ? '.' : prev; /* simulate punctuation at BOF */
414 do {
415 if ((prev == '.' || prev == '?' || prev == '!') && space(c)) {
416 do text_iterator_byte_next(&it, NULL);
417 while (text_iterator_byte_get(&it, &c) && space(c));
418 return it.pos;
420 prev = c;
421 } while (text_iterator_byte_next(&it, &c));
422 return it.pos;
425 size_t text_sentence_prev(Text *txt, size_t pos) {
426 char c, prev = 'X';
427 bool content = false;
428 Iterator it = text_iterator_get(txt, pos);
430 while (it.pos != 0 && text_iterator_byte_prev(&it, &c)) {
431 if (content && space(prev) && (c == '.' || c == '?' || c == '!')) {
432 do text_iterator_byte_next(&it, NULL);
433 while (text_iterator_byte_get(&it, &c) && space(c));
434 return it.pos;
436 content |= !space(c);
437 prev = c;
438 } /* The loop only ends on hitting BOF or error */
439 if (content) /* starting pos was after first sentence in file => find that sentences start */
440 while (text_iterator_byte_get(&it, &c) && space(c))
441 text_iterator_byte_next(&it, NULL);
442 return it.pos;
445 size_t text_paragraph_next(Text *txt, size_t pos) {
446 char c;
447 Iterator it = text_iterator_get(txt, pos);
449 while (text_iterator_byte_get(&it, &c) && c == '\n')
450 text_iterator_char_next(&it, NULL);
451 return text_line_empty_next(txt, it.pos);
454 size_t text_paragraph_prev(Text *txt, size_t pos) {
455 char c;
456 Iterator it = text_iterator_get(txt, pos);
458 /* c == \0 catches starting the search at EOF */
459 while (text_iterator_byte_get(&it, &c) && (c == '\n' || c == '\0'))
460 text_iterator_byte_prev(&it, NULL);
461 return text_line_empty_prev(txt, it.pos);
464 size_t text_line_empty_next(Text *txt, size_t pos) {
465 char c;
466 Iterator it = text_iterator_get(txt, pos);
467 while (text_iterator_byte_find_next(&it, '\n')) {
468 if (text_iterator_byte_next(&it, &c) && c == '\n')
469 return it.pos;
471 return it.pos;
474 size_t text_line_empty_prev(Text *txt, size_t pos) {
475 char c;
476 Iterator it = text_iterator_get(txt, pos);
477 while (text_iterator_byte_find_prev(&it, '\n')) {
478 if (text_iterator_byte_prev(&it, &c) && c == '\n')
479 return it.pos + 1;
481 return it.pos;
484 size_t text_block_start(Text *txt, size_t pos) {
485 Filerange r = text_object_curly_bracket(txt, pos-1);
486 return text_range_valid(&r) ? r.start-1 : pos;
489 size_t text_block_end(Text *txt, size_t pos) {
490 Filerange r = text_object_curly_bracket(txt, pos+1);
491 return text_range_valid(&r) ? r.end : pos;
494 size_t text_parenthese_start(Text *txt, size_t pos) {
495 Filerange r = text_object_paranthese(txt, pos-1);
496 return text_range_valid(&r) ? r.start-1 : pos;
499 size_t text_parenthese_end(Text *txt, size_t pos) {
500 Filerange r = text_object_paranthese(txt, pos+1);
501 return text_range_valid(&r) ? r.end : pos;
504 size_t text_bracket_match(Text *txt, size_t pos) {
505 return text_bracket_match_symbol(txt, pos, NULL);
508 static size_t match_symbol(Text *txt, size_t pos, char search, int direction) {
509 char c, current;
510 int count = 1;
511 bool instring = false;
512 Iterator it = text_iterator_get(txt, pos);
513 if (!text_iterator_byte_get(&it, &current))
514 return pos;
515 if (direction >= 0) { /* forward search */
516 while (text_iterator_byte_next(&it, &c)) {
517 if (c != current && c == '"')
518 instring = !instring;
519 if (!instring) {
520 if (c == search && --count == 0)
521 return it.pos;
522 else if (c == current)
523 count++;
526 } else { /* backwards */
527 while (text_iterator_byte_prev(&it, &c)) {
528 if (c != current && c == '"')
529 instring = !instring;
530 if (!instring) {
531 if (c == search && --count == 0)
532 return it.pos;
533 else if (c == current)
534 count++;
539 return pos; /* no match found */
542 size_t text_bracket_match_symbol(Text *txt, size_t pos, const char *symbols) {
543 int direction;
544 char search, current, c;
545 Iterator it = text_iterator_get(txt, pos);
546 if (!text_iterator_byte_get(&it, &current))
547 return pos;
548 if (symbols && !memchr(symbols, current, strlen(symbols)))
549 return pos;
550 switch (current) {
551 case '(': search = ')'; direction = 1; break;
552 case ')': search = '('; direction = -1; break;
553 case '{': search = '}'; direction = 1; break;
554 case '}': search = '{'; direction = -1; break;
555 case '[': search = ']'; direction = 1; break;
556 case ']': search = '['; direction = -1; break;
557 case '<': search = '>'; direction = 1; break;
558 case '>': search = '<'; direction = -1; break;
559 case '"':
560 case '`':
561 case '\'':
563 /* prefer matches on the same line */
564 size_t fw = match_symbol(txt, pos, current, +1);
565 size_t bw = match_symbol(txt, pos, current, -1);
566 if (fw == pos)
567 return bw;
568 if (bw == pos)
569 return fw;
570 size_t line = text_lineno_by_pos(txt, pos);
571 size_t line_fw = text_lineno_by_pos(txt, fw);
572 size_t line_bw = text_lineno_by_pos(txt, bw);
573 if (line != line_fw)
574 return bw;
575 if (line != line_bw)
576 return fw;
577 direction = +1;
578 if (text_iterator_byte_next(&it, &c)) {
579 /* if a single or double quote is followed by
580 * a special character, search backwards */
581 char special[] = " \n)}]>.,:;";
582 if (memchr(special, c, sizeof(special)))
583 direction = -1;
585 return direction >= 0 ? fw : bw;
587 default:
588 return pos;
591 return match_symbol(txt, pos, search, direction);
594 size_t text_search_forward(Text *txt, size_t pos, Regex *regex) {
595 size_t start = pos + 1;
596 size_t end = text_size(txt);
597 RegexMatch match[1];
598 bool found = start < end && !text_search_range_forward(txt, start, end - start, regex, 1, match, 0);
600 if (!found) {
601 start = 0;
602 end = pos;
603 found = !text_search_range_forward(txt, start, end, regex, 1, match, 0);
606 return found ? match[0].start : pos;
609 size_t text_search_backward(Text *txt, size_t pos, Regex *regex) {
610 size_t start = 0;
611 size_t end = pos;
612 RegexMatch match[1];
613 bool found = !text_search_range_backward(txt, start, end, regex, 1, match, 0);
615 if (!found) {
616 start = pos + 1;
617 end = text_size(txt);
618 found = start < end && !text_search_range_backward(txt, start, end - start, regex, 1, match, 0);
621 return found ? match[0].start : pos;