text: avoid invalid pointer arithmetic
[vis.git] / text-motions.c
blob1430d4906142d5603355586f88250c5530cc331b
1 #include <ctype.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #include <wchar.h>
5 #include <errno.h>
6 #include <limits.h>
7 #include "text-motions.h"
8 #include "text-util.h"
9 #include "util.h"
10 #include "text-objects.h"
/* True when c is a space or a horizontal tab. Evaluates its argument twice. */
#define blank(c) ((c) == ' ' || (c) == '\t')
/* isspace() applied to the whole argument converted to unsigned char. */
#define space(c) (isspace((unsigned char)(c)))
/* Calls whatever `isboundary` names at the expansion site (the customword
 * functions pass it in as a parameter) on the argument converted to
 * unsigned char. The argument is parenthesized so the cast covers the whole
 * expression rather than only its first operand. */
#define boundary(c) (isboundary((unsigned char)(c)))
// TODO: specify this per file type?
/* Returns 1 when c passes ISASCII() and is neither a digit, an ASCII letter
 * nor an underscore; returns 0 otherwise. */
int is_word_boundary(int c) {
	if (!ISASCII(c))
		return 0;
	const int digit = c >= '0' && c <= '9';
	const int lower = c >= 'a' && c <= 'z';
	const int upper = c >= 'A' && c <= 'Z';
	if (digit || lower || upper || c == '_')
		return 0;
	return 1;
}
23 size_t text_begin(Text *txt, size_t pos) {
24 return 0;
27 size_t text_end(Text *txt, size_t pos) {
28 return text_size(txt);
31 size_t text_char_next(Text *txt, size_t pos) {
32 Iterator it = text_iterator_get(txt, pos);
33 text_iterator_char_next(&it, NULL);
34 return it.pos;
37 size_t text_char_prev(Text *txt, size_t pos) {
38 Iterator it = text_iterator_get(txt, pos);
39 text_iterator_char_prev(&it, NULL);
40 return it.pos;
43 size_t text_codepoint_next(Text *txt, size_t pos) {
44 Iterator it = text_iterator_get(txt, pos);
45 text_iterator_codepoint_next(&it, NULL);
46 return it.pos;
49 size_t text_codepoint_prev(Text *txt, size_t pos) {
50 Iterator it = text_iterator_get(txt, pos);
51 text_iterator_codepoint_prev(&it, NULL);
52 return it.pos;
55 static size_t find_next(Text *txt, size_t pos, const char *s, bool line) {
56 if (!s)
57 return pos;
58 size_t len = strlen(s), matched = 0;
59 Iterator it = text_iterator_get(txt, pos), sit;
60 for (char c; matched < len && text_iterator_byte_get(&it, &c); ) {
61 if (c == s[matched]) {
62 if (matched == 0)
63 sit = it;
64 matched++;
65 } else if (matched > 0) {
66 it = sit;
67 matched = 0;
69 text_iterator_byte_next(&it, NULL);
70 if (line && c == '\n')
71 break;
73 return matched == len ? it.pos - len : pos;
76 size_t text_find_next(Text *txt, size_t pos, const char *s) {
77 return find_next(txt, pos, s, false);
80 size_t text_line_find_next(Text *txt, size_t pos, const char *s) {
81 return find_next(txt, pos, s, true);
84 static size_t find_prev(Text *txt, size_t pos, const char *s, bool line) {
85 if (!s)
86 return pos;
87 size_t len = strlen(s), matched = len - 1;
88 Iterator it = text_iterator_get(txt, pos), sit;
89 if (len == 0)
90 return pos;
91 for (char c; text_iterator_byte_prev(&it, &c); ) {
92 if (c == s[matched]) {
93 if (matched == 0)
94 return it.pos;
95 if (matched == len - 1)
96 sit = it;
97 matched--;
98 } else if (matched < len - 1) {
99 it = sit;
100 matched = len - 1;
102 if (line && c == '\n')
103 break;
105 return pos;
108 size_t text_find_prev(Text *txt, size_t pos, const char *s) {
109 return find_prev(txt, pos, s, false);
112 size_t text_line_find_prev(Text *txt, size_t pos, const char *s) {
113 return find_prev(txt, pos, s, true);
116 size_t text_line_prev(Text *txt, size_t pos) {
117 Iterator it = text_iterator_get(txt, pos);
118 text_iterator_byte_find_prev(&it, '\n');
119 return it.pos;
122 size_t text_line_begin(Text *txt, size_t pos) {
123 Iterator it = text_iterator_get(txt, pos);
124 return text_iterator_byte_find_prev(&it, '\n') ? it.pos+1 : it.pos;
127 size_t text_line_start(Text *txt, size_t pos) {
128 char c;
129 Iterator it = text_iterator_get(txt, text_line_begin(txt, pos));
130 while (text_iterator_byte_get(&it, &c) && blank(c))
131 text_iterator_byte_next(&it, NULL);
132 return it.pos;
135 size_t text_line_finish(Text *txt, size_t pos) {
136 char c;
137 size_t end = text_line_end(txt, pos);
138 Iterator it = text_iterator_get(txt, end);
139 if (!text_iterator_byte_prev(&it, &c) || c == '\n')
140 return end;
141 while (blank(c) && text_iterator_byte_prev(&it, &c));
142 return it.pos + (c == '\n');
145 size_t text_line_end(Text *txt, size_t pos) {
146 Iterator it = text_iterator_get(txt, pos);
147 text_iterator_byte_find_next(&it, '\n');
148 return it.pos;
151 size_t text_line_next(Text *txt, size_t pos) {
152 Iterator it = text_iterator_get(txt, pos);
153 if (text_iterator_byte_find_next(&it, '\n'))
154 text_iterator_byte_next(&it, NULL);
155 return it.pos;
158 size_t text_line_offset(Text *txt, size_t pos, size_t off) {
159 char c;
160 size_t bol = text_line_begin(txt, pos);
161 Iterator it = text_iterator_get(txt, bol);
162 while (off-- > 0 && text_iterator_byte_get(&it, &c) && c != '\n')
163 text_iterator_byte_next(&it, NULL);
164 return it.pos;
167 size_t text_line_char_set(Text *txt, size_t pos, int count) {
168 char c;
169 size_t bol = text_line_begin(txt, pos);
170 Iterator it = text_iterator_get(txt, bol);
171 if (text_iterator_byte_get(&it, &c) && c != '\n')
172 while (count-- > 0 && text_iterator_char_next(&it, &c) && c != '\n');
173 return it.pos;
176 int text_line_char_get(Text *txt, size_t pos) {
177 char c;
178 int count = 0;
179 size_t bol = text_line_begin(txt, pos);
180 Iterator it = text_iterator_get(txt, bol);
181 if (text_iterator_byte_get(&it, &c) && c != '\n') {
182 while (it.pos < pos && c != '\n' && text_iterator_char_next(&it, &c))
183 count++;
185 return count;
188 int text_line_width_get(Text *txt, size_t pos) {
189 int width = 0;
190 mbstate_t ps = { 0 };
191 size_t bol = text_line_begin(txt, pos);
192 Iterator it = text_iterator_get(txt, bol);
194 while (it.pos < pos) {
195 char buf[MB_LEN_MAX];
196 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
197 if (len == 0 || buf[0] == '\n')
198 break;
199 wchar_t wc;
200 size_t wclen = mbrtowc(&wc, buf, len, &ps);
201 if (wclen == (size_t)-1 && errno == EILSEQ) {
202 ps = (mbstate_t){0};
203 /* assume a replacement symbol will be displayed */
204 width++;
205 } else if (wclen == (size_t)-2) {
206 /* do nothing, advance to next character */
207 } else if (wclen == 0) {
208 /* assume NUL byte will be displayed as ^@ */
209 width += 2;
210 } else if (buf[0] == '\t') {
211 width++;
212 } else {
213 int w = wcwidth(wc);
214 if (w == -1)
215 w = 2; /* assume non-printable will be displayed as ^{char} */
216 width += w;
219 if (!text_iterator_codepoint_next(&it, NULL))
220 break;
223 return width;
226 size_t text_line_width_set(Text *txt, size_t pos, int width) {
227 int cur_width = 0;
228 mbstate_t ps = { 0 };
229 size_t bol = text_line_begin(txt, pos);
230 Iterator it = text_iterator_get(txt, bol);
232 for (;;) {
233 char buf[MB_LEN_MAX];
234 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
235 if (len == 0 || buf[0] == '\n')
236 break;
237 wchar_t wc;
238 size_t wclen = mbrtowc(&wc, buf, len, &ps);
239 if (wclen == (size_t)-1 && errno == EILSEQ) {
240 ps = (mbstate_t){0};
241 /* assume a replacement symbol will be displayed */
242 cur_width++;
243 } else if (wclen == (size_t)-2) {
244 /* do nothing, advance to next character */
245 } else if (wclen == 0) {
246 /* assume NUL byte will be displayed as ^@ */
247 cur_width += 2;
248 } else if (buf[0] == '\t') {
249 cur_width++;
250 } else {
251 int w = wcwidth(wc);
252 if (w == -1)
253 w = 2; /* assume non-printable will be displayed as ^{char} */
254 cur_width += w;
257 if (cur_width >= width || !text_iterator_codepoint_next(&it, NULL))
258 break;
261 return it.pos;
264 size_t text_line_char_next(Text *txt, size_t pos) {
265 char c;
266 Iterator it = text_iterator_get(txt, pos);
267 if (!text_iterator_byte_get(&it, &c) || c == '\n')
268 return pos;
269 text_iterator_char_next(&it, NULL);
270 return it.pos;
273 size_t text_line_char_prev(Text *txt, size_t pos) {
274 char c;
275 Iterator it = text_iterator_get(txt, pos);
276 if (!text_iterator_char_prev(&it, &c) || c == '\n')
277 return pos;
278 return it.pos;
281 size_t text_line_up(Text *txt, size_t pos) {
282 int width = text_line_width_get(txt, pos);
283 size_t prev = text_line_prev(txt, pos);
284 return text_line_width_set(txt, prev, width);
287 size_t text_line_down(Text *txt, size_t pos) {
288 int width = text_line_width_get(txt, pos);
289 size_t next = text_line_next(txt, pos);
290 if (next == text_size(txt))
291 return pos;
292 return text_line_width_set(txt, next, width);
295 size_t text_range_line_first(Text *txt, Filerange *r) {
296 if (!text_range_valid(r))
297 return EPOS;
298 return r->start;
301 size_t text_range_line_last(Text *txt, Filerange *r) {
302 if (!text_range_valid(r))
303 return EPOS;
304 size_t pos = text_line_begin(txt, r->end);
305 if (pos == r->end) {
306 /* range ends at a begin of a line, skip last line ending */
307 pos = text_line_prev(txt, pos);
308 pos = text_line_begin(txt, pos);
310 return r->start <= pos ? pos : r->start;
313 size_t text_range_line_next(Text *txt, Filerange *r, size_t pos) {
314 if (!text_range_contains(r, pos))
315 return EPOS;
316 size_t newpos = text_line_next(txt, pos);
317 return newpos != pos && newpos < r->end ? newpos : EPOS;
320 size_t text_range_line_prev(Text *txt, Filerange *r, size_t pos) {
321 if (!text_range_contains(r, pos))
322 return EPOS;
323 size_t newpos = text_line_begin(txt, text_line_prev(txt, pos));
324 return newpos != pos && r->start <= newpos ? newpos : EPOS;
327 size_t text_customword_start_next(Text *txt, size_t pos, int (*isboundary)(int)) {
328 char c;
329 Iterator it = text_iterator_get(txt, pos);
330 if (!text_iterator_byte_get(&it, &c))
331 return pos;
332 if (boundary(c))
333 while (boundary(c) && !space(c) && text_iterator_char_next(&it, &c));
334 else
335 while (!boundary(c) && text_iterator_char_next(&it, &c));
336 while (space(c) && text_iterator_char_next(&it, &c));
337 return it.pos;
340 size_t text_customword_start_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
341 char c;
342 Iterator it = text_iterator_get(txt, pos);
343 while (text_iterator_char_prev(&it, &c) && space(c));
344 if (boundary(c))
345 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && boundary(c) && !space(c));
346 else
347 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && !boundary(c));
348 return pos;
351 size_t text_customword_end_next(Text *txt, size_t pos, int (*isboundary)(int)) {
352 char c;
353 Iterator it = text_iterator_get(txt, pos);
354 while (text_iterator_char_next(&it, &c) && space(c));
355 if (boundary(c))
356 do pos = it.pos; while (text_iterator_char_next(&it, &c) && boundary(c) && !space(c));
357 else
358 do pos = it.pos; while (text_iterator_char_next(&it, &c) && !isboundary(c));
359 return pos;
362 size_t text_customword_end_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
363 char c;
364 Iterator it = text_iterator_get(txt, pos);
365 if (!text_iterator_byte_get(&it, &c))
366 return pos;
367 if (boundary(c))
368 while (boundary(c) && !space(c) && text_iterator_char_prev(&it, &c));
369 else
370 while (!boundary(c) && text_iterator_char_prev(&it, &c));
371 while (space(c) && text_iterator_char_prev(&it, &c));
372 return it.pos;
375 size_t text_longword_end_next(Text *txt, size_t pos) {
376 return text_customword_end_next(txt, pos, isspace);
379 size_t text_longword_end_prev(Text *txt, size_t pos) {
380 return text_customword_end_prev(txt, pos, isspace);
383 size_t text_longword_start_next(Text *txt, size_t pos) {
384 return text_customword_start_next(txt, pos, isspace);
387 size_t text_longword_start_prev(Text *txt, size_t pos) {
388 return text_customword_start_prev(txt, pos, isspace);
391 size_t text_word_end_next(Text *txt, size_t pos) {
392 return text_customword_end_next(txt, pos, is_word_boundary);
395 size_t text_word_end_prev(Text *txt, size_t pos) {
396 return text_customword_end_prev(txt, pos, is_word_boundary);
399 size_t text_word_start_next(Text *txt, size_t pos) {
400 return text_customword_start_next(txt, pos, is_word_boundary);
403 size_t text_word_start_prev(Text *txt, size_t pos) {
404 return text_customword_start_prev(txt, pos, is_word_boundary);
407 size_t text_sentence_next(Text *txt, size_t pos) {
408 char c, prev = 'X';
409 Iterator it = text_iterator_get(txt, pos), rev = it;
411 if (!text_iterator_byte_get(&it, &c))
412 return pos;
414 while (text_iterator_byte_get(&rev, &prev) && space(prev))
415 text_iterator_byte_prev(&rev, NULL);
416 prev = rev.pos == 0 ? '.' : prev; /* simulate punctuation at BOF */
418 do {
419 if ((prev == '.' || prev == '?' || prev == '!') && space(c)) {
420 do text_iterator_byte_next(&it, NULL);
421 while (text_iterator_byte_get(&it, &c) && space(c));
422 return it.pos;
424 prev = c;
425 } while (text_iterator_byte_next(&it, &c));
426 return it.pos;
429 size_t text_sentence_prev(Text *txt, size_t pos) {
430 char c, prev = 'X';
431 bool content = false;
432 Iterator it = text_iterator_get(txt, pos);
434 while (it.pos != 0 && text_iterator_byte_prev(&it, &c)) {
435 if (content && space(prev) && (c == '.' || c == '?' || c == '!')) {
436 do text_iterator_byte_next(&it, NULL);
437 while (text_iterator_byte_get(&it, &c) && space(c));
438 return it.pos;
440 content |= !space(c);
441 prev = c;
442 } /* The loop only ends on hitting BOF or error */
443 if (content) /* starting pos was after first sentence in file => find that sentences start */
444 while (text_iterator_byte_get(&it, &c) && space(c))
445 text_iterator_byte_next(&it, NULL);
446 return it.pos;
449 size_t text_paragraph_next(Text *txt, size_t pos) {
450 char c;
451 Iterator it = text_iterator_get(txt, pos);
453 while (text_iterator_byte_get(&it, &c) && (c == '\n' || blank(c)))
454 text_iterator_char_next(&it, NULL);
455 return text_line_blank_next(txt, it.pos);
458 size_t text_paragraph_prev(Text *txt, size_t pos) {
459 char c;
460 Iterator it = text_iterator_get(txt, pos);
462 while (text_iterator_byte_prev(&it, &c) && (c == '\n' || blank(c)));
463 return text_line_blank_prev(txt, it.pos);
466 size_t text_line_empty_next(Text *txt, size_t pos) {
467 char c;
468 Iterator it = text_iterator_get(txt, pos);
469 while (text_iterator_byte_find_next(&it, '\n')) {
470 if (text_iterator_byte_next(&it, &c) && c == '\n')
471 return it.pos;
473 return it.pos;
476 size_t text_line_empty_prev(Text *txt, size_t pos) {
477 char c;
478 Iterator it = text_iterator_get(txt, pos);
479 while (text_iterator_byte_find_prev(&it, '\n')) {
480 if (text_iterator_byte_prev(&it, &c) && c == '\n')
481 return it.pos + 1;
483 return it.pos;
486 size_t text_line_blank_next(Text *txt, size_t pos) {
487 char c;
488 Iterator it = text_iterator_get(txt, pos);
489 while (text_iterator_byte_find_next(&it, '\n')) {
490 size_t n = it.pos;
491 while (text_iterator_byte_next(&it, &c) && blank(c));
492 if (c == '\n')
493 return n + 1;
495 return it.pos;
498 size_t text_line_blank_prev(Text *txt, size_t pos) {
499 char c;
500 Iterator it = text_iterator_get(txt, pos);
501 while (text_iterator_byte_find_prev(&it, '\n')) {
502 while (text_iterator_byte_prev(&it, &c) && blank(c));
503 if (c == '\n')
504 return it.pos + 1;
506 return it.pos;
509 size_t text_block_start(Text *txt, size_t pos) {
510 Filerange r = text_object_curly_bracket(txt, pos-1);
511 return text_range_valid(&r) ? r.start-1 : pos;
514 size_t text_block_end(Text *txt, size_t pos) {
515 Filerange r = text_object_curly_bracket(txt, pos+1);
516 return text_range_valid(&r) ? r.end : pos;
519 size_t text_parenthesis_start(Text *txt, size_t pos) {
520 Filerange r = text_object_parenthesis(txt, pos-1);
521 return text_range_valid(&r) ? r.start-1 : pos;
524 size_t text_parenthesis_end(Text *txt, size_t pos) {
525 Filerange r = text_object_parenthesis(txt, pos+1);
526 return text_range_valid(&r) ? r.end : pos;
529 size_t text_bracket_match(Text *txt, size_t pos, const Filerange *limits) {
530 return text_bracket_match_symbol(txt, pos, NULL, limits);
533 static size_t match_symbol(Text *txt, size_t pos, char search, int direction, const Filerange *limits) {
534 char c, current;
535 int count = 1;
536 bool instring = false;
537 Iterator it = text_iterator_get(txt, pos);
538 if (!text_iterator_byte_get(&it, &current))
539 return pos;
540 if (direction >= 0) { /* forward search */
541 while (text_iterator_byte_next(&it, &c)) {
542 if (limits && it.pos >= limits->end)
543 break;
544 if (c != current && c == '"')
545 instring = !instring;
546 if (!instring) {
547 if (c == search && --count == 0)
548 return it.pos;
549 else if (c == current)
550 count++;
553 } else { /* backwards */
554 while (text_iterator_byte_prev(&it, &c)) {
555 if (limits && it.pos < limits->start)
556 break;
557 if (c != current && c == '"')
558 instring = !instring;
559 if (!instring) {
560 if (c == search && --count == 0)
561 return it.pos;
562 else if (c == current)
563 count++;
568 return pos; /* no match found */
571 size_t text_bracket_match_symbol(Text *txt, size_t pos, const char *symbols, const Filerange *limits) {
572 int direction;
573 char search, current, c;
574 Iterator it = text_iterator_get(txt, pos);
575 if (!text_iterator_byte_get(&it, &current))
576 return pos;
577 if (symbols && !memchr(symbols, current, strlen(symbols)))
578 return pos;
579 switch (current) {
580 case '(': search = ')'; direction = 1; break;
581 case ')': search = '('; direction = -1; break;
582 case '{': search = '}'; direction = 1; break;
583 case '}': search = '{'; direction = -1; break;
584 case '[': search = ']'; direction = 1; break;
585 case ']': search = '['; direction = -1; break;
586 case '<': search = '>'; direction = 1; break;
587 case '>': search = '<'; direction = -1; break;
588 case '"':
589 case '`':
590 case '\'':
592 /* prefer matches on the same line */
593 size_t fw = match_symbol(txt, pos, current, +1, limits);
594 size_t bw = match_symbol(txt, pos, current, -1, limits);
595 if (fw == pos)
596 return bw;
597 if (bw == pos)
598 return fw;
599 size_t line = text_lineno_by_pos(txt, pos);
600 size_t line_fw = text_lineno_by_pos(txt, fw);
601 size_t line_bw = text_lineno_by_pos(txt, bw);
602 if (line != line_fw)
603 return bw;
604 if (line != line_bw)
605 return fw;
606 direction = +1;
607 if (text_iterator_byte_next(&it, &c)) {
608 /* if a single or double quote is followed by
609 * a special character, search backwards */
610 char special[] = " \t\n)}]>.,:;";
611 if (memchr(special, c, sizeof(special)))
612 direction = -1;
614 return direction >= 0 ? fw : bw;
616 default:
617 return pos;
620 return match_symbol(txt, pos, search, direction, limits);
623 size_t text_search_forward(Text *txt, size_t pos, Regex *regex) {
624 size_t start = pos + 1;
625 size_t end = text_size(txt);
626 RegexMatch match[1];
627 char c;
628 int flags = text_byte_get(txt, pos, &c) && c == '\n' ? 0 : REG_NOTBOL;
629 bool found = start < end && !text_search_range_forward(txt, start, end - start, regex, 1, match, flags);
631 if (!found) {
632 start = 0;
633 found = !text_search_range_forward(txt, start, end - start, regex, 1, match, 0);
636 return found ? match[0].start : pos;
639 size_t text_search_backward(Text *txt, size_t pos, Regex *regex) {
640 size_t start = 0;
641 size_t end = pos;
642 RegexMatch match[1];
643 bool found = !text_search_range_backward(txt, start, end, regex, 1, match, REG_NOTEOL);
645 if (!found) {
646 end = text_size(txt);
647 found = !text_search_range_backward(txt, start, end - start, regex, 1, match, 0);
650 return found ? match[0].start : pos;