6 #include "text-motions.h"
9 #include "text-objects.h"
/* Character-class helpers.
 *
 * The whole argument expression is converted to unsigned char before it is
 * handed to the predicate, because <ctype.h>-style functions must not receive
 * negative values other than EOF. The argument is parenthesized so that the
 * cast applies to the complete expression and not just its first operand.
 *
 * `boundary` calls whatever `isboundary` names at the point of use; in this
 * file that is the function-pointer parameter of the text_customword_*
 * motions. */
#define space(c) (isspace((unsigned char)(c)))
#define boundary(c) (isboundary((unsigned char)(c)))
14 // TODO: specify this per file type?
/* Word-boundary predicate handed to the text_customword_* motions by the
 * text_word_* wrappers.
 *
 * Returns 1 when `c` passes ISASCII() and is not an ASCII digit, an ASCII
 * letter or an underscore. Returns 0 otherwise, which includes every value
 * rejected by ISASCII(). */
int is_word_boundary(int c) {
	if (!ISASCII(c))
		return 0;

	int is_digit = '0' <= c && c <= '9';
	int is_lower = 'a' <= c && c <= 'z';
	int is_upper = 'A' <= c && c <= 'Z';
	int is_underscore = c == '_';

	/* anything that may be part of an identifier is not a boundary */
	return !(is_digit || is_lower || is_upper || is_underscore);
}
21 size_t text_begin(Text
*txt
, size_t pos
) {
25 size_t text_end(Text
*txt
, size_t pos
) {
26 return text_size(txt
);
29 size_t text_char_next(Text
*txt
, size_t pos
) {
30 Iterator it
= text_iterator_get(txt
, pos
);
31 text_iterator_char_next(&it
, NULL
);
35 size_t text_char_prev(Text
*txt
, size_t pos
) {
36 Iterator it
= text_iterator_get(txt
, pos
);
37 text_iterator_char_prev(&it
, NULL
);
41 static size_t find_next(Text
*txt
, size_t pos
, const char *s
, bool line
) {
44 size_t len
= strlen(s
), matched
= 0;
45 Iterator it
= text_iterator_get(txt
, pos
), sit
;
46 for (char c
; matched
< len
&& text_iterator_byte_get(&it
, &c
); ) {
47 if (c
== s
[matched
]) {
51 } else if (matched
> 0) {
55 text_iterator_byte_next(&it
, NULL
);
56 if (line
&& c
== '\n')
59 return matched
== len
? it
.pos
- len
: pos
;
62 size_t text_find_next(Text
*txt
, size_t pos
, const char *s
) {
63 return find_next(txt
, pos
, s
, false);
66 size_t text_line_find_next(Text
*txt
, size_t pos
, const char *s
) {
67 return find_next(txt
, pos
, s
, true);
70 static size_t find_prev(Text
*txt
, size_t pos
, const char *s
, bool line
) {
73 size_t len
= strlen(s
), matched
= len
- 1;
74 Iterator it
= text_iterator_get(txt
, pos
), sit
;
77 for (char c
; text_iterator_byte_prev(&it
, &c
); ) {
78 if (c
== s
[matched
]) {
81 if (matched
== len
- 1)
84 } else if (matched
< len
- 1) {
88 if (line
&& c
== '\n')
94 size_t text_find_prev(Text
*txt
, size_t pos
, const char *s
) {
95 return find_prev(txt
, pos
, s
, false);
98 size_t text_line_find_prev(Text
*txt
, size_t pos
, const char *s
) {
99 return find_prev(txt
, pos
, s
, true);
102 size_t text_line_prev(Text
*txt
, size_t pos
) {
104 Iterator it
= text_iterator_get(txt
, pos
);
105 if (!text_iterator_byte_get(&it
, &c
))
108 text_iterator_byte_prev(&it
, &c
);
110 text_iterator_byte_prev(&it
, &c
);
111 while (text_iterator_byte_get(&it
, &c
) && c
!= '\n')
112 text_iterator_byte_prev(&it
, NULL
);
113 if (text_iterator_byte_prev(&it
, &c
) && c
!= '\r')
114 text_iterator_byte_next(&it
, &c
);
118 size_t text_line_begin(Text
*txt
, size_t pos
) {
120 Iterator it
= text_iterator_get(txt
, pos
);
121 if (!text_iterator_byte_get(&it
, &c
))
124 text_iterator_byte_prev(&it
, &c
);
126 text_iterator_byte_prev(&it
, &c
);
127 while (text_iterator_byte_get(&it
, &c
)) {
132 text_iterator_byte_prev(&it
, NULL
);
137 size_t text_line_start(Text
*txt
, size_t pos
) {
139 Iterator it
= text_iterator_get(txt
, text_line_begin(txt
, pos
));
140 while (text_iterator_byte_get(&it
, &c
) && c
!= '\n' && space(c
))
141 text_iterator_byte_next(&it
, NULL
);
145 size_t text_line_finish(Text
*txt
, size_t pos
) {
147 Iterator it
= text_iterator_get(txt
, text_line_end(txt
, pos
));
148 do text_iterator_char_prev(&it
, NULL
);
149 while (text_iterator_byte_get(&it
, &c
) && c
!= '\n' && space(c
));
153 size_t text_line_lastchar(Text
*txt
, size_t pos
) {
155 Iterator it
= text_iterator_get(txt
, text_line_end(txt
, pos
));
156 if (text_iterator_char_prev(&it
, &c
) && c
== '\n')
157 text_iterator_byte_next(&it
, NULL
);
161 size_t text_line_end(Text
*txt
, size_t pos
) {
163 Iterator it
= text_iterator_get(txt
, pos
);
164 while (text_iterator_byte_get(&it
, &c
) && c
!= '\r' && c
!= '\n')
165 text_iterator_byte_next(&it
, NULL
);
169 size_t text_line_next(Text
*txt
, size_t pos
) {
171 Iterator it
= text_iterator_get(txt
, pos
);
172 while (text_iterator_byte_get(&it
, &c
) && c
!= '\n')
173 text_iterator_byte_next(&it
, NULL
);
174 text_iterator_byte_next(&it
, NULL
);
178 size_t text_line_offset(Text
*txt
, size_t pos
, size_t off
) {
180 size_t bol
= text_line_begin(txt
, pos
);
181 Iterator it
= text_iterator_get(txt
, bol
);
182 while (off
-- > 0 && text_iterator_byte_get(&it
, &c
) && c
!= '\r' && c
!= '\n')
183 text_iterator_byte_next(&it
, NULL
);
187 size_t text_line_char_set(Text
*txt
, size_t pos
, int count
) {
189 size_t bol
= text_line_begin(txt
, pos
);
190 Iterator it
= text_iterator_get(txt
, bol
);
191 while (count
-- > 0 && text_iterator_byte_get(&it
, &c
) && c
!= '\r' && c
!= '\n')
192 text_iterator_char_next(&it
, NULL
);
196 int text_line_char_get(Text
*txt
, size_t pos
) {
199 size_t bol
= text_line_begin(txt
, pos
);
200 Iterator it
= text_iterator_get(txt
, bol
);
201 while (text_iterator_byte_get(&it
, &c
) && it
.pos
< pos
&& c
!= '\r' && c
!= '\n') {
202 text_iterator_char_next(&it
, NULL
);
208 int text_line_width_get(Text
*txt
, size_t pos
) {
210 mbstate_t ps
= { 0 };
211 size_t bol
= text_line_begin(txt
, pos
);
212 Iterator it
= text_iterator_get(txt
, bol
);
214 while (it
.pos
< pos
) {
215 char buf
[MB_CUR_MAX
];
216 size_t len
= text_bytes_get(txt
, it
.pos
, sizeof buf
, buf
);
217 if (len
== 0 || buf
[0] == '\r' || buf
[0] == '\n')
220 size_t wclen
= mbrtowc(&wc
, buf
, len
, &ps
);
221 if (wclen
== (size_t)-1 && errno
== EILSEQ
) {
222 /* assume a replacement symbol will be displayed */
224 } else if (wclen
== (size_t)-2) {
225 /* do nothing, advance to next character */
226 } else if (wclen
== 0) {
227 /* assume NUL byte will be displayed as ^@ */
229 } else if (buf
[0] == '\t') {
234 w
= 2; /* assume non-printable will be displayed as ^{char} */
238 if (!text_iterator_codepoint_next(&it
, NULL
))
245 size_t text_line_width_set(Text
*txt
, size_t pos
, int width
) {
247 mbstate_t ps
= { 0 };
248 size_t bol
= text_line_begin(txt
, pos
);
249 Iterator it
= text_iterator_get(txt
, bol
);
252 char buf
[MB_CUR_MAX
];
253 size_t len
= text_bytes_get(txt
, it
.pos
, sizeof buf
, buf
);
254 if (len
== 0 || buf
[0] == '\r' || buf
[0] == '\n')
257 size_t wclen
= mbrtowc(&wc
, buf
, len
, &ps
);
258 if (wclen
== (size_t)-1 && errno
== EILSEQ
) {
259 /* assume a replacement symbol will be displayed */
261 } else if (wclen
== (size_t)-2) {
262 /* do nothing, advance to next character */
263 } else if (wclen
== 0) {
264 /* assume NUL byte will be displayed as ^@ */
266 } else if (buf
[0] == '\t') {
271 w
= 2; /* assume non-printable will be displayed as ^{char} */
275 if (cur_width
>= width
|| !text_iterator_codepoint_next(&it
, NULL
))
282 size_t text_line_char_next(Text
*txt
, size_t pos
) {
284 Iterator it
= text_iterator_get(txt
, pos
);
285 if (!text_iterator_byte_get(&it
, &c
) || c
== '\r' || c
== '\n')
287 text_iterator_char_next(&it
, NULL
);
291 size_t text_line_char_prev(Text
*txt
, size_t pos
) {
293 Iterator it
= text_iterator_get(txt
, pos
);
294 if (!text_iterator_char_prev(&it
, &c
) || c
== '\n')
299 size_t text_line_up(Text
*txt
, size_t pos
) {
300 int width
= text_line_width_get(txt
, pos
);
301 size_t prev
= text_line_prev(txt
, pos
);
302 return text_line_width_set(txt
, prev
, width
);
305 size_t text_line_down(Text
*txt
, size_t pos
) {
306 int width
= text_line_width_get(txt
, pos
);
307 size_t next
= text_line_next(txt
, pos
);
308 return text_line_width_set(txt
, next
, width
);
311 size_t text_range_line_first(Text
*txt
, Filerange
*r
) {
312 if (!text_range_valid(r
))
317 size_t text_range_line_last(Text
*txt
, Filerange
*r
) {
318 if (!text_range_valid(r
))
320 size_t pos
= text_line_begin(txt
, r
->end
);
322 /* range ends at a begin of a line, skip last line ending */
323 pos
= text_line_prev(txt
, pos
);
324 pos
= text_line_begin(txt
, pos
);
326 return r
->start
<= pos
? pos
: r
->start
;
329 size_t text_range_line_next(Text
*txt
, Filerange
*r
, size_t pos
) {
330 if (!text_range_contains(r
, pos
))
332 size_t newpos
= text_line_next(txt
, pos
);
333 return newpos
!= pos
&& newpos
< r
->end
? newpos
: EPOS
;
336 size_t text_range_line_prev(Text
*txt
, Filerange
*r
, size_t pos
) {
337 if (!text_range_contains(r
, pos
))
339 size_t newpos
= text_line_begin(txt
, text_line_prev(txt
, pos
));
340 return newpos
!= pos
&& r
->start
<= newpos
? newpos
: EPOS
;
343 size_t text_customword_start_next(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
345 Iterator it
= text_iterator_get(txt
, pos
);
346 if (!text_iterator_byte_get(&it
, &c
))
349 while (boundary(c
) && !space(c
) && text_iterator_char_next(&it
, &c
));
351 while (!boundary(c
) && text_iterator_char_next(&it
, &c
));
352 while (space(c
) && text_iterator_char_next(&it
, &c
));
356 size_t text_customword_start_prev(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
358 Iterator it
= text_iterator_get(txt
, pos
);
359 while (text_iterator_char_prev(&it
, &c
) && space(c
));
361 do pos
= it
.pos
; while (text_iterator_char_prev(&it
, &c
) && boundary(c
) && !space(c
));
363 do pos
= it
.pos
; while (text_iterator_char_prev(&it
, &c
) && !boundary(c
));
367 size_t text_customword_end_next(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
369 Iterator it
= text_iterator_get(txt
, pos
);
370 while (text_iterator_char_next(&it
, &c
) && space(c
));
372 do pos
= it
.pos
; while (text_iterator_char_next(&it
, &c
) && boundary(c
) && !space(c
));
374 do pos
= it
.pos
; while (text_iterator_char_next(&it
, &c
) && !isboundary(c
));
378 size_t text_customword_end_prev(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
380 Iterator it
= text_iterator_get(txt
, pos
);
381 if (!text_iterator_byte_get(&it
, &c
))
384 while (boundary(c
) && !space(c
) && text_iterator_char_prev(&it
, &c
));
386 while (!boundary(c
) && text_iterator_char_prev(&it
, &c
));
387 while (space(c
) && text_iterator_char_prev(&it
, &c
));
391 size_t text_longword_end_next(Text
*txt
, size_t pos
) {
392 return text_customword_end_next(txt
, pos
, isspace
);
395 size_t text_longword_end_prev(Text
*txt
, size_t pos
) {
396 return text_customword_end_prev(txt
, pos
, isspace
);
399 size_t text_longword_start_next(Text
*txt
, size_t pos
) {
400 return text_customword_start_next(txt
, pos
, isspace
);
403 size_t text_longword_start_prev(Text
*txt
, size_t pos
) {
404 return text_customword_start_prev(txt
, pos
, isspace
);
407 size_t text_word_end_next(Text
*txt
, size_t pos
) {
408 return text_customword_end_next(txt
, pos
, is_word_boundary
);
411 size_t text_word_end_prev(Text
*txt
, size_t pos
) {
412 return text_customword_end_prev(txt
, pos
, is_word_boundary
);
415 size_t text_word_start_next(Text
*txt
, size_t pos
) {
416 return text_customword_start_next(txt
, pos
, is_word_boundary
);
419 size_t text_word_start_prev(Text
*txt
, size_t pos
) {
420 return text_customword_start_prev(txt
, pos
, is_word_boundary
);
423 size_t text_sentence_next(Text
*txt
, size_t pos
) {
425 Iterator it
= text_iterator_get(txt
, pos
), rev
= text_iterator_get(txt
, pos
);
427 if (!text_iterator_byte_get(&it
, &c
))
430 while (text_iterator_byte_get(&rev
, &prev
) && space(prev
))
431 text_iterator_byte_prev(&rev
, NULL
);
432 prev
= rev
.pos
== 0 ? '.' : prev
; /* simulate punctuation at BOF */
435 if ((prev
== '.' || prev
== '?' || prev
== '!') && space(c
)) {
436 do text_iterator_byte_next(&it
, NULL
);
437 while (text_iterator_byte_get(&it
, &c
) && space(c
));
441 } while (text_iterator_byte_next(&it
, &c
));
445 size_t text_sentence_prev(Text
*txt
, size_t pos
) {
447 bool content
= false;
448 Iterator it
= text_iterator_get(txt
, pos
);
450 while (it
.pos
!= 0 && text_iterator_byte_prev(&it
, &c
)) {
451 if (content
&& space(prev
) && (c
== '.' || c
== '?' || c
== '!')) {
452 do text_iterator_byte_next(&it
, NULL
);
453 while (text_iterator_byte_get(&it
, &c
) && space(c
));
456 content
|= !space(c
);
458 } /* The loop only ends on hitting BOF or error */
459 if (content
) /* starting pos was after first sentence in file => find that sentences start */
460 while (text_iterator_byte_get(&it
, &c
) && space(c
))
461 text_iterator_byte_next(&it
, NULL
);
465 size_t text_paragraph_next(Text
*txt
, size_t pos
) {
467 Iterator it
= text_iterator_get(txt
, pos
);
469 while (text_iterator_byte_get(&it
, &c
) && (c
== '\n' || c
== '\r'))
470 text_iterator_byte_next(&it
, NULL
);
471 return text_line_empty_next(txt
, it
.pos
);
474 size_t text_paragraph_prev(Text
*txt
, size_t pos
) {
476 Iterator it
= text_iterator_get(txt
, pos
);
478 /* c == \0 catches starting the search at EOF */
479 while (text_iterator_byte_get(&it
, &c
) && (c
== '\n' || c
== '\r' || c
== '\0'))
480 text_iterator_byte_prev(&it
, NULL
);
481 return text_line_empty_prev(txt
, it
.pos
);
484 size_t text_line_empty_next(Text
*txt
, size_t pos
) {
486 Iterator it
= text_iterator_get(txt
, pos
);
487 while (text_iterator_byte_get(&it
, &c
)) {
488 if (c
== '\n' && text_iterator_byte_next(&it
, &c
)) {
489 size_t match
= it
.pos
;
491 text_iterator_byte_next(&it
, &c
);
495 text_iterator_byte_next(&it
, NULL
);
500 size_t text_line_empty_prev(Text
*txt
, size_t pos
) {
502 Iterator it
= text_iterator_get(txt
, pos
);
503 while (text_iterator_byte_prev(&it
, &c
)) {
504 if (c
== '\n' && text_iterator_byte_prev(&it
, &c
)) {
506 text_iterator_byte_prev(&it
, &c
);
514 size_t text_function_start_next(Text
*txt
, size_t pos
) {
515 size_t a
= text_function_end_next(txt
, pos
);
519 Iterator it
= text_iterator_get(txt
, a
);
520 while (text_iterator_byte_next(&it
, &c
) && (c
== '\r' || c
== '\n'));
524 size_t match
= text_bracket_match(txt
, b
);
525 b
= match
!= b
? text_line_next(txt
, text_line_empty_prev(txt
, match
)) : pos
;
527 if (a
<= pos
&& b
<= pos
)
537 size_t text_function_start_prev(Text
*txt
, size_t pos
) {
539 size_t apos
= text_byte_get(txt
, pos
, &c
) && c
== '}' && pos
> 0 ? pos
- 1 : pos
;
540 size_t a
= text_function_end_next(txt
, apos
);
541 size_t b
= text_function_end_prev(txt
, pos
);
543 size_t match
= text_bracket_match(txt
, a
);
544 a
= match
!= a
? text_line_next(txt
, text_line_empty_prev(txt
, match
)) : pos
;
547 size_t match
= text_bracket_match(txt
, b
);
548 b
= match
!= b
? text_line_next(txt
, text_line_empty_prev(txt
, match
)) : pos
;
550 if (a
>= pos
&& b
>= pos
)
560 static size_t text_function_end_direction(Text
*txt
, size_t pos
, int direction
) {
561 size_t start
= pos
, match
;
562 if (direction
< 0 && pos
> 0)
567 match
= text_find_next(txt
, pos
, "\n}");
569 match
= text_find_prev(txt
, pos
, "\n}");
570 if (text_bytes_get(txt
, match
, sizeof c
, c
) != 3 || c
[0] != '\n' || c
[1] != '}')
572 if (c
[2] == '\r' || c
[2] == '\n')
581 size_t text_function_end_next(Text
*txt
, size_t pos
) {
582 return text_function_end_direction(txt
, pos
, +1);
585 size_t text_function_end_prev(Text
*txt
, size_t pos
) {
586 return text_function_end_direction(txt
, pos
, -1);
589 size_t text_block_start(Text
*txt
, size_t pos
) {
590 Filerange r
= text_object_curly_bracket(txt
, pos
-1);
591 return text_range_valid(&r
) ? r
.start
-1 : pos
;
594 size_t text_block_end(Text
*txt
, size_t pos
) {
595 Filerange r
= text_object_curly_bracket(txt
, pos
+1);
596 return text_range_valid(&r
) ? r
.end
: pos
;
599 size_t text_parenthese_start(Text
*txt
, size_t pos
) {
600 Filerange r
= text_object_paranthese(txt
, pos
-1);
601 return text_range_valid(&r
) ? r
.start
-1 : pos
;
604 size_t text_parenthese_end(Text
*txt
, size_t pos
) {
605 Filerange r
= text_object_paranthese(txt
, pos
+1);
606 return text_range_valid(&r
) ? r
.end
: pos
;
609 size_t text_bracket_match(Text
*txt
, size_t pos
) {
610 return text_bracket_match_symbol(txt
, pos
, NULL
);
613 size_t text_bracket_match_symbol(Text
*txt
, size_t pos
, const char *symbols
) {
614 int direction
, count
= 1;
615 char search
, current
, c
;
616 bool instring
= false;
617 Iterator it
= text_iterator_get(txt
, pos
);
618 if (!text_iterator_byte_get(&it
, ¤t
))
620 if (symbols
&& !memchr(symbols
, current
, strlen(symbols
)))
623 case '(': search
= ')'; direction
= 1; break;
624 case ')': search
= '('; direction
= -1; break;
625 case '{': search
= '}'; direction
= 1; break;
626 case '}': search
= '{'; direction
= -1; break;
627 case '[': search
= ']'; direction
= 1; break;
628 case ']': search
= '['; direction
= -1; break;
629 case '<': search
= '>'; direction
= 1; break;
630 case '>': search
= '<'; direction
= -1; break;
634 char special
[] = " \n)}]>.,:;";
637 if (text_iterator_byte_next(&it
, &c
)) {
638 /* if a single or double quote is followed by
639 * a special character, search backwards */
640 if (memchr(special
, c
, sizeof(special
)))
642 text_iterator_byte_prev(&it
, NULL
);
649 if (direction
>= 0) { /* forward search */
650 while (text_iterator_byte_next(&it
, &c
)) {
651 if (c
!= current
&& c
== '"')
652 instring
= !instring
;
654 if (c
== search
&& --count
== 0)
656 else if (c
== current
)
660 } else { /* backwards */
661 while (text_iterator_byte_prev(&it
, &c
)) {
662 if (c
!= current
&& c
== '"')
663 instring
= !instring
;
665 if (c
== search
&& --count
== 0)
667 else if (c
== current
)
673 return pos
; /* no match found */
676 size_t text_search_forward(Text
*txt
, size_t pos
, Regex
*regex
) {
677 size_t start
= pos
+ 1;
678 size_t end
= text_size(txt
);
680 bool found
= start
< end
&& !text_search_range_forward(txt
, start
, end
- start
, regex
, 1, match
, 0);
685 found
= !text_search_range_forward(txt
, start
, end
, regex
, 1, match
, 0);
688 return found
? match
[0].start
: pos
;
691 size_t text_search_backward(Text
*txt
, size_t pos
, Regex
*regex
) {
695 bool found
= !text_search_range_backward(txt
, start
, end
, regex
, 1, match
, 0);
699 end
= text_size(txt
);
700 found
= start
< end
&& !text_search_range_backward(txt
, start
, end
- start
, regex
, 1, match
, 0);
703 return found
? match
[0].start
: pos
;