6 #include "text-motions.h"
9 #include "text-objects.h"
/* Classify a byte as whitespace. The value is converted to unsigned char
 * before it reaches isspace() so that negative `char` values are never passed
 * to <ctype.h>; the argument is parenthesized so that an expression argument
 * is converted as a whole rather than only its first operand. */
#define space(c) (isspace((unsigned char)(c)))
/* Apply the predicate named `isboundary`, which must be in scope at the
 * expansion site (the customword motions receive it as a parameter), to a
 * byte converted to unsigned char. */
#define boundary(c) (isboundary((unsigned char)(c)))
// TODO: specify this per file type?
/* Word boundary predicate: yields 1 when `c` passes ISASCII() and is neither
 * a decimal digit, a Latin letter nor an underscore; yields 0 otherwise. */
int is_word_boundary(int c) {
	if (!ISASCII(c))
		return 0;
	if (c == '_')
		return 0;
	if (c >= '0' && c <= '9')
		return 0;
	if (c >= 'a' && c <= 'z')
		return 0;
	if (c >= 'A' && c <= 'Z')
		return 0;
	return 1;
}
21 size_t text_begin(Text
*txt
, size_t pos
) {
25 size_t text_end(Text
*txt
, size_t pos
) {
26 return text_size(txt
);
29 size_t text_char_next(Text
*txt
, size_t pos
) {
30 Iterator it
= text_iterator_get(txt
, pos
);
31 text_iterator_char_next(&it
, NULL
);
35 size_t text_char_prev(Text
*txt
, size_t pos
) {
36 Iterator it
= text_iterator_get(txt
, pos
);
37 text_iterator_char_prev(&it
, NULL
);
41 static size_t find_next(Text
*txt
, size_t pos
, const char *s
, bool line
) {
44 size_t len
= strlen(s
), matched
= 0;
45 Iterator it
= text_iterator_get(txt
, pos
), sit
;
46 for (char c
; matched
< len
&& text_iterator_byte_get(&it
, &c
); ) {
47 if (c
== s
[matched
]) {
51 } else if (matched
> 0) {
55 text_iterator_byte_next(&it
, NULL
);
56 if (line
&& c
== '\n')
59 return matched
== len
? it
.pos
- len
: pos
;
62 size_t text_find_next(Text
*txt
, size_t pos
, const char *s
) {
63 return find_next(txt
, pos
, s
, false);
66 size_t text_line_find_next(Text
*txt
, size_t pos
, const char *s
) {
67 return find_next(txt
, pos
, s
, true);
70 static size_t find_prev(Text
*txt
, size_t pos
, const char *s
, bool line
) {
73 size_t len
= strlen(s
), matched
= len
- 1;
74 Iterator it
= text_iterator_get(txt
, pos
), sit
;
77 for (char c
; text_iterator_byte_prev(&it
, &c
); ) {
78 if (c
== s
[matched
]) {
81 if (matched
== len
- 1)
84 } else if (matched
< len
- 1) {
88 if (line
&& c
== '\n')
94 size_t text_find_prev(Text
*txt
, size_t pos
, const char *s
) {
95 return find_prev(txt
, pos
, s
, false);
98 size_t text_line_find_prev(Text
*txt
, size_t pos
, const char *s
) {
99 return find_prev(txt
, pos
, s
, true);
102 size_t text_line_prev(Text
*txt
, size_t pos
) {
104 Iterator it
= text_iterator_get(txt
, pos
);
105 if (!text_iterator_byte_get(&it
, &c
))
108 text_iterator_byte_prev(&it
, &c
);
110 text_iterator_byte_prev(&it
, &c
);
111 while (text_iterator_byte_get(&it
, &c
) && c
!= '\n')
112 text_iterator_byte_prev(&it
, NULL
);
113 if (text_iterator_byte_prev(&it
, &c
) && c
!= '\r')
114 text_iterator_byte_next(&it
, &c
);
118 size_t text_line_begin(Text
*txt
, size_t pos
) {
120 Iterator it
= text_iterator_get(txt
, pos
);
121 if (!text_iterator_byte_get(&it
, &c
))
124 text_iterator_byte_prev(&it
, &c
);
126 text_iterator_byte_prev(&it
, &c
);
127 while (text_iterator_byte_get(&it
, &c
)) {
132 text_iterator_byte_prev(&it
, NULL
);
137 size_t text_line_start(Text
*txt
, size_t pos
) {
139 Iterator it
= text_iterator_get(txt
, text_line_begin(txt
, pos
));
140 while (text_iterator_byte_get(&it
, &c
) && c
!= '\n' && space(c
))
141 text_iterator_byte_next(&it
, NULL
);
145 size_t text_line_finish(Text
*txt
, size_t pos
) {
147 Iterator it
= text_iterator_get(txt
, text_line_end(txt
, pos
));
148 do text_iterator_char_prev(&it
, NULL
);
149 while (text_iterator_byte_get(&it
, &c
) && c
!= '\n' && space(c
));
153 size_t text_line_lastchar(Text
*txt
, size_t pos
) {
155 Iterator it
= text_iterator_get(txt
, text_line_end(txt
, pos
));
156 if (text_iterator_char_prev(&it
, &c
) && c
== '\n')
157 text_iterator_byte_next(&it
, NULL
);
161 size_t text_line_end(Text
*txt
, size_t pos
) {
163 Iterator it
= text_iterator_get(txt
, pos
);
164 while (text_iterator_byte_get(&it
, &c
) && c
!= '\r' && c
!= '\n')
165 text_iterator_byte_next(&it
, NULL
);
169 size_t text_line_next(Text
*txt
, size_t pos
) {
171 Iterator it
= text_iterator_get(txt
, pos
);
172 while (text_iterator_byte_get(&it
, &c
) && c
!= '\n')
173 text_iterator_byte_next(&it
, NULL
);
174 text_iterator_byte_next(&it
, NULL
);
178 size_t text_line_offset(Text
*txt
, size_t pos
, size_t off
) {
180 size_t bol
= text_line_begin(txt
, pos
);
181 Iterator it
= text_iterator_get(txt
, bol
);
182 while (off
-- > 0 && text_iterator_byte_get(&it
, &c
) && c
!= '\r' && c
!= '\n')
183 text_iterator_byte_next(&it
, NULL
);
187 size_t text_line_char_set(Text
*txt
, size_t pos
, int count
) {
189 size_t bol
= text_line_begin(txt
, pos
);
190 Iterator it
= text_iterator_get(txt
, bol
);
191 while (count
-- > 0 && text_iterator_byte_get(&it
, &c
) && c
!= '\r' && c
!= '\n')
192 text_iterator_char_next(&it
, NULL
);
196 int text_line_char_get(Text
*txt
, size_t pos
) {
199 size_t bol
= text_line_begin(txt
, pos
);
200 Iterator it
= text_iterator_get(txt
, bol
);
201 while (text_iterator_byte_get(&it
, &c
) && it
.pos
< pos
&& c
!= '\r' && c
!= '\n') {
202 text_iterator_char_next(&it
, NULL
);
208 int text_line_width_get(Text
*txt
, size_t pos
) {
210 mbstate_t ps
= { 0 };
211 size_t bol
= text_line_begin(txt
, pos
);
212 Iterator it
= text_iterator_get(txt
, bol
);
214 while (it
.pos
< pos
) {
215 char buf
[MB_CUR_MAX
];
216 size_t len
= text_bytes_get(txt
, it
.pos
, sizeof buf
, buf
);
217 if (len
== 0 || buf
[0] == '\r' || buf
[0] == '\n')
220 size_t wclen
= mbrtowc(&wc
, buf
, len
, &ps
);
221 if (wclen
== (size_t)-1 && errno
== EILSEQ
) {
222 /* assume a replacement symbol will be displayed */
224 } else if (wclen
== (size_t)-2) {
225 /* do nothing, advance to next character */
226 } else if (wclen
== 0) {
227 /* assume NUL byte will be displayed as ^@ */
229 } else if (buf
[0] == '\t') {
234 w
= 2; /* assume non-printable will be displayed as ^{char} */
238 if (!text_iterator_codepoint_next(&it
, NULL
))
245 size_t text_line_width_set(Text
*txt
, size_t pos
, int width
) {
247 mbstate_t ps
= { 0 };
248 size_t bol
= text_line_begin(txt
, pos
);
249 Iterator it
= text_iterator_get(txt
, bol
);
252 char buf
[MB_CUR_MAX
];
253 size_t len
= text_bytes_get(txt
, it
.pos
, sizeof buf
, buf
);
254 if (len
== 0 || buf
[0] == '\r' || buf
[0] == '\n')
257 size_t wclen
= mbrtowc(&wc
, buf
, len
, &ps
);
258 if (wclen
== (size_t)-1 && errno
== EILSEQ
) {
259 /* assume a replacement symbol will be displayed */
261 } else if (wclen
== (size_t)-2) {
262 /* do nothing, advance to next character */
263 } else if (wclen
== 0) {
264 /* assume NUL byte will be displayed as ^@ */
266 } else if (buf
[0] == '\t') {
271 w
= 2; /* assume non-printable will be displayed as ^{char} */
275 if (cur_width
>= width
|| !text_iterator_codepoint_next(&it
, NULL
))
282 size_t text_line_char_next(Text
*txt
, size_t pos
) {
284 Iterator it
= text_iterator_get(txt
, pos
);
285 if (!text_iterator_byte_get(&it
, &c
) || c
== '\r' || c
== '\n')
287 if (!text_iterator_char_next(&it
, &c
) || c
== '\r' || c
== '\n')
292 size_t text_line_char_prev(Text
*txt
, size_t pos
) {
294 Iterator it
= text_iterator_get(txt
, pos
);
295 if (!text_iterator_char_prev(&it
, &c
) || c
== '\n')
300 size_t text_line_up(Text
*txt
, size_t pos
) {
301 int width
= text_line_width_get(txt
, pos
);
302 size_t prev
= text_line_prev(txt
, pos
);
303 return text_line_width_set(txt
, prev
, width
);
306 size_t text_line_down(Text
*txt
, size_t pos
) {
307 int width
= text_line_width_get(txt
, pos
);
308 size_t next
= text_line_next(txt
, pos
);
309 return text_line_width_set(txt
, next
, width
);
312 size_t text_range_line_first(Text
*txt
, Filerange
*r
) {
313 if (!text_range_valid(r
))
318 size_t text_range_line_last(Text
*txt
, Filerange
*r
) {
319 if (!text_range_valid(r
))
321 size_t pos
= text_line_begin(txt
, r
->end
);
323 /* range ends at a begin of a line, skip last line ending */
324 pos
= text_line_prev(txt
, pos
);
325 pos
= text_line_begin(txt
, pos
);
327 return r
->start
<= pos
? pos
: r
->start
;
330 size_t text_range_line_next(Text
*txt
, Filerange
*r
, size_t pos
) {
331 if (!text_range_contains(r
, pos
))
333 size_t newpos
= text_line_next(txt
, pos
);
334 return newpos
!= pos
&& newpos
< r
->end
? newpos
: EPOS
;
337 size_t text_range_line_prev(Text
*txt
, Filerange
*r
, size_t pos
) {
338 if (!text_range_contains(r
, pos
))
340 size_t newpos
= text_line_begin(txt
, text_line_prev(txt
, pos
));
341 return newpos
!= pos
&& r
->start
<= newpos
? newpos
: EPOS
;
344 size_t text_customword_start_next(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
346 Iterator it
= text_iterator_get(txt
, pos
);
347 if (!text_iterator_byte_get(&it
, &c
))
350 while (boundary(c
) && !space(c
) && text_iterator_char_next(&it
, &c
));
352 while (!boundary(c
) && text_iterator_char_next(&it
, &c
));
353 while (space(c
) && text_iterator_char_next(&it
, &c
));
357 size_t text_customword_start_prev(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
359 Iterator it
= text_iterator_get(txt
, pos
);
360 while (text_iterator_char_prev(&it
, &c
) && space(c
));
362 do pos
= it
.pos
; while (text_iterator_char_prev(&it
, &c
) && boundary(c
) && !space(c
));
364 do pos
= it
.pos
; while (text_iterator_char_prev(&it
, &c
) && !boundary(c
));
368 size_t text_customword_end_next(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
370 Iterator it
= text_iterator_get(txt
, pos
);
371 while (text_iterator_char_next(&it
, &c
) && space(c
));
373 do pos
= it
.pos
; while (text_iterator_char_next(&it
, &c
) && boundary(c
) && !space(c
));
375 do pos
= it
.pos
; while (text_iterator_char_next(&it
, &c
) && !isboundary(c
));
379 size_t text_customword_end_prev(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
381 Iterator it
= text_iterator_get(txt
, pos
);
382 if (!text_iterator_byte_get(&it
, &c
))
385 while (boundary(c
) && !space(c
) && text_iterator_char_prev(&it
, &c
));
387 while (!boundary(c
) && text_iterator_char_prev(&it
, &c
));
388 while (space(c
) && text_iterator_char_prev(&it
, &c
));
392 size_t text_longword_end_next(Text
*txt
, size_t pos
) {
393 return text_customword_end_next(txt
, pos
, isspace
);
396 size_t text_longword_end_prev(Text
*txt
, size_t pos
) {
397 return text_customword_end_prev(txt
, pos
, isspace
);
400 size_t text_longword_start_next(Text
*txt
, size_t pos
) {
401 return text_customword_start_next(txt
, pos
, isspace
);
404 size_t text_longword_start_prev(Text
*txt
, size_t pos
) {
405 return text_customword_start_prev(txt
, pos
, isspace
);
408 size_t text_word_end_next(Text
*txt
, size_t pos
) {
409 return text_customword_end_next(txt
, pos
, is_word_boundary
);
412 size_t text_word_end_prev(Text
*txt
, size_t pos
) {
413 return text_customword_end_prev(txt
, pos
, is_word_boundary
);
416 size_t text_word_start_next(Text
*txt
, size_t pos
) {
417 return text_customword_start_next(txt
, pos
, is_word_boundary
);
420 size_t text_word_start_prev(Text
*txt
, size_t pos
) {
421 return text_customword_start_prev(txt
, pos
, is_word_boundary
);
424 size_t text_sentence_next(Text
*txt
, size_t pos
) {
426 Iterator it
= text_iterator_get(txt
, pos
), rev
= text_iterator_get(txt
, pos
);
428 if (!text_iterator_byte_get(&it
, &c
))
431 while (text_iterator_byte_get(&rev
, &prev
) && space(prev
))
432 text_iterator_byte_prev(&rev
, NULL
);
433 prev
= rev
.pos
== 0 ? '.' : prev
; /* simulate punctuation at BOF */
436 if ((prev
== '.' || prev
== '?' || prev
== '!') && space(c
)) {
437 do text_iterator_byte_next(&it
, NULL
);
438 while (text_iterator_byte_get(&it
, &c
) && space(c
));
442 } while (text_iterator_byte_next(&it
, &c
));
446 size_t text_sentence_prev(Text
*txt
, size_t pos
) {
448 bool content
= false;
449 Iterator it
= text_iterator_get(txt
, pos
);
451 while (it
.pos
!= 0 && text_iterator_byte_prev(&it
, &c
)) {
452 if (content
&& space(prev
) && (c
== '.' || c
== '?' || c
== '!')) {
453 do text_iterator_byte_next(&it
, NULL
);
454 while (text_iterator_byte_get(&it
, &c
) && space(c
));
457 content
|= !space(c
);
459 } /* The loop only ends on hitting BOF or error */
460 if (content
) /* starting pos was after first sentence in file => find that sentences start */
461 while (text_iterator_byte_get(&it
, &c
) && space(c
))
462 text_iterator_byte_next(&it
, NULL
);
466 size_t text_paragraph_next(Text
*txt
, size_t pos
) {
468 Iterator it
= text_iterator_get(txt
, pos
);
470 while (text_iterator_byte_get(&it
, &c
) && (c
== '\n' || c
== '\r'))
471 text_iterator_byte_next(&it
, NULL
);
472 return text_line_empty_next(txt
, it
.pos
);
475 size_t text_paragraph_prev(Text
*txt
, size_t pos
) {
477 Iterator it
= text_iterator_get(txt
, pos
);
479 /* c == \0 catches starting the search at EOF */
480 while (text_iterator_byte_get(&it
, &c
) && (c
== '\n' || c
== '\r' || c
== '\0'))
481 text_iterator_byte_prev(&it
, NULL
);
482 return text_line_empty_prev(txt
, it
.pos
);
485 size_t text_line_empty_next(Text
*txt
, size_t pos
) {
487 Iterator it
= text_iterator_get(txt
, pos
);
488 while (text_iterator_byte_get(&it
, &c
)) {
489 if (c
== '\n' && text_iterator_byte_next(&it
, &c
)) {
490 size_t match
= it
.pos
;
492 text_iterator_byte_next(&it
, &c
);
496 text_iterator_byte_next(&it
, NULL
);
501 size_t text_line_empty_prev(Text
*txt
, size_t pos
) {
503 Iterator it
= text_iterator_get(txt
, pos
);
504 while (text_iterator_byte_prev(&it
, &c
)) {
505 if (c
== '\n' && text_iterator_byte_prev(&it
, &c
)) {
507 text_iterator_byte_prev(&it
, &c
);
515 size_t text_function_start_next(Text
*txt
, size_t pos
) {
516 size_t a
= text_function_end_next(txt
, pos
);
520 Iterator it
= text_iterator_get(txt
, a
);
521 while (text_iterator_byte_next(&it
, &c
) && (c
== '\r' || c
== '\n'));
525 size_t match
= text_bracket_match(txt
, b
);
526 b
= match
!= b
? text_line_next(txt
, text_line_empty_prev(txt
, match
)) : pos
;
528 if (a
<= pos
&& b
<= pos
)
538 size_t text_function_start_prev(Text
*txt
, size_t pos
) {
540 size_t apos
= text_byte_get(txt
, pos
, &c
) && c
== '}' && pos
> 0 ? pos
- 1 : pos
;
541 size_t a
= text_function_end_next(txt
, apos
);
542 size_t b
= text_function_end_prev(txt
, pos
);
544 size_t match
= text_bracket_match(txt
, a
);
545 a
= match
!= a
? text_line_next(txt
, text_line_empty_prev(txt
, match
)) : pos
;
548 size_t match
= text_bracket_match(txt
, b
);
549 b
= match
!= b
? text_line_next(txt
, text_line_empty_prev(txt
, match
)) : pos
;
551 if (a
>= pos
&& b
>= pos
)
561 static size_t text_function_end_direction(Text
*txt
, size_t pos
, int direction
) {
562 size_t start
= pos
, match
;
563 if (direction
< 0 && pos
> 0)
568 match
= text_find_next(txt
, pos
, "\n}");
570 match
= text_find_prev(txt
, pos
, "\n}");
571 if (text_bytes_get(txt
, match
, sizeof c
, c
) != 3 || c
[0] != '\n' || c
[1] != '}')
573 if (c
[2] == '\r' || c
[2] == '\n')
582 size_t text_function_end_next(Text
*txt
, size_t pos
) {
583 return text_function_end_direction(txt
, pos
, +1);
586 size_t text_function_end_prev(Text
*txt
, size_t pos
) {
587 return text_function_end_direction(txt
, pos
, -1);
590 size_t text_block_start(Text
*txt
, size_t pos
) {
591 Filerange r
= text_object_curly_bracket(txt
, pos
-1);
592 return text_range_valid(&r
) ? r
.start
-1 : pos
;
595 size_t text_block_end(Text
*txt
, size_t pos
) {
596 Filerange r
= text_object_curly_bracket(txt
, pos
+1);
597 return text_range_valid(&r
) ? r
.end
: pos
;
600 size_t text_parenthese_start(Text
*txt
, size_t pos
) {
601 Filerange r
= text_object_paranthese(txt
, pos
-1);
602 return text_range_valid(&r
) ? r
.start
-1 : pos
;
605 size_t text_parenthese_end(Text
*txt
, size_t pos
) {
606 Filerange r
= text_object_paranthese(txt
, pos
+1);
607 return text_range_valid(&r
) ? r
.end
: pos
;
610 size_t text_bracket_match(Text
*txt
, size_t pos
) {
611 return text_bracket_match_symbol(txt
, pos
, NULL
);
614 size_t text_bracket_match_symbol(Text
*txt
, size_t pos
, const char *symbols
) {
615 int direction
, count
= 1;
616 char search
, current
, c
;
617 bool instring
= false;
618 Iterator it
= text_iterator_get(txt
, pos
);
619 if (!text_iterator_byte_get(&it
, ¤t
))
621 if (symbols
&& !memchr(symbols
, current
, strlen(symbols
)))
624 case '(': search
= ')'; direction
= 1; break;
625 case ')': search
= '('; direction
= -1; break;
626 case '{': search
= '}'; direction
= 1; break;
627 case '}': search
= '{'; direction
= -1; break;
628 case '[': search
= ']'; direction
= 1; break;
629 case ']': search
= '['; direction
= -1; break;
630 case '<': search
= '>'; direction
= 1; break;
631 case '>': search
= '<'; direction
= -1; break;
635 char special
[] = " \n)}]>.,:;";
638 if (text_iterator_byte_next(&it
, &c
)) {
639 /* if a single or double quote is followed by
640 * a special character, search backwards */
641 if (memchr(special
, c
, sizeof(special
)))
643 text_iterator_byte_prev(&it
, NULL
);
650 if (direction
>= 0) { /* forward search */
651 while (text_iterator_byte_next(&it
, &c
)) {
652 if (c
!= current
&& c
== '"')
653 instring
= !instring
;
655 if (c
== search
&& --count
== 0)
657 else if (c
== current
)
661 } else { /* backwards */
662 while (text_iterator_byte_prev(&it
, &c
)) {
663 if (c
!= current
&& c
== '"')
664 instring
= !instring
;
666 if (c
== search
&& --count
== 0)
668 else if (c
== current
)
674 return pos
; /* no match found */
677 size_t text_search_forward(Text
*txt
, size_t pos
, Regex
*regex
) {
678 size_t start
= pos
+ 1;
679 size_t end
= text_size(txt
);
681 bool found
= start
< end
&& !text_search_range_forward(txt
, start
, end
- start
, regex
, 1, match
, 0);
686 found
= !text_search_range_forward(txt
, start
, end
, regex
, 1, match
, 0);
689 return found
? match
[0].start
: pos
;
692 size_t text_search_backward(Text
*txt
, size_t pos
, Regex
*regex
) {
696 bool found
= !text_search_range_backward(txt
, start
, end
, regex
, 1, match
, 0);
700 end
= text_size(txt
);
701 found
= start
< end
&& !text_search_range_backward(txt
, start
, end
- start
, regex
, 1, match
, 0);
704 return found
? match
[0].start
: pos
;