7 #include "text-motions.h"
10 #include "text-objects.h"
12 #define blank(c) ((c) == ' ' || (c) == '\t')
13 #define space(c) (isspace((unsigned char)c))
14 #define boundary(c) (isboundary((unsigned char)c))
16 // TODO: specify this per file type?
/*
 * Word-boundary predicate used by the word motions.
 *
 * Returns non-zero when c satisfies ISASCII and is neither a decimal digit,
 * an ASCII letter nor an underscore. Values rejected by ISASCII are never
 * reported as boundaries.
 */
int is_word_boundary(int c) {
	return ISASCII(c) && !(('0' <= c && c <= '9') ||
	         ('a' <= c && c <= 'z') ||
	         ('A' <= c && c <= 'Z') || c == '_');
}
/*
 * Motion taking a text and a position and yielding a position.
 *
 * NOTE(review): damaged extraction - only the signature is visible here; the
 * body and the closing brace (original lines 24-25) are missing, and every
 * line carries its original line number as a stray leading token.
 * Presumably this returns the start of the text; confirm against the
 * pristine file before relying on it.
 */
23 size_t text_begin(Text
*txt
, size_t pos
) {
27 size_t text_end(Text
*txt
, size_t pos
) {
28 return text_size(txt
);
31 size_t text_char_next(Text
*txt
, size_t pos
) {
32 Iterator it
= text_iterator_get(txt
, pos
);
33 text_iterator_char_next(&it
, NULL
);
37 size_t text_char_prev(Text
*txt
, size_t pos
) {
38 Iterator it
= text_iterator_get(txt
, pos
);
39 text_iterator_char_prev(&it
, NULL
);
43 size_t text_codepoint_next(Text
*txt
, size_t pos
) {
44 Iterator it
= text_iterator_get(txt
, pos
);
45 text_iterator_codepoint_next(&it
, NULL
);
49 size_t text_codepoint_prev(Text
*txt
, size_t pos
) {
50 Iterator it
= text_iterator_get(txt
, pos
);
51 text_iterator_codepoint_prev(&it
, NULL
);
/*
 * Forward search for the byte string s, starting at pos.
 *
 * Bytes are read through an iterator and compared with s[matched]. The
 * visible return yields it.pos - len when all len bytes matched, and the
 * unchanged pos otherwise. When `line` is set a '\n' byte triggers a
 * statement that is not visible here (presumably it stops the scan so the
 * search stays on the current line).
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 56-57, 62-64, 66-68, 71-72 and 74 are missing (the handling
 * of `sit`, the match counter updates and the closing braces among them).
 * Restore from the pristine file; do not reconstruct by guesswork.
 */
55 static size_t find_next(Text
*txt
, size_t pos
, const char *s
, bool line
) {
58 size_t len
= strlen(s
), matched
= 0;
59 Iterator it
= text_iterator_get(txt
, pos
), sit
;
60 for (char c
; matched
< len
&& text_iterator_byte_get(&it
, &c
); ) {
61 if (c
== s
[matched
]) {
65 } else if (matched
> 0) {
69 text_iterator_byte_next(&it
, NULL
);
70 if (line
&& c
== '\n')
73 return matched
== len
? it
.pos
- len
: pos
;
76 size_t text_find_next(Text
*txt
, size_t pos
, const char *s
) {
77 return find_next(txt
, pos
, s
, false);
80 size_t text_line_find_next(Text
*txt
, size_t pos
, const char *s
) {
81 return find_next(txt
, pos
, s
, true);
/*
 * Backward search for the byte string s, starting at pos.
 *
 * Bytes are read backwards through an iterator and compared with s[matched],
 * where matched starts at the index of the last byte of s (len - 1).
 * When `line` is set a '\n' byte triggers a statement that is not visible
 * here.
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 85-86, 89-90, 93-94, 96-97, 99-101 and 103-106 are missing
 * (all return statements among them). Note also that `len - 1` wraps for an
 * empty s unless one of the missing lines guards against it - confirm.
 * Restore from the pristine file; do not reconstruct by guesswork.
 */
84 static size_t find_prev(Text
*txt
, size_t pos
, const char *s
, bool line
) {
87 size_t len
= strlen(s
), matched
= len
- 1;
88 Iterator it
= text_iterator_get(txt
, pos
), sit
;
91 for (char c
; text_iterator_byte_prev(&it
, &c
); ) {
92 if (c
== s
[matched
]) {
95 if (matched
== len
- 1)
98 } else if (matched
< len
- 1) {
102 if (line
&& c
== '\n')
108 size_t text_find_prev(Text
*txt
, size_t pos
, const char *s
) {
109 return find_prev(txt
, pos
, s
, false);
112 size_t text_line_find_prev(Text
*txt
, size_t pos
, const char *s
) {
113 return find_prev(txt
, pos
, s
, true);
116 size_t text_line_prev(Text
*txt
, size_t pos
) {
117 Iterator it
= text_iterator_get(txt
, pos
);
118 text_iterator_byte_find_prev(&it
, '\n');
122 size_t text_line_begin(Text
*txt
, size_t pos
) {
123 Iterator it
= text_iterator_get(txt
, pos
);
124 return text_iterator_byte_find_prev(&it
, '\n') ? it
.pos
+1 : it
.pos
;
127 size_t text_line_start(Text
*txt
, size_t pos
) {
129 Iterator it
= text_iterator_get(txt
, text_line_begin(txt
, pos
));
130 while (text_iterator_byte_get(&it
, &c
) && blank(c
))
131 text_iterator_byte_next(&it
, NULL
);
135 size_t text_line_finish(Text
*txt
, size_t pos
) {
137 size_t end
= text_line_end(txt
, pos
);
138 Iterator it
= text_iterator_get(txt
, end
);
139 if (!text_iterator_byte_prev(&it
, &c
) || c
== '\n')
141 while (blank(c
) && text_iterator_byte_prev(&it
, &c
));
142 return it
.pos
+ (c
== '\n');
145 size_t text_line_end(Text
*txt
, size_t pos
) {
146 Iterator it
= text_iterator_get(txt
, pos
);
147 text_iterator_byte_find_next(&it
, '\n');
151 size_t text_line_next(Text
*txt
, size_t pos
) {
152 Iterator it
= text_iterator_get(txt
, pos
);
153 if (text_iterator_byte_find_next(&it
, '\n'))
154 text_iterator_byte_next(&it
, NULL
);
158 size_t text_line_offset(Text
*txt
, size_t pos
, size_t off
) {
160 size_t bol
= text_line_begin(txt
, pos
);
161 Iterator it
= text_iterator_get(txt
, bol
);
162 while (off
-- > 0 && text_iterator_byte_get(&it
, &c
) && c
!= '\n')
163 text_iterator_byte_next(&it
, NULL
);
167 size_t text_line_char_set(Text
*txt
, size_t pos
, int count
) {
169 size_t bol
= text_line_begin(txt
, pos
);
170 Iterator it
= text_iterator_get(txt
, bol
);
171 if (text_iterator_byte_get(&it
, &c
) && c
!= '\n')
172 while (count
-- > 0 && text_iterator_char_next(&it
, &c
) && c
!= '\n');
/*
 * Walks character by character from the start of the line containing pos
 * while the iterator is before pos and no '\n' has been read.
 * Presumably the number of steps taken is the int result (the counterpart
 * of text_line_char_set) - confirm.
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 177-178 and 183-186 are missing (local declarations, the
 * loop body and the return statement). Restore from the pristine file.
 */
176 int text_line_char_get(Text
*txt
, size_t pos
) {
179 size_t bol
= text_line_begin(txt
, pos
);
180 Iterator it
= text_iterator_get(txt
, bol
);
181 if (text_iterator_byte_get(&it
, &c
) && c
!= '\n') {
182 while (it
.pos
< pos
&& c
!= '\n' && text_iterator_char_next(&it
, &c
))
/*
 * Measures a width for the text between the start of the line containing
 * pos and pos itself.
 *
 * Each iteration reads up to MB_LEN_MAX bytes at the iterator position,
 * stops on an empty read or a '\n', decodes one wide character with
 * mbrtowc() and then advances by one codepoint. The branch comments describe
 * how invalid sequences, incomplete sequences, NUL bytes, tabs and
 * non-printable characters are meant to be accounted for.
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 189, 193, 198-199, 203, 208, 210-213, 215-217 and 219-223
 * are missing (the declarations of the accumulator, `wc` and `w`, every
 * statement that adds to the width, the loop exits and the return).
 * Restore from the pristine file; do not reconstruct by guesswork.
 */
188 int text_line_width_get(Text
*txt
, size_t pos
) {
190 mbstate_t ps
= { 0 };
191 size_t bol
= text_line_begin(txt
, pos
);
192 Iterator it
= text_iterator_get(txt
, bol
);
194 while (it
.pos
< pos
) {
195 char buf
[MB_LEN_MAX
];
196 size_t len
= text_bytes_get(txt
, it
.pos
, sizeof buf
, buf
);
197 if (len
== 0 || buf
[0] == '\n')
200 size_t wclen
= mbrtowc(&wc
, buf
, len
, &ps
);
201 if (wclen
== (size_t)-1 && errno
== EILSEQ
) {
202 /* assume a replacement symbol will be displayed */
204 } else if (wclen
== (size_t)-2) {
205 /* do nothing, advance to next character */
206 } else if (wclen
== 0) {
207 /* assume NUL byte will be displayed as ^@ */
209 } else if (buf
[0] == '\t') {
214 w
= 2; /* assume non-printable will be displayed as ^{char} */
218 if (!text_iterator_codepoint_next(&it
, NULL
))
/*
 * Counterpart of text_line_width_get(): walks forward from the start of the
 * line containing pos, decoding one wide character per step with mbrtowc(),
 * until the running width `cur_width` reaches `width`, the line ends, or the
 * iterator cannot advance by another codepoint.
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 226, 230-231, 235-236, 240, 245, 247-250, 252-254 and
 * 256-260 are missing (the declarations of `cur_width`, `wc` and `w`, the
 * loop header, every statement that adds to the width, the loop exits and
 * the return). Restore from the pristine file; do not reconstruct by
 * guesswork.
 */
225 size_t text_line_width_set(Text
*txt
, size_t pos
, int width
) {
227 mbstate_t ps
= { 0 };
228 size_t bol
= text_line_begin(txt
, pos
);
229 Iterator it
= text_iterator_get(txt
, bol
);
232 char buf
[MB_LEN_MAX
];
233 size_t len
= text_bytes_get(txt
, it
.pos
, sizeof buf
, buf
);
234 if (len
== 0 || buf
[0] == '\n')
237 size_t wclen
= mbrtowc(&wc
, buf
, len
, &ps
);
238 if (wclen
== (size_t)-1 && errno
== EILSEQ
) {
239 /* assume a replacement symbol will be displayed */
241 } else if (wclen
== (size_t)-2) {
242 /* do nothing, advance to next character */
243 } else if (wclen
== 0) {
244 /* assume NUL byte will be displayed as ^@ */
246 } else if (buf
[0] == '\t') {
251 w
= 2; /* assume non-printable will be displayed as ^{char} */
255 if (cur_width
>= width
|| !text_iterator_codepoint_next(&it
, NULL
))
262 size_t text_line_char_next(Text
*txt
, size_t pos
) {
264 Iterator it
= text_iterator_get(txt
, pos
);
265 if (!text_iterator_byte_get(&it
, &c
) || c
== '\n')
267 text_iterator_char_next(&it
, NULL
);
271 size_t text_line_char_prev(Text
*txt
, size_t pos
) {
273 Iterator it
= text_iterator_get(txt
, pos
);
274 if (!text_iterator_char_prev(&it
, &c
) || c
== '\n')
279 size_t text_line_up(Text
*txt
, size_t pos
) {
280 int width
= text_line_width_get(txt
, pos
);
281 size_t prev
= text_line_prev(txt
, pos
);
282 return text_line_width_set(txt
, prev
, width
);
285 size_t text_line_down(Text
*txt
, size_t pos
) {
286 int width
= text_line_width_get(txt
, pos
);
287 size_t next
= text_line_next(txt
, pos
);
288 return text_line_width_set(txt
, next
, width
);
/*
 * Yields a position for the first line of range r after checking the range
 * with text_range_valid().
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 293-295 are missing (the statement taken for an invalid
 * range, the regular return and the closing brace). Sibling range functions
 * return EPOS on failure, so that is presumably the invalid-range result -
 * confirm against the pristine file.
 */
291 size_t text_range_line_first(Text
*txt
, Filerange
*r
) {
292 if (!text_range_valid(r
))
297 size_t text_range_line_last(Text
*txt
, Filerange
*r
) {
298 if (!text_range_valid(r
))
300 size_t pos
= text_line_begin(txt
, r
->end
);
302 /* range ends at a begin of a line, skip last line ending */
303 pos
= text_line_prev(txt
, pos
);
304 pos
= text_line_begin(txt
, pos
);
306 return r
->start
<= pos
? pos
: r
->start
;
309 size_t text_range_line_next(Text
*txt
, Filerange
*r
, size_t pos
) {
310 if (!text_range_contains(r
, pos
))
312 size_t newpos
= text_line_next(txt
, pos
);
313 return newpos
!= pos
&& newpos
< r
->end
? newpos
: EPOS
;
316 size_t text_range_line_prev(Text
*txt
, Filerange
*r
, size_t pos
) {
317 if (!text_range_contains(r
, pos
))
319 size_t newpos
= text_line_begin(txt
, text_line_prev(txt
, pos
));
320 return newpos
!= pos
&& r
->start
<= newpos
? newpos
: EPOS
;
323 size_t text_customword_start_next(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
325 Iterator it
= text_iterator_get(txt
, pos
);
326 if (!text_iterator_byte_get(&it
, &c
))
329 while (boundary(c
) && !space(c
) && text_iterator_char_next(&it
, &c
));
331 while (!boundary(c
) && text_iterator_char_next(&it
, &c
));
332 while (space(c
) && text_iterator_char_next(&it
, &c
));
336 size_t text_customword_start_prev(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
338 Iterator it
= text_iterator_get(txt
, pos
);
339 while (text_iterator_char_prev(&it
, &c
) && space(c
));
341 do pos
= it
.pos
; while (text_iterator_char_prev(&it
, &c
) && boundary(c
) && !space(c
));
343 do pos
= it
.pos
; while (text_iterator_char_prev(&it
, &c
) && !boundary(c
));
347 size_t text_customword_end_next(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
349 Iterator it
= text_iterator_get(txt
, pos
);
350 while (text_iterator_char_next(&it
, &c
) && space(c
));
352 do pos
= it
.pos
; while (text_iterator_char_next(&it
, &c
) && boundary(c
) && !space(c
));
354 do pos
= it
.pos
; while (text_iterator_char_next(&it
, &c
) && !isboundary(c
));
358 size_t text_customword_end_prev(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
360 Iterator it
= text_iterator_get(txt
, pos
);
361 if (!text_iterator_byte_get(&it
, &c
))
364 while (boundary(c
) && !space(c
) && text_iterator_char_prev(&it
, &c
));
366 while (!boundary(c
) && text_iterator_char_prev(&it
, &c
));
367 while (space(c
) && text_iterator_char_prev(&it
, &c
));
371 size_t text_longword_end_next(Text
*txt
, size_t pos
) {
372 return text_customword_end_next(txt
, pos
, isspace
);
375 size_t text_longword_end_prev(Text
*txt
, size_t pos
) {
376 return text_customword_end_prev(txt
, pos
, isspace
);
379 size_t text_longword_start_next(Text
*txt
, size_t pos
) {
380 return text_customword_start_next(txt
, pos
, isspace
);
383 size_t text_longword_start_prev(Text
*txt
, size_t pos
) {
384 return text_customword_start_prev(txt
, pos
, isspace
);
387 size_t text_word_end_next(Text
*txt
, size_t pos
) {
388 return text_customword_end_next(txt
, pos
, is_word_boundary
);
391 size_t text_word_end_prev(Text
*txt
, size_t pos
) {
392 return text_customword_end_prev(txt
, pos
, is_word_boundary
);
395 size_t text_word_start_next(Text
*txt
, size_t pos
) {
396 return text_customword_start_next(txt
, pos
, is_word_boundary
);
399 size_t text_word_start_prev(Text
*txt
, size_t pos
) {
400 return text_customword_start_prev(txt
, pos
, is_word_boundary
);
/*
 * Forward sentence motion.
 *
 * A second iterator (`rev`) first walks backwards over white space to find
 * the last non-space byte before pos; the start of the file is treated as if
 * it were preceded by a '.'. The main scan then looks for '.', '?' or '!'
 * followed by white space and skips that white space.
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 404, 406, 408-409, 413-414, 418-420 and 422-423 are missing
 * (the declarations of `c` and `prev`, the opening of the do-loop, the
 * update of `prev` and all return statements). Restore from the pristine
 * file; do not reconstruct by guesswork.
 */
403 size_t text_sentence_next(Text
*txt
, size_t pos
) {
405 Iterator it
= text_iterator_get(txt
, pos
), rev
= it
;
407 if (!text_iterator_byte_get(&it
, &c
))
410 while (text_iterator_byte_get(&rev
, &prev
) && space(prev
))
411 text_iterator_byte_prev(&rev
, NULL
);
412 prev
= rev
.pos
== 0 ? '.' : prev
; /* simulate punctuation at BOF */
415 if ((prev
== '.' || prev
== '?' || prev
== '!') && space(c
)) {
416 do text_iterator_byte_next(&it
, NULL
);
417 while (text_iterator_byte_get(&it
, &c
) && space(c
));
421 } while (text_iterator_byte_next(&it
, &c
));
/*
 * Backward sentence motion.
 *
 * Scans backwards byte by byte. Once some non-space content has been seen
 * (`content`), a '.', '?' or '!' whose following byte (`prev`) is white
 * space marks a sentence end; the iterator then moves forward past the white
 * space after it. If the scan reaches the beginning of the file after seeing
 * content, leading white space is skipped to find the first sentence.
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 426, 429, 434-435, 437 and 442-443 are missing (the
 * declarations of `c` and `prev`, the update of `prev` and all return
 * statements). Restore from the pristine file; do not reconstruct by
 * guesswork.
 */
425 size_t text_sentence_prev(Text
*txt
, size_t pos
) {
427 bool content
= false;
428 Iterator it
= text_iterator_get(txt
, pos
);
430 while (it
.pos
!= 0 && text_iterator_byte_prev(&it
, &c
)) {
431 if (content
&& space(prev
) && (c
== '.' || c
== '?' || c
== '!')) {
432 do text_iterator_byte_next(&it
, NULL
);
433 while (text_iterator_byte_get(&it
, &c
) && space(c
));
436 content
|= !space(c
);
438 } /* The loop only ends on hitting BOF or error */
439 if (content
) /* starting pos was after first sentence in file => find that sentences start */
440 while (text_iterator_byte_get(&it
, &c
) && space(c
))
441 text_iterator_byte_next(&it
, NULL
);
445 size_t text_paragraph_next(Text
*txt
, size_t pos
) {
447 Iterator it
= text_iterator_get(txt
, pos
);
449 while (text_iterator_byte_get(&it
, &c
) && c
== '\n')
450 text_iterator_char_next(&it
, NULL
);
451 return text_line_empty_next(txt
, it
.pos
);
454 size_t text_paragraph_prev(Text
*txt
, size_t pos
) {
456 Iterator it
= text_iterator_get(txt
, pos
);
458 /* c == \0 catches starting the search at EOF */
459 while (text_iterator_byte_get(&it
, &c
) && (c
== '\n' || c
== '\0'))
460 text_iterator_byte_prev(&it
, NULL
);
461 return text_line_empty_prev(txt
, it
.pos
);
/*
 * Searches forward from pos for two consecutive '\n' bytes: each '\n' found
 * by text_iterator_byte_find_next() is checked against the byte after it.
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 465 and 469-472 are missing (the declaration of `c` and
 * both return statements, so the exact positions returned on success and on
 * failure cannot be read from here). Restore from the pristine file.
 */
464 size_t text_line_empty_next(Text
*txt
, size_t pos
) {
466 Iterator it
= text_iterator_get(txt
, pos
);
467 while (text_iterator_byte_find_next(&it
, '\n')) {
468 if (text_iterator_byte_next(&it
, &c
) && c
== '\n')
/*
 * Searches backwards from pos for two consecutive '\n' bytes: each '\n'
 * found by text_iterator_byte_find_prev() is checked against the byte before
 * it.
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 475 and 479-482 are missing (the declaration of `c` and
 * both return statements, so the exact positions returned on success and on
 * failure cannot be read from here). Restore from the pristine file.
 */
474 size_t text_line_empty_prev(Text
*txt
, size_t pos
) {
476 Iterator it
= text_iterator_get(txt
, pos
);
477 while (text_iterator_byte_find_prev(&it
, '\n')) {
478 if (text_iterator_byte_prev(&it
, &c
) && c
== '\n')
484 size_t text_block_start(Text
*txt
, size_t pos
) {
485 Filerange r
= text_object_curly_bracket(txt
, pos
-1);
486 return text_range_valid(&r
) ? r
.start
-1 : pos
;
489 size_t text_block_end(Text
*txt
, size_t pos
) {
490 Filerange r
= text_object_curly_bracket(txt
, pos
+1);
491 return text_range_valid(&r
) ? r
.end
: pos
;
494 size_t text_parenthese_start(Text
*txt
, size_t pos
) {
495 Filerange r
= text_object_paranthese(txt
, pos
-1);
496 return text_range_valid(&r
) ? r
.start
-1 : pos
;
499 size_t text_parenthese_end(Text
*txt
, size_t pos
) {
500 Filerange r
= text_object_paranthese(txt
, pos
+1);
501 return text_range_valid(&r
) ? r
.end
: pos
;
504 size_t text_bracket_match(Text
*txt
, size_t pos
) {
505 return text_bracket_match_symbol(txt
, pos
, NULL
);
/*
 * Searches from pos for the byte `search`, forwards when direction >= 0 and
 * backwards otherwise. The byte at pos is read into `current`; a '"' that
 * differs from `current` toggles the `instring` flag; a counter is
 * decremented on each `search` byte and a hit is taken when it reaches zero.
 * Falls through to `return pos` when nothing matched.
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 509-510, 514, 519, 521, 523-525, 530, 532 and 534-538 are
 * missing (the declarations of `c`, `current` and `count`, the use of
 * `instring`, the counter increments and the success returns). The token
 * written as a currency sign followed by `t` is almost certainly a
 * mis-decoded `&current`. Restore from the pristine file; do not reconstruct
 * by guesswork.
 */
508 static size_t match_symbol(Text
*txt
, size_t pos
, char search
, int direction
) {
511 bool instring
= false;
512 Iterator it
= text_iterator_get(txt
, pos
);
513 if (!text_iterator_byte_get(&it
, ¤t
))
515 if (direction
>= 0) { /* forward search */
516 while (text_iterator_byte_next(&it
, &c
)) {
517 if (c
!= current
&& c
== '"')
518 instring
= !instring
;
520 if (c
== search
&& --count
== 0)
522 else if (c
== current
)
526 } else { /* backwards */
527 while (text_iterator_byte_prev(&it
, &c
)) {
528 if (c
!= current
&& c
== '"')
529 instring
= !instring
;
531 if (c
== search
&& --count
== 0)
533 else if (c
== current
)
539 return pos
; /* no match found */
/*
 * Finds the counterpart of the symbol at pos.
 *
 * The byte at pos is read into `current`; when `symbols` is non-NULL the
 * byte must occur in that string. For the bracket pairs () {} [] <> the
 * matching symbol and a search direction are selected and the work is
 * delegated to match_symbol(). A further branch searches for `current`
 * itself in both directions, compares line numbers of the hits
 * ("prefer matches on the same line") and inspects the following byte
 * against a list of special characters. Note that memchr() is given
 * sizeof(special), which includes the string's terminating NUL byte.
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 543, 547, 549-550, 559-562, 566-569, 573-577, 583-584 and
 * 586-590 are missing (the declaration of `direction`, the early returns,
 * the switch header, the case labels of the quote branch and its decision
 * logic, and the default case). The token written as a currency sign
 * followed by `t` is almost certainly a mis-decoded `&current`. Restore from
 * the pristine file; do not reconstruct by guesswork.
 */
542 size_t text_bracket_match_symbol(Text
*txt
, size_t pos
, const char *symbols
) {
544 char search
, current
, c
;
545 Iterator it
= text_iterator_get(txt
, pos
);
546 if (!text_iterator_byte_get(&it
, ¤t
))
548 if (symbols
&& !memchr(symbols
, current
, strlen(symbols
)))
551 case '(': search
= ')'; direction
= 1; break;
552 case ')': search
= '('; direction
= -1; break;
553 case '{': search
= '}'; direction
= 1; break;
554 case '}': search
= '{'; direction
= -1; break;
555 case '[': search
= ']'; direction
= 1; break;
556 case ']': search
= '['; direction
= -1; break;
557 case '<': search
= '>'; direction
= 1; break;
558 case '>': search
= '<'; direction
= -1; break;
563 /* prefer matches on the same line */
564 size_t fw
= match_symbol(txt
, pos
, current
, +1);
565 size_t bw
= match_symbol(txt
, pos
, current
, -1);
570 size_t line
= text_lineno_by_pos(txt
, pos
);
571 size_t line_fw
= text_lineno_by_pos(txt
, fw
);
572 size_t line_bw
= text_lineno_by_pos(txt
, bw
);
578 if (text_iterator_byte_next(&it
, &c
)) {
579 /* if a single or double quote is followed by
580 * a special character, search backwards */
581 char special
[] = " \n)}]>.,:;";
582 if (memchr(special
, c
, sizeof(special
)))
585 return direction
>= 0 ? fw
: bw
;
591 return match_symbol(txt
, pos
, search
, direction
);
/*
 * Forward regex search.
 *
 * First searches the range from pos + 1 to the end of the text (skipped when
 * that range is empty). A second text_search_range_forward() call is
 * visible; presumably it is a wrap-around retry from the start of the text
 * when the first search fails - confirm. Returns the start of the first
 * match, or pos unchanged when nothing was found.
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 597, 599-602, 604-605 and 607 are missing (the declaration
 * of `match` and the condition and range setup around the second search).
 * Restore from the pristine file; do not reconstruct by guesswork.
 */
594 size_t text_search_forward(Text
*txt
, size_t pos
, Regex
*regex
) {
595 size_t start
= pos
+ 1;
596 size_t end
= text_size(txt
);
598 bool found
= start
< end
&& !text_search_range_forward(txt
, start
, end
- start
, regex
, 1, match
, 0);
603 found
= !text_search_range_forward(txt
, start
, end
, regex
, 1, match
, 0);
606 return found
? match
[0].start
: pos
;
/*
 * Backward regex search.
 *
 * Runs text_search_range_backward() over (start, end); a second call is
 * visible after `end` is set to the text size, presumably a wrap-around
 * retry over the part of the text after pos - confirm. Returns the start of
 * the match, or pos unchanged when nothing was found.
 *
 * NOTE(review): damaged extraction - every line carries its original line
 * number as a stray leading token, identifiers are split across lines, and
 * original lines 610-612, 614-616, 619-620 and the closing brace are missing
 * (the declarations of `start`, `end` and `match` and the condition and
 * range setup around the second search). Restore from the pristine file; do
 * not reconstruct by guesswork.
 */
609 size_t text_search_backward(Text
*txt
, size_t pos
, Regex
*regex
) {
613 bool found
= !text_search_range_backward(txt
, start
, end
, regex
, 1, match
, 0);
617 end
= text_size(txt
);
618 found
= start
< end
&& !text_search_range_backward(txt
, start
, end
- start
, regex
, 1, match
, 0);
621 return found
? match
[0].start
: pos
;