#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <wchar.h>
#include "text-motions.h"
#include "text-objects.h"
/* True for the two horizontal whitespace bytes (space and tab). */
#define blank(c) ((c) == ' ' || (c) == '\t')
/* isspace() on a byte; the cast avoids passing a negative char to <ctype.h>. */
#define space(c) (isspace((unsigned char)(c)))
/* Invokes whatever `isboundary` predicate is in scope at the point of use. */
#define boundary(c) (isboundary((unsigned char)(c)))
// TODO: specify this per file type?
/*
 * Default word-boundary predicate: an ASCII value (per ISASCII) that is not
 * a decimal digit, an ASCII letter or an underscore counts as a boundary.
 * Values rejected by ISASCII are never boundaries.
 */
int is_word_boundary(int c) {
	return ISASCII(c) && !(('0' <= c && c <= '9') ||
	         ('a' <= c && c <= 'z') ||
	         ('A' <= c && c <= 'Z') || c == '_');
}
23 size_t text_begin(Text
*txt
, size_t pos
) {
27 size_t text_end(Text
*txt
, size_t pos
) {
28 return text_size(txt
);
31 size_t text_char_next(Text
*txt
, size_t pos
) {
32 Iterator it
= text_iterator_get(txt
, pos
);
33 text_iterator_char_next(&it
, NULL
);
37 size_t text_char_prev(Text
*txt
, size_t pos
) {
38 Iterator it
= text_iterator_get(txt
, pos
);
39 text_iterator_char_prev(&it
, NULL
);
43 size_t text_codepoint_next(Text
*txt
, size_t pos
) {
44 Iterator it
= text_iterator_get(txt
, pos
);
45 text_iterator_codepoint_next(&it
, NULL
);
49 size_t text_codepoint_prev(Text
*txt
, size_t pos
) {
50 Iterator it
= text_iterator_get(txt
, pos
);
51 text_iterator_codepoint_prev(&it
, NULL
);
55 static size_t find_next(Text
*txt
, size_t pos
, const char *s
, bool line
) {
58 size_t len
= strlen(s
), matched
= 0;
59 Iterator it
= text_iterator_get(txt
, pos
), sit
;
60 for (char c
; matched
< len
&& text_iterator_byte_get(&it
, &c
); ) {
61 if (c
== s
[matched
]) {
65 } else if (matched
> 0) {
69 text_iterator_byte_next(&it
, NULL
);
70 if (line
&& c
== '\n')
73 return matched
== len
? it
.pos
- len
: pos
;
76 size_t text_find_next(Text
*txt
, size_t pos
, const char *s
) {
77 return find_next(txt
, pos
, s
, false);
80 size_t text_line_find_next(Text
*txt
, size_t pos
, const char *s
) {
81 return find_next(txt
, pos
, s
, true);
84 static size_t find_prev(Text
*txt
, size_t pos
, const char *s
, bool line
) {
87 size_t len
= strlen(s
), matched
= len
- 1;
88 Iterator it
= text_iterator_get(txt
, pos
), sit
;
91 for (char c
; text_iterator_byte_prev(&it
, &c
); ) {
92 if (c
== s
[matched
]) {
95 if (matched
== len
- 1)
98 } else if (matched
< len
- 1) {
102 if (line
&& c
== '\n')
108 size_t text_find_prev(Text
*txt
, size_t pos
, const char *s
) {
109 return find_prev(txt
, pos
, s
, false);
112 size_t text_line_find_prev(Text
*txt
, size_t pos
, const char *s
) {
113 return find_prev(txt
, pos
, s
, true);
116 size_t text_line_prev(Text
*txt
, size_t pos
) {
117 Iterator it
= text_iterator_get(txt
, pos
);
118 text_iterator_byte_find_prev(&it
, '\n');
122 size_t text_line_begin(Text
*txt
, size_t pos
) {
123 Iterator it
= text_iterator_get(txt
, pos
);
124 return text_iterator_byte_find_prev(&it
, '\n') ? it
.pos
+1 : it
.pos
;
127 size_t text_line_start(Text
*txt
, size_t pos
) {
129 Iterator it
= text_iterator_get(txt
, text_line_begin(txt
, pos
));
130 while (text_iterator_byte_get(&it
, &c
) && blank(c
))
131 text_iterator_byte_next(&it
, NULL
);
135 size_t text_line_finish(Text
*txt
, size_t pos
) {
137 size_t end
= text_line_end(txt
, pos
);
138 Iterator it
= text_iterator_get(txt
, end
);
139 if (!text_iterator_byte_prev(&it
, &c
) || c
== '\n')
141 while (blank(c
) && text_iterator_byte_prev(&it
, &c
));
142 return it
.pos
+ (c
== '\n');
145 size_t text_line_end(Text
*txt
, size_t pos
) {
146 Iterator it
= text_iterator_get(txt
, pos
);
147 text_iterator_byte_find_next(&it
, '\n');
151 size_t text_line_next(Text
*txt
, size_t pos
) {
152 Iterator it
= text_iterator_get(txt
, pos
);
153 if (text_iterator_byte_find_next(&it
, '\n'))
154 text_iterator_byte_next(&it
, NULL
);
158 size_t text_line_offset(Text
*txt
, size_t pos
, size_t off
) {
160 size_t bol
= text_line_begin(txt
, pos
);
161 Iterator it
= text_iterator_get(txt
, bol
);
162 while (off
-- > 0 && text_iterator_byte_get(&it
, &c
) && c
!= '\n')
163 text_iterator_byte_next(&it
, NULL
);
167 size_t text_line_char_set(Text
*txt
, size_t pos
, int count
) {
169 size_t bol
= text_line_begin(txt
, pos
);
170 Iterator it
= text_iterator_get(txt
, bol
);
171 if (text_iterator_byte_get(&it
, &c
) && c
!= '\n')
172 while (count
-- > 0 && text_iterator_char_next(&it
, &c
) && c
!= '\n');
176 int text_line_char_get(Text
*txt
, size_t pos
) {
179 size_t bol
= text_line_begin(txt
, pos
);
180 Iterator it
= text_iterator_get(txt
, bol
);
181 if (text_iterator_byte_get(&it
, &c
) && c
!= '\n') {
182 while (it
.pos
< pos
&& c
!= '\n' && text_iterator_char_next(&it
, &c
))
188 int text_line_width_get(Text
*txt
, size_t pos
) {
190 mbstate_t ps
= { 0 };
191 size_t bol
= text_line_begin(txt
, pos
);
192 Iterator it
= text_iterator_get(txt
, bol
);
194 while (it
.pos
< pos
) {
195 char buf
[MB_LEN_MAX
];
196 size_t len
= text_bytes_get(txt
, it
.pos
, sizeof buf
, buf
);
197 if (len
== 0 || buf
[0] == '\n')
200 size_t wclen
= mbrtowc(&wc
, buf
, len
, &ps
);
201 if (wclen
== (size_t)-1 && errno
== EILSEQ
) {
203 /* assume a replacement symbol will be displayed */
205 } else if (wclen
== (size_t)-2) {
206 /* do nothing, advance to next character */
207 } else if (wclen
== 0) {
208 /* assume NUL byte will be displayed as ^@ */
210 } else if (buf
[0] == '\t') {
215 w
= 2; /* assume non-printable will be displayed as ^{char} */
219 if (!text_iterator_codepoint_next(&it
, NULL
))
226 size_t text_line_width_set(Text
*txt
, size_t pos
, int width
) {
228 mbstate_t ps
= { 0 };
229 size_t bol
= text_line_begin(txt
, pos
);
230 Iterator it
= text_iterator_get(txt
, bol
);
233 char buf
[MB_LEN_MAX
];
234 size_t len
= text_bytes_get(txt
, it
.pos
, sizeof buf
, buf
);
235 if (len
== 0 || buf
[0] == '\n')
238 size_t wclen
= mbrtowc(&wc
, buf
, len
, &ps
);
239 if (wclen
== (size_t)-1 && errno
== EILSEQ
) {
241 /* assume a replacement symbol will be displayed */
243 } else if (wclen
== (size_t)-2) {
244 /* do nothing, advance to next character */
245 } else if (wclen
== 0) {
246 /* assume NUL byte will be displayed as ^@ */
248 } else if (buf
[0] == '\t') {
253 w
= 2; /* assume non-printable will be displayed as ^{char} */
257 if (cur_width
>= width
|| !text_iterator_codepoint_next(&it
, NULL
))
264 size_t text_line_char_next(Text
*txt
, size_t pos
) {
266 Iterator it
= text_iterator_get(txt
, pos
);
267 if (!text_iterator_byte_get(&it
, &c
) || c
== '\n')
269 text_iterator_char_next(&it
, NULL
);
273 size_t text_line_char_prev(Text
*txt
, size_t pos
) {
275 Iterator it
= text_iterator_get(txt
, pos
);
276 if (!text_iterator_char_prev(&it
, &c
) || c
== '\n')
281 size_t text_line_up(Text
*txt
, size_t pos
) {
282 int width
= text_line_width_get(txt
, pos
);
283 size_t prev
= text_line_prev(txt
, pos
);
284 return text_line_width_set(txt
, prev
, width
);
287 size_t text_line_down(Text
*txt
, size_t pos
) {
288 int width
= text_line_width_get(txt
, pos
);
289 size_t next
= text_line_next(txt
, pos
);
290 return text_line_width_set(txt
, next
, width
);
293 size_t text_range_line_first(Text
*txt
, Filerange
*r
) {
294 if (!text_range_valid(r
))
299 size_t text_range_line_last(Text
*txt
, Filerange
*r
) {
300 if (!text_range_valid(r
))
302 size_t pos
= text_line_begin(txt
, r
->end
);
304 /* range ends at a begin of a line, skip last line ending */
305 pos
= text_line_prev(txt
, pos
);
306 pos
= text_line_begin(txt
, pos
);
308 return r
->start
<= pos
? pos
: r
->start
;
311 size_t text_range_line_next(Text
*txt
, Filerange
*r
, size_t pos
) {
312 if (!text_range_contains(r
, pos
))
314 size_t newpos
= text_line_next(txt
, pos
);
315 return newpos
!= pos
&& newpos
< r
->end
? newpos
: EPOS
;
318 size_t text_range_line_prev(Text
*txt
, Filerange
*r
, size_t pos
) {
319 if (!text_range_contains(r
, pos
))
321 size_t newpos
= text_line_begin(txt
, text_line_prev(txt
, pos
));
322 return newpos
!= pos
&& r
->start
<= newpos
? newpos
: EPOS
;
325 size_t text_customword_start_next(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
327 Iterator it
= text_iterator_get(txt
, pos
);
328 if (!text_iterator_byte_get(&it
, &c
))
331 while (boundary(c
) && !space(c
) && text_iterator_char_next(&it
, &c
));
333 while (!boundary(c
) && text_iterator_char_next(&it
, &c
));
334 while (space(c
) && text_iterator_char_next(&it
, &c
));
338 size_t text_customword_start_prev(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
340 Iterator it
= text_iterator_get(txt
, pos
);
341 while (text_iterator_char_prev(&it
, &c
) && space(c
));
343 do pos
= it
.pos
; while (text_iterator_char_prev(&it
, &c
) && boundary(c
) && !space(c
));
345 do pos
= it
.pos
; while (text_iterator_char_prev(&it
, &c
) && !boundary(c
));
349 size_t text_customword_end_next(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
351 Iterator it
= text_iterator_get(txt
, pos
);
352 while (text_iterator_char_next(&it
, &c
) && space(c
));
354 do pos
= it
.pos
; while (text_iterator_char_next(&it
, &c
) && boundary(c
) && !space(c
));
356 do pos
= it
.pos
; while (text_iterator_char_next(&it
, &c
) && !isboundary(c
));
360 size_t text_customword_end_prev(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
362 Iterator it
= text_iterator_get(txt
, pos
);
363 if (!text_iterator_byte_get(&it
, &c
))
366 while (boundary(c
) && !space(c
) && text_iterator_char_prev(&it
, &c
));
368 while (!boundary(c
) && text_iterator_char_prev(&it
, &c
));
369 while (space(c
) && text_iterator_char_prev(&it
, &c
));
373 size_t text_longword_end_next(Text
*txt
, size_t pos
) {
374 return text_customword_end_next(txt
, pos
, isspace
);
377 size_t text_longword_end_prev(Text
*txt
, size_t pos
) {
378 return text_customword_end_prev(txt
, pos
, isspace
);
381 size_t text_longword_start_next(Text
*txt
, size_t pos
) {
382 return text_customword_start_next(txt
, pos
, isspace
);
385 size_t text_longword_start_prev(Text
*txt
, size_t pos
) {
386 return text_customword_start_prev(txt
, pos
, isspace
);
389 size_t text_word_end_next(Text
*txt
, size_t pos
) {
390 return text_customword_end_next(txt
, pos
, is_word_boundary
);
393 size_t text_word_end_prev(Text
*txt
, size_t pos
) {
394 return text_customword_end_prev(txt
, pos
, is_word_boundary
);
397 size_t text_word_start_next(Text
*txt
, size_t pos
) {
398 return text_customword_start_next(txt
, pos
, is_word_boundary
);
401 size_t text_word_start_prev(Text
*txt
, size_t pos
) {
402 return text_customword_start_prev(txt
, pos
, is_word_boundary
);
405 size_t text_sentence_next(Text
*txt
, size_t pos
) {
407 Iterator it
= text_iterator_get(txt
, pos
), rev
= it
;
409 if (!text_iterator_byte_get(&it
, &c
))
412 while (text_iterator_byte_get(&rev
, &prev
) && space(prev
))
413 text_iterator_byte_prev(&rev
, NULL
);
414 prev
= rev
.pos
== 0 ? '.' : prev
; /* simulate punctuation at BOF */
417 if ((prev
== '.' || prev
== '?' || prev
== '!') && space(c
)) {
418 do text_iterator_byte_next(&it
, NULL
);
419 while (text_iterator_byte_get(&it
, &c
) && space(c
));
423 } while (text_iterator_byte_next(&it
, &c
));
427 size_t text_sentence_prev(Text
*txt
, size_t pos
) {
429 bool content
= false;
430 Iterator it
= text_iterator_get(txt
, pos
);
432 while (it
.pos
!= 0 && text_iterator_byte_prev(&it
, &c
)) {
433 if (content
&& space(prev
) && (c
== '.' || c
== '?' || c
== '!')) {
434 do text_iterator_byte_next(&it
, NULL
);
435 while (text_iterator_byte_get(&it
, &c
) && space(c
));
438 content
|= !space(c
);
440 } /* The loop only ends on hitting BOF or error */
441 if (content
) /* starting pos was after first sentence in file => find that sentences start */
442 while (text_iterator_byte_get(&it
, &c
) && space(c
))
443 text_iterator_byte_next(&it
, NULL
);
447 size_t text_paragraph_next(Text
*txt
, size_t pos
) {
449 Iterator it
= text_iterator_get(txt
, pos
);
451 while (text_iterator_byte_get(&it
, &c
) && (c
== '\n' || blank(c
)))
452 text_iterator_char_next(&it
, NULL
);
453 return text_line_blank_next(txt
, it
.pos
);
456 size_t text_paragraph_prev(Text
*txt
, size_t pos
) {
458 Iterator it
= text_iterator_get(txt
, pos
);
460 while (text_iterator_byte_prev(&it
, &c
) && (c
== '\n' || blank(c
)));
461 return text_line_blank_prev(txt
, it
.pos
);
464 size_t text_line_empty_next(Text
*txt
, size_t pos
) {
466 Iterator it
= text_iterator_get(txt
, pos
);
467 while (text_iterator_byte_find_next(&it
, '\n')) {
468 if (text_iterator_byte_next(&it
, &c
) && c
== '\n')
474 size_t text_line_empty_prev(Text
*txt
, size_t pos
) {
476 Iterator it
= text_iterator_get(txt
, pos
);
477 while (text_iterator_byte_find_prev(&it
, '\n')) {
478 if (text_iterator_byte_prev(&it
, &c
) && c
== '\n')
484 size_t text_line_blank_next(Text
*txt
, size_t pos
) {
486 Iterator it
= text_iterator_get(txt
, pos
);
487 while (text_iterator_byte_find_next(&it
, '\n')) {
489 while (text_iterator_byte_next(&it
, &c
) && blank(c
));
496 size_t text_line_blank_prev(Text
*txt
, size_t pos
) {
498 Iterator it
= text_iterator_get(txt
, pos
);
499 while (text_iterator_byte_find_prev(&it
, '\n')) {
500 while (text_iterator_byte_prev(&it
, &c
) && blank(c
));
507 size_t text_block_start(Text
*txt
, size_t pos
) {
508 Filerange r
= text_object_curly_bracket(txt
, pos
-1);
509 return text_range_valid(&r
) ? r
.start
-1 : pos
;
512 size_t text_block_end(Text
*txt
, size_t pos
) {
513 Filerange r
= text_object_curly_bracket(txt
, pos
+1);
514 return text_range_valid(&r
) ? r
.end
: pos
;
517 size_t text_parenthesis_start(Text
*txt
, size_t pos
) {
518 Filerange r
= text_object_parenthesis(txt
, pos
-1);
519 return text_range_valid(&r
) ? r
.start
-1 : pos
;
522 size_t text_parenthesis_end(Text
*txt
, size_t pos
) {
523 Filerange r
= text_object_parenthesis(txt
, pos
+1);
524 return text_range_valid(&r
) ? r
.end
: pos
;
527 size_t text_bracket_match(Text
*txt
, size_t pos
, const Filerange
*limits
) {
528 return text_bracket_match_symbol(txt
, pos
, NULL
, limits
);
531 static size_t match_symbol(Text
*txt
, size_t pos
, char search
, int direction
, const Filerange
*limits
) {
534 bool instring
= false;
535 Iterator it
= text_iterator_get(txt
, pos
);
536 if (!text_iterator_byte_get(&it
, ¤t
))
538 if (direction
>= 0) { /* forward search */
539 while (text_iterator_byte_next(&it
, &c
)) {
540 if (limits
&& it
.pos
>= limits
->end
)
542 if (c
!= current
&& c
== '"')
543 instring
= !instring
;
545 if (c
== search
&& --count
== 0)
547 else if (c
== current
)
551 } else { /* backwards */
552 while (text_iterator_byte_prev(&it
, &c
)) {
553 if (limits
&& it
.pos
< limits
->start
)
555 if (c
!= current
&& c
== '"')
556 instring
= !instring
;
558 if (c
== search
&& --count
== 0)
560 else if (c
== current
)
566 return pos
; /* no match found */
/*
 * text_bracket_match_symbol: reads the byte at `pos` into `current`; when
 * `symbols` is non-NULL that byte must occur in it (memchr over
 * strlen(symbols) bytes). For ( ) { } [ ] < > the opposite symbol and a
 * search direction (+1 forward, -1 backward) are selected and the search is
 * delegated to match_symbol(). A separate branch computes a forward (`fw`)
 * and a backward (`bw`) candidate using `current` itself as search symbol
 * and compares their line numbers ("prefer matches on the same line").
 *
 * NOTE(review): this listing is damaged and is left untouched on purpose.
 * Each line carries a leading number that is not C, tokens are split across
 * lines, `¤t` looks like a mis-decoded `&current`, and the leading numbers
 * skip 570, 574, 576-577, 586-589, 593-596, 600-604, 610-611 and 613-617.
 * The skipped statements (the declaration of `direction`, the `switch`
 * header, the quote `case` labels, the fw/bw line comparison, the
 * assignment after the memchr() test and all early returns) cannot be
 * recovered from here - restore this function from the canonical source.
 *
 * NOTE(review): memchr(special, c, sizeof(special)) also scans the
 * terminating NUL of `special`, so a NUL byte following a quote is treated
 * as a "special character" - confirm that this is intended.
 */
569 size_t text_bracket_match_symbol(Text
*txt
, size_t pos
, const char *symbols
, const Filerange
*limits
) {
571 char search
, current
, c
;
572 Iterator it
= text_iterator_get(txt
, pos
);
573 if (!text_iterator_byte_get(&it
, ¤t
))
575 if (symbols
&& !memchr(symbols
, current
, strlen(symbols
)))
578 case '(': search
= ')'; direction
= 1; break;
579 case ')': search
= '('; direction
= -1; break;
580 case '{': search
= '}'; direction
= 1; break;
581 case '}': search
= '{'; direction
= -1; break;
582 case '[': search
= ']'; direction
= 1; break;
583 case ']': search
= '['; direction
= -1; break;
584 case '<': search
= '>'; direction
= 1; break;
585 case '>': search
= '<'; direction
= -1; break;
590 /* prefer matches on the same line */
591 size_t fw
= match_symbol(txt
, pos
, current
, +1, limits
);
592 size_t bw
= match_symbol(txt
, pos
, current
, -1, limits
);
597 size_t line
= text_lineno_by_pos(txt
, pos
);
598 size_t line_fw
= text_lineno_by_pos(txt
, fw
);
599 size_t line_bw
= text_lineno_by_pos(txt
, bw
);
605 if (text_iterator_byte_next(&it
, &c
)) {
606 /* if a single or double quote is followed by
607 * a special character, search backwards */
608 char special
[] = " \t\n)}]>.,:;";
609 if (memchr(special
, c
, sizeof(special
)))
612 return direction
>= 0 ? fw
: bw
;
618 return match_symbol(txt
, pos
, search
, direction
, limits
);
621 size_t text_search_forward(Text
*txt
, size_t pos
, Regex
*regex
) {
622 size_t start
= pos
+ 1;
623 size_t end
= text_size(txt
);
626 int flags
= text_byte_get(txt
, pos
, &c
) && c
== '\n' ? 0 : REG_NOTBOL
;
627 bool found
= start
< end
&& !text_search_range_forward(txt
, start
, end
- start
, regex
, 1, match
, flags
);
631 found
= !text_search_range_forward(txt
, start
, end
- start
, regex
, 1, match
, 0);
634 return found
? match
[0].start
: pos
;
637 size_t text_search_backward(Text
*txt
, size_t pos
, Regex
*regex
) {
641 bool found
= !text_search_range_backward(txt
, start
, end
, regex
, 1, match
, REG_NOTEOL
);
644 end
= text_size(txt
);
645 found
= !text_search_range_backward(txt
, start
, end
- start
, regex
, 1, match
, 0);
648 return found
? match
[0].start
: pos
;