7 #include "text-motions.h"
10 #include "text-objects.h"
/* Character-class helpers used by the motions in this file.
 * Every use of the argument is parenthesized, so an expression argument
 * (e.g. `a + b`) is classified as a whole instead of having only its first
 * operand converted. */

/* blank(c): true for a space or a horizontal tab. */
#define blank(c) ((c) == ' ' || (c) == '\t')
/* space(c): isspace() applied to the value converted to unsigned char, which
 * keeps the argument inside the domain <ctype.h> requires. */
#define space(c) (isspace((unsigned char)(c)))
/* boundary(c): calls whatever `isboundary` names at the point of use, with
 * the same unsigned char conversion as space(). */
#define boundary(c) (isboundary((unsigned char)(c)))
16 // TODO: specify this per file type?
/* Boundary predicate used by the plain "word" motions.
 * Returns 1 when c passes ISASCII() and is none of: a decimal digit, a
 * letter 'a'..'z' or 'A'..'Z', or an underscore. Returns 0 for those word
 * characters and for any value ISASCII() rejects. */
int is_word_boundary(int c) {
	if (!ISASCII(c))
		return 0;
	if (c == '_')
		return 0;
	if (c >= '0' && c <= '9')
		return 0;
	if (c >= 'a' && c <= 'z')
		return 0;
	if (c >= 'A' && c <= 'Z')
		return 0;
	return 1;
}
/* text_begin: motion taking a text and a position.
 * NOTE(review): the body is not shown in this excerpt; presumably it yields
 * the first position of the text regardless of pos -- confirm. */
23 size_t text_begin(Text
*txt
, size_t pos
) {
27 size_t text_end(Text
*txt
, size_t pos
) {
28 return text_size(txt
);
/* Places an iterator at pos and advances it once with
 * text_iterator_char_next(); the character read is not requested (NULL).
 * NOTE(review): the return statement is not shown in this excerpt;
 * presumably the iterator's resulting position is returned -- confirm. */
31 size_t text_char_next(Text
*txt
, size_t pos
) {
32 Iterator it
= text_iterator_get(txt
, pos
);
33 text_iterator_char_next(&it
, NULL
);
/* Places an iterator at pos and moves it back once with
 * text_iterator_char_prev(); the character read is not requested (NULL).
 * NOTE(review): the return statement is not shown in this excerpt;
 * presumably the iterator's resulting position is returned -- confirm. */
37 size_t text_char_prev(Text
*txt
, size_t pos
) {
38 Iterator it
= text_iterator_get(txt
, pos
);
39 text_iterator_char_prev(&it
, NULL
);
/* Places an iterator at pos and advances it once with
 * text_iterator_codepoint_next(); the value read is not requested (NULL).
 * NOTE(review): the return statement is not shown in this excerpt;
 * presumably the iterator's resulting position is returned -- confirm. */
43 size_t text_codepoint_next(Text
*txt
, size_t pos
) {
44 Iterator it
= text_iterator_get(txt
, pos
);
45 text_iterator_codepoint_next(&it
, NULL
);
/* Places an iterator at pos and moves it back once with
 * text_iterator_codepoint_prev(); the value read is not requested (NULL).
 * NOTE(review): the return statement is not shown in this excerpt;
 * presumably the iterator's resulting position is returned -- confirm. */
49 size_t text_codepoint_prev(Text
*txt
, size_t pos
) {
50 Iterator it
= text_iterator_get(txt
, pos
);
51 text_iterator_codepoint_prev(&it
, NULL
);
/* Forward byte-wise search for the string s, starting at pos.
 *
 * len is strlen(s); matched counts how many leading bytes of s have been
 * matched so far. The loop reads one byte at a time and runs until matched
 * reaches len or no further byte can be read.
 *
 * line: when set, reading a '\n' triggers the check at the end of the loop
 * body. NOTE(review): the statement it guards is not shown in this excerpt;
 * presumably the search stops at the end of the line -- confirm.
 *
 * Returns it.pos - len when all len bytes were matched, otherwise pos. */
55 static size_t find_next(Text
*txt
, size_t pos
, const char *s
, bool line
) {
58 size_t len
= strlen(s
), matched
= 0;
/* sit is a second iterator declared next to it; its use is not shown here */
59 Iterator it
= text_iterator_get(txt
, pos
), sit
;
60 for (char c
; matched
< len
&& text_iterator_byte_get(&it
, &c
); ) {
/* current byte equals the next expected byte of s */
61 if (c
== s
[matched
]) {
/* mismatch while a partial match was in progress */
65 } else if (matched
> 0) {
69 text_iterator_byte_next(&it
, NULL
);
70 if (line
&& c
== '\n')
73 return matched
== len
? it
.pos
- len
: pos
;
76 size_t text_find_next(Text
*txt
, size_t pos
, const char *s
) {
77 return find_next(txt
, pos
, s
, false);
80 size_t text_line_find_next(Text
*txt
, size_t pos
, const char *s
) {
81 return find_next(txt
, pos
, s
, true);
/* Backward byte-wise search for the string s, starting before pos.
 *
 * len is strlen(s); matched is the index into s of the byte expected next
 * and starts at len - 1, i.e. s is compared from its last byte while the
 * iterator steps backwards through the text.
 *
 * line: when set, reading a '\n' triggers the check at the end of the loop
 * body (guarded statement not shown in this excerpt).
 *
 * NOTE(review): len - 1 wraps around when s is the empty string; no guard is
 * visible in this excerpt -- confirm that an empty s is rejected before
 * s[matched] is read.
 * NOTE(review): the return statements are not shown in this excerpt. */
84 static size_t find_prev(Text
*txt
, size_t pos
, const char *s
, bool line
) {
87 size_t len
= strlen(s
), matched
= len
- 1;
88 Iterator it
= text_iterator_get(txt
, pos
), sit
;
/* each iteration first steps one byte back and reads it into c */
91 for (char c
; text_iterator_byte_prev(&it
, &c
); ) {
92 if (c
== s
[matched
]) {
/* true while no byte of s has been matched yet in the current attempt */
95 if (matched
== len
- 1)
/* mismatch while a partial match was in progress */
98 } else if (matched
< len
- 1) {
102 if (line
&& c
== '\n')
108 size_t text_find_prev(Text
*txt
, size_t pos
, const char *s
) {
109 return find_prev(txt
, pos
, s
, false);
112 size_t text_line_find_prev(Text
*txt
, size_t pos
, const char *s
) {
113 return find_prev(txt
, pos
, s
, true);
/* Places an iterator at pos and searches backwards for a '\n' byte with
 * text_iterator_byte_find_prev().
 * NOTE(review): the return statement is not shown in this excerpt;
 * presumably the iterator's resulting position is returned -- confirm. */
116 size_t text_line_prev(Text
*txt
, size_t pos
) {
117 Iterator it
= text_iterator_get(txt
, pos
);
118 text_iterator_byte_find_prev(&it
, '\n');
122 size_t text_line_begin(Text
*txt
, size_t pos
) {
123 Iterator it
= text_iterator_get(txt
, pos
);
124 return text_iterator_byte_find_prev(&it
, '\n') ? it
.pos
+1 : it
.pos
;
/* Starting from text_line_begin(txt, pos), advances byte by byte over blank
 * bytes (space or tab, see blank()) until a non-blank byte is found or no
 * byte can be read.
 * NOTE(review): the declaration of c and the return statement are not shown
 * in this excerpt; presumably the iterator's position is returned -- confirm. */
127 size_t text_line_start(Text
*txt
, size_t pos
) {
129 Iterator it
= text_iterator_get(txt
, text_line_begin(txt
, pos
));
130 while (text_iterator_byte_get(&it
, &c
) && blank(c
))
131 text_iterator_byte_next(&it
, NULL
);
/* Scans backwards from text_line_end(txt, pos) over trailing blank bytes
 * (space or tab).
 *
 * First steps one byte back from the line end; if that fails or the byte is
 * a '\n', an early branch is taken (statement not shown in this excerpt).
 * Otherwise keeps stepping back while the byte read is blank.
 *
 * Returns the iterator position, plus one when the scan stopped on a '\n'
 * (so the result does not land on the preceding line's newline). */
135 size_t text_line_finish(Text
*txt
, size_t pos
) {
137 size_t end
= text_line_end(txt
, pos
);
138 Iterator it
= text_iterator_get(txt
, end
);
139 if (!text_iterator_byte_prev(&it
, &c
) || c
== '\n')
141 while (blank(c
) && text_iterator_byte_prev(&it
, &c
));
142 return it
.pos
+ (c
== '\n');
/* Places an iterator at pos and searches forwards for a '\n' byte with
 * text_iterator_byte_find_next().
 * NOTE(review): the return statement is not shown in this excerpt;
 * presumably the iterator's resulting position is returned -- confirm. */
145 size_t text_line_end(Text
*txt
, size_t pos
) {
146 Iterator it
= text_iterator_get(txt
, pos
);
147 text_iterator_byte_find_next(&it
, '\n');
/* Searches forwards from pos for a '\n'; when one is found, advances one
 * more byte so the iterator moves past it.
 * NOTE(review): the return statement is not shown in this excerpt;
 * presumably the iterator's resulting position is returned -- confirm. */
151 size_t text_line_next(Text
*txt
, size_t pos
) {
152 Iterator it
= text_iterator_get(txt
, pos
);
153 if (text_iterator_byte_find_next(&it
, '\n'))
154 text_iterator_byte_next(&it
, NULL
);
/* Starting at the beginning of the line containing pos, advances at most
 * off bytes. The walk stops early when a '\n' is read or no byte can be
 * read.
 * NOTE(review): the declaration of c and the return statement are not shown
 * in this excerpt; presumably the iterator's position is returned -- confirm. */
158 size_t text_line_offset(Text
*txt
, size_t pos
, size_t off
) {
160 size_t bol
= text_line_begin(txt
, pos
);
161 Iterator it
= text_iterator_get(txt
, bol
);
162 while (off
-- > 0 && text_iterator_byte_get(&it
, &c
) && c
!= '\n')
163 text_iterator_byte_next(&it
, NULL
);
/* Starting at the beginning of the line containing pos, advances up to
 * count characters with text_iterator_char_next().
 *
 * Nothing is advanced when the line's first byte cannot be read or is a
 * '\n'. The walk also stops as soon as a '\n' is read or the iterator
 * cannot advance.
 * NOTE(review): the declaration of c and the return statement are not shown
 * in this excerpt; presumably the iterator's position is returned -- confirm. */
167 size_t text_line_char_set(Text
*txt
, size_t pos
, int count
) {
169 size_t bol
= text_line_begin(txt
, pos
);
170 Iterator it
= text_iterator_get(txt
, bol
);
171 if (text_iterator_byte_get(&it
, &c
) && c
!= '\n')
172 while (count
-- > 0 && text_iterator_char_next(&it
, &c
) && c
!= '\n');
/* Walks character by character from the beginning of the line containing
 * pos towards pos.
 *
 * The walk is only entered when the line's first byte can be read and is not
 * a '\n'; it continues while the iterator is before pos, the last character
 * read is not '\n', and text_iterator_char_next() succeeds.
 * NOTE(review): the loop body and the return statement are not shown in this
 * excerpt; presumably the number of characters stepped over is counted and
 * returned -- confirm. */
176 int text_line_char_get(Text
*txt
, size_t pos
) {
179 size_t bol
= text_line_begin(txt
, pos
);
180 Iterator it
= text_iterator_get(txt
, bol
);
181 if (text_iterator_byte_get(&it
, &c
) && c
!= '\n') {
182 while (it
.pos
< pos
&& c
!= '\n' && text_iterator_char_next(&it
, &c
))
/* Walks from the beginning of the line containing pos up to pos, decoding
 * one multibyte character per iteration with mbrtowc() and classifying it
 * for display-width purposes.
 *
 * Per iteration: up to MB_LEN_MAX bytes are fetched at the iterator
 * position; an empty read or a leading '\n' triggers an early branch. The
 * mbrtowc() result then selects one of the cases annotated below, and the
 * iterator advances by one code point.
 *
 * NOTE(review): the width bookkeeping, several branch bodies and the return
 * statement are not shown in this excerpt; presumably the accumulated
 * display width is returned -- confirm. */
188 int text_line_width_get(Text
*txt
, size_t pos
) {
/* conversion state for mbrtowc(), zero-initialized */
190 mbstate_t ps
= { 0 };
191 size_t bol
= text_line_begin(txt
, pos
);
192 Iterator it
= text_iterator_get(txt
, bol
);
194 while (it
.pos
< pos
) {
195 char buf
[MB_LEN_MAX
];
196 size_t len
= text_bytes_get(txt
, it
.pos
, sizeof buf
, buf
);
197 if (len
== 0 || buf
[0] == '\n')
200 size_t wclen
= mbrtowc(&wc
, buf
, len
, &ps
);
/* (size_t)-1 with EILSEQ: invalid multibyte sequence */
201 if (wclen
== (size_t)-1 && errno
== EILSEQ
) {
203 /* assume a replacement symbol will be displayed */
/* (size_t)-2: incomplete multibyte sequence */
205 } else if (wclen
== (size_t)-2) {
206 /* do nothing, advance to next character */
207 } else if (wclen
== 0) {
208 /* assume NUL byte will be displayed as ^@ */
210 } else if (buf
[0] == '\t') {
215 w
= 2; /* assume non-printable will be displayed as ^{char} */
219 if (!text_iterator_codepoint_next(&it
, NULL
))
/* Walks forward from the beginning of the line containing pos, decoding one
 * multibyte character per iteration with mbrtowc() and classifying it for
 * display-width purposes, until the accumulated width cur_width reaches
 * width or the iterator cannot advance by another code point.
 *
 * Per iteration: up to MB_LEN_MAX bytes are fetched at the iterator
 * position; an empty read or a leading '\n' triggers an early branch.
 *
 * NOTE(review): the loop header, the width bookkeeping, several branch
 * bodies and the return statement are not shown in this excerpt; presumably
 * the iterator's position is returned -- confirm. */
226 size_t text_line_width_set(Text
*txt
, size_t pos
, int width
) {
/* conversion state for mbrtowc(), zero-initialized */
228 mbstate_t ps
= { 0 };
229 size_t bol
= text_line_begin(txt
, pos
);
230 Iterator it
= text_iterator_get(txt
, bol
);
233 char buf
[MB_LEN_MAX
];
234 size_t len
= text_bytes_get(txt
, it
.pos
, sizeof buf
, buf
);
235 if (len
== 0 || buf
[0] == '\n')
238 size_t wclen
= mbrtowc(&wc
, buf
, len
, &ps
);
/* (size_t)-1 with EILSEQ: invalid multibyte sequence */
239 if (wclen
== (size_t)-1 && errno
== EILSEQ
) {
241 /* assume a replacement symbol will be displayed */
/* (size_t)-2: incomplete multibyte sequence */
243 } else if (wclen
== (size_t)-2) {
244 /* do nothing, advance to next character */
245 } else if (wclen
== 0) {
246 /* assume NUL byte will be displayed as ^@ */
248 } else if (buf
[0] == '\t') {
253 w
= 2; /* assume non-printable will be displayed as ^{char} */
/* stop once the requested width is reached or the text cannot be advanced */
257 if (cur_width
>= width
|| !text_iterator_codepoint_next(&it
, NULL
))
/* Advances one character from pos with text_iterator_char_next().
 * Before advancing, the byte at pos is read; if that fails or the byte is a
 * '\n', an early branch is taken.
 * NOTE(review): the early branch's statement, the declaration of c and the
 * final return are not shown in this excerpt; presumably pos is returned
 * unchanged at a line end and the iterator's position otherwise -- confirm. */
264 size_t text_line_char_next(Text
*txt
, size_t pos
) {
266 Iterator it
= text_iterator_get(txt
, pos
);
267 if (!text_iterator_byte_get(&it
, &c
) || c
== '\n')
269 text_iterator_char_next(&it
, NULL
);
/* Moves one character back from pos with text_iterator_char_prev().
 * If that fails or the character read is a '\n', a branch is taken whose
 * statement is not shown in this excerpt.
 * NOTE(review): presumably pos is returned unchanged in that case and the
 * iterator's position otherwise -- confirm. */
273 size_t text_line_char_prev(Text
*txt
, size_t pos
) {
275 Iterator it
= text_iterator_get(txt
, pos
);
276 if (!text_iterator_char_prev(&it
, &c
) || c
== '\n')
281 size_t text_line_up(Text
*txt
, size_t pos
) {
282 int width
= text_line_width_get(txt
, pos
);
283 size_t prev
= text_line_prev(txt
, pos
);
284 return text_line_width_set(txt
, prev
, width
);
/* Vertical motion towards the next line.
 * Takes the value text_line_width_get() reports for pos, moves to
 * text_line_next(txt, pos) and hands both to text_line_width_set(), whose
 * result is returned.
 * When text_line_next() yields text_size(txt) a special branch is taken.
 * NOTE(review): that branch's statement is not shown in this excerpt;
 * presumably pos is returned unchanged -- confirm. */
287 size_t text_line_down(Text
*txt
, size_t pos
) {
288 int width
= text_line_width_get(txt
, pos
);
289 size_t next
= text_line_next(txt
, pos
);
290 if (next
== text_size(txt
))
292 return text_line_width_set(txt
, next
, width
);
/* First line position of the range r.
 * The range is checked with text_range_valid() before anything else.
 * NOTE(review): both the invalid-range branch and the normal return are not
 * shown in this excerpt -- confirm what is returned in each case. */
295 size_t text_range_line_first(Text
*txt
, Filerange
*r
) {
296 if (!text_range_valid(r
))
/* Start of the last line of the range r.
 *
 * After validating r with text_range_valid() (invalid branch not shown in
 * this excerpt), takes the beginning of the line containing r->end. The
 * statements that follow step to the previous line and take its beginning;
 * per the existing comment they apply when the range ends at the beginning
 * of a line (the guarding condition is not shown in this excerpt).
 *
 * The result is clamped so it is never before r->start. */
301 size_t text_range_line_last(Text
*txt
, Filerange
*r
) {
302 if (!text_range_valid(r
))
304 size_t pos
= text_line_begin(txt
, r
->end
);
306 /* range ends at a begin of a line, skip last line ending */
307 pos
= text_line_prev(txt
, pos
);
308 pos
= text_line_begin(txt
, pos
);
310 return r
->start
<= pos
? pos
: r
->start
;
/* Next line position after pos, restricted to the range r.
 * pos is first checked with text_range_contains() (the failing branch is not
 * shown in this excerpt).
 * Returns text_line_next(txt, pos) when it differs from pos and is still
 * before r->end; otherwise EPOS. */
313 size_t text_range_line_next(Text
*txt
, Filerange
*r
, size_t pos
) {
314 if (!text_range_contains(r
, pos
))
316 size_t newpos
= text_line_next(txt
, pos
);
317 return newpos
!= pos
&& newpos
< r
->end
? newpos
: EPOS
;
/* Previous line position before pos, restricted to the range r.
 * pos is first checked with text_range_contains() (the failing branch is not
 * shown in this excerpt).
 * The candidate is text_line_begin() applied to text_line_prev(txt, pos); it
 * is returned when it differs from pos and is not before r->start,
 * otherwise EPOS is returned. */
320 size_t text_range_line_prev(Text
*txt
, Filerange
*r
, size_t pos
) {
321 if (!text_range_contains(r
, pos
))
323 size_t newpos
= text_line_begin(txt
, text_line_prev(txt
, pos
));
324 return newpos
!= pos
&& r
->start
<= newpos
? newpos
: EPOS
;
/* Forward word motion using the caller-supplied predicate isboundary (the
 * boundary() macro calls it with the character converted to unsigned char).
 *
 * Reads the byte at pos (failure branch not shown in this excerpt), then
 * the visible scans are, in order:
 *   - advance over characters that are boundary but not whitespace,
 *   - advance over characters that are not boundary,
 *   - advance over whitespace.
 * NOTE(review): statements between these scans, the declaration of c and
 * the return statement are not shown in this excerpt -- confirm how the
 * first two scans are selected and that the iterator position is returned. */
327 size_t text_customword_start_next(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
329 Iterator it
= text_iterator_get(txt
, pos
);
330 if (!text_iterator_byte_get(&it
, &c
))
333 while (boundary(c
) && !space(c
) && text_iterator_char_next(&it
, &c
));
335 while (!boundary(c
) && text_iterator_char_next(&it
, &c
));
336 while (space(c
) && text_iterator_char_next(&it
, &c
));
/* Backward word motion using the caller-supplied predicate isboundary (the
 * boundary() macro calls it with the character converted to unsigned char).
 *
 * First steps back over whitespace. The two do/while scans that follow each
 * record the iterator position in pos before stepping back again: one
 * continues over characters that are boundary but not whitespace, the other
 * over characters that are not boundary.
 * NOTE(review): statements between the scans, the declaration of c and the
 * return statement are not shown in this excerpt -- confirm how the scans
 * are selected and that pos is returned. */
340 size_t text_customword_start_prev(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
342 Iterator it
= text_iterator_get(txt
, pos
);
343 while (text_iterator_char_prev(&it
, &c
) && space(c
));
345 do pos
= it
.pos
; while (text_iterator_char_prev(&it
, &c
) && boundary(c
) && !space(c
));
347 do pos
= it
.pos
; while (text_iterator_char_prev(&it
, &c
) && !boundary(c
));
/* Forward end-of-word motion using the caller-supplied predicate isboundary.
 *
 * First advances over whitespace. The two do/while scans that follow each
 * record the iterator position in pos before advancing again: one continues
 * over characters that are boundary but not whitespace, the other over
 * characters that are not boundary.
 *
 * NOTE(review): the last scan calls isboundary(c) directly instead of going
 * through boundary(c), so c is passed without the unsigned char conversion
 * used everywhere else in this file; with a <ctype.h> predicate such as
 * isspace a negative char value is outside the allowed argument range.
 * NOTE(review): statements between the scans, the declaration of c and the
 * return statement are not shown in this excerpt -- confirm how the scans
 * are selected and that pos is returned. */
351 size_t text_customword_end_next(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
353 Iterator it
= text_iterator_get(txt
, pos
);
354 while (text_iterator_char_next(&it
, &c
) && space(c
));
356 do pos
= it
.pos
; while (text_iterator_char_next(&it
, &c
) && boundary(c
) && !space(c
));
358 do pos
= it
.pos
; while (text_iterator_char_next(&it
, &c
) && !isboundary(c
));
/* Backward end-of-word motion using the caller-supplied predicate
 * isboundary (the boundary() macro calls it with the character converted to
 * unsigned char).
 *
 * Reads the byte at pos (failure branch not shown in this excerpt), then
 * the visible scans are, in order:
 *   - step back over characters that are boundary but not whitespace,
 *   - step back over characters that are not boundary,
 *   - step back over whitespace.
 * NOTE(review): statements between these scans, the declaration of c and
 * the return statement are not shown in this excerpt -- confirm how the
 * first two scans are selected and that the iterator position is returned. */
362 size_t text_customword_end_prev(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
364 Iterator it
= text_iterator_get(txt
, pos
);
365 if (!text_iterator_byte_get(&it
, &c
))
368 while (boundary(c
) && !space(c
) && text_iterator_char_prev(&it
, &c
));
370 while (!boundary(c
) && text_iterator_char_prev(&it
, &c
));
371 while (space(c
) && text_iterator_char_prev(&it
, &c
));
375 size_t text_longword_end_next(Text
*txt
, size_t pos
) {
376 return text_customword_end_next(txt
, pos
, isspace
);
379 size_t text_longword_end_prev(Text
*txt
, size_t pos
) {
380 return text_customword_end_prev(txt
, pos
, isspace
);
383 size_t text_longword_start_next(Text
*txt
, size_t pos
) {
384 return text_customword_start_next(txt
, pos
, isspace
);
387 size_t text_longword_start_prev(Text
*txt
, size_t pos
) {
388 return text_customword_start_prev(txt
, pos
, isspace
);
391 size_t text_word_end_next(Text
*txt
, size_t pos
) {
392 return text_customword_end_next(txt
, pos
, is_word_boundary
);
395 size_t text_word_end_prev(Text
*txt
, size_t pos
) {
396 return text_customword_end_prev(txt
, pos
, is_word_boundary
);
399 size_t text_word_start_next(Text
*txt
, size_t pos
) {
400 return text_customword_start_next(txt
, pos
, is_word_boundary
);
403 size_t text_word_start_prev(Text
*txt
, size_t pos
) {
404 return text_customword_start_prev(txt
, pos
, is_word_boundary
);
/* Forward sentence motion.
 *
 * Two iterators start at pos: it scans forwards, rev looks backwards.
 * rev steps back while the byte under it is whitespace, which leaves prev
 * holding the byte at which that backward scan stopped; when rev ends up at
 * position 0, prev is replaced by '.' so the beginning of the file behaves
 * as if preceded by punctuation.
 *
 * In the forward scan, a '.', '?' or '!' in prev followed by whitespace in c
 * marks a sentence break: the iterator then advances past the whitespace
 * that follows.
 * NOTE(review): the declarations of c and prev, the failure branch of the
 * first read, the update of prev inside the loop and the return statements
 * are not shown in this excerpt -- confirm the returned positions. */
407 size_t text_sentence_next(Text
*txt
, size_t pos
) {
409 Iterator it
= text_iterator_get(txt
, pos
), rev
= it
;
411 if (!text_iterator_byte_get(&it
, &c
))
414 while (text_iterator_byte_get(&rev
, &prev
) && space(prev
))
415 text_iterator_byte_prev(&rev
, NULL
);
416 prev
= rev
.pos
== 0 ? '.' : prev
; /* simulate punctuation at BOF */
419 if ((prev
== '.' || prev
== '?' || prev
== '!') && space(c
)) {
420 do text_iterator_byte_next(&it
, NULL
);
421 while (text_iterator_byte_get(&it
, &c
) && space(c
));
425 } while (text_iterator_byte_next(&it
, &c
));
/* Backward sentence motion.
 *
 * Steps backwards byte by byte from pos until position 0 is reached or the
 * iterator cannot step back. content records whether any non-whitespace
 * byte has been seen so far. Once content is set, a '.', '?' or '!' whose
 * successor (prev) is whitespace marks a sentence break: the iterator then
 * advances forwards past the whitespace that follows the punctuation.
 *
 * If the loop runs out without finding a break and content is set, leading
 * whitespace at the iterator position is skipped forwards.
 * NOTE(review): the declarations of c and prev, the update of prev inside
 * the loop and the return statements are not shown in this excerpt --
 * confirm the returned positions. */
429 size_t text_sentence_prev(Text
*txt
, size_t pos
) {
431 bool content
= false;
432 Iterator it
= text_iterator_get(txt
, pos
);
434 while (it
.pos
!= 0 && text_iterator_byte_prev(&it
, &c
)) {
435 if (content
&& space(prev
) && (c
== '.' || c
== '?' || c
== '!')) {
436 do text_iterator_byte_next(&it
, NULL
);
437 while (text_iterator_byte_get(&it
, &c
) && space(c
));
440 content
|= !space(c
);
442 } /* The loop only ends on hitting BOF or error */
443 if (content
) /* starting pos was after first sentence in file => find that sentences start */
444 while (text_iterator_byte_get(&it
, &c
) && space(c
))
445 text_iterator_byte_next(&it
, NULL
);
/* Forward paragraph motion.
 * Starting at pos, advances over every byte that is a '\n' or blank (space
 * or tab), then returns text_line_blank_next() applied to the position
 * reached.
 * NOTE(review): the declaration of c is not shown in this excerpt. */
449 size_t text_paragraph_next(Text
*txt
, size_t pos
) {
451 Iterator it
= text_iterator_get(txt
, pos
);
453 while (text_iterator_byte_get(&it
, &c
) && (c
== '\n' || blank(c
)))
454 text_iterator_char_next(&it
, NULL
);
455 return text_line_blank_next(txt
, it
.pos
);
/* Backward paragraph motion.
 * Starting at pos, steps backwards while the byte read is a '\n' or blank
 * (space or tab), then returns text_line_blank_prev() applied to the
 * position reached.
 * NOTE(review): the declaration of c is not shown in this excerpt. */
458 size_t text_paragraph_prev(Text
*txt
, size_t pos
) {
460 Iterator it
= text_iterator_get(txt
, pos
);
462 while (text_iterator_byte_prev(&it
, &c
) && (c
== '\n' || blank(c
)));
463 return text_line_blank_prev(txt
, it
.pos
);
/* Forward search for an empty line.
 * Repeatedly searches forwards for a '\n'; after each hit the iterator
 * advances one byte and checks whether that byte is a '\n' as well, i.e.
 * whether two newlines are adjacent.
 * NOTE(review): the statement taken on such a hit, the declaration of c and
 * the final return are not shown in this excerpt -- confirm the returned
 * positions. */
466 size_t text_line_empty_next(Text
*txt
, size_t pos
) {
468 Iterator it
= text_iterator_get(txt
, pos
);
469 while (text_iterator_byte_find_next(&it
, '\n')) {
470 if (text_iterator_byte_next(&it
, &c
) && c
== '\n')
/* Backward search for an empty line.
 * Repeatedly searches backwards for a '\n'; after each hit the iterator
 * steps back one byte and checks whether that byte is a '\n' as well, i.e.
 * whether two newlines are adjacent.
 * NOTE(review): the statement taken on such a hit, the declaration of c and
 * the final return are not shown in this excerpt -- confirm the returned
 * positions. */
476 size_t text_line_empty_prev(Text
*txt
, size_t pos
) {
478 Iterator it
= text_iterator_get(txt
, pos
);
479 while (text_iterator_byte_find_prev(&it
, '\n')) {
480 if (text_iterator_byte_prev(&it
, &c
) && c
== '\n')
/* Forward search for a blank line.
 * Repeatedly searches forwards for a '\n'; after each hit the iterator
 * advances over blank bytes (space or tab).
 * NOTE(review): the check performed after skipping the blanks, the
 * declaration of c and the return statements are not shown in this excerpt;
 * presumably a line containing only blanks ends the search -- confirm. */
486 size_t text_line_blank_next(Text
*txt
, size_t pos
) {
488 Iterator it
= text_iterator_get(txt
, pos
);
489 while (text_iterator_byte_find_next(&it
, '\n')) {
491 while (text_iterator_byte_next(&it
, &c
) && blank(c
));
/* Backward search for a blank line.
 * Repeatedly searches backwards for a '\n'; after each hit the iterator
 * steps back over blank bytes (space or tab).
 * NOTE(review): the check performed after skipping the blanks, the
 * declaration of c and the return statements are not shown in this excerpt;
 * presumably a line containing only blanks ends the search -- confirm. */
498 size_t text_line_blank_prev(Text
*txt
, size_t pos
) {
500 Iterator it
= text_iterator_get(txt
, pos
);
501 while (text_iterator_byte_find_prev(&it
, '\n')) {
502 while (text_iterator_byte_prev(&it
, &c
) && blank(c
));
509 size_t text_block_start(Text
*txt
, size_t pos
) {
510 Filerange r
= text_object_curly_bracket(txt
, pos
-1);
511 return text_range_valid(&r
) ? r
.start
-1 : pos
;
514 size_t text_block_end(Text
*txt
, size_t pos
) {
515 Filerange r
= text_object_curly_bracket(txt
, pos
+1);
516 return text_range_valid(&r
) ? r
.end
: pos
;
519 size_t text_parenthesis_start(Text
*txt
, size_t pos
) {
520 Filerange r
= text_object_parenthesis(txt
, pos
-1);
521 return text_range_valid(&r
) ? r
.start
-1 : pos
;
524 size_t text_parenthesis_end(Text
*txt
, size_t pos
) {
525 Filerange r
= text_object_parenthesis(txt
, pos
+1);
526 return text_range_valid(&r
) ? r
.end
: pos
;
529 size_t text_bracket_match(Text
*txt
, size_t pos
, const Filerange
*limits
) {
530 return text_bracket_match_symbol(txt
, pos
, NULL
, limits
);
/* Scans from pos for the byte search, forwards when direction >= 0 and
 * backwards otherwise.
 *
 * current is the byte found at pos (a failed read takes a branch that is
 * not shown in this excerpt). In both directions, per byte read:
 *   - when limits is given, leaving [limits->start, limits->end) triggers a
 *     branch (statement not shown in this excerpt);
 *   - a '"' that differs from current toggles instring;
 *   - a byte equal to search decrements count, and reaching zero is the
 *     match condition; a byte equal to current takes the other branch.
 * NOTE(review): the declarations of c, current and count, how instring and
 * count are used in the hidden lines, and the match return statements are
 * not shown in this excerpt; presumably count tracks nesting depth --
 * confirm.
 *
 * Returns pos when no match is found. */
533 static size_t match_symbol(Text
*txt
, size_t pos
, char search
, int direction
, const Filerange
*limits
) {
536 bool instring
= false;
537 Iterator it
= text_iterator_get(txt
, pos
);
538 if (!text_iterator_byte_get(&it
, ¤t
))
540 if (direction
>= 0) { /* forward search */
541 while (text_iterator_byte_next(&it
, &c
)) {
542 if (limits
&& it
.pos
>= limits
->end
)
544 if (c
!= current
&& c
== '"')
545 instring
= !instring
;
547 if (c
== search
&& --count
== 0)
549 else if (c
== current
)
553 } else { /* backwards */
554 while (text_iterator_byte_prev(&it
, &c
)) {
555 if (limits
&& it
.pos
< limits
->start
)
557 if (c
!= current
&& c
== '"')
558 instring
= !instring
;
560 if (c
== search
&& --count
== 0)
562 else if (c
== current
)
568 return pos
; /* no match found */
/* Finds the position matching the bracket-like symbol at pos.
 *
 * current is the byte at pos. When symbols is non-NULL and does not contain
 * current, a branch is taken whose statement is not shown in this excerpt
 * (presumably pos is returned -- confirm). A NULL symbols skips that filter.
 *
 * The visible case labels map each of ( ) { } [ ] < > to its partner in
 * search and set direction to 1 for opening and -1 for closing symbols.
 *
 * A further path (its case labels are not shown in this excerpt; the inner
 * comment mentions single and double quotes) searches for current itself in
 * both directions via match_symbol(), compares the line numbers of pos and
 * of both hits to prefer a match on the same line, and otherwise inspects
 * the byte after pos: membership in the special list decides the direction.
 *
 * NOTE(review): memchr() is given sizeof(special), which includes the
 * string's terminating NUL, so a NUL byte following pos also counts as
 * special -- confirm this is intended.
 *
 * The final statement delegates to match_symbol() with the chosen search
 * byte and direction. */
571 size_t text_bracket_match_symbol(Text
*txt
, size_t pos
, const char *symbols
, const Filerange
*limits
) {
573 char search
, current
, c
;
574 Iterator it
= text_iterator_get(txt
, pos
);
575 if (!text_iterator_byte_get(&it
, ¤t
))
577 if (symbols
&& !memchr(symbols
, current
, strlen(symbols
)))
580 case '(': search
= ')'; direction
= 1; break;
581 case ')': search
= '('; direction
= -1; break;
582 case '{': search
= '}'; direction
= 1; break;
583 case '}': search
= '{'; direction
= -1; break;
584 case '[': search
= ']'; direction
= 1; break;
585 case ']': search
= '['; direction
= -1; break;
586 case '<': search
= '>'; direction
= 1; break;
587 case '>': search
= '<'; direction
= -1; break;
592 /* prefer matches on the same line */
593 size_t fw
= match_symbol(txt
, pos
, current
, +1, limits
);
594 size_t bw
= match_symbol(txt
, pos
, current
, -1, limits
);
599 size_t line
= text_lineno_by_pos(txt
, pos
);
600 size_t line_fw
= text_lineno_by_pos(txt
, fw
);
601 size_t line_bw
= text_lineno_by_pos(txt
, bw
);
607 if (text_iterator_byte_next(&it
, &c
)) {
608 /* if a single or double quote is followed by
609 * a special character, search backwards */
610 char special
[] = " \t\n)}]>.,:;";
611 if (memchr(special
, c
, sizeof(special
)))
614 return direction
>= 0 ? fw
: bw
;
620 return match_symbol(txt
, pos
, search
, direction
, limits
);
/* Forward regex search starting just after pos.
 *
 * The first attempt covers [pos + 1, text_size(txt)). REG_NOTBOL is passed
 * unless the byte at pos is a '\n', so a start-of-line anchor only matches
 * at the search start when pos really is a line end. The search is skipped
 * when pos + 1 is not before the end of the text.
 *
 * A second text_search_range_forward() call with flags 0 follows.
 * NOTE(review): the lines around it (the declarations of c and match, the
 * condition guarding the second call and any reset of start) are not shown
 * in this excerpt; presumably it is a wrap-around search from the beginning
 * of the text when the first attempt fails -- confirm.
 *
 * Returns match[0].start when a match was found, otherwise pos. */
623 size_t text_search_forward(Text
*txt
, size_t pos
, Regex
*regex
) {
624 size_t start
= pos
+ 1;
625 size_t end
= text_size(txt
);
628 int flags
= text_byte_get(txt
, pos
, &c
) && c
== '\n' ? 0 : REG_NOTBOL
;
/* text_search_range_forward() reports success with a zero result */
629 bool found
= start
< end
&& !text_search_range_forward(txt
, start
, end
- start
, regex
, 1, match
, flags
);
633 found
= !text_search_range_forward(txt
, start
, end
- start
, regex
, 1, match
, 0);
636 return found
? match
[0].start
: pos
;
/* Backward regex search.
 *
 * The first text_search_range_backward() call is made with REG_NOTEOL.
 * Afterwards end is set to text_size(txt) and a second call is made with
 * flags 0.
 * NOTE(review): the declarations of start, end and match and the condition
 * guarding the second call are not shown in this excerpt; presumably the
 * first attempt covers the text before pos and the second is a wrap-around
 * over the whole text when the first fails -- confirm.
 * NOTE(review): the first call passes end while the second passes
 * end - start as the same argument; these agree only if start is 0 --
 * confirm against the hidden declaration.
 *
 * Returns match[0].start when a match was found, otherwise pos. */
639 size_t text_search_backward(Text
*txt
, size_t pos
, Regex
*regex
) {
643 bool found
= !text_search_range_backward(txt
, start
, end
, regex
, 1, match
, REG_NOTEOL
);
646 end
= text_size(txt
);
647 found
= !text_search_range_backward(txt
, start
, end
- start
, regex
, 1, match
, 0);
650 return found
? match
[0].start
: pos
;