/*
 * Copyright (c) 2014-2015 Marc André Tanner <mat at brain-dump.org>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
21 #include "text-motions.h"
22 #include "text-util.h"
/*
 * Character-class helpers for bytes read from the text.
 *
 * The argument is converted to unsigned char before it reaches the
 * classification function, so a plain (possibly negative) char is never
 * passed through unchanged. The argument is parenthesized so that the cast
 * applies to the whole expression, not just its first operand.
 *
 * boundary() expands to a call of whatever `isboundary` names at the point
 * of use; the text_customword_* functions in this file take a parameter of
 * that name.
 */
#define space(c) (isspace((unsigned char)(c)))
#define boundary(c) (isboundary((unsigned char)(c)))
// TODO: specify this per file type?
/*
 * Default word-boundary predicate, passed as the `isboundary` callback by
 * the text_word_* motions.
 *
 * Returns non-zero when ISASCII(c) holds and c is none of the characters
 * 0-9, a-z, A-Z or '_'; returns 0 otherwise. The character ranges are
 * compared explicitly rather than through <ctype.h>, so this part of the
 * test does not depend on the current locale.
 *
 * NOTE(review): ISASCII is defined outside this section of the file;
 * presumably it tests for a 7-bit value - confirm.
 */
int is_word_boundry(int c) {
	return ISASCII(c) && !(('0' <= c && c <= '9') ||
	                       ('a' <= c && c <= 'z') ||
	                       ('A' <= c && c <= 'Z') || c == '_');
}
35 size_t text_begin(Text
*txt
, size_t pos
) {
39 size_t text_end(Text
*txt
, size_t pos
) {
40 return text_size(txt
);
43 size_t text_char_next(Text
*txt
, size_t pos
) {
44 Iterator it
= text_iterator_get(txt
, pos
);
45 text_iterator_char_next(&it
, NULL
);
49 size_t text_char_prev(Text
*txt
, size_t pos
) {
50 Iterator it
= text_iterator_get(txt
, pos
);
51 text_iterator_char_prev(&it
, NULL
);
55 static size_t find_next(Text
*txt
, size_t pos
, const char *s
, bool line
) {
58 size_t len
= strlen(s
), matched
= 0;
59 Iterator it
= text_iterator_get(txt
, pos
), sit
;
60 for (char c
; matched
< len
&& text_iterator_byte_get(&it
, &c
); ) {
61 if (c
== s
[matched
]) {
65 } else if (matched
> 0) {
69 text_iterator_byte_next(&it
, NULL
);
70 if (line
&& c
== '\n')
73 return matched
== len
? it
.pos
- len
: pos
;
76 size_t text_find_next(Text
*txt
, size_t pos
, const char *s
) {
77 return find_next(txt
, pos
, s
, false);
80 size_t text_line_find_next(Text
*txt
, size_t pos
, const char *s
) {
81 return find_next(txt
, pos
, s
, true);
84 static size_t find_prev(Text
*txt
, size_t pos
, const char *s
, bool line
) {
87 size_t len
= strlen(s
), matched
= len
- 1;
92 it
= text_iterator_get(txt
, pos
);
93 for (char c
; text_iterator_byte_prev(&it
, &c
); ) {
94 if (c
== s
[matched
]) {
97 if (matched
== len
- 1)
100 } else if (matched
< len
- 1) {
104 if (line
&& c
== '\n')
110 size_t text_find_prev(Text
*txt
, size_t pos
, const char *s
) {
111 return find_prev(txt
, pos
, s
, false);
114 size_t text_line_find_prev(Text
*txt
, size_t pos
, const char *s
) {
115 return find_prev(txt
, pos
, s
, true);
118 size_t text_line_prev(Text
*txt
, size_t pos
) {
120 Iterator it
= text_iterator_get(txt
, pos
);
121 if (!text_iterator_byte_get(&it
, &c
))
124 text_iterator_byte_prev(&it
, &c
);
126 text_iterator_byte_prev(&it
, &c
);
127 while (text_iterator_byte_get(&it
, &c
) && c
!= '\n')
128 text_iterator_byte_prev(&it
, NULL
);
129 if (text_iterator_byte_prev(&it
, &c
) && c
!= '\r')
130 text_iterator_byte_next(&it
, &c
);
134 size_t text_line_begin(Text
*txt
, size_t pos
) {
136 Iterator it
= text_iterator_get(txt
, pos
);
137 if (!text_iterator_byte_get(&it
, &c
))
140 text_iterator_byte_prev(&it
, &c
);
142 text_iterator_byte_prev(&it
, &c
);
143 while (text_iterator_byte_get(&it
, &c
)) {
148 text_iterator_byte_prev(&it
, NULL
);
153 size_t text_line_start(Text
*txt
, size_t pos
) {
155 Iterator it
= text_iterator_get(txt
, text_line_begin(txt
, pos
));
156 while (text_iterator_byte_get(&it
, &c
) && c
!= '\n' && space(c
))
157 text_iterator_byte_next(&it
, NULL
);
161 size_t text_line_finish(Text
*txt
, size_t pos
) {
163 Iterator it
= text_iterator_get(txt
, text_line_end(txt
, pos
));
164 do text_iterator_char_prev(&it
, NULL
);
165 while (text_iterator_byte_get(&it
, &c
) && c
!= '\n' && space(c
));
169 size_t text_line_lastchar(Text
*txt
, size_t pos
) {
171 Iterator it
= text_iterator_get(txt
, text_line_end(txt
, pos
));
172 if (text_iterator_char_prev(&it
, &c
) && c
== '\n')
173 text_iterator_byte_next(&it
, NULL
);
177 size_t text_line_end(Text
*txt
, size_t pos
) {
179 Iterator it
= text_iterator_get(txt
, pos
);
180 while (text_iterator_byte_get(&it
, &c
) && c
!= '\r' && c
!= '\n')
181 text_iterator_byte_next(&it
, NULL
);
185 size_t text_line_next(Text
*txt
, size_t pos
) {
187 Iterator it
= text_iterator_get(txt
, pos
);
188 while (text_iterator_byte_get(&it
, &c
) && c
!= '\n')
189 text_iterator_byte_next(&it
, NULL
);
190 text_iterator_byte_next(&it
, NULL
);
194 size_t text_line_offset(Text
*txt
, size_t pos
, size_t off
) {
196 size_t bol
= text_line_begin(txt
, pos
);
197 Iterator it
= text_iterator_get(txt
, bol
);
198 while (off
-- > 0 && text_iterator_byte_get(&it
, &c
) && c
!= '\r' && c
!= '\n')
199 text_iterator_byte_next(&it
, NULL
);
203 size_t text_line_char_set(Text
*txt
, size_t pos
, int count
) {
205 size_t bol
= text_line_begin(txt
, pos
);
206 Iterator it
= text_iterator_get(txt
, bol
);
207 while (count
-- > 0 && text_iterator_byte_get(&it
, &c
) && c
!= '\r' && c
!= '\n')
208 text_iterator_char_next(&it
, NULL
);
212 int text_line_char_get(Text
*txt
, size_t pos
) {
215 size_t bol
= text_line_begin(txt
, pos
);
216 Iterator it
= text_iterator_get(txt
, bol
);
217 while (text_iterator_byte_get(&it
, &c
) && it
.pos
< pos
&& c
!= '\r' && c
!= '\n') {
218 text_iterator_char_next(&it
, NULL
);
224 int text_line_width_get(Text
*txt
, size_t pos
) {
226 mbstate_t ps
= { 0 };
227 size_t bol
= text_line_begin(txt
, pos
);
228 Iterator it
= text_iterator_get(txt
, bol
);
230 while (it
.pos
< pos
) {
231 char buf
[MB_CUR_MAX
];
232 size_t len
= text_bytes_get(txt
, it
.pos
, sizeof buf
, buf
);
233 if (len
== 0 || buf
[0] == '\r' || buf
[0] == '\n')
236 size_t wclen
= mbrtowc(&wc
, buf
, len
, &ps
);
237 if (wclen
== (size_t)-1 && errno
== EILSEQ
) {
238 /* assume a replacement symbol will be displayed */
240 } else if (wclen
== (size_t)-2) {
241 /* do nothing, advance to next character */
242 } else if (wclen
== 0) {
243 /* assume NUL byte will be displayed as ^@ */
245 } else if (buf
[0] == '\t') {
250 w
= 2; /* assume non-printable will be displayed as ^{char} */
254 if (!text_iterator_codepoint_next(&it
, NULL
))
261 size_t text_line_width_set(Text
*txt
, size_t pos
, int width
) {
263 mbstate_t ps
= { 0 };
264 size_t bol
= text_line_begin(txt
, pos
);
265 Iterator it
= text_iterator_get(txt
, bol
);
268 char buf
[MB_CUR_MAX
];
269 size_t len
= text_bytes_get(txt
, it
.pos
, sizeof buf
, buf
);
270 if (len
== 0 || buf
[0] == '\r' || buf
[0] == '\n')
273 size_t wclen
= mbrtowc(&wc
, buf
, len
, &ps
);
274 if (wclen
== (size_t)-1 && errno
== EILSEQ
) {
275 /* assume a replacement symbol will be displayed */
277 } else if (wclen
== (size_t)-2) {
278 /* do nothing, advance to next character */
279 } else if (wclen
== 0) {
280 /* assume NUL byte will be displayed as ^@ */
282 } else if (buf
[0] == '\t') {
287 w
= 2; /* assume non-printable will be displayed as ^{char} */
291 if (cur_width
>= width
|| !text_iterator_codepoint_next(&it
, NULL
))
298 size_t text_line_char_next(Text
*txt
, size_t pos
) {
300 Iterator it
= text_iterator_get(txt
, pos
);
301 if (!text_iterator_byte_get(&it
, &c
) || c
== '\r' || c
== '\n')
303 if (!text_iterator_char_next(&it
, &c
) || c
== '\r' || c
== '\n')
308 size_t text_line_char_prev(Text
*txt
, size_t pos
) {
310 Iterator it
= text_iterator_get(txt
, pos
);
311 if (!text_iterator_char_prev(&it
, &c
) || c
== '\n')
316 size_t text_line_up(Text
*txt
, size_t pos
) {
317 int width
= text_line_width_get(txt
, pos
);
318 size_t prev
= text_line_prev(txt
, pos
);
319 return text_line_width_set(txt
, prev
, width
);
322 size_t text_line_down(Text
*txt
, size_t pos
) {
323 int width
= text_line_width_get(txt
, pos
);
324 size_t next
= text_line_next(txt
, pos
);
325 return text_line_width_set(txt
, next
, width
);
328 size_t text_range_line_first(Text
*txt
, Filerange
*r
) {
329 if (!text_range_valid(r
))
334 size_t text_range_line_last(Text
*txt
, Filerange
*r
) {
335 if (!text_range_valid(r
))
337 size_t pos
= text_line_begin(txt
, r
->end
);
339 /* range ends at a begin of a line, skip last line ending */
340 pos
= text_line_prev(txt
, pos
);
341 pos
= text_line_begin(txt
, pos
);
343 return r
->start
<= pos
? pos
: r
->start
;
346 size_t text_range_line_next(Text
*txt
, Filerange
*r
, size_t pos
) {
347 if (!text_range_contains(r
, pos
))
349 size_t newpos
= text_line_next(txt
, pos
);
350 return newpos
!= pos
&& newpos
< r
->end
? newpos
: EPOS
;
353 size_t text_range_line_prev(Text
*txt
, Filerange
*r
, size_t pos
) {
354 if (!text_range_contains(r
, pos
))
356 size_t newpos
= text_line_begin(txt
, text_line_prev(txt
, pos
));
357 return newpos
!= pos
&& r
->start
<= newpos
? newpos
: EPOS
;
360 size_t text_customword_start_next(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
362 Iterator it
= text_iterator_get(txt
, pos
);
363 if (!text_iterator_byte_get(&it
, &c
))
366 while (boundary(c
) && !space(c
) && text_iterator_char_next(&it
, &c
));
368 while (!boundary(c
) && text_iterator_char_next(&it
, &c
));
369 while (space(c
) && text_iterator_char_next(&it
, &c
));
373 size_t text_customword_start_prev(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
375 Iterator it
= text_iterator_get(txt
, pos
);
376 while (text_iterator_char_prev(&it
, &c
) && space(c
));
378 do pos
= it
.pos
; while (text_iterator_char_prev(&it
, &c
) && boundary(c
) && !space(c
));
380 do pos
= it
.pos
; while (text_iterator_char_prev(&it
, &c
) && !boundary(c
));
384 size_t text_customword_end_next(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
386 Iterator it
= text_iterator_get(txt
, pos
);
387 while (text_iterator_char_next(&it
, &c
) && space(c
));
389 do pos
= it
.pos
; while (text_iterator_char_next(&it
, &c
) && boundary(c
) && !space(c
));
391 do pos
= it
.pos
; while (text_iterator_char_next(&it
, &c
) && !isboundary(c
));
395 size_t text_customword_end_prev(Text
*txt
, size_t pos
, int (*isboundary
)(int)) {
397 Iterator it
= text_iterator_get(txt
, pos
);
398 if (!text_iterator_byte_get(&it
, &c
))
401 while (boundary(c
) && !space(c
) && text_iterator_char_prev(&it
, &c
));
403 while (!boundary(c
) && text_iterator_char_prev(&it
, &c
));
404 while (space(c
) && text_iterator_char_prev(&it
, &c
));
408 size_t text_longword_end_next(Text
*txt
, size_t pos
) {
409 return text_customword_end_next(txt
, pos
, isspace
);
412 size_t text_longword_end_prev(Text
*txt
, size_t pos
) {
413 return text_customword_end_prev(txt
, pos
, isspace
);
416 size_t text_longword_start_next(Text
*txt
, size_t pos
) {
417 return text_customword_start_next(txt
, pos
, isspace
);
420 size_t text_longword_start_prev(Text
*txt
, size_t pos
) {
421 return text_customword_start_prev(txt
, pos
, isspace
);
424 size_t text_word_end_next(Text
*txt
, size_t pos
) {
425 return text_customword_end_next(txt
, pos
, is_word_boundry
);
428 size_t text_word_end_prev(Text
*txt
, size_t pos
) {
429 return text_customword_end_prev(txt
, pos
, is_word_boundry
);
432 size_t text_word_start_next(Text
*txt
, size_t pos
) {
433 return text_customword_start_next(txt
, pos
, is_word_boundry
);
436 size_t text_word_start_prev(Text
*txt
, size_t pos
) {
437 return text_customword_start_prev(txt
, pos
, is_word_boundry
);
440 size_t text_sentence_next(Text
*txt
, size_t pos
) {
442 Iterator it
= text_iterator_get(txt
, pos
), rev
= text_iterator_get(txt
, pos
);
444 if (!text_iterator_byte_get(&it
, &c
))
447 while (text_iterator_byte_get(&rev
, &prev
) && space(prev
))
448 text_iterator_byte_prev(&rev
, NULL
);
449 prev
= rev
.pos
== 0 ? '.' : prev
; /* simulate punctuation at BOF */
452 if ((prev
== '.' || prev
== '?' || prev
== '!') && space(c
)) {
453 do text_iterator_byte_next(&it
, NULL
);
454 while (text_iterator_byte_get(&it
, &c
) && space(c
));
458 } while (text_iterator_byte_next(&it
, &c
));
462 size_t text_sentence_prev(Text
*txt
, size_t pos
) {
464 bool content
= false;
465 Iterator it
= text_iterator_get(txt
, pos
);
467 while (it
.pos
!= 0 && text_iterator_byte_prev(&it
, &c
)) {
468 if (content
&& space(prev
) && (c
== '.' || c
== '?' || c
== '!')) {
469 do text_iterator_byte_next(&it
, NULL
);
470 while (text_iterator_byte_get(&it
, &c
) && space(c
));
473 content
|= !space(c
);
475 } /* The loop only ends on hitting BOF or error */
476 if (content
) /* starting pos was after first sentence in file => find that sentences start */
477 while (text_iterator_byte_get(&it
, &c
) && space(c
))
478 text_iterator_byte_next(&it
, NULL
);
482 size_t text_paragraph_next(Text
*txt
, size_t pos
) {
484 Iterator it
= text_iterator_get(txt
, pos
);
486 while (text_iterator_byte_get(&it
, &c
) && (c
== '\n' || c
== '\r'))
487 text_iterator_byte_next(&it
, NULL
);
488 return text_line_empty_next(txt
, it
.pos
);
491 size_t text_paragraph_prev(Text
*txt
, size_t pos
) {
493 Iterator it
= text_iterator_get(txt
, pos
);
495 /* c == \0 catches starting the search at EOF */
496 while (text_iterator_byte_get(&it
, &c
) && (c
== '\n' || c
== '\r' || c
== '\0'))
497 text_iterator_byte_prev(&it
, NULL
);
498 return text_line_empty_prev(txt
, it
.pos
);
501 size_t text_line_empty_next(Text
*txt
, size_t pos
) {
503 Iterator it
= text_iterator_get(txt
, pos
);
504 while (text_iterator_byte_get(&it
, &c
)) {
505 if (c
== '\n' && text_iterator_byte_next(&it
, &c
)) {
506 size_t match
= it
.pos
;
508 text_iterator_byte_next(&it
, &c
);
512 text_iterator_byte_next(&it
, NULL
);
517 size_t text_line_empty_prev(Text
*txt
, size_t pos
) {
519 Iterator it
= text_iterator_get(txt
, pos
);
520 while (text_iterator_byte_prev(&it
, &c
)) {
521 if (c
== '\n' && text_iterator_byte_prev(&it
, &c
)) {
523 text_iterator_byte_prev(&it
, &c
);
531 size_t text_function_start_next(Text
*txt
, size_t pos
) {
532 size_t a
= text_function_end_next(txt
, pos
);
536 Iterator it
= text_iterator_get(txt
, a
);
537 while (text_iterator_byte_next(&it
, &c
) && (c
== '\r' || c
== '\n'));
541 size_t match
= text_bracket_match(txt
, b
);
542 b
= match
!= b
? text_line_next(txt
, text_line_empty_prev(txt
, match
)) : pos
;
544 if (a
<= pos
&& b
<= pos
)
554 size_t text_function_start_prev(Text
*txt
, size_t pos
) {
556 size_t apos
= text_byte_get(txt
, pos
, &c
) && c
== '}' && pos
> 0 ? pos
- 1 : pos
;
557 size_t a
= text_function_end_next(txt
, apos
);
558 size_t b
= text_function_end_prev(txt
, pos
);
560 size_t match
= text_bracket_match(txt
, a
);
561 a
= match
!= a
? text_line_next(txt
, text_line_empty_prev(txt
, match
)) : pos
;
564 size_t match
= text_bracket_match(txt
, b
);
565 b
= match
!= b
? text_line_next(txt
, text_line_empty_prev(txt
, match
)) : pos
;
567 if (a
>= pos
&& b
>= pos
)
577 static size_t text_function_end_direction(Text
*txt
, size_t pos
, int direction
) {
578 size_t start
= pos
, match
;
579 if (direction
< 0 && pos
> 0)
584 match
= text_find_next(txt
, pos
, "\n}");
586 match
= text_find_prev(txt
, pos
, "\n}");
587 if (text_bytes_get(txt
, match
, sizeof c
, c
) != 3 || c
[0] != '\n' || c
[1] != '}')
589 if (c
[2] == '\r' || c
[2] == '\n')
598 size_t text_function_end_next(Text
*txt
, size_t pos
) {
599 return text_function_end_direction(txt
, pos
, +1);
602 size_t text_function_end_prev(Text
*txt
, size_t pos
) {
603 return text_function_end_direction(txt
, pos
, -1);
606 size_t text_bracket_match(Text
*txt
, size_t pos
) {
607 return text_bracket_match_symbol(txt
, pos
, NULL
);
610 size_t text_bracket_match_symbol(Text
*txt
, size_t pos
, const char *symbols
) {
611 int direction
, count
= 1;
612 char search
, current
, c
;
613 bool instring
= false;
614 Iterator it
= text_iterator_get(txt
, pos
);
615 if (!text_iterator_byte_get(&it
, ¤t
))
617 if (symbols
&& !memchr(symbols
, current
, strlen(symbols
)))
620 case '(': search
= ')'; direction
= 1; break;
621 case ')': search
= '('; direction
= -1; break;
622 case '{': search
= '}'; direction
= 1; break;
623 case '}': search
= '{'; direction
= -1; break;
624 case '[': search
= ']'; direction
= 1; break;
625 case ']': search
= '['; direction
= -1; break;
626 case '<': search
= '>'; direction
= 1; break;
627 case '>': search
= '<'; direction
= -1; break;
631 char special
[] = " \n)}]>.,:;";
634 if (text_iterator_byte_next(&it
, &c
)) {
635 /* if a single or double quote is followed by
636 * a special character, search backwards */
637 if (memchr(special
, c
, sizeof(special
)))
639 text_iterator_byte_prev(&it
, NULL
);
646 if (direction
>= 0) { /* forward search */
647 while (text_iterator_byte_next(&it
, &c
)) {
648 if (c
!= current
&& c
== '"')
649 instring
= !instring
;
651 if (c
== search
&& --count
== 0)
653 else if (c
== current
)
657 } else { /* backwards */
658 while (text_iterator_byte_prev(&it
, &c
)) {
659 if (c
!= current
&& c
== '"')
660 instring
= !instring
;
662 if (c
== search
&& --count
== 0)
664 else if (c
== current
)
670 return pos
; /* no match found */
673 size_t text_search_forward(Text
*txt
, size_t pos
, Regex
*regex
) {
674 size_t start
= pos
+ 1;
675 size_t end
= text_size(txt
);
677 bool found
= start
< end
&& !text_search_range_forward(txt
, start
, end
- start
, regex
, 1, match
, 0);
682 found
= !text_search_range_forward(txt
, start
, end
, regex
, 1, match
, 0);
685 return found
? match
[0].start
: pos
;
688 size_t text_search_backward(Text
*txt
, size_t pos
, Regex
*regex
) {
692 bool found
= !text_search_range_backward(txt
, start
, end
, regex
, 1, match
, 0);
696 end
= text_size(txt
);
697 found
= start
< end
&& !text_search_range_backward(txt
, start
, end
- start
, regex
, 1, match
, 0);
700 return found
? match
[0].start
: pos
;