lexer: assume .h is ANSI C, not C++
[vis.git] / text-objects.c
blobd7e806f6c746bc83db2af6616ab61b1ddcaaab21
1 #include <errno.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <ctype.h>
5 #include "text-motions.h"
6 #include "text-objects.h"
7 #include "text-util.h"
8 #include "util.h"
/*
 * Character classification helpers. The argument is converted to
 * unsigned char before it is handed to the classifier, which is what the
 * <ctype.h> functions require when fed plain char values. The argument is
 * parenthesized so that the cast applies to the whole expression.
 *
 * boundary() calls whatever `isboundary` names at the point of use; in this
 * file that is the function-pointer parameter of the enclosing function.
 */
#define space(c) (isspace((unsigned char)(c)))
#define boundary(c) (isboundary((unsigned char)(c)))
13 Filerange text_object_entire(Text *txt, size_t pos) {
14 return text_range_new(0, text_size(txt));
17 Filerange text_object_entire_inner(Text *txt, size_t pos) {
18 char c;
19 Filerange r = text_object_entire(txt, pos);
20 Iterator it = text_iterator_get(txt, r.start);
21 while (text_iterator_byte_get(&it, &c) && (c == '\r' || c == '\n'))
22 text_iterator_byte_next(&it, NULL);
23 r.start = it.pos;
24 it = text_iterator_get(txt, r.end);
25 while (text_iterator_byte_prev(&it, &c) && (c == '\r' || c == '\n'));
26 r.end = it.pos;
27 return text_range_linewise(txt, &r);
30 static Filerange text_object_customword(Text *txt, size_t pos, int (*isboundary)(int)) {
31 Filerange r;
32 char c, prev = '0', next = '0';
33 Iterator it = text_iterator_get(txt, pos);
34 if (!text_iterator_byte_get(&it, &c))
35 return text_range_empty();
36 if (text_iterator_byte_prev(&it, &prev))
37 text_iterator_byte_next(&it, NULL);
38 text_iterator_byte_next(&it, &next);
39 if (space(c)) {
40 r.start = text_char_next(txt, text_customword_end_prev(txt, pos, isboundary));
41 r.end = text_customword_start_next(txt, pos, isboundary);
42 } else if (boundary(prev) && boundary(next)) {
43 if (boundary(c)) {
44 r.start = text_char_next(txt, text_customword_end_prev(txt, pos, isboundary));
45 r.end = text_char_next(txt, text_customword_end_next(txt, pos, isboundary));
46 } else {
47 /* on a single character */
48 r.start = pos;
49 r.end = text_char_next(txt, pos);
51 } else if (boundary(prev)) {
52 /* at start of a word */
53 r.start = pos;
54 r.end = text_char_next(txt, text_customword_end_next(txt, pos, isboundary));
55 } else if (boundary(next)) {
56 /* at end of a word */
57 r.start = text_customword_start_prev(txt, pos, isboundary);
58 r.end = text_char_next(txt, pos);
59 } else {
60 /* in the middle of a word */
61 r.start = text_customword_start_prev(txt, pos, isboundary);
62 r.end = text_char_next(txt, text_customword_end_next(txt, pos, isboundary));
65 return r;
68 Filerange text_object_word(Text *txt, size_t pos) {
69 return text_object_customword(txt, pos, is_word_boundary);
72 Filerange text_object_longword(Text *txt, size_t pos) {
73 return text_object_customword(txt, pos, isspace);
76 static Filerange text_object_customword_outer(Text *txt, size_t pos, int (*isboundary)(int)) {
77 Filerange r;
78 char c, prev = '0', next = '0';
79 Iterator it = text_iterator_get(txt, pos);
80 if (!text_iterator_byte_get(&it, &c))
81 return text_range_empty();
82 if (text_iterator_byte_prev(&it, &prev))
83 text_iterator_byte_next(&it, NULL);
84 text_iterator_byte_next(&it, &next);
85 if (space(c)) {
86 /* middle of two words, include leading white space */
87 r.start = text_char_next(txt, text_customword_end_prev(txt, pos, isboundary));
88 r.end = text_char_next(txt, text_customword_end_next(txt, pos, isboundary));
89 } else if (boundary(prev) && boundary(next)) {
90 if (boundary(c)) {
91 r.start = text_char_next(txt, text_customword_end_prev(txt, pos, isboundary));
92 r.end = text_word_start_next(txt, text_customword_end_next(txt, pos, isboundary));
93 } else {
94 /* on a single character */
95 r.start = pos;
96 r.end = text_customword_start_next(txt, pos, isboundary);
98 } else if (boundary(prev)) {
99 /* at start of a word */
100 r.start = pos;
101 r.end = text_customword_start_next(txt, text_customword_end_next(txt, pos, isboundary), isboundary);
102 } else if (boundary(next)) {
103 /* at end of a word */
104 r.start = text_customword_start_prev(txt, pos, isboundary);
105 r.end = text_customword_start_next(txt, pos, isboundary);
106 } else {
107 /* in the middle of a word */
108 r.start = text_customword_start_prev(txt, pos, isboundary);
109 r.end = text_customword_start_next(txt, text_customword_end_next(txt, pos, isboundary), isboundary);
112 return r;
115 Filerange text_object_longword_outer(Text *txt, size_t pos) {
116 return text_object_customword_outer(txt, pos, isspace);
119 Filerange text_object_word_outer(Text *txt, size_t pos) {
120 return text_object_customword_outer(txt, pos, is_word_boundary);
123 Filerange text_object_word_find_next(Text *txt, size_t pos, const char *word) {
124 size_t len = strlen(word);
125 for (;;) {
126 size_t match_pos = text_find_next(txt, pos, word);
127 if (match_pos != pos) {
128 Filerange match_word = text_object_word(txt, match_pos);
129 if (text_range_size(&match_word) == len)
130 return match_word;
131 pos = match_word.end;
132 } else {
133 return text_range_empty();
138 Filerange text_object_word_find_prev(Text *txt, size_t pos, const char *word) {
139 size_t len = strlen(word);
140 for (;;) {
141 size_t match_pos = text_find_prev(txt, pos, word);
142 if (match_pos != pos) {
143 Filerange match_word = text_object_word(txt, match_pos);
144 if (text_range_size(&match_word) == len)
145 return match_word;
146 pos = match_pos;
147 } else {
148 return text_range_empty();
153 Filerange text_object_line(Text *txt, size_t pos) {
154 Filerange r;
155 r.start = text_line_begin(txt, pos);
156 r.end = text_line_next(txt, pos);
157 return r;
160 Filerange text_object_line_inner(Text *txt, size_t pos) {
161 Filerange r = text_object_line(txt, pos);
162 return text_range_inner(txt, &r);
165 Filerange text_object_sentence(Text *txt, size_t pos) {
166 Filerange r;
167 r.start = text_sentence_prev(txt, pos);
168 r.end = text_sentence_next(txt, pos);
169 return r;
172 Filerange text_object_paragraph(Text *txt, size_t pos) {
173 Filerange r;
174 r.start = text_paragraph_prev(txt, pos);
175 r.end = text_paragraph_next(txt, pos);
176 return r;
179 static Filerange object_function(Text *txt, size_t pos) {
180 size_t start_prev = text_function_start_prev(txt, pos);
181 size_t end_next = text_function_end_next(txt, pos);
182 size_t start = text_function_start_next(txt, start_prev);
183 size_t end = text_function_end_prev(txt, end_next);
184 if (start == pos)
185 start_prev = pos;
186 if (end == pos)
187 end_next = pos;
188 if (text_function_end_next(txt, start_prev) == end_next &&
189 text_function_start_prev(txt, end_next) == start_prev) {
190 return text_range_new(start_prev, end_next);
192 return text_range_empty();
195 Filerange text_object_function(Text *txt, size_t pos) {
196 Filerange r = object_function(txt, pos);
197 if (!text_range_valid(&r))
198 return r;
199 r.end++;
200 return text_range_linewise(txt, &r);
203 Filerange text_object_function_inner(Text *txt, size_t pos) {
204 Filerange r = object_function(txt, pos);
205 if (!text_range_valid(&r))
206 return r;
207 return text_range_new(text_bracket_match(txt, r.end)+1, r.end);
210 static Filerange text_object_bracket(Text *txt, size_t pos, char type) {
211 char c, open, close;
212 int opened = 1, closed = 1;
213 Filerange r = text_range_empty();
215 switch (type) {
216 case '(': case ')': open = '('; close = ')'; break;
217 case '{': case '}': open = '{'; close = '}'; break;
218 case '[': case ']': open = '['; close = ']'; break;
219 case '<': case '>': open = '<'; close = '>'; break;
220 case '"': open = '"'; close = '"'; break;
221 case '`': open = '`'; close = '`'; break;
222 case '\'': open = '\''; close = '\''; break;
223 default: return r;
226 Iterator it = text_iterator_get(txt, pos);
228 if (open == close && text_iterator_byte_get(&it, &c) && (c == '"' || c == '`' || c == '\'')) {
229 size_t match = text_bracket_match(txt, pos);
230 r.start = MIN(pos, match) + 1;
231 r.end = MAX(pos, match);
232 return r;
235 while (text_iterator_byte_get(&it, &c)) {
236 if (c == open && --opened == 0) {
237 r.start = it.pos + 1;
238 break;
239 } else if (c == close && it.pos != pos) {
240 opened++;
242 text_iterator_byte_prev(&it, NULL);
245 it = text_iterator_get(txt, pos);
246 while (text_iterator_byte_get(&it, &c)) {
247 if (c == close && --closed == 0) {
248 r.end = it.pos;
249 break;
250 } else if (c == open && it.pos != pos) {
251 closed++;
253 text_iterator_byte_next(&it, NULL);
256 if (!text_range_valid(&r))
257 return text_range_empty();
258 return r;
261 Filerange text_object_square_bracket(Text *txt, size_t pos) {
262 return text_object_bracket(txt, pos, ']');
265 Filerange text_object_curly_bracket(Text *txt, size_t pos) {
266 return text_object_bracket(txt, pos, '}');
269 Filerange text_object_angle_bracket(Text *txt, size_t pos) {
270 return text_object_bracket(txt, pos, '>');
273 Filerange text_object_paranthese(Text *txt, size_t pos) {
274 return text_object_bracket(txt, pos, ')');
277 Filerange text_object_quote(Text *txt, size_t pos) {
278 return text_object_bracket(txt, pos, '"');
281 Filerange text_object_single_quote(Text *txt, size_t pos) {
282 return text_object_bracket(txt, pos, '\'');
285 Filerange text_object_backtick(Text *txt, size_t pos) {
286 return text_object_bracket(txt, pos, '`');
289 Filerange text_object_range(Text *txt, size_t pos, int (*isboundary)(int)) {
290 char c;
291 size_t start;
292 Iterator it = text_iterator_get(txt, pos), rit = it;
293 if (!text_iterator_byte_get(&rit, &c) || boundary(c))
294 return text_range_empty();
295 char tmp = c;
296 do start = rit.pos; while (text_iterator_char_prev(&rit, &c) && !boundary(c));
297 for (c = tmp; !boundary(c) && text_iterator_byte_next(&it, &c););
298 return text_range_new(start, it.pos);
/*
 * Boundary predicate for numbers: returns 0 for bytes that may be part of a
 * number literal ('-', 'x', 'X', decimal digits, hex digits a-f / A-F) and
 * 1 for everything else.
 */
static int is_number(int c) {
	if (c == '-' || c == 'x' || c == 'X')
		return 0;
	if (c >= '0' && c <= '9')
		return 0;
	if (c >= 'a' && c <= 'f')
		return 0;
	if (c >= 'A' && c <= 'F')
		return 0;
	return 1;
}
307 Filerange text_object_number(Text *txt, size_t pos) {
308 char *buf, *err = NULL;
309 Filerange r = text_object_range(txt, pos, is_number);
310 if (!text_range_valid(&r))
311 return r;
312 if (!(buf = text_bytes_alloc0(txt, r.start, text_range_size(&r))))
313 return text_range_empty();
314 errno = 0;
315 strtoll(buf, &err, 0);
316 if (errno || err == buf)
317 r = text_range_empty();
318 else
319 r.end = r.start + (err - buf);
320 free(buf);
321 return r;
/*
 * Boundary predicate for file names: non-zero for ';', ':', '|', '"', '\'',
 * '<', '>' and for anything isspace() accepts; zero otherwise.
 */
static int is_filename_boundary(int c) {
	if (c == ';' || c == ':' || c == '|')
		return true;
	if (c == '"' || c == '\'')
		return true;
	if (c == '<' || c == '>')
		return true;
	return isspace(c);
}
335 Filerange text_object_filename(Text *txt, size_t pos) {
336 return text_object_range(txt, pos, is_filename_boundary);
339 Filerange text_object_search_forward(Text *txt, size_t pos, Regex *regex) {
340 size_t start = pos;
341 size_t end = text_size(txt);
342 RegexMatch match[1];
343 bool found = start < end && !text_search_range_forward(txt, start, end - start, regex, 1, match, 0);
344 if (found)
345 return text_range_new(match[0].start, match[0].end);
346 return text_range_empty();
349 Filerange text_object_search_backward(Text *txt, size_t pos, Regex *regex) {
350 size_t start = 0;
351 size_t end = pos;
352 RegexMatch match[1];
353 bool found = !text_search_range_backward(txt, start, end, regex, 1, match, 0);
354 if (found)
355 return text_range_new(match[0].start, match[0].end);
356 return text_range_empty();
359 Filerange text_object_indentation(Text *txt, size_t pos) {
360 char c;
361 size_t bol = text_line_begin(txt, pos);
362 size_t sol = text_line_start(txt, bol);
363 size_t start = bol;
364 size_t end = text_line_next(txt, bol);
365 size_t line_indent = sol - bol;
366 bool line_empty = text_byte_get(txt, bol, &c) && (c == '\r' || c == '\n');
368 char *buf = text_bytes_alloc0(txt, bol, line_indent);
369 char *tmp = malloc(line_indent);
371 if (!buf || !tmp) {
372 free(buf);
373 free(tmp);
374 return text_range_empty();
377 while ((bol = text_line_begin(txt, text_line_prev(txt, start))) != start) {
378 sol = text_line_start(txt, bol);
379 size_t indent = sol - bol;
380 if (indent < line_indent)
381 break;
382 bool empty = text_byte_get(txt, bol, &c) && (c == '\r' || c == '\n');
383 if (line_empty && !empty)
384 break;
385 if (line_indent == 0 && empty)
386 break;
387 text_bytes_get(txt, bol, line_indent, tmp);
388 if (memcmp(buf, tmp, line_indent))
389 break;
390 start = bol;
393 do {
394 bol = end;
395 sol = text_line_start(txt, bol);
396 size_t indent = sol - bol;
397 if (indent < line_indent)
398 break;
399 bool empty = text_byte_get(txt, bol, &c) && (c == '\r' || c == '\n');
400 if (line_empty && !empty)
401 break;
402 if (line_indent == 0 && empty)
403 break;
404 text_bytes_get(txt, bol, line_indent, tmp);
405 if (memcmp(buf, tmp, line_indent))
406 break;
407 end = text_line_next(txt, bol);
408 } while (bol != end);
410 free(buf);
411 free(tmp);
412 return text_range_new(start, end);
415 Filerange text_range_linewise(Text *txt, Filerange *rin) {
416 Filerange rout = *rin;
417 rout.start = text_line_begin(txt, rin->start);
418 if (rin->end != text_line_begin(txt, rin->end))
419 rout.end = text_line_next(txt, rin->end);
420 return rout;
423 bool text_range_is_linewise(Text *txt, Filerange *r) {
424 return text_range_valid(r) &&
425 r->start == text_line_begin(txt, r->start) &&
426 r->end == text_line_begin(txt, r->end);
429 Filerange text_range_inner(Text *txt, Filerange *rin) {
430 char c;
431 Filerange r = *rin;
432 Iterator it = text_iterator_get(txt, rin->start);
433 while (text_iterator_byte_get(&it, &c) && space(c))
434 text_iterator_byte_next(&it, NULL);
435 r.start = it.pos;
436 it = text_iterator_get(txt, rin->end);
437 do r.end = it.pos; while (text_iterator_byte_prev(&it, &c) && space(c));
438 return r;