vis: refactor selections_match_next
[vis.git] / text-regex-tre.c
blobeccbafbc46cc69c98d7ae77cc55985af32a33604
1 #include <stdlib.h>
2 #include <string.h>
3 #include <wchar.h>
4 #include <errno.h>
6 #include "text-regex.h"
7 #include "text-motions.h"
9 struct Regex {
10 regex_t regex;
11 tre_str_source str_source;
12 Text *text;
13 Iterator it;
14 size_t end;
17 size_t text_regex_nsub(Regex *r) {
18 if (!r)
19 return 0;
20 return r->regex.re_nsub;
23 static int str_next_char(tre_char_t *c, unsigned int *pos_add, void *context) {
24 Regex *r = context;
25 Iterator *it = &r->it;
26 if (TRE_WCHAR) {
27 mbstate_t ps = { 0 };
28 bool eof = false;
29 size_t start = it->pos;
30 for (;;) {
31 if (it->pos >= r->end) {
32 eof = true;
33 break;
35 size_t rem = r->end - it->pos;
36 size_t plen = it->end - it->text;
37 size_t len = rem < plen ? rem : plen;
38 size_t wclen = mbrtowc(c, it->text, len, &ps);
39 if (wclen == (size_t)-1 && errno == EILSEQ) {
40 ps = (mbstate_t){0};
41 *c = L'\0';
42 text_iterator_codepoint_next(it, NULL);
43 break;
44 } else if (wclen == (size_t)-2) {
45 if (!text_iterator_next(it)) {
46 eof = true;
47 break;
49 } else if (wclen == 0) {
50 text_iterator_byte_next(it, NULL);
51 break;
52 } else {
53 if (wclen < plen) {
54 it->text += wclen;
55 it->pos += wclen;
56 } else {
57 text_iterator_next(it);
59 break;
63 if (eof) {
64 *c = L'\0';
65 *pos_add = 1;
66 return 1;
67 } else {
68 *pos_add = it->pos - start;
69 return 0;
71 } else {
72 *pos_add = 1;
73 if (it->pos < r->end && text_iterator_byte_get(it, (char*)c)) {
74 text_iterator_byte_next(it, NULL);
75 return 0;
76 } else {
77 *c = '\0';
78 return 1;
83 static void str_rewind(size_t pos, void *context) {
84 Regex *r = context;
85 r->it = text_iterator_get(r->text, pos);
88 static int str_compare(size_t pos1, size_t pos2, size_t len, void *context) {
89 Regex *r = context;
90 int ret = 1;
91 void *buf1 = malloc(len), *buf2 = malloc(len);
92 if (!buf1 || !buf2)
93 goto err;
94 text_bytes_get(r->text, pos1, len, buf1);
95 text_bytes_get(r->text, pos2, len, buf2);
96 ret = memcmp(buf1, buf2, len);
97 err:
98 free(buf1);
99 free(buf2);
100 return ret;
103 Regex *text_regex_new(void) {
104 Regex *r = calloc(1, sizeof(*r));
105 if (!r)
106 return NULL;
107 r->str_source = (tre_str_source) {
108 .get_next_char = str_next_char,
109 .rewind = str_rewind,
110 .compare = str_compare,
111 .context = r,
113 return r;
116 void text_regex_free(Regex *r) {
117 if (!r)
118 return;
119 tre_regfree(&r->regex);
120 free(r);
123 int text_regex_compile(Regex *regex, const char *string, int cflags) {
124 int r = tre_regcomp(&regex->regex, string, cflags);
125 if (r)
126 tre_regcomp(&regex->regex, "\0\0", 0);
127 return r;
130 int text_regex_match(Regex *r, const char *data, int eflags) {
131 return tre_regexec(&r->regex, data, 0, NULL, eflags);
134 int text_search_range_forward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int eflags) {
135 r->text = txt;
136 r->it = text_iterator_get(txt, pos);
137 r->end = pos+len;
139 regmatch_t match[MAX_REGEX_SUB];
140 int ret = tre_reguexec(&r->regex, &r->str_source, nmatch, match, eflags);
141 if (!ret) {
142 for (size_t i = 0; i < nmatch; i++) {
143 pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so;
144 pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo;
147 return ret;
150 int text_search_range_backward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int eflags) {
151 int ret = REG_NOMATCH;
152 size_t end = pos + len;
154 while (pos < end && !text_search_range_forward(txt, pos, len, r, nmatch, pmatch, eflags)) {
155 ret = 0;
156 // FIXME: assumes nmatch >= 1
157 size_t next = pmatch[0].end;
158 if (next == pos) {
159 next = text_line_next(txt, pos);
160 if (next == pos)
161 break;
163 pos = next;
164 len = end - pos;
166 char c;
167 if (text_byte_get(txt, pos-1, &c) && c == '\n')
168 eflags &= ~REG_NOTBOL;
169 else
170 eflags |= REG_NOTBOL;
173 return ret;