view: properly advance over incomplete unicode sequence
[vis.git] / text-util.c
blob3cc994e60a7d979c3733ec3e6b17c2643a895318
1 #include "text-util.h"
2 #include "util.h"
3 #include <wchar.h>
4 #include <errno.h>
5 #include <stdlib.h>
7 bool text_range_valid(const Filerange *r) {
8 return r->start != EPOS && r->end != EPOS && r->start <= r->end;
11 size_t text_range_size(const Filerange *r) {
12 return text_range_valid(r) ? r->end - r->start : 0;
15 Filerange text_range_empty(void) {
16 return (Filerange){ .start = EPOS, .end = EPOS };
19 Filerange text_range_union(const Filerange *r1, const Filerange *r2) {
20 if (!text_range_valid(r1))
21 return *r2;
22 if (!text_range_valid(r2))
23 return *r1;
24 return (Filerange) {
25 .start = MIN(r1->start, r2->start),
26 .end = MAX(r1->end, r2->end),
30 Filerange text_range_intersect(const Filerange *r1, const Filerange *r2) {
31 if (!text_range_overlap(r1, r2))
32 return text_range_empty();
33 return text_range_new(MAX(r1->start, r2->start), MIN(r1->end, r2->end));
36 Filerange text_range_new(size_t a, size_t b) {
37 return (Filerange) {
38 .start = MIN(a, b),
39 .end = MAX(a, b),
43 bool text_range_equal(const Filerange *r1, const Filerange *r2) {
44 if (!text_range_valid(r1) && !text_range_valid(r2))
45 return true;
46 return r1->start == r2->start && r1->end == r2->end;
49 bool text_range_overlap(const Filerange *r1, const Filerange *r2) {
50 if (!text_range_valid(r1) || !text_range_valid(r2))
51 return false;
52 return r1->start < r2->end && r2->start < r1->end;
55 bool text_range_contains(const Filerange *r, size_t pos) {
56 return text_range_valid(r) && r->start <= pos && pos <= r->end;
/*
 * Count the characters in the first len bytes of data, decoding them with
 * mbrtowc(3).
 *
 *  - every invalid byte sequence counts as one character
 *  - a NUL byte counts as one character
 *  - characters for which wcwidth(3) reports a width of zero are not counted
 *  - an incomplete multibyte sequence at the end of the buffer is not
 *    counted and terminates the scan
 *
 * Returns the number of characters counted.
 */
int text_char_count(const char *data, size_t len) {
	int count = 0;
	mbstate_t ps = { 0 };
	while (len > 0) {
		wchar_t wc;
		size_t wclen = mbrtowc(&wc, data, len, &ps);
		if (wclen == (size_t)-1) {
			/* Invalid sequence. Every (size_t)-1 result is handled here,
			 * whatever errno says, so that the final branch never sees an
			 * unset wc or a bogus length. The conversion state is
			 * unspecified after an error, hence the reset. */
			ps = (mbstate_t){0};
			count++;
			/* Always consume the offending byte so the loop is guaranteed
			 * to make progress, then skip the bytes belonging to the same
			 * broken sequence without ever running past the buffer.
			 * NOTE(review): ISUTF8 is defined outside this block;
			 * presumably it is false exactly for UTF-8 continuation
			 * bytes -- confirm against util.h. */
			do {
				data++;
				len--;
			} while (len > 0 && !ISUTF8(*data));
		} else if (wclen == (size_t)-2) {
			/* the remaining bytes form an incomplete sequence */
			break;
		} else if (wclen == 0) {
			/* NUL byte: mbrtowc reports 0, but one byte was consumed */
			count++;
			data++;
			len--;
		} else {
			int width = wcwidth(wc);
			if (width != 0)
				count++;
			data += wclen;
			len -= wclen;
		}
	}
	return count;
}
/*
 * Estimate the number of display cells needed for the first len bytes of
 * data, decoding them with mbrtowc(3).
 *
 *  - an invalid byte counts as one cell (replacement symbol)
 *  - a byte starting an incomplete sequence contributes nothing
 *  - a NUL byte counts as two cells (shown as ^@)
 *  - a tab counts as one cell
 *  - a non-printable character (wcwidth(3) == -1) counts as two cells
 *  - everything else counts as reported by wcwidth(3)
 *
 * Returns the accumulated width.
 */
int text_string_width(const char *data, size_t len) {

	int width = 0;
	mbstate_t ps = { 0 };
	const char *s = data;

	while (len > 0) {
		wchar_t wc;
		size_t wclen = mbrtowc(&wc, s, len, &ps);
		if (wclen == (size_t)-1) {
			/* Invalid sequence. Every (size_t)-1 result is handled here,
			 * whatever errno says, so that the final branch never sees an
			 * unset wc or a bogus length. The conversion state is
			 * unspecified after an error, hence the reset. */
			ps = (mbstate_t){0};
			/* assume a replacement symbol will be displayed */
			width++;
			wclen = 1;
		} else if (wclen == (size_t)-2) {
			/* Incomplete sequence: mbrtowc has absorbed the remaining
			 * bytes into ps. Since scanning resumes at the very next
			 * byte, drop that partial state; otherwise the following
			 * bytes would be decoded a second time on top of it. */
			ps = (mbstate_t){0};
			/* do nothing, advance to next character */
			wclen = 1;
		} else if (wclen == 0) {
			/* assume NUL byte will be displayed as ^@ */
			width += 2;
			wclen = 1;
		} else if (wc == L'\t') {
			width++;
			wclen = 1;
		} else {
			int w = wcwidth(wc);
			if (w == -1)
				w = 2; /* assume non-printable will be displayed as ^{char} */
			width += w;
		}
		len -= wclen;
		s += wclen;
	}

	return width;
}