Merge branch 'csi_event' of https://github.com/ezdiy/vis into master
[vis.git] / text.c
blob2e7c7efa7684da4051df004259f76720b1e51645
1 #ifndef _GNU_SOURCE
2 #define _GNU_SOURCE /* memrchr(3) is non-standard */
3 #endif
4 #include <unistd.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <time.h>
9 #include <fcntl.h>
10 #include <errno.h>
11 #include <wchar.h>
12 #include <stdint.h>
13 #include <libgen.h>
14 #include <limits.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <sys/mman.h>
18 #if CONFIG_ACL
19 #include <sys/acl.h>
20 #endif
21 #if CONFIG_SELINUX
22 #include <selinux/selinux.h>
23 #endif
25 #include "text.h"
26 #include "text-util.h"
27 #include "text-motions.h"
28 #include "util.h"
30 /* Allocate blocks holding the actual file content in chunks of size: */
31 #ifndef BLOCK_SIZE
32 #define BLOCK_SIZE (1 << 20)
33 #endif
34 /* Files smaller than this value are copied on load, larger ones are mmap(2)-ed
35 * directely. Hence the former can be truncated, while doing so on the latter
36 * results in havoc. */
37 #define BLOCK_MMAP_SIZE (1 << 26)
39 /* Block holding the file content, either readonly mmap(2)-ed from the original
40 * file or heap allocated to store the modifications.
42 typedef struct Block Block;
43 struct Block {
44 size_t size; /* maximal capacity */
45 size_t len; /* current used length / insertion position */
46 char *data; /* actual data */
47 enum { /* type of allocation */
48 MMAP_ORIG, /* mmap(2)-ed from an external file */
49 MMAP, /* mmap(2)-ed from a temporary file only known to this process */
50 MALLOC, /* heap allocated block using malloc(3) */
51 } type;
52 Block *next; /* next chunk */
55 /* A piece holds a reference (but doesn't itself store) a certain amount of data.
56 * All active pieces chained together form the whole content of the document.
57 * At the beginning there exists only one piece, spanning the whole document.
58 * Upon insertion/deletion new pieces will be created to represent the changes.
59 * Generally pieces are never destroyed, but kept around to peform undo/redo
60 * operations.
62 struct Piece {
63 Text *text; /* text to which this piece belongs */
64 Piece *prev, *next; /* pointers to the logical predecessor/successor */
65 Piece *global_prev; /* double linked list in order of allocation, */
66 Piece *global_next; /* used to free individual pieces */
67 const char *data; /* pointer into a Block holding the data */
68 size_t len; /* the length in number of bytes of the data */
71 /* used to transform a global position (byte offset starting from the beginning
72 * of the text) into an offset relative to a piece.
74 typedef struct {
75 Piece *piece; /* piece holding the location */
76 size_t off; /* offset into the piece in bytes */
77 } Location;
79 /* A Span holds a certain range of pieces. Changes to the document are always
80 * performed by swapping out an existing span with a new one.
82 typedef struct {
83 Piece *start, *end; /* start/end of the span */
84 size_t len; /* the sum of the lengths of the pieces which form this span */
85 } Span;
87 /* A Change keeps all needed information to redo/undo an insertion/deletion. */
88 typedef struct Change Change;
89 struct Change {
90 Span old; /* all pieces which are being modified/swapped out by the change */
91 Span new; /* all pieces which are introduced/swapped in by the change */
92 size_t pos; /* absolute position at which the change occured */
93 Change *next; /* next change which is part of the same revision */
94 Change *prev; /* previous change which is part of the same revision */
97 /* A Revision is a list of Changes which are used to undo/redo all modifications
98 * since the last snapshot operation. Revisions are stored in a directed graph structure.
100 typedef struct Revision Revision;
101 struct Revision {
102 Change *change; /* the most recent change */
103 Revision *next; /* the next (child) revision in the undo tree */
104 Revision *prev; /* the previous (parent) revision in the undo tree */
105 Revision *earlier; /* the previous Revision, chronologically */
106 Revision *later; /* the next Revision, chronologically */
107 time_t time; /* when the first change of this revision was performed */
108 size_t seq; /* a unique, strictly increasing identifier */
111 typedef struct {
112 size_t pos; /* position in bytes from start of file */
113 size_t lineno; /* line number in file i.e. number of '\n' in [0, pos) */
114 } LineCache;
116 /* The main struct holding all information of a given file */
117 struct Text {
118 Block *block; /* original file content at the time of load operation */
119 Block *blocks; /* all blocks which have been allocated to hold insertion data */
120 Piece *pieces; /* all pieces which have been allocated, used to free them */
121 Piece *cache; /* most recently modified piece */
122 Piece begin, end; /* sentinel nodes which always exists but don't hold any data */
123 Revision *history; /* undo tree */
124 Revision *current_revision; /* revision holding all file changes until a snapshot is performed */
125 Revision *last_revision; /* the last revision added to the tree, chronologically */
126 Revision *saved_revision; /* the last revision at the time of the save operation */
127 size_t size; /* current file content size in bytes */
128 struct stat info; /* stat as probed at load time */
129 LineCache lines; /* mapping between absolute pos in bytes and logical line breaks */
132 struct TextSave { /* used to hold context between text_save_{begin,commit} calls */
133 Text *txt; /* text to operate on */
134 char *filename; /* filename to save to as given to text_save_begin */
135 char *tmpname; /* temporary name used for atomic rename(2) */
136 int fd; /* file descriptor to write data to using text_save_write */
137 int dirfd; /* directory file descriptor, relative to which we save */
138 enum TextSaveMethod type; /* method used to save file */
141 /* block management */
142 static Block *block_alloc(Text*, size_t size);
143 static Block *block_read(Text*, size_t size, int fd);
144 static Block *block_mmap(Text*, size_t size, int fd, off_t offset);
145 static void block_free(Block*);
146 static bool block_capacity(Block*, size_t len);
147 static const char *block_append(Block*, const char *data, size_t len);
148 static bool block_insert(Block*, size_t pos, const char *data, size_t len);
149 static bool block_delete(Block*, size_t pos, size_t len);
150 static const char *block_store(Text*, const char *data, size_t len);
151 /* cache layer */
152 static void cache_piece(Text *txt, Piece *p);
153 static bool cache_contains(Text *txt, Piece *p);
154 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len);
155 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len);
156 /* piece management */
157 static Piece *piece_alloc(Text *txt);
158 static void piece_free(Piece *p);
159 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len);
160 static Location piece_get_intern(Text *txt, size_t pos);
161 static Location piece_get_extern(Text *txt, size_t pos);
162 /* span management */
163 static void span_init(Span *span, Piece *start, Piece *end);
164 static void span_swap(Text *txt, Span *old, Span *new);
165 /* change management */
166 static Change *change_alloc(Text *txt, size_t pos);
167 static void change_free(Change *c);
168 /* revision management */
169 static Revision *revision_alloc(Text *txt);
170 static void revision_free(Revision *rev);
171 /* logical line counting cache */
172 static void lineno_cache_invalidate(LineCache *cache);
173 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skiped);
174 static size_t lines_count(Text *txt, size_t pos, size_t len);
176 static ssize_t write_all(int fd, const char *buf, size_t count) {
177 size_t rem = count;
178 while (rem > 0) {
179 ssize_t written = write(fd, buf, rem > INT_MAX ? INT_MAX : rem);
180 if (written < 0) {
181 if (errno == EAGAIN || errno == EINTR)
182 continue;
183 return -1;
184 } else if (written == 0) {
185 break;
187 rem -= written;
188 buf += written;
190 return count - rem;
193 /* allocate a new block of MAX(size, BLOCK_SIZE) bytes */
194 static Block *block_alloc(Text *txt, size_t size) {
195 Block *blk = calloc(1, sizeof *blk);
196 if (!blk)
197 return NULL;
198 if (BLOCK_SIZE > size)
199 size = BLOCK_SIZE;
200 if (!(blk->data = malloc(size))) {
201 free(blk);
202 return NULL;
204 blk->type = MALLOC;
205 blk->size = size;
206 blk->next = txt->blocks;
207 txt->blocks = blk;
208 return blk;
211 static Block *block_read(Text *txt, size_t size, int fd) {
212 Block *blk = block_alloc(txt, size);
213 if (!blk)
214 return NULL;
215 while (size > 0) {
216 char data[4096];
217 ssize_t len = read(fd, data, MIN(sizeof(data), size));
218 if (len == -1) {
219 txt->blocks = blk->next;
220 block_free(blk);
221 return NULL;
222 } else if (len == 0) {
223 break;
224 } else {
225 block_append(blk, data, len);
226 size -= len;
229 return blk;
232 static Block *block_mmap(Text *txt, size_t size, int fd, off_t offset) {
233 Block *blk = calloc(1, sizeof *blk);
234 if (!blk)
235 return NULL;
236 if (size) {
237 blk->data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
238 if (blk->data == MAP_FAILED) {
239 free(blk);
240 return NULL;
243 blk->type = MMAP_ORIG;
244 blk->size = size;
245 blk->len = size;
246 blk->next = txt->blocks;
247 txt->blocks = blk;
248 return blk;
251 static void block_free(Block *blk) {
252 if (!blk)
253 return;
254 if (blk->type == MALLOC)
255 free(blk->data);
256 else if ((blk->type == MMAP_ORIG || blk->type == MMAP) && blk->data)
257 munmap(blk->data, blk->size);
258 free(blk);
261 /* check whether block has enough free space to store len bytes */
262 static bool block_capacity(Block *blk, size_t len) {
263 return blk->size - blk->len >= len;
266 /* append data to block, assumes there is enough space available */
267 static const char *block_append(Block *blk, const char *data, size_t len) {
268 char *dest = memcpy(blk->data + blk->len, data, len);
269 blk->len += len;
270 return dest;
273 /* stores the given data in a block, allocates a new one if necessary. returns
274 * a pointer to the storage location or NULL if allocation failed. */
275 static const char *block_store(Text *txt, const char *data, size_t len) {
276 Block *blk = txt->blocks;
277 if ((!blk || !block_capacity(blk, len)) && !(blk = block_alloc(txt, len)))
278 return NULL;
279 return block_append(blk, data, len);
282 /* insert data into block at an arbitrary position, this should only be used with
283 * data of the most recently created piece. */
284 static bool block_insert(Block *blk, size_t pos, const char *data, size_t len) {
285 if (pos > blk->len || !block_capacity(blk, len))
286 return false;
287 if (blk->len == pos)
288 return block_append(blk, data, len);
289 char *insert = blk->data + pos;
290 memmove(insert + len, insert, blk->len - pos);
291 memcpy(insert, data, len);
292 blk->len += len;
293 return true;
296 /* delete data from a block at an arbitrary position, this should only be used with
297 * data of the most recently created piece. */
298 static bool block_delete(Block *blk, size_t pos, size_t len) {
299 size_t end;
300 if (!addu(pos, len, &end) || end > blk->len)
301 return false;
302 if (blk->len == pos) {
303 blk->len -= len;
304 return true;
306 char *delete = blk->data + pos;
307 memmove(delete, delete + len, blk->len - pos - len);
308 blk->len -= len;
309 return true;
312 /* cache the given piece if it is the most recently changed one */
313 static void cache_piece(Text *txt, Piece *p) {
314 Block *blk = txt->blocks;
315 if (!blk || p->data < blk->data || p->data + p->len != blk->data + blk->len)
316 return;
317 txt->cache = p;
320 /* check whether the given piece was the most recently modified one */
321 static bool cache_contains(Text *txt, Piece *p) {
322 Block *blk = txt->blocks;
323 Revision *rev = txt->current_revision;
324 if (!blk || !txt->cache || txt->cache != p || !rev || !rev->change)
325 return false;
327 Piece *start = rev->change->new.start;
328 Piece *end = rev->change->new.end;
329 bool found = false;
330 for (Piece *cur = start; !found; cur = cur->next) {
331 if (cur == p)
332 found = true;
333 if (cur == end)
334 break;
337 return found && p->data + p->len == blk->data + blk->len;
340 /* try to insert a chunk of data at a given piece offset. the insertion is only
341 * performed if the piece is the most recenetly changed one. the legnth of the
342 * piece, the span containing it and the whole text is adjusted accordingly */
343 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len) {
344 if (!cache_contains(txt, p))
345 return false;
346 Block *blk = txt->blocks;
347 size_t bufpos = p->data + off - blk->data;
348 if (!block_insert(blk, bufpos, data, len))
349 return false;
350 p->len += len;
351 txt->current_revision->change->new.len += len;
352 txt->size += len;
353 return true;
356 /* try to delete a chunk of data at a given piece offset. the deletion is only
357 * performed if the piece is the most recenetly changed one and the whole
358 * affected range lies within it. the legnth of the piece, the span containing it
359 * and the whole text is adjusted accordingly */
360 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len) {
361 if (!cache_contains(txt, p))
362 return false;
363 Block *blk = txt->blocks;
364 size_t end;
365 size_t bufpos = p->data + off - blk->data;
366 if (!addu(off, len, &end) || end > p->len || !block_delete(blk, bufpos, len))
367 return false;
368 p->len -= len;
369 txt->current_revision->change->new.len -= len;
370 txt->size -= len;
371 return true;
374 /* initialize a span and calculate its length */
375 static void span_init(Span *span, Piece *start, Piece *end) {
376 size_t len = 0;
377 span->start = start;
378 span->end = end;
379 for (Piece *p = start; p; p = p->next) {
380 len += p->len;
381 if (p == end)
382 break;
384 span->len = len;
387 /* swap out an old span and replace it with a new one.
389 * - if old is an empty span do not remove anything, just insert the new one
390 * - if new is an empty span do not insert anything, just remove the old one
392 * adjusts the document size accordingly.
394 static void span_swap(Text *txt, Span *old, Span *new) {
395 if (old->len == 0 && new->len == 0) {
396 return;
397 } else if (old->len == 0) {
398 /* insert new span */
399 new->start->prev->next = new->start;
400 new->end->next->prev = new->end;
401 } else if (new->len == 0) {
402 /* delete old span */
403 old->start->prev->next = old->end->next;
404 old->end->next->prev = old->start->prev;
405 } else {
406 /* replace old with new */
407 old->start->prev->next = new->start;
408 old->end->next->prev = new->end;
410 txt->size -= old->len;
411 txt->size += new->len;
414 /* Allocate a new revision and place it in the revision graph.
415 * All further changes will be associated with this revision. */
416 static Revision *revision_alloc(Text *txt) {
417 Revision *rev = calloc(1, sizeof *rev);
418 if (!rev)
419 return NULL;
420 rev->time = time(NULL);
421 txt->current_revision = rev;
423 /* set sequence number */
424 if (!txt->last_revision)
425 rev->seq = 0;
426 else
427 rev->seq = txt->last_revision->seq + 1;
429 /* set earlier, later pointers */
430 if (txt->last_revision)
431 txt->last_revision->later = rev;
432 rev->earlier = txt->last_revision;
434 if (!txt->history) {
435 txt->history = rev;
436 return rev;
439 /* set prev, next pointers */
440 rev->prev = txt->history;
441 txt->history->next = rev;
442 txt->history = rev;
443 return rev;
446 static void revision_free(Revision *rev) {
447 if (!rev)
448 return;
449 for (Change *next, *c = rev->change; c; c = next) {
450 next = c->next;
451 change_free(c);
453 free(rev);
456 static Piece *piece_alloc(Text *txt) {
457 Piece *p = calloc(1, sizeof *p);
458 if (!p)
459 return NULL;
460 p->text = txt;
461 p->global_next = txt->pieces;
462 if (txt->pieces)
463 txt->pieces->global_prev = p;
464 txt->pieces = p;
465 return p;
468 static void piece_free(Piece *p) {
469 if (!p)
470 return;
471 if (p->global_prev)
472 p->global_prev->global_next = p->global_next;
473 if (p->global_next)
474 p->global_next->global_prev = p->global_prev;
475 if (p->text->pieces == p)
476 p->text->pieces = p->global_next;
477 if (p->text->cache == p)
478 p->text->cache = NULL;
479 free(p);
482 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len) {
483 p->prev = prev;
484 p->next = next;
485 p->data = data;
486 p->len = len;
489 /* returns the piece holding the text at byte offset pos. if pos happens to
490 * be at a piece boundry i.e. the first byte of a piece then the previous piece
491 * to the left is returned with an offset of piece->len. this is convenient for
492 * modifications to the piece chain where both pieces (the returned one and the
493 * one following it) are needed, but unsuitable as a public interface.
495 * in particular if pos is zero, the begin sentinel piece is returned.
497 static Location piece_get_intern(Text *txt, size_t pos) {
498 size_t cur = 0;
499 for (Piece *p = &txt->begin; p->next; p = p->next) {
500 if (cur <= pos && pos <= cur + p->len)
501 return (Location){ .piece = p, .off = pos - cur };
502 cur += p->len;
505 return (Location){ 0 };
508 /* similiar to piece_get_intern but usable as a public API. returns the piece
509 * holding the text at byte offset pos. never returns a sentinel piece.
510 * it pos is the end of file (== text_size()) and the file is not empty then
511 * the last piece holding data is returned.
513 static Location piece_get_extern(Text *txt, size_t pos) {
514 size_t cur = 0;
515 Piece *p;
517 for (p = txt->begin.next; p->next; p = p->next) {
518 if (cur <= pos && pos < cur + p->len)
519 return (Location){ .piece = p, .off = pos - cur };
520 cur += p->len;
523 if (cur == pos)
524 return (Location){ .piece = p->prev, .off = p->prev->len };
526 return (Location){ 0 };
529 /* allocate a new change, associate it with current revision or a newly
530 * allocated one if none exists. */
531 static Change *change_alloc(Text *txt, size_t pos) {
532 Revision *rev = txt->current_revision;
533 if (!rev) {
534 rev = revision_alloc(txt);
535 if (!rev)
536 return NULL;
538 Change *c = calloc(1, sizeof *c);
539 if (!c)
540 return NULL;
541 c->pos = pos;
542 c->next = rev->change;
543 if (rev->change)
544 rev->change->prev = c;
545 rev->change = c;
546 return c;
549 static void change_free(Change *c) {
550 if (!c)
551 return;
552 /* only free the new part of the span, the old one is still in use */
553 if (c->new.start != c->new.end)
554 piece_free(c->new.end);
555 piece_free(c->new.start);
556 free(c);
559 /* When inserting new data there are 2 cases to consider.
561 * - in the first the insertion point falls into the middle of an exisiting
562 * piece which is replaced by three new pieces:
564 * /-+ --> +---------------+ --> +-\
565 * | | | existing text | | |
566 * \-+ <-- +---------------+ <-- +-/
568 * Insertion point for "demo "
570 * /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
571 * | | | existing| |demo | |text | | |
572 * \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
574 * - the second case deals with an insertion point at a piece boundry:
576 * /-+ --> +---------------+ --> +-\
577 * | | | existing text | | |
578 * \-+ <-- +---------------+ <-- +-/
580 * Insertion point for "short"
582 * /-+ --> +-----+ --> +---------------+ --> +-\
583 * | | |short| | existing text | | |
584 * \-+ <-- +-----+ <-- +---------------+ <-- +-/
586 bool text_insert(Text *txt, size_t pos, const char *data, size_t len) {
587 if (len == 0)
588 return true;
589 if (pos > txt->size)
590 return false;
591 if (pos < txt->lines.pos)
592 lineno_cache_invalidate(&txt->lines);
594 Location loc = piece_get_intern(txt, pos);
595 Piece *p = loc.piece;
596 if (!p)
597 return false;
598 size_t off = loc.off;
599 if (cache_insert(txt, p, off, data, len))
600 return true;
602 Change *c = change_alloc(txt, pos);
603 if (!c)
604 return false;
606 if (!(data = block_store(txt, data, len)))
607 return false;
609 Piece *new = NULL;
611 if (off == p->len) {
612 /* insert between two existing pieces, hence there is nothing to
613 * remove, just add a new piece holding the extra text */
614 if (!(new = piece_alloc(txt)))
615 return false;
616 piece_init(new, p, p->next, data, len);
617 span_init(&c->new, new, new);
618 span_init(&c->old, NULL, NULL);
619 } else {
620 /* insert into middle of an existing piece, therfore split the old
621 * piece. that is we have 3 new pieces one containing the content
622 * before the insertion point then one holding the newly inserted
623 * text and one holding the content after the insertion point.
625 Piece *before = piece_alloc(txt);
626 new = piece_alloc(txt);
627 Piece *after = piece_alloc(txt);
628 if (!before || !new || !after)
629 return false;
630 piece_init(before, p->prev, new, p->data, off);
631 piece_init(new, before, after, data, len);
632 piece_init(after, new, p->next, p->data + off, p->len - off);
634 span_init(&c->new, before, after);
635 span_init(&c->old, p, p);
638 cache_piece(txt, new);
639 span_swap(txt, &c->old, &c->new);
640 return true;
643 static bool text_vprintf(Text *txt, size_t pos, const char *format, va_list ap) {
644 va_list ap_save;
645 va_copy(ap_save, ap);
646 int len = vsnprintf(NULL, 0, format, ap);
647 if (len == -1) {
648 va_end(ap_save);
649 return false;
651 char *buf = malloc(len+1);
652 bool ret = buf && (vsnprintf(buf, len+1, format, ap_save) == len) && text_insert(txt, pos, buf, len);
653 free(buf);
654 va_end(ap_save);
655 return ret;
658 bool text_appendf(Text *txt, const char *format, ...) {
659 va_list ap;
660 va_start(ap, format);
661 bool ret = text_vprintf(txt, text_size(txt), format, ap);
662 va_end(ap);
663 return ret;
666 bool text_printf(Text *txt, size_t pos, const char *format, ...) {
667 va_list ap;
668 va_start(ap, format);
669 bool ret = text_vprintf(txt, pos, format, ap);
670 va_end(ap);
671 return ret;
674 static size_t revision_undo(Text *txt, Revision *rev) {
675 size_t pos = EPOS;
676 for (Change *c = rev->change; c; c = c->next) {
677 span_swap(txt, &c->new, &c->old);
678 pos = c->pos;
680 return pos;
683 static size_t revision_redo(Text *txt, Revision *rev) {
684 size_t pos = EPOS;
685 Change *c = rev->change;
686 while (c->next)
687 c = c->next;
688 for ( ; c; c = c->prev) {
689 span_swap(txt, &c->old, &c->new);
690 pos = c->pos;
691 if (c->new.len > c->old.len)
692 pos += c->new.len - c->old.len;
694 return pos;
697 size_t text_undo(Text *txt) {
698 size_t pos = EPOS;
699 /* taking rev snapshot makes sure that txt->current_revision is reset */
700 text_snapshot(txt);
701 Revision *rev = txt->history->prev;
702 if (!rev)
703 return pos;
704 pos = revision_undo(txt, txt->history);
705 txt->history = rev;
706 lineno_cache_invalidate(&txt->lines);
707 return pos;
710 size_t text_redo(Text *txt) {
711 size_t pos = EPOS;
712 /* taking a snapshot makes sure that txt->current_revision is reset */
713 text_snapshot(txt);
714 Revision *rev = txt->history->next;
715 if (!rev)
716 return pos;
717 pos = revision_redo(txt, rev);
718 txt->history = rev;
719 lineno_cache_invalidate(&txt->lines);
720 return pos;
723 static bool history_change_branch(Revision *rev) {
724 bool changed = false;
725 while (rev->prev) {
726 if (rev->prev->next != rev) {
727 rev->prev->next = rev;
728 changed = true;
730 rev = rev->prev;
732 return changed;
735 static size_t history_traverse_to(Text *txt, Revision *rev) {
736 size_t pos = EPOS;
737 if (!rev)
738 return pos;
739 bool changed = history_change_branch(rev);
740 if (!changed) {
741 if (rev->seq == txt->history->seq) {
742 return txt->lines.pos;
743 } else if (rev->seq > txt->history->seq) {
744 while (txt->history != rev)
745 pos = text_redo(txt);
746 return pos;
747 } else if (rev->seq < txt->history->seq) {
748 while (txt->history != rev)
749 pos = text_undo(txt);
750 return pos;
752 } else {
753 while (txt->history->prev && txt->history->prev->next == txt->history)
754 text_undo(txt);
755 pos = text_undo(txt);
756 while (txt->history != rev)
757 pos = text_redo(txt);
758 return pos;
760 return pos;
763 size_t text_earlier(Text *txt) {
764 return history_traverse_to(txt, txt->history->earlier);
767 size_t text_later(Text *txt) {
768 return history_traverse_to(txt, txt->history->later);
771 size_t text_restore(Text *txt, time_t time) {
772 Revision *rev = txt->history;
773 while (time < rev->time && rev->earlier)
774 rev = rev->earlier;
775 while (time > rev->time && rev->later)
776 rev = rev->later;
777 time_t diff = labs(rev->time - time);
778 if (rev->earlier && rev->earlier != txt->history && labs(rev->earlier->time - time) < diff)
779 rev = rev->earlier;
780 if (rev->later && rev->later != txt->history && labs(rev->later->time - time) < diff)
781 rev = rev->later;
782 return history_traverse_to(txt, rev);
785 time_t text_state(Text *txt) {
786 return txt->history->time;
789 static bool preserve_acl(int src, int dest) {
790 #if CONFIG_ACL
791 acl_t acl = acl_get_fd(src);
792 if (!acl)
793 return errno == ENOTSUP ? true : false;
794 if (acl_set_fd(dest, acl) == -1) {
795 acl_free(acl);
796 return false;
798 acl_free(acl);
799 #endif /* CONFIG_ACL */
800 return true;
803 static bool preserve_selinux_context(int src, int dest) {
804 #if CONFIG_SELINUX
805 char *context = NULL;
806 if (!is_selinux_enabled())
807 return true;
808 if (fgetfilecon(src, &context) == -1)
809 return errno == ENOTSUP ? true : false;
810 if (fsetfilecon(dest, context) == -1) {
811 freecon(context);
812 return false;
814 freecon(context);
815 #endif /* CONFIG_SELINUX */
816 return true;
819 static int mkstempat(int dirfd, char *template) {
820 if (dirfd == AT_FDCWD)
821 return mkstemp(template);
822 // FIXME: not thread safe
823 int fd = -1;
824 int cwd = open(".", O_RDONLY|O_DIRECTORY);
825 if (cwd == -1)
826 goto err;
827 if (fchdir(dirfd) == -1)
828 goto err;
829 fd = mkstemp(template);
830 err:
831 if (cwd != -1) {
832 fchdir(cwd);
833 close(cwd);
835 return fd;
838 /* Create a new file named `.filename.vis.XXXXXX` (where `XXXXXX` is a
839 * randomly generated, unique suffix) and try to preserve all important
840 * meta data. After the file content has been written to this temporary
841 * file, text_save_commit_atomic will atomically move it to its final
842 * (possibly already existing) destination using rename(2).
844 * This approach does not work if:
846 * - the file is a symbolic link
847 * - the file is a hard link
848 * - file ownership can not be preserved
849 * - file group can not be preserved
850 * - directory permissions do not allow creation of a new file
851 * - POSXI ACL can not be preserved (if enabled)
852 * - SELinux security context can not be preserved (if enabled)
854 static bool text_save_begin_atomic(TextSave *ctx) {
855 int oldfd, saved_errno;
856 if ((oldfd = openat(ctx->dirfd, ctx->filename, O_RDONLY)) == -1 && errno != ENOENT)
857 goto err;
858 struct stat oldmeta = { 0 };
859 if (oldfd != -1 && fstatat(ctx->dirfd, ctx->filename, &oldmeta, AT_SYMLINK_NOFOLLOW) == -1)
860 goto err;
861 if (oldfd != -1) {
862 if (S_ISLNK(oldmeta.st_mode)) /* symbolic link */
863 goto err;
864 if (oldmeta.st_nlink > 1) /* hard link */
865 goto err;
868 char suffix[] = ".vis.XXXXXX";
869 size_t len = strlen(ctx->filename) + sizeof("./.") + sizeof(suffix);
870 char *dir = strdup(ctx->filename);
871 char *base = strdup(ctx->filename);
873 if (!(ctx->tmpname = malloc(len)) || !dir || !base) {
874 free(dir);
875 free(base);
876 goto err;
879 snprintf(ctx->tmpname, len, "%s/.%s%s", dirname(dir), basename(base), suffix);
880 free(dir);
881 free(base);
883 if ((ctx->fd = mkstempat(ctx->dirfd, ctx->tmpname)) == -1)
884 goto err;
886 if (oldfd == -1) {
887 mode_t mask = umask(0);
888 umask(mask);
889 if (fchmod(ctx->fd, 0666 & ~mask) == -1)
890 goto err;
891 } else {
892 if (fchmod(ctx->fd, oldmeta.st_mode) == -1)
893 goto err;
894 if (!preserve_acl(oldfd, ctx->fd) || !preserve_selinux_context(oldfd, ctx->fd))
895 goto err;
896 /* change owner if necessary */
897 if (oldmeta.st_uid != getuid() && fchown(ctx->fd, oldmeta.st_uid, (uid_t)-1) == -1)
898 goto err;
899 /* change group if necessary, in case of failure some editors reset
900 * the group permissions to the same as for others */
901 if (oldmeta.st_gid != getgid() && fchown(ctx->fd, (uid_t)-1, oldmeta.st_gid) == -1)
902 goto err;
903 close(oldfd);
906 ctx->type = TEXT_SAVE_ATOMIC;
907 return true;
908 err:
909 saved_errno = errno;
910 if (oldfd != -1)
911 close(oldfd);
912 if (ctx->fd != -1)
913 close(ctx->fd);
914 ctx->fd = -1;
915 free(ctx->tmpname);
916 ctx->tmpname = NULL;
917 errno = saved_errno;
918 return false;
921 static bool text_save_commit_atomic(TextSave *ctx) {
922 if (fsync(ctx->fd) == -1)
923 return false;
925 struct stat meta = { 0 };
926 if (fstat(ctx->fd, &meta) == -1)
927 return false;
929 bool close_failed = (close(ctx->fd) == -1);
930 ctx->fd = -1;
931 if (close_failed)
932 return false;
934 if (renameat(ctx->dirfd, ctx->tmpname, ctx->dirfd, ctx->filename) == -1)
935 return false;
937 free(ctx->tmpname);
938 ctx->tmpname = NULL;
940 int dir = openat(ctx->dirfd, dirname(ctx->filename), O_DIRECTORY|O_RDONLY);
941 if (dir == -1)
942 return false;
944 if (fsync(dir) == -1 && errno != EINVAL) {
945 close(dir);
946 return false;
949 if (close(dir) == -1)
950 return false;
952 if (meta.st_mtime)
953 ctx->txt->info = meta;
954 return true;
957 static bool text_save_begin_inplace(TextSave *ctx) {
958 Text *txt = ctx->txt;
959 struct stat meta = { 0 };
960 int newfd = -1, saved_errno;
961 if ((ctx->fd = openat(ctx->dirfd, ctx->filename, O_CREAT|O_WRONLY, 0666)) == -1)
962 goto err;
963 if (fstat(ctx->fd, &meta) == -1)
964 goto err;
965 Block *block = txt->block;
966 if (meta.st_dev == txt->info.st_dev && meta.st_ino == txt->info.st_ino &&
967 block && block->type == MMAP_ORIG && block->size) {
968 /* The file we are going to overwrite is currently mmap-ed from
969 * text_load, therefore we copy the mmap-ed block to a temporary
970 * file and remap it at the same position such that all pointers
971 * from the various pieces are still valid.
973 size_t size = block->size;
974 char tmpname[32] = "/tmp/vis-XXXXXX";
975 newfd = mkstemp(tmpname);
976 if (newfd == -1)
977 goto err;
978 if (unlink(tmpname) == -1)
979 goto err;
980 ssize_t written = write_all(newfd, block->data, size);
981 if (written == -1 || (size_t)written != size)
982 goto err;
983 void *data = mmap(block->data, size, PROT_READ, MAP_SHARED|MAP_FIXED, newfd, 0);
984 if (data == MAP_FAILED)
985 goto err;
986 bool close_failed = (close(newfd) == -1);
987 newfd = -1;
988 if (close_failed)
989 goto err;
990 block->type = MMAP;
992 /* overwrite the existing file content, if something goes wrong
993 * here we are screwed, TODO: make a backup before? */
994 if (ftruncate(ctx->fd, 0) == -1)
995 goto err;
996 ctx->type = TEXT_SAVE_INPLACE;
997 return true;
998 err:
999 saved_errno = errno;
1000 if (newfd != -1)
1001 close(newfd);
1002 if (ctx->fd != -1)
1003 close(ctx->fd);
1004 ctx->fd = -1;
1005 errno = saved_errno;
1006 return false;
1009 static bool text_save_commit_inplace(TextSave *ctx) {
1010 if (fsync(ctx->fd) == -1)
1011 return false;
1012 struct stat meta = { 0 };
1013 if (fstat(ctx->fd, &meta) == -1)
1014 return false;
1015 if (close(ctx->fd) == -1)
1016 return false;
1017 ctx->txt->info = meta;
1018 return true;
1021 TextSave *text_save_begin(Text *txt, int dirfd, const char *filename, enum TextSaveMethod type) {
1022 if (!filename)
1023 return NULL;
1024 TextSave *ctx = calloc(1, sizeof *ctx);
1025 if (!ctx)
1026 return NULL;
1027 ctx->txt = txt;
1028 ctx->fd = -1;
1029 ctx->dirfd = dirfd;
1030 if (!(ctx->filename = strdup(filename)))
1031 goto err;
1032 errno = 0;
1033 if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_ATOMIC) && text_save_begin_atomic(ctx))
1034 return ctx;
1035 if (errno == ENOSPC)
1036 goto err;
1037 if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_INPLACE) && text_save_begin_inplace(ctx))
1038 return ctx;
1039 err:
1040 text_save_cancel(ctx);
1041 return NULL;
1044 bool text_save_commit(TextSave *ctx) {
1045 if (!ctx)
1046 return true;
1047 bool ret;
1048 Text *txt = ctx->txt;
1049 switch (ctx->type) {
1050 case TEXT_SAVE_ATOMIC:
1051 ret = text_save_commit_atomic(ctx);
1052 break;
1053 case TEXT_SAVE_INPLACE:
1054 ret = text_save_commit_inplace(ctx);
1055 break;
1056 default:
1057 ret = false;
1058 break;
1061 if (ret) {
1062 txt->saved_revision = txt->history;
1063 text_snapshot(txt);
1065 text_save_cancel(ctx);
1066 return ret;
1069 void text_save_cancel(TextSave *ctx) {
1070 if (!ctx)
1071 return;
1072 int saved_errno = errno;
1073 if (ctx->fd != -1)
1074 close(ctx->fd);
1075 if (ctx->tmpname && ctx->tmpname[0])
1076 unlinkat(ctx->dirfd, ctx->tmpname, 0);
1077 free(ctx->tmpname);
1078 free(ctx->filename);
1079 free(ctx);
1080 errno = saved_errno;
1083 /* First try to save the file atomically using rename(2) if this does not
1084 * work overwrite the file in place. However if something goes wrong during
1085 * this overwrite the original file is permanently damaged.
1087 bool text_save(Text *txt, const char *filename) {
1088 return text_saveat(txt, AT_FDCWD, filename);
1091 bool text_saveat(Text *txt, int dirfd, const char *filename) {
1092 return text_saveat_method(txt, dirfd, filename, TEXT_SAVE_AUTO);
1095 bool text_save_method(Text *txt, const char *filename, enum TextSaveMethod method) {
1096 return text_saveat_method(txt, AT_FDCWD, filename, method);
1099 bool text_saveat_method(Text *txt, int dirfd, const char *filename, enum TextSaveMethod method) {
1100 if (!filename) {
1101 txt->saved_revision = txt->history;
1102 text_snapshot(txt);
1103 return true;
1105 TextSave *ctx = text_save_begin(txt, dirfd, filename, method);
1106 if (!ctx)
1107 return false;
1108 Filerange range = (Filerange){ .start = 0, .end = text_size(txt) };
1109 ssize_t written = text_save_write_range(ctx, &range);
1110 if (written == -1 || (size_t)written != text_range_size(&range)) {
1111 text_save_cancel(ctx);
1112 return false;
1114 return text_save_commit(ctx);
1117 ssize_t text_save_write_range(TextSave *ctx, Filerange *range) {
1118 return text_write_range(ctx->txt, range, ctx->fd);
1121 ssize_t text_write(Text *txt, int fd) {
1122 Filerange r = (Filerange){ .start = 0, .end = text_size(txt) };
1123 return text_write_range(txt, &r, fd);
1126 ssize_t text_write_range(Text *txt, Filerange *range, int fd) {
1127 size_t size = text_range_size(range), rem = size;
1128 for (Iterator it = text_iterator_get(txt, range->start);
1129 rem > 0 && text_iterator_valid(&it);
1130 text_iterator_next(&it)) {
1131 size_t prem = it.end - it.text;
1132 if (prem > rem)
1133 prem = rem;
1134 ssize_t written = write_all(fd, it.text, prem);
1135 if (written == -1)
1136 return -1;
1137 rem -= written;
1138 if ((size_t)written != prem)
1139 break;
1141 return size - rem;
1144 Text *text_load(const char *filename) {
1145 return text_load_method(filename, TEXT_LOAD_AUTO);
1148 Text *text_loadat(int dirfd, const char *filename) {
1149 return text_loadat_method(dirfd, filename, TEXT_LOAD_AUTO);
1152 Text *text_load_method(const char *filename, enum TextLoadMethod method) {
1153 return text_loadat_method(AT_FDCWD, filename, method);
1156 Text *text_loadat_method(int dirfd, const char *filename, enum TextLoadMethod method) {
1157 int fd = -1;
1158 size_t size = 0;
1159 Text *txt = calloc(1, sizeof *txt);
1160 if (!txt)
1161 return NULL;
1162 Piece *p = piece_alloc(txt);
1163 if (!p)
1164 goto out;
1165 lineno_cache_invalidate(&txt->lines);
1166 if (filename) {
1167 if ((fd = openat(dirfd, filename, O_RDONLY)) == -1)
1168 goto out;
1169 if (fstat(fd, &txt->info) == -1)
1170 goto out;
1171 if (!S_ISREG(txt->info.st_mode)) {
1172 errno = S_ISDIR(txt->info.st_mode) ? EISDIR : ENOTSUP;
1173 goto out;
1175 // XXX: use lseek(fd, 0, SEEK_END); instead?
1176 size = txt->info.st_size;
1177 if (size > 0) {
1178 if (method == TEXT_LOAD_READ || (method == TEXT_LOAD_AUTO && size < BLOCK_MMAP_SIZE))
1179 txt->block = block_read(txt, size, fd);
1180 else
1181 txt->block = block_mmap(txt, size, fd, 0);
1182 if (!txt->block)
1183 goto out;
1184 piece_init(p, &txt->begin, &txt->end, txt->block->data, txt->block->len);
1188 if (size == 0)
1189 piece_init(p, &txt->begin, &txt->end, "\0", 0);
1191 piece_init(&txt->begin, NULL, p, NULL, 0);
1192 piece_init(&txt->end, p, NULL, NULL, 0);
1193 txt->size = p->len;
1194 /* write an empty revision */
1195 change_alloc(txt, EPOS);
1196 text_snapshot(txt);
1197 txt->saved_revision = txt->history;
1199 if (fd != -1)
1200 close(fd);
1201 return txt;
1202 out:
1203 if (fd != -1)
1204 close(fd);
1205 text_free(txt);
1206 return NULL;
1209 struct stat text_stat(Text *txt) {
1210 return txt->info;
1213 /* A delete operation can either start/stop midway through a piece or at
1214 * a boundry. In the former case a new piece is created to represent the
1215 * remaining text before/after the modification point.
1217 * /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
1218 * | | | existing| |demo | |text | | |
1219 * \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
1220 * ^ ^
1221 * |------ delete range -----|
1223 * /-+ --> +----+ --> +--+ --> +-\
1224 * | | | exi| |t | | |
1225 * \-+ <-- +----+ <-- +--+ <-- +-/
1227 bool text_delete(Text *txt, size_t pos, size_t len) {
1228 if (len == 0)
1229 return true;
1230 size_t pos_end;
1231 if (!addu(pos, len, &pos_end) || pos_end > txt->size)
1232 return false;
1233 if (pos < txt->lines.pos)
1234 lineno_cache_invalidate(&txt->lines);
1236 Location loc = piece_get_intern(txt, pos);
1237 Piece *p = loc.piece;
1238 if (!p)
1239 return false;
1240 size_t off = loc.off;
1241 if (cache_delete(txt, p, off, len))
1242 return true;
1243 Change *c = change_alloc(txt, pos);
1244 if (!c)
1245 return false;
1247 bool midway_start = false, midway_end = false; /* split pieces? */
1248 Piece *before, *after; /* unmodified pieces before/after deletion point */
1249 Piece *start, *end; /* span which is removed */
1250 size_t cur; /* how much has already been deleted */
1252 if (off == p->len) {
1253 /* deletion starts at a piece boundry */
1254 cur = 0;
1255 before = p;
1256 start = p->next;
1257 } else {
1258 /* deletion starts midway through a piece */
1259 midway_start = true;
1260 cur = p->len - off;
1261 start = p;
1262 before = piece_alloc(txt);
1263 if (!before)
1264 return false;
1267 /* skip all pieces which fall into deletion range */
1268 while (cur < len) {
1269 p = p->next;
1270 cur += p->len;
1273 if (cur == len) {
1274 /* deletion stops at a piece boundry */
1275 end = p;
1276 after = p->next;
1277 } else {
1278 /* cur > len: deletion stops midway through a piece */
1279 midway_end = true;
1280 end = p;
1281 after = piece_alloc(txt);
1282 if (!after)
1283 return false;
1284 piece_init(after, before, p->next, p->data + p->len - (cur - len), cur - len);
1287 if (midway_start) {
1288 /* we finally know which piece follows our newly allocated before piece */
1289 piece_init(before, start->prev, after, start->data, off);
1292 Piece *new_start = NULL, *new_end = NULL;
1293 if (midway_start) {
1294 new_start = before;
1295 if (!midway_end)
1296 new_end = before;
1298 if (midway_end) {
1299 if (!midway_start)
1300 new_start = after;
1301 new_end = after;
1304 span_init(&c->new, new_start, new_end);
1305 span_init(&c->old, start, end);
1306 span_swap(txt, &c->old, &c->new);
1307 return true;
1310 bool text_delete_range(Text *txt, Filerange *r) {
1311 if (!text_range_valid(r))
1312 return false;
1313 return text_delete(txt, r->start, text_range_size(r));
1316 /* preserve the current text content such that it can be restored by
1317 * means of undo/redo operations */
1318 void text_snapshot(Text *txt) {
1319 if (txt->current_revision)
1320 txt->last_revision = txt->current_revision;
1321 txt->current_revision = NULL;
1322 txt->cache = NULL;
1326 void text_free(Text *txt) {
1327 if (!txt)
1328 return;
1330 // free history
1331 Revision *hist = txt->history;
1332 while (hist && hist->prev)
1333 hist = hist->prev;
1334 while (hist) {
1335 Revision *later = hist->later;
1336 revision_free(hist);
1337 hist = later;
1340 for (Piece *next, *p = txt->pieces; p; p = next) {
1341 next = p->global_next;
1342 piece_free(p);
1345 for (Block *next, *blk = txt->blocks; blk; blk = next) {
1346 next = blk->next;
1347 block_free(blk);
1350 free(txt);
1353 bool text_modified(Text *txt) {
1354 return txt->saved_revision != txt->history;
1357 bool text_mmaped(Text *txt, const char *ptr) {
1358 uintptr_t addr = (uintptr_t)ptr;
1359 for (Block *blk = txt->blocks; blk; blk = blk->next) {
1360 if ((blk->type == MMAP_ORIG || blk->type == MMAP) &&
1361 (uintptr_t)(blk->data) <= addr && addr < (uintptr_t)(blk->data + blk->size))
1362 return true;
1364 return false;
1367 static bool text_iterator_init(Iterator *it, size_t pos, Piece *p, size_t off) {
1368 Iterator iter = (Iterator){
1369 .pos = pos,
1370 .piece = p,
1371 .start = p ? p->data : NULL,
1372 .end = p ? p->data + p->len : NULL,
1373 .text = p ? p->data + off : NULL,
1375 *it = iter;
1376 return text_iterator_valid(it);
1379 Iterator text_iterator_get(Text *txt, size_t pos) {
1380 Iterator it;
1381 Location loc = piece_get_extern(txt, pos);
1382 text_iterator_init(&it, pos, loc.piece, loc.off);
1383 return it;
1386 bool text_iterator_byte_get(Iterator *it, char *b) {
1387 if (text_iterator_valid(it)) {
1388 if (it->start <= it->text && it->text < it->end) {
1389 *b = *it->text;
1390 return true;
1391 } else if (it->pos == it->piece->text->size) { /* EOF */
1392 *b = '\0';
1393 return true;
1396 return false;
1399 bool text_iterator_next(Iterator *it) {
1400 size_t rem = it->end - it->text;
1401 return text_iterator_init(it, it->pos+rem, it->piece ? it->piece->next : NULL, 0);
1404 bool text_iterator_prev(Iterator *it) {
1405 size_t off = it->text - it->start;
1406 size_t len = it->piece && it->piece->prev ? it->piece->prev->len : 0;
1407 return text_iterator_init(it, it->pos-off, it->piece ? it->piece->prev : NULL, len);
1410 bool text_iterator_valid(const Iterator *it) {
1411 /* filter out sentinel nodes */
1412 return it->piece && it->piece->text;
1415 bool text_iterator_byte_next(Iterator *it, char *b) {
1416 if (!it->piece || !it->piece->next)
1417 return false;
1418 bool eof = true;
1419 if (it->text < it->end) {
1420 it->text++;
1421 it->pos++;
1422 eof = false;
1423 } else if (!it->piece->prev) {
1424 eof = false;
1427 while (it->text == it->end) {
1428 if (!text_iterator_next(it)) {
1429 if (eof)
1430 return false;
1431 if (b)
1432 *b = '\0';
1433 return text_iterator_prev(it);
1437 if (b)
1438 *b = *it->text;
1439 return true;
1442 bool text_iterator_byte_prev(Iterator *it, char *b) {
1443 if (!it->piece || !it->piece->prev)
1444 return false;
1445 bool eof = !it->piece->next;
1446 while (it->text == it->start) {
1447 if (!text_iterator_prev(it)) {
1448 if (!eof)
1449 return false;
1450 if (b)
1451 *b = '\0';
1452 return text_iterator_next(it);
1456 --it->text;
1457 --it->pos;
1459 if (b)
1460 *b = *it->text;
1461 return true;
1464 bool text_iterator_byte_find_prev(Iterator *it, char b) {
1465 while (it->text) {
1466 const char *match = memrchr(it->start, b, it->text - it->start);
1467 if (match) {
1468 it->pos -= it->text - match;
1469 it->text = match;
1470 return true;
1472 text_iterator_prev(it);
1474 text_iterator_next(it);
1475 return false;
1478 bool text_iterator_byte_find_next(Iterator *it, char b) {
1479 while (it->text) {
1480 const char *match = memchr(it->text, b, it->end - it->text);
1481 if (match) {
1482 it->pos += match - it->text;
1483 it->text = match;
1484 return true;
1486 text_iterator_next(it);
1488 text_iterator_prev(it);
1489 return false;
1492 bool text_iterator_codepoint_next(Iterator *it, char *c) {
1493 while (text_iterator_byte_next(it, NULL)) {
1494 if (ISUTF8(*it->text)) {
1495 if (c)
1496 *c = *it->text;
1497 return true;
1500 return false;
1503 bool text_iterator_codepoint_prev(Iterator *it, char *c) {
1504 while (text_iterator_byte_prev(it, NULL)) {
1505 if (ISUTF8(*it->text)) {
1506 if (c)
1507 *c = *it->text;
1508 return true;
1511 return false;
1514 bool text_iterator_char_next(Iterator *it, char *c) {
1515 if (!text_iterator_codepoint_next(it, c))
1516 return false;
1517 mbstate_t ps = { 0 };
1518 for (;;) {
1519 char buf[MB_LEN_MAX];
1520 size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf);
1521 wchar_t wc;
1522 size_t wclen = mbrtowc(&wc, buf, len, &ps);
1523 if (wclen == (size_t)-1 && errno == EILSEQ) {
1524 return true;
1525 } else if (wclen == (size_t)-2) {
1526 return false;
1527 } else if (wclen == 0) {
1528 return true;
1529 } else {
1530 int width = wcwidth(wc);
1531 if (width != 0)
1532 return true;
1533 if (!text_iterator_codepoint_next(it, c))
1534 return false;
1537 return true;
1540 bool text_iterator_char_prev(Iterator *it, char *c) {
1541 if (!text_iterator_codepoint_prev(it, c))
1542 return false;
1543 for (;;) {
1544 char buf[MB_LEN_MAX];
1545 size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf);
1546 wchar_t wc;
1547 mbstate_t ps = { 0 };
1548 size_t wclen = mbrtowc(&wc, buf, len, &ps);
1549 if (wclen == (size_t)-1 && errno == EILSEQ) {
1550 return true;
1551 } else if (wclen == (size_t)-2) {
1552 return false;
1553 } else if (wclen == 0) {
1554 return true;
1555 } else {
1556 int width = wcwidth(wc);
1557 if (width != 0)
1558 return true;
1559 if (!text_iterator_codepoint_prev(it, c))
1560 return false;
1563 return true;
1566 bool text_byte_get(Text *txt, size_t pos, char *byte) {
1567 return text_bytes_get(txt, pos, 1, byte);
1570 size_t text_bytes_get(Text *txt, size_t pos, size_t len, char *buf) {
1571 if (!buf)
1572 return 0;
1573 char *cur = buf;
1574 size_t rem = len;
1575 for (Iterator it = text_iterator_get(txt, pos);
1576 text_iterator_valid(&it);
1577 text_iterator_next(&it)) {
1578 if (rem == 0)
1579 break;
1580 size_t piece_len = it.end - it.text;
1581 if (piece_len > rem)
1582 piece_len = rem;
1583 if (piece_len) {
1584 memcpy(cur, it.text, piece_len);
1585 cur += piece_len;
1586 rem -= piece_len;
1589 return len - rem;
1592 char *text_bytes_alloc0(Text *txt, size_t pos, size_t len) {
1593 if (len == SIZE_MAX)
1594 return NULL;
1595 char *buf = malloc(len+1);
1596 if (!buf)
1597 return NULL;
1598 len = text_bytes_get(txt, pos, len, buf);
1599 buf[len] = '\0';
1600 return buf;
1603 size_t text_size(Text *txt) {
1604 return txt->size;
1607 /* count the number of new lines '\n' in range [pos, pos+len) */
1608 static size_t lines_count(Text *txt, size_t pos, size_t len) {
1609 size_t lines = 0;
1610 for (Iterator it = text_iterator_get(txt, pos);
1611 text_iterator_valid(&it);
1612 text_iterator_next(&it)) {
1613 const char *start = it.text;
1614 while (len > 0 && start < it.end) {
1615 size_t n = MIN(len, (size_t)(it.end - start));
1616 const char *end = memchr(start, '\n', n);
1617 if (!end) {
1618 len -= n;
1619 break;
1621 lines++;
1622 len -= end - start + 1;
1623 start = end + 1;
1626 if (len == 0)
1627 break;
1629 return lines;
1632 /* skip n lines forward and return position afterwards */
1633 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skipped) {
1634 size_t lines_old = lines;
1635 for (Iterator it = text_iterator_get(txt, pos);
1636 text_iterator_valid(&it);
1637 text_iterator_next(&it)) {
1638 const char *start = it.text;
1639 while (lines > 0 && start < it.end) {
1640 size_t n = it.end - start;
1641 const char *end = memchr(start, '\n', n);
1642 if (!end) {
1643 pos += n;
1644 break;
1646 pos += end - start + 1;
1647 start = end + 1;
1648 lines--;
1651 if (lines == 0)
1652 break;
1654 if (lines_skipped)
1655 *lines_skipped = lines_old - lines;
1656 return pos;
1659 static void lineno_cache_invalidate(LineCache *cache) {
1660 cache->pos = 0;
1661 cache->lineno = 1;
1664 size_t text_pos_by_lineno(Text *txt, size_t lineno) {
1665 size_t lines_skipped;
1666 LineCache *cache = &txt->lines;
1667 if (lineno <= 1)
1668 return 0;
1669 if (lineno > cache->lineno) {
1670 cache->pos = lines_skip_forward(txt, cache->pos, lineno - cache->lineno, &lines_skipped);
1671 cache->lineno += lines_skipped;
1672 } else if (lineno < cache->lineno) {
1673 #if 0
1674 // TODO does it make sense to scan memory backwards here?
1675 size_t diff = cache->lineno - lineno;
1676 if (diff < lineno) {
1677 lines_skip_backward(txt, cache->pos, diff);
1678 } else
1679 #endif
1680 cache->pos = lines_skip_forward(txt, 0, lineno - 1, &lines_skipped);
1681 cache->lineno = lines_skipped + 1;
1683 return cache->lineno == lineno ? cache->pos : EPOS;
1686 size_t text_lineno_by_pos(Text *txt, size_t pos) {
1687 LineCache *cache = &txt->lines;
1688 if (pos > txt->size)
1689 pos = txt->size;
1690 if (pos < cache->pos) {
1691 size_t diff = cache->pos - pos;
1692 if (diff < pos)
1693 cache->lineno -= lines_count(txt, pos, diff);
1694 else
1695 cache->lineno = lines_count(txt, 0, pos) + 1;
1696 } else if (pos > cache->pos) {
1697 cache->lineno += lines_count(txt, cache->pos, pos - cache->pos);
1699 cache->pos = text_line_begin(txt, pos);
1700 return cache->lineno;
1703 Mark text_mark_set(Text *txt, size_t pos) {
1704 if (pos == txt->size)
1705 return (Mark)&txt->end;
1706 Location loc = piece_get_extern(txt, pos);
1707 if (!loc.piece)
1708 return EMARK;
1709 return (Mark)(loc.piece->data + loc.off);
1712 size_t text_mark_get(Text *txt, Mark mark) {
1713 size_t cur = 0;
1715 if (mark == EMARK)
1716 return EPOS;
1717 if (mark == (Mark)&txt->end)
1718 return txt->size;
1720 for (Piece *p = txt->begin.next; p->next; p = p->next) {
1721 Mark start = (Mark)(p->data);
1722 Mark end = start + p->len;
1723 if (start <= mark && mark < end)
1724 return cur + (mark - start);
1725 cur += p->len;
1728 return EPOS;