text: avoid direct access to txt->blocks in I/O related code
[vis.git] / text.c
blobfa1aa862bf9fc2c4ded0c307987b4fc8cd931c1c
1 #ifndef _GNU_SOURCE
2 #define _GNU_SOURCE /* memrchr(3) is non-standard */
3 #endif
4 #include <unistd.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <time.h>
9 #include <fcntl.h>
10 #include <errno.h>
11 #include <wchar.h>
12 #include <stdint.h>
13 #include <libgen.h>
14 #include <limits.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <sys/mman.h>
18 #if CONFIG_ACL
19 #include <sys/acl.h>
20 #endif
21 #if CONFIG_SELINUX
22 #include <selinux/selinux.h>
23 #endif
25 #include "text.h"
26 #include "text-util.h"
27 #include "text-motions.h"
28 #include "util.h"
29 #include "array.h"
31 /* Allocate blocks holding the actual file content in chunks of size: */
32 #ifndef BLOCK_SIZE
33 #define BLOCK_SIZE (1 << 20)
34 #endif
35 /* Files smaller than this value are copied on load, larger ones are mmap(2)-ed
36 * directely. Hence the former can be truncated, while doing so on the latter
37 * results in havoc. */
38 #define BLOCK_MMAP_SIZE (1 << 26)
40 /* Block holding the file content, either readonly mmap(2)-ed from the original
41 * file or heap allocated to store the modifications.
43 typedef struct Block Block;
44 struct Block {
45 size_t size; /* maximal capacity */
46 size_t len; /* current used length / insertion position */
47 char *data; /* actual data */
48 enum { /* type of allocation */
49 MMAP_ORIG, /* mmap(2)-ed from an external file */
50 MMAP, /* mmap(2)-ed from a temporary file only known to this process */
51 MALLOC, /* heap allocated block using malloc(3) */
52 } type;
55 /* A piece holds a reference (but doesn't itself store) a certain amount of data.
56 * All active pieces chained together form the whole content of the document.
57 * At the beginning there exists only one piece, spanning the whole document.
58 * Upon insertion/deletion new pieces will be created to represent the changes.
59 * Generally pieces are never destroyed, but kept around to peform undo/redo
60 * operations.
62 struct Piece {
63 Text *text; /* text to which this piece belongs */
64 Piece *prev, *next; /* pointers to the logical predecessor/successor */
65 Piece *global_prev; /* double linked list in order of allocation, */
66 Piece *global_next; /* used to free individual pieces */
67 const char *data; /* pointer into a Block holding the data */
68 size_t len; /* the length in number of bytes of the data */
71 /* used to transform a global position (byte offset starting from the beginning
72 * of the text) into an offset relative to a piece.
74 typedef struct {
75 Piece *piece; /* piece holding the location */
76 size_t off; /* offset into the piece in bytes */
77 } Location;
79 /* A Span holds a certain range of pieces. Changes to the document are always
80 * performed by swapping out an existing span with a new one.
82 typedef struct {
83 Piece *start, *end; /* start/end of the span */
84 size_t len; /* the sum of the lengths of the pieces which form this span */
85 } Span;
87 /* A Change keeps all needed information to redo/undo an insertion/deletion. */
88 typedef struct Change Change;
89 struct Change {
90 Span old; /* all pieces which are being modified/swapped out by the change */
91 Span new; /* all pieces which are introduced/swapped in by the change */
92 size_t pos; /* absolute position at which the change occured */
93 Change *next; /* next change which is part of the same revision */
94 Change *prev; /* previous change which is part of the same revision */
97 /* A Revision is a list of Changes which are used to undo/redo all modifications
98 * since the last snapshot operation. Revisions are stored in a directed graph structure.
100 typedef struct Revision Revision;
101 struct Revision {
102 Change *change; /* the most recent change */
103 Revision *next; /* the next (child) revision in the undo tree */
104 Revision *prev; /* the previous (parent) revision in the undo tree */
105 Revision *earlier; /* the previous Revision, chronologically */
106 Revision *later; /* the next Revision, chronologically */
107 time_t time; /* when the first change of this revision was performed */
108 size_t seq; /* a unique, strictly increasing identifier */
111 typedef struct {
112 size_t pos; /* position in bytes from start of file */
113 size_t lineno; /* line number in file i.e. number of '\n' in [0, pos) */
114 } LineCache;
116 /* The main struct holding all information of a given file */
117 struct Text {
118 Array blocks; /* blocks which hold text content */
119 Piece *pieces; /* all pieces which have been allocated, used to free them */
120 Piece *cache; /* most recently modified piece */
121 Piece begin, end; /* sentinel nodes which always exists but don't hold any data */
122 Revision *history; /* undo tree */
123 Revision *current_revision; /* revision holding all file changes until a snapshot is performed */
124 Revision *last_revision; /* the last revision added to the tree, chronologically */
125 Revision *saved_revision; /* the last revision at the time of the save operation */
126 size_t size; /* current file content size in bytes */
127 struct stat info; /* stat as probed at load time */
128 LineCache lines; /* mapping between absolute pos in bytes and logical line breaks */
131 struct TextSave { /* used to hold context between text_save_{begin,commit} calls */
132 Text *txt; /* text to operate on */
133 char *filename; /* filename to save to as given to text_save_begin */
134 char *tmpname; /* temporary name used for atomic rename(2) */
135 int fd; /* file descriptor to write data to using text_save_write */
136 int dirfd; /* directory file descriptor, relative to which we save */
137 enum TextSaveMethod type; /* method used to save file */
140 /* block management */
141 static Block *block_alloc(size_t size);
142 static Block *block_read(size_t size, int fd);
143 static Block *block_mmap(size_t size, int fd, off_t offset);
144 static Block *block_load(int dirfd, const char *filename, enum TextLoadMethod method, struct stat *info);
145 static void block_free(Block*);
146 static bool block_capacity(Block*, size_t len);
147 static const char *block_append(Block*, const char *data, size_t len);
148 static bool block_insert(Block*, size_t pos, const char *data, size_t len);
149 static bool block_delete(Block*, size_t pos, size_t len);
150 static const char *block_store(Text*, const char *data, size_t len);
151 /* cache layer */
152 static void cache_piece(Text *txt, Piece *p);
153 static bool cache_contains(Text *txt, Piece *p);
154 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len);
155 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len);
156 /* piece management */
157 static Piece *piece_alloc(Text *txt);
158 static void piece_free(Piece *p);
159 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len);
160 static Location piece_get_intern(Text *txt, size_t pos);
161 static Location piece_get_extern(Text *txt, size_t pos);
162 /* span management */
163 static void span_init(Span *span, Piece *start, Piece *end);
164 static void span_swap(Text *txt, Span *old, Span *new);
165 /* change management */
166 static Change *change_alloc(Text *txt, size_t pos);
167 static void change_free(Change *c);
168 /* revision management */
169 static Revision *revision_alloc(Text *txt);
170 static void revision_free(Revision *rev);
171 /* logical line counting cache */
172 static void lineno_cache_invalidate(LineCache *cache);
173 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skiped);
174 static size_t lines_count(Text *txt, size_t pos, size_t len);
175 static void text_saved(Text*, struct stat *meta);
176 static Block *text_block_mmaped(Text*);
178 static ssize_t write_all(int fd, const char *buf, size_t count) {
179 size_t rem = count;
180 while (rem > 0) {
181 ssize_t written = write(fd, buf, rem > INT_MAX ? INT_MAX : rem);
182 if (written < 0) {
183 if (errno == EAGAIN || errno == EINTR)
184 continue;
185 return -1;
186 } else if (written == 0) {
187 break;
189 rem -= written;
190 buf += written;
192 return count - rem;
195 /* allocate a new block of MAX(size, BLOCK_SIZE) bytes */
196 static Block *block_alloc(size_t size) {
197 Block *blk = calloc(1, sizeof *blk);
198 if (!blk)
199 return NULL;
200 if (BLOCK_SIZE > size)
201 size = BLOCK_SIZE;
202 if (!(blk->data = malloc(size))) {
203 free(blk);
204 return NULL;
206 blk->type = MALLOC;
207 blk->size = size;
208 return blk;
211 static Block *block_read(size_t size, int fd) {
212 Block *blk = block_alloc(size);
213 if (!blk)
214 return NULL;
215 char *data = blk->data;
216 size_t rem = size;
217 while (rem > 0) {
218 ssize_t len = read(fd, data, rem);
219 if (len == -1) {
220 block_free(blk);
221 return NULL;
222 } else if (len == 0) {
223 break;
224 } else {
225 data += len;
226 rem -= len;
229 blk->len = size - rem;
230 return blk;
233 static Block *block_mmap(size_t size, int fd, off_t offset) {
234 Block *blk = calloc(1, sizeof *blk);
235 if (!blk)
236 return NULL;
237 if (size) {
238 blk->data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
239 if (blk->data == MAP_FAILED) {
240 free(blk);
241 return NULL;
244 blk->type = MMAP_ORIG;
245 blk->size = size;
246 blk->len = size;
247 return blk;
250 static Block *block_load(int dirfd, const char *filename, enum TextLoadMethod method, struct stat *info) {
251 Block *block = NULL;
252 int fd = openat(dirfd, filename, O_RDONLY);
253 if (fd == -1)
254 goto out;
255 if (fstat(fd, info) == -1)
256 goto out;
257 if (!S_ISREG(info->st_mode)) {
258 errno = S_ISDIR(info->st_mode) ? EISDIR : ENOTSUP;
259 goto out;
262 // XXX: use lseek(fd, 0, SEEK_END); instead?
263 size_t size = info->st_size;
264 if (size == 0)
265 goto out;
266 if (method == TEXT_LOAD_READ || (method == TEXT_LOAD_AUTO && size < BLOCK_MMAP_SIZE))
267 block = block_read(size, fd);
268 else
269 block = block_mmap(size, fd, 0);
270 out:
271 if (fd != -1)
272 close(fd);
273 return block;
276 static void block_free(Block *blk) {
277 if (!blk)
278 return;
279 if (blk->type == MALLOC)
280 free(blk->data);
281 else if ((blk->type == MMAP_ORIG || blk->type == MMAP) && blk->data)
282 munmap(blk->data, blk->size);
283 free(blk);
286 /* check whether block has enough free space to store len bytes */
287 static bool block_capacity(Block *blk, size_t len) {
288 return blk->size - blk->len >= len;
291 /* append data to block, assumes there is enough space available */
292 static const char *block_append(Block *blk, const char *data, size_t len) {
293 char *dest = memcpy(blk->data + blk->len, data, len);
294 blk->len += len;
295 return dest;
298 /* stores the given data in a block, allocates a new one if necessary. returns
299 * a pointer to the storage location or NULL if allocation failed. */
300 static const char *block_store(Text *txt, const char *data, size_t len) {
301 Block *blk = array_get_ptr(&txt->blocks, array_length(&txt->blocks)-1);
302 if (!blk || !block_capacity(blk, len)) {
303 blk = block_alloc(len);
304 if (!blk)
305 return NULL;
306 if (!array_add_ptr(&txt->blocks, blk)) {
307 block_free(blk);
308 return NULL;
311 return block_append(blk, data, len);
314 /* insert data into block at an arbitrary position, this should only be used with
315 * data of the most recently created piece. */
316 static bool block_insert(Block *blk, size_t pos, const char *data, size_t len) {
317 if (pos > blk->len || !block_capacity(blk, len))
318 return false;
319 if (blk->len == pos)
320 return block_append(blk, data, len);
321 char *insert = blk->data + pos;
322 memmove(insert + len, insert, blk->len - pos);
323 memcpy(insert, data, len);
324 blk->len += len;
325 return true;
328 /* delete data from a block at an arbitrary position, this should only be used with
329 * data of the most recently created piece. */
330 static bool block_delete(Block *blk, size_t pos, size_t len) {
331 size_t end;
332 if (!addu(pos, len, &end) || end > blk->len)
333 return false;
334 if (blk->len == pos) {
335 blk->len -= len;
336 return true;
338 char *delete = blk->data + pos;
339 memmove(delete, delete + len, blk->len - pos - len);
340 blk->len -= len;
341 return true;
344 /* cache the given piece if it is the most recently changed one */
345 static void cache_piece(Text *txt, Piece *p) {
346 Block *blk = array_get_ptr(&txt->blocks, array_length(&txt->blocks)-1);
347 if (!blk || p->data < blk->data || p->data + p->len != blk->data + blk->len)
348 return;
349 txt->cache = p;
352 /* check whether the given piece was the most recently modified one */
353 static bool cache_contains(Text *txt, Piece *p) {
354 Block *blk = array_get_ptr(&txt->blocks, array_length(&txt->blocks)-1);
355 Revision *rev = txt->current_revision;
356 if (!blk || !txt->cache || txt->cache != p || !rev || !rev->change)
357 return false;
359 Piece *start = rev->change->new.start;
360 Piece *end = rev->change->new.end;
361 bool found = false;
362 for (Piece *cur = start; !found; cur = cur->next) {
363 if (cur == p)
364 found = true;
365 if (cur == end)
366 break;
369 return found && p->data + p->len == blk->data + blk->len;
372 /* try to insert a chunk of data at a given piece offset. the insertion is only
373 * performed if the piece is the most recenetly changed one. the legnth of the
374 * piece, the span containing it and the whole text is adjusted accordingly */
375 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len) {
376 if (!cache_contains(txt, p))
377 return false;
378 Block *blk = array_get_ptr(&txt->blocks, array_length(&txt->blocks)-1);
379 size_t bufpos = p->data + off - blk->data;
380 if (!block_insert(blk, bufpos, data, len))
381 return false;
382 p->len += len;
383 txt->current_revision->change->new.len += len;
384 txt->size += len;
385 return true;
388 /* try to delete a chunk of data at a given piece offset. the deletion is only
389 * performed if the piece is the most recenetly changed one and the whole
390 * affected range lies within it. the legnth of the piece, the span containing it
391 * and the whole text is adjusted accordingly */
392 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len) {
393 if (!cache_contains(txt, p))
394 return false;
395 Block *blk = array_get_ptr(&txt->blocks, array_length(&txt->blocks)-1);
396 size_t end;
397 size_t bufpos = p->data + off - blk->data;
398 if (!addu(off, len, &end) || end > p->len || !block_delete(blk, bufpos, len))
399 return false;
400 p->len -= len;
401 txt->current_revision->change->new.len -= len;
402 txt->size -= len;
403 return true;
406 /* initialize a span and calculate its length */
407 static void span_init(Span *span, Piece *start, Piece *end) {
408 size_t len = 0;
409 span->start = start;
410 span->end = end;
411 for (Piece *p = start; p; p = p->next) {
412 len += p->len;
413 if (p == end)
414 break;
416 span->len = len;
419 /* swap out an old span and replace it with a new one.
421 * - if old is an empty span do not remove anything, just insert the new one
422 * - if new is an empty span do not insert anything, just remove the old one
424 * adjusts the document size accordingly.
426 static void span_swap(Text *txt, Span *old, Span *new) {
427 if (old->len == 0 && new->len == 0) {
428 return;
429 } else if (old->len == 0) {
430 /* insert new span */
431 new->start->prev->next = new->start;
432 new->end->next->prev = new->end;
433 } else if (new->len == 0) {
434 /* delete old span */
435 old->start->prev->next = old->end->next;
436 old->end->next->prev = old->start->prev;
437 } else {
438 /* replace old with new */
439 old->start->prev->next = new->start;
440 old->end->next->prev = new->end;
442 txt->size -= old->len;
443 txt->size += new->len;
446 /* Allocate a new revision and place it in the revision graph.
447 * All further changes will be associated with this revision. */
448 static Revision *revision_alloc(Text *txt) {
449 Revision *rev = calloc(1, sizeof *rev);
450 if (!rev)
451 return NULL;
452 rev->time = time(NULL);
453 txt->current_revision = rev;
455 /* set sequence number */
456 if (!txt->last_revision)
457 rev->seq = 0;
458 else
459 rev->seq = txt->last_revision->seq + 1;
461 /* set earlier, later pointers */
462 if (txt->last_revision)
463 txt->last_revision->later = rev;
464 rev->earlier = txt->last_revision;
466 if (!txt->history) {
467 txt->history = rev;
468 return rev;
471 /* set prev, next pointers */
472 rev->prev = txt->history;
473 txt->history->next = rev;
474 txt->history = rev;
475 return rev;
478 static void revision_free(Revision *rev) {
479 if (!rev)
480 return;
481 for (Change *next, *c = rev->change; c; c = next) {
482 next = c->next;
483 change_free(c);
485 free(rev);
488 static Piece *piece_alloc(Text *txt) {
489 Piece *p = calloc(1, sizeof *p);
490 if (!p)
491 return NULL;
492 p->text = txt;
493 p->global_next = txt->pieces;
494 if (txt->pieces)
495 txt->pieces->global_prev = p;
496 txt->pieces = p;
497 return p;
500 static void piece_free(Piece *p) {
501 if (!p)
502 return;
503 if (p->global_prev)
504 p->global_prev->global_next = p->global_next;
505 if (p->global_next)
506 p->global_next->global_prev = p->global_prev;
507 if (p->text->pieces == p)
508 p->text->pieces = p->global_next;
509 if (p->text->cache == p)
510 p->text->cache = NULL;
511 free(p);
514 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len) {
515 p->prev = prev;
516 p->next = next;
517 p->data = data;
518 p->len = len;
521 /* returns the piece holding the text at byte offset pos. if pos happens to
522 * be at a piece boundry i.e. the first byte of a piece then the previous piece
523 * to the left is returned with an offset of piece->len. this is convenient for
524 * modifications to the piece chain where both pieces (the returned one and the
525 * one following it) are needed, but unsuitable as a public interface.
527 * in particular if pos is zero, the begin sentinel piece is returned.
529 static Location piece_get_intern(Text *txt, size_t pos) {
530 size_t cur = 0;
531 for (Piece *p = &txt->begin; p->next; p = p->next) {
532 if (cur <= pos && pos <= cur + p->len)
533 return (Location){ .piece = p, .off = pos - cur };
534 cur += p->len;
537 return (Location){ 0 };
540 /* similiar to piece_get_intern but usable as a public API. returns the piece
541 * holding the text at byte offset pos. never returns a sentinel piece.
542 * it pos is the end of file (== text_size()) and the file is not empty then
543 * the last piece holding data is returned.
545 static Location piece_get_extern(Text *txt, size_t pos) {
546 size_t cur = 0;
547 Piece *p;
549 for (p = txt->begin.next; p->next; p = p->next) {
550 if (cur <= pos && pos < cur + p->len)
551 return (Location){ .piece = p, .off = pos - cur };
552 cur += p->len;
555 if (cur == pos)
556 return (Location){ .piece = p->prev, .off = p->prev->len };
558 return (Location){ 0 };
561 /* allocate a new change, associate it with current revision or a newly
562 * allocated one if none exists. */
563 static Change *change_alloc(Text *txt, size_t pos) {
564 Revision *rev = txt->current_revision;
565 if (!rev) {
566 rev = revision_alloc(txt);
567 if (!rev)
568 return NULL;
570 Change *c = calloc(1, sizeof *c);
571 if (!c)
572 return NULL;
573 c->pos = pos;
574 c->next = rev->change;
575 if (rev->change)
576 rev->change->prev = c;
577 rev->change = c;
578 return c;
581 static void change_free(Change *c) {
582 if (!c)
583 return;
584 /* only free the new part of the span, the old one is still in use */
585 if (c->new.start != c->new.end)
586 piece_free(c->new.end);
587 piece_free(c->new.start);
588 free(c);
591 /* When inserting new data there are 2 cases to consider.
593 * - in the first the insertion point falls into the middle of an exisiting
594 * piece which is replaced by three new pieces:
596 * /-+ --> +---------------+ --> +-\
597 * | | | existing text | | |
598 * \-+ <-- +---------------+ <-- +-/
600 * Insertion point for "demo "
602 * /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
603 * | | | existing| |demo | |text | | |
604 * \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
606 * - the second case deals with an insertion point at a piece boundry:
608 * /-+ --> +---------------+ --> +-\
609 * | | | existing text | | |
610 * \-+ <-- +---------------+ <-- +-/
612 * Insertion point for "short"
614 * /-+ --> +-----+ --> +---------------+ --> +-\
615 * | | |short| | existing text | | |
616 * \-+ <-- +-----+ <-- +---------------+ <-- +-/
618 bool text_insert(Text *txt, size_t pos, const char *data, size_t len) {
619 if (len == 0)
620 return true;
621 if (pos > txt->size)
622 return false;
623 if (pos < txt->lines.pos)
624 lineno_cache_invalidate(&txt->lines);
626 Location loc = piece_get_intern(txt, pos);
627 Piece *p = loc.piece;
628 if (!p)
629 return false;
630 size_t off = loc.off;
631 if (cache_insert(txt, p, off, data, len))
632 return true;
634 Change *c = change_alloc(txt, pos);
635 if (!c)
636 return false;
638 if (!(data = block_store(txt, data, len)))
639 return false;
641 Piece *new = NULL;
643 if (off == p->len) {
644 /* insert between two existing pieces, hence there is nothing to
645 * remove, just add a new piece holding the extra text */
646 if (!(new = piece_alloc(txt)))
647 return false;
648 piece_init(new, p, p->next, data, len);
649 span_init(&c->new, new, new);
650 span_init(&c->old, NULL, NULL);
651 } else {
652 /* insert into middle of an existing piece, therfore split the old
653 * piece. that is we have 3 new pieces one containing the content
654 * before the insertion point then one holding the newly inserted
655 * text and one holding the content after the insertion point.
657 Piece *before = piece_alloc(txt);
658 new = piece_alloc(txt);
659 Piece *after = piece_alloc(txt);
660 if (!before || !new || !after)
661 return false;
662 piece_init(before, p->prev, new, p->data, off);
663 piece_init(new, before, after, data, len);
664 piece_init(after, new, p->next, p->data + off, p->len - off);
666 span_init(&c->new, before, after);
667 span_init(&c->old, p, p);
670 cache_piece(txt, new);
671 span_swap(txt, &c->old, &c->new);
672 return true;
675 static bool text_vprintf(Text *txt, size_t pos, const char *format, va_list ap) {
676 va_list ap_save;
677 va_copy(ap_save, ap);
678 int len = vsnprintf(NULL, 0, format, ap);
679 if (len == -1) {
680 va_end(ap_save);
681 return false;
683 char *buf = malloc(len+1);
684 bool ret = buf && (vsnprintf(buf, len+1, format, ap_save) == len) && text_insert(txt, pos, buf, len);
685 free(buf);
686 va_end(ap_save);
687 return ret;
690 bool text_appendf(Text *txt, const char *format, ...) {
691 va_list ap;
692 va_start(ap, format);
693 bool ret = text_vprintf(txt, text_size(txt), format, ap);
694 va_end(ap);
695 return ret;
698 bool text_printf(Text *txt, size_t pos, const char *format, ...) {
699 va_list ap;
700 va_start(ap, format);
701 bool ret = text_vprintf(txt, pos, format, ap);
702 va_end(ap);
703 return ret;
706 static size_t revision_undo(Text *txt, Revision *rev) {
707 size_t pos = EPOS;
708 for (Change *c = rev->change; c; c = c->next) {
709 span_swap(txt, &c->new, &c->old);
710 pos = c->pos;
712 return pos;
715 static size_t revision_redo(Text *txt, Revision *rev) {
716 size_t pos = EPOS;
717 Change *c = rev->change;
718 while (c->next)
719 c = c->next;
720 for ( ; c; c = c->prev) {
721 span_swap(txt, &c->old, &c->new);
722 pos = c->pos;
723 if (c->new.len > c->old.len)
724 pos += c->new.len - c->old.len;
726 return pos;
729 size_t text_undo(Text *txt) {
730 size_t pos = EPOS;
731 /* taking rev snapshot makes sure that txt->current_revision is reset */
732 text_snapshot(txt);
733 Revision *rev = txt->history->prev;
734 if (!rev)
735 return pos;
736 pos = revision_undo(txt, txt->history);
737 txt->history = rev;
738 lineno_cache_invalidate(&txt->lines);
739 return pos;
742 size_t text_redo(Text *txt) {
743 size_t pos = EPOS;
744 /* taking a snapshot makes sure that txt->current_revision is reset */
745 text_snapshot(txt);
746 Revision *rev = txt->history->next;
747 if (!rev)
748 return pos;
749 pos = revision_redo(txt, rev);
750 txt->history = rev;
751 lineno_cache_invalidate(&txt->lines);
752 return pos;
755 static bool history_change_branch(Revision *rev) {
756 bool changed = false;
757 while (rev->prev) {
758 if (rev->prev->next != rev) {
759 rev->prev->next = rev;
760 changed = true;
762 rev = rev->prev;
764 return changed;
767 static size_t history_traverse_to(Text *txt, Revision *rev) {
768 size_t pos = EPOS;
769 if (!rev)
770 return pos;
771 bool changed = history_change_branch(rev);
772 if (!changed) {
773 if (rev->seq == txt->history->seq) {
774 return txt->lines.pos;
775 } else if (rev->seq > txt->history->seq) {
776 while (txt->history != rev)
777 pos = text_redo(txt);
778 return pos;
779 } else if (rev->seq < txt->history->seq) {
780 while (txt->history != rev)
781 pos = text_undo(txt);
782 return pos;
784 } else {
785 while (txt->history->prev && txt->history->prev->next == txt->history)
786 text_undo(txt);
787 pos = text_undo(txt);
788 while (txt->history != rev)
789 pos = text_redo(txt);
790 return pos;
792 return pos;
795 size_t text_earlier(Text *txt) {
796 return history_traverse_to(txt, txt->history->earlier);
799 size_t text_later(Text *txt) {
800 return history_traverse_to(txt, txt->history->later);
803 size_t text_restore(Text *txt, time_t time) {
804 Revision *rev = txt->history;
805 while (time < rev->time && rev->earlier)
806 rev = rev->earlier;
807 while (time > rev->time && rev->later)
808 rev = rev->later;
809 time_t diff = labs(rev->time - time);
810 if (rev->earlier && rev->earlier != txt->history && labs(rev->earlier->time - time) < diff)
811 rev = rev->earlier;
812 if (rev->later && rev->later != txt->history && labs(rev->later->time - time) < diff)
813 rev = rev->later;
814 return history_traverse_to(txt, rev);
817 time_t text_state(Text *txt) {
818 return txt->history->time;
821 static bool preserve_acl(int src, int dest) {
822 #if CONFIG_ACL
823 acl_t acl = acl_get_fd(src);
824 if (!acl)
825 return errno == ENOTSUP ? true : false;
826 if (acl_set_fd(dest, acl) == -1) {
827 acl_free(acl);
828 return false;
830 acl_free(acl);
831 #endif /* CONFIG_ACL */
832 return true;
835 static bool preserve_selinux_context(int src, int dest) {
836 #if CONFIG_SELINUX
837 char *context = NULL;
838 if (!is_selinux_enabled())
839 return true;
840 if (fgetfilecon(src, &context) == -1)
841 return errno == ENOTSUP ? true : false;
842 if (fsetfilecon(dest, context) == -1) {
843 freecon(context);
844 return false;
846 freecon(context);
847 #endif /* CONFIG_SELINUX */
848 return true;
851 static int mkstempat(int dirfd, char *template) {
852 if (dirfd == AT_FDCWD)
853 return mkstemp(template);
854 // FIXME: not thread safe
855 int fd = -1;
856 int cwd = open(".", O_RDONLY|O_DIRECTORY);
857 if (cwd == -1)
858 goto err;
859 if (fchdir(dirfd) == -1)
860 goto err;
861 fd = mkstemp(template);
862 err:
863 if (cwd != -1) {
864 fchdir(cwd);
865 close(cwd);
867 return fd;
870 /* Create a new file named `.filename.vis.XXXXXX` (where `XXXXXX` is a
871 * randomly generated, unique suffix) and try to preserve all important
872 * meta data. After the file content has been written to this temporary
873 * file, text_save_commit_atomic will atomically move it to its final
874 * (possibly already existing) destination using rename(2).
876 * This approach does not work if:
878 * - the file is a symbolic link
879 * - the file is a hard link
880 * - file ownership can not be preserved
881 * - file group can not be preserved
882 * - directory permissions do not allow creation of a new file
883 * - POSXI ACL can not be preserved (if enabled)
884 * - SELinux security context can not be preserved (if enabled)
886 static bool text_save_begin_atomic(TextSave *ctx) {
887 int oldfd, saved_errno;
888 if ((oldfd = openat(ctx->dirfd, ctx->filename, O_RDONLY)) == -1 && errno != ENOENT)
889 goto err;
890 struct stat oldmeta = { 0 };
891 if (oldfd != -1 && fstatat(ctx->dirfd, ctx->filename, &oldmeta, AT_SYMLINK_NOFOLLOW) == -1)
892 goto err;
893 if (oldfd != -1) {
894 if (S_ISLNK(oldmeta.st_mode)) /* symbolic link */
895 goto err;
896 if (oldmeta.st_nlink > 1) /* hard link */
897 goto err;
900 char suffix[] = ".vis.XXXXXX";
901 size_t len = strlen(ctx->filename) + sizeof("./.") + sizeof(suffix);
902 char *dir = strdup(ctx->filename);
903 char *base = strdup(ctx->filename);
905 if (!(ctx->tmpname = malloc(len)) || !dir || !base) {
906 free(dir);
907 free(base);
908 goto err;
911 snprintf(ctx->tmpname, len, "%s/.%s%s", dirname(dir), basename(base), suffix);
912 free(dir);
913 free(base);
915 if ((ctx->fd = mkstempat(ctx->dirfd, ctx->tmpname)) == -1)
916 goto err;
918 if (oldfd == -1) {
919 mode_t mask = umask(0);
920 umask(mask);
921 if (fchmod(ctx->fd, 0666 & ~mask) == -1)
922 goto err;
923 } else {
924 if (fchmod(ctx->fd, oldmeta.st_mode) == -1)
925 goto err;
926 if (!preserve_acl(oldfd, ctx->fd) || !preserve_selinux_context(oldfd, ctx->fd))
927 goto err;
928 /* change owner if necessary */
929 if (oldmeta.st_uid != getuid() && fchown(ctx->fd, oldmeta.st_uid, (uid_t)-1) == -1)
930 goto err;
931 /* change group if necessary, in case of failure some editors reset
932 * the group permissions to the same as for others */
933 if (oldmeta.st_gid != getgid() && fchown(ctx->fd, (uid_t)-1, oldmeta.st_gid) == -1)
934 goto err;
935 close(oldfd);
938 ctx->type = TEXT_SAVE_ATOMIC;
939 return true;
940 err:
941 saved_errno = errno;
942 if (oldfd != -1)
943 close(oldfd);
944 if (ctx->fd != -1)
945 close(ctx->fd);
946 ctx->fd = -1;
947 free(ctx->tmpname);
948 ctx->tmpname = NULL;
949 errno = saved_errno;
950 return false;
953 static bool text_save_commit_atomic(TextSave *ctx) {
954 if (fsync(ctx->fd) == -1)
955 return false;
957 struct stat meta = { 0 };
958 if (fstat(ctx->fd, &meta) == -1)
959 return false;
961 bool close_failed = (close(ctx->fd) == -1);
962 ctx->fd = -1;
963 if (close_failed)
964 return false;
966 if (renameat(ctx->dirfd, ctx->tmpname, ctx->dirfd, ctx->filename) == -1)
967 return false;
969 free(ctx->tmpname);
970 ctx->tmpname = NULL;
972 int dir = openat(ctx->dirfd, dirname(ctx->filename), O_DIRECTORY|O_RDONLY);
973 if (dir == -1)
974 return false;
976 if (fsync(dir) == -1 && errno != EINVAL) {
977 close(dir);
978 return false;
981 if (close(dir) == -1)
982 return false;
984 text_saved(ctx->txt, &meta);
985 return true;
988 static bool text_save_begin_inplace(TextSave *ctx) {
989 Text *txt = ctx->txt;
990 struct stat now = { 0 };
991 int newfd = -1, saved_errno;
992 if ((ctx->fd = openat(ctx->dirfd, ctx->filename, O_CREAT|O_WRONLY, 0666)) == -1)
993 goto err;
994 if (fstat(ctx->fd, &now) == -1)
995 goto err;
996 struct stat loaded = text_stat(txt);
997 Block *block = text_block_mmaped(txt);
998 if (block && now.st_dev == loaded.st_dev && now.st_ino == loaded.st_ino) {
999 /* The file we are going to overwrite is currently mmap-ed from
1000 * text_load, therefore we copy the mmap-ed block to a temporary
1001 * file and remap it at the same position such that all pointers
1002 * from the various pieces are still valid.
1004 size_t size = block->size;
1005 char tmpname[32] = "/tmp/vis-XXXXXX";
1006 newfd = mkstemp(tmpname);
1007 if (newfd == -1)
1008 goto err;
1009 if (unlink(tmpname) == -1)
1010 goto err;
1011 ssize_t written = write_all(newfd, block->data, size);
1012 if (written == -1 || (size_t)written != size)
1013 goto err;
1014 void *data = mmap(block->data, size, PROT_READ, MAP_SHARED|MAP_FIXED, newfd, 0);
1015 if (data == MAP_FAILED)
1016 goto err;
1017 bool close_failed = (close(newfd) == -1);
1018 newfd = -1;
1019 if (close_failed)
1020 goto err;
1021 block->type = MMAP;
1023 /* overwrite the existing file content, if something goes wrong
1024 * here we are screwed, TODO: make a backup before? */
1025 if (ftruncate(ctx->fd, 0) == -1)
1026 goto err;
1027 ctx->type = TEXT_SAVE_INPLACE;
1028 return true;
1029 err:
1030 saved_errno = errno;
1031 if (newfd != -1)
1032 close(newfd);
1033 if (ctx->fd != -1)
1034 close(ctx->fd);
1035 ctx->fd = -1;
1036 errno = saved_errno;
1037 return false;
1040 static bool text_save_commit_inplace(TextSave *ctx) {
1041 if (fsync(ctx->fd) == -1)
1042 return false;
1043 struct stat meta = { 0 };
1044 if (fstat(ctx->fd, &meta) == -1)
1045 return false;
1046 if (close(ctx->fd) == -1)
1047 return false;
1048 text_saved(ctx->txt, &meta);
1049 return true;
1052 TextSave *text_save_begin(Text *txt, int dirfd, const char *filename, enum TextSaveMethod type) {
1053 if (!filename)
1054 return NULL;
1055 TextSave *ctx = calloc(1, sizeof *ctx);
1056 if (!ctx)
1057 return NULL;
1058 ctx->txt = txt;
1059 ctx->fd = -1;
1060 ctx->dirfd = dirfd;
1061 if (!(ctx->filename = strdup(filename)))
1062 goto err;
1063 errno = 0;
1064 if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_ATOMIC) && text_save_begin_atomic(ctx))
1065 return ctx;
1066 if (errno == ENOSPC)
1067 goto err;
1068 if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_INPLACE) && text_save_begin_inplace(ctx))
1069 return ctx;
1070 err:
1071 text_save_cancel(ctx);
1072 return NULL;
1075 bool text_save_commit(TextSave *ctx) {
1076 if (!ctx)
1077 return true;
1078 bool ret;
1079 switch (ctx->type) {
1080 case TEXT_SAVE_ATOMIC:
1081 ret = text_save_commit_atomic(ctx);
1082 break;
1083 case TEXT_SAVE_INPLACE:
1084 ret = text_save_commit_inplace(ctx);
1085 break;
1086 default:
1087 ret = false;
1088 break;
1091 text_save_cancel(ctx);
1092 return ret;
1095 void text_save_cancel(TextSave *ctx) {
1096 if (!ctx)
1097 return;
1098 int saved_errno = errno;
1099 if (ctx->fd != -1)
1100 close(ctx->fd);
1101 if (ctx->tmpname && ctx->tmpname[0])
1102 unlinkat(ctx->dirfd, ctx->tmpname, 0);
1103 free(ctx->tmpname);
1104 free(ctx->filename);
1105 free(ctx);
1106 errno = saved_errno;
1109 /* First try to save the file atomically using rename(2) if this does not
1110 * work overwrite the file in place. However if something goes wrong during
1111 * this overwrite the original file is permanently damaged.
1113 bool text_save(Text *txt, const char *filename) {
1114 return text_saveat(txt, AT_FDCWD, filename);
1117 bool text_saveat(Text *txt, int dirfd, const char *filename) {
1118 return text_saveat_method(txt, dirfd, filename, TEXT_SAVE_AUTO);
1121 bool text_save_method(Text *txt, const char *filename, enum TextSaveMethod method) {
1122 return text_saveat_method(txt, AT_FDCWD, filename, method);
1125 bool text_saveat_method(Text *txt, int dirfd, const char *filename, enum TextSaveMethod method) {
1126 if (!filename) {
1127 text_saved(txt, NULL);
1128 return true;
1130 TextSave *ctx = text_save_begin(txt, dirfd, filename, method);
1131 if (!ctx)
1132 return false;
1133 Filerange range = (Filerange){ .start = 0, .end = text_size(txt) };
1134 ssize_t written = text_save_write_range(ctx, &range);
1135 if (written == -1 || (size_t)written != text_range_size(&range)) {
1136 text_save_cancel(ctx);
1137 return false;
1139 return text_save_commit(ctx);
1142 ssize_t text_save_write_range(TextSave *ctx, Filerange *range) {
1143 return text_write_range(ctx->txt, range, ctx->fd);
1146 ssize_t text_write(Text *txt, int fd) {
1147 Filerange r = (Filerange){ .start = 0, .end = text_size(txt) };
1148 return text_write_range(txt, &r, fd);
1151 ssize_t text_write_range(Text *txt, Filerange *range, int fd) {
1152 size_t size = text_range_size(range), rem = size;
1153 for (Iterator it = text_iterator_get(txt, range->start);
1154 rem > 0 && text_iterator_valid(&it);
1155 text_iterator_next(&it)) {
1156 size_t prem = it.end - it.text;
1157 if (prem > rem)
1158 prem = rem;
1159 ssize_t written = write_all(fd, it.text, prem);
1160 if (written == -1)
1161 return -1;
1162 rem -= written;
1163 if ((size_t)written != prem)
1164 break;
1166 return size - rem;
1169 Text *text_load(const char *filename) {
1170 return text_load_method(filename, TEXT_LOAD_AUTO);
1173 Text *text_loadat(int dirfd, const char *filename) {
1174 return text_loadat_method(dirfd, filename, TEXT_LOAD_AUTO);
1177 Text *text_load_method(const char *filename, enum TextLoadMethod method) {
1178 return text_loadat_method(AT_FDCWD, filename, method);
1181 Text *text_loadat_method(int dirfd, const char *filename, enum TextLoadMethod method) {
1182 Text *txt = calloc(1, sizeof *txt);
1183 if (!txt)
1184 return NULL;
1185 Piece *p = piece_alloc(txt);
1186 if (!p)
1187 goto out;
1188 Block *block = NULL;
1189 array_init(&txt->blocks);
1190 lineno_cache_invalidate(&txt->lines);
1191 if (filename) {
1192 errno = 0;
1193 block = block_load(dirfd, filename, method, &txt->info);
1194 if (!block && errno)
1195 goto out;
1196 if (block && !array_add_ptr(&txt->blocks, block)) {
1197 block_free(block);
1198 goto out;
1202 if (!block)
1203 piece_init(p, &txt->begin, &txt->end, "\0", 0);
1204 else
1205 piece_init(p, &txt->begin, &txt->end, block->data, block->len);
1207 piece_init(&txt->begin, NULL, p, NULL, 0);
1208 piece_init(&txt->end, p, NULL, NULL, 0);
1209 txt->size = p->len;
1210 /* write an empty revision */
1211 change_alloc(txt, EPOS);
1212 text_snapshot(txt);
1213 txt->saved_revision = txt->history;
1215 return txt;
1216 out:
1217 text_free(txt);
1218 return NULL;
1221 struct stat text_stat(Text *txt) {
1222 return txt->info;
1225 static void text_saved(Text *txt, struct stat *meta) {
1226 if (meta)
1227 txt->info = *meta;
1228 txt->saved_revision = txt->history;
1229 text_snapshot(txt);
1232 static Block *text_block_mmaped(Text *txt) {
1233 Block *block = array_get_ptr(&txt->blocks, 0);
1234 if (block && block->type == MMAP_ORIG && block->size)
1235 return block;
1236 return NULL;
1239 /* A delete operation can either start/stop midway through a piece or at
1240 * a boundry. In the former case a new piece is created to represent the
1241 * remaining text before/after the modification point.
1243 * /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
1244 * | | | existing| |demo | |text | | |
1245 * \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
1246 * ^ ^
1247 * |------ delete range -----|
1249 * /-+ --> +----+ --> +--+ --> +-\
1250 * | | | exi| |t | | |
1251 * \-+ <-- +----+ <-- +--+ <-- +-/
1253 bool text_delete(Text *txt, size_t pos, size_t len) {
1254 if (len == 0)
1255 return true;
1256 size_t pos_end;
1257 if (!addu(pos, len, &pos_end) || pos_end > txt->size)
1258 return false;
1259 if (pos < txt->lines.pos)
1260 lineno_cache_invalidate(&txt->lines);
1262 Location loc = piece_get_intern(txt, pos);
1263 Piece *p = loc.piece;
1264 if (!p)
1265 return false;
1266 size_t off = loc.off;
1267 if (cache_delete(txt, p, off, len))
1268 return true;
1269 Change *c = change_alloc(txt, pos);
1270 if (!c)
1271 return false;
1273 bool midway_start = false, midway_end = false; /* split pieces? */
1274 Piece *before, *after; /* unmodified pieces before/after deletion point */
1275 Piece *start, *end; /* span which is removed */
1276 size_t cur; /* how much has already been deleted */
1278 if (off == p->len) {
1279 /* deletion starts at a piece boundry */
1280 cur = 0;
1281 before = p;
1282 start = p->next;
1283 } else {
1284 /* deletion starts midway through a piece */
1285 midway_start = true;
1286 cur = p->len - off;
1287 start = p;
1288 before = piece_alloc(txt);
1289 if (!before)
1290 return false;
1293 /* skip all pieces which fall into deletion range */
1294 while (cur < len) {
1295 p = p->next;
1296 cur += p->len;
1299 if (cur == len) {
1300 /* deletion stops at a piece boundry */
1301 end = p;
1302 after = p->next;
1303 } else {
1304 /* cur > len: deletion stops midway through a piece */
1305 midway_end = true;
1306 end = p;
1307 after = piece_alloc(txt);
1308 if (!after)
1309 return false;
1310 piece_init(after, before, p->next, p->data + p->len - (cur - len), cur - len);
1313 if (midway_start) {
1314 /* we finally know which piece follows our newly allocated before piece */
1315 piece_init(before, start->prev, after, start->data, off);
1318 Piece *new_start = NULL, *new_end = NULL;
1319 if (midway_start) {
1320 new_start = before;
1321 if (!midway_end)
1322 new_end = before;
1324 if (midway_end) {
1325 if (!midway_start)
1326 new_start = after;
1327 new_end = after;
1330 span_init(&c->new, new_start, new_end);
1331 span_init(&c->old, start, end);
1332 span_swap(txt, &c->old, &c->new);
1333 return true;
1336 bool text_delete_range(Text *txt, Filerange *r) {
1337 if (!text_range_valid(r))
1338 return false;
1339 return text_delete(txt, r->start, text_range_size(r));
1342 /* preserve the current text content such that it can be restored by
1343 * means of undo/redo operations */
1344 void text_snapshot(Text *txt) {
1345 if (txt->current_revision)
1346 txt->last_revision = txt->current_revision;
1347 txt->current_revision = NULL;
1348 txt->cache = NULL;
1352 void text_free(Text *txt) {
1353 if (!txt)
1354 return;
1356 // free history
1357 Revision *hist = txt->history;
1358 while (hist && hist->prev)
1359 hist = hist->prev;
1360 while (hist) {
1361 Revision *later = hist->later;
1362 revision_free(hist);
1363 hist = later;
1366 for (Piece *next, *p = txt->pieces; p; p = next) {
1367 next = p->global_next;
1368 piece_free(p);
1371 for (size_t i = 0, len = array_length(&txt->blocks); i < len; i++)
1372 block_free(array_get_ptr(&txt->blocks, i));
1373 array_release(&txt->blocks);
1375 free(txt);
1378 bool text_modified(Text *txt) {
1379 return txt->saved_revision != txt->history;
1382 bool text_mmaped(Text *txt, const char *ptr) {
1383 uintptr_t addr = (uintptr_t)ptr;
1384 for (size_t i = 0, len = array_length(&txt->blocks); i < len; i++) {
1385 Block *blk = array_get_ptr(&txt->blocks, i);
1386 if ((blk->type == MMAP_ORIG || blk->type == MMAP) &&
1387 (uintptr_t)(blk->data) <= addr && addr < (uintptr_t)(blk->data + blk->size))
1388 return true;
1390 return false;
1393 static bool text_iterator_init(Iterator *it, size_t pos, Piece *p, size_t off) {
1394 Iterator iter = (Iterator){
1395 .pos = pos,
1396 .piece = p,
1397 .start = p ? p->data : NULL,
1398 .end = p ? p->data + p->len : NULL,
1399 .text = p ? p->data + off : NULL,
1401 *it = iter;
1402 return text_iterator_valid(it);
1405 Iterator text_iterator_get(Text *txt, size_t pos) {
1406 Iterator it;
1407 Location loc = piece_get_extern(txt, pos);
1408 text_iterator_init(&it, pos, loc.piece, loc.off);
1409 return it;
1412 bool text_iterator_byte_get(Iterator *it, char *b) {
1413 if (text_iterator_valid(it)) {
1414 if (it->start <= it->text && it->text < it->end) {
1415 *b = *it->text;
1416 return true;
1417 } else if (it->pos == it->piece->text->size) { /* EOF */
1418 *b = '\0';
1419 return true;
1422 return false;
1425 bool text_iterator_next(Iterator *it) {
1426 size_t rem = it->end - it->text;
1427 return text_iterator_init(it, it->pos+rem, it->piece ? it->piece->next : NULL, 0);
1430 bool text_iterator_prev(Iterator *it) {
1431 size_t off = it->text - it->start;
1432 size_t len = it->piece && it->piece->prev ? it->piece->prev->len : 0;
1433 return text_iterator_init(it, it->pos-off, it->piece ? it->piece->prev : NULL, len);
1436 bool text_iterator_valid(const Iterator *it) {
1437 /* filter out sentinel nodes */
1438 return it->piece && it->piece->text;
1441 bool text_iterator_byte_next(Iterator *it, char *b) {
1442 if (!it->piece || !it->piece->next)
1443 return false;
1444 bool eof = true;
1445 if (it->text < it->end) {
1446 it->text++;
1447 it->pos++;
1448 eof = false;
1449 } else if (!it->piece->prev) {
1450 eof = false;
1453 while (it->text == it->end) {
1454 if (!text_iterator_next(it)) {
1455 if (eof)
1456 return false;
1457 if (b)
1458 *b = '\0';
1459 return text_iterator_prev(it);
1463 if (b)
1464 *b = *it->text;
1465 return true;
1468 bool text_iterator_byte_prev(Iterator *it, char *b) {
1469 if (!it->piece || !it->piece->prev)
1470 return false;
1471 bool eof = !it->piece->next;
1472 while (it->text == it->start) {
1473 if (!text_iterator_prev(it)) {
1474 if (!eof)
1475 return false;
1476 if (b)
1477 *b = '\0';
1478 return text_iterator_next(it);
1482 --it->text;
1483 --it->pos;
1485 if (b)
1486 *b = *it->text;
1487 return true;
1490 bool text_iterator_byte_find_prev(Iterator *it, char b) {
1491 while (it->text) {
1492 const char *match = memrchr(it->start, b, it->text - it->start);
1493 if (match) {
1494 it->pos -= it->text - match;
1495 it->text = match;
1496 return true;
1498 text_iterator_prev(it);
1500 text_iterator_next(it);
1501 return false;
1504 bool text_iterator_byte_find_next(Iterator *it, char b) {
1505 while (it->text) {
1506 const char *match = memchr(it->text, b, it->end - it->text);
1507 if (match) {
1508 it->pos += match - it->text;
1509 it->text = match;
1510 return true;
1512 text_iterator_next(it);
1514 text_iterator_prev(it);
1515 return false;
1518 bool text_iterator_codepoint_next(Iterator *it, char *c) {
1519 while (text_iterator_byte_next(it, NULL)) {
1520 if (ISUTF8(*it->text)) {
1521 if (c)
1522 *c = *it->text;
1523 return true;
1526 return false;
1529 bool text_iterator_codepoint_prev(Iterator *it, char *c) {
1530 while (text_iterator_byte_prev(it, NULL)) {
1531 if (ISUTF8(*it->text)) {
1532 if (c)
1533 *c = *it->text;
1534 return true;
1537 return false;
1540 bool text_iterator_char_next(Iterator *it, char *c) {
1541 if (!text_iterator_codepoint_next(it, c))
1542 return false;
1543 mbstate_t ps = { 0 };
1544 for (;;) {
1545 char buf[MB_LEN_MAX];
1546 size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf);
1547 wchar_t wc;
1548 size_t wclen = mbrtowc(&wc, buf, len, &ps);
1549 if (wclen == (size_t)-1 && errno == EILSEQ) {
1550 return true;
1551 } else if (wclen == (size_t)-2) {
1552 return false;
1553 } else if (wclen == 0) {
1554 return true;
1555 } else {
1556 int width = wcwidth(wc);
1557 if (width != 0)
1558 return true;
1559 if (!text_iterator_codepoint_next(it, c))
1560 return false;
1563 return true;
1566 bool text_iterator_char_prev(Iterator *it, char *c) {
1567 if (!text_iterator_codepoint_prev(it, c))
1568 return false;
1569 for (;;) {
1570 char buf[MB_LEN_MAX];
1571 size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf);
1572 wchar_t wc;
1573 mbstate_t ps = { 0 };
1574 size_t wclen = mbrtowc(&wc, buf, len, &ps);
1575 if (wclen == (size_t)-1 && errno == EILSEQ) {
1576 return true;
1577 } else if (wclen == (size_t)-2) {
1578 return false;
1579 } else if (wclen == 0) {
1580 return true;
1581 } else {
1582 int width = wcwidth(wc);
1583 if (width != 0)
1584 return true;
1585 if (!text_iterator_codepoint_prev(it, c))
1586 return false;
1589 return true;
1592 bool text_byte_get(Text *txt, size_t pos, char *byte) {
1593 return text_bytes_get(txt, pos, 1, byte);
1596 size_t text_bytes_get(Text *txt, size_t pos, size_t len, char *buf) {
1597 if (!buf)
1598 return 0;
1599 char *cur = buf;
1600 size_t rem = len;
1601 for (Iterator it = text_iterator_get(txt, pos);
1602 text_iterator_valid(&it);
1603 text_iterator_next(&it)) {
1604 if (rem == 0)
1605 break;
1606 size_t piece_len = it.end - it.text;
1607 if (piece_len > rem)
1608 piece_len = rem;
1609 if (piece_len) {
1610 memcpy(cur, it.text, piece_len);
1611 cur += piece_len;
1612 rem -= piece_len;
1615 return len - rem;
1618 char *text_bytes_alloc0(Text *txt, size_t pos, size_t len) {
1619 if (len == SIZE_MAX)
1620 return NULL;
1621 char *buf = malloc(len+1);
1622 if (!buf)
1623 return NULL;
1624 len = text_bytes_get(txt, pos, len, buf);
1625 buf[len] = '\0';
1626 return buf;
1629 size_t text_size(Text *txt) {
1630 return txt->size;
1633 /* count the number of new lines '\n' in range [pos, pos+len) */
1634 static size_t lines_count(Text *txt, size_t pos, size_t len) {
1635 size_t lines = 0;
1636 for (Iterator it = text_iterator_get(txt, pos);
1637 text_iterator_valid(&it);
1638 text_iterator_next(&it)) {
1639 const char *start = it.text;
1640 while (len > 0 && start < it.end) {
1641 size_t n = MIN(len, (size_t)(it.end - start));
1642 const char *end = memchr(start, '\n', n);
1643 if (!end) {
1644 len -= n;
1645 break;
1647 lines++;
1648 len -= end - start + 1;
1649 start = end + 1;
1652 if (len == 0)
1653 break;
1655 return lines;
1658 /* skip n lines forward and return position afterwards */
1659 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skipped) {
1660 size_t lines_old = lines;
1661 for (Iterator it = text_iterator_get(txt, pos);
1662 text_iterator_valid(&it);
1663 text_iterator_next(&it)) {
1664 const char *start = it.text;
1665 while (lines > 0 && start < it.end) {
1666 size_t n = it.end - start;
1667 const char *end = memchr(start, '\n', n);
1668 if (!end) {
1669 pos += n;
1670 break;
1672 pos += end - start + 1;
1673 start = end + 1;
1674 lines--;
1677 if (lines == 0)
1678 break;
1680 if (lines_skipped)
1681 *lines_skipped = lines_old - lines;
1682 return pos;
1685 static void lineno_cache_invalidate(LineCache *cache) {
1686 cache->pos = 0;
1687 cache->lineno = 1;
1690 size_t text_pos_by_lineno(Text *txt, size_t lineno) {
1691 size_t lines_skipped;
1692 LineCache *cache = &txt->lines;
1693 if (lineno <= 1)
1694 return 0;
1695 if (lineno > cache->lineno) {
1696 cache->pos = lines_skip_forward(txt, cache->pos, lineno - cache->lineno, &lines_skipped);
1697 cache->lineno += lines_skipped;
1698 } else if (lineno < cache->lineno) {
1699 #if 0
1700 // TODO does it make sense to scan memory backwards here?
1701 size_t diff = cache->lineno - lineno;
1702 if (diff < lineno) {
1703 lines_skip_backward(txt, cache->pos, diff);
1704 } else
1705 #endif
1706 cache->pos = lines_skip_forward(txt, 0, lineno - 1, &lines_skipped);
1707 cache->lineno = lines_skipped + 1;
1709 return cache->lineno == lineno ? cache->pos : EPOS;
1712 size_t text_lineno_by_pos(Text *txt, size_t pos) {
1713 LineCache *cache = &txt->lines;
1714 if (pos > txt->size)
1715 pos = txt->size;
1716 if (pos < cache->pos) {
1717 size_t diff = cache->pos - pos;
1718 if (diff < pos)
1719 cache->lineno -= lines_count(txt, pos, diff);
1720 else
1721 cache->lineno = lines_count(txt, 0, pos) + 1;
1722 } else if (pos > cache->pos) {
1723 cache->lineno += lines_count(txt, cache->pos, pos - cache->pos);
1725 cache->pos = text_line_begin(txt, pos);
1726 return cache->lineno;
1729 Mark text_mark_set(Text *txt, size_t pos) {
1730 if (pos == txt->size)
1731 return (Mark)&txt->end;
1732 Location loc = piece_get_extern(txt, pos);
1733 if (!loc.piece)
1734 return EMARK;
1735 return (Mark)(loc.piece->data + loc.off);
1738 size_t text_mark_get(Text *txt, Mark mark) {
1739 size_t cur = 0;
1741 if (mark == EMARK)
1742 return EPOS;
1743 if (mark == (Mark)&txt->end)
1744 return txt->size;
1746 for (Piece *p = txt->begin.next; p->next; p = p->next) {
1747 Mark start = (Mark)(p->data);
1748 Mark end = start + p->len;
1749 if (start <= mark && mark < end)
1750 return cur + (mark - start);
1751 cur += p->len;
1754 return EPOS;