text: store blocks in array
[vis.git] / text.c
blobdc11b17fff1301fc35dbe1761a10e1f07c2aa604
1 #ifndef _GNU_SOURCE
2 #define _GNU_SOURCE /* memrchr(3) is non-standard */
3 #endif
4 #include <unistd.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <time.h>
9 #include <fcntl.h>
10 #include <errno.h>
11 #include <wchar.h>
12 #include <stdint.h>
13 #include <libgen.h>
14 #include <limits.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <sys/mman.h>
18 #if CONFIG_ACL
19 #include <sys/acl.h>
20 #endif
21 #if CONFIG_SELINUX
22 #include <selinux/selinux.h>
23 #endif
25 #include "text.h"
26 #include "text-util.h"
27 #include "text-motions.h"
28 #include "util.h"
29 #include "array.h"
31 /* Allocate blocks holding the actual file content in chunks of size: */
32 #ifndef BLOCK_SIZE
33 #define BLOCK_SIZE (1 << 20)
34 #endif
35 /* Files smaller than this value are copied on load, larger ones are mmap(2)-ed
36 * directely. Hence the former can be truncated, while doing so on the latter
37 * results in havoc. */
38 #define BLOCK_MMAP_SIZE (1 << 26)
40 /* Block holding the file content, either readonly mmap(2)-ed from the original
41 * file or heap allocated to store the modifications.
43 typedef struct Block Block;
44 struct Block {
45 size_t size; /* maximal capacity */
46 size_t len; /* current used length / insertion position */
47 char *data; /* actual data */
48 enum { /* type of allocation */
49 MMAP_ORIG, /* mmap(2)-ed from an external file */
50 MMAP, /* mmap(2)-ed from a temporary file only known to this process */
51 MALLOC, /* heap allocated block using malloc(3) */
52 } type;
55 /* A piece holds a reference (but doesn't itself store) a certain amount of data.
56 * All active pieces chained together form the whole content of the document.
57 * At the beginning there exists only one piece, spanning the whole document.
58 * Upon insertion/deletion new pieces will be created to represent the changes.
59 * Generally pieces are never destroyed, but kept around to peform undo/redo
60 * operations.
62 struct Piece {
63 Text *text; /* text to which this piece belongs */
64 Piece *prev, *next; /* pointers to the logical predecessor/successor */
65 Piece *global_prev; /* double linked list in order of allocation, */
66 Piece *global_next; /* used to free individual pieces */
67 const char *data; /* pointer into a Block holding the data */
68 size_t len; /* the length in number of bytes of the data */
71 /* used to transform a global position (byte offset starting from the beginning
72 * of the text) into an offset relative to a piece.
74 typedef struct {
75 Piece *piece; /* piece holding the location */
76 size_t off; /* offset into the piece in bytes */
77 } Location;
79 /* A Span holds a certain range of pieces. Changes to the document are always
80 * performed by swapping out an existing span with a new one.
82 typedef struct {
83 Piece *start, *end; /* start/end of the span */
84 size_t len; /* the sum of the lengths of the pieces which form this span */
85 } Span;
87 /* A Change keeps all needed information to redo/undo an insertion/deletion. */
88 typedef struct Change Change;
89 struct Change {
90 Span old; /* all pieces which are being modified/swapped out by the change */
91 Span new; /* all pieces which are introduced/swapped in by the change */
92 size_t pos; /* absolute position at which the change occured */
93 Change *next; /* next change which is part of the same revision */
94 Change *prev; /* previous change which is part of the same revision */
97 /* A Revision is a list of Changes which are used to undo/redo all modifications
98 * since the last snapshot operation. Revisions are stored in a directed graph structure.
100 typedef struct Revision Revision;
101 struct Revision {
102 Change *change; /* the most recent change */
103 Revision *next; /* the next (child) revision in the undo tree */
104 Revision *prev; /* the previous (parent) revision in the undo tree */
105 Revision *earlier; /* the previous Revision, chronologically */
106 Revision *later; /* the next Revision, chronologically */
107 time_t time; /* when the first change of this revision was performed */
108 size_t seq; /* a unique, strictly increasing identifier */
111 typedef struct {
112 size_t pos; /* position in bytes from start of file */
113 size_t lineno; /* line number in file i.e. number of '\n' in [0, pos) */
114 } LineCache;
116 /* The main struct holding all information of a given file */
117 struct Text {
118 Array blocks; /* blocks which hold text content */
119 Piece *pieces; /* all pieces which have been allocated, used to free them */
120 Piece *cache; /* most recently modified piece */
121 Piece begin, end; /* sentinel nodes which always exists but don't hold any data */
122 Revision *history; /* undo tree */
123 Revision *current_revision; /* revision holding all file changes until a snapshot is performed */
124 Revision *last_revision; /* the last revision added to the tree, chronologically */
125 Revision *saved_revision; /* the last revision at the time of the save operation */
126 size_t size; /* current file content size in bytes */
127 struct stat info; /* stat as probed at load time */
128 LineCache lines; /* mapping between absolute pos in bytes and logical line breaks */
131 struct TextSave { /* used to hold context between text_save_{begin,commit} calls */
132 Text *txt; /* text to operate on */
133 char *filename; /* filename to save to as given to text_save_begin */
134 char *tmpname; /* temporary name used for atomic rename(2) */
135 int fd; /* file descriptor to write data to using text_save_write */
136 int dirfd; /* directory file descriptor, relative to which we save */
137 enum TextSaveMethod type; /* method used to save file */
140 /* block management */
141 static Block *block_alloc(size_t size);
142 static Block *block_read(size_t size, int fd);
143 static Block *block_mmap(size_t size, int fd, off_t offset);
144 static void block_free(Block*);
145 static bool block_capacity(Block*, size_t len);
146 static const char *block_append(Block*, const char *data, size_t len);
147 static bool block_insert(Block*, size_t pos, const char *data, size_t len);
148 static bool block_delete(Block*, size_t pos, size_t len);
149 static const char *block_store(Text*, const char *data, size_t len);
150 /* cache layer */
151 static void cache_piece(Text *txt, Piece *p);
152 static bool cache_contains(Text *txt, Piece *p);
153 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len);
154 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len);
155 /* piece management */
156 static Piece *piece_alloc(Text *txt);
157 static void piece_free(Piece *p);
158 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len);
159 static Location piece_get_intern(Text *txt, size_t pos);
160 static Location piece_get_extern(Text *txt, size_t pos);
161 /* span management */
162 static void span_init(Span *span, Piece *start, Piece *end);
163 static void span_swap(Text *txt, Span *old, Span *new);
164 /* change management */
165 static Change *change_alloc(Text *txt, size_t pos);
166 static void change_free(Change *c);
167 /* revision management */
168 static Revision *revision_alloc(Text *txt);
169 static void revision_free(Revision *rev);
170 /* logical line counting cache */
171 static void lineno_cache_invalidate(LineCache *cache);
172 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skiped);
173 static size_t lines_count(Text *txt, size_t pos, size_t len);
175 static ssize_t write_all(int fd, const char *buf, size_t count) {
176 size_t rem = count;
177 while (rem > 0) {
178 ssize_t written = write(fd, buf, rem > INT_MAX ? INT_MAX : rem);
179 if (written < 0) {
180 if (errno == EAGAIN || errno == EINTR)
181 continue;
182 return -1;
183 } else if (written == 0) {
184 break;
186 rem -= written;
187 buf += written;
189 return count - rem;
192 /* allocate a new block of MAX(size, BLOCK_SIZE) bytes */
193 static Block *block_alloc(size_t size) {
194 Block *blk = calloc(1, sizeof *blk);
195 if (!blk)
196 return NULL;
197 if (BLOCK_SIZE > size)
198 size = BLOCK_SIZE;
199 if (!(blk->data = malloc(size))) {
200 free(blk);
201 return NULL;
203 blk->type = MALLOC;
204 blk->size = size;
205 return blk;
208 static Block *block_read(size_t size, int fd) {
209 Block *blk = block_alloc(size);
210 if (!blk)
211 return NULL;
212 char *data = blk->data;
213 size_t rem = size;
214 while (rem > 0) {
215 ssize_t len = read(fd, data, rem);
216 if (len == -1) {
217 block_free(blk);
218 return NULL;
219 } else if (len == 0) {
220 break;
221 } else {
222 data += len;
223 rem -= len;
226 blk->len = size - rem;
227 return blk;
230 static Block *block_mmap(size_t size, int fd, off_t offset) {
231 Block *blk = calloc(1, sizeof *blk);
232 if (!blk)
233 return NULL;
234 if (size) {
235 blk->data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
236 if (blk->data == MAP_FAILED) {
237 free(blk);
238 return NULL;
241 blk->type = MMAP_ORIG;
242 blk->size = size;
243 blk->len = size;
244 return blk;
247 static void block_free(Block *blk) {
248 if (!blk)
249 return;
250 if (blk->type == MALLOC)
251 free(blk->data);
252 else if ((blk->type == MMAP_ORIG || blk->type == MMAP) && blk->data)
253 munmap(blk->data, blk->size);
254 free(blk);
257 /* check whether block has enough free space to store len bytes */
258 static bool block_capacity(Block *blk, size_t len) {
259 return blk->size - blk->len >= len;
262 /* append data to block, assumes there is enough space available */
263 static const char *block_append(Block *blk, const char *data, size_t len) {
264 char *dest = memcpy(blk->data + blk->len, data, len);
265 blk->len += len;
266 return dest;
269 /* stores the given data in a block, allocates a new one if necessary. returns
270 * a pointer to the storage location or NULL if allocation failed. */
271 static const char *block_store(Text *txt, const char *data, size_t len) {
272 Block *blk = array_get_ptr(&txt->blocks, array_length(&txt->blocks)-1);
273 if (!blk || !block_capacity(blk, len)) {
274 blk = block_alloc(len);
275 if (!blk)
276 return NULL;
277 if (!array_add_ptr(&txt->blocks, blk)) {
278 block_free(blk);
279 return NULL;
282 return block_append(blk, data, len);
285 /* insert data into block at an arbitrary position, this should only be used with
286 * data of the most recently created piece. */
287 static bool block_insert(Block *blk, size_t pos, const char *data, size_t len) {
288 if (pos > blk->len || !block_capacity(blk, len))
289 return false;
290 if (blk->len == pos)
291 return block_append(blk, data, len);
292 char *insert = blk->data + pos;
293 memmove(insert + len, insert, blk->len - pos);
294 memcpy(insert, data, len);
295 blk->len += len;
296 return true;
299 /* delete data from a block at an arbitrary position, this should only be used with
300 * data of the most recently created piece. */
301 static bool block_delete(Block *blk, size_t pos, size_t len) {
302 size_t end;
303 if (!addu(pos, len, &end) || end > blk->len)
304 return false;
305 if (blk->len == pos) {
306 blk->len -= len;
307 return true;
309 char *delete = blk->data + pos;
310 memmove(delete, delete + len, blk->len - pos - len);
311 blk->len -= len;
312 return true;
315 /* cache the given piece if it is the most recently changed one */
316 static void cache_piece(Text *txt, Piece *p) {
317 Block *blk = array_get_ptr(&txt->blocks, array_length(&txt->blocks)-1);
318 if (!blk || p->data < blk->data || p->data + p->len != blk->data + blk->len)
319 return;
320 txt->cache = p;
323 /* check whether the given piece was the most recently modified one */
324 static bool cache_contains(Text *txt, Piece *p) {
325 Block *blk = array_get_ptr(&txt->blocks, array_length(&txt->blocks)-1);
326 Revision *rev = txt->current_revision;
327 if (!blk || !txt->cache || txt->cache != p || !rev || !rev->change)
328 return false;
330 Piece *start = rev->change->new.start;
331 Piece *end = rev->change->new.end;
332 bool found = false;
333 for (Piece *cur = start; !found; cur = cur->next) {
334 if (cur == p)
335 found = true;
336 if (cur == end)
337 break;
340 return found && p->data + p->len == blk->data + blk->len;
343 /* try to insert a chunk of data at a given piece offset. the insertion is only
344 * performed if the piece is the most recenetly changed one. the legnth of the
345 * piece, the span containing it and the whole text is adjusted accordingly */
346 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len) {
347 if (!cache_contains(txt, p))
348 return false;
349 Block *blk = array_get_ptr(&txt->blocks, array_length(&txt->blocks)-1);
350 size_t bufpos = p->data + off - blk->data;
351 if (!block_insert(blk, bufpos, data, len))
352 return false;
353 p->len += len;
354 txt->current_revision->change->new.len += len;
355 txt->size += len;
356 return true;
359 /* try to delete a chunk of data at a given piece offset. the deletion is only
360 * performed if the piece is the most recenetly changed one and the whole
361 * affected range lies within it. the legnth of the piece, the span containing it
362 * and the whole text is adjusted accordingly */
363 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len) {
364 if (!cache_contains(txt, p))
365 return false;
366 Block *blk = array_get_ptr(&txt->blocks, array_length(&txt->blocks)-1);
367 size_t end;
368 size_t bufpos = p->data + off - blk->data;
369 if (!addu(off, len, &end) || end > p->len || !block_delete(blk, bufpos, len))
370 return false;
371 p->len -= len;
372 txt->current_revision->change->new.len -= len;
373 txt->size -= len;
374 return true;
377 /* initialize a span and calculate its length */
378 static void span_init(Span *span, Piece *start, Piece *end) {
379 size_t len = 0;
380 span->start = start;
381 span->end = end;
382 for (Piece *p = start; p; p = p->next) {
383 len += p->len;
384 if (p == end)
385 break;
387 span->len = len;
390 /* swap out an old span and replace it with a new one.
392 * - if old is an empty span do not remove anything, just insert the new one
393 * - if new is an empty span do not insert anything, just remove the old one
395 * adjusts the document size accordingly.
397 static void span_swap(Text *txt, Span *old, Span *new) {
398 if (old->len == 0 && new->len == 0) {
399 return;
400 } else if (old->len == 0) {
401 /* insert new span */
402 new->start->prev->next = new->start;
403 new->end->next->prev = new->end;
404 } else if (new->len == 0) {
405 /* delete old span */
406 old->start->prev->next = old->end->next;
407 old->end->next->prev = old->start->prev;
408 } else {
409 /* replace old with new */
410 old->start->prev->next = new->start;
411 old->end->next->prev = new->end;
413 txt->size -= old->len;
414 txt->size += new->len;
417 /* Allocate a new revision and place it in the revision graph.
418 * All further changes will be associated with this revision. */
419 static Revision *revision_alloc(Text *txt) {
420 Revision *rev = calloc(1, sizeof *rev);
421 if (!rev)
422 return NULL;
423 rev->time = time(NULL);
424 txt->current_revision = rev;
426 /* set sequence number */
427 if (!txt->last_revision)
428 rev->seq = 0;
429 else
430 rev->seq = txt->last_revision->seq + 1;
432 /* set earlier, later pointers */
433 if (txt->last_revision)
434 txt->last_revision->later = rev;
435 rev->earlier = txt->last_revision;
437 if (!txt->history) {
438 txt->history = rev;
439 return rev;
442 /* set prev, next pointers */
443 rev->prev = txt->history;
444 txt->history->next = rev;
445 txt->history = rev;
446 return rev;
449 static void revision_free(Revision *rev) {
450 if (!rev)
451 return;
452 for (Change *next, *c = rev->change; c; c = next) {
453 next = c->next;
454 change_free(c);
456 free(rev);
459 static Piece *piece_alloc(Text *txt) {
460 Piece *p = calloc(1, sizeof *p);
461 if (!p)
462 return NULL;
463 p->text = txt;
464 p->global_next = txt->pieces;
465 if (txt->pieces)
466 txt->pieces->global_prev = p;
467 txt->pieces = p;
468 return p;
471 static void piece_free(Piece *p) {
472 if (!p)
473 return;
474 if (p->global_prev)
475 p->global_prev->global_next = p->global_next;
476 if (p->global_next)
477 p->global_next->global_prev = p->global_prev;
478 if (p->text->pieces == p)
479 p->text->pieces = p->global_next;
480 if (p->text->cache == p)
481 p->text->cache = NULL;
482 free(p);
485 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len) {
486 p->prev = prev;
487 p->next = next;
488 p->data = data;
489 p->len = len;
492 /* returns the piece holding the text at byte offset pos. if pos happens to
493 * be at a piece boundry i.e. the first byte of a piece then the previous piece
494 * to the left is returned with an offset of piece->len. this is convenient for
495 * modifications to the piece chain where both pieces (the returned one and the
496 * one following it) are needed, but unsuitable as a public interface.
498 * in particular if pos is zero, the begin sentinel piece is returned.
500 static Location piece_get_intern(Text *txt, size_t pos) {
501 size_t cur = 0;
502 for (Piece *p = &txt->begin; p->next; p = p->next) {
503 if (cur <= pos && pos <= cur + p->len)
504 return (Location){ .piece = p, .off = pos - cur };
505 cur += p->len;
508 return (Location){ 0 };
511 /* similiar to piece_get_intern but usable as a public API. returns the piece
512 * holding the text at byte offset pos. never returns a sentinel piece.
513 * it pos is the end of file (== text_size()) and the file is not empty then
514 * the last piece holding data is returned.
516 static Location piece_get_extern(Text *txt, size_t pos) {
517 size_t cur = 0;
518 Piece *p;
520 for (p = txt->begin.next; p->next; p = p->next) {
521 if (cur <= pos && pos < cur + p->len)
522 return (Location){ .piece = p, .off = pos - cur };
523 cur += p->len;
526 if (cur == pos)
527 return (Location){ .piece = p->prev, .off = p->prev->len };
529 return (Location){ 0 };
532 /* allocate a new change, associate it with current revision or a newly
533 * allocated one if none exists. */
534 static Change *change_alloc(Text *txt, size_t pos) {
535 Revision *rev = txt->current_revision;
536 if (!rev) {
537 rev = revision_alloc(txt);
538 if (!rev)
539 return NULL;
541 Change *c = calloc(1, sizeof *c);
542 if (!c)
543 return NULL;
544 c->pos = pos;
545 c->next = rev->change;
546 if (rev->change)
547 rev->change->prev = c;
548 rev->change = c;
549 return c;
552 static void change_free(Change *c) {
553 if (!c)
554 return;
555 /* only free the new part of the span, the old one is still in use */
556 if (c->new.start != c->new.end)
557 piece_free(c->new.end);
558 piece_free(c->new.start);
559 free(c);
562 /* When inserting new data there are 2 cases to consider.
564 * - in the first the insertion point falls into the middle of an exisiting
565 * piece which is replaced by three new pieces:
567 * /-+ --> +---------------+ --> +-\
568 * | | | existing text | | |
569 * \-+ <-- +---------------+ <-- +-/
571 * Insertion point for "demo "
573 * /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
574 * | | | existing| |demo | |text | | |
575 * \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
577 * - the second case deals with an insertion point at a piece boundry:
579 * /-+ --> +---------------+ --> +-\
580 * | | | existing text | | |
581 * \-+ <-- +---------------+ <-- +-/
583 * Insertion point for "short"
585 * /-+ --> +-----+ --> +---------------+ --> +-\
586 * | | |short| | existing text | | |
587 * \-+ <-- +-----+ <-- +---------------+ <-- +-/
589 bool text_insert(Text *txt, size_t pos, const char *data, size_t len) {
590 if (len == 0)
591 return true;
592 if (pos > txt->size)
593 return false;
594 if (pos < txt->lines.pos)
595 lineno_cache_invalidate(&txt->lines);
597 Location loc = piece_get_intern(txt, pos);
598 Piece *p = loc.piece;
599 if (!p)
600 return false;
601 size_t off = loc.off;
602 if (cache_insert(txt, p, off, data, len))
603 return true;
605 Change *c = change_alloc(txt, pos);
606 if (!c)
607 return false;
609 if (!(data = block_store(txt, data, len)))
610 return false;
612 Piece *new = NULL;
614 if (off == p->len) {
615 /* insert between two existing pieces, hence there is nothing to
616 * remove, just add a new piece holding the extra text */
617 if (!(new = piece_alloc(txt)))
618 return false;
619 piece_init(new, p, p->next, data, len);
620 span_init(&c->new, new, new);
621 span_init(&c->old, NULL, NULL);
622 } else {
623 /* insert into middle of an existing piece, therfore split the old
624 * piece. that is we have 3 new pieces one containing the content
625 * before the insertion point then one holding the newly inserted
626 * text and one holding the content after the insertion point.
628 Piece *before = piece_alloc(txt);
629 new = piece_alloc(txt);
630 Piece *after = piece_alloc(txt);
631 if (!before || !new || !after)
632 return false;
633 piece_init(before, p->prev, new, p->data, off);
634 piece_init(new, before, after, data, len);
635 piece_init(after, new, p->next, p->data + off, p->len - off);
637 span_init(&c->new, before, after);
638 span_init(&c->old, p, p);
641 cache_piece(txt, new);
642 span_swap(txt, &c->old, &c->new);
643 return true;
646 static bool text_vprintf(Text *txt, size_t pos, const char *format, va_list ap) {
647 va_list ap_save;
648 va_copy(ap_save, ap);
649 int len = vsnprintf(NULL, 0, format, ap);
650 if (len == -1) {
651 va_end(ap_save);
652 return false;
654 char *buf = malloc(len+1);
655 bool ret = buf && (vsnprintf(buf, len+1, format, ap_save) == len) && text_insert(txt, pos, buf, len);
656 free(buf);
657 va_end(ap_save);
658 return ret;
661 bool text_appendf(Text *txt, const char *format, ...) {
662 va_list ap;
663 va_start(ap, format);
664 bool ret = text_vprintf(txt, text_size(txt), format, ap);
665 va_end(ap);
666 return ret;
669 bool text_printf(Text *txt, size_t pos, const char *format, ...) {
670 va_list ap;
671 va_start(ap, format);
672 bool ret = text_vprintf(txt, pos, format, ap);
673 va_end(ap);
674 return ret;
677 static size_t revision_undo(Text *txt, Revision *rev) {
678 size_t pos = EPOS;
679 for (Change *c = rev->change; c; c = c->next) {
680 span_swap(txt, &c->new, &c->old);
681 pos = c->pos;
683 return pos;
686 static size_t revision_redo(Text *txt, Revision *rev) {
687 size_t pos = EPOS;
688 Change *c = rev->change;
689 while (c->next)
690 c = c->next;
691 for ( ; c; c = c->prev) {
692 span_swap(txt, &c->old, &c->new);
693 pos = c->pos;
694 if (c->new.len > c->old.len)
695 pos += c->new.len - c->old.len;
697 return pos;
700 size_t text_undo(Text *txt) {
701 size_t pos = EPOS;
702 /* taking rev snapshot makes sure that txt->current_revision is reset */
703 text_snapshot(txt);
704 Revision *rev = txt->history->prev;
705 if (!rev)
706 return pos;
707 pos = revision_undo(txt, txt->history);
708 txt->history = rev;
709 lineno_cache_invalidate(&txt->lines);
710 return pos;
713 size_t text_redo(Text *txt) {
714 size_t pos = EPOS;
715 /* taking a snapshot makes sure that txt->current_revision is reset */
716 text_snapshot(txt);
717 Revision *rev = txt->history->next;
718 if (!rev)
719 return pos;
720 pos = revision_redo(txt, rev);
721 txt->history = rev;
722 lineno_cache_invalidate(&txt->lines);
723 return pos;
726 static bool history_change_branch(Revision *rev) {
727 bool changed = false;
728 while (rev->prev) {
729 if (rev->prev->next != rev) {
730 rev->prev->next = rev;
731 changed = true;
733 rev = rev->prev;
735 return changed;
738 static size_t history_traverse_to(Text *txt, Revision *rev) {
739 size_t pos = EPOS;
740 if (!rev)
741 return pos;
742 bool changed = history_change_branch(rev);
743 if (!changed) {
744 if (rev->seq == txt->history->seq) {
745 return txt->lines.pos;
746 } else if (rev->seq > txt->history->seq) {
747 while (txt->history != rev)
748 pos = text_redo(txt);
749 return pos;
750 } else if (rev->seq < txt->history->seq) {
751 while (txt->history != rev)
752 pos = text_undo(txt);
753 return pos;
755 } else {
756 while (txt->history->prev && txt->history->prev->next == txt->history)
757 text_undo(txt);
758 pos = text_undo(txt);
759 while (txt->history != rev)
760 pos = text_redo(txt);
761 return pos;
763 return pos;
766 size_t text_earlier(Text *txt) {
767 return history_traverse_to(txt, txt->history->earlier);
770 size_t text_later(Text *txt) {
771 return history_traverse_to(txt, txt->history->later);
774 size_t text_restore(Text *txt, time_t time) {
775 Revision *rev = txt->history;
776 while (time < rev->time && rev->earlier)
777 rev = rev->earlier;
778 while (time > rev->time && rev->later)
779 rev = rev->later;
780 time_t diff = labs(rev->time - time);
781 if (rev->earlier && rev->earlier != txt->history && labs(rev->earlier->time - time) < diff)
782 rev = rev->earlier;
783 if (rev->later && rev->later != txt->history && labs(rev->later->time - time) < diff)
784 rev = rev->later;
785 return history_traverse_to(txt, rev);
788 time_t text_state(Text *txt) {
789 return txt->history->time;
792 static bool preserve_acl(int src, int dest) {
793 #if CONFIG_ACL
794 acl_t acl = acl_get_fd(src);
795 if (!acl)
796 return errno == ENOTSUP ? true : false;
797 if (acl_set_fd(dest, acl) == -1) {
798 acl_free(acl);
799 return false;
801 acl_free(acl);
802 #endif /* CONFIG_ACL */
803 return true;
806 static bool preserve_selinux_context(int src, int dest) {
807 #if CONFIG_SELINUX
808 char *context = NULL;
809 if (!is_selinux_enabled())
810 return true;
811 if (fgetfilecon(src, &context) == -1)
812 return errno == ENOTSUP ? true : false;
813 if (fsetfilecon(dest, context) == -1) {
814 freecon(context);
815 return false;
817 freecon(context);
818 #endif /* CONFIG_SELINUX */
819 return true;
822 static int mkstempat(int dirfd, char *template) {
823 if (dirfd == AT_FDCWD)
824 return mkstemp(template);
825 // FIXME: not thread safe
826 int fd = -1;
827 int cwd = open(".", O_RDONLY|O_DIRECTORY);
828 if (cwd == -1)
829 goto err;
830 if (fchdir(dirfd) == -1)
831 goto err;
832 fd = mkstemp(template);
833 err:
834 if (cwd != -1) {
835 fchdir(cwd);
836 close(cwd);
838 return fd;
841 /* Create a new file named `.filename.vis.XXXXXX` (where `XXXXXX` is a
842 * randomly generated, unique suffix) and try to preserve all important
843 * meta data. After the file content has been written to this temporary
844 * file, text_save_commit_atomic will atomically move it to its final
845 * (possibly already existing) destination using rename(2).
847 * This approach does not work if:
849 * - the file is a symbolic link
850 * - the file is a hard link
851 * - file ownership can not be preserved
852 * - file group can not be preserved
853 * - directory permissions do not allow creation of a new file
854 * - POSXI ACL can not be preserved (if enabled)
855 * - SELinux security context can not be preserved (if enabled)
857 static bool text_save_begin_atomic(TextSave *ctx) {
858 int oldfd, saved_errno;
859 if ((oldfd = openat(ctx->dirfd, ctx->filename, O_RDONLY)) == -1 && errno != ENOENT)
860 goto err;
861 struct stat oldmeta = { 0 };
862 if (oldfd != -1 && fstatat(ctx->dirfd, ctx->filename, &oldmeta, AT_SYMLINK_NOFOLLOW) == -1)
863 goto err;
864 if (oldfd != -1) {
865 if (S_ISLNK(oldmeta.st_mode)) /* symbolic link */
866 goto err;
867 if (oldmeta.st_nlink > 1) /* hard link */
868 goto err;
871 char suffix[] = ".vis.XXXXXX";
872 size_t len = strlen(ctx->filename) + sizeof("./.") + sizeof(suffix);
873 char *dir = strdup(ctx->filename);
874 char *base = strdup(ctx->filename);
876 if (!(ctx->tmpname = malloc(len)) || !dir || !base) {
877 free(dir);
878 free(base);
879 goto err;
882 snprintf(ctx->tmpname, len, "%s/.%s%s", dirname(dir), basename(base), suffix);
883 free(dir);
884 free(base);
886 if ((ctx->fd = mkstempat(ctx->dirfd, ctx->tmpname)) == -1)
887 goto err;
889 if (oldfd == -1) {
890 mode_t mask = umask(0);
891 umask(mask);
892 if (fchmod(ctx->fd, 0666 & ~mask) == -1)
893 goto err;
894 } else {
895 if (fchmod(ctx->fd, oldmeta.st_mode) == -1)
896 goto err;
897 if (!preserve_acl(oldfd, ctx->fd) || !preserve_selinux_context(oldfd, ctx->fd))
898 goto err;
899 /* change owner if necessary */
900 if (oldmeta.st_uid != getuid() && fchown(ctx->fd, oldmeta.st_uid, (uid_t)-1) == -1)
901 goto err;
902 /* change group if necessary, in case of failure some editors reset
903 * the group permissions to the same as for others */
904 if (oldmeta.st_gid != getgid() && fchown(ctx->fd, (uid_t)-1, oldmeta.st_gid) == -1)
905 goto err;
906 close(oldfd);
909 ctx->type = TEXT_SAVE_ATOMIC;
910 return true;
911 err:
912 saved_errno = errno;
913 if (oldfd != -1)
914 close(oldfd);
915 if (ctx->fd != -1)
916 close(ctx->fd);
917 ctx->fd = -1;
918 free(ctx->tmpname);
919 ctx->tmpname = NULL;
920 errno = saved_errno;
921 return false;
924 static bool text_save_commit_atomic(TextSave *ctx) {
925 if (fsync(ctx->fd) == -1)
926 return false;
928 struct stat meta = { 0 };
929 if (fstat(ctx->fd, &meta) == -1)
930 return false;
932 bool close_failed = (close(ctx->fd) == -1);
933 ctx->fd = -1;
934 if (close_failed)
935 return false;
937 if (renameat(ctx->dirfd, ctx->tmpname, ctx->dirfd, ctx->filename) == -1)
938 return false;
940 free(ctx->tmpname);
941 ctx->tmpname = NULL;
943 int dir = openat(ctx->dirfd, dirname(ctx->filename), O_DIRECTORY|O_RDONLY);
944 if (dir == -1)
945 return false;
947 if (fsync(dir) == -1 && errno != EINVAL) {
948 close(dir);
949 return false;
952 if (close(dir) == -1)
953 return false;
955 if (meta.st_mtime)
956 ctx->txt->info = meta;
957 return true;
960 static bool text_save_begin_inplace(TextSave *ctx) {
961 Text *txt = ctx->txt;
962 struct stat meta = { 0 };
963 int newfd = -1, saved_errno;
964 if ((ctx->fd = openat(ctx->dirfd, ctx->filename, O_CREAT|O_WRONLY, 0666)) == -1)
965 goto err;
966 if (fstat(ctx->fd, &meta) == -1)
967 goto err;
968 Block *block = array_get_ptr(&txt->blocks, 0);
969 if (meta.st_dev == txt->info.st_dev && meta.st_ino == txt->info.st_ino &&
970 block && block->type == MMAP_ORIG && block->size) {
971 /* The file we are going to overwrite is currently mmap-ed from
972 * text_load, therefore we copy the mmap-ed block to a temporary
973 * file and remap it at the same position such that all pointers
974 * from the various pieces are still valid.
976 size_t size = block->size;
977 char tmpname[32] = "/tmp/vis-XXXXXX";
978 newfd = mkstemp(tmpname);
979 if (newfd == -1)
980 goto err;
981 if (unlink(tmpname) == -1)
982 goto err;
983 ssize_t written = write_all(newfd, block->data, size);
984 if (written == -1 || (size_t)written != size)
985 goto err;
986 void *data = mmap(block->data, size, PROT_READ, MAP_SHARED|MAP_FIXED, newfd, 0);
987 if (data == MAP_FAILED)
988 goto err;
989 bool close_failed = (close(newfd) == -1);
990 newfd = -1;
991 if (close_failed)
992 goto err;
993 block->type = MMAP;
995 /* overwrite the existing file content, if something goes wrong
996 * here we are screwed, TODO: make a backup before? */
997 if (ftruncate(ctx->fd, 0) == -1)
998 goto err;
999 ctx->type = TEXT_SAVE_INPLACE;
1000 return true;
1001 err:
1002 saved_errno = errno;
1003 if (newfd != -1)
1004 close(newfd);
1005 if (ctx->fd != -1)
1006 close(ctx->fd);
1007 ctx->fd = -1;
1008 errno = saved_errno;
1009 return false;
1012 static bool text_save_commit_inplace(TextSave *ctx) {
1013 if (fsync(ctx->fd) == -1)
1014 return false;
1015 struct stat meta = { 0 };
1016 if (fstat(ctx->fd, &meta) == -1)
1017 return false;
1018 if (close(ctx->fd) == -1)
1019 return false;
1020 ctx->txt->info = meta;
1021 return true;
1024 TextSave *text_save_begin(Text *txt, int dirfd, const char *filename, enum TextSaveMethod type) {
1025 if (!filename)
1026 return NULL;
1027 TextSave *ctx = calloc(1, sizeof *ctx);
1028 if (!ctx)
1029 return NULL;
1030 ctx->txt = txt;
1031 ctx->fd = -1;
1032 ctx->dirfd = dirfd;
1033 if (!(ctx->filename = strdup(filename)))
1034 goto err;
1035 errno = 0;
1036 if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_ATOMIC) && text_save_begin_atomic(ctx))
1037 return ctx;
1038 if (errno == ENOSPC)
1039 goto err;
1040 if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_INPLACE) && text_save_begin_inplace(ctx))
1041 return ctx;
1042 err:
1043 text_save_cancel(ctx);
1044 return NULL;
1047 bool text_save_commit(TextSave *ctx) {
1048 if (!ctx)
1049 return true;
1050 bool ret;
1051 Text *txt = ctx->txt;
1052 switch (ctx->type) {
1053 case TEXT_SAVE_ATOMIC:
1054 ret = text_save_commit_atomic(ctx);
1055 break;
1056 case TEXT_SAVE_INPLACE:
1057 ret = text_save_commit_inplace(ctx);
1058 break;
1059 default:
1060 ret = false;
1061 break;
1064 if (ret) {
1065 txt->saved_revision = txt->history;
1066 text_snapshot(txt);
1068 text_save_cancel(ctx);
1069 return ret;
1072 void text_save_cancel(TextSave *ctx) {
1073 if (!ctx)
1074 return;
1075 int saved_errno = errno;
1076 if (ctx->fd != -1)
1077 close(ctx->fd);
1078 if (ctx->tmpname && ctx->tmpname[0])
1079 unlinkat(ctx->dirfd, ctx->tmpname, 0);
1080 free(ctx->tmpname);
1081 free(ctx->filename);
1082 free(ctx);
1083 errno = saved_errno;
1086 /* First try to save the file atomically using rename(2) if this does not
1087 * work overwrite the file in place. However if something goes wrong during
1088 * this overwrite the original file is permanently damaged.
1090 bool text_save(Text *txt, const char *filename) {
1091 return text_saveat(txt, AT_FDCWD, filename);
1094 bool text_saveat(Text *txt, int dirfd, const char *filename) {
1095 return text_saveat_method(txt, dirfd, filename, TEXT_SAVE_AUTO);
1098 bool text_save_method(Text *txt, const char *filename, enum TextSaveMethod method) {
1099 return text_saveat_method(txt, AT_FDCWD, filename, method);
1102 bool text_saveat_method(Text *txt, int dirfd, const char *filename, enum TextSaveMethod method) {
1103 if (!filename) {
1104 txt->saved_revision = txt->history;
1105 text_snapshot(txt);
1106 return true;
1108 TextSave *ctx = text_save_begin(txt, dirfd, filename, method);
1109 if (!ctx)
1110 return false;
1111 Filerange range = (Filerange){ .start = 0, .end = text_size(txt) };
1112 ssize_t written = text_save_write_range(ctx, &range);
1113 if (written == -1 || (size_t)written != text_range_size(&range)) {
1114 text_save_cancel(ctx);
1115 return false;
1117 return text_save_commit(ctx);
1120 ssize_t text_save_write_range(TextSave *ctx, Filerange *range) {
1121 return text_write_range(ctx->txt, range, ctx->fd);
1124 ssize_t text_write(Text *txt, int fd) {
1125 Filerange r = (Filerange){ .start = 0, .end = text_size(txt) };
1126 return text_write_range(txt, &r, fd);
1129 ssize_t text_write_range(Text *txt, Filerange *range, int fd) {
1130 size_t size = text_range_size(range), rem = size;
1131 for (Iterator it = text_iterator_get(txt, range->start);
1132 rem > 0 && text_iterator_valid(&it);
1133 text_iterator_next(&it)) {
1134 size_t prem = it.end - it.text;
1135 if (prem > rem)
1136 prem = rem;
1137 ssize_t written = write_all(fd, it.text, prem);
1138 if (written == -1)
1139 return -1;
1140 rem -= written;
1141 if ((size_t)written != prem)
1142 break;
1144 return size - rem;
1147 Text *text_load(const char *filename) {
1148 return text_load_method(filename, TEXT_LOAD_AUTO);
1151 Text *text_loadat(int dirfd, const char *filename) {
1152 return text_loadat_method(dirfd, filename, TEXT_LOAD_AUTO);
1155 Text *text_load_method(const char *filename, enum TextLoadMethod method) {
1156 return text_loadat_method(AT_FDCWD, filename, method);
1159 Text *text_loadat_method(int dirfd, const char *filename, enum TextLoadMethod method) {
1160 int fd = -1;
1161 size_t size = 0;
1162 Text *txt = calloc(1, sizeof *txt);
1163 if (!txt)
1164 return NULL;
1165 Piece *p = piece_alloc(txt);
1166 if (!p)
1167 goto out;
1168 array_init(&txt->blocks);
1169 lineno_cache_invalidate(&txt->lines);
1170 if (filename) {
1171 if ((fd = openat(dirfd, filename, O_RDONLY)) == -1)
1172 goto out;
1173 if (fstat(fd, &txt->info) == -1)
1174 goto out;
1175 if (!S_ISREG(txt->info.st_mode)) {
1176 errno = S_ISDIR(txt->info.st_mode) ? EISDIR : ENOTSUP;
1177 goto out;
1179 // XXX: use lseek(fd, 0, SEEK_END); instead?
1180 size = txt->info.st_size;
1181 if (size > 0) {
1182 Block *block;
1183 if (method == TEXT_LOAD_READ || (method == TEXT_LOAD_AUTO && size < BLOCK_MMAP_SIZE))
1184 block = block_read(size, fd);
1185 else
1186 block = block_mmap(size, fd, 0);
1187 if (!block || !array_add_ptr(&txt->blocks, block))
1188 goto out;
1189 piece_init(p, &txt->begin, &txt->end, block->data, block->len);
1193 if (size == 0)
1194 piece_init(p, &txt->begin, &txt->end, "\0", 0);
1196 piece_init(&txt->begin, NULL, p, NULL, 0);
1197 piece_init(&txt->end, p, NULL, NULL, 0);
1198 txt->size = p->len;
1199 /* write an empty revision */
1200 change_alloc(txt, EPOS);
1201 text_snapshot(txt);
1202 txt->saved_revision = txt->history;
1204 if (fd != -1)
1205 close(fd);
1206 return txt;
1207 out:
1208 if (fd != -1)
1209 close(fd);
1210 text_free(txt);
1211 return NULL;
1214 struct stat text_stat(Text *txt) {
1215 return txt->info;
1218 /* A delete operation can either start/stop midway through a piece or at
1219 * a boundry. In the former case a new piece is created to represent the
1220 * remaining text before/after the modification point.
1222 * /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
1223 * | | | existing| |demo | |text | | |
1224 * \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
1225 * ^ ^
1226 * |------ delete range -----|
1228 * /-+ --> +----+ --> +--+ --> +-\
1229 * | | | exi| |t | | |
1230 * \-+ <-- +----+ <-- +--+ <-- +-/
1232 bool text_delete(Text *txt, size_t pos, size_t len) {
1233 if (len == 0)
1234 return true;
1235 size_t pos_end;
1236 if (!addu(pos, len, &pos_end) || pos_end > txt->size)
1237 return false;
1238 if (pos < txt->lines.pos)
1239 lineno_cache_invalidate(&txt->lines);
1241 Location loc = piece_get_intern(txt, pos);
1242 Piece *p = loc.piece;
1243 if (!p)
1244 return false;
1245 size_t off = loc.off;
1246 if (cache_delete(txt, p, off, len))
1247 return true;
1248 Change *c = change_alloc(txt, pos);
1249 if (!c)
1250 return false;
1252 bool midway_start = false, midway_end = false; /* split pieces? */
1253 Piece *before, *after; /* unmodified pieces before/after deletion point */
1254 Piece *start, *end; /* span which is removed */
1255 size_t cur; /* how much has already been deleted */
1257 if (off == p->len) {
1258 /* deletion starts at a piece boundry */
1259 cur = 0;
1260 before = p;
1261 start = p->next;
1262 } else {
1263 /* deletion starts midway through a piece */
1264 midway_start = true;
1265 cur = p->len - off;
1266 start = p;
1267 before = piece_alloc(txt);
1268 if (!before)
1269 return false;
1272 /* skip all pieces which fall into deletion range */
1273 while (cur < len) {
1274 p = p->next;
1275 cur += p->len;
1278 if (cur == len) {
1279 /* deletion stops at a piece boundry */
1280 end = p;
1281 after = p->next;
1282 } else {
1283 /* cur > len: deletion stops midway through a piece */
1284 midway_end = true;
1285 end = p;
1286 after = piece_alloc(txt);
1287 if (!after)
1288 return false;
1289 piece_init(after, before, p->next, p->data + p->len - (cur - len), cur - len);
1292 if (midway_start) {
1293 /* we finally know which piece follows our newly allocated before piece */
1294 piece_init(before, start->prev, after, start->data, off);
1297 Piece *new_start = NULL, *new_end = NULL;
1298 if (midway_start) {
1299 new_start = before;
1300 if (!midway_end)
1301 new_end = before;
1303 if (midway_end) {
1304 if (!midway_start)
1305 new_start = after;
1306 new_end = after;
1309 span_init(&c->new, new_start, new_end);
1310 span_init(&c->old, start, end);
1311 span_swap(txt, &c->old, &c->new);
1312 return true;
1315 bool text_delete_range(Text *txt, Filerange *r) {
1316 if (!text_range_valid(r))
1317 return false;
1318 return text_delete(txt, r->start, text_range_size(r));
1321 /* preserve the current text content such that it can be restored by
1322 * means of undo/redo operations */
1323 void text_snapshot(Text *txt) {
1324 if (txt->current_revision)
1325 txt->last_revision = txt->current_revision;
1326 txt->current_revision = NULL;
1327 txt->cache = NULL;
1331 void text_free(Text *txt) {
1332 if (!txt)
1333 return;
1335 // free history
1336 Revision *hist = txt->history;
1337 while (hist && hist->prev)
1338 hist = hist->prev;
1339 while (hist) {
1340 Revision *later = hist->later;
1341 revision_free(hist);
1342 hist = later;
1345 for (Piece *next, *p = txt->pieces; p; p = next) {
1346 next = p->global_next;
1347 piece_free(p);
1350 for (size_t i = 0, len = array_length(&txt->blocks); i < len; i++)
1351 block_free(array_get_ptr(&txt->blocks, i));
1352 array_release(&txt->blocks);
1354 free(txt);
1357 bool text_modified(Text *txt) {
1358 return txt->saved_revision != txt->history;
1361 bool text_mmaped(Text *txt, const char *ptr) {
1362 uintptr_t addr = (uintptr_t)ptr;
1363 for (size_t i = 0, len = array_length(&txt->blocks); i < len; i++) {
1364 Block *blk = array_get_ptr(&txt->blocks, i);
1365 if ((blk->type == MMAP_ORIG || blk->type == MMAP) &&
1366 (uintptr_t)(blk->data) <= addr && addr < (uintptr_t)(blk->data + blk->size))
1367 return true;
1369 return false;
1372 static bool text_iterator_init(Iterator *it, size_t pos, Piece *p, size_t off) {
1373 Iterator iter = (Iterator){
1374 .pos = pos,
1375 .piece = p,
1376 .start = p ? p->data : NULL,
1377 .end = p ? p->data + p->len : NULL,
1378 .text = p ? p->data + off : NULL,
1380 *it = iter;
1381 return text_iterator_valid(it);
1384 Iterator text_iterator_get(Text *txt, size_t pos) {
1385 Iterator it;
1386 Location loc = piece_get_extern(txt, pos);
1387 text_iterator_init(&it, pos, loc.piece, loc.off);
1388 return it;
1391 bool text_iterator_byte_get(Iterator *it, char *b) {
1392 if (text_iterator_valid(it)) {
1393 if (it->start <= it->text && it->text < it->end) {
1394 *b = *it->text;
1395 return true;
1396 } else if (it->pos == it->piece->text->size) { /* EOF */
1397 *b = '\0';
1398 return true;
1401 return false;
1404 bool text_iterator_next(Iterator *it) {
1405 size_t rem = it->end - it->text;
1406 return text_iterator_init(it, it->pos+rem, it->piece ? it->piece->next : NULL, 0);
1409 bool text_iterator_prev(Iterator *it) {
1410 size_t off = it->text - it->start;
1411 size_t len = it->piece && it->piece->prev ? it->piece->prev->len : 0;
1412 return text_iterator_init(it, it->pos-off, it->piece ? it->piece->prev : NULL, len);
1415 bool text_iterator_valid(const Iterator *it) {
1416 /* filter out sentinel nodes */
1417 return it->piece && it->piece->text;
1420 bool text_iterator_byte_next(Iterator *it, char *b) {
1421 if (!it->piece || !it->piece->next)
1422 return false;
1423 bool eof = true;
1424 if (it->text < it->end) {
1425 it->text++;
1426 it->pos++;
1427 eof = false;
1428 } else if (!it->piece->prev) {
1429 eof = false;
1432 while (it->text == it->end) {
1433 if (!text_iterator_next(it)) {
1434 if (eof)
1435 return false;
1436 if (b)
1437 *b = '\0';
1438 return text_iterator_prev(it);
1442 if (b)
1443 *b = *it->text;
1444 return true;
1447 bool text_iterator_byte_prev(Iterator *it, char *b) {
1448 if (!it->piece || !it->piece->prev)
1449 return false;
1450 bool eof = !it->piece->next;
1451 while (it->text == it->start) {
1452 if (!text_iterator_prev(it)) {
1453 if (!eof)
1454 return false;
1455 if (b)
1456 *b = '\0';
1457 return text_iterator_next(it);
1461 --it->text;
1462 --it->pos;
1464 if (b)
1465 *b = *it->text;
1466 return true;
1469 bool text_iterator_byte_find_prev(Iterator *it, char b) {
1470 while (it->text) {
1471 const char *match = memrchr(it->start, b, it->text - it->start);
1472 if (match) {
1473 it->pos -= it->text - match;
1474 it->text = match;
1475 return true;
1477 text_iterator_prev(it);
1479 text_iterator_next(it);
1480 return false;
1483 bool text_iterator_byte_find_next(Iterator *it, char b) {
1484 while (it->text) {
1485 const char *match = memchr(it->text, b, it->end - it->text);
1486 if (match) {
1487 it->pos += match - it->text;
1488 it->text = match;
1489 return true;
1491 text_iterator_next(it);
1493 text_iterator_prev(it);
1494 return false;
1497 bool text_iterator_codepoint_next(Iterator *it, char *c) {
1498 while (text_iterator_byte_next(it, NULL)) {
1499 if (ISUTF8(*it->text)) {
1500 if (c)
1501 *c = *it->text;
1502 return true;
1505 return false;
1508 bool text_iterator_codepoint_prev(Iterator *it, char *c) {
1509 while (text_iterator_byte_prev(it, NULL)) {
1510 if (ISUTF8(*it->text)) {
1511 if (c)
1512 *c = *it->text;
1513 return true;
1516 return false;
1519 bool text_iterator_char_next(Iterator *it, char *c) {
1520 if (!text_iterator_codepoint_next(it, c))
1521 return false;
1522 mbstate_t ps = { 0 };
1523 for (;;) {
1524 char buf[MB_LEN_MAX];
1525 size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf);
1526 wchar_t wc;
1527 size_t wclen = mbrtowc(&wc, buf, len, &ps);
1528 if (wclen == (size_t)-1 && errno == EILSEQ) {
1529 return true;
1530 } else if (wclen == (size_t)-2) {
1531 return false;
1532 } else if (wclen == 0) {
1533 return true;
1534 } else {
1535 int width = wcwidth(wc);
1536 if (width != 0)
1537 return true;
1538 if (!text_iterator_codepoint_next(it, c))
1539 return false;
1542 return true;
1545 bool text_iterator_char_prev(Iterator *it, char *c) {
1546 if (!text_iterator_codepoint_prev(it, c))
1547 return false;
1548 for (;;) {
1549 char buf[MB_LEN_MAX];
1550 size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf);
1551 wchar_t wc;
1552 mbstate_t ps = { 0 };
1553 size_t wclen = mbrtowc(&wc, buf, len, &ps);
1554 if (wclen == (size_t)-1 && errno == EILSEQ) {
1555 return true;
1556 } else if (wclen == (size_t)-2) {
1557 return false;
1558 } else if (wclen == 0) {
1559 return true;
1560 } else {
1561 int width = wcwidth(wc);
1562 if (width != 0)
1563 return true;
1564 if (!text_iterator_codepoint_prev(it, c))
1565 return false;
1568 return true;
1571 bool text_byte_get(Text *txt, size_t pos, char *byte) {
1572 return text_bytes_get(txt, pos, 1, byte);
1575 size_t text_bytes_get(Text *txt, size_t pos, size_t len, char *buf) {
1576 if (!buf)
1577 return 0;
1578 char *cur = buf;
1579 size_t rem = len;
1580 for (Iterator it = text_iterator_get(txt, pos);
1581 text_iterator_valid(&it);
1582 text_iterator_next(&it)) {
1583 if (rem == 0)
1584 break;
1585 size_t piece_len = it.end - it.text;
1586 if (piece_len > rem)
1587 piece_len = rem;
1588 if (piece_len) {
1589 memcpy(cur, it.text, piece_len);
1590 cur += piece_len;
1591 rem -= piece_len;
1594 return len - rem;
1597 char *text_bytes_alloc0(Text *txt, size_t pos, size_t len) {
1598 if (len == SIZE_MAX)
1599 return NULL;
1600 char *buf = malloc(len+1);
1601 if (!buf)
1602 return NULL;
1603 len = text_bytes_get(txt, pos, len, buf);
1604 buf[len] = '\0';
1605 return buf;
1608 size_t text_size(Text *txt) {
1609 return txt->size;
1612 /* count the number of new lines '\n' in range [pos, pos+len) */
1613 static size_t lines_count(Text *txt, size_t pos, size_t len) {
1614 size_t lines = 0;
1615 for (Iterator it = text_iterator_get(txt, pos);
1616 text_iterator_valid(&it);
1617 text_iterator_next(&it)) {
1618 const char *start = it.text;
1619 while (len > 0 && start < it.end) {
1620 size_t n = MIN(len, (size_t)(it.end - start));
1621 const char *end = memchr(start, '\n', n);
1622 if (!end) {
1623 len -= n;
1624 break;
1626 lines++;
1627 len -= end - start + 1;
1628 start = end + 1;
1631 if (len == 0)
1632 break;
1634 return lines;
1637 /* skip n lines forward and return position afterwards */
1638 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skipped) {
1639 size_t lines_old = lines;
1640 for (Iterator it = text_iterator_get(txt, pos);
1641 text_iterator_valid(&it);
1642 text_iterator_next(&it)) {
1643 const char *start = it.text;
1644 while (lines > 0 && start < it.end) {
1645 size_t n = it.end - start;
1646 const char *end = memchr(start, '\n', n);
1647 if (!end) {
1648 pos += n;
1649 break;
1651 pos += end - start + 1;
1652 start = end + 1;
1653 lines--;
1656 if (lines == 0)
1657 break;
1659 if (lines_skipped)
1660 *lines_skipped = lines_old - lines;
1661 return pos;
1664 static void lineno_cache_invalidate(LineCache *cache) {
1665 cache->pos = 0;
1666 cache->lineno = 1;
1669 size_t text_pos_by_lineno(Text *txt, size_t lineno) {
1670 size_t lines_skipped;
1671 LineCache *cache = &txt->lines;
1672 if (lineno <= 1)
1673 return 0;
1674 if (lineno > cache->lineno) {
1675 cache->pos = lines_skip_forward(txt, cache->pos, lineno - cache->lineno, &lines_skipped);
1676 cache->lineno += lines_skipped;
1677 } else if (lineno < cache->lineno) {
1678 #if 0
1679 // TODO does it make sense to scan memory backwards here?
1680 size_t diff = cache->lineno - lineno;
1681 if (diff < lineno) {
1682 lines_skip_backward(txt, cache->pos, diff);
1683 } else
1684 #endif
1685 cache->pos = lines_skip_forward(txt, 0, lineno - 1, &lines_skipped);
1686 cache->lineno = lines_skipped + 1;
1688 return cache->lineno == lineno ? cache->pos : EPOS;
1691 size_t text_lineno_by_pos(Text *txt, size_t pos) {
1692 LineCache *cache = &txt->lines;
1693 if (pos > txt->size)
1694 pos = txt->size;
1695 if (pos < cache->pos) {
1696 size_t diff = cache->pos - pos;
1697 if (diff < pos)
1698 cache->lineno -= lines_count(txt, pos, diff);
1699 else
1700 cache->lineno = lines_count(txt, 0, pos) + 1;
1701 } else if (pos > cache->pos) {
1702 cache->lineno += lines_count(txt, cache->pos, pos - cache->pos);
1704 cache->pos = text_line_begin(txt, pos);
1705 return cache->lineno;
1708 Mark text_mark_set(Text *txt, size_t pos) {
1709 if (pos == txt->size)
1710 return (Mark)&txt->end;
1711 Location loc = piece_get_extern(txt, pos);
1712 if (!loc.piece)
1713 return EMARK;
1714 return (Mark)(loc.piece->data + loc.off);
1717 size_t text_mark_get(Text *txt, Mark mark) {
1718 size_t cur = 0;
1720 if (mark == EMARK)
1721 return EPOS;
1722 if (mark == (Mark)&txt->end)
1723 return txt->size;
1725 for (Piece *p = txt->begin.next; p->next; p = p->next) {
1726 Mark start = (Mark)(p->data);
1727 Mark end = start + p->len;
1728 if (start <= mark && mark < end)
1729 return cur + (mark - start);
1730 cur += p->len;
1733 return EPOS;