build: properly strip elf executables
[vis.git] / text.c
blobb0517bb7a27281a68da3ffb895176e0c99a09b3f
1 #include <unistd.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <time.h>
6 #include <fcntl.h>
7 #include <errno.h>
8 #include <wchar.h>
9 #include <stdint.h>
10 #include <libgen.h>
11 #include <sys/types.h>
12 #include <sys/stat.h>
13 #include <sys/mman.h>
14 #if CONFIG_ACL
15 #include <sys/acl.h>
16 #endif
17 #if CONFIG_SELINUX
18 #include <selinux/selinux.h>
19 #endif
21 #include "text.h"
22 #include "text-util.h"
23 #include "text-motions.h"
24 #include "util.h"
26 /* Allocate blocks holding the actual file content in junks of size: */
27 #ifndef BLOCK_SIZE
28 #define BLOCK_SIZE (1 << 20)
29 #endif
30 /* Files smaller than this value are copied on load, larger ones are mmap(2)-ed
31 * directely. Hence the former can be truncated, while doing so on the latter
32 * results in havoc. */
33 #define BLOCK_MMAP_SIZE (1 << 23)
35 /* Block holding the file content, either readonly mmap(2)-ed from the original
36 * file or heap allocated to store the modifications.
38 typedef struct Block Block;
39 struct Block {
40 size_t size; /* maximal capacity */
41 size_t len; /* current used length / insertion position */
42 char *data; /* actual data */
43 enum { /* type of allocation */
44 MMAP_ORIG, /* mmap(2)-ed from an external file */
45 MMAP, /* mmap(2)-ed from a temporary file only known to this process */
46 MALLOC, /* heap allocated block using malloc(3) */
47 } type;
48 Block *next; /* next junk */
51 /* A piece holds a reference (but doesn't itself store) a certain amount of data.
52 * All active pieces chained together form the whole content of the document.
53 * At the beginning there exists only one piece, spanning the whole document.
54 * Upon insertion/deletion new pieces will be created to represent the changes.
55 * Generally pieces are never destroyed, but kept around to peform undo/redo
56 * operations.
58 struct Piece {
59 Text *text; /* text to which this piece belongs */
60 Piece *prev, *next; /* pointers to the logical predecessor/successor */
61 Piece *global_prev; /* double linked list in order of allocation, */
62 Piece *global_next; /* used to free individual pieces */
63 const char *data; /* pointer into a Block holding the data */
64 size_t len; /* the length in number of bytes of the data */
67 /* used to transform a global position (byte offset starting from the beginning
68 * of the text) into an offset relative to a piece.
70 typedef struct {
71 Piece *piece; /* piece holding the location */
72 size_t off; /* offset into the piece in bytes */
73 } Location;
75 /* A Span holds a certain range of pieces. Changes to the document are always
76 * performed by swapping out an existing span with a new one.
78 typedef struct {
79 Piece *start, *end; /* start/end of the span */
80 size_t len; /* the sum of the lengths of the pieces which form this span */
81 } Span;
83 /* A Change keeps all needed information to redo/undo an insertion/deletion. */
84 typedef struct Change Change;
85 struct Change {
86 Span old; /* all pieces which are being modified/swapped out by the change */
87 Span new; /* all pieces which are introduced/swapped in by the change */
88 size_t pos; /* absolute position at which the change occured */
89 Change *next; /* next change which is part of the same revision */
90 Change *prev; /* previous change which is part of the same revision */
93 /* A Revision is a list of Changes which are used to undo/redo all modifications
94 * since the last snapshot operation. Revisions are stored in a directed graph structure.
96 typedef struct Revision Revision;
97 struct Revision {
98 Change *change; /* the most recent change */
99 Revision *next; /* the next (child) revision in the undo tree */
100 Revision *prev; /* the previous (parent) revision in the undo tree */
101 Revision *earlier; /* the previous Revision, chronologically */
102 Revision *later; /* the next Revision, chronologically */
103 time_t time; /* when the first change of this revision was performed */
104 size_t seq; /* a unique, strictly increasing identifier */
107 typedef struct {
108 size_t pos; /* position in bytes from start of file */
109 size_t lineno; /* line number in file i.e. number of '\n' in [0, pos) */
110 } LineCache;
112 /* The main struct holding all information of a given file */
113 struct Text {
114 Block *block; /* original file content at the time of load operation */
115 Block *blocks; /* all blocks which have been allocated to hold insertion data */
116 Piece *pieces; /* all pieces which have been allocated, used to free them */
117 Piece *cache; /* most recently modified piece */
118 Piece begin, end; /* sentinel nodes which always exists but don't hold any data */
119 Revision *history; /* undo tree */
120 Revision *current_revision; /* revision holding all file changes until a snapshot is performed */
121 Revision *last_revision; /* the last revision added to the tree, chronologically */
122 Revision *saved_revision; /* the last revision at the time of the save operation */
123 size_t size; /* current file content size in bytes */
124 struct stat info; /* stat as probed at load time */
125 LineCache lines; /* mapping between absolute pos in bytes and logical line breaks */
126 enum TextNewLine newlines; /* which type of new lines does the file use */
129 struct TextSave { /* used to hold context between text_save_{begin,commit} calls */
130 Text *txt; /* text to operate on */
131 char *filename; /* filename to save to as given to text_save_begin */
132 char *tmpname; /* temporary name used for atomic rename(2) */
133 int fd; /* file descriptor to write data to using text_save_write */
134 enum TextSaveMethod type; /* method used to save file */
137 /* block management */
138 static Block *block_alloc(Text*, size_t size);
139 static Block *block_read(Text*, size_t size, int fd);
140 static Block *block_mmap(Text*, size_t size, int fd, off_t offset);
141 static void block_free(Block*);
142 static bool block_capacity(Block*, size_t len);
143 static const char *block_append(Block*, const char *data, size_t len);
144 static bool block_insert(Block*, size_t pos, const char *data, size_t len);
145 static bool block_delete(Block*, size_t pos, size_t len);
146 static const char *block_store(Text*, const char *data, size_t len);
147 /* cache layer */
148 static void cache_piece(Text *txt, Piece *p);
149 static bool cache_contains(Text *txt, Piece *p);
150 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len);
151 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len);
152 /* piece management */
153 static Piece *piece_alloc(Text *txt);
154 static void piece_free(Piece *p);
155 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len);
156 static Location piece_get_intern(Text *txt, size_t pos);
157 static Location piece_get_extern(Text *txt, size_t pos);
158 /* span management */
159 static void span_init(Span *span, Piece *start, Piece *end);
160 static void span_swap(Text *txt, Span *old, Span *new);
161 /* change management */
162 static Change *change_alloc(Text *txt, size_t pos);
163 static void change_free(Change *c);
164 /* revision management */
165 static Revision *revision_alloc(Text *txt);
166 static void revision_free(Revision *rev);
167 /* logical line counting cache */
168 static void lineno_cache_invalidate(LineCache *cache);
169 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skiped);
170 static size_t lines_count(Text *txt, size_t pos, size_t len);
172 static ssize_t write_all(int fd, const char *buf, size_t count) {
173 size_t rem = count;
174 while (rem > 0) {
175 ssize_t written = write(fd, buf, rem);
176 if (written < 0) {
177 if (errno == EAGAIN || errno == EINTR)
178 continue;
179 return -1;
180 } else if (written == 0) {
181 break;
183 rem -= written;
184 buf += written;
186 return count - rem;
189 /* allocate a new block of MAX(size, BLOCK_SIZE) bytes */
190 static Block *block_alloc(Text *txt, size_t size) {
191 Block *blk = calloc(1, sizeof *blk);
192 if (!blk)
193 return NULL;
194 if (BLOCK_SIZE > size)
195 size = BLOCK_SIZE;
196 if (!(blk->data = malloc(size))) {
197 free(blk);
198 return NULL;
200 blk->type = MALLOC;
201 blk->size = size;
202 blk->next = txt->blocks;
203 txt->blocks = blk;
204 return blk;
207 static Block *block_read(Text *txt, size_t size, int fd) {
208 Block *blk = block_alloc(txt, size);
209 if (!blk)
210 return NULL;
211 while (size > 0) {
212 char data[4096];
213 ssize_t len = read(fd, data, MIN(sizeof(data), size));
214 if (len == -1) {
215 txt->blocks = blk->next;
216 block_free(blk);
217 return NULL;
218 } else if (len == 0) {
219 break;
220 } else {
221 block_append(blk, data, len);
222 size -= len;
225 return blk;
228 static Block *block_mmap(Text *txt, size_t size, int fd, off_t offset) {
229 Block *blk = calloc(1, sizeof *blk);
230 if (!blk)
231 return NULL;
232 if (size) {
233 blk->data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
234 if (blk->data == MAP_FAILED) {
235 free(blk);
236 return NULL;
239 blk->type = MMAP_ORIG;
240 blk->size = size;
241 blk->len = size;
242 blk->next = txt->blocks;
243 txt->blocks = blk;
244 return blk;
247 static void block_free(Block *blk) {
248 if (!blk)
249 return;
250 if (blk->type == MALLOC)
251 free(blk->data);
252 else if ((blk->type == MMAP_ORIG || blk->type == MMAP) && blk->data)
253 munmap(blk->data, blk->size);
254 free(blk);
257 /* check whether block has enough free space to store len bytes */
258 static bool block_capacity(Block *blk, size_t len) {
259 return blk->size - blk->len >= len;
262 /* append data to block, assumes there is enough space available */
263 static const char *block_append(Block *blk, const char *data, size_t len) {
264 char *dest = memcpy(blk->data + blk->len, data, len);
265 blk->len += len;
266 return dest;
269 /* stores the given data in a block, allocates a new one if necessary. returns
270 * a pointer to the storage location or NULL if allocation failed. */
271 static const char *block_store(Text *txt, const char *data, size_t len) {
272 Block *blk = txt->blocks;
273 if ((!blk || !block_capacity(blk, len)) && !(blk = block_alloc(txt, len)))
274 return NULL;
275 return block_append(blk, data, len);
278 /* insert data into block at an arbitrary position, this should only be used with
279 * data of the most recently created piece. */
280 static bool block_insert(Block *blk, size_t pos, const char *data, size_t len) {
281 if (pos > blk->len || !block_capacity(blk, len))
282 return false;
283 if (blk->len == pos)
284 return block_append(blk, data, len);
285 char *insert = blk->data + pos;
286 memmove(insert + len, insert, blk->len - pos);
287 memcpy(insert, data, len);
288 blk->len += len;
289 return true;
292 /* delete data from a block at an arbitrary position, this should only be used with
293 * data of the most recently created piece. */
294 static bool block_delete(Block *blk, size_t pos, size_t len) {
295 if (pos + len > blk->len)
296 return false;
297 if (blk->len == pos) {
298 blk->len -= len;
299 return true;
301 char *delete = blk->data + pos;
302 memmove(delete, delete + len, blk->len - pos - len);
303 blk->len -= len;
304 return true;
307 /* cache the given piece if it is the most recently changed one */
308 static void cache_piece(Text *txt, Piece *p) {
309 Block *blk = txt->blocks;
310 if (!blk || p->data < blk->data || p->data + p->len != blk->data + blk->len)
311 return;
312 txt->cache = p;
315 /* check whether the given piece was the most recently modified one */
316 static bool cache_contains(Text *txt, Piece *p) {
317 Block *blk = txt->blocks;
318 Revision *rev = txt->current_revision;
319 if (!blk || !txt->cache || txt->cache != p || !rev || !rev->change)
320 return false;
322 Piece *start = rev->change->new.start;
323 Piece *end = rev->change->new.end;
324 bool found = false;
325 for (Piece *cur = start; !found; cur = cur->next) {
326 if (cur == p)
327 found = true;
328 if (cur == end)
329 break;
332 return found && p->data + p->len == blk->data + blk->len;
335 /* try to insert a junk of data at a given piece offset. the insertion is only
336 * performed if the piece is the most recenetly changed one. the legnth of the
337 * piece, the span containing it and the whole text is adjusted accordingly */
338 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len) {
339 if (!cache_contains(txt, p))
340 return false;
341 Block *blk = txt->blocks;
342 size_t bufpos = p->data + off - blk->data;
343 if (!block_insert(blk, bufpos, data, len))
344 return false;
345 p->len += len;
346 txt->current_revision->change->new.len += len;
347 txt->size += len;
348 return true;
351 /* try to delete a junk of data at a given piece offset. the deletion is only
352 * performed if the piece is the most recenetly changed one and the whole
353 * affected range lies within it. the legnth of the piece, the span containing it
354 * and the whole text is adjusted accordingly */
355 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len) {
356 if (!cache_contains(txt, p))
357 return false;
358 Block *blk = txt->blocks;
359 size_t bufpos = p->data + off - blk->data;
360 if (off + len > p->len || !block_delete(blk, bufpos, len))
361 return false;
362 p->len -= len;
363 txt->current_revision->change->new.len -= len;
364 txt->size -= len;
365 return true;
368 /* initialize a span and calculate its length */
369 static void span_init(Span *span, Piece *start, Piece *end) {
370 size_t len = 0;
371 span->start = start;
372 span->end = end;
373 for (Piece *p = start; p; p = p->next) {
374 len += p->len;
375 if (p == end)
376 break;
378 span->len = len;
381 /* swap out an old span and replace it with a new one.
383 * - if old is an empty span do not remove anything, just insert the new one
384 * - if new is an empty span do not insert anything, just remove the old one
386 * adjusts the document size accordingly.
388 static void span_swap(Text *txt, Span *old, Span *new) {
389 if (old->len == 0 && new->len == 0) {
390 return;
391 } else if (old->len == 0) {
392 /* insert new span */
393 new->start->prev->next = new->start;
394 new->end->next->prev = new->end;
395 } else if (new->len == 0) {
396 /* delete old span */
397 old->start->prev->next = old->end->next;
398 old->end->next->prev = old->start->prev;
399 } else {
400 /* replace old with new */
401 old->start->prev->next = new->start;
402 old->end->next->prev = new->end;
404 txt->size -= old->len;
405 txt->size += new->len;
408 /* Allocate a new revision and place it in the revision graph.
409 * All further changes will be associated with this revision. */
410 static Revision *revision_alloc(Text *txt) {
411 Revision *rev = calloc(1, sizeof *rev);
412 if (!rev)
413 return NULL;
414 rev->time = time(NULL);
415 txt->current_revision = rev;
417 /* set sequence number */
418 if (!txt->last_revision)
419 rev->seq = 0;
420 else
421 rev->seq = txt->last_revision->seq + 1;
423 /* set earlier, later pointers */
424 if (txt->last_revision)
425 txt->last_revision->later = rev;
426 rev->earlier = txt->last_revision;
428 if (!txt->history) {
429 txt->history = rev;
430 return rev;
433 /* set prev, next pointers */
434 rev->prev = txt->history;
435 txt->history->next = rev;
436 txt->history = rev;
437 return rev;
440 static void revision_free(Revision *rev) {
441 if (!rev)
442 return;
443 for (Change *next, *c = rev->change; c; c = next) {
444 next = c->next;
445 change_free(c);
447 free(rev);
450 static Piece *piece_alloc(Text *txt) {
451 Piece *p = calloc(1, sizeof *p);
452 if (!p)
453 return NULL;
454 p->text = txt;
455 p->global_next = txt->pieces;
456 if (txt->pieces)
457 txt->pieces->global_prev = p;
458 txt->pieces = p;
459 return p;
462 static void piece_free(Piece *p) {
463 if (!p)
464 return;
465 if (p->global_prev)
466 p->global_prev->global_next = p->global_next;
467 if (p->global_next)
468 p->global_next->global_prev = p->global_prev;
469 if (p->text->pieces == p)
470 p->text->pieces = p->global_next;
471 if (p->text->cache == p)
472 p->text->cache = NULL;
473 free(p);
476 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len) {
477 p->prev = prev;
478 p->next = next;
479 p->data = data;
480 p->len = len;
483 /* returns the piece holding the text at byte offset pos. if pos happens to
484 * be at a piece boundry i.e. the first byte of a piece then the previous piece
485 * to the left is returned with an offset of piece->len. this is convenient for
486 * modifications to the piece chain where both pieces (the returned one and the
487 * one following it) are needed, but unsuitable as a public interface.
489 * in particular if pos is zero, the begin sentinel piece is returned.
491 static Location piece_get_intern(Text *txt, size_t pos) {
492 size_t cur = 0;
493 for (Piece *p = &txt->begin; p->next; p = p->next) {
494 if (cur <= pos && pos <= cur + p->len)
495 return (Location){ .piece = p, .off = pos - cur };
496 cur += p->len;
499 return (Location){ 0 };
502 /* similiar to piece_get_intern but usable as a public API. returns the piece
503 * holding the text at byte offset pos. never returns a sentinel piece.
504 * it pos is the end of file (== text_size()) and the file is not empty then
505 * the last piece holding data is returned.
507 static Location piece_get_extern(Text *txt, size_t pos) {
508 size_t cur = 0;
510 if (pos > 0 && pos == txt->size) {
511 Piece *p = txt->begin.next;
512 while (p->next->next)
513 p = p->next;
514 return (Location){ .piece = p, .off = p->len };
517 for (Piece *p = txt->begin.next; p->next; p = p->next) {
518 if (cur <= pos && pos < cur + p->len)
519 return (Location){ .piece = p, .off = pos - cur };
520 cur += p->len;
523 return (Location){ 0 };
526 /* allocate a new change, associate it with current revision or a newly
527 * allocated one if none exists. */
528 static Change *change_alloc(Text *txt, size_t pos) {
529 Revision *rev = txt->current_revision;
530 if (!rev) {
531 rev = revision_alloc(txt);
532 if (!rev)
533 return NULL;
535 Change *c = calloc(1, sizeof *c);
536 if (!c)
537 return NULL;
538 c->pos = pos;
539 c->next = rev->change;
540 if (rev->change)
541 rev->change->prev = c;
542 rev->change = c;
543 return c;
546 static void change_free(Change *c) {
547 if (!c)
548 return;
549 /* only free the new part of the span, the old one is still in use */
550 if (c->new.start != c->new.end)
551 piece_free(c->new.end);
552 piece_free(c->new.start);
553 free(c);
556 /* When inserting new data there are 2 cases to consider.
558 * - in the first the insertion point falls into the middle of an exisiting
559 * piece which is replaced by three new pieces:
561 * /-+ --> +---------------+ --> +-\
562 * | | | existing text | | |
563 * \-+ <-- +---------------+ <-- +-/
565 * Insertion point for "demo "
567 * /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
568 * | | | existing| |demo | |text | | |
569 * \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
571 * - the second case deals with an insertion point at a piece boundry:
573 * /-+ --> +---------------+ --> +-\
574 * | | | existing text | | |
575 * \-+ <-- +---------------+ <-- +-/
577 * Insertion point for "short"
579 * /-+ --> +-----+ --> +---------------+ --> +-\
580 * | | |short| | existing text | | |
581 * \-+ <-- +-----+ <-- +---------------+ <-- +-/
583 bool text_insert(Text *txt, size_t pos, const char *data, size_t len) {
584 if (len == 0)
585 return true;
586 if (pos > txt->size)
587 return false;
588 if (pos < txt->lines.pos)
589 lineno_cache_invalidate(&txt->lines);
591 Location loc = piece_get_intern(txt, pos);
592 Piece *p = loc.piece;
593 if (!p)
594 return false;
595 size_t off = loc.off;
596 if (cache_insert(txt, p, off, data, len))
597 return true;
599 Change *c = change_alloc(txt, pos);
600 if (!c)
601 return false;
603 if (!(data = block_store(txt, data, len)))
604 return false;
606 Piece *new = NULL;
608 if (off == p->len) {
609 /* insert between two existing pieces, hence there is nothing to
610 * remove, just add a new piece holding the extra text */
611 if (!(new = piece_alloc(txt)))
612 return false;
613 piece_init(new, p, p->next, data, len);
614 span_init(&c->new, new, new);
615 span_init(&c->old, NULL, NULL);
616 } else {
617 /* insert into middle of an existing piece, therfore split the old
618 * piece. that is we have 3 new pieces one containing the content
619 * before the insertion point then one holding the newly inserted
620 * text and one holding the content after the insertion point.
622 Piece *before = piece_alloc(txt);
623 new = piece_alloc(txt);
624 Piece *after = piece_alloc(txt);
625 if (!before || !new || !after)
626 return false;
627 piece_init(before, p->prev, new, p->data, off);
628 piece_init(new, before, after, data, len);
629 piece_init(after, new, p->next, p->data + off, p->len - off);
631 span_init(&c->new, before, after);
632 span_init(&c->old, p, p);
635 cache_piece(txt, new);
636 span_swap(txt, &c->old, &c->new);
637 return true;
640 static bool text_vprintf(Text *txt, size_t pos, const char *format, va_list ap) {
641 va_list ap_save;
642 va_copy(ap_save, ap);
643 int len = vsnprintf(NULL, 0, format, ap);
644 if (len == -1)
645 return false;
646 char *buf = malloc(len+1);
647 bool ret = buf && (vsnprintf(buf, len+1, format, ap_save) == len) && text_insert(txt, pos, buf, len);
648 free(buf);
649 va_end(ap_save);
650 return ret;
653 bool text_appendf(Text *txt, const char *format, ...) {
654 va_list ap;
655 va_start(ap, format);
656 bool ret = text_vprintf(txt, text_size(txt), format, ap);
657 va_end(ap);
658 return ret;
661 bool text_printf(Text *txt, size_t pos, const char *format, ...) {
662 va_list ap;
663 va_start(ap, format);
664 bool ret = text_vprintf(txt, pos, format, ap);
665 va_end(ap);
666 return ret;
669 size_t text_insert_newline(Text *txt, size_t pos) {
670 const char *data = text_newline_char(txt);
671 size_t len = strlen(data);
672 return text_insert(txt, pos, data, len) ? len : 0;
675 static size_t revision_undo(Text *txt, Revision *rev) {
676 size_t pos = EPOS;
677 for (Change *c = rev->change; c; c = c->next) {
678 span_swap(txt, &c->new, &c->old);
679 pos = c->pos;
681 return pos;
684 static size_t revision_redo(Text *txt, Revision *rev) {
685 size_t pos = EPOS;
686 Change *c = rev->change;
687 while (c->next)
688 c = c->next;
689 for ( ; c; c = c->prev) {
690 span_swap(txt, &c->old, &c->new);
691 pos = c->pos;
692 if (c->new.len > c->old.len)
693 pos += c->new.len - c->old.len;
695 return pos;
698 size_t text_undo(Text *txt) {
699 size_t pos = EPOS;
700 /* taking rev snapshot makes sure that txt->current_revision is reset */
701 text_snapshot(txt);
702 Revision *rev = txt->history->prev;
703 if (!rev)
704 return pos;
705 pos = revision_undo(txt, txt->history);
706 txt->history = rev;
707 lineno_cache_invalidate(&txt->lines);
708 return pos;
711 size_t text_redo(Text *txt) {
712 size_t pos = EPOS;
713 /* taking a snapshot makes sure that txt->current_revision is reset */
714 text_snapshot(txt);
715 Revision *rev = txt->history->next;
716 if (!rev)
717 return pos;
718 pos = revision_redo(txt, rev);
719 txt->history = rev;
720 lineno_cache_invalidate(&txt->lines);
721 return pos;
724 static bool history_change_branch(Revision *rev) {
725 bool changed = false;
726 while (rev->prev) {
727 if (rev->prev->next != rev) {
728 rev->prev->next = rev;
729 changed = true;
731 rev = rev->prev;
733 return changed;
736 static size_t history_traverse_to(Text *txt, Revision *rev) {
737 size_t pos = EPOS;
738 if (!rev)
739 return pos;
740 bool changed = history_change_branch(rev);
741 if (!changed) {
742 if (rev->seq == txt->history->seq) {
743 return txt->lines.pos;
744 } else if (rev->seq > txt->history->seq) {
745 while (txt->history != rev)
746 pos = text_redo(txt);
747 return pos;
748 } else if (rev->seq < txt->history->seq) {
749 while (txt->history != rev)
750 pos = text_undo(txt);
751 return pos;
753 } else {
754 while (txt->history->prev && txt->history->prev->next == txt->history)
755 text_undo(txt);
756 pos = text_undo(txt);
757 while (txt->history != rev)
758 pos = text_redo(txt);
759 return pos;
761 return pos;
764 size_t text_earlier(Text *txt, int count) {
765 Revision *rev = txt->history;
766 while (count-- > 0 && rev->earlier)
767 rev = rev->earlier;
768 return history_traverse_to(txt, rev);
771 size_t text_later(Text *txt, int count) {
772 Revision *rev = txt->history;
773 while (count-- > 0 && rev->later)
774 rev = rev->later;
775 return history_traverse_to(txt, rev);
778 size_t text_restore(Text *txt, time_t time) {
779 Revision *rev = txt->history;
780 while (time < rev->time && rev->earlier)
781 rev = rev->earlier;
782 while (time > rev->time && rev->later)
783 rev = rev->later;
784 time_t diff = labs(rev->time - time);
785 if (rev->earlier && rev->earlier != txt->history && labs(rev->earlier->time - time) < diff)
786 rev = rev->earlier;
787 if (rev->later && rev->later != txt->history && labs(rev->later->time - time) < diff)
788 rev = rev->later;
789 return history_traverse_to(txt, rev);
792 time_t text_state(Text *txt) {
793 return txt->history->time;
796 static bool preserve_acl(int src, int dest) {
797 #if CONFIG_ACL
798 acl_t acl = acl_get_fd(src);
799 if (!acl)
800 return errno == ENOTSUP ? true : false;
801 if (acl_set_fd(dest, acl) == -1) {
802 acl_free(acl);
803 return false;
805 acl_free(acl);
806 #endif /* CONFIG_ACL */
807 return true;
810 static bool preserve_selinux_context(int src, int dest) {
811 #if CONFIG_SELINUX
812 char *context = NULL;
813 if (!is_selinux_enabled())
814 return true;
815 if (fgetfilecon(src, &context) == -1)
816 return errno == ENOTSUP ? true : false;
817 if (fsetfilecon(dest, context) == -1) {
818 freecon(context);
819 return false;
821 freecon(context);
822 #endif /* CONFIG_SELINUX */
823 return true;
826 /* Create a new file named `filename~` and try to preserve all important
827 * meta data. After the file content has been written to this temporary
828 * file, text_save_commit_atomic will atomically move it to its final
829 * (possibly already existing) destination using rename(2).
831 * This approach does not work if:
833 * - the file is a symbolic link
834 * - the file is a hard link
835 * - file ownership can not be preserved
836 * - file group can not be preserved
837 * - directory permissions do not allow creation of a new file
838 * - POSXI ACL can not be preserved (if enabled)
839 * - SELinux security context can not be preserved (if enabled)
841 static bool text_save_begin_atomic(TextSave *ctx) {
842 int oldfd, saved_errno;
843 if ((oldfd = open(ctx->filename, O_RDONLY)) == -1 && errno != ENOENT)
844 goto err;
845 struct stat oldmeta = { 0 };
846 if (oldfd != -1 && lstat(ctx->filename, &oldmeta) == -1)
847 goto err;
848 if (oldfd != -1) {
849 if (S_ISLNK(oldmeta.st_mode)) /* symbolic link */
850 goto err;
851 if (oldmeta.st_nlink > 1) /* hard link */
852 goto err;
855 size_t namelen = strlen(ctx->filename) + 1 /* ~ */ + 1 /* \0 */;
856 if (!(ctx->tmpname = calloc(1, namelen)))
857 goto err;
858 snprintf(ctx->tmpname, namelen, "%s~", ctx->filename);
860 if ((ctx->fd = open(ctx->tmpname, O_CREAT|O_WRONLY|O_TRUNC, oldfd == -1 ? S_IRUSR|S_IWUSR : oldmeta.st_mode)) == -1)
861 goto err;
862 if (oldfd != -1) {
863 if (!preserve_acl(oldfd, ctx->fd) || !preserve_selinux_context(oldfd, ctx->fd))
864 goto err;
865 /* change owner if necessary */
866 if (oldmeta.st_uid != getuid() && fchown(ctx->fd, oldmeta.st_uid, (uid_t)-1) == -1)
867 goto err;
868 /* change group if necessary, in case of failure some editors reset
869 * the group permissions to the same as for others */
870 if (oldmeta.st_gid != getgid() && fchown(ctx->fd, (uid_t)-1, oldmeta.st_gid) == -1)
871 goto err;
872 close(oldfd);
875 ctx->type = TEXT_SAVE_ATOMIC;
876 return true;
877 err:
878 saved_errno = errno;
879 if (oldfd != -1)
880 close(oldfd);
881 if (ctx->fd != -1)
882 close(ctx->fd);
883 ctx->fd = -1;
884 errno = saved_errno;
885 return false;
888 static bool text_save_commit_atomic(TextSave *ctx) {
889 if (fsync(ctx->fd) == -1)
890 return false;
892 struct stat meta = { 0 };
893 if (fstat(ctx->fd, &meta) == -1)
894 return false;
896 bool close_failed = (close(ctx->fd) == -1);
897 ctx->fd = -1;
898 if (close_failed)
899 return false;
901 if (rename(ctx->tmpname, ctx->filename) == -1)
902 return false;
904 free(ctx->tmpname);
905 ctx->tmpname = NULL;
907 int dir = open(dirname(ctx->filename), O_DIRECTORY|O_RDONLY);
908 if (dir == -1)
909 return false;
911 if (fsync(dir) == -1) {
912 close(dir);
913 return false;
916 if (close(dir) == -1)
917 return false;
919 if (meta.st_mtime)
920 ctx->txt->info = meta;
921 return true;
924 static bool text_save_begin_inplace(TextSave *ctx) {
925 Text *txt = ctx->txt;
926 struct stat meta = { 0 };
927 int newfd = -1, saved_errno;
928 if ((ctx->fd = open(ctx->filename, O_CREAT|O_WRONLY, S_IRUSR|S_IWUSR)) == -1)
929 goto err;
930 if (fstat(ctx->fd, &meta) == -1)
931 goto err;
932 if (meta.st_dev == txt->info.st_dev && meta.st_ino == txt->info.st_ino &&
933 txt->block && txt->block->type == MMAP_ORIG && txt->block->size) {
934 /* The file we are going to overwrite is currently mmap-ed from
935 * text_load, therefore we copy the mmap-ed block to a temporary
936 * file and remap it at the same position such that all pointers
937 * from the various pieces are still valid.
939 size_t size = txt->block->size;
940 char tmpname[32] = "/tmp/vis-XXXXXX";
941 mode_t mask = umask(S_IXUSR | S_IRWXG | S_IRWXO);
942 newfd = mkstemp(tmpname);
943 umask(mask);
944 if (newfd == -1)
945 goto err;
946 if (unlink(tmpname) == -1)
947 goto err;
948 ssize_t written = write_all(newfd, txt->block->data, size);
949 if (written == -1 || (size_t)written != size)
950 goto err;
951 if (munmap(txt->block->data, size) == -1)
952 goto err;
954 void *data = mmap(txt->block->data, size, PROT_READ, MAP_SHARED, newfd, 0);
955 if (data == MAP_FAILED)
956 goto err;
957 if (data != txt->block->data) {
958 munmap(data, size);
959 goto err;
961 bool close_failed = (close(newfd) == -1);
962 newfd = -1;
963 if (close_failed)
964 goto err;
965 txt->block->data = data;
966 txt->block->type = MMAP;
967 newfd = -1;
969 /* overwrite the exisiting file content, if somehting goes wrong
970 * here we are screwed, TODO: make a backup before? */
971 if (ftruncate(ctx->fd, 0) == -1)
972 goto err;
973 ctx->type = TEXT_SAVE_INPLACE;
974 return true;
975 err:
976 saved_errno = errno;
977 if (newfd != -1)
978 close(newfd);
979 if (ctx->fd != -1)
980 close(ctx->fd);
981 ctx->fd = -1;
982 errno = saved_errno;
983 return false;
986 static bool text_save_commit_inplace(TextSave *ctx) {
987 if (fsync(ctx->fd) == -1)
988 return false;
989 struct stat meta = { 0 };
990 if (fstat(ctx->fd, &meta) == -1)
991 return false;
992 if (close(ctx->fd) == -1)
993 return false;
994 ctx->txt->info = meta;
995 return true;
998 TextSave *text_save_begin(Text *txt, const char *filename, enum TextSaveMethod type) {
999 if (!filename)
1000 return NULL;
1001 TextSave *ctx = calloc(1, sizeof *ctx);
1002 if (!ctx)
1003 return NULL;
1004 ctx->txt = txt;
1005 ctx->fd = -1;
1006 if (!(ctx->filename = strdup(filename)))
1007 goto err;
1008 errno = 0;
1009 if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_ATOMIC) && text_save_begin_atomic(ctx))
1010 return ctx;
1011 if (errno == ENOSPC)
1012 goto err;
1013 if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_INPLACE) && text_save_begin_inplace(ctx))
1014 return ctx;
1015 err:
1016 text_save_cancel(ctx);
1017 return NULL;
1020 bool text_save_commit(TextSave *ctx) {
1021 if (!ctx)
1022 return true;
1023 bool ret;
1024 Text *txt = ctx->txt;
1025 switch (ctx->type) {
1026 case TEXT_SAVE_ATOMIC:
1027 ret = text_save_commit_atomic(ctx);
1028 break;
1029 case TEXT_SAVE_INPLACE:
1030 ret = text_save_commit_inplace(ctx);
1031 break;
1032 default:
1033 ret = false;
1034 break;
1037 if (ret) {
1038 txt->saved_revision = txt->history;
1039 text_snapshot(txt);
1041 text_save_cancel(ctx);
1042 return ret;
1045 void text_save_cancel(TextSave *ctx) {
1046 if (!ctx)
1047 return;
1048 int saved_errno = errno;
1049 if (ctx->fd != -1)
1050 close(ctx->fd);
1051 if (ctx->tmpname && ctx->tmpname[0])
1052 unlink(ctx->tmpname);
1053 free(ctx->tmpname);
1054 free(ctx->filename);
1055 free(ctx);
1056 errno = saved_errno;
1059 bool text_save(Text *txt, const char *filename) {
1060 Filerange r = (Filerange){ .start = 0, .end = text_size(txt) };
1061 return text_save_range(txt, &r, filename);
1064 /* First try to save the file atomically using rename(2) if this does not
1065 * work overwrite the file in place. However if something goes wrong during
1066 * this overwrite the original file is permanently damaged.
1068 bool text_save_range(Text *txt, Filerange *range, const char *filename) {
1069 if (!filename) {
1070 txt->saved_revision = txt->history;
1071 text_snapshot(txt);
1072 return true;
1074 TextSave *ctx = text_save_begin(txt, filename, TEXT_SAVE_AUTO);
1075 if (!ctx)
1076 return false;
1077 ssize_t written = text_write_range(txt, range, ctx->fd);
1078 if (written == -1 || (size_t)written != text_range_size(range)) {
1079 text_save_cancel(ctx);
1080 return false;
1082 return text_save_commit(ctx);
1085 ssize_t text_save_write_range(TextSave *ctx, Filerange *range) {
1086 return text_write_range(ctx->txt, range, ctx->fd);
1089 ssize_t text_write(Text *txt, int fd) {
1090 Filerange r = (Filerange){ .start = 0, .end = text_size(txt) };
1091 return text_write_range(txt, &r, fd);
1094 ssize_t text_write_range(Text *txt, Filerange *range, int fd) {
1095 size_t size = text_range_size(range), rem = size;
1096 for (Iterator it = text_iterator_get(txt, range->start);
1097 rem > 0 && text_iterator_valid(&it);
1098 text_iterator_next(&it)) {
1099 size_t prem = it.end - it.text;
1100 if (prem > rem)
1101 prem = rem;
1102 ssize_t written = write_all(fd, it.text, prem);
1103 if (written == -1)
1104 return -1;
1105 rem -= written;
1106 if ((size_t)written != prem)
1107 break;
1109 return size - rem;
1112 /* load the given file as starting point for further editing operations.
1113 * to start with an empty document, pass NULL as filename. */
1114 Text *text_load(const char *filename) {
1115 Text *txt = calloc(1, sizeof *txt);
1116 if (!txt)
1117 return NULL;
1118 int fd = -1;
1119 piece_init(&txt->begin, NULL, &txt->end, NULL, 0);
1120 piece_init(&txt->end, &txt->begin, NULL, NULL, 0);
1121 lineno_cache_invalidate(&txt->lines);
1122 if (filename) {
1123 if ((fd = open(filename, O_RDONLY)) == -1)
1124 goto out;
1125 if (fstat(fd, &txt->info) == -1)
1126 goto out;
1127 if (!S_ISREG(txt->info.st_mode)) {
1128 errno = S_ISDIR(txt->info.st_mode) ? EISDIR : ENOTSUP;
1129 goto out;
1131 // XXX: use lseek(fd, 0, SEEK_END); instead?
1132 size_t size = txt->info.st_size;
1133 if (size < BLOCK_MMAP_SIZE)
1134 txt->block = block_read(txt, size, fd);
1135 else
1136 txt->block = block_mmap(txt, size, fd, 0);
1137 if (!txt->block)
1138 goto out;
1139 Piece *p = piece_alloc(txt);
1140 if (!p)
1141 goto out;
1142 piece_init(&txt->begin, NULL, p, NULL, 0);
1143 piece_init(p, &txt->begin, &txt->end, txt->block->data, txt->block->len);
1144 piece_init(&txt->end, p, NULL, NULL, 0);
1145 txt->size = txt->block->len;
1147 /* write an empty revision */
1148 change_alloc(txt, EPOS);
1149 text_snapshot(txt);
1150 txt->saved_revision = txt->history;
1152 if (fd != -1)
1153 close(fd);
1154 return txt;
1155 out:
1156 if (fd != -1)
1157 close(fd);
1158 text_free(txt);
1159 return NULL;
1162 struct stat text_stat(Text *txt) {
1163 return txt->info;
1166 /* A delete operation can either start/stop midway through a piece or at
1167 * a boundry. In the former case a new piece is created to represent the
1168 * remaining text before/after the modification point.
1170 * /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
1171 * | | | existing| |demo | |text | | |
1172 * \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
1173 * ^ ^
1174 * |------ delete range -----|
1176 * /-+ --> +----+ --> +--+ --> +-\
1177 * | | | exi| |t | | |
1178 * \-+ <-- +----+ <-- +--+ <-- +-/
1180 bool text_delete(Text *txt, size_t pos, size_t len) {
1181 if (len == 0)
1182 return true;
1183 if (pos + len > txt->size)
1184 return false;
1185 if (pos < txt->lines.pos)
1186 lineno_cache_invalidate(&txt->lines);
1188 Location loc = piece_get_intern(txt, pos);
1189 Piece *p = loc.piece;
1190 if (!p)
1191 return false;
1192 size_t off = loc.off;
1193 if (cache_delete(txt, p, off, len))
1194 return true;
1195 Change *c = change_alloc(txt, pos);
1196 if (!c)
1197 return false;
1199 bool midway_start = false, midway_end = false; /* split pieces? */
1200 Piece *before, *after; /* unmodified pieces before/after deletion point */
1201 Piece *start, *end; /* span which is removed */
1202 size_t cur; /* how much has already been deleted */
1204 if (off == p->len) {
1205 /* deletion starts at a piece boundry */
1206 cur = 0;
1207 before = p;
1208 start = p->next;
1209 } else {
1210 /* deletion starts midway through a piece */
1211 midway_start = true;
1212 cur = p->len - off;
1213 start = p;
1214 before = piece_alloc(txt);
1215 if (!before)
1216 return false;
1219 /* skip all pieces which fall into deletion range */
1220 while (cur < len) {
1221 p = p->next;
1222 cur += p->len;
1225 if (cur == len) {
1226 /* deletion stops at a piece boundry */
1227 end = p;
1228 after = p->next;
1229 } else {
1230 /* cur > len: deletion stops midway through a piece */
1231 midway_end = true;
1232 end = p;
1233 after = piece_alloc(txt);
1234 if (!after)
1235 return false;
1236 piece_init(after, before, p->next, p->data + p->len - (cur - len), cur - len);
1239 if (midway_start) {
1240 /* we finally know which piece follows our newly allocated before piece */
1241 piece_init(before, start->prev, after, start->data, off);
1244 Piece *new_start = NULL, *new_end = NULL;
1245 if (midway_start) {
1246 new_start = before;
1247 if (!midway_end)
1248 new_end = before;
1250 if (midway_end) {
1251 if (!midway_start)
1252 new_start = after;
1253 new_end = after;
1256 span_init(&c->new, new_start, new_end);
1257 span_init(&c->old, start, end);
1258 span_swap(txt, &c->old, &c->new);
1259 return true;
1262 bool text_delete_range(Text *txt, Filerange *r) {
1263 if (!text_range_valid(r))
1264 return false;
1265 return text_delete(txt, r->start, text_range_size(r));
1268 /* preserve the current text content such that it can be restored by
1269 * means of undo/redo operations */
1270 void text_snapshot(Text *txt) {
1271 if (txt->current_revision)
1272 txt->last_revision = txt->current_revision;
1273 txt->current_revision = NULL;
1274 txt->cache = NULL;
1278 void text_free(Text *txt) {
1279 if (!txt)
1280 return;
1282 // free history
1283 Revision *hist = txt->history;
1284 while (hist && hist->prev)
1285 hist = hist->prev;
1286 while (hist) {
1287 Revision *later = hist->later;
1288 revision_free(hist);
1289 hist = later;
1292 for (Piece *next, *p = txt->pieces; p; p = next) {
1293 next = p->global_next;
1294 piece_free(p);
1297 for (Block *next, *blk = txt->blocks; blk; blk = next) {
1298 next = blk->next;
1299 block_free(blk);
1302 free(txt);
1305 bool text_modified(Text *txt) {
1306 return txt->saved_revision != txt->history;
1309 bool text_sigbus(Text *txt, const char *addr) {
1310 for (Block *blk = txt->blocks; blk; blk = blk->next) {
1311 if ((blk->type == MMAP_ORIG || blk->type == MMAP) &&
1312 blk->data <= addr && addr < blk->data + blk->size)
1313 return true;
1315 return false;
1318 enum TextNewLine text_newline_type(Text *txt){
1319 if (!txt->newlines) {
1320 txt->newlines = TEXT_NEWLINE_LF; /* default to UNIX style \n new lines */
1321 const char *start = txt->block ? txt->block->data : NULL;
1322 if (start) {
1323 const char *nl = memchr(start, '\n', txt->block->len);
1324 if (nl > start && nl[-1] == '\r')
1325 txt->newlines = TEXT_NEWLINE_CRLF;
1326 } else {
1327 char c;
1328 size_t nl = lines_skip_forward(txt, 0, 1, NULL);
1329 if (nl > 1 && text_byte_get(txt, nl-2, &c) && c == '\r')
1330 txt->newlines = TEXT_NEWLINE_CRLF;
1334 return txt->newlines;
1337 const char *text_newline_char(Text *txt) {
1338 static const char *types[] = {
1339 [TEXT_NEWLINE_LF] = "\n",
1340 [TEXT_NEWLINE_CRLF] = "\r\n",
1342 return types[text_newline_type(txt)];
1345 static bool text_iterator_init(Iterator *it, size_t pos, Piece *p, size_t off) {
1346 Iterator iter = (Iterator){
1347 .pos = pos,
1348 .piece = p,
1349 .start = p ? p->data : NULL,
1350 .end = p ? p->data + p->len : NULL,
1351 .text = p ? p->data + off : NULL,
1353 *it = iter;
1354 return text_iterator_valid(it);
1357 Iterator text_iterator_get(Text *txt, size_t pos) {
1358 Iterator it;
1359 Location loc = piece_get_extern(txt, pos);
1360 text_iterator_init(&it, pos, loc.piece, loc.off);
1361 return it;
1364 bool text_iterator_byte_get(Iterator *it, char *b) {
1365 if (text_iterator_valid(it)) {
1366 if (it->start <= it->text && it->text < it->end) {
1367 *b = *it->text;
1368 return true;
1369 } else if (it->pos == it->piece->text->size) { /* EOF */
1370 *b = '\0';
1371 return true;
1374 return false;
1377 bool text_iterator_next(Iterator *it) {
1378 return text_iterator_init(it, it->pos, it->piece ? it->piece->next : NULL, 0);
1381 bool text_iterator_prev(Iterator *it) {
1382 return text_iterator_init(it, it->pos, it->piece ? it->piece->prev : NULL, 0);
1385 bool text_iterator_valid(const Iterator *it) {
1386 /* filter out sentinel nodes */
1387 return it->piece && it->piece->text;
1390 bool text_iterator_byte_next(Iterator *it, char *b) {
1391 if (!text_iterator_valid(it))
1392 return false;
1393 it->text++;
1394 /* special case for advancement to EOF */
1395 if (it->pos+1 == it->piece->text->size) {
1396 it->pos++;
1397 if (b)
1398 *b = '\0';
1399 return true;
1401 while (it->text >= it->end) {
1402 if (!text_iterator_next(it))
1403 return false;
1404 it->text = it->start;
1406 it->pos++;
1407 if (b)
1408 *b = *it->text;
1409 return true;
1412 bool text_iterator_byte_prev(Iterator *it, char *b) {
1413 if (!text_iterator_valid(it))
1414 return false;
1415 while (it->text == it->start) {
1416 if (!text_iterator_prev(it))
1417 return false;
1418 it->text = it->end;
1420 --it->text;
1421 --it->pos;
1422 if (b)
1423 *b = *it->text;
1424 return true;
1427 bool text_iterator_codepoint_next(Iterator *it, char *c) {
1428 while (text_iterator_byte_next(it, NULL)) {
1429 if (ISUTF8(*it->text)) {
1430 if (c)
1431 *c = *it->text;
1432 return true;
1435 return false;
1438 bool text_iterator_codepoint_prev(Iterator *it, char *c) {
1439 while (text_iterator_byte_prev(it, NULL)) {
1440 if (ISUTF8(*it->text)) {
1441 if (c)
1442 *c = *it->text;
1443 return true;
1446 return false;
1449 bool text_iterator_char_next(Iterator *it, char *c) {
1450 if (!text_iterator_codepoint_next(it, c))
1451 return false;
1452 mbstate_t ps = { 0 };
1453 for (;;) {
1454 char buf[MB_CUR_MAX];
1455 size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf);
1456 wchar_t wc;
1457 size_t wclen = mbrtowc(&wc, buf, len, &ps);
1458 if (wclen == (size_t)-1 && errno == EILSEQ) {
1459 return true;
1460 } else if (wclen == (size_t)-2) {
1461 return false;
1462 } else if (wclen == 0) {
1463 return true;
1464 } else {
1465 int width = wcwidth(wc);
1466 if (width != 0)
1467 return true;
1468 if (!text_iterator_codepoint_next(it, c))
1469 return false;
1472 return true;
1475 bool text_iterator_char_prev(Iterator *it, char *c) {
1476 if (!text_iterator_codepoint_prev(it, c))
1477 return false;
1478 for (;;) {
1479 char buf[MB_CUR_MAX];
1480 size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf);
1481 wchar_t wc;
1482 mbstate_t ps = { 0 };
1483 size_t wclen = mbrtowc(&wc, buf, len, &ps);
1484 if (wclen == (size_t)-1 && errno == EILSEQ) {
1485 return true;
1486 } else if (wclen == (size_t)-2) {
1487 return false;
1488 } else if (wclen == 0) {
1489 return true;
1490 } else {
1491 int width = wcwidth(wc);
1492 if (width != 0)
1493 return true;
1494 if (!text_iterator_codepoint_prev(it, c))
1495 return false;
1498 return true;
1501 bool text_byte_get(Text *txt, size_t pos, char *buf) {
1502 return text_bytes_get(txt, pos, 1, buf);
1505 size_t text_bytes_get(Text *txt, size_t pos, size_t len, char *buf) {
1506 if (!buf)
1507 return 0;
1508 char *cur = buf;
1509 size_t rem = len;
1510 text_iterate(txt, it, pos) {
1511 if (rem == 0)
1512 break;
1513 size_t piece_len = it.end - it.text;
1514 if (piece_len > rem)
1515 piece_len = rem;
1516 memcpy(cur, it.text, piece_len);
1517 cur += piece_len;
1518 rem -= piece_len;
1520 return len - rem;
1523 char *text_bytes_alloc0(Text *txt, size_t pos, size_t len) {
1524 if (len == SIZE_MAX)
1525 return NULL;
1526 char *buf = malloc(len+1);
1527 if (!buf)
1528 return NULL;
1529 len = text_bytes_get(txt, pos, len, buf);
1530 buf[len] = '\0';
1531 return buf;
1534 size_t text_size(Text *txt) {
1535 return txt->size;
1538 /* count the number of new lines '\n' in range [pos, pos+len) */
1539 static size_t lines_count(Text *txt, size_t pos, size_t len) {
1540 size_t lines = 0;
1541 text_iterate(txt, it, pos) {
1542 const char *start = it.text;
1543 while (len > 0 && start < it.end) {
1544 size_t n = MIN(len, (size_t)(it.end - start));
1545 const char *end = memchr(start, '\n', n);
1546 if (!end) {
1547 len -= n;
1548 break;
1550 lines++;
1551 len -= end - start + 1;
1552 start = end + 1;
1555 if (len == 0)
1556 break;
1558 return lines;
1561 /* skip n lines forward and return position afterwards */
1562 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skipped) {
1563 size_t lines_old = lines;
1564 text_iterate(txt, it, pos) {
1565 const char *start = it.text;
1566 while (lines > 0 && start < it.end) {
1567 size_t n = it.end - start;
1568 const char *end = memchr(start, '\n', n);
1569 if (!end) {
1570 pos += n;
1571 break;
1573 pos += end - start + 1;
1574 start = end + 1;
1575 lines--;
1578 if (lines == 0)
1579 break;
1581 if (lines_skipped)
1582 *lines_skipped = lines_old - lines;
1583 return pos;
1586 static void lineno_cache_invalidate(LineCache *cache) {
1587 cache->pos = 0;
1588 cache->lineno = 1;
1591 size_t text_pos_by_lineno(Text *txt, size_t lineno) {
1592 size_t lines_skipped;
1593 LineCache *cache = &txt->lines;
1594 if (lineno <= 1)
1595 return 0;
1596 if (lineno > cache->lineno) {
1597 cache->pos = lines_skip_forward(txt, cache->pos, lineno - cache->lineno, &lines_skipped);
1598 cache->lineno += lines_skipped;
1599 } else if (lineno < cache->lineno) {
1600 #if 0
1601 // TODO does it make sense to scan memory backwards here?
1602 size_t diff = cache->lineno - lineno;
1603 if (diff < lineno) {
1604 lines_skip_backward(txt, cache->pos, diff);
1605 } else
1606 #endif
1607 cache->pos = lines_skip_forward(txt, 0, lineno - 1, &lines_skipped);
1608 cache->lineno = lines_skipped + 1;
1610 return cache->lineno == lineno ? cache->pos : EPOS;
1613 size_t text_lineno_by_pos(Text *txt, size_t pos) {
1614 LineCache *cache = &txt->lines;
1615 if (pos > txt->size)
1616 pos = txt->size;
1617 if (pos < cache->pos) {
1618 size_t diff = cache->pos - pos;
1619 if (diff < pos)
1620 cache->lineno -= lines_count(txt, pos, diff);
1621 else
1622 cache->lineno = lines_count(txt, 0, pos) + 1;
1623 } else if (pos > cache->pos) {
1624 cache->lineno += lines_count(txt, cache->pos, pos - cache->pos);
1626 cache->pos = text_line_begin(txt, pos);
1627 return cache->lineno;
1630 Mark text_mark_set(Text *txt, size_t pos) {
1631 if (pos == 0)
1632 return (Mark)&txt->begin;
1633 if (pos == txt->size)
1634 return (Mark)&txt->end;
1635 Location loc = piece_get_extern(txt, pos);
1636 if (!loc.piece)
1637 return EMARK;
1638 return (Mark)(loc.piece->data + loc.off);
1641 size_t text_mark_get(Text *txt, Mark mark) {
1642 size_t cur = 0;
1644 if (mark == EMARK)
1645 return EPOS;
1646 if (mark == (Mark)&txt->begin)
1647 return 0;
1648 if (mark == (Mark)&txt->end)
1649 return txt->size;
1651 for (Piece *p = txt->begin.next; p->next; p = p->next) {
1652 Mark start = (Mark)(p->data);
1653 Mark end = start + p->len;
1654 if (start <= mark && mark < end)
1655 return cur + (mark - start);
1656 cur += p->len;
1659 return EPOS;
1662 size_t text_history_get(Text *txt, size_t index) {
1663 for (Revision *rev = txt->current_revision ? txt->current_revision : txt->history; rev; rev = rev->prev) {
1664 if (index-- == 0) {
1665 Change *c = rev->change;
1666 while (c && c->next)
1667 c = c->next;
1668 return c ? c->pos : EPOS;
1671 return EPOS;