Overhaul build system
[vis.git] / text.c
bloba7016cae223380dba19f99ec2d1940cffc05de1b
1 /*
2 * Copyright (c) 2014 Marc André Tanner <mat at brain-dump.org>
4 * Permission to use, copy, modify, and/or distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 #include <unistd.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <time.h>
21 #include <fcntl.h>
22 #include <errno.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <sys/mman.h>
26 #ifdef HAVE_ACL
27 #include <sys/acl.h>
28 #endif
29 #ifdef HAVE_SELINUX
30 #include <selinux/selinux.h>
31 #endif
33 #include "text.h"
34 #include "text-util.h"
35 #include "util.h"
37 /* Allocate buffers holding the actual file content in junks of size: */
38 #define BUFFER_SIZE (1 << 20)
39 /* Files smaller than this value are copied on load, larger ones are mmap(2)-ed
40 * directely. Hence the former can be truncated, while doing so on the latter
41 * results in havoc. */
42 #define BUFFER_MMAP_SIZE (1 << 23)
44 /* Buffer holding the file content, either readonly mmap(2)-ed from the original
45 * file or heap allocated to store the modifications.
47 typedef struct Buffer Buffer;
48 struct Buffer {
49 size_t size; /* maximal capacity */
50 size_t len; /* current used length / insertion position */
51 char *data; /* actual data */
52 enum { MMAP, MALLOC} type; /* type of allocation */
53 Buffer *next; /* next junk */
56 /* A piece holds a reference (but doesn't itself store) a certain amount of data.
57 * All active pieces chained together form the whole content of the document.
58 * At the beginning there exists only one piece, spanning the whole document.
59 * Upon insertion/deletion new pieces will be created to represent the changes.
60 * Generally pieces are never destroyed, but kept around to peform undo/redo
61 * operations.
63 struct Piece {
64 Text *text; /* text to which this piece belongs */
65 Piece *prev, *next; /* pointers to the logical predecessor/successor */
66 Piece *global_prev; /* double linked list in order of allocation, */
67 Piece *global_next; /* used to free individual pieces */
68 const char *data; /* pointer into a Buffer holding the data */
69 size_t len; /* the length in number of bytes of the data */
72 /* used to transform a global position (byte offset starting from the beginning
73 * of the text) into an offset relative to a piece.
75 typedef struct {
76 Piece *piece; /* piece holding the location */
77 size_t off; /* offset into the piece in bytes */
78 } Location;
80 /* A Span holds a certain range of pieces. Changes to the document are always
81 * performed by swapping out an existing span with a new one.
83 typedef struct {
84 Piece *start, *end; /* start/end of the span */
85 size_t len; /* the sum of the lengths of the pieces which form this span */
86 } Span;
88 /* A Change keeps all needed information to redo/undo an insertion/deletion. */
89 typedef struct Change Change;
90 struct Change {
91 Span old; /* all pieces which are being modified/swapped out by the change */
92 Span new; /* all pieces which are introduced/swapped in by the change */
93 size_t pos; /* absolute position at which the change occured */
94 Change *next; /* next change which is part of the same action */
95 Change *prev; /* previous change which is part of the same action */
98 /* An Action is a list of Changes which are used to undo/redo all modifications
99 * since the last snapshot operation. Actions are stored in a directed graph structure.
101 typedef struct Action Action;
102 struct Action {
103 Change *change; /* the most recent change */
104 Action *next; /* the next (child) action in the undo tree */
105 Action *prev; /* the previous (parent) operation in the undo tree */
106 Action *earlier; /* the previous Action, chronologically */
107 Action *later; /* the next Action, chronologically */
108 time_t time; /* when the first change of this action was performed */
109 size_t seq; /* a unique, strictly increasing identifier */
112 typedef struct {
113 size_t pos; /* position in bytes from start of file */
114 size_t lineno; /* line number in file i.e. number of '\n' in [0, pos) */
115 } LineCache;
117 /* The main struct holding all information of a given file */
118 struct Text {
119 Buffer *buf; /* original file content at the time of load operation */
120 Buffer *buffers; /* all buffers which have been allocated to hold insertion data */
121 Piece *pieces; /* all pieces which have been allocated, used to free them */
122 Piece *cache; /* most recently modified piece */
123 Piece begin, end; /* sentinel nodes which always exists but don't hold any data */
124 Action *history; /* undo tree */
125 Action *current_action; /* action holding all file changes until a snapshot is performed */
126 Action *last_action; /* the last action added to the tree, chronologically */
127 Action *saved_action; /* the last action at the time of the save operation */
128 size_t size; /* current file content size in bytes */
129 struct stat info; /* stat as probed at load time */
130 LineCache lines; /* mapping between absolute pos in bytes and logical line breaks */
131 enum TextNewLine newlines; /* which type of new lines does the file use */
134 /* buffer management */
135 static Buffer *buffer_alloc(Text *txt, size_t size);
136 static Buffer *buffer_read(Text *txt, size_t size, int fd);
137 static Buffer *buffer_mmap(Text *txt, size_t size, int fd, off_t offset);
138 static void buffer_free(Buffer *buf);
139 static bool buffer_capacity(Buffer *buf, size_t len);
140 static const char *buffer_append(Buffer *buf, const char *data, size_t len);
141 static bool buffer_insert(Buffer *buf, size_t pos, const char *data, size_t len);
142 static bool buffer_delete(Buffer *buf, size_t pos, size_t len);
143 static const char *buffer_store(Text *txt, const char *data, size_t len);
144 /* cache layer */
145 static void cache_piece(Text *txt, Piece *p);
146 static bool cache_contains(Text *txt, Piece *p);
147 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len);
148 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len);
149 /* piece management */
150 static Piece *piece_alloc(Text *txt);
151 static void piece_free(Piece *p);
152 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len);
153 static Location piece_get_intern(Text *txt, size_t pos);
154 static Location piece_get_extern(Text *txt, size_t pos);
155 /* span management */
156 static void span_init(Span *span, Piece *start, Piece *end);
157 static void span_swap(Text *txt, Span *old, Span *new);
158 /* change management */
159 static Change *change_alloc(Text *txt, size_t pos);
160 static void change_free(Change *c);
161 /* action management */
162 static Action *action_alloc(Text *txt);
163 static void action_free(Action *a);
164 /* logical line counting cache */
165 static void lineno_cache_invalidate(LineCache *cache);
166 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skiped);
167 static size_t lines_count(Text *txt, size_t pos, size_t len);
169 static ssize_t write_all(int fd, const char *buf, size_t count) {
170 size_t rem = count;
171 while (rem > 0) {
172 ssize_t written = write(fd, buf, rem);
173 if (written < 0) {
174 if (errno == EAGAIN || errno == EINTR)
175 continue;
176 return -1;
177 } else if (written == 0) {
178 break;
180 rem -= written;
181 buf += written;
183 return count - rem;
186 /* allocate a new buffer of MAX(size, BUFFER_SIZE) bytes */
187 static Buffer *buffer_alloc(Text *txt, size_t size) {
188 Buffer *buf = calloc(1, sizeof(Buffer));
189 if (!buf)
190 return NULL;
191 if (BUFFER_SIZE > size)
192 size = BUFFER_SIZE;
193 if (!(buf->data = malloc(size))) {
194 free(buf);
195 return NULL;
197 buf->type = MALLOC;
198 buf->size = size;
199 buf->next = txt->buffers;
200 txt->buffers = buf;
201 return buf;
204 static Buffer *buffer_read(Text *txt, size_t size, int fd) {
205 Buffer *buf = buffer_alloc(txt, size);
206 if (!buf)
207 return NULL;
208 while (size > 0) {
209 char data[4096];
210 ssize_t len = read(fd, data, MIN(sizeof(data), size));
211 if (len == -1) {
212 txt->buffers = buf->next;
213 buffer_free(buf);
214 return NULL;
215 } else if (len == 0) {
216 break;
217 } else {
218 buffer_append(buf, data, len);
219 size -= len;
222 return buf;
225 static Buffer *buffer_mmap(Text *txt, size_t size, int fd, off_t offset) {
226 Buffer *buf = calloc(1, sizeof(Buffer));
227 if (!buf)
228 return NULL;
229 if (size) {
230 buf->data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
231 if (buf->data == MAP_FAILED) {
232 free(buf);
233 return NULL;
236 buf->type = MMAP;
237 buf->size = size;
238 buf->len = size;
239 buf->next = txt->buffers;
240 txt->buffers = buf;
241 return buf;
244 static void buffer_free(Buffer *buf) {
245 if (!buf)
246 return;
247 if (buf->type == MALLOC)
248 free(buf->data);
249 else if (buf->type == MMAP && buf->data)
250 munmap(buf->data, buf->size);
251 free(buf);
254 /* check whether buffer has enough free space to store len bytes */
255 static bool buffer_capacity(Buffer *buf, size_t len) {
256 return buf->size - buf->len >= len;
259 /* append data to buffer, assumes there is enough space available */
260 static const char *buffer_append(Buffer *buf, const char *data, size_t len) {
261 char *dest = memcpy(buf->data + buf->len, data, len);
262 buf->len += len;
263 return dest;
266 /* stores the given data in a buffer, allocates a new one if necessary. returns
267 * a pointer to the storage location or NULL if allocation failed. */
268 static const char *buffer_store(Text *txt, const char *data, size_t len) {
269 Buffer *buf = txt->buffers;
270 if ((!buf || !buffer_capacity(buf, len)) && !(buf = buffer_alloc(txt, len)))
271 return NULL;
272 return buffer_append(buf, data, len);
275 /* insert data into buffer at an arbitrary position, this should only be used with
276 * data of the most recently created piece. */
277 static bool buffer_insert(Buffer *buf, size_t pos, const char *data, size_t len) {
278 if (pos > buf->len || !buffer_capacity(buf, len))
279 return false;
280 if (buf->len == pos)
281 return buffer_append(buf, data, len);
282 char *insert = buf->data + pos;
283 memmove(insert + len, insert, buf->len - pos);
284 memcpy(insert, data, len);
285 buf->len += len;
286 return true;
289 /* delete data from a buffer at an arbitrary position, this should only be used with
290 * data of the most recently created piece. */
291 static bool buffer_delete(Buffer *buf, size_t pos, size_t len) {
292 if (pos + len > buf->len)
293 return false;
294 if (buf->len == pos) {
295 buf->len -= len;
296 return true;
298 char *delete = buf->data + pos;
299 memmove(delete, delete + len, buf->len - pos - len);
300 buf->len -= len;
301 return true;
304 /* cache the given piece if it is the most recently changed one */
305 static void cache_piece(Text *txt, Piece *p) {
306 Buffer *buf = txt->buffers;
307 if (!buf || p->data < buf->data || p->data + p->len != buf->data + buf->len)
308 return;
309 txt->cache = p;
312 /* check whether the given piece was the most recently modified one */
313 static bool cache_contains(Text *txt, Piece *p) {
314 Buffer *buf = txt->buffers;
315 Action *a = txt->current_action;
316 if (!buf || !txt->cache || txt->cache != p || !a || !a->change)
317 return false;
319 Piece *start = a->change->new.start;
320 Piece *end = a->change->new.end;
321 bool found = false;
322 for (Piece *cur = start; !found; cur = cur->next) {
323 if (cur == p)
324 found = true;
325 if (cur == end)
326 break;
329 return found && p->data + p->len == buf->data + buf->len;
332 /* try to insert a junk of data at a given piece offset. the insertion is only
333 * performed if the piece is the most recenetly changed one. the legnth of the
334 * piece, the span containing it and the whole text is adjusted accordingly */
335 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len) {
336 if (!cache_contains(txt, p))
337 return false;
338 Buffer *buf = txt->buffers;
339 size_t bufpos = p->data + off - buf->data;
340 if (!buffer_insert(buf, bufpos, data, len))
341 return false;
342 p->len += len;
343 txt->current_action->change->new.len += len;
344 txt->size += len;
345 return true;
348 /* try to delete a junk of data at a given piece offset. the deletion is only
349 * performed if the piece is the most recenetly changed one and the whole
350 * affected range lies within it. the legnth of the piece, the span containing it
351 * and the whole text is adjusted accordingly */
352 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len) {
353 if (!cache_contains(txt, p))
354 return false;
355 Buffer *buf = txt->buffers;
356 size_t bufpos = p->data + off - buf->data;
357 if (off + len > p->len || !buffer_delete(buf, bufpos, len))
358 return false;
359 p->len -= len;
360 txt->current_action->change->new.len -= len;
361 txt->size -= len;
362 return true;
365 /* initialize a span and calculate its length */
366 static void span_init(Span *span, Piece *start, Piece *end) {
367 size_t len = 0;
368 span->start = start;
369 span->end = end;
370 for (Piece *p = start; p; p = p->next) {
371 len += p->len;
372 if (p == end)
373 break;
375 span->len = len;
378 /* swap out an old span and replace it with a new one.
380 * - if old is an empty span do not remove anything, just insert the new one
381 * - if new is an empty span do not insert anything, just remove the old one
383 * adjusts the document size accordingly.
385 static void span_swap(Text *txt, Span *old, Span *new) {
386 if (old->len == 0 && new->len == 0) {
387 return;
388 } else if (old->len == 0) {
389 /* insert new span */
390 new->start->prev->next = new->start;
391 new->end->next->prev = new->end;
392 } else if (new->len == 0) {
393 /* delete old span */
394 old->start->prev->next = old->end->next;
395 old->end->next->prev = old->start->prev;
396 } else {
397 /* replace old with new */
398 old->start->prev->next = new->start;
399 old->end->next->prev = new->end;
401 txt->size -= old->len;
402 txt->size += new->len;
405 /* allocate a new action, set its pointers to the other actions in the history,
406 * and set it as txt->history. All further changes will be associated with this action. */
407 static Action *action_alloc(Text *txt) {
408 Action *new = calloc(1, sizeof(Action));
409 if (!new)
410 return NULL;
411 new->time = time(NULL);
412 txt->current_action = new;
414 /* set sequence number */
415 if (!txt->last_action)
416 new->seq = 0;
417 else
418 new->seq = txt->last_action->seq + 1;
420 /* set earlier, later pointers */
421 if (txt->last_action)
422 txt->last_action->later = new;
423 new->earlier = txt->last_action;
425 if (!txt->history) {
426 txt->history = new;
427 return new;
430 /* set prev, next pointers */
431 new->prev = txt->history;
432 txt->history->next = new;
433 txt->history = new;
434 return new;
437 static void action_free(Action *a) {
438 if (!a)
439 return;
440 for (Change *next, *c = a->change; c; c = next) {
441 next = c->next;
442 change_free(c);
444 free(a);
447 static Piece *piece_alloc(Text *txt) {
448 Piece *p = calloc(1, sizeof(Piece));
449 if (!p)
450 return NULL;
451 p->text = txt;
452 p->global_next = txt->pieces;
453 if (txt->pieces)
454 txt->pieces->global_prev = p;
455 txt->pieces = p;
456 return p;
459 static void piece_free(Piece *p) {
460 if (!p)
461 return;
462 if (p->global_prev)
463 p->global_prev->global_next = p->global_next;
464 if (p->global_next)
465 p->global_next->global_prev = p->global_prev;
466 if (p->text->pieces == p)
467 p->text->pieces = p->global_next;
468 if (p->text->cache == p)
469 p->text->cache = NULL;
470 free(p);
473 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len) {
474 p->prev = prev;
475 p->next = next;
476 p->data = data;
477 p->len = len;
480 /* returns the piece holding the text at byte offset pos. if pos happens to
481 * be at a piece boundry i.e. the first byte of a piece then the previous piece
482 * to the left is returned with an offset of piece->len. this is convenient for
483 * modifications to the piece chain where both pieces (the returned one and the
484 * one following it) are needed, but unsuitable as a public interface.
486 * in particular if pos is zero, the begin sentinel piece is returned.
488 static Location piece_get_intern(Text *txt, size_t pos) {
489 size_t cur = 0;
490 for (Piece *p = &txt->begin; p->next; p = p->next) {
491 if (cur <= pos && pos <= cur + p->len)
492 return (Location){ .piece = p, .off = pos - cur };
493 cur += p->len;
496 return (Location){ 0 };
499 /* similiar to piece_get_intern but usable as a public API. returns the piece
500 * holding the text at byte offset pos. never returns a sentinel piece.
501 * it pos is the end of file (== text_size()) and the file is not empty then
502 * the last piece holding data is returned.
504 static Location piece_get_extern(Text *txt, size_t pos) {
505 size_t cur = 0;
507 if (pos > 0 && pos == txt->size) {
508 Piece *p = txt->begin.next;
509 while (p->next->next)
510 p = p->next;
511 return (Location){ .piece = p, .off = p->len };
514 for (Piece *p = txt->begin.next; p->next; p = p->next) {
515 if (cur <= pos && pos < cur + p->len)
516 return (Location){ .piece = p, .off = pos - cur };
517 cur += p->len;
520 return (Location){ 0 };
523 /* allocate a new change, associate it with current action or a newly
524 * allocated one if none exists. */
525 static Change *change_alloc(Text *txt, size_t pos) {
526 Action *a = txt->current_action;
527 if (!a) {
528 a = action_alloc(txt);
529 if (!a)
530 return NULL;
532 Change *c = calloc(1, sizeof(Change));
533 if (!c)
534 return NULL;
535 c->pos = pos;
536 c->next = a->change;
537 if (a->change)
538 a->change->prev = c;
539 a->change = c;
540 return c;
543 static void change_free(Change *c) {
544 if (!c)
545 return;
546 /* only free the new part of the span, the old one is still in use */
547 piece_free(c->new.start);
548 if (c->new.start != c->new.end)
549 piece_free(c->new.end);
550 free(c);
553 /* When inserting new data there are 2 cases to consider.
555 * - in the first the insertion point falls into the middle of an exisiting
556 * piece which is replaced by three new pieces:
558 * /-+ --> +---------------+ --> +-\
559 * | | | existing text | | |
560 * \-+ <-- +---------------+ <-- +-/
562 * Insertion point for "demo "
564 * /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
565 * | | | existing| |demo | |text | | |
566 * \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
568 * - the second case deals with an insertion point at a piece boundry:
570 * /-+ --> +---------------+ --> +-\
571 * | | | existing text | | |
572 * \-+ <-- +---------------+ <-- +-/
574 * Insertion point for "short"
576 * /-+ --> +-----+ --> +---------------+ --> +-\
577 * | | |short| | existing text | | |
578 * \-+ <-- +-----+ <-- +---------------+ <-- +-/
580 bool text_insert(Text *txt, size_t pos, const char *data, size_t len) {
581 if (len == 0)
582 return true;
583 if (pos > txt->size)
584 return false;
585 if (pos < txt->lines.pos)
586 lineno_cache_invalidate(&txt->lines);
588 Location loc = piece_get_intern(txt, pos);
589 Piece *p = loc.piece;
590 if (!p)
591 return false;
592 size_t off = loc.off;
593 if (cache_insert(txt, p, off, data, len))
594 return true;
596 Change *c = change_alloc(txt, pos);
597 if (!c)
598 return false;
600 if (!(data = buffer_store(txt, data, len)))
601 return false;
603 Piece *new = NULL;
605 if (off == p->len) {
606 /* insert between two existing pieces, hence there is nothing to
607 * remove, just add a new piece holding the extra text */
608 if (!(new = piece_alloc(txt)))
609 return false;
610 piece_init(new, p, p->next, data, len);
611 span_init(&c->new, new, new);
612 span_init(&c->old, NULL, NULL);
613 } else {
614 /* insert into middle of an existing piece, therfore split the old
615 * piece. that is we have 3 new pieces one containing the content
616 * before the insertion point then one holding the newly inserted
617 * text and one holding the content after the insertion point.
619 Piece *before = piece_alloc(txt);
620 new = piece_alloc(txt);
621 Piece *after = piece_alloc(txt);
622 if (!before || !new || !after)
623 return false;
624 piece_init(before, p->prev, new, p->data, off);
625 piece_init(new, before, after, data, len);
626 piece_init(after, new, p->next, p->data + off, p->len - off);
628 span_init(&c->new, before, after);
629 span_init(&c->old, p, p);
632 cache_piece(txt, new);
633 span_swap(txt, &c->old, &c->new);
634 return true;
637 bool text_appendf(Text *txt, const char *format, ...) {
638 va_list ap;
639 va_start(ap, format);
640 bool ret = text_vprintf(txt, text_size(txt), format, ap);
641 va_end(ap);
642 return ret;
645 bool text_printf(Text *txt, size_t pos, const char *format, ...) {
646 va_list ap;
647 va_start(ap, format);
648 bool ret = text_vprintf(txt, pos, format, ap);
649 va_end(ap);
650 return ret;
653 bool text_vprintf(Text *txt, size_t pos, const char *format, va_list ap) {
654 va_list ap_save;
655 va_copy(ap_save, ap);
656 int len = vsnprintf(NULL, 0, format, ap);
657 if (len == -1)
658 return false;
659 char *buf = malloc(len+1);
660 bool ret = buf && (vsnprintf(buf, len+1, format, ap_save) == len) && text_insert(txt, pos, buf, len);
661 free(buf);
662 va_end(ap_save);
663 return ret;
666 static size_t action_undo(Text *txt, Action *a) {
667 size_t pos = EPOS;
668 for (Change *c = a->change; c; c = c->next) {
669 span_swap(txt, &c->new, &c->old);
670 pos = c->pos;
672 return pos;
675 static size_t action_redo(Text *txt, Action *a) {
676 size_t pos = EPOS;
677 Change *c = a->change;
678 while (c->next)
679 c = c->next;
680 for ( ; c; c = c->prev) {
681 span_swap(txt, &c->old, &c->new);
682 pos = c->pos;
683 if (c->new.len > c->old.len)
684 pos += c->new.len - c->old.len;
686 return pos;
689 size_t text_undo(Text *txt) {
690 size_t pos = EPOS;
691 /* taking a snapshot makes sure that txt->current_action is reset */
692 text_snapshot(txt);
693 Action *a = txt->history->prev;
694 if (!a)
695 return pos;
696 pos = action_undo(txt, txt->history);
697 txt->history = a;
698 lineno_cache_invalidate(&txt->lines);
699 return pos;
702 size_t text_redo(Text *txt) {
703 size_t pos = EPOS;
704 /* taking a snapshot makes sure that txt->current_action is reset */
705 text_snapshot(txt);
706 Action *a = txt->history->next;
707 if (!a)
708 return pos;
709 pos = action_redo(txt, a);
710 txt->history = a;
711 lineno_cache_invalidate(&txt->lines);
712 return pos;
715 static bool history_change_branch(Action *a) {
716 bool changed = false;
717 while (a->prev) {
718 if (a->prev->next != a) {
719 a->prev->next = a;
720 changed = true;
722 a = a->prev;
724 return changed;
727 static size_t history_traverse_to(Text *txt, Action *a) {
728 size_t pos = EPOS;
729 if (!a)
730 return pos;
731 bool changed = history_change_branch(a);
732 if (!changed) {
733 if (a->seq == txt->history->seq) {
734 return txt->lines.pos;
735 } else if (a->seq > txt->history->seq) {
736 while (txt->history != a)
737 pos = text_redo(txt);
738 return pos;
739 } else if (a->seq < txt->history->seq) {
740 while (txt->history != a)
741 pos = text_undo(txt);
742 return pos;
744 } else {
745 while (txt->history->prev && txt->history->prev->next == txt->history)
746 text_undo(txt);
747 pos = text_undo(txt);
748 while (txt->history != a)
749 pos = text_redo(txt);
750 return pos;
752 return pos;
755 size_t text_earlier(Text *txt, int count) {
756 Action *a = txt->history;
757 while (count-- > 0 && a->earlier)
758 a = a->earlier;
759 return history_traverse_to(txt, a);
762 size_t text_later(Text *txt, int count) {
763 Action *a = txt->history;
764 while (count-- > 0 && a->later)
765 a = a->later;
766 return history_traverse_to(txt, a);
769 size_t text_restore(Text *txt, time_t time) {
770 Action *a = txt->history;
771 while (time < a->time && a->earlier)
772 a = a->earlier;
773 while (time > a->time && a->later)
774 a = a->later;
775 time_t diff = labs(a->time - time);
776 if (a->earlier && a->earlier != txt->history && labs(a->earlier->time - time) < diff)
777 a = a->earlier;
778 if (a->later && a->later != txt->history && labs(a->later->time - time) < diff)
779 a = a->later;
780 return history_traverse_to(txt, a);
783 time_t text_state(Text *txt) {
784 return txt->history->time;
787 static bool preserve_acl(int src, int dest) {
788 #ifdef HAVE_ACL
789 acl_t acl = acl_get_fd(src);
790 if (!acl)
791 return errno == ENOTSUP ? true : false;
792 if (acl_set_fd(dest, acl) == -1) {
793 acl_free(acl);
794 return false;
796 acl_free(acl);
797 #endif /* HAVE_ACL */
798 return true;
801 static bool preserve_selinux_context(int src, int dest) {
802 #ifdef HAVE_SELINUX
803 char *context = NULL;
804 if (!is_selinux_enabled())
805 return true;
806 if (fgetfilecon(src, &context) == -1)
807 return errno == ENOTSUP ? true : false;
808 if (fsetfilecon(dest, context) == -1) {
809 freecon(context);
810 return false;
812 freecon(context);
813 #endif /* HAVE_SELINUX */
814 return true;
817 /* Save current content to given filename. The data is first saved to `filename~`
818 * and then atomically moved to its final (possibly alredy existing) destination
819 * using rename(2). This approach does not work if:
821 * - the file is a symbolic link
822 * - the file is a hard link
823 * - file ownership can not be preserved
824 * - file group can not be preserved
825 * - directory permissions do not allow creation of a new file
826 * - POSXI ACL can not be preserved (if enabled)
827 * - SELinux security context can not be preserved (if enabled)
829 static bool text_save_atomic_range(Text *txt, Filerange *range, const char *filename) {
830 struct stat meta = { 0 }, oldmeta = { 0 };
831 int fd = -1, oldfd = -1, saved_errno;
832 char *tmpname = NULL;
833 size_t size = text_range_size(range);
834 size_t namelen = strlen(filename) + 1 /* ~ */ + 1 /* \0 */;
836 if ((oldfd = open(filename, O_RDONLY)) == -1 && errno != ENOENT)
837 goto err;
838 if (oldfd != -1 && lstat(filename, &oldmeta) == -1)
839 goto err;
840 if (oldfd != -1) {
841 if (S_ISLNK(oldmeta.st_mode)) /* symbolic link */
842 goto err;
843 if (oldmeta.st_nlink > 1) /* hard link */
844 goto err;
846 if (!(tmpname = calloc(1, namelen)))
847 goto err;
848 snprintf(tmpname, namelen, "%s~", filename);
850 /* O_RDWR is needed because otherwise we can't map with MAP_SHARED */
851 if ((fd = open(tmpname, O_CREAT|O_RDWR|O_TRUNC, oldfd == -1 ? S_IRUSR|S_IWUSR : oldmeta.st_mode)) == -1)
852 goto err;
853 if (ftruncate(fd, size) == -1)
854 goto err;
855 if (oldfd != -1) {
856 if (!preserve_acl(oldfd, fd) || !preserve_selinux_context(oldfd, fd))
857 goto err;
858 /* change owner if necessary */
859 if (oldmeta.st_uid != getuid() && fchown(fd, oldmeta.st_uid, (uid_t)-1) == -1)
860 goto err;
861 /* change group if necessary, in case of failure some editors reset
862 * the group permissions to the same as for others */
863 if (oldmeta.st_gid != getgid() && fchown(fd, (uid_t)-1, oldmeta.st_gid) == -1)
864 goto err;
867 if (size > 0) {
868 void *buf = mmap(NULL, size, PROT_WRITE, MAP_SHARED, fd, 0);
869 if (buf == MAP_FAILED)
870 goto err;
872 char *cur = buf;
873 size_t rem = size;
875 for (Iterator it = text_iterator_get(txt, range->start);
876 rem > 0 && text_iterator_valid(&it);
877 text_iterator_next(&it)) {
878 size_t len = it.end - it.text;
879 if (len > rem)
880 len = rem;
881 memcpy(cur, it.text, len);
882 cur += len;
883 rem -= len;
886 if (munmap(buf, size) == -1)
887 goto err;
890 if (oldfd != -1) {
891 close(oldfd);
892 oldfd = -1;
895 if (fsync(fd) == -1)
896 goto err;
898 if (fstat(fd, &meta) == -1)
899 goto err;
901 if (close(fd) == -1) {
902 fd = -1;
903 goto err;
905 fd = -1;
907 if (rename(tmpname, filename) == -1)
908 goto err;
910 if (meta.st_mtime)
911 txt->info = meta;
912 free(tmpname);
913 return true;
914 err:
915 saved_errno = errno;
916 if (oldfd != -1)
917 close(oldfd);
918 if (fd != -1)
919 close(fd);
920 if (tmpname && *tmpname)
921 unlink(tmpname);
922 free(tmpname);
923 errno = saved_errno;
924 return false;
927 bool text_save(Text *txt, const char *filename) {
928 Filerange r = (Filerange){ .start = 0, .end = text_size(txt) };
929 return text_save_range(txt, &r, filename);
932 /* First try to save the file atomically using rename(2) if this does not
933 * work overwrite the file in place. However if something goes wrong during
934 * this overwrite the original file is permanently damaged.
936 bool text_save_range(Text *txt, Filerange *range, const char *filename) {
937 struct stat meta;
938 int fd = -1, newfd = -1;
939 if (!filename || text_save_atomic_range(txt, range, filename))
940 goto ok;
941 if ((fd = open(filename, O_CREAT|O_WRONLY, S_IRUSR|S_IWUSR)) == -1)
942 goto err;
943 if (fstat(fd, &meta) == -1)
944 goto err;
945 if (meta.st_dev == txt->info.st_dev && meta.st_ino == txt->info.st_ino &&
946 txt->buf && txt->buf->type == MMAP && txt->buf->size) {
947 /* The file we are going to overwrite is currently mmap-ed from
948 * text_load, therefore we copy the mmap-ed buffer to a temporary
949 * file and remap it at the same position such that all pointers
950 * from the various pieces are still valid.
952 size_t size = txt->buf->size;
953 char tmpname[32] = "/tmp/vis-XXXXXX";
954 mode_t mask = umask(S_IXUSR | S_IRWXG | S_IRWXO);
955 newfd = mkstemp(tmpname);
956 umask(mask);
957 if (newfd == -1)
958 goto err;
959 if (unlink(tmpname) == -1)
960 goto err;
961 ssize_t written = write_all(newfd, txt->buf->data, size);
962 if (written == -1 || (size_t)written != size)
963 goto err;
964 if (munmap(txt->buf->data, size) == -1)
965 goto err;
967 void *data = mmap(txt->buf->data, size, PROT_READ, MAP_SHARED, newfd, 0);
968 if (data == MAP_FAILED)
969 goto err;
970 if (data != txt->buf->data) {
971 munmap(data, size);
972 goto err;
974 if (close(newfd) == -1) {
975 newfd = -1;
976 goto err;
978 txt->buf->data = data;
979 newfd = -1;
981 /* overwrite the exisiting file content, if somehting goes wrong
982 * here we are screwed, TODO: make a backup before? */
983 if (ftruncate(fd, 0) == -1)
984 goto err;
985 ssize_t written = text_write_range(txt, range, fd);
986 if (written == -1 || (size_t)written != text_range_size(range))
987 goto err;
989 if (fsync(fd) == -1)
990 goto err;
991 if (close(fd) == -1)
992 return false;
993 txt->info = meta;
995 txt->saved_action = txt->history;
996 text_snapshot(txt);
997 return true;
998 err:
999 if (fd != -1)
1000 close(fd);
1001 if (newfd != -1)
1002 close(newfd);
1003 return false;
1006 ssize_t text_write(Text *txt, int fd) {
1007 Filerange r = (Filerange){ .start = 0, .end = text_size(txt) };
1008 return text_write_range(txt, &r, fd);
1011 ssize_t text_write_range(Text *txt, Filerange *range, int fd) {
1012 size_t size = text_range_size(range), rem = size;
1013 for (Iterator it = text_iterator_get(txt, range->start);
1014 rem > 0 && text_iterator_valid(&it);
1015 text_iterator_next(&it)) {
1016 size_t prem = it.end - it.text;
1017 if (prem > rem)
1018 prem = rem;
1019 ssize_t written = write_all(fd, it.text, prem);
1020 if (written == -1)
1021 return -1;
1022 rem -= written;
1023 if ((size_t)written != prem)
1024 break;
1026 return size - rem;
1029 /* load the given file as starting point for further editing operations.
1030 * to start with an empty document, pass NULL as filename. */
1031 Text *text_load(const char *filename) {
1032 Text *txt = calloc(1, sizeof(Text));
1033 if (!txt)
1034 return NULL;
1035 int fd = -1;
1036 piece_init(&txt->begin, NULL, &txt->end, NULL, 0);
1037 piece_init(&txt->end, &txt->begin, NULL, NULL, 0);
1038 lineno_cache_invalidate(&txt->lines);
1039 if (filename) {
1040 if ((fd = open(filename, O_RDONLY)) == -1)
1041 goto out;
1042 if (fstat(fd, &txt->info) == -1)
1043 goto out;
1044 if (!S_ISREG(txt->info.st_mode)) {
1045 errno = S_ISDIR(txt->info.st_mode) ? EISDIR : ENOTSUP;
1046 goto out;
1048 // XXX: use lseek(fd, 0, SEEK_END); instead?
1049 size_t size = txt->info.st_size;
1050 if (size < BUFFER_MMAP_SIZE)
1051 txt->buf = buffer_read(txt, size, fd);
1052 else
1053 txt->buf = buffer_mmap(txt, size, fd, 0);
1054 if (!txt->buf)
1055 goto out;
1056 Piece *p = piece_alloc(txt);
1057 if (!p)
1058 goto out;
1059 piece_init(&txt->begin, NULL, p, NULL, 0);
1060 piece_init(p, &txt->begin, &txt->end, txt->buf->data, txt->buf->len);
1061 piece_init(&txt->end, p, NULL, NULL, 0);
1062 txt->size = txt->buf->len;
1064 /* write an empty action */
1065 change_alloc(txt, EPOS);
1066 text_snapshot(txt);
1067 txt->saved_action = txt->history;
1069 if (fd != -1)
1070 close(fd);
1071 return txt;
1072 out:
1073 if (fd != -1)
1074 close(fd);
1075 text_free(txt);
1076 return NULL;
1079 struct stat text_stat(Text *txt) {
1080 return txt->info;
1083 /* A delete operation can either start/stop midway through a piece or at
1084 * a boundry. In the former case a new piece is created to represent the
1085 * remaining text before/after the modification point.
1087 * /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
1088 * | | | existing| |demo | |text | | |
1089 * \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
1090 * ^ ^
1091 * |------ delete range -----|
1093 * /-+ --> +----+ --> +--+ --> +-\
1094 * | | | exi| |t | | |
1095 * \-+ <-- +----+ <-- +--+ <-- +-/
1097 bool text_delete(Text *txt, size_t pos, size_t len) {
1098 if (len == 0)
1099 return true;
1100 if (pos + len > txt->size)
1101 return false;
1102 if (pos < txt->lines.pos)
1103 lineno_cache_invalidate(&txt->lines);
1105 Location loc = piece_get_intern(txt, pos);
1106 Piece *p = loc.piece;
1107 if (!p)
1108 return false;
1109 size_t off = loc.off;
1110 if (cache_delete(txt, p, off, len))
1111 return true;
1112 Change *c = change_alloc(txt, pos);
1113 if (!c)
1114 return false;
1116 bool midway_start = false, midway_end = false; /* split pieces? */
1117 Piece *before, *after; /* unmodified pieces before/after deletion point */
1118 Piece *start, *end; /* span which is removed */
1119 size_t cur; /* how much has already been deleted */
1121 if (off == p->len) {
1122 /* deletion starts at a piece boundry */
1123 cur = 0;
1124 before = p;
1125 start = p->next;
1126 } else {
1127 /* deletion starts midway through a piece */
1128 midway_start = true;
1129 cur = p->len - off;
1130 start = p;
1131 before = piece_alloc(txt);
1132 if (!before)
1133 return false;
1136 /* skip all pieces which fall into deletion range */
1137 while (cur < len) {
1138 p = p->next;
1139 cur += p->len;
1142 if (cur == len) {
1143 /* deletion stops at a piece boundry */
1144 end = p;
1145 after = p->next;
1146 } else {
1147 /* cur > len: deletion stops midway through a piece */
1148 midway_end = true;
1149 end = p;
1150 after = piece_alloc(txt);
1151 if (!after)
1152 return false;
1153 piece_init(after, before, p->next, p->data + p->len - (cur - len), cur - len);
1156 if (midway_start) {
1157 /* we finally know which piece follows our newly allocated before piece */
1158 piece_init(before, start->prev, after, start->data, off);
1161 Piece *new_start = NULL, *new_end = NULL;
1162 if (midway_start) {
1163 new_start = before;
1164 if (!midway_end)
1165 new_end = before;
1167 if (midway_end) {
1168 if (!midway_start)
1169 new_start = after;
1170 new_end = after;
1173 span_init(&c->new, new_start, new_end);
1174 span_init(&c->old, start, end);
1175 span_swap(txt, &c->old, &c->new);
1176 return true;
1179 bool text_delete_range(Text *txt, Filerange *r) {
1180 if (!text_range_valid(r))
1181 return false;
1182 return text_delete(txt, r->start, text_range_size(r));
1185 /* preserve the current text content such that it can be restored by
1186 * means of undo/redo operations */
1187 void text_snapshot(Text *txt) {
1188 if (txt->current_action)
1189 txt->last_action = txt->current_action;
1190 txt->current_action = NULL;
1191 txt->cache = NULL;
1195 void text_free(Text *txt) {
1196 if (!txt)
1197 return;
1199 // free history
1200 Action *hist = txt->history;
1201 while (hist && hist->prev)
1202 hist = hist->prev;
1203 while (hist) {
1204 Action *later = hist->later;
1205 action_free(hist);
1206 hist = later;
1209 for (Piece *next, *p = txt->pieces; p; p = next) {
1210 next = p->global_next;
1211 piece_free(p);
1214 for (Buffer *next, *buf = txt->buffers; buf; buf = next) {
1215 next = buf->next;
1216 buffer_free(buf);
1219 free(txt);
1222 bool text_modified(Text *txt) {
1223 return txt->saved_action != txt->history;
1226 bool text_sigbus(Text *txt, const char *addr) {
1227 for (Buffer *buf = txt->buffers; buf; buf = buf->next) {
1228 if (buf->type == MMAP && buf->data <= addr && addr < buf->data + buf->size)
1229 return true;
1231 return false;
1234 enum TextNewLine text_newline_type(Text *txt){
1235 if (!txt->newlines) {
1236 txt->newlines = TEXT_NEWLINE_NL; /* default to UNIX style \n new lines */
1237 const char *start = txt->buf ? txt->buf->data : NULL;
1238 if (start) {
1239 const char *nl = memchr(start, '\n', txt->buf->len);
1240 if (nl > start && nl[-1] == '\r')
1241 txt->newlines = TEXT_NEWLINE_CRNL;
1242 } else {
1243 char c;
1244 size_t nl = lines_skip_forward(txt, 0, 1, NULL);
1245 if (nl > 1 && text_byte_get(txt, nl-2, &c) && c == '\r')
1246 txt->newlines = TEXT_NEWLINE_CRNL;
1250 return txt->newlines;
1253 static bool text_iterator_init(Iterator *it, size_t pos, Piece *p, size_t off) {
1254 *it = (Iterator){
1255 .pos = pos,
1256 .piece = p,
1257 .start = p ? p->data : NULL,
1258 .end = p ? p->data + p->len : NULL,
1259 .text = p ? p->data + off : NULL,
1261 return text_iterator_valid(it);
1264 Iterator text_iterator_get(Text *txt, size_t pos) {
1265 Iterator it;
1266 Location loc = piece_get_extern(txt, pos);
1267 text_iterator_init(&it, pos, loc.piece, loc.off);
1268 return it;
1271 bool text_iterator_byte_get(Iterator *it, char *b) {
1272 if (text_iterator_valid(it)) {
1273 if (it->start <= it->text && it->text < it->end) {
1274 *b = *it->text;
1275 return true;
1276 } else if (it->pos == it->piece->text->size) { /* EOF */
1277 *b = '\0';
1278 return true;
1281 return false;
1284 bool text_iterator_next(Iterator *it) {
1285 return text_iterator_init(it, it->pos, it->piece ? it->piece->next : NULL, 0);
1288 bool text_iterator_prev(Iterator *it) {
1289 return text_iterator_init(it, it->pos, it->piece ? it->piece->prev : NULL, 0);
1292 bool text_iterator_valid(const Iterator *it) {
1293 /* filter out sentinel nodes */
1294 return it->piece && it->piece->text;
1297 bool text_iterator_byte_next(Iterator *it, char *b) {
1298 if (!text_iterator_valid(it))
1299 return false;
1300 it->text++;
1301 /* special case for advancement to EOF */
1302 if (it->text == it->end && !it->piece->next->text) {
1303 it->pos++;
1304 if (b)
1305 *b = '\0';
1306 return true;
1308 while (it->text >= it->end) {
1309 if (!text_iterator_next(it))
1310 return false;
1311 it->text = it->start;
1313 it->pos++;
1314 if (b)
1315 *b = *it->text;
1316 return true;
1319 bool text_iterator_byte_prev(Iterator *it, char *b) {
1320 if (!text_iterator_valid(it))
1321 return false;
1322 while (it->text == it->start) {
1323 if (!text_iterator_prev(it))
1324 return false;
1325 it->text = it->end;
1327 --it->text;
1328 --it->pos;
1329 if (b)
1330 *b = *it->text;
1331 return true;
1334 bool text_iterator_char_next(Iterator *it, char *c) {
1335 while (text_iterator_byte_next(it, NULL)) {
1336 if (ISUTF8(*it->text)) {
1337 if (c)
1338 *c = *it->text;
1339 return true;
1342 return false;
1345 bool text_iterator_char_prev(Iterator *it, char *c) {
1346 while (text_iterator_byte_prev(it, NULL)) {
1347 if (ISUTF8(*it->text)) {
1348 if (c)
1349 *c = *it->text;
1350 return true;
1353 return false;
1356 bool text_byte_get(Text *txt, size_t pos, char *buf) {
1357 return text_bytes_get(txt, pos, 1, buf);
1360 size_t text_bytes_get(Text *txt, size_t pos, size_t len, char *buf) {
1361 if (!buf)
1362 return 0;
1363 char *cur = buf;
1364 size_t rem = len;
1365 text_iterate(txt, it, pos) {
1366 if (rem == 0)
1367 break;
1368 size_t piece_len = it.end - it.text;
1369 if (piece_len > rem)
1370 piece_len = rem;
1371 memcpy(cur, it.text, piece_len);
1372 cur += piece_len;
1373 rem -= piece_len;
1375 return len - rem;
1378 size_t text_size(Text *txt) {
1379 return txt->size;
1382 /* count the number of new lines '\n' in range [pos, pos+len) */
1383 static size_t lines_count(Text *txt, size_t pos, size_t len) {
1384 size_t lines = 0;
1385 text_iterate(txt, it, pos) {
1386 const char *start = it.text;
1387 while (len > 0 && start < it.end) {
1388 size_t n = MIN(len, (size_t)(it.end - start));
1389 const char *end = memchr(start, '\n', n);
1390 if (!end) {
1391 len -= n;
1392 break;
1394 lines++;
1395 len -= end - start + 1;
1396 start = end + 1;
1399 if (len == 0)
1400 break;
1402 return lines;
1405 /* skip n lines forward and return position afterwards */
1406 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skipped) {
1407 size_t lines_old = lines;
1408 text_iterate(txt, it, pos) {
1409 const char *start = it.text;
1410 while (lines > 0 && start < it.end) {
1411 size_t n = it.end - start;
1412 const char *end = memchr(start, '\n', n);
1413 if (!end) {
1414 pos += n;
1415 break;
1417 pos += end - start + 1;
1418 start = end + 1;
1419 lines--;
1422 if (lines == 0)
1423 break;
1425 if (lines_skipped)
1426 *lines_skipped = lines_old - lines;
1427 return pos;
1430 static void lineno_cache_invalidate(LineCache *cache) {
1431 cache->pos = 0;
1432 cache->lineno = 1;
1435 size_t text_pos_by_lineno(Text *txt, size_t lineno) {
1436 size_t lines_skipped;
1437 LineCache *cache = &txt->lines;
1438 if (lineno <= 1)
1439 return 0;
1440 if (lineno > cache->lineno) {
1441 cache->pos = lines_skip_forward(txt, cache->pos, lineno - cache->lineno, &lines_skipped);
1442 cache->lineno += lines_skipped;
1443 } else if (lineno < cache->lineno) {
1444 #if 0
1445 // TODO does it make sense to scan memory backwards here?
1446 size_t diff = cache->lineno - lineno;
1447 if (diff < lineno) {
1448 lines_skip_backward(txt, cache->pos, diff);
1449 } else
1450 #endif
1451 cache->pos = lines_skip_forward(txt, 0, lineno - 1, &lines_skipped);
1452 cache->lineno = lines_skipped + 1;
1454 return cache->pos;
1457 size_t text_lineno_by_pos(Text *txt, size_t pos) {
1458 LineCache *cache = &txt->lines;
1459 if (pos > txt->size)
1460 pos = txt->size;
1461 if (pos < cache->pos) {
1462 size_t diff = cache->pos - pos;
1463 if (diff < pos)
1464 cache->lineno -= lines_count(txt, pos, diff);
1465 else
1466 cache->lineno = lines_count(txt, 0, pos) + 1;
1467 } else if (pos > cache->pos) {
1468 cache->lineno += lines_count(txt, cache->pos, pos - cache->pos);
1470 cache->pos = pos;
1471 return cache->lineno;
1474 Mark text_mark_set(Text *txt, size_t pos) {
1475 if (pos == 0)
1476 return (Mark)&txt->begin;
1477 if (pos == txt->size)
1478 return (Mark)&txt->end;
1479 Location loc = piece_get_extern(txt, pos);
1480 if (!loc.piece)
1481 return NULL;
1482 return loc.piece->data + loc.off;
1485 size_t text_mark_get(Text *txt, Mark mark) {
1486 size_t cur = 0;
1488 if (!mark)
1489 return EPOS;
1490 if (mark == (Mark)&txt->begin)
1491 return 0;
1492 if (mark == (Mark)&txt->end)
1493 return txt->size;
1495 for (Piece *p = txt->begin.next; p->next; p = p->next) {
1496 if (p->data <= mark && mark < p->data + p->len)
1497 return cur + (mark - p->data);
1498 cur += p->len;
1501 return EPOS;
1504 size_t text_history_get(Text *txt, size_t index) {
1505 for (Action *a = txt->current_action ? txt->current_action : txt->history; a; a = a->prev) {
1506 if (index-- == 0) {
1507 Change *c = a->change;
1508 while (c && c->next)
1509 c = c->next;
1510 return c ? c->pos : EPOS;
1513 return EPOS;