Add gemini lexer
[vis.git] / text-io.c
blobbfaad417559d8d7d140bff43b0fa97791861f297
1 #include <fcntl.h>
2 #include <unistd.h>
3 #include <stdlib.h>
4 #include <libgen.h>
5 #include <stdio.h>
6 #include <errno.h>
7 #include <string.h>
8 #include <limits.h>
9 #include <sys/mman.h>
10 #if CONFIG_ACL
11 #include <sys/acl.h>
12 #endif
13 #if CONFIG_SELINUX
14 #include <selinux/selinux.h>
15 #endif
17 #include "text.h"
18 #include "text-internal.h"
19 #include "text-util.h"
20 #include "util.h"
22 struct TextSave { /* used to hold context between text_save_{begin,commit} calls */
23 Text *txt; /* text to operate on */
24 char *filename; /* filename to save to as given to text_save_begin */
25 char *tmpname; /* temporary name used for atomic rename(2) */
26 int fd; /* file descriptor to write data to using text_save_write */
27 int dirfd; /* directory file descriptor, relative to which we save */
28 enum TextSaveMethod type; /* method used to save file */
31 /* Allocate blocks holding the actual file content in chunks of size: */
32 #ifndef BLOCK_SIZE
33 #define BLOCK_SIZE (1 << 20)
34 #endif
/* Files smaller than this value are copied on load, larger ones are mmap(2)-ed
 * directly. Hence the former can be truncated, while doing so on the latter
 * results in havoc. */
38 #define BLOCK_MMAP_SIZE (1 << 26)
40 /* allocate a new block of MAX(size, BLOCK_SIZE) bytes */
41 Block *block_alloc(size_t size) {
42 Block *blk = calloc(1, sizeof *blk);
43 if (!blk)
44 return NULL;
45 if (BLOCK_SIZE > size)
46 size = BLOCK_SIZE;
47 if (!(blk->data = malloc(size))) {
48 free(blk);
49 return NULL;
51 blk->type = BLOCK_TYPE_MALLOC;
52 blk->size = size;
53 return blk;
56 Block *block_read(size_t size, int fd) {
57 Block *blk = block_alloc(size);
58 if (!blk)
59 return NULL;
60 char *data = blk->data;
61 size_t rem = size;
62 while (rem > 0) {
63 ssize_t len = read(fd, data, rem);
64 if (len == -1) {
65 block_free(blk);
66 return NULL;
67 } else if (len == 0) {
68 break;
69 } else {
70 data += len;
71 rem -= len;
74 blk->len = size - rem;
75 return blk;
78 Block *block_mmap(size_t size, int fd, off_t offset) {
79 Block *blk = calloc(1, sizeof *blk);
80 if (!blk)
81 return NULL;
82 if (size) {
83 blk->data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
84 if (blk->data == MAP_FAILED) {
85 free(blk);
86 return NULL;
89 blk->type = BLOCK_TYPE_MMAP_ORIG;
90 blk->size = size;
91 blk->len = size;
92 return blk;
95 Block *block_load(int dirfd, const char *filename, enum TextLoadMethod method, struct stat *info) {
96 Block *block = NULL;
97 int fd = openat(dirfd, filename, O_RDONLY);
98 if (fd == -1)
99 goto out;
100 if (fstat(fd, info) == -1)
101 goto out;
102 if (!S_ISREG(info->st_mode)) {
103 errno = S_ISDIR(info->st_mode) ? EISDIR : ENOTSUP;
104 goto out;
107 // XXX: use lseek(fd, 0, SEEK_END); instead?
108 size_t size = info->st_size;
109 if (size == 0)
110 goto out;
111 if (method == TEXT_LOAD_READ || (method == TEXT_LOAD_AUTO && size < BLOCK_MMAP_SIZE))
112 block = block_read(size, fd);
113 else
114 block = block_mmap(size, fd, 0);
115 out:
116 if (fd != -1)
117 close(fd);
118 return block;
121 void block_free(Block *blk) {
122 if (!blk)
123 return;
124 if (blk->type == BLOCK_TYPE_MALLOC)
125 free(blk->data);
126 else if ((blk->type == BLOCK_TYPE_MMAP_ORIG || blk->type == BLOCK_TYPE_MMAP) && blk->data)
127 munmap(blk->data, blk->size);
128 free(blk);
131 /* check whether block has enough free space to store len bytes */
132 bool block_capacity(Block *blk, size_t len) {
133 return blk->size - blk->len >= len;
136 /* append data to block, assumes there is enough space available */
137 const char *block_append(Block *blk, const char *data, size_t len) {
138 char *dest = memcpy(blk->data + blk->len, data, len);
139 blk->len += len;
140 return dest;
143 /* insert data into block at an arbitrary position, this should only be used with
144 * data of the most recently created piece. */
145 bool block_insert(Block *blk, size_t pos, const char *data, size_t len) {
146 if (pos > blk->len || !block_capacity(blk, len))
147 return false;
148 if (blk->len == pos)
149 return block_append(blk, data, len);
150 char *insert = blk->data + pos;
151 memmove(insert + len, insert, blk->len - pos);
152 memcpy(insert, data, len);
153 blk->len += len;
154 return true;
157 /* delete data from a block at an arbitrary position, this should only be used with
158 * data of the most recently created piece. */
159 bool block_delete(Block *blk, size_t pos, size_t len) {
160 size_t end;
161 if (!addu(pos, len, &end) || end > blk->len)
162 return false;
163 if (blk->len == pos) {
164 blk->len -= len;
165 return true;
167 char *delete = blk->data + pos;
168 memmove(delete, delete + len, blk->len - pos - len);
169 blk->len -= len;
170 return true;
173 Text *text_load(const char *filename) {
174 return text_load_method(filename, TEXT_LOAD_AUTO);
177 Text *text_loadat(int dirfd, const char *filename) {
178 return text_loadat_method(dirfd, filename, TEXT_LOAD_AUTO);
181 Text *text_load_method(const char *filename, enum TextLoadMethod method) {
182 return text_loadat_method(AT_FDCWD, filename, method);
/* Write count bytes from buf to fd, issuing as many write(2) calls as
 * needed. A single call never transfers more than INT_MAX bytes and calls
 * failing with EAGAIN or EINTR are retried.
 *
 * Returns the number of bytes written, which is less than count only if
 * write(2) reported zero bytes written, or -1 on any other error. */
static ssize_t write_all(int fd, const char *buf, size_t count) {
	const char *cursor = buf;
	size_t left = count;
	while (left != 0) {
		size_t chunk = left > INT_MAX ? INT_MAX : left;
		ssize_t n = write(fd, cursor, chunk);
		if (n > 0) {
			cursor += n;
			left -= n;
			continue;
		}
		if (n == 0)
			break;
		if (errno != EAGAIN && errno != EINTR)
			return -1;
	}
	return count - left;
}
/* Copy the ACL of the file src to the file dest (both given as file
 * descriptors). Without CONFIG_ACL this is a no-op reporting success.
 * An ACL which can not be read because of ENOTSUP counts as success too.
 * Returns false if reading fails for another reason or applying fails. */
static bool preserve_acl(int src, int dest) {
#if CONFIG_ACL
	acl_t acl = acl_get_fd(src);
	if (!acl)
		return errno == ENOTSUP;
	int rc = acl_set_fd(dest, acl);
	acl_free(acl);
	if (rc == -1)
		return false;
#endif /* CONFIG_ACL */
	return true;
}
/* Copy the SELinux security context of the file src to the file dest (both
 * given as file descriptors). Without CONFIG_SELINUX, or when SELinux is not
 * enabled, this is a no-op reporting success. A context which can not be
 * read because of ENOTSUP counts as success too.
 * Returns false if reading fails for another reason or applying fails. */
static bool preserve_selinux_context(int src, int dest) {
#if CONFIG_SELINUX
	if (!is_selinux_enabled())
		return true;
	char *context = NULL;
	if (fgetfilecon(src, &context) == -1)
		return errno == ENOTSUP;
	int rc = fsetfilecon(dest, context);
	freecon(context);
	if (rc == -1)
		return false;
#endif /* CONFIG_SELINUX */
	return true;
}
/* mkstemp(3) variant creating the temporary file relative to the directory
 * referred to by dirfd. template is modified in place as by mkstemp.
 *
 * For dirfd other than AT_FDCWD the working directory is temporarily
 * switched to dirfd and restored afterwards, which makes this function
 * unsafe in multi-threaded programs.
 *
 * Returns the open file descriptor or -1 on failure. errno as left by the
 * failing call is preserved across the cleanup, callers inspect it. */
static int mkstempat(int dirfd, char *template) {
	if (dirfd == AT_FDCWD)
		return mkstemp(template);
	// FIXME: not thread safe
	int fd = -1;
	int cwd = open(".", O_RDONLY|O_DIRECTORY);
	if (cwd == -1)
		return -1;
	if (fchdir(dirfd) == 0)
		fd = mkstemp(template);
	/* restoring the working directory is best effort and must not clobber
	 * the error reported by fchdir/mkstemp */
	int saved_errno = errno;
	fchdir(cwd);
	close(cwd);
	errno = saved_errno;
	return fd;
}
251 /* Create a new file named `.filename.vis.XXXXXX` (where `XXXXXX` is a
252 * randomly generated, unique suffix) and try to preserve all important
253 * meta data. After the file content has been written to this temporary
254 * file, text_save_commit_atomic will atomically move it to its final
255 * (possibly already existing) destination using rename(2).
257 * This approach does not work if:
259 * - the file is a symbolic link
260 * - the file is a hard link
261 * - file ownership can not be preserved
262 * - file group can not be preserved
263 * - directory permissions do not allow creation of a new file
264 * - POSXI ACL can not be preserved (if enabled)
265 * - SELinux security context can not be preserved (if enabled)
267 static bool text_save_begin_atomic(TextSave *ctx) {
268 int oldfd, saved_errno;
269 if ((oldfd = openat(ctx->dirfd, ctx->filename, O_RDONLY)) == -1 && errno != ENOENT)
270 goto err;
271 struct stat oldmeta = { 0 };
272 if (oldfd != -1 && fstatat(ctx->dirfd, ctx->filename, &oldmeta, AT_SYMLINK_NOFOLLOW) == -1)
273 goto err;
274 if (oldfd != -1) {
275 if (S_ISLNK(oldmeta.st_mode)) /* symbolic link */
276 goto err;
277 if (oldmeta.st_nlink > 1) /* hard link */
278 goto err;
281 char suffix[] = ".vis.XXXXXX";
282 size_t len = strlen(ctx->filename) + sizeof("./.") + sizeof(suffix);
283 char *dir = strdup(ctx->filename);
284 char *base = strdup(ctx->filename);
286 if (!(ctx->tmpname = malloc(len)) || !dir || !base) {
287 free(dir);
288 free(base);
289 goto err;
292 snprintf(ctx->tmpname, len, "%s/.%s%s", dirname(dir), basename(base), suffix);
293 free(dir);
294 free(base);
296 if ((ctx->fd = mkstempat(ctx->dirfd, ctx->tmpname)) == -1)
297 goto err;
299 if (oldfd == -1) {
300 mode_t mask = umask(0);
301 umask(mask);
302 if (fchmod(ctx->fd, 0666 & ~mask) == -1)
303 goto err;
304 } else {
305 if (fchmod(ctx->fd, oldmeta.st_mode) == -1)
306 goto err;
307 if (!preserve_acl(oldfd, ctx->fd) || !preserve_selinux_context(oldfd, ctx->fd))
308 goto err;
309 /* change owner if necessary */
310 if (oldmeta.st_uid != getuid() && fchown(ctx->fd, oldmeta.st_uid, (uid_t)-1) == -1)
311 goto err;
312 /* change group if necessary, in case of failure some editors reset
313 * the group permissions to the same as for others */
314 if (oldmeta.st_gid != getgid() && fchown(ctx->fd, (uid_t)-1, oldmeta.st_gid) == -1)
315 goto err;
316 close(oldfd);
319 ctx->type = TEXT_SAVE_ATOMIC;
320 return true;
321 err:
322 saved_errno = errno;
323 if (oldfd != -1)
324 close(oldfd);
325 if (ctx->fd != -1)
326 close(ctx->fd);
327 ctx->fd = -1;
328 free(ctx->tmpname);
329 ctx->tmpname = NULL;
330 errno = saved_errno;
331 return false;
334 static bool text_save_commit_atomic(TextSave *ctx) {
335 if (fsync(ctx->fd) == -1)
336 return false;
338 struct stat meta = { 0 };
339 if (fstat(ctx->fd, &meta) == -1)
340 return false;
342 bool close_failed = (close(ctx->fd) == -1);
343 ctx->fd = -1;
344 if (close_failed)
345 return false;
347 if (renameat(ctx->dirfd, ctx->tmpname, ctx->dirfd, ctx->filename) == -1)
348 return false;
350 free(ctx->tmpname);
351 ctx->tmpname = NULL;
353 int dir = openat(ctx->dirfd, dirname(ctx->filename), O_DIRECTORY|O_RDONLY);
354 if (dir == -1)
355 return false;
357 if (fsync(dir) == -1 && errno != EINVAL) {
358 close(dir);
359 return false;
362 if (close(dir) == -1)
363 return false;
365 text_saved(ctx->txt, &meta);
366 return true;
369 static bool text_save_begin_inplace(TextSave *ctx) {
370 Text *txt = ctx->txt;
371 struct stat now = { 0 };
372 int newfd = -1, saved_errno;
373 if ((ctx->fd = openat(ctx->dirfd, ctx->filename, O_CREAT|O_WRONLY, 0666)) == -1)
374 goto err;
375 if (fstat(ctx->fd, &now) == -1)
376 goto err;
377 struct stat loaded = text_stat(txt);
378 Block *block = text_block_mmaped(txt);
379 if (block && now.st_dev == loaded.st_dev && now.st_ino == loaded.st_ino) {
380 /* The file we are going to overwrite is currently mmap-ed from
381 * text_load, therefore we copy the mmap-ed block to a temporary
382 * file and remap it at the same position such that all pointers
383 * from the various pieces are still valid.
385 size_t size = block->size;
386 char tmpname[32] = "/tmp/vis-XXXXXX";
387 newfd = mkstemp(tmpname);
388 if (newfd == -1)
389 goto err;
390 if (unlink(tmpname) == -1)
391 goto err;
392 ssize_t written = write_all(newfd, block->data, size);
393 if (written == -1 || (size_t)written != size)
394 goto err;
395 void *data = mmap(block->data, size, PROT_READ, MAP_SHARED|MAP_FIXED, newfd, 0);
396 if (data == MAP_FAILED)
397 goto err;
398 bool close_failed = (close(newfd) == -1);
399 newfd = -1;
400 if (close_failed)
401 goto err;
402 block->type = BLOCK_TYPE_MMAP;
404 /* overwrite the existing file content, if something goes wrong
405 * here we are screwed, TODO: make a backup before? */
406 if (ftruncate(ctx->fd, 0) == -1)
407 goto err;
408 ctx->type = TEXT_SAVE_INPLACE;
409 return true;
410 err:
411 saved_errno = errno;
412 if (newfd != -1)
413 close(newfd);
414 if (ctx->fd != -1)
415 close(ctx->fd);
416 ctx->fd = -1;
417 errno = saved_errno;
418 return false;
421 static bool text_save_commit_inplace(TextSave *ctx) {
422 if (fsync(ctx->fd) == -1)
423 return false;
424 struct stat meta = { 0 };
425 if (fstat(ctx->fd, &meta) == -1)
426 return false;
427 if (close(ctx->fd) == -1)
428 return false;
429 text_saved(ctx->txt, &meta);
430 return true;
433 TextSave *text_save_begin(Text *txt, int dirfd, const char *filename, enum TextSaveMethod type) {
434 if (!filename)
435 return NULL;
436 TextSave *ctx = calloc(1, sizeof *ctx);
437 if (!ctx)
438 return NULL;
439 ctx->txt = txt;
440 ctx->fd = -1;
441 ctx->dirfd = dirfd;
442 if (!(ctx->filename = strdup(filename)))
443 goto err;
444 errno = 0;
445 if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_ATOMIC) && text_save_begin_atomic(ctx))
446 return ctx;
447 if (errno == ENOSPC)
448 goto err;
449 if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_INPLACE) && text_save_begin_inplace(ctx))
450 return ctx;
451 err:
452 text_save_cancel(ctx);
453 return NULL;
456 bool text_save_commit(TextSave *ctx) {
457 if (!ctx)
458 return true;
459 bool ret;
460 switch (ctx->type) {
461 case TEXT_SAVE_ATOMIC:
462 ret = text_save_commit_atomic(ctx);
463 break;
464 case TEXT_SAVE_INPLACE:
465 ret = text_save_commit_inplace(ctx);
466 break;
467 default:
468 ret = false;
469 break;
472 text_save_cancel(ctx);
473 return ret;
476 void text_save_cancel(TextSave *ctx) {
477 if (!ctx)
478 return;
479 int saved_errno = errno;
480 if (ctx->fd != -1)
481 close(ctx->fd);
482 if (ctx->tmpname && ctx->tmpname[0])
483 unlinkat(ctx->dirfd, ctx->tmpname, 0);
484 free(ctx->tmpname);
485 free(ctx->filename);
486 free(ctx);
487 errno = saved_errno;
490 /* First try to save the file atomically using rename(2) if this does not
491 * work overwrite the file in place. However if something goes wrong during
492 * this overwrite the original file is permanently damaged.
494 bool text_save(Text *txt, const char *filename) {
495 return text_saveat(txt, AT_FDCWD, filename);
498 bool text_saveat(Text *txt, int dirfd, const char *filename) {
499 return text_saveat_method(txt, dirfd, filename, TEXT_SAVE_AUTO);
502 bool text_save_method(Text *txt, const char *filename, enum TextSaveMethod method) {
503 return text_saveat_method(txt, AT_FDCWD, filename, method);
506 bool text_saveat_method(Text *txt, int dirfd, const char *filename, enum TextSaveMethod method) {
507 if (!filename) {
508 text_saved(txt, NULL);
509 return true;
511 TextSave *ctx = text_save_begin(txt, dirfd, filename, method);
512 if (!ctx)
513 return false;
514 Filerange range = (Filerange){ .start = 0, .end = text_size(txt) };
515 ssize_t written = text_save_write_range(ctx, &range);
516 if (written == -1 || (size_t)written != text_range_size(&range)) {
517 text_save_cancel(ctx);
518 return false;
520 return text_save_commit(ctx);
523 ssize_t text_save_write_range(TextSave *ctx, const Filerange *range) {
524 return text_write_range(ctx->txt, range, ctx->fd);
527 ssize_t text_write(const Text *txt, int fd) {
528 Filerange r = (Filerange){ .start = 0, .end = text_size(txt) };
529 return text_write_range(txt, &r, fd);
532 ssize_t text_write_range(const Text *txt, const Filerange *range, int fd) {
533 size_t size = text_range_size(range), rem = size;
534 for (Iterator it = text_iterator_get(txt, range->start);
535 rem > 0 && text_iterator_valid(&it);
536 text_iterator_next(&it)) {
537 size_t prem = it.end - it.text;
538 if (prem > rem)
539 prem = rem;
540 ssize_t written = write_all(fd, it.text, prem);
541 if (written == -1)
542 return -1;
543 rem -= written;
544 if ((size_t)written != prem)
545 break;
547 return size - rem;