cleanup: remove tree parameter where possible
libhed/file.c

/* $Id$ */

/*
 * hed - Hexadecimal editor
 * Copyright (C) 2004 Petr Baudis <pasky@ucw.cz>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * There hammer on the anvil smote,
 * There chisel clove, and graver wrote;
 * There forged was blade, and bound was hilt;
 * The delver mined, the mason built.
 * There beryl, pearl, and opal pale,
 * And metal wrought like fishes' mail,
 * Buckler and corslet, axe and sword,
 * And shining spears were laid in hoard.
 */

/* Feature macros needed for:
 * - memrchr
 * - pread, pwrite
 * - stpcpy
 */
#define _GNU_SOURCE

#include <config.h>

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>	/* BLKGETSIZE and BLKGETSIZE64 */

#include "file.h"
#include "access.h"
#include "cache.h"
#include "swap.h"
#include "tree.h"
#include "expr.h"

/* memrchr() might not be available */
#ifndef HAVE_MEMRCHR
# include "memrchr.c"
#endif /* HAVE_MEMRCHR */

/*
 * `Piles of jewels?' said Gandalf. `No. The Orcs have often plundered Moria;
 * there is nothing left in the upper halls. And since the dwarves fled, no one
 * dares to seek the shafts and treasuries down in the deep places: they are
 * drowned in water--or in a shadow of fear.'
 */

/* TODO: Currently the file blocks allocation is not very sophisticated and
 * when the weather is bad it could probably have rather horrible results. */

#undef BLOCKS_DEBUG
#ifdef BLOCKS_DEBUG
#define BDEBUG(x...)	fprintf(stderr, x)
#else
#define BDEBUG(x...)
#endif

/* Number of blocks in cache */
#define CACHE_LENGTH	64

/* Blocks for readahead */
#define FILE_READAHEAD	(CACHE_LENGTH/2)

#define file_block	hed_block
#define blockoff_t	hed_cursor_t

#define first_block(tree)	(tree_entry(first_in_tree(tree),struct file_block,t))
#define prev_block(b)	(tree_entry(prev_in_tree(&(b)->t),struct file_block,t))
#define next_block(b)	(tree_entry(next_in_tree(&(b)->t),struct file_block,t))
#define last_block(tree)	(tree_entry(last_in_tree(tree),struct file_block,t))

#define block_offset(b)	tree_block_offset(&(b)->t)

#define recalc_block_recursive(b)	recalc_node_recursive(&(b)->t)

#define chain_block(tree,b)	insert_into_tree((tree), &(b)->t, NULL)
#define recalc_chain_block(tree,b)	do {	\
		chain_block((tree), (b));	\
		recalc_block_recursive((b));	\
	} while (0)

#define chain_block_after(tree,p,b)	insert_into_tree((tree), &(b)->t, &(p)->t)

#define recalc_chain_block_after(tree,p,b)	do {	\
		chain_block_after((tree), (p), (b));	\
		recalc_block_recursive((b));		\
	} while (0)

#define unchain_block(tree,b)	del_from_tree((tree), &(b)->t)
#define recalc_unchain_block(tree,b)	recalc_del_from_tree((tree), &(b)->t)

#define init_block_list(tree,b)	init_tree(tree, &(b)->t)
#define init_block_link(b)	init_node(&(b)->t)

#define find_block(tree,o)	tree_entry(find_in_tree((tree),(o)),struct file_block,t)

#define file_size	hed_file_size
#define file_blocks	hed_file_blocks

#ifdef HED_CONFIG_SWAP

/* Return the swp file object */
static inline struct swp_file *
file_swp(struct hed_file *file)
{
	return file->swp;
}

#else

/* Provide a stub for the non-swap case */
static inline void *
file_swp(struct hed_file *file)
{
	return NULL;
}

#endif

#ifdef HED_CONFIG_READAHEAD

#define file_ra_none(f)		((f)->readahead == HED_RA_NONE)
#define file_ra_forward(f)	((f)->readahead == HED_RA_FORWARD)
#define file_ra_backward(f)	((f)->readahead == HED_RA_BACKWARD)

#else

#define file_ra_none(f)		(1)
#define file_ra_forward(f)	(0)
#define file_ra_backward(f)	(0)

#endif /* HED_CONFIG_READAHEAD */

/* Get the physical offset of the byte immediately following @block. */
static inline hed_uoff_t
phys_end(const struct hed_block *block)
{
	return hed_block_is_inserted(block)
		? block->phys_pos
		: block->phys_pos + hed_block_size(block);
}

static struct hed_block *
next_nonzero_block(const struct hed_tree *tree, struct hed_block *block)
{
	while (!hed_block_is_eof(block)) {
		block = next_block(block);
		if (hed_block_size(block))
			return block;
	}
	return NULL;
}

static struct hed_block *
prev_nonzero_block(const struct hed_tree *tree, struct hed_block *block)
{
	do {
		block = prev_block(block);
		if (hed_block_is_eof(block))
			return NULL;
	} while (!hed_block_size(block));
	return block;
}

bool
hed_block_is_after_erase(const struct hed_tree *tree, struct hed_block *block)
{
	struct hed_block *prev = prev_nonzero_block(tree, block);
	return prev
		? block->phys_pos > phys_end(prev)
		: !!block->phys_pos;
}

bool
hed_block_is_after_insert(const struct hed_tree *tree, struct hed_block *block)
{
	struct hed_block *prev = prev_nonzero_block(tree, block);
	return prev && hed_block_is_inserted(prev);
}

#ifndef BLOCKS_DEBUG
# define dump_blocks(file) {}
#else

static hed_uoff_t
block_phys_size(struct hed_file *file, struct file_block *block)
{
	struct file_block *next;

	if (hed_block_is_eof(block))
		return 0;
	next = next_block(block);
	return next->phys_pos - block->phys_pos;
}

static void
dump_block(int level, struct hed_file *file, struct hed_tree_node *node,
	   hed_uoff_t *cur_offset, hed_uoff_t *cur_poffset)
{
	struct hed_block *block = tree_entry(node, struct hed_block, t);
	bool virtual = hed_block_is_virtual(block);
	unsigned char *p;
	hed_cursor_t *cur;
	char t[21] = "                    ";

	if (node->left)
		dump_block(level + 1, file, node->left, cur_offset, cur_poffset);
	p = hed_block_data(block);
	if (level < 20) t[level] = '>'; else t[19] = '.';
	fprintf(stderr, "%s [%06llx] [%06llx] %c%c%c %05llx %05llx"
		" {%02x%02x%02x%02x} -- %p ^%p [%06llx]\n",
		t,
		(unsigned long long) *cur_offset,
		(unsigned long long) *cur_poffset,
		virtual ? 'v' : ' ',
		hed_block_is_inserted(block) ? 'i' : ' ',
		hed_block_is_dirty(block) ? '*' : ' ',
		(unsigned long long) node->size,
		(unsigned long long) block_phys_size(file, block),
		p && block->t.size > 0 ? p[0] : 0,
		p && block->t.size > 1 ? p[1] : 0,
		p && block->t.size > 2 ? p[2] : 0,
		p && block->t.size > 3 ? p[3] : 0,
		node, node->up,
		(unsigned long long) node->cover_size
		);
	list_for_each_entry (cur, &block->refs, list) {
		fprintf(stderr, " <%p>: %llx->%p:%llx\n",
			cur, (long long)cur->pos,
			cur->block, (unsigned long long)cur->off);
	}
	assert(*cur_poffset == block->phys_pos);
	*cur_offset += node->size;
	*cur_poffset += block_phys_size(file, block);
	if (node->right)
		dump_block(level + 1, file, node->right, cur_offset, cur_poffset);
	assert(node->cover_size == (node->left ? node->left->cover_size : 0)
	       + (node->right ? node->right->cover_size : 0)
	       + node->size);
}

/* Walk the tree manually here, because foreach_block() does not provide
 * the tree structure.
 * TODO: Change this if you plan to debug any other block containers.
 */
static void
dump_blocks(struct hed_file *file)
{
	struct file_block *first = first_block(file_blocks(file));
	hed_uoff_t cur_offset, cur_poffset;

	fprintf(stderr, "-- blocks dump --\n");
	cur_offset = 0;
	cur_poffset = first->phys_pos;
	dump_block(0, file, file_blocks(file)->root,
		   &cur_offset, &cur_poffset);
	fprintf(stderr, "-- blocks dump end --\n");
}
#endif

static void
get_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
{
	struct file_block *block;

	block = find_block(file_blocks(file), offset);
	assert(block != NULL);
	curs->pos = offset;
	curs->block = block;
	curs->off = offset - block_offset(block);
	list_add(&curs->list, &block->refs);

	BDEBUG("Mapped %llx to %llx+%llx/%llx\n",
	       offset, offset - curs->off, curs->off, block->t.size);
}

void
hed_get_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
{
	get_cursor(file, offset, curs);
}

static inline void
put_cursor(hed_cursor_t *curs)
{
	list_del(&curs->list);
}

void
hed_put_cursor(hed_cursor_t *curs)
{
	put_cursor(curs);
}

void
hed_update_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
{
	put_cursor(curs);
	get_cursor(file, offset, curs);
}

void
hed_dup_cursor(const hed_cursor_t *src, hed_cursor_t *dst)
{
	dst->pos = src->pos;
	dst->block = src->block;
	dst->off = src->off;
	list_add_tail(&dst->list, &src->block->refs);
}

void
hed_dup2_cursor(const hed_cursor_t *src, hed_cursor_t *dst)
{
	if (hed_is_a_cursor(dst))
		put_cursor(dst);
	hed_dup_cursor(src, dst);
}
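
/* Illustrative sketch, not part of the original source and not built by
 * default (HED_FILE_EXAMPLES is a hypothetical guard): how a caller
 * typically obtains, duplicates and releases cursors. Every cursor is
 * linked into its block's @refs list, which is why each hed_get_cursor()
 * or hed_dup_cursor() must be paired with hed_put_cursor() before the
 * cursor goes out of scope. */
#ifdef HED_FILE_EXAMPLES
static void
example_cursor_lifecycle(struct hed_file *file)
{
	hed_cursor_t curs, copy;

	hed_get_cursor(file, 0x100, &curs);	/* link a cursor at offset 0x100 */
	hed_dup_cursor(&curs, &copy);		/* independent copy, also linked */
	hed_update_cursor(file, 0x200, &copy);	/* re-point the copy elsewhere */
	hed_put_cursor(&copy);			/* unlink both before they die */
	hed_put_cursor(&curs);
}
#endif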

/* Move blockoff's from @old to @new, adding @off to their block
 * offsets to keep them at the same position. */
static void
update_blockoffs(const struct file_block *old, struct file_block *new,
		 hed_off_t off)
{
	blockoff_t *blockoff;

	BDEBUG("Updating blockoffs from <%p> to <%p>%c%llx\n",
	       old, new, off >= 0 ? '+' : '-', off >= 0 ? off : -off);

	list_for_each_entry(blockoff, &old->refs, list) {
		blockoff->block = new;
		blockoff->off += off;
	}
}

/* Move blockoff's in the range <@start;@end> from @old to @new,
 * adding @off to their block offset, plus moving the reference list. */
static void
move_blockoffs(const struct file_block *old, struct file_block *new,
	       hed_uoff_t start, hed_uoff_t end, hed_off_t off)
{
	blockoff_t *blockoff, *nextoff;

	BDEBUG("Moving blockoffs from <%p>:%llx:%llx to <%p>%c%llx\n",
	       old, start, end, new,
	       off >= 0 ? '+' : '-', off >= 0 ? off : -off);

	list_for_each_entry_safe(blockoff, nextoff, &old->refs, list)
		if (blockoff->off >= start && blockoff->off <= end) {
			blockoff->block = new;
			blockoff->off += off;
			list_move(&blockoff->list, &new->refs);
		}
}

/* Move cursors in the range @block:<@start;@end> to @newpos */
static void
move_cursors_abs(const struct file_block *block,
		 hed_uoff_t start, hed_uoff_t end,
		 const hed_cursor_t *newpos)
{
	hed_cursor_t *curs, *nextcurs;

	BDEBUG("Moving blockoffs from <%p>:%llx:%llx to <%p>:%llx\n",
	       block, start, end, newpos->block, newpos->off);

	list_for_each_entry_safe(curs, nextcurs, &block->refs, list)
		if (curs->off >= start && curs->off <= end) {
			curs->pos = newpos->pos;
			curs->block = newpos->block;
			curs->off = newpos->off;
			list_move(&curs->list, &newpos->block->refs);
		}
}

/* Update the positions of blockoffs at and after @start for all
 * blocks starting at @block */
static void
slide_blockoffs(struct hed_file *file, const struct file_block *block,
		hed_uoff_t start, hed_off_t off)
{
	blockoff_t *blockoff;
	const struct hed_block *nblock;

	BDEBUG("Sliding blockoffs >= %llx by %c%llx, starting at <%p>\n",
	       start, off >= 0 ? '+' : '-', off >= 0 ? off : -off, block);
	nblock = block;
	do {
		block = nblock;
		list_for_each_entry(blockoff, &block->refs, list)
			if (blockoff->pos >= start)
				blockoff->pos += off;
		nblock = next_block(block);
	} while (!hed_block_is_eof(block));
}

static struct hed_block *
new_block(struct hed_file *file, long flags)
{
	struct file_block *new;

	if (! (new = swp_zalloc(file_swp(file), sizeof(struct file_block))) )
		return NULL;

	new->flags = flags;
	init_block_link(new);
	INIT_LIST_HEAD(&new->refs);
	if (flags & HED_BLOCK_EXCACHE)
		INIT_LIST_HEAD(&new->lru);
	else
		list_add_tail(&new->lru, &file->lru);

	return new;
}

static struct hed_block *
new_virt_block(struct hed_file *file, hed_uoff_t pos, hed_uoff_t size,
	       long extraflags)
{
	struct hed_block *new =
		new_block(file, (HED_BLOCK_EXCACHE |
				 HED_BLOCK_VIRTUAL |
				 extraflags));
	if (!new)
		return NULL;

	new->t.size = size;
	new->phys_pos = pos;
	BDEBUG("Spawned new virtual block [%llx] at %llx\n", size, pos);
	return new;
}

static struct hed_block *
new_data_block(struct hed_file *file, hed_uoff_t pos, hed_uoff_t size,
	       struct hed_block_data *dataobj)
{
	struct hed_block *new =
		new_block(file, 0);
	if (!new)
		return NULL;

	cache_get(dataobj);
	new->dataobj = dataobj;
	new->t.size = size;
	new->phys_pos = pos;
	new->dataoff = FILE_BLOCK_OFF(pos);
	BDEBUG("Spawned new data block [%llx] at %llx\n", size, pos);
	return new;
}

static void
file_free_block(struct hed_file *file, struct file_block *block)
{
	if (block->dataobj)
		cache_put(file->cache, block->dataobj);
	list_del(&block->lru);

	swp_free(file_swp(file), block);
}

static bool
kill_block_if_empty(struct hed_file *file, struct file_block *block)
{
	if (!hed_block_is_eof(block) && block->t.size == 0 &&
	    list_empty(&block->refs)) {
		/* No recalculation needed, zero size. */
		unchain_block(file_blocks(file), block);
		file_free_block(file, block);
		return true;
	}
	return false;
}

/* This may kill the previous block as well, if it can be merged
 * with the next one. It will never kill anything which _follows_. */
static void
file_kill_block(struct hed_file *file, struct file_block *block)
{
	hed_uoff_t phys_pos = block->phys_pos;
	struct file_block *prev = prev_block(block);
	struct file_block *next = next_block(block);
	struct file_block *merger;
	bool killprev = false;

	/* We should never kill a dirty block! */
	assert(!hed_block_is_dirty(block));
	/* We shouldn't get here with an empty block (that might
	 * need special considerations with virtualization). */
	assert(block->t.size > 0);

	if (!hed_block_is_eof(block) &&
	    hed_block_is_inner_virtual(next) &&
	    phys_pos + block->t.size == next->phys_pos) {
		if (!hed_block_is_eof(prev) &&
		    hed_block_is_inner_virtual(prev) &&
		    prev->phys_pos + prev->t.size == phys_pos)
			killprev = true;
		merger = next;
		merger->phys_pos -= block->t.size;
		update_blockoffs(merger, merger, block->t.size);
		update_blockoffs(block, merger, 0);
	} else if (!hed_block_is_eof(prev) &&
		   hed_block_is_inner_virtual(prev) &&
		   prev->phys_pos + prev->t.size == phys_pos) {
		merger = prev;
		update_blockoffs(block, merger, merger->t.size);
	} else if (!hed_block_is_virtual(block)) {
		/* Convert physical to virtual */
		assert(block->dataobj);
		cache_put(file->cache, block->dataobj);
		block->dataobj = NULL;

		list_del_init(&block->lru);	/* unlink the block from LRU */
		hed_block_set_excache(block);	/* say it's unlinked */
		hed_block_set_virtual(block);
		return;
	} else
		/* Already virtual and cannot merge */
		return;

	list_splice(&block->refs, &merger->refs);

	/* Messing with block sizes and unchaining is a bit tricky
	 * since unchain_block() can splay(). So we really need
	 * to recalc_block_recursive() right after we update the size.
	 * If this place turns out to be a hot-spot, we can optimize
	 * the tree operations here. */
	merger->t.size += block->t.size;
	recalc_block_recursive(merger);

	/* Destroy the block */
	recalc_unchain_block(file_blocks(file), block);
	file_free_block(file, block);

	if (killprev)
		file_kill_block(file, prev);
}

static struct file_block *
split_block(struct hed_file *file, struct hed_block *block,
	    hed_uoff_t splitpoint)
{
	struct file_block *next;

	next = new_block(file, block->flags);
	if (!next)
		return NULL;

	if ( (next->dataobj = block->dataobj) ) {
		cache_get(next->dataobj);
		next->dataoff = block->dataoff + splitpoint;
	} else
		assert(hed_block_is_virtual(block));

	next->t.size = block->t.size - splitpoint;
	next->phys_pos = block->phys_pos;
	if (!hed_block_is_inserted(block))
		next->phys_pos += splitpoint;

	block->t.size = splitpoint;
	recalc_block_recursive(block);
	recalc_chain_block_after(file_blocks(file), block, next);

	move_blockoffs(block, next, splitpoint, UOFF_MAX, -splitpoint);

	return next;
}

/* Replace a chunk in @block with @newblock */
static int
replace_chunk(struct hed_file *file, struct hed_block *block,
	      hed_uoff_t offset, struct hed_block *newblock)
{
	size_t len = newblock->t.size;
	hed_uoff_t leadlen = offset + len;

	assert(offset < block->t.size);
	assert(len <= block->t.size - offset);

	/* Re-create the tail block if necessary */
	if (hed_block_is_eof(block) || block->t.size - offset > len) {
		struct file_block *tail;

		tail = new_block(file, block->flags);
		if (!tail)
			return -1;
		tail->t.size = block->t.size - leadlen;
		tail->dataobj = block->dataobj;
		tail->dataoff = block->dataoff + leadlen;
		tail->phys_pos = block->phys_pos + leadlen;

		hed_block_clear_eof(block);
		recalc_chain_block_after(file_blocks(file), block, tail);

		/* Move offsets to the tail */
		move_blockoffs(block, tail, leadlen, UOFF_MAX, -leadlen);
	}

	/* Move pointers to the new block */
	assert(leadlen > 0);
	move_blockoffs(block, newblock, offset, leadlen - 1, -offset);

	/* Shorten the leading block */
	block->t.size = offset;
	recalc_block_recursive(block);

	/* Insert the new block */
	recalc_chain_block_after(file_blocks(file), block, newblock);

	/* Kill the leading block if possible */
	kill_block_if_empty(file, block);

	return 0;
}

#ifdef HED_CONFIG_SWAP

static char *
swp_filename(const char *filename)
{
	size_t fnlen = strlen(filename);
	char *swp;
	char *file;

	if (!(swp = malloc(fnlen + 9)) )
		return NULL;
	strcpy(swp, filename);

	file = strrchr(swp, '/');
	file = file ? file + 1 : swp;
	*file = '.';
	strcpy(stpcpy(file + 1, filename + (file - swp)), ".hedswp");
	return swp;
}

static char *
newswp_filename(char *swpname)
{
	char *ret, *p;

	ret = swpname;
	while (!access(ret, F_OK)) {
		if (ret == swpname) {
			if (! (ret = strdup(swpname)) )
				return NULL;
			p = ret + strlen(ret) - 1;
		}

		if (*p == 'a') {
			free(ret);
			return NULL;
		}
		--*p;
	}
	return ret;
}

int
hed_file_remove_swap(struct hed_file *file)
{
	if (remove(file->swpname))
		return -1;
	if (rename(file->newswpname, file->swpname))
		return -1;

	free(file->newswpname);
	file->newswpname = file->swpname;
	return 0;
}

static inline struct hed_file *
file_swp_init(const char *name)
{
	char *swpname, *newswpname;
	struct swp_file *swp;
	struct hed_file *file;

	swpname = swp_filename(name);
	if (!swpname)
		goto fail;
	newswpname = newswp_filename(swpname);
	if (!newswpname)
		goto fail_free_name;
	swp = swp_init_write(newswpname);
	if (!swp)
		goto fail_free_newname;

	assert(sizeof(struct swp_header) + sizeof(struct hed_file)
	       <= FILE_BLOCK_SIZE);
	file = swp_private(swp);
	memset(file, 0, sizeof *file);

	file->swp = swp;
	file->swpname = swpname;
	file->newswpname = newswpname;

	return file;

fail_free_newname:
	free(newswpname);
fail_free_name:
	if (swpname != newswpname)
		free(swpname);
fail:
	return NULL;
}

static inline void
file_swp_done(struct hed_file *file)
{
	remove(file->newswpname);
	if (file->newswpname != file->swpname)
		free(file->newswpname);
	free(file->swpname);
	swp_done(file_swp(file));
	/* file was de-allocated automatically with file->swp */
}

#else /* HED_CONFIG_SWAP */

static inline struct hed_file *
file_swp_init(const char *name)
{
	return calloc(1, sizeof(struct hed_file));
}

static inline void
file_swp_done(struct hed_file *file)
{
}

#endif /* HED_CONFIG_SWAP */

static inline struct stat *
file_stat(struct hed_file *file)
{
	return &file->s;
}

int
hed_file_update_size(struct hed_file *file)
{
	hed_uoff_t oldsize = file->phys_size;

	if (fstat(file->fd, file_stat(file)) < 0)
		return -1;

	if (S_ISBLK(file_stat(file)->st_mode)) {
		if (ioctl(file->fd, BLKGETSIZE64, &file->phys_size)) {
			unsigned long size_in_blocks;
			if (ioctl(file->fd, BLKGETSIZE, &size_in_blocks))
				return -1;
			file->phys_size = (hed_uoff_t)size_in_blocks << 9;
		}
	} else if (S_ISREG(file_stat(file)->st_mode)) {
		file->phys_size = file_stat(file)->st_size;
	} else if (S_ISCHR(file_stat(file)->st_mode)) {
		if (lseek(file->fd, 0, SEEK_SET) < 0)
			return -1;
		file->phys_size = (hed_uoff_t)OFF_MAX + 1;
	} else {
		errno = EINVAL;
		return -1;
	}

	file->size += file->phys_size - oldsize;
	return 0;
}

static int
do_file_open(struct hed_file *file)
{
	file->fd = open(file->name, O_RDONLY);
	if (file->fd < 0) {
		if (errno != ENOENT)
			return errno;
		fprintf(stderr, "Warning: File %s does not exist\n",
			file->name);
		memset(file_stat(file), 0, sizeof(struct stat));

	} else if (hed_file_update_size(file)) {
		int errsv = errno;
		close(file->fd);
		return errsv;
	}
	return 0;
}

static int
file_setup_blocks(struct hed_file *file)
{
	hed_uoff_t phys_size = file->phys_size;
	struct file_block *block;

	block = &file->eof_block;
	block->flags = HED_BLOCK_EXCACHE | HED_BLOCK_VIRTUAL | HED_BLOCK_EOF;
	INIT_LIST_HEAD(&block->lru);
	INIT_LIST_HEAD(&block->refs);
	block->t.size = OFF_MAX - phys_size + 1;
	block->phys_pos = phys_size;

	init_block_list(file_blocks(file), block);

	if (phys_size) {
		block = new_virt_block(file, 0, phys_size, 0);
		if (!block)
			return -1;
		recalc_chain_block(file_blocks(file), block);
	}

	return 0;
}

int
libhed_init(void)
{
	return file_access_init();
}

struct hed_file *
hed_open(const char *name)
{
	struct hed_file *file;

	if (! (file = file_swp_init(name)) )
		goto fail;

	file->name = name;

	file->cache = cache_init(CACHE_LENGTH, file_swp(file));
	if (!file->cache)
		goto fail_file;
	INIT_LIST_HEAD(&file->lru);

	if (do_file_open(file))
		goto fail_file;

	if (file_setup_blocks(file))
		goto fail_file;

	fixup_register(file);

	return file;

fail_file:
	hed_close(file);
fail:
	return NULL;
}

void
hed_close(struct hed_file *file)
{
	assert(file);

	/* Do not check for errors:
	 * 1. This FD is read-only => no data loss possible
	 * 2. We're about to exit anyway => no resource leak
	 */
	if (file->fd >= 0)
		close(file->fd);

	fixup_deregister(file);

	/* No need to free file blocks here, because all data is
	 * allocated either from the cache or from the swap file
	 * and both are going to be destroyed now.
	 */
	if (file->cache)
		cache_done(file->cache);

	file_swp_done(file);
}
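
/* Illustrative sketch, not part of the original source and not built by
 * default (HED_FILE_EXAMPLES is a hypothetical guard): the minimal
 * open/close life cycle as seen from a front-end. libhed_init() must run
 * once before the first hed_open(). */
#ifdef HED_FILE_EXAMPLES
static int
example_open_close(const char *name)
{
	struct hed_file *file;

	if (libhed_init())
		return -1;
	file = hed_open(name);
	if (!file)
		return -1;
	/* ... edit the file here ... */
	hed_close(file);
	return 0;
}
#endif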

/* Adjust blockoff after off gets outside its block */
static void
fixup_blockoff_slow(blockoff_t *blockoff)
{
	struct file_block *block = blockoff->block;
	hed_uoff_t off = blockoff->off;

	do {
		if ((hed_off_t)off < 0) {
			block = prev_block(block);
			off += block->t.size;
		} else {
			off -= block->t.size;
			block = next_block(block);
		}
	} while (off >= block->t.size);

	blockoff->block = block;
	blockoff->off = off;
	list_move(&blockoff->list, &block->refs);
}

/* Adjust blockoff if off gets outside its block.
 * This is separate from fixup_blockoff_slow, because it is supposed
 * to be small enough to be inlined (which is a win, because most of
 * the time no fixup has to be done, so the fast inlined path is used).
 */
static inline void
fixup_blockoff(blockoff_t *blockoff)
{
	if (blockoff->off >= blockoff->block->t.size)
		fixup_blockoff_slow(blockoff);
}

hed_off_t
hed_move_relative(blockoff_t *blockoff, hed_off_t num)
{
	hed_off_t newpos = blockoff->pos + num;
	if (newpos < 0) {
		newpos = num < 0 ? 0 : OFF_MAX;
		num = newpos - blockoff->pos;
	}
	blockoff->pos = newpos;
	blockoff->off += num;
	fixup_blockoff(blockoff);
	return num;
}

/* Relative move with no checking (and only by a small amount) */
static inline void
move_rel_fast(blockoff_t *blockoff, ssize_t num)
{
	blockoff->off += num;
	blockoff->pos += num;
	fixup_blockoff(blockoff);
}

static void
alloc_caches(struct hed_file *file, struct hed_block_data **caches, int n)
{
	struct remap_control rc;
	int i;

	BDEBUG("Allocate %d caches (%d free slots available)\n",
	       n, file->cache->nfree);

	assert(n <= CACHE_LENGTH);
	while (file->cache->nfree < n) {
		struct file_block *block;

		assert(!list_empty(&file->lru));
		block = list_entry(file->lru.next, struct file_block, lru);
		BDEBUG("Killing block at physical %llx\n", block->phys_pos);
		file_kill_block(file, block);
	}

	for (i = 0; i < n; ++i) {
		caches[i] = cache_alloc(file->cache);
		assert(caches[i]);
	}

	remap_init(&rc);
	remap_compact(&rc, file->cache, caches, n);
	for (i = 0; i < n; ++i)
		remap_add(&rc, caches[i]->data);
	remap_finish(&rc);
}

static inline void
free_caches(struct hed_file *file, struct hed_block_data **preload, int n)
{
	int i;

	for (i = 0; i < n; ++i)
		if (preload[i])
			cache_put(file->cache, preload[i]);
}

static int
file_load_data(struct hed_file *file,
	       struct hed_block_data **caches, int n,
	       hed_uoff_t offset)
{
	struct hed_block_data *dataobj = caches[0];
	void *data = dataobj->data;
	ssize_t rsize, total, segsize;

	segsize = n << FILE_BLOCK_SHIFT;
	for (total = 0; total < segsize; total += rsize) {
		rsize = pread(file->fd, data + total,
			      segsize - total, offset + total);
		if (!rsize)
			break;
		if (rsize < 0) {
			dataobj = caches[total >> FILE_BLOCK_SHIFT];
			caches[total >> FILE_BLOCK_SHIFT] = NULL;
			data = dataobj->data;
			cache_put(file->cache, dataobj);
			total = FILE_BLOCK_ROUND(total);
			rsize = FILE_BLOCK_SIZE;
			BDEBUG("Error reading block at phys %llx: %s\n",
			       offset + total, strerror(errno));
		}
	}

	BDEBUG("Loaded data at phys %llx up to %llx\n",
	       offset, offset + segsize);
	return 0;
}

#ifdef HED_CONFIG_MMAP

static int
file_load_data_mmap(struct hed_file *file,
		    struct hed_block_data **caches, int n,
		    hed_uoff_t offset)
{
	void *data;
	ssize_t segsize;
	int i;

	segsize = n << FILE_BLOCK_SHIFT;
	data = mmap(NULL, segsize,
		    PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | (file->fd < 0 ? MAP_ANONYMOUS : 0),
		    file->fd, offset);

	if (data == MAP_FAILED) {
		BDEBUG("mmap failed at %llx: fail over to traditional read\n",
		       offset);

		data = mmap(NULL, segsize,
			    PROT_READ | PROT_WRITE,
			    MAP_PRIVATE | MAP_ANONYMOUS,
			    0, 0);
		if (data == MAP_FAILED)
			return -1;

		for (i = 0; i < n; ++i)
			caches[i]->data = data + (i << FILE_BLOCK_SHIFT);
		return file_load_data(file, caches, n, offset);
	}

	for (i = 0; i < n; ++i)
		caches[i]->data = data + (i << FILE_BLOCK_SHIFT);

	BDEBUG("Loaded data at phys %llx up to %llx\n",
	       offset, offset + segsize);
	return 0;
}
# define file_load_data file_load_data_mmap

#endif

/* Find the block with the lowest physical position that intersects
 * the loaded segment. The search starts at @block.
 */
static struct hed_block *
first_load_block(struct hed_tree *tree, struct hed_block *block,
		 hed_uoff_t segstart)
{
	struct hed_block *prev = block;
	do {
		block = prev;
		prev = prev_block(prev);
	} while (!hed_block_is_eof(prev) && phys_end(prev) > segstart);
	return block;
}

static int
load_blocks(struct hed_file *file, const blockoff_t *from)
{
	hed_uoff_t physpos, segstart;
	struct hed_block_data *preload[FILE_READAHEAD];
	size_t ra_bkw, ra_fwd, ra_off;
	hed_cursor_t pos;
	int nblocks;

	segstart = hed_cursor_phys_pos(from);
	ra_bkw = FILE_BLOCK_OFF(segstart);
	ra_fwd = FILE_BLOCK_SIZE - ra_bkw;

	if (file_ra_forward(file))
		ra_fwd += (FILE_READAHEAD - 1) << FILE_BLOCK_SHIFT;
	else if (file_ra_backward(file))
		ra_bkw += (FILE_READAHEAD - 1) << FILE_BLOCK_SHIFT;

	if (ra_bkw > segstart)
		ra_bkw = segstart;
	if (ra_fwd > file->phys_size - segstart)
		ra_fwd = file->phys_size - segstart;

	segstart -= ra_bkw;
	ra_fwd += ra_bkw;
	pos.block = first_load_block(file_blocks(file), from->block, segstart);
	pos.off = segstart >= pos.block->phys_pos
		? segstart - pos.block->phys_pos
		: 0;

	list_add(&pos.list, &pos.block->refs);
	nblocks = ((ra_fwd - 1) >> FILE_BLOCK_SHIFT) + 1;
	alloc_caches(file, preload, nblocks);
	put_cursor(&pos);

	if (file_load_data(file, preload, nblocks, segstart)) {
		free_caches(file, preload, nblocks);
		return -1;
	}

	while (physpos = hed_cursor_phys_pos(&pos),
	       ra_off = physpos - segstart,
	       ra_off < ra_fwd) {
		struct hed_block_data *dataobj;
		struct hed_block *newblock;
		size_t datalen;

		if (!hed_block_is_virtual(pos.block)) {
			pos.block = next_block(pos.block);
			pos.off = 0;
			continue;
		}

		datalen = FILE_BLOCK_SIZE - FILE_BLOCK_OFF(physpos);
		if (datalen > hed_block_size(pos.block) - pos.off)
			datalen = hed_block_size(pos.block) - pos.off;

		dataobj = preload[ra_off >> FILE_BLOCK_SHIFT];
		newblock = dataobj
			? new_data_block(file, physpos, datalen, dataobj)
			: new_virt_block(file, physpos, datalen,
					 HED_BLOCK_BAD);

		/* Punch the new block */
		BDEBUG("Add %s block at %llx, length %llx\n",
		       hed_block_is_virtual(newblock) ? "error" : "physical",
		       newblock->phys_pos, newblock->t.size);
		if (replace_chunk(file, pos.block, pos.off, newblock)) {
			file_free_block(file, newblock);
			free_caches(file, preload, nblocks);
			return -1;
		}

		pos.block = next_block(newblock);
		pos.off = 0;
	}

	/* All cache objects now have an extra reference from the
	 * allocation. Drop it. */
	free_caches(file, preload, nblocks);

	dump_blocks(file);
	return 0;
}

/* Shorten a block at beginning and enlarge the preceding block.
 *
 * Re-allocate at most @len bytes from the beginning of @block to the
 * end of the preceding block.
 * If @block is virtual, this will effectively devirtualize the range.
 * If @block is not virtual, this will change the backing store of
 * the bytes in the range.
 * Returns: the number of bytes actually moved.
 */
static size_t
shrink_at_begin(struct hed_file *file, struct file_block *block,
		size_t len, long state)
{
	struct file_block *prev = prev_block(block);
	size_t maxgrow;

	/* Basic assumptions */
	assert(!(state & HED_BLOCK_VIRTUAL));

	/* The previous block must exist: */
	if (hed_block_is_eof(prev))
		return 0;

	/* The block flags must match the requested @state: */
	if ((prev->flags & HED_BLOCK_STATEMASK) != state)
		return 0;

	/* No deletions at end, or similar: */
	if (prev->phys_pos + prev->t.size != block->phys_pos)
		return 0;

	/* Append fewer bytes than requested if not all are available */
	assert(prev->t.size <= prev->dataobj->size);
	maxgrow = prev->dataobj->size - prev->dataoff - prev->t.size;
	if (len > maxgrow)
		len = maxgrow;
	if (!len)
		return 0;

	BDEBUG("Appending 0:%lx to the previous block\n", len);

	/* Move blockoffs away from the to-be-chopped beginning */
	move_blockoffs(block, prev, 0, len - 1, prev->t.size);

	/* Enlarge the previous block */
	prev->t.size += len;
	recalc_block_recursive(prev);

	/* Shorten the original block */
	block->t.size -= len;
	block->dataoff += len;
	block->phys_pos += len;
	recalc_block_recursive(block);
	return len;
}

/* Shorten a block at end and enlarge the following block.
 *
 * Re-allocate at most @len bytes from the end of @block to the
 * beginning of the following block.
 * If @block is virtual, this will effectively devirtualize the range.
 * If @block is not virtual, this will change the backing store of
 * the bytes in the range.
 * Returns: the number of bytes actually moved.
 */
static size_t
shrink_at_end(struct hed_file *file, struct file_block *block,
	      size_t len, long state)
{
	struct file_block *next = next_block(block);
	hed_uoff_t off;

	/* Basic assumptions */
	assert(!(state & HED_BLOCK_VIRTUAL));

	/* The next block must exist: */
	if (hed_block_is_eof(block))
		return 0;

	/* The block flags must match the requested @state: */
	if ((next->flags & HED_BLOCK_STATEMASK) != state)
		return 0;

	/* No deletions at end, or similar: */
	if (block->phys_pos + block->t.size != next->phys_pos)
		return 0;

	/* Prepend fewer bytes than requested if not all are available */
	if (len > next->dataoff)
		len = next->dataoff;
	if (!len)
		return 0;
	off = block->t.size - len;

	BDEBUG("Prepending %llx:%lx to the next block\n", off, len);

	/* Shift blockoffs in the new physical block */
	update_blockoffs(next, next, len);

	/* Move blockoffs away from the to-be-chopped end */
	move_blockoffs(block, next, off, UOFF_MAX, -off);

	/* Enlarge the next block */
	next->dataoff -= len;
	next->phys_pos -= len;
	next->t.size += len;
	recalc_block_recursive(next);

	/* Shorten the original block */
	block->t.size -= len;
	recalc_block_recursive(block);
	return len;
}

/* Search for an existing data object within the same physical block
 * as @curs, and having the given @state flags.
 */
static struct hed_block_data *
search_data(struct hed_file *file, const hed_cursor_t *curs, long state)
{
	struct file_block *block;
	hed_uoff_t physpos;

	physpos = FILE_BLOCK_ROUND(curs->block->phys_pos + curs->off);
	BDEBUG("Search for already loaded data at %llx starting at %llx...",
	       physpos, curs->block->phys_pos);

	/* Search backwards */
	block = curs->block;
	while (!hed_block_is_eof(block = prev_block(block))) {
		if (block->phys_pos < physpos)
			break;
		if ((block->flags & HED_BLOCK_STATEMASK) == state) {
			BDEBUG(" found at %llx\n", block->phys_pos);
			assert(block->dataobj);
			return block->dataobj;
		}
	}

	/* Search forwards */
	block = curs->block;
	while (!hed_block_is_eof(block)) {
		block = next_block(block);
		if (block->phys_pos >= physpos + FILE_BLOCK_SIZE)
			break;
		if ((block->flags & HED_BLOCK_STATEMASK) == state) {
			BDEBUG(" found at %llx\n", block->phys_pos);
			assert(block->dataobj);
			return block->dataobj;
		}
	}

	BDEBUG(" not found\n");
	return NULL;
}

static int
reuse_loaded_data(struct hed_file *file, const blockoff_t *blockoff,
		  struct hed_block_data *data)
{
	struct file_block *physblock;
	struct file_block *block = blockoff->block;
	hed_uoff_t block_offset = blockoff->off;
	hed_uoff_t physpos = block->phys_pos + block_offset;
	size_t part = FILE_BLOCK_OFF(physpos);
	size_t len =
		FILE_BLOCK_SIZE - part <= block->t.size - block_offset
		? FILE_BLOCK_SIZE - part
		: block->t.size - block_offset;

	if (part > block_offset)
		part = block_offset;
	physpos -= part;
	len += part;
	block_offset -= part;

	if (! (physblock = new_data_block(file, physpos, len, data)) )
		return -1;

	BDEBUG("Add physical block at %llx, length %llx\n",
	       physblock->phys_pos, physblock->t.size);
	if (replace_chunk(file, block, block_offset, physblock)) {
		file_free_block(file, physblock);
		return -1;
	}

	dump_blocks(file);
	return 0;
}

/* Replace a part of a virtual block with content loaded
 * from disk. The amount of data loaded from the disk depends
 * on various factors with the goal to choose the most efficient
 * ratio. The only guarantee is that the byte at @blockoff will
 * be in a non-virtual block when this function returns 0.
 */
static int
devirtualize_clean(struct hed_file *file, const blockoff_t *blockoff)
{
	struct file_block *block = blockoff->block;
	hed_uoff_t block_offset = blockoff->off;
	hed_uoff_t remain = block->t.size - block_offset;
	struct hed_block_data *data;

	BDEBUG("punch a clean hole at %llx into %llx:%llx\n", block_offset,
	       block_offset(block), block->t.size);
	assert(hed_block_is_virtual(block));

	/* Check if we can combine with a neighbouring block */
	if (shrink_at_begin(file, block, SIZE_MAX, 0) > block_offset ||
	    shrink_at_end(file, block, SIZE_MAX, 0) >= remain) {
		kill_block_if_empty(file, block);
		dump_blocks(file);
		return 0;
	}

	/* Check if the block is already loaded elsewhere */
	data = search_data(file, blockoff, 0);
	return data
		? reuse_loaded_data(file, blockoff, data)
		: load_blocks(file, blockoff);
}

/* Replace at most @len bytes of a virtual block with a newly
 * allocated out-of-cache block. The block is marked dirty
 * and its data is left uninitialized.
 * If the block at @blockoff is not virtual, make it dirty.
 * Note that this function may devirtualize less than @len bytes.
 * In the worst case only 1 byte at @blockoff will be available.
 */
static int
prepare_modify(struct hed_file *file, blockoff_t *blockoff, size_t len)
{
	struct file_block *block = blockoff->block;
	hed_uoff_t block_offset = blockoff->off;
	hed_uoff_t remain = block->t.size - block_offset;
	struct file_block *newblock;

	if (hed_block_is_dirty(block))
		return 0;

	if (len > remain)
		len = remain;

	BDEBUG("punch a dirty hole at %llx:%lx into %llx:%llx\n",
	       block_offset, len,
	       block_offset(block), block->t.size);

	/* Check if we can combine with a neighbouring block */
	if ((block_offset == 0 &&
	     shrink_at_begin(file, block, len, HED_BLOCK_DIRTY)) ||
	    (remain == len &&
	     shrink_at_end(file, block, len, HED_BLOCK_DIRTY) >= len)) {
		kill_block_if_empty(file, block);
		dump_blocks(file);
		return 0;
	}

	/* Initialize a new block */
	newblock = new_block(file, HED_BLOCK_EXCACHE | HED_BLOCK_DIRTY);
	if (!newblock)
		goto out_err;

	/* Allocate data */
	if ( (newblock->dataobj = search_data(file, blockoff,
					      HED_BLOCK_DIRTY)) )
		cache_get(newblock->dataobj);
	else if (! (newblock->dataobj = block_data_new(file->cache,
						       FILE_BLOCK_SIZE)) )
		goto out_err_free;

	newblock->phys_pos = block->phys_pos + block_offset;
	newblock->dataoff = FILE_BLOCK_OFF(newblock->phys_pos);
	if (len > FILE_BLOCK_SIZE - newblock->dataoff)
		len = FILE_BLOCK_SIZE - newblock->dataoff;
	newblock->t.size = len;

	if (replace_chunk(file, block, block_offset, newblock))
		goto out_err_free;

	dump_blocks(file);
	return 0;

out_err_free:
	file_free_block(file, newblock);
out_err:
	return -1;
}

/* Ensure that the cursor points to an up-to-date non-virtual block.
 * Load the data from disk if necessary.
 * Returns the length of the chunk available at @curs (at most @len),
 * or 0 on failure. */
size_t
hed_prepare_read(struct hed_file *file, const hed_cursor_t *curs, size_t len)
{
	struct file_block *block = curs->block;
	if (hed_block_is_inner_virtual(block) &&
	    devirtualize_clean(file, curs) < 0)
		return 0;

	return hed_cursor_chunk_len(curs, len);
}

/* Move the block pointer to the next block */
static struct hed_block *
cursor_next_block(struct hed_file *file, hed_cursor_t *curs)
{
	struct hed_block *block =
		next_nonzero_block(file_blocks(file), curs->block);

	if (block) {
		curs->block = block;
		curs->off = 0;
		list_move(&curs->list, &block->refs);
	}
	return block;
}

/* This is an optimized way of doing:
 *
 *	hed_move_relative(blockoff, blockoff->block->t.size);
 *
 * for the case when blockoff->off == 0.
 */
static struct hed_block *
move_to_next(struct hed_file *file, hed_cursor_t *curs)
{
	curs->pos += hed_block_size(curs->block);
	return cursor_next_block(file, curs);
}

/* Copy in @count bytes from @pos.
 * Returns the number of bytes that were not read (i.e. zero on success).
 * The @pos blockoff is moved by the amount of data read.
 * CAUTION: If you read up to MAX_BLOCKOFF, then @pos points one byte
 * beyond the EOF block upon return.
 */
static size_t
copy_in(struct hed_file *file, void *buf, size_t count, blockoff_t *pos)
{
	size_t cpylen;

	pos->pos += count;
	while (count && (cpylen = hed_prepare_read(file, pos, count))) {
		if (hed_block_is_virtual(pos->block))
			memset(buf, 0, cpylen);
		else
			memcpy(buf, hed_cursor_data(pos), cpylen);

		buf += cpylen;
		count -= cpylen;
		if ( (pos->off += cpylen) >= hed_block_size(pos->block) )
			if (!cursor_next_block(file, pos))
				break;
	}
	pos->pos -= count;
	return count;
}

size_t
hed_file_cpin(struct hed_file *file, void *buf, size_t count,
	      const hed_cursor_t *pos)
{
	blockoff_t mypos;
	size_t ret;

	hed_dup_cursor(pos, &mypos);
	ret = copy_in(file, buf, count, &mypos);
	put_cursor(&mypos);
	return ret;
}
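
/* Illustrative sketch, not part of the original source and not built by
 * default (HED_FILE_EXAMPLES is a hypothetical guard): reading a few
 * bytes at an absolute offset with hed_file_cpin(). The return value is
 * the number of bytes NOT read, so a short read past EOF shows up as a
 * non-zero remainder. */
#ifdef HED_FILE_EXAMPLES
static size_t
example_read_bytes(struct hed_file *file, hed_uoff_t offset,
		   unsigned char *buf, size_t count)
{
	hed_cursor_t curs;
	size_t remain;

	hed_get_cursor(file, offset, &curs);
	remain = hed_file_cpin(file, buf, count, &curs);
	hed_put_cursor(&curs);
	return count - remain;	/* bytes actually read */
}
#endif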

/* Set the modified flag */
static inline void
set_modified(struct hed_file *file)
{
	file->modified = true;
}

/* Clear the modified flag */
static inline void
clear_modified(struct hed_file *file)
{
	file->modified = false;
}

int
hed_file_set_byte(struct hed_file *file, blockoff_t *blockoff,
		  unsigned char byte)
{
	hed_uoff_t offset = blockoff->pos;

	if (prepare_modify(file, blockoff, 1))
		return -1;
	set_modified(file);

	if (offset >= file->size)
		file->size = offset + 1;

	hed_block_data(blockoff->block)[blockoff->off] = byte;
	return 0;
}

size_t
hed_file_set_block(struct hed_file *file, blockoff_t *blockoff,
		   unsigned char *buf, size_t len)
{
	while (len) {
		size_t span;

		if (prepare_modify(file, blockoff, len))
			break;
		set_modified(file);

		span = hed_cursor_chunk_len(blockoff, len);
		memcpy(hed_cursor_data(blockoff), buf, span);
		buf += span;
		len -= span;
		move_rel_fast(blockoff, span);
	}
	if (blockoff->pos > file->size)
		file->size = blockoff->pos;

	return len;
}

hed_uoff_t
hed_file_set_bytes(struct hed_file *file, blockoff_t *blockoff,
		   unsigned char byte, hed_uoff_t rep)
{
	while (rep) {
		size_t span;

		if (prepare_modify(file, blockoff, rep))
			break;
		set_modified(file);

		span = hed_cursor_chunk_len(blockoff, rep);
		memset(hed_cursor_data(blockoff), byte, span);
		rep -= span;
		move_rel_fast(blockoff, span);
	}
	if (blockoff->pos > file->size)
		file->size = blockoff->pos;

	return rep;
}
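
/* Illustrative sketch, not part of the original source and not built by
 * default (HED_FILE_EXAMPLES is a hypothetical guard): overwriting in
 * place. hed_file_set_block() consumes the cursor (it ends up just past
 * the last written byte) and returns the number of bytes it could NOT
 * write. */
#ifdef HED_FILE_EXAMPLES
static int
example_overwrite(struct hed_file *file, hed_uoff_t offset,
		  unsigned char *buf, size_t len)
{
	hed_cursor_t curs;
	size_t failed;

	hed_get_cursor(file, offset, &curs);
	failed = hed_file_set_block(file, &curs, buf, len);
	hed_put_cursor(&curs);
	return failed ? -1 : 0;
}
#endif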

static int
file_erase_continuous(struct hed_file *file, blockoff_t *blockoff, size_t len)
{
	struct file_block *block = blockoff->block;
	hed_uoff_t block_offset = blockoff->off;

	/* Find the new position */
	hed_move_relative(blockoff, len);

	/* Move all other cursors in the erased range to the new position */
	assert(len > 0);
	move_cursors_abs(block, block_offset,
			 block_offset + len - 1, blockoff);

	if (!block_offset) {
		block->dataoff += len;
		if (!hed_block_is_inserted(block))
			block->phys_pos += len;
	} else if (block_offset + len < block->t.size &&
		   !split_block(file, block, block_offset + len))
		return -1;

	move_blockoffs(block, block, block_offset, UOFF_MAX, -(hed_uoff_t)len);

	block->t.size -= len;
	recalc_block_recursive(block);

	kill_block_if_empty(file, block);
	return 0;
}

size_t
hed_file_erase_block(struct hed_file *file, blockoff_t *blockoff,
		     hed_uoff_t len)
{
	hed_uoff_t offset;
	hed_uoff_t todo;
	struct file_block *eofblock;

	offset = blockoff->pos;
	if (offset > file_size(file))
		return 0;
	else if (len > file_size(file) - offset)
		len = file_size(file) - offset;

	todo = len;
	while (todo) {
		size_t span = hed_cursor_chunk_len(blockoff, todo);

		if (file_erase_continuous(file, blockoff, span))
			break;

		todo -= span;
	}
	len -= todo;

	file->size -= len;
	set_modified(file);

	eofblock = last_block(file_blocks(file));
	assert(hed_block_is_virtual(eofblock));
	assert(hed_block_is_eof(eofblock));
	eofblock->t.size += len;
	recalc_block_recursive(eofblock);

	struct hed_block *slideblock = prev_block(blockoff->block);
	if (hed_block_is_eof(slideblock))
		slideblock = blockoff->block;
	slide_blockoffs(file, slideblock, blockoff->pos, -len);

	return todo;
}

int
hed_file_insert_begin(struct hed_file *file, const hed_cursor_t *curs,
		      hed_cursor_t *curs_ins)
{
	struct file_block *block, *newblock;

	BDEBUG("Starting insert at %llx\n", curs->pos);

	newblock = new_block(file,
			     HED_BLOCK_EXCACHE | HED_BLOCK_INSERTED);
	if (!newblock)
		return -1;

	newblock->phys_pos = hed_cursor_phys_pos(curs);
	newblock->dataobj = block_data_new(file->cache, FILE_BLOCK_SIZE);
	if (!newblock->dataobj) {
		file_free_block(file, newblock);
		return -1;
	}

	block = curs->block;
	if (curs->off) {
		if (!split_block(file, block, curs->off)) {
			file_free_block(file, newblock);
			return -1;
		}
	} else
		block = prev_block(block);

	chain_block_after(file_blocks(file), block, newblock);

	curs_ins->pos = curs->pos;
	curs_ins->off = newblock->t.size;
	curs_ins->block = newblock;
	list_add(&curs_ins->list, &newblock->refs);

	dump_blocks(file);
	return 0;
}

void
hed_file_insert_end(struct hed_file *file, blockoff_t *blockoff_ins)
{
	struct file_block *block = blockoff_ins->block;

	BDEBUG("End insert at %llx\n", blockoff_ins->pos);

	blockoff_ins->block = NULL;
	list_del(&blockoff_ins->list);
	if (!kill_block_if_empty(file, block))
		block_data_shrink(file->cache, block->dataobj,
				  block->dataoff + block->t.size);

	dump_blocks(file);
}

static void
insert_block(struct hed_file *file, blockoff_t *blockoff,
	     unsigned char *buf, size_t len)
{
	struct file_block *block = blockoff->block;
	hed_uoff_t offset = blockoff->pos;

	assert(file && offset >= 0);

	assert(hed_block_is_excache(block));
	hed_block_set_dirty(block);
	set_modified(file);

	memcpy(hed_block_data(block) + blockoff->off, buf, len);
	block->t.size += len;
	recalc_block_recursive(block);
	blockoff->off += len;
	blockoff->pos += len;

	if (blockoff->pos > file->size)
		file->size = blockoff->pos;
	else
		file->size += len;

	slide_blockoffs(file, next_block(block), offset, len);
}

size_t
hed_file_insert_block(struct hed_file *file, blockoff_t *blockoff,
		      unsigned char *buf, size_t len)
{
	while (len) {
		struct file_block *block = blockoff->block;
		size_t remain = block->dataobj->size - blockoff->off;

		if (!remain) {
			list_del(&blockoff->list);
			blockoff->block = next_block(block);
			blockoff->off = 0;

			if (!hed_file_insert_begin(file, blockoff, blockoff))
				continue;

			blockoff->block = block;
			blockoff->off = block->t.size;
			list_add(&blockoff->list, &block->refs);
			break;
		}

		if (remain > len)
			remain = len;

		BDEBUG("Append %ld bytes to the insert block\n",
		       (long) remain);
		insert_block(file, blockoff, buf, remain);
		buf += remain;
		len -= remain;
	}
	return len;
}

int
hed_file_insert_byte(struct hed_file *file, blockoff_t *blockoff,
		     unsigned char byte)
{
	return hed_file_insert_block(file, blockoff, &byte, 1);
}

size_t
hed_file_insert_once(struct hed_file *file, blockoff_t *blockoff,
		     unsigned char *buf, size_t len)
{
	blockoff_t insert;

	if (!hed_file_insert_begin(file, blockoff, &insert)) {
		len = hed_file_insert_block(file, &insert, buf, len);
		hed_file_insert_end(file, &insert);
	}
	return len;
}
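
/* Illustrative sketch, not part of the original source and not built by
 * default (HED_FILE_EXAMPLES is a hypothetical guard): inserting bytes
 * at the cursor. For a one-shot insertion, hed_file_insert_once() wraps
 * the begin/block/end sequence shown in the functions above. */
#ifdef HED_FILE_EXAMPLES
static int
example_insert(struct hed_file *file, hed_uoff_t offset,
	       unsigned char *buf, size_t len)
{
	hed_cursor_t curs;
	size_t failed;

	hed_get_cursor(file, offset, &curs);
	failed = hed_file_insert_once(file, &curs, buf, len);
	hed_put_cursor(&curs);
	return failed ? -1 : 0;
}
#endif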

struct commit_control {
	struct hed_file *file;
	int wfd;		/* file descriptor for writing */
	int needwrite;		/* non-zero if write is needed */
	blockoff_t begoff, endoff;
	hed_off_t shift;
	void *partbuf;		/* allocated 3*FILE_BLOCK_SIZE */
	void *partial;		/* pointer into partbuf */
};

/* Get the logical<->physical shift value after the specified block.
 * It is essential to get the value _AFTER_ the block, because the
 * shift value is used to decide how the current block will be handled.
 */
static hed_off_t
get_shift(const blockoff_t *blockoff)
{
	struct file_block *block = blockoff->block;
	size_t curshift = hed_block_is_inserted(block) ? block->t.size : 0;
	return curshift +
		blockoff->pos - blockoff->off - block->phys_pos;
}

static void
switch_partial(struct commit_control *cc)
{
	cc->partial += FILE_BLOCK_SIZE;
	if (cc->partial >= cc->partbuf + 3*FILE_BLOCK_SIZE)
		cc->partial = cc->partbuf;
}
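
/* Worked example (illustrative, with made-up numbers): suppose a block's
 * logical start is blockoff->pos - blockoff->off == 0x2000 while its
 * phys_pos is 0x1800, i.e. 0x800 bytes were inserted somewhere before it.
 * For a non-inserted block, get_shift() returns 0x2000 - 0x1800 = +0x800:
 * the block's data must move towards higher physical offsets on commit,
 * so it is safe to write it back-to-front (commit_backwards below). For
 * an inserted block, its own size is added on top, because everything
 * _after_ it shifts by that much more. A shift <= 0 means the data moves
 * down (or stays), so it is written front-to-back (commit_forwards). */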

/* Write @len bytes from the partial buffer ending at @cc->begoff. */
static int
commit_block(struct commit_control *cc, size_t len)
{
	ssize_t written;

	assert(len > 0);
	BDEBUG(" -> write %lx bytes at %llx\n",
	       (unsigned long)len, cc->begoff.pos - len);
	written = pwrite(cc->wfd, cc->partial, len, cc->begoff.pos - len);
	if (written < len)
		/* TODO: keep data in a new list of dirty blocks */
		return -1;
	return 0;
}

static int
commit_partial(struct commit_control *cc)
{
	size_t partoff, remain, left;
	size_t writelen;

	partoff = FILE_BLOCK_OFF(cc->begoff.pos);
	remain = FILE_BLOCK_SIZE - partoff;
	if (remain > cc->endoff.pos - cc->begoff.pos)
		remain = cc->endoff.pos - cc->begoff.pos;
	if ((writelen = partoff + remain) == 0)
		return 0;

	BDEBUG("Fill partial %llx-%llx\n",
	       cc->begoff.pos, cc->begoff.pos + remain);

	left = copy_in(cc->file, cc->partial + partoff, remain, &cc->begoff);
	if (left) {
		hed_move_relative(&cc->begoff, left);
		return -1;
	}

	if (FILE_BLOCK_OFF(cc->begoff.pos) &&
	    !hed_block_is_eof(cc->begoff.block))
		return 0;

	return commit_block(cc, writelen);
}

/* Commit forwards.
 * Beware, cc->begoff is undefined upon return!
 */
static int
commit_forwards(struct commit_control *cc)
{
	hed_uoff_t endpos = cc->endoff.pos;
	int ret = 0;

	BDEBUG("Writing forwards %llx-%llx\n",
	       cc->begoff.pos, cc->endoff.pos);

	if (!cc->needwrite)
		return 0;

	while (cc->begoff.pos < endpos)
		ret |= commit_partial(cc);

	return ret;
}

/* Commit backwards.
 * Beware, cc->begoff is undefined upon return!
 */
static int
commit_backwards(struct commit_control *cc)
{
	void *retpartial = cc->partial;
	hed_uoff_t begpos = cc->begoff.pos;
	hed_uoff_t blkpos;	/* start of current partial block */
	int ret = 0;

	BDEBUG("Writing backwards %llx-%llx\n",
	       cc->begoff.pos, cc->endoff.pos);

	if (!cc->needwrite)
		return 0;

	blkpos = FILE_BLOCK_ROUND(cc->endoff.pos);
	if (blkpos <= begpos)
		goto final;

	/* Handle the trailing partial block */
	hed_update_cursor(cc->file, blkpos, &cc->begoff);
	switch_partial(cc);
	ret |= commit_partial(cc);
	retpartial = cc->partial;

	/* Handle the middle part */
	switch_partial(cc);
	while ( (blkpos -= FILE_BLOCK_SIZE) > begpos) {
		hed_update_cursor(cc->file, blkpos, &cc->begoff);
		ret |= commit_partial(cc);
	}
	switch_partial(cc);	/* wrap around */

final:
	/* Handle the first block (partial or not) */
	hed_update_cursor(cc->file, begpos, &cc->begoff);
	ret |= commit_partial(cc);

	cc->partial = retpartial;
	return ret;
}

/* Handle the partial block before a skipped one. */
static int
begin_skip(struct commit_control *cc)
{
	size_t minsize = FILE_BLOCK_SIZE - FILE_BLOCK_OFF(cc->endoff.pos);
	size_t remain;
	int ret = 0;

	/* Check if at least one complete physical block can be skipped */
	if (cc->endoff.block->t.size < minsize)
		return 0;

	/* Write out the partially dirty block */
	remain = FILE_BLOCK_OFF(minsize);
	hed_move_relative(&cc->endoff, remain);
	if (cc->shift <= 0)
		ret |= commit_forwards(cc);
	else
		ret |= commit_backwards(cc);
	hed_move_relative(&cc->endoff, -(hed_off_t)remain);
	hed_dup2_cursor(&cc->endoff, &cc->begoff);

	cc->needwrite = 0;
	return ret;
}

/* Handle the last partially skipped physical block. */
static int
end_skip(struct commit_control *cc)
{
	size_t partlen;
	int ret = 0;

	/* Find the beginning of the physical block */
	hed_dup2_cursor(&cc->endoff, &cc->begoff);
	partlen = FILE_BLOCK_OFF(cc->begoff.pos);
	hed_move_relative(&cc->begoff, -(hed_off_t)partlen);

	/* Read the partial data before this block */
	if (hed_file_cpin(cc->file, cc->partial, partlen, &cc->begoff))
		ret = -1;

	cc->needwrite = 1;
	return ret;
}

static void
undirty_blocks(struct hed_file *file)
{
	struct file_block *block, *next;
	hed_uoff_t pos = 0;

	BDEBUG("Undirtying blocks:\n");
	dump_blocks(file);

	next = first_block(file_blocks(file));
	while (!hed_block_is_eof(next)) {
		block = next;
		next = next_block(block);

		if (kill_block_if_empty(file, block))
			continue;

		if (!hed_block_is_virtual(block)) {
			cache_put(file->cache, block->dataobj);
			block->dataobj = NULL;
			list_del_init(&block->lru);
			block->flags = HED_BLOCK_EXCACHE | HED_BLOCK_VIRTUAL;
		}

		block->phys_pos = pos;
		pos += block->t.size;
	}

	block = first_block(file_blocks(file));
	while (!hed_block_is_eof(block)) {
		next = next_block(block);
		file_kill_block(file, block);
		block = next;
	}

	BDEBUG("After undirtying\n");
	dump_blocks(file);
}

static int
commit_init(struct commit_control *cc, struct hed_file *file)
{
	cc->file = file;

	cc->partbuf = malloc(3*FILE_BLOCK_SIZE);
	if (!cc->partbuf)
		goto err;

	cc->wfd = open(file->name,
		       O_RDWR | (file->fd < 0 ? O_CREAT : 0), 0666);
	if (cc->wfd < 0)
		goto err_free;

	if (file->fd < 0 &&
	    (file->fd = open(file->name, O_RDONLY)) < 0)
		goto err_close;

	return 0;

err_close:
	close(cc->wfd);
err_free:
	free(cc->partbuf);
err:
	return -1;
}

int
hed_file_commit(struct hed_file *file)
{
	struct commit_control cc;
	int ret = 0;

	if (commit_init(&cc, file))
		return -1;

	dump_blocks(file);

	cc.partial = cc.partbuf;
	get_cursor(file, 0, &cc.begoff);
	hed_dup_cursor(&cc.begoff, &cc.endoff);
	cc.shift = -cc.begoff.block->phys_pos;
	cc.needwrite = 0;
	while (!hed_block_is_eof(cc.endoff.block)) {
		hed_off_t newshift = cc.endoff.pos < file->phys_size
			? get_shift(&cc.endoff)
			: 0;

		if (cc.shift <= 0 && newshift > 0) {
			ret |= commit_forwards(&cc);
			hed_dup2_cursor(&cc.endoff, &cc.begoff);
		} else if (cc.shift > 0 && newshift <= 0) {
			ret |= commit_backwards(&cc);
			hed_dup2_cursor(&cc.endoff, &cc.begoff);
		}
		cc.shift = newshift;

		if (!newshift && !hed_block_is_dirty(cc.endoff.block)) {
			if (cc.needwrite)
				ret |= begin_skip(&cc);
		} else if (!cc.needwrite)
			ret |= end_skip(&cc);

		if (!move_to_next(file, &cc.endoff))
			break;
	}
	assert(cc.endoff.pos == file_size(file));

	if (cc.begoff.pos < file_size(file)) {
		if (cc.shift <= 0)
			ret |= commit_forwards(&cc);
		else
			ret |= commit_backwards(&cc);
	}

	put_cursor(&cc.begoff);
	put_cursor(&cc.endoff);

	ftruncate(cc.wfd, file_size(file));
	file->phys_size = file_size(file);

	ret |= close(cc.wfd);
	free(cc.partbuf);

	undirty_blocks(file);

	if (!ret)
		clear_modified(file);

	return ret;
}
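
/* Illustrative sketch, not part of the original source and not built by
 * default (HED_FILE_EXAMPLES is a hypothetical guard): a complete
 * edit-and-save round trip built from the primitives above. */
#ifdef HED_FILE_EXAMPLES
static int
example_edit_and_save(const char *name)
{
	struct hed_file *file;
	hed_cursor_t curs;
	int ret;

	file = hed_open(name);
	if (!file)
		return -1;

	hed_get_cursor(file, 0, &curs);
	ret = hed_file_set_byte(file, &curs, 0xff);	/* patch first byte */
	hed_put_cursor(&curs);

	if (!ret)
		ret = hed_file_commit(file);	/* write dirty blocks back */
	hed_close(file);
	return ret;
}
#endif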
2148 #ifdef HED_CONFIG_SWAP
2150 hed_file_write_swap(struct hed_file *file)
2152 return swp_write(file_swp(file));
static int
do_read_swap(struct hed_file *file, struct swp_file *swp, blockoff_t *pos)
{
	struct hed_file *swpfile = swp_private(swp);
	struct file_block *cur, block;
	hed_uoff_t phys_pos;

	if (file_stat(swpfile)->st_size != file_stat(file)->st_size ||
	    file_stat(swpfile)->st_mtime != file_stat(file)->st_mtime) {
		fprintf(stderr, "stat info mismatch (you modified the file"
			" since hed ran on it; refusing to touch it)\n");
		return -1;
	}

	BDEBUG("Swap header match\n");

	phys_pos = 0;
	cur = first_block(file_blocks(swpfile));
	do {
		struct hed_block_data dataobj;
		size_t (*mergefn)(struct hed_file*, blockoff_t*,
				  unsigned char*, size_t);
		void *data;
		size_t res;

		if (swp_cpin(swp, &block, cur, sizeof(struct file_block))) {
			perror("Cannot read block descriptor");
			return -1;
		}
		BDEBUG("BLOCK %p: flags %02lx phys 0x%02llx size 0x%llx\n",
		       cur, block.flags, (long long)block.phys_pos,
		       (long long)hed_block_size(&block));

		if (block.phys_pos - phys_pos) {
			if (hed_file_erase_block(file, pos,
						 block.phys_pos - phys_pos)) {
				perror("Cannot erase");
				return -1;
			}
			phys_pos = block.phys_pos;
		}

		if (!hed_block_is_inserted(&block))
			phys_pos += hed_block_size(&block);

		if (!hed_block_is_dirty(&block)) {
			hed_move_relative(pos, hed_block_size(&block));
			continue;
		}

		if (swp_cpin(swp, &dataobj, block.dataobj,
			     sizeof(struct hed_block_data))) {
			perror("Cannot read data descriptor");
			return -1;
		}
		BDEBUG("DATA %p: size 0x%lx\n",
		       block.dataobj, (long)dataobj.size);

		if (! (data = malloc(hed_block_size(&block))) ) {
			perror("Cannot allocate data");
			return -1;
		}

		if (swp_cpin(swp, data, dataobj.data + block.dataoff,
			     hed_block_size(&block))) {
			perror("Cannot read data");
			/* don't leak the block data on the error path */
			free(data);
			return -1;
		}

		mergefn = hed_block_is_inserted(&block)
			? hed_file_insert_once
			: hed_file_set_block;
		res = mergefn(file, pos, data, hed_block_size(&block));
		free(data);
		if (res) {
			perror("Cannot merge data");
			return -1;
		}
	} while (cur = next_block(&block), !hed_block_is_eof(&block));

	dump_blocks(file);
	return 0;
}

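/* Public entry point: open the swap file, replay it, and clean up.
 * Returns 0 on success and -1 on any failure (I/O error or stat
 * mismatch).
 */
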
int
hed_file_read_swap(struct hed_file *file)
{
	struct swp_file *swp;
	blockoff_t pos;
	int ret;

	if (! (swp = swp_init_read(file->swpname)) )
		return -1;

	get_cursor(file, 0, &pos);
	ret = do_read_swap(file, swp, &pos);
	put_cursor(&pos);

	swp_done(swp);
	return ret;
}

#endif	/* HED_CONFIG_SWAP */

struct ffb_hookdata {
	struct hed_file *file;
	blockoff_t *pos;
	hed_expr_reg_cb base_ecb;
	void *base_ecb_data;
};

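/* Register hook used while evaluating find expressions: the '.'
 * register yields @len bytes read from the file at offset @ofs relative
 * to the current search position, flagged HED_AEF_DYNAMIC so that the
 * expression is re-evaluated as the search advances; all other
 * registers are passed through to the caller's original callback.
 */
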
static long
eval_reg_cb(void *hookdata, char reg, hed_off_t ofs,
	    unsigned char *scramble, size_t len)
{
	struct ffb_hookdata *data = hookdata;
	if (reg == '.') {
		blockoff_t pos;
		long ret = HED_AEF_DYNAMIC;

		hed_dup_cursor(data->pos, &pos);
		hed_move_relative(&pos, ofs);
		if (copy_in(data->file, scramble, len, &pos))
			ret = HED_AEF_ERROR;
		put_cursor(&pos);
		return ret;
	}

	return data->base_ecb(data->base_ecb_data, reg, ofs, scramble, len);
}

static void
reverse(unsigned char *p, size_t len)
{
	unsigned char *q = p + len;
	while (p < q) {
		unsigned char x = *p;
		*p++ = *--q;
		*q = x;
	}
}

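/* A backward search simply runs Boyer-Moore over the reversed needle
 * (see find_bytestr below); the needle is reversed in place, which is
 * why it is passed around as a non-const buffer.
 */
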
static void
compute_badchar(ssize_t *badchar, const unsigned char *s, ssize_t len)
{
	size_t i = 1;
	while (i < len)
		badchar[*s++] = i++;
}

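/* Example: for the needle "abcab" the loop records the rightmost
 * occurrence (1-based) of each byte among the first len-1 positions:
 * badchar['a'] = 4, badchar['b'] = 2, badchar['c'] = 3, and 0 for every
 * other byte (the table is calloc'ed in find_bytestr).  A mismatch on
 * text byte c against needle index i then shifts by i + 1 - badchar[c];
 * see bm_find below.
 */
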
static void
compute_sfx(ssize_t *sfx, const unsigned char *s, ssize_t len)
{
	ssize_t f, g, i;

	sfx[len - 1] = len;
	g = len - 1;
	for (i = len - 2; i >= 0; --i) {
		if (i > g && sfx[i + len - 1 - f] < i - g)
			sfx[i] = sfx[i + len - 1 - f];
		else {
			if (i < g)
				g = i;
			f = i;
			while (g >= 0 && s[g] == s[g + len - 1 - f])
				--g;
			sfx[i] = f - g;
		}
	}
}

static void
compute_goodsfx(ssize_t *goodsfx, const unsigned char *s, ssize_t len)
{
	ssize_t i, j, *sfx = goodsfx + len;

	compute_sfx(sfx, s, len);

	for (i = 0; i < len; ++i)
		goodsfx[i] = len;
	j = 0;
	for (i = len - 1; i >= 0; --i)
		if (sfx[i] == i + 1)
			for (; j < len - 1 - i; ++j)
				if (goodsfx[j] == len)
					goodsfx[j] = len - 1 - i;
	for (i = 0; i <= len - 2; ++i)
		goodsfx[len - 1 - sfx[i]] = len - 1 - i;
}

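/* Worked example: for the needle "aba" (len 3), compute_sfx() yields
 * sfx = {1, 0, 3}, and the loops above turn that into
 * goodsfx = {2, 2, 1}: a mismatch on the last byte shifts by 1, any
 * earlier mismatch shifts by 2.  This matches the classic good-suffix
 * construction of Boyer-Moore.
 */
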
/* Search for a constant byte string using the Boyer-Moore algorithm. */
static inline unsigned char*
bm_find(unsigned char *buf, size_t buflen, unsigned char *needle,
	size_t maxidx, ssize_t *badchar, ssize_t *goodsfx)
{
	while (buflen > maxidx) {
		unsigned char *p;
		size_t i;
		ssize_t shift;

		for (p = buf + maxidx, i = maxidx; p >= buf; --p, --i)
			if (needle[i] != *p)
				break;
		if (p < buf)
			return buf;

		shift = i + 1 - badchar[*p];
		if (shift < goodsfx[i])
			shift = goodsfx[i];

		buf += shift;
		buflen -= shift;
	}
	return NULL;
}

/* Search for a constant byte string backwards. */
static inline unsigned char*
bm_find_rev(unsigned char *buf, size_t buflen, unsigned char *needle,
	    size_t maxidx, ssize_t *badchar, ssize_t *goodsfx)
{
	buf -= maxidx;
	while (buflen > maxidx) {
		unsigned char *p;
		size_t i;
		ssize_t shift;

		for (p = buf, i = maxidx; p <= buf + maxidx; ++p, --i)
			if (needle[i] != *p)
				break;
		if (p > buf + maxidx)
			return buf;

		shift = i + 1 - badchar[*p];
		if (shift < goodsfx[i])
			shift = goodsfx[i];

		buf -= shift;
		buflen -= shift;
	}
	return NULL;
}

/* Search for a constant byte string in @buf.
 * If @buflen is negative, search backwards, otherwise search forwards.
 */
static inline unsigned char*
find_bytestr_buf(unsigned char *buf, ssize_t buflen,
		 unsigned char *needle, ssize_t maxidx,
		 ssize_t *badchar, ssize_t *goodsfx)
{
	if (buflen < 0) {
		buflen = -buflen;
		if (!maxidx)
			return memrchr(buf - buflen + 1, *needle, buflen);
		return bm_find_rev(buf, buflen, needle, maxidx,
				   badchar, goodsfx);
	} else {
		if (!maxidx)
			return memchr(buf, *needle, buflen);
		return bm_find(buf, buflen, needle, maxidx,
			       badchar, goodsfx);
	}
}

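/* To make the sign convention concrete: find_bytestr_buf(p, n, ...)
 * scans the bytes p .. p+n-1 left to right, while
 * find_bytestr_buf(p, -n, ...) scans the n bytes ending at (and
 * including) *p, i.e. p-n+1 .. p, right to left.  Single-byte needles
 * (maxidx == 0) fall back to memchr()/memrchr().
 */
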
/* Search the file for a constant byte string; needles longer than one
 * byte use the Boyer-Moore tables computed above.
 */
static int
find_bytestr(struct hed_file *file, blockoff_t *from, int dir,
	     unsigned char *needle, ssize_t len)
{
	void *dynalloc;
	ssize_t *badchar, *goodsfx;
	unsigned char *readbuf;
	ssize_t slen;
	int ret;

	if (len > 1) {
		dynalloc = calloc(sizeof(ssize_t) * (256 + 2*len)
				  + 2*(len-1), 1);
		if (!dynalloc)
			return HED_FINDOFF_ERROR;
		badchar = dynalloc;
		goodsfx = badchar + 256;
		readbuf = dynalloc + sizeof(ssize_t) * (256 + 2*len);

		if (dir < 0)
			reverse(needle, len);
		compute_badchar(badchar, needle, len);
		compute_goodsfx(goodsfx, needle, len);
	} else {
		dynalloc = NULL;
		badchar = goodsfx = NULL;
		readbuf = NULL;
	}

	--len;	/* simplify offset computing */
	if (dir < 0) {
		from->off += len;
		from->pos += len;
		slen = -len;
	} else
		slen = len;

	ret = HED_FINDOFF_NO_MATCH;
	while (from->pos >= 0) {
		ssize_t remain;

		fixup_blockoff(from);
		if (hed_block_is_eof(from->block))
			break;

		remain = hed_prepare_read(file, from, SSIZE_MAX);
		if (!remain) {
			ret = HED_FINDOFF_ERROR;
			break;
		}
		if (dir < 0)
			remain = -(from->off + 1);

		if (!hed_block_is_bad(from->block)) {
			unsigned char *p, *q;

			if ((dir >= 0 && remain > slen) ||
			    (dir < 0 && remain < slen)) {
				assert(!hed_block_is_virtual(from->block));
				assert(from->block->dataobj);
				p = from->block->dataobj->data + from->off;
				from->off += remain;
				from->pos += remain;
			} else if (dir >= 0) {
				remain += slen;
				if (copy_in(file, readbuf, remain, from)) {
					ret = HED_FINDOFF_ERROR;
					break;
				}
				p = readbuf;
			} else {
				remain += slen;
				from->off += remain + 1;
				from->pos += remain + 1;
				if (from->pos < 0)
					break;
				fixup_blockoff_slow(from);
				if (copy_in(file, readbuf, -remain, from)) {
					ret = HED_FINDOFF_ERROR;
					break;
				}
				from->off -= -remain + 1;
				from->pos -= -remain + 1;
				p = readbuf + (-remain - 1);
			}

			q = find_bytestr_buf(p, remain, needle, len,
					     badchar, goodsfx);
			if (q) {
				move_rel_fast(from, q - p - remain);
				ret = 0;
				break;
			}
			from->off -= slen;
			from->pos -= slen;
		} else {
			/* bad blocks cannot match anything */
			from->off += remain;
			from->pos += remain;
		}
	}

	if (dynalloc)
		free(dynalloc);
	return ret;
}

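/* A note on the buffer sizes in find_bytestr(): a match may straddle a
 * block boundary, so whenever fewer bytes than the needle length remain
 * in the current block, the tail is stitched together into readbuf by
 * copy_in().  The stitched window never exceeds 2*(len-1) bytes, which
 * is exactly what the calloc() above reserves for readbuf.
 */
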
static int
find_expr(struct hed_file *file, blockoff_t *from, int dir,
	  struct hed_expr *expr, struct ffb_hookdata *data)
{
	int len = hed_expr_len(expr);
	unsigned char *buf;

	assert(len > 0);

	if (len > file_size(file))
		return HED_FINDOFF_NO_MATCH;
	if ((hed_off_t)file_size(file) - from->pos - len < 0)
		hed_move_relative(from,
				  (hed_off_t)file_size(file) - from->pos - len);

	for (;;) {
		blockoff_t match;
		size_t remain;
		unsigned char *p;
		int pos;

		buf = hed_expr_eval(expr, eval_reg_cb, NULL, data);
		if (!buf)
			return HED_FINDOFF_ERROR;

		hed_dup_cursor(from, &match);
		remain = 0;
		for (pos = 0; pos < len; pos++) {
			if (!remain) {
				remain = hed_prepare_read(file, &match,
							  SIZE_MAX);
				if (!remain ||
				    hed_block_is_bad(match.block))
					break;
				p = hed_cursor_data(&match);
				cursor_next_block(file, &match);
			}
			if (*p++ != buf[pos])
				break;
			remain--;
		}
		put_cursor(&match);

		if (pos == len)
			return 0;
		if (!remain)
			return HED_FINDOFF_ERROR;

		from->pos += dir;
		from->off += dir;
		if (0 > from->pos || from->pos > file_size(file) - len)
			break;
		fixup_blockoff(from);

		/* A constant expression can use the fast byte search. */
		if (! (hed_expr_flags(expr) & HED_AEF_DYNAMIC) )
			return find_bytestr(file, from, dir, buf, len);
	}

	return HED_FINDOFF_NO_MATCH;
}

int
hed_file_find_expr(struct hed_file *file, blockoff_t *pos, int dir,
		   struct hed_expr *expr,
		   hed_expr_reg_cb expr_cb, void *expr_cb_data)
{
	struct ffb_hookdata data;
	int res;

	assert(dir == 1 || dir == -1);

	data.file = file;
	data.pos = pos;
	data.base_ecb = expr_cb;
	data.base_ecb_data = expr_cb_data;

	hed_file_set_readahead(file,
			       dir > 0 ? HED_RA_FORWARD : HED_RA_BACKWARD);
	res = find_expr(file, pos, dir, expr, &data);
	hed_file_set_readahead(file, HED_RA_NONE);

	return res;
}
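
/* Illustrative use (hypothetical caller; my_reg_cb and my_cb_data stand
 * in for the application's register hook, error handling omitted):
 *
 *	hed_cursor_t pos;
 *	int res;
 *
 *	get_cursor(file, 0, &pos);
 *	res = hed_file_find_expr(file, &pos, 1, expr,
 *				 my_reg_cb, my_cb_data);
 *	if (!res)
 *		... pos now sits at the start of the match ...
 *	else if (res == HED_FINDOFF_NO_MATCH)
 *		... no match between pos and EOF ...
 *	put_cursor(&pos);
 */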