Fix search for first block in load_blocks
[hed.git] / libhed / file.c
blob 0d137f268df905c7edadcfdafc4721fa961cd70b
1 /* $Id$ */
3 /*
4 * hed - Hexadecimal editor
5 * Copyright (C) 2004 Petr Baudis <pasky@ucw.cz>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of version 2 of the GNU General Public License as
9 * published by the Free Software Foundation.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * There hammer on the anvil smote,
23 * There chisel clove, and graver wrote;
24 * There forged was blade, and bound was hilt;
25 * The delver mined, the mason built.
26 * There beryl, pearl, and opal pale,
27 * And metal wrought like fishes' mail,
28 * Buckler and corslet, axe and sword,
29 * And shining spears were laid in hoard.
32 /* Feature macros needed for:
33 * - memrchr
34 * - pread, pwrite
35 * - stpcpy
37 #define _GNU_SOURCE
39 #include <config.h>
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <limits.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <sys/ioctl.h>
48 #include <unistd.h>
49 #include <linux/fs.h> /* BLKGETSIZE and BLKGETSIZE64 */
51 #include "file.h"
52 #include "access.h"
53 #include "cache.h"
54 #include "swap.h"
55 #include "tree.h"
56 #include "expr.h"
58 /* memrchr() might not be available */
59 #ifndef HAVE_MEMRCHR
60 # include "memrchr.c"
61 #endif /* HAVE_MEMRCHR */
64 * `Piles of jewels?' said Gandalf. `No. The Orcs have often plundered Moria;
65 * there is nothing left in the upper halls. And since the dwarves fled, no one
66 * dares to seek the shafts and treasuries down in the deep places: they are
67 * drowned in water--or in a shadow of fear.'
70 /* TODO: Currently the file blocks allocation is not very sophisticated and
71 * when the weather is bad it could probably have rather horrible results. */
73 #undef BLOCKS_DEBUG
74 #ifdef BLOCKS_DEBUG
75 #define BDEBUG(x...) fprintf(stderr, x)
76 #else
77 #define BDEBUG(x...)
78 #endif
80 /* Number of blocks in cache */
81 #define CACHE_LENGTH 64
83 /* Blocks for readahead */
84 #define FILE_READAHEAD (CACHE_LENGTH/2)
86 #define file_block hed_block
87 #define blockoff_t hed_cursor_t
89 #define null_block(tree) (tree_entry(null_node(tree),struct file_block,t))
91 #define first_block(tree) (tree_entry(first_in_tree(tree),struct file_block,t))
92 #define prev_block(b) (tree_entry(prev_in_tree(&(b)->t),struct file_block,t))
93 #define next_block(b) (tree_entry(next_in_tree(&(b)->t),struct file_block,t))
94 #define last_block(tree) (tree_entry(last_in_tree(tree),struct file_block,t))
96 /* false if this is not a block - out of list bounds */
97 #define is_a_block(tree,b) (&(b)->t != null_node(tree))
99 #define block_offset(tree,b) tree_block_offset((tree), &(b)->t)
101 #define recalc_block_recursive(tree,b) recalc_node_recursive((tree),&(b)->t)
103 #define chain_block(tree,b) append_to_tree((tree), &(b)->t)
104 #define recalc_chain_block(tree,b) do { \
105 chain_block((tree), (b)); \
106 recalc_block_recursive((tree), (b)); \
107 } while (0)
109 #define chain_block_after(tree,p,b) insert_into_tree((tree), &(b)->t, &(p)->t)
111 #define recalc_chain_block_after(tree,p,b) do { \
112 chain_block_after((tree), (p), (b)); \
113 recalc_block_recursive((tree), (b)); \
114 } while (0)
116 #define unchain_block(tree,b) del_from_tree((tree), &(b)->t)
117 #define recalc_unchain_block(tree,b) recalc_del_from_tree((tree), &(b)->t)
119 #define init_block_list(tree) init_tree(tree)
120 #define init_block_link(b) init_node(&(b)->t)
122 #define foreach_block(b,tree) foreach_in_tree ((b),(tree),struct file_block,t)
123 #define foreachsafe_block(b,n,tree) foreachsafe_in_tree ((b),(n),(tree),struct file_block,t)
125 #define find_block(tree,o) tree_entry(find_in_tree((tree),(o)),struct file_block,t)
127 #define file_size hed_file_size
128 #define file_blocks hed_file_blocks
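/* Illustrative sketch (not part of the original source): how the block
 * tree macros above compose.  A hypothetical helper counting non-empty
 * blocks of @file could look like this:
 *
 *	static unsigned count_nonempty_blocks(struct hed_file *file)
 *	{
 *		struct file_block *block;
 *		unsigned n = 0;
 *
 *		foreach_block (block, file_blocks(file))
 *			if (hed_block_size(block))
 *				++n;
 *		return n;
 *	}
 */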
130 #ifdef HED_CONFIG_SWAP
132 /* Return the swp file object */
133 static inline struct swp_file *
134 file_swp(struct hed_file *file)
136 return file->swp;
139 #else
141 /* Provide a stub for the non-swap case */
142 static inline void *
143 file_swp(struct hed_file *file)
145 return NULL;
148 #endif
150 #ifdef HED_CONFIG_READAHEAD
152 #define file_ra_none(f) ((f)->readahead == HED_RA_NONE)
153 #define file_ra_forward(f) ((f)->readahead == HED_RA_FORWARD)
154 #define file_ra_backward(f) ((f)->readahead == HED_RA_BACKWARD)
156 #else
158 #define file_ra_none(f) (1)
159 #define file_ra_forward(f) (0)
160 #define file_ra_backward(f) (0)
162 #endif /* HED_CONFIG_READAHEAD */
164 static struct hed_block *
165 next_nonzero_block(const struct hed_tree *tree, struct hed_block *block)
167 while (is_a_block(tree, block = next_block(block)))
168 if (hed_block_size(block))
169 return block;
170 return NULL;
173 static struct hed_block *
174 prev_nonzero_block(const struct hed_tree *tree, struct hed_block *block)
176 while (is_a_block(tree, block = prev_block(block)))
177 if (hed_block_size(block))
178 return block;
179 return NULL;
182 bool
183 hed_block_is_after_erase(const struct hed_tree *tree, struct hed_block *block)
185 struct hed_block *prev = prev_nonzero_block(tree, block);
186 if (prev) {
187 hed_uoff_t prevend = prev->phys_pos;
188 if (!hed_block_is_inserted(prev))
189 prevend += prev->t.size;
190 return block->phys_pos > prevend;
191 } else
192 return block->phys_pos;
195 bool
196 hed_block_is_after_insert(const struct hed_tree *tree, struct hed_block *block)
198 struct hed_block *prev = prev_nonzero_block(tree, block);
199 return prev && hed_block_is_inserted(prev);
202 #ifndef BLOCKS_DEBUG
203 # define dump_blocks(file) {}
204 #else
206 static hed_uoff_t
207 block_phys_size(struct hed_file *file, struct file_block *block)
209 struct file_block *next;
211 next = next_block(block);
212 if (!is_a_block(file_blocks(file), next)) {
213 assert(hed_block_is_virtual(block));
214 return 0;
217 return next->phys_pos - block->phys_pos;
220 static void
221 dump_block(int level, struct hed_file *file, struct hed_tree_head *node,
222 hed_uoff_t *cur_offset, hed_uoff_t *cur_poffset)
224 struct hed_block *block = tree_entry(node, struct hed_block, t);
225 bool virtual = hed_block_is_virtual(block);
226 unsigned char *p;
227 hed_cursor_t *cur;
228 char t[21] = " ";
230 if (is_a_node(file_blocks(file), node->left))
231 dump_block(level + 1, file, node->left, cur_offset, cur_poffset);
232 p = hed_block_data(block);
233 if (level < 20) t[level] = '>'; else t[19] = '.';
234 fprintf(stderr, "%s [%06llx] [%06llx] %c%c%c %05llx %05llx"
235 " {%02x%02x%02x%02x} -- %p ^%p [%06llx]\n",
237 (unsigned long long) *cur_offset,
238 (unsigned long long) *cur_poffset,
239 virtual ? 'v' : ' ',
240 hed_block_is_inserted(block) ? 'i' : ' ',
241 hed_block_is_dirty(block) ? '*' : ' ',
242 (unsigned long long) node->size,
243 (unsigned long long) block_phys_size(file, block),
244 p && block->t.size > 0 ? p[0] : 0,
245 p && block->t.size > 1 ? p[1] : 0,
246 p && block->t.size > 2 ? p[2] : 0,
247 p && block->t.size > 3 ? p[3] : 0,
248 node,
249 is_a_node(file_blocks(file), node->up) ? node->up : NULL,
250 (unsigned long long) node->cover_size
252 list_for_each_entry (cur, &block->refs, list) {
253 fprintf(stderr, " <%p>: %llx->%p:%llx\n",
254 cur, (long long)cur->pos,
255 cur->block, (unsigned long long)cur->off);
257 assert(*cur_poffset == block->phys_pos);
258 *cur_offset += node->size;
259 *cur_poffset += block_phys_size(file, block);
260 if (is_a_node(file_blocks(file), node->right))
261 dump_block(level + 1, file, node->right, cur_offset, cur_poffset);
262 assert(node->cover_size == (is_a_node(file_blocks(file), node->left) ? node->left->cover_size : 0)
263 + (is_a_node(file_blocks(file), node->right) ? node->right->cover_size : 0)
264 + node->size);
267 /* Walk the tree manually here, because foreach_block() does not provide
268 * the tree structure.
269 * TODO: Change this if you plan to debug any other block containers.
271 static void
272 dump_blocks(struct hed_file *file)
274 struct file_block *first = first_block(file_blocks(file));
275 hed_uoff_t cur_offset, cur_poffset;
277 fprintf(stderr, "-- blocks dump --\n");
278 if (is_a_block(file_blocks(file), first)) {
279 cur_offset = 0;
280 cur_poffset = first->phys_pos;
281 dump_block(0, file, file_blocks(file)->root,
282 &cur_offset, &cur_poffset);
284 fprintf(stderr, "-- blocks dump end --\n");
286 #endif
288 static void
289 get_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
291 struct file_block *block;
293 block = find_block(file_blocks(file), offset);
294 assert(is_a_block(file_blocks(file), block));
295 curs->pos = offset;
296 curs->block = block;
297 curs->off = offset - block_offset(file_blocks(file), block);
298 list_add(&curs->list, &block->refs);
300 BDEBUG("Mapped %llx to %llx+%llx/%llx\n",
301 offset, offset - curs->off, curs->off, block->t.size);
304 static inline void
305 put_cursor(hed_cursor_t *curs)
307 list_del(&curs->list);
310 void
311 hed_get_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
313 if (hed_is_a_cursor(curs))
314 put_cursor(curs);
315 get_cursor(file, offset, curs);
318 void
319 hed_put_cursor(hed_cursor_t *curs)
321 put_cursor(curs);
324 void
325 hed_dup_cursor(const hed_cursor_t *src, hed_cursor_t *dst)
327 dst->pos = src->pos;
328 dst->block = src->block;
329 dst->off = src->off;
330 list_add_tail(&dst->list, &src->block->refs);
333 void
334 hed_dup2_cursor(const hed_cursor_t *src, hed_cursor_t *dst)
336 if (hed_is_a_cursor(dst))
337 put_cursor(dst);
338 hed_dup_cursor(src, dst);
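/* Illustrative sketch (not part of the original source): a typical
 * cursor life cycle with the calls above.  Zero-initialization is
 * assumed to be enough for hed_is_a_cursor() to report "no cursor yet".
 *
 *	hed_cursor_t curs = { 0 };
 *	hed_cursor_t copy;
 *
 *	hed_get_cursor(file, 0x1000, &curs);	// attach to the block tree
 *	hed_dup_cursor(&curs, &copy);		// second ref, same position
 *	...
 *	hed_put_cursor(&copy);			// drop both references again
 *	hed_put_cursor(&curs);
 */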
341 /* Move blockoff's from @old to @new, adding @off to their block
342 * offsets to keep them at the same position. */
343 static void
344 update_blockoffs(const struct file_block *old, struct file_block *new,
345 hed_off_t off)
347 blockoff_t *blockoff;
349 BDEBUG("Updating blockoffs from <%p> to <%p>%c%llx\n",
350 old, new, off >= 0 ? '+' : '-', off >= 0 ? off : -off);
352 list_for_each_entry(blockoff, &old->refs, list) {
353 blockoff->block = new;
354 blockoff->off += off;
358 /* Move blockoff's in the range <@start;@end> from @old to @new,
359 * adding @off to their block offset, plus moving the reference list. */
360 static void
361 move_blockoffs(const struct file_block *old, struct file_block *new,
362 hed_uoff_t start, hed_uoff_t end, hed_off_t off)
364 blockoff_t *blockoff, *nextoff;
366 BDEBUG("Moving blockoffs from <%p>:%llx:%llx to <%p>%c%llx\n",
367 old, start, end, new,
368 off >= 0 ? '+' : '-', off >= 0 ? off : -off);
370 list_for_each_entry_safe(blockoff, nextoff, &old->refs, list)
371 if (blockoff->off >= start && blockoff->off <= end) {
372 blockoff->block = new;
373 blockoff->off += off;
374 list_move(&blockoff->list, &new->refs);
378 /* Move cursors in the range @block:<@start;@end> to @newpos */
379 static void
380 move_cursors_abs(const struct file_block *block,
381 hed_uoff_t start, hed_uoff_t end,
382 const hed_cursor_t *newpos)
384 hed_cursor_t *curs, *nextcurs;
386 BDEBUG("Moving blockoffs from <%p>:%llx:%llx to <%p>:%llx\n",
387 block, start, end, newpos->block, newpos->off);
389 list_for_each_entry_safe(curs, nextcurs, &block->refs, list)
390 if (curs->off >= start && curs->off <= end) {
391 curs->pos = newpos->pos;
392 curs->block = newpos->block;
393 curs->off = newpos->off;
394 list_move(&curs->list, &newpos->block->refs);
398 /* Update the positions of blockoffs at and after @start for all
399 * blocks starting at @block */
400 static void
401 slide_blockoffs(struct hed_file *file, const struct file_block *block,
402 hed_uoff_t start, hed_off_t off)
404 blockoff_t *blockoff;
406 BDEBUG("Sliding blockoffs >= %llx by %c%llx, starting at <%p>\n",
407 start, off >= 0 ? '+' : '-', off >= 0 ? off : -off, block);
408 while (is_a_block(file_blocks(file), block)) {
409 list_for_each_entry(blockoff, &block->refs, list)
410 if (blockoff->pos >= start)
411 blockoff->pos += off;
412 block = next_block(block);
416 static struct hed_block *
417 new_block(struct hed_file *file, long flags)
419 struct file_block *new;
421 if (! (new = swp_zalloc(file_swp(file), sizeof(struct file_block))) )
422 return NULL;
424 new->flags = flags;
425 init_block_link(new);
426 INIT_LIST_HEAD(&new->refs);
427 if (flags & HED_BLOCK_EXCACHE)
428 INIT_LIST_HEAD(&new->lru);
429 else
430 list_add_tail(&new->lru, &file->lru);
432 return new;
435 static struct hed_block *
436 new_virt_block(struct hed_file *file, hed_uoff_t pos, hed_uoff_t size,
437 long extraflags)
439 struct hed_block *new =
440 new_block(file, (HED_BLOCK_EXCACHE |
441 HED_BLOCK_VIRTUAL |
442 extraflags));
443 if (!new)
444 return NULL;
446 new->t.size = size;
447 new->phys_pos = pos;
448 BDEBUG("Spawned new virtual block [%llx] at %llx\n", size, pos);
449 return new;
452 static struct hed_block *
453 new_data_block(struct hed_file *file, hed_uoff_t pos, hed_uoff_t size,
454 struct hed_block_data *dataobj)
456 struct hed_block *new =
457 new_block(file, 0);
458 if (!new)
459 return NULL;
461 cache_get(dataobj);
462 new->dataobj = dataobj;
463 new->t.size = size;
464 new->phys_pos = pos;
465 new->dataoff = FILE_BLOCK_OFF(pos);
466 BDEBUG("Spawned new data block [%llx] at %llx\n", size, pos);
467 return new;
470 static void
471 file_free_block(struct hed_file *file, struct file_block *block)
473 if (block->dataobj)
474 cache_put(file->cache, block->dataobj);
475 list_del(&block->lru);
477 swp_free(file_swp(file), block);
480 static bool
481 kill_block_if_empty(struct hed_file *file, struct file_block *block)
483 if (!hed_block_is_eof(block) && block->t.size == 0 &&
484 list_empty(&block->refs)) {
485 /* No recalculation needed, zero size. */
486 unchain_block(file_blocks(file), block);
487 file_free_block(file, block);
488 return true;
490 return false;
493 /* This may kill the previous block as well, if it can be merged
494 * with the next one. It will never kill anything which _follows_. */
495 static void
496 file_kill_block(struct hed_file *file, struct file_block *block)
498 hed_uoff_t phys_pos = block->phys_pos;
499 struct file_block *prev = prev_block(block);
500 struct file_block *next = next_block(block);
501 struct file_block *merger;
502 bool killprev = false;
504 /* We should never kill a dirty block! */
505 assert(!hed_block_is_dirty(block));
506 /* We shouldn't get here with an empty block (that might
507 * need special considerations with virtualization). */
508 assert(block->t.size > 0);
510 if (is_a_block(file_blocks(file), next) &&
511 hed_block_is_inner_virtual(next) &&
512 phys_pos + block->t.size == next->phys_pos) {
513 if (is_a_block(file_blocks(file), prev) &&
514 hed_block_is_inner_virtual(prev) &&
515 prev->phys_pos + prev->t.size == phys_pos)
516 killprev = true;
517 merger = next;
518 merger->phys_pos -= block->t.size;
519 update_blockoffs(merger, merger, block->t.size);
520 update_blockoffs(block, merger, 0);
521 } else if (is_a_block(file_blocks(file), prev) &&
522 hed_block_is_inner_virtual(prev) &&
523 prev->phys_pos + prev->t.size == phys_pos) {
524 merger = prev;
525 update_blockoffs(block, merger, merger->t.size);
526 } else if (!hed_block_is_virtual(block)) {
527 /* Convert physical to virtual */
528 assert(block->dataobj);
529 cache_put(file->cache, block->dataobj);
530 block->dataobj = NULL;
532 list_del_init(&block->lru); /* unlink the block from LRU */
533 hed_block_set_excache(block); /* say it's unlinked */
534 hed_block_set_virtual(block);
535 return;
536 } else
537 /* Already virtual and cannot merge */
538 return;
540 list_splice(&block->refs, &merger->refs);
542 /* Messing with block sizes and unchaining is a bit tricky
543 * since unchain_block() can splay(). So we really need
544 * to recalc_block_recursive() right after we update the size.
545 * If this place turns out to be a hot-spot, we can optimize
546 * the tree operations here. */
547 merger->t.size += block->t.size;
548 recalc_block_recursive(file_blocks(file), merger);
550 /* Destroy the block */
551 recalc_unchain_block(file_blocks(file), block);
552 file_free_block(file, block);
554 if (killprev)
555 file_kill_block(file, prev);
558 static struct file_block *
559 split_block(struct hed_file *file, struct hed_block *block,
560 hed_uoff_t splitpoint)
562 struct file_block *next;
564 next = new_block(file, block->flags);
565 if (!next)
566 return NULL;
568 if ( (next->dataobj = block->dataobj) ) {
569 cache_get(next->dataobj);
570 next->dataoff = block->dataoff + splitpoint;
571 } else
572 assert(hed_block_is_virtual(block));
574 next->t.size = block->t.size - splitpoint;
575 next->phys_pos = block->phys_pos;
576 if (!hed_block_is_inserted(block))
577 next->phys_pos += splitpoint;
579 block->t.size = splitpoint;
580 recalc_block_recursive(file_blocks(file), block);
581 recalc_chain_block_after(file_blocks(file), block, next);
583 move_blockoffs(block, next, splitpoint, UOFF_MAX, -splitpoint);
585 return next;
588 /* Replace a chunk in @block with @newblock */
589 static int
590 replace_chunk(struct hed_file *file, struct hed_block *block,
591 hed_uoff_t offset, struct hed_block *newblock)
593 size_t len = newblock->t.size;
594 hed_uoff_t leadlen = offset + len;
596 assert(offset < block->t.size);
597 assert(len <= block->t.size - offset);
599 /* Re-create the tail block if necessary */
600 if (hed_block_is_eof(block) || block->t.size - offset > len) {
601 struct file_block *tail;
603 tail = new_block(file, block->flags);
604 if (!tail)
605 return -1;
606 tail->t.size = block->t.size - leadlen;
607 tail->dataobj = block->dataobj;
608 tail->dataoff = block->dataoff + leadlen;
609 tail->phys_pos = block->phys_pos + leadlen;
611 hed_block_clear_eof(block);
612 recalc_chain_block_after(file_blocks(file), block, tail);
614 /* Move offsets to the tail */
615 move_blockoffs(block, tail, leadlen, UOFF_MAX, -leadlen);
618 /* Move pointers to the new block */
619 assert(leadlen > 0);
620 move_blockoffs(block, newblock, offset, leadlen - 1, -offset);
622 /* Shorten the leading block */
623 block->t.size = offset;
624 recalc_block_recursive(file_blocks(file), block);
626 /* Insert the new block */
627 recalc_chain_block_after(file_blocks(file), block, newblock);
629 /* Kill the leading block if possible */
630 kill_block_if_empty(file, block);
632 return 0;
635 #ifdef HED_CONFIG_SWAP
637 static char *
638 swp_filename(const char *filename)
640 size_t fnlen = strlen(filename);
641 char *swp;
642 char *file;
644 if (!(swp = malloc(fnlen + 9)) )
645 return NULL;
646 strcpy(swp, filename);
648 file = strrchr(swp, '/');
649 file = file ? file + 1 : swp;
650 *file = '.';
651 strcpy(stpcpy(file + 1, filename + (file -swp)), ".hedswp");
652 return swp;
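/* Worked example (derived from the code above): for "src/main.c" the
 * swap file name becomes "src/.main.c.hedswp"; for a plain "README"
 * (no slash) it becomes ".README.hedswp".  newswp_filename() below then
 * decrements the last character (...hedswp, ...hedswo, ...) until it
 * finds a name that does not exist yet.
 */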
655 static char *
656 newswp_filename(char *swpname)
658 char *ret, *p;
660 ret = swpname;
661 while (!access(ret, F_OK)) {
662 if (ret == swpname) {
663 if (! (ret = strdup(swpname)) )
664 return NULL;
665 p = ret + strlen(ret) - 1;
668 if (*p == 'a') {
669 free(ret);
670 return NULL;
672 --*p;
674 return ret;
678 hed_file_remove_swap(struct hed_file *file)
680 if (remove(file->swpname))
681 return -1;
682 if (rename(file->newswpname, file->swpname))
683 return -1;
685 free(file->newswpname);
686 file->newswpname = file->swpname;
687 return 0;
690 static inline struct hed_file *
691 file_swp_init(const char *name)
693 char *swpname, *newswpname;
694 struct swp_file *swp;
695 struct hed_file *file;
697 swpname = swp_filename(name);
698 if (!swpname)
699 goto fail;
700 newswpname = newswp_filename(swpname);
701 if (!newswpname)
702 goto fail_free_name;
703 swp = swp_init_write(newswpname);
704 if (!swp)
705 goto fail_free_newname;
707 assert(sizeof(struct swp_header) + sizeof(struct hed_file)
708 <= FILE_BLOCK_SIZE);
709 file = swp_private(swp);
710 memset(file, 0, sizeof *file);
712 file->swp = swp;
713 file->swpname = swpname;
714 file->newswpname = newswpname;
716 return file;
718 fail_free_newname:
719 free(newswpname);
720 fail_free_name:
721 if (swpname != newswpname)
722 free(swpname);
723 fail:
724 return NULL;
727 static inline void
728 file_swp_done(struct hed_file *file)
730 remove(file->newswpname);
731 if (file->newswpname != file->swpname)
732 free(file->newswpname);
733 free(file->swpname);
734 swp_done(file_swp(file));
735 /* file was de-allocated automatically with file->swp */
738 static inline void
739 init_null_block(struct hed_file *file)
741 file->null_block = null_block(file_blocks(file));
744 #else /* HED_CONFIG_SWAP */
746 static inline struct hed_file *
747 file_swp_init(const char *name)
749 return calloc(1, sizeof(struct hed_file));
752 static inline void
753 file_swp_done(struct hed_file *file)
757 #define init_null_block(f) do {} while(0)
759 #endif /* HED_CONFIG_SWAP */
761 static inline struct stat *
762 file_stat(struct hed_file *file)
764 return &file->s;
768 hed_file_update_size(struct hed_file *file)
770 hed_uoff_t oldsize = file->phys_size;
772 if(fstat(file->fd, file_stat(file)) < 0)
773 return -1;
775 if (S_ISBLK(file_stat(file)->st_mode)) {
776 if (ioctl(file->fd, BLKGETSIZE64, &file->phys_size)) {
777 unsigned long size_in_blocks;
778 if (ioctl(file->fd, BLKGETSIZE, &size_in_blocks))
779 return -1;
780 file->phys_size = (hed_uoff_t)size_in_blocks << 9;
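			/* BLKGETSIZE reports the size in 512-byte sectors,
			 * hence the shift by 9 (e.g. 2097152 sectors
			 * => 1 GiB). */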
782 } else if (S_ISREG(file_stat(file)->st_mode)) {
783 file->phys_size = file_stat(file)->st_size;
784 } else if (S_ISCHR(file_stat(file)->st_mode)) {
785 if (lseek(file->fd, 0, SEEK_SET) < 0)
786 return -1;
787 file->phys_size = (hed_uoff_t)OFF_MAX + 1;
788 } else {
789 errno = EINVAL;
790 return -1;
793 file->size += file->phys_size - oldsize;
794 return 0;
797 static int
798 do_file_open(struct hed_file *file)
800 file->fd = open(file->name, O_RDONLY);
801 if (file->fd < 0) {
802 if (errno != ENOENT)
803 return errno;
804 fprintf(stderr, "Warning: File %s does not exist\n",
805 file->name);
806 memset(file_stat(file), 0, sizeof(struct stat));
808 } else if (hed_file_update_size(file)) {
809 int errsv = errno;
810 close(file->fd);
811 return errsv;
813 return 0;
816 static int
817 file_setup_blocks(struct hed_file *file)
819 hed_uoff_t phys_size = file->phys_size;
820 struct file_block *block;
821 struct file_block *vblock;
823 if (phys_size) {
824 block = new_virt_block(file, 0, phys_size, 0);
825 if (!block)
826 return -1;
827 chain_block(file_blocks(file), block);
830 vblock = new_virt_block(file, phys_size, OFF_MAX - phys_size + 1,
831 HED_BLOCK_EOF);
832 if (!vblock)
833 return -1;
834 recalc_chain_block(file_blocks(file), vblock);
836 return 0;
840 libhed_init(void)
842 return file_access_init();
845 struct hed_file *
846 hed_open(const char *name)
848 struct hed_file *file;
850 if (! (file = file_swp_init(name)) )
851 goto fail;
853 file->name = name;
855 init_block_list(file_blocks(file));
856 init_null_block(file);
858 file->cache = cache_init(CACHE_LENGTH, file_swp(file));
859 if (!file->cache)
860 goto fail_file;
861 INIT_LIST_HEAD(&file->lru);
863 if (do_file_open(file))
864 goto fail_file;
866 if (file_setup_blocks(file))
867 goto fail_file;
869 fixup_register(file);
871 return file;
873 fail_file:
874 hed_close(file);
875 fail:
876 return NULL;
879 void
880 hed_close(struct hed_file *file)
882 assert(file);
884 /* Do not check for errors:
885 * 1. This FD is read-only => no data loss possible
886 * 2. We're about to exit anyway => no resource leak
888 if (file->fd >= 0)
889 close(file->fd);
891 fixup_deregister(file);
893 /* No need to free file blocks here, because all data is
894 * allocated either from the cache or from the swap file
895 * and both is going to be destroyed now.
898 if (file->cache)
899 cache_done(file->cache);
901 file_swp_done(file);
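/* Illustrative sketch (not part of the original source): a minimal
 * open/close sequence.  The file name is hypothetical and the
 * assumption is that libhed_init() returns non-zero on failure.
 *
 *	struct hed_file *f;
 *
 *	if (libhed_init())
 *		return 1;
 *	f = hed_open("example.bin");
 *	if (!f)
 *		return 1;
 *	...
 *	hed_close(f);
 */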
904 /* Adjust blockoff after off gets outside its block */
905 static void
906 fixup_blockoff_slow(blockoff_t *blockoff)
908 struct file_block *block = blockoff->block;
909 hed_uoff_t off = blockoff->off;
911 do {
912 if ((hed_off_t)off < 0) {
913 block = prev_block(block);
914 off += block->t.size;
915 } else {
916 off -= block->t.size;
917 block = next_block(block);
919 } while (off >= block->t.size);
921 blockoff->block = block;
922 blockoff->off = off;
923 list_move(&blockoff->list, &block->refs);
926 /* Adjust blockoff if off gets outside its block.
927 * This is separate from fixup_blockoff_slow, because it is supposed
928 * to be small enough to be inlined (which is a win, because most of
929 * the time no fixup has to be done, so the fast inlined path is used).
931 static inline void
932 fixup_blockoff(blockoff_t *blockoff)
934 if (blockoff->off >= blockoff->block->t.size)
935 fixup_blockoff_slow(blockoff);
938 hed_off_t
939 hed_move_relative(blockoff_t *blockoff, hed_off_t num)
941 hed_off_t newpos = blockoff->pos + num;
942 if (newpos < 0) {
943 newpos = num < 0 ? 0 : OFF_MAX;
944 num = newpos - blockoff->pos;
946 blockoff->pos = newpos;
947 blockoff->off += num;
948 fixup_blockoff(blockoff);
949 return num;
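/* Worked example (illustrative): with blockoff->pos == 0x10, calling
 * hed_move_relative(blockoff, -0x20) clamps the new position to 0 and
 * returns -0x10, the distance actually moved.
 */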
952 /* Relative move with no checking (and only by a small amount) */
953 static inline void
954 move_rel_fast(blockoff_t *blockoff, ssize_t num)
956 blockoff->off += num;
957 blockoff->pos += num;
958 fixup_blockoff(blockoff);
961 static void
962 alloc_caches(struct hed_file *file, struct hed_block_data **caches, int n)
964 struct remap_control rc;
965 int i;
967 BDEBUG("Allocate %d caches (%d free slots available)\n",
968 n, file->cache->nfree);
970 assert(n <= CACHE_LENGTH);
971 while (file->cache->nfree < n) {
972 struct file_block *block;
974 assert(!list_empty(&file->lru));
975 block = list_entry(file->lru.next, struct file_block, lru);
976 BDEBUG("Killing block at physical %llx\n", block->phys_pos);
977 file_kill_block(file, block);
980 for (i = 0; i < n; ++i) {
981 caches[i] = cache_alloc(file->cache);
982 assert(caches[i]);
985 remap_init(&rc);
986 remap_compact(&rc, file->cache, caches, n);
987 for (i = 0; i < n; ++i)
988 remap_add(&rc, caches[i]->data);
989 remap_finish(&rc);
992 static inline void
993 free_caches(struct hed_file *file, struct hed_block_data **preload, int n)
995 int i;
997 for (i = 0; i < n; ++i)
998 if (preload[i])
999 cache_put(file->cache, preload[i]);
1002 static int
1003 file_load_data(struct hed_file *file,
1004 struct hed_block_data **caches, int n,
1005 hed_uoff_t offset)
1007 struct hed_block_data *dataobj = caches[0];
1008 void *data = dataobj->data;
1009 ssize_t rsize, total, segsize;
1011 segsize = n << FILE_BLOCK_SHIFT;
1012 for (total = 0; total < segsize; total += rsize) {
1013 rsize = pread(file->fd, data + total,
1014 segsize - total, offset + total);
1015 if (!rsize)
1016 break;
1017 if (rsize < 0) {
1018 dataobj = caches[total >> FILE_BLOCK_SHIFT];
1019 caches[total >> FILE_BLOCK_SHIFT] = NULL;
1020 data = dataobj->data;
1021 cache_put(file->cache, dataobj);
1022 total = FILE_BLOCK_ROUND(total);
1023 rsize = FILE_BLOCK_SIZE;
1024 BDEBUG("Error reading block at phys %llx: %s\n",
1025 offset + total, strerror(errno));
1029 BDEBUG("Loaded data at phys %llx up to %llx\n",
1030 offset, offset + segsize);
1031 return 0;
1034 #ifdef HED_CONFIG_MMAP
1036 static int
1037 file_load_data_mmap(struct hed_file *file,
1038 struct hed_block_data **caches, int n,
1039 hed_uoff_t offset)
1041 void *data;
1042 ssize_t segsize;
1043 int i;
1045 segsize = n << FILE_BLOCK_SHIFT;
1046 data = mmap(NULL, segsize,
1047 PROT_READ | PROT_WRITE,
1048 MAP_PRIVATE | (file->fd < 0 ? MAP_ANONYMOUS : 0),
1049 file->fd, offset);
1051 if (data == MAP_FAILED) {
1052 BDEBUG("mmap failed at %llx: fail over to traditional read\n",
1053 offset);
1055 data = mmap(NULL, segsize,
1056 PROT_READ | PROT_WRITE,
1057 MAP_PRIVATE | MAP_ANONYMOUS,
1058 0, 0);
1059 if (data == MAP_FAILED)
1060 return -1;
1062 for (i = 0; i < n; ++i)
1063 caches[i]->data = data + (i << FILE_BLOCK_SHIFT);
1064 return file_load_data(file, caches, n, offset);
1067 for (i = 0; i < n; ++i)
1068 caches[i]->data = data + (i << FILE_BLOCK_SHIFT);
1070 BDEBUG("Loaded data at phys %llx up to %llx\n",
1071 offset, offset + segsize);
1072 return 0;
1074 # define file_load_data file_load_data_mmap
1076 #endif
1078 /* Find the block with the lowest physical position that intersects
1079 * the loaded segment. The search starts at @block.
1081 static struct hed_block *
1082 first_load_block(struct hed_tree *tree, struct hed_block *block,
1083 hed_uoff_t segstart)
1085 struct hed_block *prev = block;
1086 hed_uoff_t prevend;
1087 do {
1088 block = prev;
1089 prev = prev_block(prev);
1090 if (!is_a_block(tree, prev))
1091 break;
1092 prevend = hed_block_is_inserted(prev)
1093 ? prev->phys_pos
1094 : prev->phys_pos + hed_block_size(prev);
1095 } while (prevend > segstart);
1096 return block;
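/* Worked example (illustrative): with non-inserted blocks A, B, C at
 * physical positions 0x0000, 0x1000 and 0x2000 (0x1000 bytes each),
 * first_load_block(tree, C, 0x1800) steps back once: B ends at 0x2000,
 * which is past 0x1800, so the walk continues; A ends at 0x1000, which
 * is not, so B is returned as the lowest block intersecting the segment.
 */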
1099 static int
1100 load_blocks(struct hed_file *file, const blockoff_t *from)
1102 hed_uoff_t physpos, segstart;
1103 struct hed_block_data *preload[FILE_READAHEAD];
1104 size_t ra_bkw, ra_fwd, ra_off;
1105 hed_cursor_t pos;
1106 int nblocks;
1108 segstart = hed_cursor_phys_pos(from);
1109 ra_bkw = FILE_BLOCK_OFF(segstart);
1110 ra_fwd = FILE_BLOCK_SIZE - ra_bkw;
1112 if (file_ra_forward(file))
1113 ra_fwd += (FILE_READAHEAD - 1) << FILE_BLOCK_SHIFT;
1114 else if (file_ra_backward(file))
1115 ra_bkw += (FILE_READAHEAD - 1) << FILE_BLOCK_SHIFT;
1117 if (ra_bkw > segstart)
1118 ra_bkw = segstart;
1119 if (ra_fwd > file->phys_size - segstart)
1120 ra_fwd = file->phys_size - segstart;
1122 segstart -= ra_bkw;
1123 ra_fwd += ra_bkw;
1124 pos.block = first_load_block(file_blocks(file), from->block, segstart);
1125 pos.off = segstart >= pos.block->phys_pos
1126 ? segstart - pos.block->phys_pos
1127 : 0;
1129 list_add(&pos.list, &pos.block->refs);
1130 nblocks = ((ra_fwd - 1) >> FILE_BLOCK_SHIFT) + 1;
1131 alloc_caches(file, preload, nblocks);
1132 put_cursor(&pos);
1134 if (file_load_data(file, preload, nblocks, segstart)) {
1135 free_caches(file, preload, nblocks);
1136 return -1;
1139 while (physpos = hed_cursor_phys_pos(&pos),
1140 ra_off = physpos - segstart,
1141 ra_off < ra_fwd) {
1142 struct hed_block_data *dataobj;
1143 struct hed_block *newblock;
1144 size_t datalen;
1146 if (!hed_block_is_virtual(pos.block)) {
1147 pos.block = next_block(pos.block);
1148 pos.off = 0;
1149 continue;
1152 datalen = FILE_BLOCK_SIZE - FILE_BLOCK_OFF(physpos);
1153 if (datalen > hed_block_size(pos.block) - pos.off)
1154 datalen = hed_block_size(pos.block) - pos.off;
1156 dataobj = preload[ra_off >> FILE_BLOCK_SHIFT];
1157 newblock = dataobj
1158 ? new_data_block(file, physpos, datalen, dataobj)
1159 : new_virt_block(file, physpos, datalen,
1160 HED_BLOCK_BAD);
1162 /* Punch the new block */
1163 BDEBUG("Add %s block at %llx, length %llx\n",
1164 hed_block_is_virtual(newblock) ? "error" : "physical",
1165 newblock->phys_pos, newblock->t.size);
1166 if (replace_chunk(file, pos.block, pos.off, newblock)) {
1167 file_free_block(file, newblock);
1168 free_caches(file, preload, nblocks);
1169 return -1;
1172 pos.block = next_block(newblock);
1173 pos.off = 0;
1176 /* All cache objects now have an extra reference from the
1177 * allocation. Drop it. */
1178 free_caches(file, preload, nblocks);
1180 dump_blocks(file);
1181 return 0;
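/* Worked example (illustrative, assuming FILE_BLOCK_SIZE is 0x1000 and
 * readahead is off): for segstart == 0x12345, ra_bkw starts as 0x345
 * and ra_fwd as 0xcbb, so after the final adjustments the function
 * loads the single aligned block 0x12000-0x12fff (nblocks == 1).  With
 * forward readahead enabled, ra_fwd grows by FILE_READAHEAD - 1 more
 * blocks before being clamped to the physical file size.
 */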
1184 /* Shorten a block at beginning and enlarge the preceding block.
1186 * Re-allocate at most @len bytes from the beginning of @block to the
1187 * end of the preceding block.
1188 * If @block is virtual, this will effectively devirtualize the range.
1189 * If @block is not virtual, this will change the backing store of
1190 * the bytes in the range.
1191 * Returns: the number of bytes actually moved.
1193 static size_t
1194 shrink_at_begin(struct hed_file *file, struct file_block *block,
1195 size_t len, long state)
1197 struct file_block *prev = prev_block(block);
1198 size_t maxgrow;
1200 /* Basic assumptions */
1201 assert(!(state & HED_BLOCK_VIRTUAL));
1203 /* The previous block must exist: */
1204 if (!is_a_block(file_blocks(file), prev))
1205 return 0;
1207 /* The block flags must match the requested @state: */
1208 if ((prev->flags & HED_BLOCK_STATEMASK) != state)
1209 return 0;
1211 /* No deletions at end, or similar: */
1212 if (prev->phys_pos + prev->t.size != block->phys_pos)
1213 return 0;
1215 /* Append fewer bytes than requested if not all are available */
1216 assert(prev->t.size <= prev->dataobj->size);
1217 maxgrow = prev->dataobj->size - prev->dataoff - prev->t.size;
1218 if (len > maxgrow)
1219 len = maxgrow;
1220 if (!len)
1221 return 0;
1223 BDEBUG("Appending 0:%lx to the previous block\n", len);
1225 /* Move blockoffs away from the to-be-chopped beginning */
1226 move_blockoffs(block, prev, 0, len - 1, prev->t.size);
1228 /* Enlarge the previous block */
1229 prev->t.size += len;
1230 recalc_block_recursive(file_blocks(file), prev);
1232 /* Shorten the original block */
1233 block->t.size -= len;
1234 block->dataoff += len;
1235 block->phys_pos += len;
1236 recalc_block_recursive(file_blocks(file), block);
1237 return len;
1240 /* Shorten a block at end and enlarge the following block.
1242 * Re-allocate at most @len bytes from the end of @block to the
1243 * beginning of the following block.
1244 * If @block is virtual, this will effectively devirtualize the range.
1245 * If @block is not virtual, this will change the backing store of
1246 * the bytes in the range.
1247 * Returns: the number of bytes actually moved.
1249 static size_t
1250 shrink_at_end(struct hed_file *file, struct file_block *block,
1251 size_t len, long state)
1253 struct file_block *next = next_block(block);
1254 hed_uoff_t off;
1256 /* Basic assumptions */
1257 assert(!(state & HED_BLOCK_VIRTUAL));
1259 /* The next block must exist: */
1260 if (!is_a_block(file_blocks(file), next))
1261 return 0;
1263 /* The block flags must match the requested @state: */
1264 if ((next->flags & HED_BLOCK_STATEMASK) != state)
1265 return 0;
1267 /* No deletions at end, or similar: */
1268 if (block->phys_pos + block->t.size != next->phys_pos)
1269 return 0;
1271 /* Prepend fewer bytes than requested if not all are available */
1272 if (len > next->dataoff)
1273 len = next->dataoff;
1274 if (!len)
1275 return 0;
1276 off = block->t.size - len;
1278 BDEBUG("Prepending %llx:%lx to the next block\n", off, len);
1280 /* Shift blockoffs in the new physical block */
1281 update_blockoffs(next, next, len);
1283 /* Move blockoffs away from the to-be-chopped end */
1284 move_blockoffs(block, next, off, UOFF_MAX, -off);
1286 /* Enlarge the next block */
1287 next->dataoff -= len;
1288 next->phys_pos -= len;
1289 next->t.size += len;
1290 recalc_block_recursive(file_blocks(file), next);
1292 /* Shorten the original block */
1293 block->t.size -= len;
1294 recalc_block_recursive(file_blocks(file), block);
1295 return len;
1298 /* Search for an existing data object within the same physical block
1299 * as @curs, and having the given @state flags.
1301 static struct hed_block_data *
1302 search_data(struct hed_file *file, const hed_cursor_t *curs, long state)
1304 struct file_block *block;
1305 hed_uoff_t physpos;
1307 physpos = FILE_BLOCK_ROUND(curs->block->phys_pos + curs->off);
1308 BDEBUG("Search for already loaded data at %llx starting at %llx...",
1309 physpos, curs->block->phys_pos);
1311 /* Search backwards */
1312 block = prev_block(curs->block);
1313 while (is_a_block(file_blocks(file), block) &&
1314 block->phys_pos >= physpos) {
1315 if ((block->flags & HED_BLOCK_STATEMASK) == state) {
1316 BDEBUG(" found at %llx\n", block->phys_pos);
1317 assert(block->dataobj);
1318 return block->dataobj;
1320 block = prev_block(block);
1323 /* Search forwards */
1324 block = next_block(curs->block);
1325 while (is_a_block(file_blocks(file), block) &&
1326 block->phys_pos < physpos + FILE_BLOCK_SIZE) {
1327 if ((block->flags & HED_BLOCK_STATEMASK) == state) {
1328 BDEBUG(" found at %llx\n", block->phys_pos);
1329 assert(block->dataobj);
1330 return block->dataobj;
1332 block = next_block(block);
1335 BDEBUG(" not found\n");
1336 return NULL;
1339 static int
1340 reuse_loaded_data(struct hed_file *file, const blockoff_t *blockoff,
1341 struct hed_block_data *data)
1343 struct file_block *physblock;
1344 struct file_block *block = blockoff->block;
1345 hed_uoff_t block_offset = blockoff->off;
1346 hed_uoff_t physpos = block->phys_pos + block_offset;
1347 size_t part = FILE_BLOCK_OFF(physpos);
1348 size_t len =
1349 FILE_BLOCK_SIZE - part <= block->t.size - block_offset
1350 ? FILE_BLOCK_SIZE - part
1351 : block->t.size - block_offset;
1353 if (part > block_offset)
1354 part = block_offset;
1355 physpos -= part;
1356 len += part;
1357 block_offset -= part;
1359 if (! (physblock = new_data_block(file, physpos, len, data)) )
1360 return -1;
1362 BDEBUG("Add physical block at %llx, length %llx\n",
1363 physblock->phys_pos, physblock->t.size);
1364 if (replace_chunk(file, block, block_offset, physblock)) {
1365 file_free_block(file, physblock);
1366 return -1;
1369 dump_blocks(file);
1370 return 0;
1373 /* Replace a part of a virtual block with content loaded
1374 * from disk. The amount of data loaded from the disk depends
1375 * on various factors with the goal to choose the most efficient
1376 * ratio. The only guarantee is that the byte at @blockoff will
1377 * be in a non-virtual block when this function returns 0.
1379 static int
1380 devirtualize_clean(struct hed_file *file, const blockoff_t *blockoff)
1382 struct file_block *block = blockoff->block;
1383 hed_uoff_t block_offset = blockoff->off;
1384 hed_uoff_t remain = block->t.size - block_offset;
1385 struct hed_block_data *data;
1387 BDEBUG("punch a clean hole at %llx into %llx:%llx\n", block_offset,
1388 block_offset(file_blocks(file), block), block->t.size);
1389 assert(hed_block_is_virtual(block));
1391 /* Check if we can combine with a neighbouring block */
1392 if (shrink_at_begin(file, block, SIZE_MAX, 0) > block_offset ||
1393 shrink_at_end(file, block, SIZE_MAX, 0) >= remain) {
1394 kill_block_if_empty(file, block);
1395 dump_blocks(file);
1396 return 0;
1399 /* Check if the block is already loaded elsewhere */
1400 data = search_data(file, blockoff, 0);
1401 return data
1402 ? reuse_loaded_data(file, blockoff, data)
1403 : load_blocks(file, blockoff);
1406 /* Replace at most @len bytes of a virtual block with a newly
1407 * allocated out-of-cache block. The block is marked dirty
1408 * and its data is left uninitialized.
1409 * If the block at @blockoff is not virtual, make it dirty.
1410 * Note that this function may devirtualize less than @len bytes.
1411 * In the worst case only 1 byte at @blockoff will be available.
1413 static int
1414 prepare_modify(struct hed_file *file, blockoff_t *blockoff, size_t len)
1416 struct file_block *block = blockoff->block;
1417 hed_uoff_t block_offset = blockoff->off;
1418 hed_uoff_t remain = block->t.size - block_offset;
1419 struct file_block *newblock;
1421 if (hed_block_is_dirty(block))
1422 return 0;
1424 if (len > remain)
1425 len = remain;
1427 BDEBUG("punch a dirty hole at %llx:%lx into %llx:%llx\n",
1428 block_offset, len,
1429 block_offset(file_blocks(file), block), block->t.size);
1431 /* Check if we can combine with a neighbouring block */
1432 if ((block_offset == 0 &&
1433 shrink_at_begin(file, block, len, HED_BLOCK_DIRTY)) ||
1434 (remain == len &&
1435 shrink_at_end(file, block, len, HED_BLOCK_DIRTY) >= len)) {
1436 kill_block_if_empty(file, block);
1437 dump_blocks(file);
1438 return 0;
1441 /* Initialize a new block */
1442 newblock = new_block(file, HED_BLOCK_EXCACHE | HED_BLOCK_DIRTY);
1443 if (!newblock)
1444 goto out_err;
1446 /* Allocate data */
1447 if ( (newblock->dataobj = search_data(file, blockoff,
1448 HED_BLOCK_DIRTY)) )
1449 cache_get(newblock->dataobj);
1450 else if (! (newblock->dataobj = block_data_new(file->cache,
1451 FILE_BLOCK_SIZE)) )
1452 goto out_err_free;
1454 newblock->phys_pos = block->phys_pos + block_offset;
1455 newblock->dataoff = FILE_BLOCK_OFF(newblock->phys_pos);
1456 if (len > FILE_BLOCK_SIZE - newblock->dataoff)
1457 len = FILE_BLOCK_SIZE - newblock->dataoff;
1458 newblock->t.size = len;
1460 if (replace_chunk(file, block, block_offset, newblock))
1461 goto out_err_free;
1463 dump_blocks(file);
1464 return 0;
1466 out_err_free:
1467 file_free_block(file, newblock);
1468 out_err:
1469 return -1;
1472 /* Ensure that @curs points to an up-to-date non-virtual block, loading
1473 * data from disk if necessary; returns the usable chunk length, 0 on failure. */
1474 size_t
1475 hed_prepare_read(struct hed_file *file, const hed_cursor_t *curs, size_t len)
1477 struct file_block *block = curs->block;
1478 if (hed_block_is_inner_virtual(block) &&
1479 devirtualize_clean(file, curs) < 0)
1480 return 0;
1482 return hed_cursor_chunk_len(curs, len);
1485 /* Move the block pointer to the next block */
1486 static struct hed_block *
1487 cursor_next_block(struct hed_file *file, hed_cursor_t *curs)
1489 struct hed_block *block =
1490 next_nonzero_block(file_blocks(file), curs->block);
1492 if (block) {
1493 curs->block = block;
1494 curs->off = 0;
1495 list_move(&curs->list, &block->refs);
1497 return block;
1500 /* This is an optimized way of doing:
1502 * hed_move_relative(blockoff, blockoff->block->t.size);
1504 * for the case when blockoff->off == 0.
1506 static struct hed_block *
1507 move_to_next(struct hed_file *file, hed_cursor_t *curs)
1509 curs->pos += hed_block_size(curs->block);
1510 return cursor_next_block(file, curs);
1513 /* Copy in @count bytes from @pos.
1514 * Returns the number of bytes that were not read (i.e. zero on success).
1515 * The @pos blockoff is moved by the amount of data read.
1516 * CAUTION: If you read up to EOF, then @pos points to the null_block
1517 * upon return.
1519 static size_t
1520 copy_in(struct hed_file *file, void *buf, size_t count, blockoff_t *pos)
1522 size_t cpylen;
1524 assert(is_a_block(file_blocks(file), pos->block));
1526 pos->pos += count;
1527 while (count && (cpylen = hed_prepare_read(file, pos, count))) {
1528 if (hed_block_is_virtual(pos->block))
1529 memset(buf, 0, cpylen);
1530 else
1531 memcpy(buf, hed_cursor_data(pos), cpylen);
1533 buf += cpylen;
1534 count -= cpylen;
1535 if ( (pos->off += cpylen) >= hed_block_size(pos->block) )
1536 if (!cursor_next_block(file, pos))
1537 break;
1539 pos->pos -= count;
1540 return count;
1543 size_t
1544 hed_file_cpin(struct hed_file *file, void *buf, size_t count,
1545 const hed_cursor_t *pos)
1547 blockoff_t mypos;
1548 size_t ret;
1550 hed_dup_cursor(pos, &mypos);
1551 ret = copy_in(file, buf, count, &mypos);
1552 put_cursor(&mypos);
1553 return ret;
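/* Illustrative sketch (not part of the original source): reading a few
 * bytes at an existing cursor without moving it.  The buffer name is
 * hypothetical.
 *
 *	unsigned char buf[16];
 *	size_t missed = hed_file_cpin(file, buf, sizeof buf, curs);
 *	// missed is the number of bytes that could not be read;
 *	// 0 means the whole buffer was filled
 */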
1556 /* Set the modified flag */
1557 static inline void
1558 set_modified(struct hed_file *file)
1560 file->modified = true;
1563 /* Clear the modified flag */
1564 static inline void
1565 clear_modified(struct hed_file *file)
1567 file->modified = false;
1571 hed_file_set_byte(struct hed_file *file, blockoff_t *blockoff,
1572 unsigned char byte)
1574 hed_uoff_t offset = blockoff->pos;
1576 if (prepare_modify(file, blockoff, 1))
1577 return -1;
1578 set_modified(file);
1580 if (offset >= file->size)
1581 file->size = offset + 1;
1583 hed_block_data(blockoff->block)[blockoff->off] = byte;
1584 return 0;
1587 size_t
1588 hed_file_set_block(struct hed_file *file, blockoff_t *blockoff,
1589 unsigned char *buf, size_t len)
1591 while (len) {
1592 size_t span;
1594 if (prepare_modify(file, blockoff, len))
1595 break;
1596 set_modified(file);
1598 span = hed_cursor_chunk_len(blockoff, len);
1599 memcpy(hed_cursor_data(blockoff), buf, span);
1600 buf += span;
1601 len -= span;
1602 move_rel_fast(blockoff, span);
1604 if (blockoff->pos > file->size)
1605 file->size = blockoff->pos;
1607 return len;
1610 hed_uoff_t
1611 hed_file_set_bytes(struct hed_file *file, blockoff_t *blockoff,
1612 unsigned char byte, hed_uoff_t rep)
1614 while (rep) {
1615 size_t span;
1617 if (prepare_modify(file, blockoff, rep))
1618 break;
1619 set_modified(file);
1621 span = hed_cursor_chunk_len(blockoff, rep);
1622 memset(hed_cursor_data(blockoff), byte, span);
1623 rep -= span;
1624 move_rel_fast(blockoff, span);
1626 if (blockoff->pos > file->size)
1627 file->size = blockoff->pos;
1629 return rep;
1632 static int
1633 file_erase_continuous(struct hed_file *file, blockoff_t *blockoff, size_t len)
1635 struct file_block *block = blockoff->block;
1636 hed_uoff_t block_offset = blockoff->off;
1638 /* Find the new position */
1639 hed_move_relative(blockoff, len);
1641 /* Move all other cursors in the erased range to the new position */
1642 assert(len > 0);
1643 move_cursors_abs(block, block_offset,
1644 block_offset + len - 1, blockoff);
1646 if (!block_offset) {
1647 block->dataoff += len;
1648 if (!hed_block_is_inserted(block))
1649 block->phys_pos += len;
1650 } else if (block_offset + len < block->t.size &&
1651 !split_block(file, block, block_offset + len))
1652 return -1;
1654 move_blockoffs(block, block, block_offset, UOFF_MAX, -(hed_uoff_t)len);
1656 block->t.size -= len;
1657 recalc_block_recursive(file_blocks(file), block);
1659 kill_block_if_empty(file, block);
1660 return 0;
1663 size_t
1664 hed_file_erase_block(struct hed_file *file, blockoff_t *blockoff,
1665 hed_uoff_t len)
1667 hed_uoff_t offset;
1668 hed_uoff_t todo;
1669 struct file_block *eofblock;
1671 offset = blockoff->pos;
1672 if (offset > file_size(file))
1673 return 0;
1674 else if (len > file_size(file) - offset)
1675 len = file_size(file) - offset;
1677 todo = len;
1678 while (todo) {
1679 size_t span = hed_cursor_chunk_len(blockoff, todo);
1681 if (file_erase_continuous(file, blockoff, span))
1682 break;
1684 todo -= span;
1686 len -= todo;
1688 file->size -= len;
1689 set_modified(file);
1691 eofblock = last_block(file_blocks(file));
1692 assert(hed_block_is_virtual(eofblock));
1693 assert(hed_block_is_eof(eofblock));
1694 eofblock->t.size += len;
1695 recalc_block_recursive(file_blocks(file), eofblock);
1697 struct hed_block *slideblock = prev_block(blockoff->block);
1698 if (!is_a_block(file_blocks(file), slideblock))
1699 slideblock = blockoff->block;
1700 slide_blockoffs(file, slideblock, blockoff->pos, -len);
1702 return todo;
1706 hed_file_insert_begin(struct hed_file *file, const hed_cursor_t *curs,
1707 hed_cursor_t *curs_ins)
1709 struct file_block *block, *newblock;
1711 BDEBUG("Starting insert at %llx\n", curs->pos);
1713 newblock = new_block(file,
1714 HED_BLOCK_EXCACHE | HED_BLOCK_INSERTED);
1715 if (!newblock)
1716 return -1;
1718 newblock->phys_pos = hed_cursor_phys_pos(curs);
1719 newblock->dataobj = block_data_new(file->cache, FILE_BLOCK_SIZE);
1720 if (!newblock->dataobj) {
1721 file_free_block(file, newblock);
1722 return -1;
1725 block = curs->block;
1726 if (curs->off) {
1727 if (!split_block(file, block, curs->off)) {
1728 file_free_block(file, newblock);
1729 return -1;
1731 } else
1732 block = prev_block(block);
1734 chain_block_after(file_blocks(file), block, newblock);
1736 curs_ins->pos = curs->pos;
1737 curs_ins->off = newblock->t.size;
1738 curs_ins->block = newblock;
1739 list_add(&curs_ins->list, &newblock->refs);
1741 dump_blocks(file);
1742 return 0;
1745 void
1746 hed_file_insert_end(struct hed_file *file, blockoff_t *blockoff_ins)
1748 struct file_block *block = blockoff_ins->block;
1750 BDEBUG("End insert at %llx\n", blockoff_ins->pos);
1752 blockoff_ins->block = NULL;
1753 list_del(&blockoff_ins->list);
1754 if (!kill_block_if_empty(file, block))
1755 block_data_shrink(file->cache, block->dataobj,
1756 block->dataoff + block->t.size);
1758 dump_blocks(file);
1761 static void
1762 insert_block(struct hed_file *file, blockoff_t *blockoff,
1763 unsigned char *buf, size_t len)
1765 struct file_block *block = blockoff->block;
1766 hed_uoff_t offset = blockoff->pos;
1768 assert(file && offset >= 0);
1770 assert(hed_block_is_excache(block));
1771 hed_block_set_dirty(block);
1772 set_modified(file);
1774 memcpy(hed_block_data(block) + blockoff->off, buf, len);
1775 block->t.size += len;
1776 recalc_block_recursive(file_blocks(file), block);
1777 blockoff->off += len;
1778 blockoff->pos += len;
1780 if (blockoff->pos > file->size)
1781 file->size = blockoff->pos;
1782 else
1783 file->size += len;
1785 slide_blockoffs(file, next_block(block), offset, len);
1788 size_t
1789 hed_file_insert_block(struct hed_file *file, blockoff_t *blockoff,
1790 unsigned char *buf, size_t len)
1792 while (len) {
1793 struct file_block *block = blockoff->block;
1794 size_t remain = block->dataobj->size - blockoff->off;
1796 if (!remain) {
1797 list_del(&blockoff->list);
1798 blockoff->block = next_block(block);
1799 blockoff->off = 0;
1801 if (!hed_file_insert_begin(file, blockoff, blockoff))
1802 continue;
1804 blockoff->block = block;
1805 blockoff->off = block->t.size;
1806 list_add(&blockoff->list, &block->refs);
1807 break;
1810 if (remain > len)
1811 remain = len;
1813 BDEBUG("Append %ld bytes to the insert block\n",
1814 (long) remain);
1815 insert_block(file, blockoff, buf, remain);
1816 buf += remain;
1817 len -= remain;
1819 return len;
1823 hed_file_insert_byte(struct hed_file *file, blockoff_t *blockoff,
1824 unsigned char byte)
1826 return hed_file_insert_block(file, blockoff, &byte, 1);
1829 size_t
1830 hed_file_insert_once(struct hed_file *file, blockoff_t *blockoff,
1831 unsigned char *buf, size_t len)
1833 blockoff_t insert;
1835 if (!hed_file_insert_begin(file, blockoff, &insert)) {
1836 len = hed_file_insert_block(file, &insert, buf, len);
1837 hed_file_insert_end(file, &insert);
1839 return len;
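/* Illustrative sketch (not part of the original source): inserting a
 * few bytes at an existing cursor in one shot.  The array name is
 * hypothetical.
 *
 *	unsigned char patch[] = { 0xde, 0xad, 0xbe, 0xef };
 *	size_t left = hed_file_insert_once(file, curs, patch, sizeof patch);
 *	// left is the number of bytes that could not be inserted
 */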
1842 struct commit_control {
1843 struct hed_file *file;
1844 int wfd; /* file descriptor for writing */
1845 int needwrite; /* non-zero if write is needed */
1846 blockoff_t begoff, endoff;
1847 hed_off_t shift;
1848 void *partbuf; /* allocated 3*FILE_BLOCK_SIZE */
1849 void *partial; /* pointer into partbuf */
1852 /* Get the logical<->physical shift value after the specified block.
1853 * It is essential to get the value _AFTER_ the block, because the
1854 * shift value is used to decide how the current block will be handled.
1856 static hed_off_t
1857 get_shift(const blockoff_t *blockoff)
1859 struct file_block *block = blockoff->block;
1860 size_t curshift = hed_block_is_inserted(block) ? block->t.size : 0;
1861 return curshift +
1862 blockoff->pos - blockoff->off - block->phys_pos;
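/* Worked example (illustrative): for an inserted block of size 0x10
 * whose logical start (pos - off) and phys_pos are both 0x100, the
 * shift after the block is +0x10: everything that follows must be
 * written 0x10 bytes past its old physical position, so the commit
 * runs backwards.  For a non-inserted block with logical start 0x200
 * and phys_pos 0x300 the shift is -0x100, so the commit runs forwards.
 */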
1865 static void
1866 switch_partial(struct commit_control *cc)
1868 cc->partial += FILE_BLOCK_SIZE;
1869 if (cc->partial >= cc->partbuf + 3*FILE_BLOCK_SIZE)
1870 cc->partial = cc->partbuf;
1873 /* Write @len bytes from the partial buffer at @cc->begoff. */
1874 static int
1875 commit_block(struct commit_control *cc, size_t len)
1877 ssize_t written;
1879 assert(len > 0);
1880 BDEBUG(" -> write %lx bytes at %llx\n",
1881 (unsigned long)len, cc->begoff.pos - len);
1882 written = pwrite(cc->wfd, cc->partial, len, cc->begoff.pos - len);
1883 if (written < len)
1884 /* TODO: keep data in a new list of dirty blocks */
1885 return -1;
1886 return 0;
1889 static int
1890 commit_partial(struct commit_control *cc)
1892 size_t partoff, remain, left;
1893 size_t writelen;
1895 partoff = FILE_BLOCK_OFF(cc->begoff.pos);
1896 remain = FILE_BLOCK_SIZE - partoff;
1897 if (remain > cc->endoff.pos - cc->begoff.pos)
1898 remain = cc->endoff.pos - cc->begoff.pos;
1899 if ((writelen = partoff + remain) == 0)
1900 return 0;
1902 BDEBUG("Fill partial %llx-%llx\n",
1903 cc->begoff.pos, cc->begoff.pos + remain);
1905 left = copy_in(cc->file, cc->partial + partoff, remain, &cc->begoff);
1906 if (left) {
1907 hed_move_relative(&cc->begoff, left);
1908 return -1;
1911 if (FILE_BLOCK_OFF(cc->begoff.pos) &&
1912 !hed_block_is_eof(cc->begoff.block))
1913 return 0;
1915 return commit_block(cc, writelen);
1918 /* Commit forwards.
1919 * Beware, cc->begoff is undefined upon return!
1921 static int
1922 commit_forwards(struct commit_control *cc)
1924 hed_uoff_t endpos = cc->endoff.pos;
1925 int ret = 0;
1927 BDEBUG("Writing forwards %llx-%llx\n",
1928 cc->begoff.pos, cc->endoff.pos);
1930 if (!cc->needwrite)
1931 return 0;
1933 while (cc->begoff.pos < endpos)
1934 ret |= commit_partial(cc);
1936 return ret;
1939 /* Commit backwards.
1940 * Beware, cc->begoff is undefined upon return!
1942 static int
1943 commit_backwards(struct commit_control *cc)
1945 void *retpartial = cc->partial;
1946 hed_uoff_t begpos = cc->begoff.pos;
1947 hed_uoff_t blkpos; /* start of current partial block */
1948 int ret = 0;
1950 BDEBUG("Writing backwards %llx-%llx\n",
1951 cc->begoff.pos, cc->endoff.pos);
1953 if (!cc->needwrite)
1954 return 0;
1956 blkpos = FILE_BLOCK_ROUND(cc->endoff.pos);
1957 if (blkpos <= begpos)
1958 goto final;
1960 /* Handle the trailing partial block */
1961 hed_get_cursor(cc->file, blkpos, &cc->begoff);
1962 switch_partial(cc);
1963 ret |= commit_partial(cc);
1964 retpartial = cc->partial;
1966 /* Handle the middle part */
1967 switch_partial(cc);
1968 while ( (blkpos -= FILE_BLOCK_SIZE) > begpos) {
1969 hed_get_cursor(cc->file, blkpos, &cc->begoff);
1970 ret |= commit_partial(cc);
1972 switch_partial(cc); /* wrap around */
1974 final:
1975 /* Handle the first block (partial or not) */
1976 hed_get_cursor(cc->file, begpos, &cc->begoff);
1977 ret |= commit_partial(cc);
1979 cc->partial = retpartial;
1980 return ret;
1983 /* Handle the partial block before a skipped one. */
1984 static int
1985 begin_skip(struct commit_control *cc)
1987 size_t minsize = FILE_BLOCK_SIZE - FILE_BLOCK_OFF(cc->endoff.pos);
1988 size_t remain;
1989 int ret = 0;
1991 /* Check if at least one complete physical block can be skipped */
1992 if (cc->endoff.block->t.size < minsize)
1993 return 0;
1995 /* Write out the partially dirty block */
1996 remain = FILE_BLOCK_OFF(minsize);
1997 hed_move_relative(&cc->endoff, remain);
1998 if (cc->shift <= 0)
1999 ret |= commit_forwards(cc);
2000 else
2001 ret |= commit_backwards(cc);
2002 hed_move_relative(&cc->endoff, -(hed_off_t)remain);
2003 hed_dup2_cursor(&cc->endoff, &cc->begoff);
2005 cc->needwrite = 0;
2006 return ret;
2009 /* Handle the last partially skipped physical block. */
2010 static int
2011 end_skip(struct commit_control *cc)
2013 size_t partlen;
2014 int ret = 0;
2016 /* Find the beginning of the physical block */
2017 hed_dup2_cursor(&cc->endoff, &cc->begoff);
2018 partlen = FILE_BLOCK_OFF(cc->begoff.pos);
2019 hed_move_relative(&cc->begoff, -(hed_off_t)partlen);
2021 /* Read the partial data before this block */
2022 if (hed_file_cpin(cc->file, cc->partial, partlen, &cc->begoff))
2023 ret = -1;
2025 cc->needwrite = 1;
2026 return ret;
2029 static void
2030 undirty_blocks(struct hed_file *file)
2032 struct file_block *block, *next;
2033 hed_uoff_t pos = 0;
2035 BDEBUG("Undirtying blocks:\n");
2036 dump_blocks(file);
2038 foreachsafe_block (block, next, file_blocks(file)) {
2039 if (kill_block_if_empty(file, block))
2040 continue;
2042 if (!hed_block_is_virtual(block)) {
2043 cache_put(file->cache, block->dataobj);
2044 block->dataobj = NULL;
2045 list_del_init(&block->lru);
2046 block->flags = HED_BLOCK_EXCACHE | HED_BLOCK_VIRTUAL;
2049 block->phys_pos = pos;
2050 pos += block->t.size;
2053 block = first_block(file_blocks(file));
2054 while (!hed_block_is_eof(block)) {
2055 next = next_block(block);
2056 file_kill_block(file, block);
2057 block = next;
2060 BDEBUG("After undirtying\n");
2061 dump_blocks(file);
2064 static int
2065 commit_init(struct commit_control *cc, struct hed_file *file)
2067 cc->file = file;
2069 cc->partbuf = malloc(3*FILE_BLOCK_SIZE);
2070 if (!cc->partbuf)
2071 goto err;
2073 cc->wfd = open(file->name,
2074 O_RDWR | (file->fd < 0 ? O_CREAT : 0), 0666);
2075 if (cc->wfd < 0)
2076 goto err_free;
2078 if (file->fd < 0 &&
2079 (file->fd = open(file->name, O_RDONLY)) < 0)
2080 goto err_close;
2082 return 0;
2084 err_close:
2085 close(cc->wfd);
2086 err_free:
2087 free(cc->partbuf);
2088 err:
2089 return -1;
2093 hed_file_commit(struct hed_file *file)
2095 struct commit_control cc;
2096 int ret = 0;
2098 if (commit_init(&cc, file))
2099 return -1;
2101 dump_blocks(file);
2103 cc.partial = cc.partbuf;
2104 get_cursor(file, 0,&cc.begoff);
2105 hed_dup_cursor(&cc.begoff, &cc.endoff);
2106 cc.shift = -cc.begoff.block->phys_pos;
2107 cc.needwrite = 0;
2108 while(!hed_block_is_eof(cc.endoff.block)) {
2109 hed_off_t newshift = cc.endoff.pos < file->phys_size
2110 ? get_shift(&cc.endoff)
2111 : 0;
2113 if (cc.shift <= 0 && newshift > 0) {
2114 ret |= commit_forwards(&cc);
2115 hed_dup2_cursor(&cc.endoff, &cc.begoff);
2116 } else if (cc.shift > 0 && newshift <= 0) {
2117 ret |= commit_backwards(&cc);
2118 hed_dup2_cursor(&cc.endoff, &cc.begoff);
2120 cc.shift = newshift;
2122 if (!newshift && !hed_block_is_dirty(cc.endoff.block)) {
2123 if (cc.needwrite)
2124 ret |= begin_skip(&cc);
2125 } else if (!cc.needwrite)
2126 ret |= end_skip(&cc);
2128 if (!move_to_next(file, &cc.endoff))
2129 break;
2130 }
2131 assert(cc.endoff.pos == file_size(file));
2133 if (cc.begoff.pos < file_size(file)) {
2134 if (cc.shift <= 0)
2135 ret |= commit_forwards(&cc);
2136 else
2137 ret |= commit_backwards(&cc);
2138 }
2140 put_cursor(&cc.begoff);
2141 put_cursor(&cc.endoff);
2143 ftruncate(cc.wfd, file_size(file));
2144 file->phys_size = file_size(file);
2146 ret |= close(cc.wfd);
2147 free(cc.partbuf);
2149 undirty_blocks(file);
2151 if (!ret)
2152 clear_modified(file);
2154 return ret;
2155 }
2157 #ifdef HED_CONFIG_SWAP
2158 int
2159 hed_file_write_swap(struct hed_file *file)
2160 {
2161 return swp_write(file_swp(file));
2162 }
2164 static int
2165 do_read_swap(struct hed_file *file, struct swp_file *swp, blockoff_t *pos)
2166 {
2167 struct hed_file *swpfile = swp_private(swp);
2168 struct file_block *cur, block;
2169 hed_uoff_t phys_pos;
2171 if (file_stat(swpfile)->st_size != file_stat(file)->st_size ||
2172 file_stat(swpfile)->st_mtime != file_stat(file)->st_mtime) {
2173 fprintf(stderr, "stat info mismatch (you modified the file since hed ran on it; refusing to touch it)\n");
2174 return -1;
2175 }
2177 BDEBUG("Swap header match\n");
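/*
 * Replay the block list recorded in the swap file: ranges the swapped
 * session had deleted are erased again, and every dirty block is
 * re-applied to the file, either as an insertion or as an in-place
 * overwrite depending on its inserted flag.
 */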
2179 phys_pos = 0;
2180 for (cur = first_block(file_blocks(swpfile));
2181 cur != swpfile->null_block; cur = next_block(&block)) {
2182 struct hed_block_data dataobj;
2183 size_t (*mergefn)(struct hed_file*, blockoff_t*,
2184 unsigned char*, size_t);
2185 void *data;
2186 size_t res;
2188 if (swp_cpin(swp, &block, cur, sizeof(struct file_block))) {
2189 perror("Cannot read block descriptor");
2190 return -1;
2191 }
2192 BDEBUG("BLOCK %p: flags %02lx phys 0x%02llx size 0x%llx\n",
2193 cur, block.flags, (long long)block.phys_pos,
2194 (long long)hed_block_size(&block));
2196 if (block.phys_pos - phys_pos) {
2197 if (hed_file_erase_block(file, pos,
2198 block.phys_pos - phys_pos)) {
2199 perror("Cannot erase");
2200 return -1;
2201 }
2202 phys_pos = block.phys_pos;
2203 }
2205 if (!hed_block_is_inserted(&block))
2206 phys_pos += hed_block_size(&block);
2208 if (!hed_block_is_dirty(&block)) {
2209 hed_move_relative(pos, hed_block_size(&block));
2210 continue;
2211 }
2213 if (swp_cpin(swp, &dataobj, block.dataobj,
2214 sizeof(struct hed_block_data))) {
2215 perror("Cannot read data descriptor");
2216 return -1;
2217 }
2218 BDEBUG("DATA %p: size 0x%lx\n",
2219 block.dataobj, (long)dataobj.size);
2221 if (! (data = malloc(hed_block_size(&block))) ) {
2222 perror("Cannot allocate data");
2223 return -1;
2224 }
2226 if (swp_cpin(swp, data, dataobj.data + block.dataoff,
2227 hed_block_size(&block))) {
2228 perror("Cannot read data");
2229 return -1;
2230 }
2232 mergefn = hed_block_is_inserted(&block)
2233 ? hed_file_insert_once
2234 : hed_file_set_block;
2235 res = mergefn(file, pos, data, hed_block_size(&block));
2236 free(data);
2237 if (res) {
2238 perror("Cannot merge data");
2239 return -1;
2240 }
2241 }
2243 dump_blocks(file);
2244 return 0;
2245 }
2247 int
2248 hed_file_read_swap(struct hed_file *file)
2249 {
2250 struct swp_file *swp;
2251 blockoff_t pos;
2252 int ret;
2254 if (! (swp = swp_init_read(file->swpname)) )
2255 return -1;
2257 get_cursor(file, 0, &pos);
2258 ret = do_read_swap(file, swp, &pos);
2259 put_cursor(&pos);
2261 swp_done(swp);
2262 return ret;
2263 }
2265 #endif /* HED_CONFIG_SWAP */
2267 struct ffb_hookdata {
2268 struct hed_file *file;
2269 blockoff_t *pos;
2270 hed_expr_reg_cb base_ecb;
2271 void *base_ecb_data;
2272 };
2274 static long
2275 eval_reg_cb(void *hookdata, char reg, hed_off_t ofs,
2276 unsigned char *scramble, size_t len)
2277 {
2278 struct ffb_hookdata *data = hookdata;
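/*
 * The '.' register refers to the data at the current search position:
 * the requested bytes are copied in from the file at @ofs relative to
 * the position being examined, and the result is marked dynamic so the
 * expression gets re-evaluated as the position moves.  Any other
 * register is delegated to the caller-supplied callback.
 */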
2279 if (reg == '.') {
2280 blockoff_t pos;
2281 long ret = HED_AEF_DYNAMIC;
2283 hed_dup_cursor(data->pos, &pos);
2284 hed_move_relative(&pos, ofs);
2285 if (copy_in(data->file, scramble, len, &pos))
2286 ret = HED_AEF_ERROR;
2287 put_cursor(&pos);
2288 return ret;
2289 }
2291 return data->base_ecb(data->base_ecb_data, reg, ofs, scramble, len);
2292 }
2294 static void
2295 reverse(unsigned char *p, size_t len)
2296 {
2297 unsigned char *q = p + len;
2298 while (p < q) {
2299 unsigned char x = *p;
2300 *p++ = *--q;
2301 *q = x;
2302 }
2303 }
2305 static void
2306 compute_badchar(ssize_t *badchar, const unsigned char *s, ssize_t len)
2307 {
2308 size_t i = 1;
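/*
 * Boyer-Moore bad-character table: badchar[c] ends up holding the
 * 1-based position of the last occurrence of byte c in the needle, and
 * stays 0 for bytes that do not occur (the array comes zeroed from the
 * calloc in find_bytestr).  The final needle byte is not recorded,
 * since i only runs up to len - 1.
 */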
2309 while (i < len)
2310 badchar[*s++] = i++;
2311 }
2313 static void
2314 compute_sfx(ssize_t *sfx, const unsigned char *s, ssize_t len)
2315 {
2316 ssize_t f, g, i;
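/*
 * Standard Boyer-Moore suffix computation: sfx[i] is the length of the
 * longest substring ending at position i that is also a suffix of the
 * whole needle.
 */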
2318 sfx[len - 1] = len;
2319 g = len - 1;
2320 for (i = len - 2; i >= 0; --i) {
2321 if (i > g && sfx[i + len - 1 - f] < i - g)
2322 sfx[i] = sfx[i + len - 1 - f];
2323 else {
2324 if (i < g)
2325 g = i;
2326 f = i;
2327 while (g >= 0 && s[g] == s[g + len - 1 - f])
2328 --g;
2329 sfx[i] = f - g;
2330 }
2331 }
2332 }
2334 static void
2335 compute_goodsfx(ssize_t *goodsfx, const unsigned char *s, ssize_t len)
2336 {
2337 ssize_t i, j, *sfx = goodsfx + len;
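/*
 * Good-suffix shift table; the suffix table is computed into the second
 * half of the same allocation (goodsfx + len).
 */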
2339 compute_sfx(sfx, s, len);
2341 for (i = 0; i < len; ++i)
2342 goodsfx[i] = len;
2343 j = 0;
2344 for (i = len - 1; i >= 0; --i)
2345 if (sfx[i] == i + 1)
2346 for (; j < len - 1 - i; ++j)
2347 if (goodsfx[j] == len)
2348 goodsfx[j] = len - 1 - i;
2349 for (i = 0; i <= len - 2; ++i)
2350 goodsfx[len - 1 - sfx[i]] = len - 1 - i;
2351 }
2353 /* Search for a constant byte string using the Boyer-Moore algorithm. */
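/*
 * The needle is compared right-to-left starting at offset @maxidx (the
 * needle length minus one); on a mismatch at index i against text byte
 * c the window advances by max(i + 1 - badchar[c], goodsfx[i]).  Tiny
 * illustration with needle "abc" (maxidx 2, badchar['a'] == 1): a
 * mismatch at index i against an 'a' in the text would shift by i under
 * the bad-character rule, lining the leading 'a' of the needle up under
 * that text byte; the good-suffix rule may demand a larger shift.
 */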
2354 static inline unsigned char*
2355 bm_find(unsigned char *buf, size_t buflen, unsigned char *needle,
2356 size_t maxidx, ssize_t *badchar, ssize_t *goodsfx)
2357 {
2358 while (buflen > maxidx) {
2359 unsigned char *p;
2360 size_t i;
2361 ssize_t shift;
2363 for (p = buf + maxidx, i = maxidx; p >= buf; --p, --i)
2364 if (needle[i] != *p)
2365 break;
2366 if (p < buf)
2367 return buf;
2369 shift = i + 1 - badchar[*p];
2370 if (shift < goodsfx[i])
2371 shift = goodsfx[i];
2373 buf += shift;
2374 buflen -= shift;
2375 }
2376 return NULL;
2377 }
2379 /* Search for a constant byte string backwards. */
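/*
 * Mirror image of bm_find for backward searches: find_bytestr reverses
 * the needle beforehand, @buf points at the last byte of the window,
 * and the window slides towards lower addresses.
 */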
2380 static inline unsigned char*
2381 bm_find_rev(unsigned char *buf, size_t buflen, unsigned char *needle,
2382 size_t maxidx, ssize_t *badchar, ssize_t *goodsfx)
2383 {
2384 buf -= maxidx;
2385 while (buflen > maxidx) {
2386 unsigned char *p;
2387 size_t i;
2388 ssize_t shift;
2390 for (p = buf, i = maxidx; p <= buf + maxidx; ++p, --i)
2391 if (needle[i] != *p)
2392 break;
2393 if (p > buf + maxidx)
2394 return buf;
2396 shift = i + 1 - badchar[*p];
2397 if (shift < goodsfx[i])
2398 shift = goodsfx[i];
2400 buf -= shift;
2401 buflen -= shift;
2402 }
2403 return NULL;
2404 }
2406 /* Search for a constant byte string in @buf.
2407 * If @buflen is negative, search backwards, otherwise search forwards.
2408 */
2409 static inline unsigned char*
2410 find_bytestr_buf(unsigned char *buf, ssize_t buflen,
2411 unsigned char *needle, ssize_t maxidx,
2412 ssize_t *badchar, ssize_t *goodsfx)
2413 {
2414 if (buflen < 0) {
2415 buflen = -buflen;
2416 if (!maxidx)
2417 return memrchr(buf - buflen + 1, *needle, buflen);
2418 return bm_find_rev(buf, buflen, needle, maxidx,
2419 badchar, goodsfx);
2420 } else {
2421 if (!maxidx)
2422 return memchr(buf, *needle, buflen);
2423 return bm_find(buf, buflen, needle, maxidx,
2424 badchar, goodsfx);
2425 }
2426 }
2428 /* Search for a constant byte string using the Boyer-Moore algorithm. */
2429 static int
2430 find_bytestr(struct hed_file *file, blockoff_t *from, int dir,
2431 unsigned char *needle, ssize_t len)
2432 {
2433 void *dynalloc;
2434 ssize_t *badchar, *goodsfx;
2435 unsigned char *readbuf;
2436 ssize_t slen;
2437 int ret;
2439 if (len > 1) {
2440 dynalloc = calloc(sizeof(ssize_t) * (256 + 2*len)
2441 + 2*(len-1), 1);
2442 if (!dynalloc)
2443 return HED_FINDOFF_ERROR;
2444 badchar = dynalloc;
2445 goodsfx = badchar + 256;
2446 readbuf = dynalloc + sizeof(ssize_t) * (256 + 2*len);
2448 if (dir < 0)
2449 reverse(needle, len);
2450 compute_badchar(badchar, needle, len);
2451 compute_goodsfx(goodsfx, needle, len);
2452 } else {
2453 dynalloc = NULL;
2454 badchar = goodsfx = NULL;
2455 readbuf = NULL;
2456 }
2458 --len; /* simplify offset computing */
2459 if (dir < 0) {
2460 from->off += len;
2461 from->pos += len;
2462 slen = -len;
2463 } else
2464 slen = len;
2466 ret = HED_FINDOFF_NO_MATCH;
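/*
 * Walk the file block by block.  When a block has more than a full
 * needle's worth of contiguous data, it is searched in place; otherwise
 * a window of at most twice the needle length minus two bytes straddling
 * the block boundary is copied into @readbuf, so matches crossing block
 * boundaries are still found.
 */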
2467 while (from->pos >= 0) {
2468 ssize_t remain;
2470 fixup_blockoff(from);
2471 if (hed_block_is_eof(from->block))
2472 break;
2474 remain = hed_prepare_read(file, from, SSIZE_MAX);
2475 if (!remain) {
2476 ret = HED_FINDOFF_ERROR;
2477 break;
2478 }
2479 if (dir < 0)
2480 remain = -(from->off + 1);
2482 if (!hed_block_is_bad(from->block)) {
2483 unsigned char *p, *q;
2485 if ((dir >= 0 && remain > slen) ||
2486 (dir < 0 && remain < slen)) {
2487 assert(!hed_block_is_virtual(from->block));
2488 assert(from->block->dataobj);
2489 p = from->block->dataobj->data + from->off;
2490 from->off += remain;
2491 from->pos += remain;
2492 } else if (dir >= 0) {
2493 remain += slen;
2494 if (copy_in(file, readbuf, remain, from)) {
2495 ret = HED_FINDOFF_ERROR;
2496 break;
2497 }
2498 p = readbuf;
2499 } else {
2500 remain += slen;
2501 from->off += remain + 1;
2502 from->pos += remain + 1;
2503 if (from->pos < 0)
2504 break;
2505 fixup_blockoff_slow(from);
2506 if (copy_in(file, readbuf, -remain, from)) {
2507 ret = HED_FINDOFF_ERROR;
2508 break;
2509 }
2510 from->off -= -remain + 1;
2511 from->pos -= -remain + 1;
2512 p = readbuf + (-remain - 1);
2513 }
2515 q = find_bytestr_buf(p, remain, needle, len,
2516 badchar, goodsfx);
2517 if (q) {
2518 move_rel_fast(from, q - p - remain);
2519 ret = 0;
2520 break;
2521 }
2522 from->off -= slen;
2523 from->pos -= slen;
2524 } else {
2525 /* bad blocks cannot match anything */
2526 from->off += remain;
2527 from->pos += remain;
2528 }
2529 }
2531 if (dynalloc)
2532 free(dynalloc);
2533 return ret;
2534 }
2536 static int
2537 find_expr(struct hed_file *file, blockoff_t *from, int dir,
2538 struct hed_expr *expr, struct ffb_hookdata *data)
2539 {
2540 int len = hed_expr_len(expr);
2541 unsigned char *buf;
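/*
 * A dynamic expression (e.g. one that reads the '.' register) has to be
 * re-evaluated at every candidate position, so the match is checked
 * byte by byte with a throw-away cursor; a non-dynamic result
 * degenerates to a plain Boyer-Moore byte-string search below.
 */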
2543 assert(len > 0);
2545 if (len > file_size(file))
2546 return HED_FINDOFF_NO_MATCH;
2547 if ((hed_off_t)file_size(file) - from->pos - len < 0)
2548 hed_move_relative(from,
2549 (hed_off_t)file_size(file) - from->pos - len);
2551 for (;;) {
2552 blockoff_t match;
2553 size_t remain;
2554 unsigned char *p;
2555 int pos;
2557 buf = hed_expr_eval(expr, eval_reg_cb, NULL, data);
2558 if (!buf)
2559 return HED_FINDOFF_ERROR;
2561 hed_dup_cursor(from, &match);
2562 remain = 0;
2563 for (pos = 0; pos < len; pos++) {
2564 if (!remain) {
2565 remain = hed_prepare_read(file, &match,
2566 SIZE_MAX);
2567 if (!remain ||
2568 hed_block_is_bad(match.block))
2569 break;
2570 p = hed_cursor_data(&match);
2571 cursor_next_block(file, &match);
2572 }
2573 if (*p++ != buf[pos])
2574 break;
2575 remain--;
2576 }
2577 put_cursor(&match);
2579 if (pos == len)
2580 return 0;
2581 if (!remain)
2582 return HED_FINDOFF_ERROR;
2584 from->pos += dir;
2585 from->off += dir;
2586 if (0 > from->pos || from->pos > file_size(file) - len)
2587 break;
2588 fixup_blockoff(from);
2590 if (! (hed_expr_flags(expr) & HED_AEF_DYNAMIC) )
2591 return find_bytestr(file, from, dir, buf, len);
2592 }
2594 return HED_FINDOFF_NO_MATCH;
2595 }
2597 int
2598 hed_file_find_expr(struct hed_file *file, blockoff_t *pos, int dir,
2599 struct hed_expr *expr,
2600 hed_expr_reg_cb expr_cb, void *expr_cb_data)
2601 {
2602 struct ffb_hookdata data;
2603 int res;
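/*
 * Illustrative use (hypothetical caller, assuming a cursor obtained the
 * same way as elsewhere in this file; my_cb/my_cb_data are placeholders):
 *
 *	blockoff_t p;
 *	get_cursor(file, 0, &p);
 *	if (!hed_file_find_expr(file, &p, 1, expr, my_cb, my_cb_data))
 *		... p is left at the start of the first match ...
 *	put_cursor(&p);
 */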
2605 assert(dir == 1 || dir == -1);
2607 data.file = file;
2608 data.pos = pos;
2609 data.base_ecb = expr_cb;
2610 data.base_ecb_data = expr_cb_data;
2612 hed_file_set_readahead(file,
2613 dir > 0 ? HED_RA_FORWARD : HED_RA_BACKWARD);
2614 res = find_expr(file, pos, dir, expr, &data);
2615 hed_file_set_readahead(file, HED_RA_NONE);
2617 return res;