Fix hed expression length in find_bytestr()
[hed.git] / libhed / file.c
/* $Id$ */

/*
 * hed - Hexadecimal editor
 * Copyright (C) 2004 Petr Baudis <pasky@ucw.cz>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * There hammer on the anvil smote,
 * There chisel clove, and graver wrote;
 * There forged was blade, and bound was hilt;
 * The delver mined, the mason built.
 * There beryl, pearl, and opal pale,
 * And metal wrought like fishes' mail,
 * Buckler and corslet, axe and sword,
 * And shining spears were laid in hoard.
 */

/* Feature macros needed for:
 *  - memrchr
 *  - pread, pwrite
 *  - stpcpy
 */
#define _GNU_SOURCE
#include <config.h>

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>	/* BLKGETSIZE and BLKGETSIZE64 */

#include "file.h"
#include "access.h"
#include "cache.h"
#include "swap.h"
#include "tree.h"
#include "expr.h"

/* memrchr() might not be available */
#ifndef HAVE_MEMRCHR
# include "memrchr.c"
#endif /* HAVE_MEMRCHR */
/*
 * `Piles of jewels?' said Gandalf. `No. The Orcs have often plundered Moria;
 * there is nothing left in the upper halls. And since the dwarves fled, no one
 * dares to seek the shafts and treasuries down in the deep places: they are
 * drowned in water--or in a shadow of fear.'
 */

/* TODO: Currently the file blocks allocation is not very sophisticated and
 * when the weather is bad it could probably have rather horrible results. */

#undef BLOCKS_DEBUG
#ifdef BLOCKS_DEBUG
#define BDEBUG(x...) fprintf(stderr, x)
#else
#define BDEBUG(x...)
#endif
/* Number of blocks in cache */
#define CACHE_LENGTH 64

/* Blocks for readahead */
#define FILE_READAHEAD (CACHE_LENGTH/2)

#define first_block(f)	next_block(last_block(f))
#define prev_block(b)	(tree_entry(prev_in_tree(&(b)->t),struct hed_block,t))
#define next_block(b)	(tree_entry(next_in_tree(&(b)->t),struct hed_block,t))
#define last_block(f)	(&(f)->eof_block)

#define block_offset(b)	tree_block_offset(&(b)->t)

#define recalc_block_recursive(b)	recalc_node_recursive(&(b)->t)

#define chain_block_before(tree,b,p)	insert_into_tree((tree), &(b)->t, &(p)->t)
#define recalc_chain_block_before(tree,b,p)	do {	\
		chain_block_before((tree), (b), (p));	\
		recalc_block_recursive((b));		\
	} while (0)

#define unchain_block(tree,b)	del_from_tree((tree), &(b)->t)
#define recalc_unchain_block(tree,b)	recalc_del_from_tree((tree), &(b)->t)

#define init_block_list(tree,b)	init_tree(tree, &(b)->t)
#define init_block_link(b)	init_node(&(b)->t)

#define find_block(tree,o)	tree_entry(find_in_tree((tree),(o)),struct hed_block,t)
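/* Overview (explanatory note, not in the original source): file
 * contents are kept as struct hed_block nodes in a size-augmented
 * tree, so find_block() maps a logical offset to its covering block
 * in O(log n). A hed_cursor_t caches the logical position (->pos),
 * the covering block (->block) and the offset within that block
 * (->off), and links itself into block->refs so that the splitting
 * and merging code below can relocate live cursors. */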
#ifdef HED_CONFIG_SWAP

/* Return the swp file object */
static inline struct swp_file *
file_swp(struct hed_file *file)
{
	return file->swp;
}

#else

/* Provide a stub for the non-swap case */
static inline void *
file_swp(struct hed_file *file)
{
	return NULL;
}

#endif
#ifdef HED_CONFIG_READAHEAD

#define file_ra_none(f)		((f)->readahead == HED_RA_NONE)
#define file_ra_forward(f)	((f)->readahead == HED_RA_FORWARD)
#define file_ra_backward(f)	((f)->readahead == HED_RA_BACKWARD)

#else

#define file_ra_none(f)		(1)
#define file_ra_forward(f)	(0)
#define file_ra_backward(f)	(0)

#endif /* HED_CONFIG_READAHEAD */
/* Get the physical offset of the byte immediately following @block. */
static inline hed_uoff_t
phys_end(const struct hed_block *block)
{
	return hed_block_is_inserted(block)
		? block->phys_pos
		: block->phys_pos + hed_block_size(block);
}

static struct hed_block *
next_nonzero_block(struct hed_block *block)
{
	while (!hed_block_is_eof(block)) {
		block = next_block(block);
		if (hed_block_size(block))
			return block;
	}
	return NULL;
}

static struct hed_block *
prev_nonzero_block(struct hed_block *block)
{
	do {
		block = prev_block(block);
		if (hed_block_is_eof(block))
			return NULL;
	} while (!hed_block_size(block));
	return block;
}

bool
hed_block_is_after_erase(struct hed_block *block)
{
	struct hed_block *prev = prev_nonzero_block(block);
	return prev
		? block->phys_pos > phys_end(prev)
		: !!block->phys_pos;
}

bool
hed_block_is_after_insert(struct hed_block *block)
{
	struct hed_block *prev = prev_nonzero_block(block);
	return prev && hed_block_is_inserted(prev);
}
#ifndef BLOCKS_DEBUG
# define dump_blocks(file) {}
#else

static hed_uoff_t
block_phys_size(struct hed_file *file, struct hed_block *block)
{
	struct hed_block *next;

	if (hed_block_is_eof(block))
		return 0;
	next = next_block(block);
	return next->phys_pos - block->phys_pos;
}

static void
dump_block(int level, struct hed_file *file, struct hed_tree_node *node,
	   hed_uoff_t *cur_offset, hed_uoff_t *cur_poffset)
{
	struct hed_block *block = tree_entry(node, struct hed_block, t);
	bool virtual = hed_block_is_virtual(block);
	unsigned char *p;
	hed_cursor_t *cur;
	char t[21] = "                    ";

	if (node->left)
		dump_block(level + 1, file, node->left, cur_offset, cur_poffset);
	p = hed_block_data(block);
	if (level < 20) t[level] = '>'; else t[19] = '.';
	fprintf(stderr, "%s [%06llx] [%06llx] %c%c%c %05llx %05llx"
		" {%02x%02x%02x%02x} -- %p ^%p [%06llx]\n",
		t,
		(unsigned long long) *cur_offset,
		(unsigned long long) *cur_poffset,
		virtual ? 'v' : ' ',
		hed_block_is_inserted(block) ? 'i' : ' ',
		hed_block_is_dirty(block) ? '*' : ' ',
		(unsigned long long) node->size,
		(unsigned long long) block_phys_size(file, block),
		p && block->t.size > 0 ? p[0] : 0,
		p && block->t.size > 1 ? p[1] : 0,
		p && block->t.size > 2 ? p[2] : 0,
		p && block->t.size > 3 ? p[3] : 0,
		node, node->up,
		(unsigned long long) node->cover_size
		);
	list_for_each_entry (cur, &block->refs, list) {
		fprintf(stderr, "   <%p>: %llx->%p:%llx\n",
			cur, (long long)cur->pos,
			cur->block, (unsigned long long)cur->off);
	}
	assert(*cur_poffset == block->phys_pos);
	*cur_offset += node->size;
	*cur_poffset += block_phys_size(file, block);
	if (node->right)
		dump_block(level + 1, file, node->right, cur_offset, cur_poffset);
	assert(node->cover_size == (node->left ? node->left->cover_size : 0)
	       + (node->right ? node->right->cover_size : 0)
	       + node->size);
}

/* Walk the tree manually here, because foreach_block() does not provide
 * the tree structure.
 * TODO: Change this if you plan to debug any other block containers.
 */
static void
dump_blocks(struct hed_file *file)
{
	struct hed_block *first = first_block(file);
	hed_uoff_t cur_offset, cur_poffset;

	fprintf(stderr, "-- blocks dump --\n");
	cur_offset = 0;
	cur_poffset = first->phys_pos;
	dump_block(0, file, hed_file_blocks(file)->root,
		   &cur_offset, &cur_poffset);
	fprintf(stderr, "-- blocks dump end --\n");
}

#endif
static void
get_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
{
	struct hed_block *block;

	block = find_block(hed_file_blocks(file), offset);
	assert(block != NULL);
	curs->pos = offset;
	curs->block = block;
	curs->off = offset - block_offset(block);
	list_add(&curs->list, &block->refs);

	BDEBUG("Mapped %llx to %llx+%llx/%llx\n",
	       offset, offset - curs->off, curs->off, block->t.size);
}

void
hed_get_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
{
	get_cursor(file, offset, curs);
}

static inline void
put_cursor(hed_cursor_t *curs)
{
	list_del(&curs->list);
}

void
hed_put_cursor(hed_cursor_t *curs)
{
	put_cursor(curs);
}

void
hed_update_cursor(struct hed_file *file, hed_uoff_t offset, hed_cursor_t *curs)
{
	put_cursor(curs);
	get_cursor(file, offset, curs);
}

void
hed_dup_cursor(const hed_cursor_t *src, hed_cursor_t *dst)
{
	dst->pos = src->pos;
	dst->block = src->block;
	dst->off = src->off;
	list_add_tail(&dst->list, &src->block->refs);
}

void
hed_dup2_cursor(const hed_cursor_t *src, hed_cursor_t *dst)
{
	if (hed_is_a_cursor(dst))
		put_cursor(dst);
	hed_dup_cursor(src, dst);
}
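/* Usage sketch (illustrative only; "file" and the offset are made up):
 *
 *	hed_cursor_t curs;
 *	hed_get_cursor(file, 0x1000, &curs);
 *	... read or modify at curs ...
 *	hed_put_cursor(&curs);
 *
 * Dropping the cursor unlinks it from block->refs, so it must not be
 * used again without hed_get_cursor()/hed_dup_cursor(). */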
/* Move cursors from @old to @new, adding @off to their block
 * offsets to keep them at the same position. */
static void
update_cursors(const struct hed_block *old, struct hed_block *new,
	       hed_off_t off)
{
	hed_cursor_t *curs;

	BDEBUG("Updating cursors from <%p> to <%p>%c%llx\n",
	       old, new, off >= 0 ? '+' : '-', off >= 0 ? off : -off);

	list_for_each_entry(curs, &old->refs, list) {
		curs->block = new;
		curs->off += off;
	}
}

/* Move cursors in the range <@start;@end> from @old to @new,
 * adding @off to their block offset, plus moving the reference list. */
static void
move_cursors(const struct hed_block *old, struct hed_block *new,
	     hed_uoff_t start, hed_uoff_t end, hed_off_t off)
{
	hed_cursor_t *curs, *nextcurs;

	BDEBUG("Moving cursors from <%p>:%llx:%llx to <%p>%c%llx\n",
	       old, start, end, new,
	       off >= 0 ? '+' : '-', off >= 0 ? off : -off);

	list_for_each_entry_safe(curs, nextcurs, &old->refs, list)
		if (curs->off >= start && curs->off <= end) {
			curs->block = new;
			curs->off += off;
			list_move(&curs->list, &new->refs);
		}
}

/* Move cursors in the range @block:<@start;@end> to @newpos */
static void
move_cursors_abs(const struct hed_block *block,
		 hed_uoff_t start, hed_uoff_t end,
		 const hed_cursor_t *newpos)
{
	hed_cursor_t *curs, *nextcurs;

	BDEBUG("Moving cursors from <%p>:%llx:%llx to <%p>:%llx\n",
	       block, start, end, newpos->block, newpos->off);

	list_for_each_entry_safe(curs, nextcurs, &block->refs, list)
		if (curs->off >= start && curs->off <= end) {
			curs->pos = newpos->pos;
			curs->block = newpos->block;
			curs->off = newpos->off;
			list_move(&curs->list, &newpos->block->refs);
		}
}

/* Update the positions of cursors at and after @start for all
 * blocks starting at @block */
static void
slide_cursors(struct hed_file *file, const struct hed_block *block,
	      hed_uoff_t start, hed_off_t off)
{
	hed_cursor_t *curs;
	const struct hed_block *nblock;

	BDEBUG("Sliding cursors >= %llx by %c%llx, starting at <%p>\n",
	       start, off >= 0 ? '+' : '-', off >= 0 ? off : -off, block);
	nblock = block;
	do {
		block = nblock;
		list_for_each_entry(curs, &block->refs, list)
			if (curs->pos >= start)
				curs->pos += off;
		nblock = next_block(block);
	} while (!hed_block_is_eof(block));
}
static struct hed_block *
new_block(struct hed_file *file, long flags)
{
	struct hed_block *new;

	if (! (new = swp_zalloc(file_swp(file), sizeof(struct hed_block))) )
		return NULL;

	new->flags = flags;
	init_block_link(new);
	INIT_LIST_HEAD(&new->refs);
	if (flags & HED_BLOCK_EXCACHE)
		INIT_LIST_HEAD(&new->lru);
	else
		list_add_tail(&new->lru, &file->lru);

	return new;
}

static struct hed_block *
new_virt_block(struct hed_file *file, hed_uoff_t pos, hed_uoff_t size,
	       long extraflags)
{
	struct hed_block *new =
		new_block(file, (HED_BLOCK_EXCACHE |
				 HED_BLOCK_VIRTUAL |
				 extraflags));
	if (!new)
		return NULL;

	new->t.size = size;
	new->phys_pos = pos;
	BDEBUG("Spawned new virtual block [%llx] at %llx\n", size, pos);
	return new;
}

static struct hed_block *
new_data_block(struct hed_file *file, hed_uoff_t pos, hed_uoff_t size,
	       struct hed_block_data *dataobj)
{
	struct hed_block *new =
		new_block(file, 0);
	if (!new)
		return NULL;

	cache_get(dataobj);
	new->dataobj = dataobj;
	new->t.size = size;
	new->phys_pos = pos;
	new->dataoff = FILE_BLOCK_OFF(pos);
	BDEBUG("Spawned new data block [%llx] at %llx\n", size, pos);
	return new;
}
static void
file_free_block(struct hed_file *file, struct hed_block *block)
{
	if (block->dataobj)
		cache_put(file->cache, block->dataobj);
	list_del(&block->lru);

	swp_free(file_swp(file), block);
}

static bool
kill_block_if_empty(struct hed_file *file, struct hed_block *block)
{
	if (!hed_block_is_eof(block) && block->t.size == 0 &&
	    list_empty(&block->refs)) {
		/* No recalculation needed, zero size. */
		unchain_block(hed_file_blocks(file), block);
		file_free_block(file, block);
		return true;
	}
	return false;
}
/* This may kill the previous block as well, if it can be merged
 * with the next one. It will never kill anything which _follows_. */
static void
file_kill_block(struct hed_file *file, struct hed_block *block)
{
	hed_uoff_t phys_pos = block->phys_pos;
	struct hed_block *prev = prev_block(block);
	struct hed_block *next = next_block(block);
	struct hed_block *merger;
	bool killprev = false;

	/* We should never kill a dirty block! */
	assert(!hed_block_is_dirty(block));
	/* We shouldn't get here with an empty block (that might
	 * need special considerations with virtualization). */
	assert(block->t.size > 0);

	if (!hed_block_is_eof(block) &&
	    hed_block_is_inner_virtual(next) &&
	    phys_pos + block->t.size == next->phys_pos) {
		if (!hed_block_is_eof(prev) &&
		    hed_block_is_inner_virtual(prev) &&
		    prev->phys_pos + prev->t.size == phys_pos)
			killprev = true;
		merger = next;
		merger->phys_pos -= block->t.size;
		update_cursors(merger, merger, block->t.size);
		update_cursors(block, merger, 0);
	} else if (!hed_block_is_eof(prev) &&
		   hed_block_is_inner_virtual(prev) &&
		   prev->phys_pos + prev->t.size == phys_pos) {
		merger = prev;
		update_cursors(block, merger, merger->t.size);
	} else if (!hed_block_is_virtual(block)) {
		/* Convert physical to virtual */
		assert(block->dataobj);
		cache_put(file->cache, block->dataobj);
		block->dataobj = NULL;

		list_del_init(&block->lru); /* unlink the block from LRU */
		hed_block_set_excache(block); /* say it's unlinked */
		hed_block_set_virtual(block);
		return;
	} else
		/* Already virtual and cannot merge */
		return;

	list_splice(&block->refs, &merger->refs);

	/* Messing with block sizes and unchaining is a bit tricky
	 * since unchain_block() can splay(). So we really need
	 * to recalc_block_recursive() right after we update the size.
	 * If this place turns out to be a hot-spot, we can optimize
	 * the tree operations here. */
	merger->t.size += block->t.size;
	recalc_block_recursive(merger);

	/* Destroy the block */
	recalc_unchain_block(hed_file_blocks(file), block);
	file_free_block(file, block);

	if (killprev)
		file_kill_block(file, prev);
}
static struct hed_block *
split_block(struct hed_file *file, struct hed_block *block,
	    hed_uoff_t splitpoint)
{
	struct hed_block *head;

	head = new_block(file, block->flags & ~HED_BLOCK_EOF);
	if (!head)
		return NULL;

	if ( (head->dataobj = block->dataobj) ) {
		cache_get(head->dataobj);
		head->dataoff = block->dataoff;
		block->dataoff += splitpoint;
	} else
		assert(hed_block_is_virtual(block));

	head->t.size = splitpoint;
	head->phys_pos = block->phys_pos;

	block->t.size -= splitpoint;
	if (!hed_block_is_inserted(block))
		block->phys_pos += splitpoint;
	recalc_block_recursive(block);
	recalc_chain_block_before(hed_file_blocks(file), head, block);

	move_cursors(block, head, 0, splitpoint - 1, 0);
	update_cursors(block, block, -splitpoint);

	return head;
}
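/* Illustration (not in the original source): splitting a block of size
 * 0x30 at splitpoint 0x10 chains a new 0x10-byte head before it; the
 * original block keeps the remaining 0x20 bytes. Cursors at offsets
 * <0;0xf> migrate to the head, the others stay and have their ->off
 * shifted down by 0x10 via update_cursors(block, block, -splitpoint). */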
/* Replace a chunk in @block with @newblock */
static int
replace_chunk(struct hed_file *file, struct hed_block *block,
	      hed_uoff_t offset, struct hed_block *newblock)
{
	size_t len = newblock->t.size;
	hed_uoff_t leadlen = offset + len;

	assert(offset < block->t.size);
	assert(len <= block->t.size - offset);

	/* Re-create the head block if necessary */
	if (offset) {
		struct hed_block *head;

		head = new_block(file, block->flags & ~HED_BLOCK_EOF);
		if (!head)
			return -1;
		head->t.size = offset;
		head->dataobj = block->dataobj;
		head->dataoff = block->dataoff;
		head->phys_pos = block->phys_pos;

		recalc_chain_block_before(hed_file_blocks(file),
					  head, block);

		/* Move cursors to the head */
		move_cursors(block, head, 0, offset - 1, 0);
	}

	/* Move pointers to the new block */
	assert(leadlen > 0);
	move_cursors(block, newblock, offset, leadlen - 1, -offset);

	/* Shorten the tail block */
	block->t.size -= leadlen;
	block->dataoff += leadlen;
	assert(!hed_block_is_inserted(block));
	block->phys_pos += leadlen;
	recalc_block_recursive(block);
	update_cursors(block, block, -leadlen);

	/* Insert the new block */
	recalc_chain_block_before(hed_file_blocks(file), newblock, block);

	/* Kill the (now possibly empty) tail block if possible */
	kill_block_if_empty(file, block);

	return 0;
}
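/* Illustration (not in the original source): replace_chunk() on a
 * 0x30-byte block with @offset 0x10 and a 0x8-byte @newblock ends up
 * with three chained blocks: a head covering <0;0xf>, @newblock
 * covering <0x10;0x17>, and the shortened original as the tail from
 * 0x18 on; a tail that shrinks to zero is reaped by
 * kill_block_if_empty(). */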
#ifdef HED_CONFIG_SWAP

static char *
swp_filename(const char *filename)
{
	size_t fnlen = strlen(filename);
	char *swp;
	char *file;

	if (!(swp = malloc(fnlen + 9)) )
		return NULL;
	strcpy(swp, filename);

	file = strrchr(swp, '/');
	file = file ? file + 1 : swp;
	*file = '.';
	strcpy(stpcpy(file + 1, filename + (file - swp)), ".hedswp");
	return swp;
}
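/* For example, swp_filename("/some/dir/file") yields
 * "/some/dir/.file.hedswp"; the fnlen + 9 above covers the added dot,
 * the ".hedswp" suffix and the terminating NUL. */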
static char *
newswp_filename(char *swpname)
{
	char *ret, *p;

	ret = swpname;
	while (!access(ret, F_OK)) {
		if (ret == swpname) {
			if (! (ret = strdup(swpname)) )
				return NULL;
			p = ret + strlen(ret) - 1;
		}

		if (*p == 'a') {
			free(ret);
			return NULL;
		}
		--*p;
	}
	return ret;
}
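/* If that name is taken, the last character is decremented until an
 * unused name is found (".file.hedswp", ".file.hedswo", ...), giving
 * up with NULL when it would fall below 'a'. */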
int
hed_file_remove_swap(struct hed_file *file)
{
	if (remove(file->swpname))
		return -1;
	if (rename(file->newswpname, file->swpname))
		return -1;

	free(file->newswpname);
	file->newswpname = file->swpname;
	return 0;
}
static inline struct hed_file *
file_swp_init(const char *name)
{
	char *swpname, *newswpname;
	struct swp_file *swp;
	struct hed_file *file;

	swpname = swp_filename(name);
	if (!swpname)
		goto fail;
	newswpname = newswp_filename(swpname);
	if (!newswpname)
		goto fail_free_name;
	swp = swp_init_write(newswpname);
	if (!swp)
		goto fail_free_newname;

	assert(sizeof(struct swp_header) + sizeof(struct hed_file)
	       <= FILE_BLOCK_SIZE);
	file = swp_private(swp);
	memset(file, 0, sizeof *file);

	file->swp = swp;
	file->swpname = swpname;
	file->newswpname = newswpname;

	return file;

fail_free_newname:
	free(newswpname);
fail_free_name:
	if (swpname != newswpname)
		free(swpname);
fail:
	return NULL;
}

static inline void
file_swp_done(struct hed_file *file)
{
	remove(file->newswpname);
	if (file->newswpname != file->swpname)
		free(file->newswpname);
	free(file->swpname);
	swp_done(file_swp(file));
	/* file was de-allocated automatically with file->swp */
}
#else /* HED_CONFIG_SWAP */

static inline struct hed_file *
file_swp_init(const char *name)
{
	return calloc(1, sizeof(struct hed_file));
}

static inline void
file_swp_done(struct hed_file *file)
{
	free(file);
}

#endif /* HED_CONFIG_SWAP */
static inline struct stat *
file_stat(struct hed_file *file)
{
	return &file->s;
}

int
hed_file_update_size(struct hed_file *file)
{
	hed_uoff_t oldsize = file->phys_size;

	if (fstat(file->fd, file_stat(file)) < 0)
		return -1;

	if (S_ISBLK(file_stat(file)->st_mode)) {
		if (ioctl(file->fd, BLKGETSIZE64, &file->phys_size)) {
			unsigned long size_in_blocks;
			if (ioctl(file->fd, BLKGETSIZE, &size_in_blocks))
				return -1;
			file->phys_size = (hed_uoff_t)size_in_blocks << 9;
		}
	} else if (S_ISREG(file_stat(file)->st_mode)) {
		file->phys_size = file_stat(file)->st_size;
	} else if (S_ISCHR(file_stat(file)->st_mode)) {
		if (lseek(file->fd, 0, SEEK_SET) < 0)
			return -1;
		file->phys_size = (hed_uoff_t)OFF_MAX + 1;
	} else {
		errno = EINVAL;
		return -1;
	}

	file->size += file->phys_size - oldsize;
	return 0;
}
static int
do_file_open(struct hed_file *file)
{
	file->fd = open(file->name, O_RDONLY);
	if (file->fd < 0) {
		if (errno != ENOENT)
			return errno;
		fprintf(stderr, "Warning: File %s does not exist\n",
			file->name);
		memset(file_stat(file), 0, sizeof(struct stat));

	} else if (hed_file_update_size(file)) {
		int errsv = errno;
		close(file->fd);
		return errsv;
	}
	return 0;
}
static int
file_setup_blocks(struct hed_file *file)
{
	hed_uoff_t phys_size = file->phys_size;
	struct hed_block *block;

	block = &file->eof_block;
	block->flags = HED_BLOCK_EXCACHE | HED_BLOCK_VIRTUAL | HED_BLOCK_EOF;
	INIT_LIST_HEAD(&block->lru);
	INIT_LIST_HEAD(&block->refs);
	block->t.size = OFF_MAX - phys_size + 1;
	block->phys_pos = phys_size;

	init_block_list(hed_file_blocks(file), block);

	if (phys_size) {
		block = new_virt_block(file, 0, phys_size, 0);
		if (!block)
			return -1;
		recalc_chain_block_before(hed_file_blocks(file), block,
					  &file->eof_block);
	}

	return 0;
}
int
libhed_init(void)
{
	return file_access_init();
}

struct hed_file *
hed_open(const char *name)
{
	struct hed_file *file;

	if (! (file = file_swp_init(name)) )
		goto fail;

	file->name = name;

	file->cache = cache_init(CACHE_LENGTH, file_swp(file));
	if (!file->cache)
		goto fail_file;
	INIT_LIST_HEAD(&file->lru);

	if (do_file_open(file))
		goto fail_file;

	if (file_setup_blocks(file))
		goto fail_file;

	fixup_register(file);

	return file;

fail_file:
	hed_close(file);
fail:
	return NULL;
}
void
hed_close(struct hed_file *file)
{
	assert(file);

	/* Do not check for errors:
	 * 1. This FD is read-only => no data loss possible
	 * 2. We're about to exit anyway => no resource leak
	 */
	if (file->fd >= 0)
		close(file->fd);

	fixup_deregister(file);

	/* No need to free file blocks here, because all data is
	 * allocated either from the cache or from the swap file
	 * and both are going to be destroyed now.
	 */
	if (file->cache)
		cache_done(file->cache);

	file_swp_done(file);
}
/* Adjust a cursor after off gets outside its block */
static void
fixup_cursor_slow(hed_cursor_t *curs)
{
	struct hed_block *block = curs->block;
	hed_uoff_t off = curs->off;

	do {
		if ((hed_off_t)off < 0) {
			block = prev_block(block);
			off += block->t.size;
		} else {
			off -= block->t.size;
			block = next_block(block);
		}
	} while (off >= block->t.size);

	curs->block = block;
	curs->off = off;
	list_move(&curs->list, &block->refs);
}

/* Adjust a cursor if off gets outside its block.
 * This is separate from fixup_cursor_slow, because it is supposed
 * to be small enough to be inlined (which is a win, because most of
 * the time no fixup has to be done, so the fast inlined path is used).
 */
static inline void
fixup_cursor(hed_cursor_t *curs)
{
	if (curs->off >= curs->block->t.size)
		fixup_cursor_slow(curs);
}
hed_off_t
hed_move_relative(hed_cursor_t *curs, hed_off_t num)
{
	hed_off_t newpos = curs->pos + num;
	if (newpos < 0) {
		newpos = num < 0 ? 0 : OFF_MAX;
		num = newpos - curs->pos;
	}
	curs->pos = newpos;
	curs->off += num;
	fixup_cursor(curs);
	return num;
}

/* Relative move with no checking (and only by a small amount) */
static inline void
move_rel_fast(hed_cursor_t *curs, ssize_t num)
{
	curs->off += num;
	curs->pos += num;
	fixup_cursor(curs);
}
static void
alloc_caches(struct hed_file *file, struct hed_block_data **caches, int n)
{
	struct remap_control rc;
	int i;

	BDEBUG("Allocate %d caches (%d free slots available)\n",
	       n, file->cache->nfree);

	assert(n <= CACHE_LENGTH);
	while (file->cache->nfree < n) {
		struct hed_block *block;

		assert(!list_empty(&file->lru));
		block = list_entry(file->lru.next, struct hed_block, lru);
		BDEBUG("Killing block at physical %llx\n", block->phys_pos);
		file_kill_block(file, block);
	}

	for (i = 0; i < n; ++i) {
		caches[i] = cache_alloc(file->cache);
		assert(caches[i]);
	}

	remap_init(&rc);
	remap_compact(&rc, file->cache, caches, n);
	for (i = 0; i < n; ++i)
		remap_add(&rc, caches[i]->data);
	remap_finish(&rc);
}

static inline void
free_caches(struct hed_file *file, struct hed_block_data **preload, int n)
{
	int i;

	for (i = 0; i < n; ++i)
		if (preload[i])
			cache_put(file->cache, preload[i]);
}
static int
file_load_data(struct hed_file *file,
	       struct hed_block_data **caches, int n,
	       hed_uoff_t offset)
{
	struct hed_block_data *dataobj = caches[0];
	void *data = dataobj->data;
	ssize_t rsize, total, segsize;

	segsize = n << FILE_BLOCK_SHIFT;
	for (total = 0; total < segsize; total += rsize) {
		rsize = pread(file->fd, data + total,
			      segsize - total, offset + total);
		if (!rsize) {
			memset(data + total, 0, segsize - total);
			break;
		}
		if (rsize < 0) {
			dataobj = caches[total >> FILE_BLOCK_SHIFT];
			caches[total >> FILE_BLOCK_SHIFT] = NULL;
			data = dataobj->data;
			cache_put(file->cache, dataobj);
			total = FILE_BLOCK_ROUND(total);
			rsize = FILE_BLOCK_SIZE;
			BDEBUG("Error reading block at phys %llx: %s\n",
			       offset + total, strerror(errno));
		}
	}

	BDEBUG("Loaded data at phys %llx up to %llx\n",
	       offset, offset + segsize);
	return 0;
}
#ifdef HED_CONFIG_MMAP

static int
file_load_data_mmap(struct hed_file *file,
		    struct hed_block_data **caches, int n,
		    hed_uoff_t offset)
{
	void *data;
	ssize_t segsize;
	int i;

	segsize = n << FILE_BLOCK_SHIFT;
	data = mmap(NULL, segsize,
		    PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | (file->fd < 0 ? MAP_ANONYMOUS : 0),
		    file->fd, offset);

	if (data == MAP_FAILED) {
		BDEBUG("mmap failed at %llx: fail over to traditional read\n",
		       offset);

		data = mmap(NULL, segsize,
			    PROT_READ | PROT_WRITE,
			    MAP_PRIVATE | MAP_ANONYMOUS,
			    0, 0);
		if (data == MAP_FAILED)
			return -1;

		for (i = 0; i < n; ++i)
			caches[i]->data = data + (i << FILE_BLOCK_SHIFT);
		return file_load_data(file, caches, n, offset);
	}

	for (i = 0; i < n; ++i)
		caches[i]->data = data + (i << FILE_BLOCK_SHIFT);

	BDEBUG("Loaded data at phys %llx up to %llx\n",
	       offset, offset + segsize);
	return 0;
}

# define file_load_data file_load_data_mmap

#endif
/* Find the block with the lowest physical position that intersects
 * the loaded segment. The search starts at @block.
 */
static struct hed_block *
first_load_block(struct hed_tree *tree, struct hed_block *block,
		 hed_uoff_t segstart)
{
	struct hed_block *prev = block;
	do {
		block = prev;
		prev = prev_block(prev);
	} while (!hed_block_is_eof(prev) && phys_end(prev) > segstart);
	return block;
}
static int
load_blocks(struct hed_file *file, const hed_cursor_t *from)
{
	hed_uoff_t physpos, segstart;
	struct hed_block_data *preload[FILE_READAHEAD];
	size_t ra_bkw, ra_fwd, ra_off;
	hed_cursor_t pos;
	int nblocks;

	segstart = hed_cursor_phys_pos(from);
	ra_bkw = FILE_BLOCK_OFF(segstart);
	ra_fwd = FILE_BLOCK_SIZE - ra_bkw;

	if (file_ra_forward(file))
		ra_fwd += (FILE_READAHEAD - 1) << FILE_BLOCK_SHIFT;
	else if (file_ra_backward(file))
		ra_bkw += (FILE_READAHEAD - 1) << FILE_BLOCK_SHIFT;

	if (ra_bkw > segstart)
		ra_bkw = segstart;
	if (ra_fwd > file->phys_size - segstart)
		ra_fwd = file->phys_size - segstart;

	segstart -= ra_bkw;
	ra_fwd += ra_bkw;
	pos.block = first_load_block(hed_file_blocks(file),
				     from->block, segstart);
	pos.off = segstart >= pos.block->phys_pos
		? segstart - pos.block->phys_pos
		: 0;

	list_add(&pos.list, &pos.block->refs);
	nblocks = ((ra_fwd - 1) >> FILE_BLOCK_SHIFT) + 1;
	alloc_caches(file, preload, nblocks);
	put_cursor(&pos);

	if (file_load_data(file, preload, nblocks, segstart)) {
		free_caches(file, preload, nblocks);
		return -1;
	}

	while (physpos = hed_cursor_phys_pos(&pos),
	       ra_off = physpos - segstart,
	       ra_off < ra_fwd) {
		struct hed_block_data *dataobj;
		struct hed_block *newblock;
		size_t datalen;

		if (!hed_block_is_virtual(pos.block)) {
			pos.block = next_block(pos.block);
			pos.off = 0;
			continue;
		}

		datalen = FILE_BLOCK_SIZE - FILE_BLOCK_OFF(physpos);
		if (datalen > hed_block_size(pos.block) - pos.off)
			datalen = hed_block_size(pos.block) - pos.off;

		dataobj = preload[ra_off >> FILE_BLOCK_SHIFT];
		newblock = dataobj
			? new_data_block(file, physpos, datalen, dataobj)
			: new_virt_block(file, physpos, datalen,
					 HED_BLOCK_BAD);

		/* Punch the new block */
		BDEBUG("Add %s block at %llx, length %llx\n",
		       hed_block_is_virtual(newblock) ? "error" : "physical",
		       newblock->phys_pos, newblock->t.size);
		if (replace_chunk(file, pos.block, pos.off, newblock)) {
			file_free_block(file, newblock);
			free_caches(file, preload, nblocks);
			return -1;
		}

		pos.block = next_block(newblock);
		pos.off = 0;
	}

	/* All cache objects now have an extra reference from the
	 * allocation. Drop it. */
	free_caches(file, preload, nblocks);

	dump_blocks(file);
	return 0;
}
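/* Worked example of the readahead window above (illustrative numbers,
 * assuming a 4 KiB FILE_BLOCK_SIZE): for a read at physical offset
 * 0x1234, ra_bkw = 0x234 and ra_fwd = 0xdcc, which block-aligns the
 * loaded segment; forward readahead then appends (FILE_READAHEAD - 1)
 * more blocks, and both ends are clamped to the physical file
 * boundaries before the caches are allocated. */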
/* Shorten a block at beginning and enlarge the preceding block.
 *
 * Re-allocate at most @len bytes from the beginning of @block to the
 * end of the preceding block.
 * If @block is virtual, this will effectively devirtualize the range.
 * If @block is not virtual, this will change the backing store of
 * the bytes in the range.
 * Returns: the number of bytes actually moved.
 */
static size_t
shrink_at_begin(struct hed_file *file, struct hed_block *block,
		size_t len, long state)
{
	struct hed_block *prev = prev_block(block);
	size_t maxgrow;

	/* Basic assumptions */
	assert(!(state & HED_BLOCK_VIRTUAL));

	/* The previous block must exist: */
	if (hed_block_is_eof(prev))
		return 0;

	/* The block flags must match the requested @state: */
	if ((prev->flags & HED_BLOCK_STATEMASK) != state)
		return 0;

	/* No deletions at end, or similar: */
	if (prev->phys_pos + prev->t.size != block->phys_pos)
		return 0;

	/* Append fewer bytes than requested if not all are available */
	assert(prev->t.size <= prev->dataobj->size - prev->dataoff);
	maxgrow = prev->dataobj->size - prev->dataoff - prev->t.size;
	if (len > maxgrow)
		len = maxgrow;
	if (!len)
		return 0;

	BDEBUG("Appending 0:%lx to the previous block\n", len);

	/* Move cursors away from the to-be-chopped beginning */
	move_cursors(block, prev, 0, len - 1, prev->t.size);

	/* Enlarge the previous block */
	prev->t.size += len;
	recalc_block_recursive(prev);

	/* Shorten the original block */
	block->t.size -= len;
	block->dataoff += len;
	block->phys_pos += len;
	recalc_block_recursive(block);
	return len;
}
/* Shorten a block at end and enlarge the following block.
 *
 * Re-allocate at most @len bytes from the end of @block to the
 * beginning of the following block.
 * If @block is virtual, this will effectively devirtualize the range.
 * If @block is not virtual, this will change the backing store of
 * the bytes in the range.
 * Returns: the number of bytes actually moved.
 */
static size_t
shrink_at_end(struct hed_file *file, struct hed_block *block,
	      size_t len, long state)
{
	struct hed_block *next = next_block(block);
	hed_uoff_t off;

	/* Basic assumptions */
	assert(!(state & HED_BLOCK_VIRTUAL));

	/* The next block must exist: */
	if (hed_block_is_eof(block))
		return 0;

	/* The block flags must match the requested @state: */
	if ((next->flags & HED_BLOCK_STATEMASK) != state)
		return 0;

	/* No deletions at end, or similar: */
	if (block->phys_pos + block->t.size != next->phys_pos)
		return 0;

	/* Prepend fewer bytes than requested if not all are available */
	if (len > next->dataoff)
		len = next->dataoff;
	if (!len)
		return 0;
	off = block->t.size - len;

	BDEBUG("Prepending %llx:%lx to the next block\n", off, len);

	/* Shift cursors in the new physical block */
	update_cursors(next, next, len);

	/* Move cursors away from the to-be-chopped end */
	move_cursors(block, next, off, UOFF_MAX, -off);

	/* Enlarge the next block */
	next->dataoff -= len;
	next->phys_pos -= len;
	next->t.size += len;
	recalc_block_recursive(next);

	/* Shorten the original block */
	block->t.size -= len;
	recalc_block_recursive(block);
	return len;
}
/* Search for an existing data object within the same physical block
 * as @curs, and having the given @state flags.
 */
static struct hed_block_data *
search_data(struct hed_file *file, const hed_cursor_t *curs, long state)
{
	struct hed_block *block;
	hed_uoff_t physpos;

	physpos = FILE_BLOCK_ROUND(curs->block->phys_pos + curs->off);
	BDEBUG("Search for already loaded data at %llx starting at %llx...",
	       physpos, curs->block->phys_pos);

	/* Search backwards */
	block = curs->block;
	while (!hed_block_is_eof(block = prev_block(block))) {
		if (block->phys_pos < physpos)
			break;
		if ((block->flags & HED_BLOCK_STATEMASK) == state) {
			BDEBUG(" found at %llx\n", block->phys_pos);
			assert(block->dataobj);
			return block->dataobj;
		}
	}

	/* Search forwards */
	block = curs->block;
	while (!hed_block_is_eof(block)) {
		block = next_block(block);
		if (block->phys_pos >= physpos + FILE_BLOCK_SIZE)
			break;
		if ((block->flags & HED_BLOCK_STATEMASK) == state) {
			BDEBUG(" found at %llx\n", block->phys_pos);
			assert(block->dataobj);
			return block->dataobj;
		}
	}

	BDEBUG(" not found\n");
	return NULL;
}
static int
reuse_loaded_data(struct hed_file *file, const hed_cursor_t *curs,
		  struct hed_block_data *data)
{
	struct hed_block *physblock;
	struct hed_block *block = curs->block;
	hed_uoff_t block_offset = curs->off;
	hed_uoff_t physpos = block->phys_pos + block_offset;
	size_t part = FILE_BLOCK_OFF(physpos);
	size_t len =
		FILE_BLOCK_SIZE - part <= block->t.size - block_offset
		? FILE_BLOCK_SIZE - part
		: block->t.size - block_offset;

	if (part > block_offset)
		part = block_offset;
	physpos -= part;
	len += part;
	block_offset -= part;

	if (! (physblock = new_data_block(file, physpos, len, data)) )
		return -1;

	BDEBUG("Add physical block at %llx, length %llx\n",
	       physblock->phys_pos, physblock->t.size);
	if (replace_chunk(file, block, block_offset, physblock)) {
		file_free_block(file, physblock);
		return -1;
	}

	dump_blocks(file);
	return 0;
}
/* Replace a part of a virtual block with content loaded
 * from disk. The amount of data loaded from the disk depends
 * on various factors with the goal to choose the most efficient
 * ratio. The only guarantee is that the byte at @curs will
 * be in a non-virtual block when this function returns 0.
 */
static int
devirtualize_clean(struct hed_file *file, const hed_cursor_t *curs)
{
	struct hed_block *block = curs->block;
	hed_uoff_t block_offset = curs->off;
	hed_uoff_t remain = block->t.size - block_offset;
	struct hed_block_data *data;

	BDEBUG("punch a clean hole at %llx into %llx:%llx\n", block_offset,
	       block_offset(block), block->t.size);
	assert(hed_block_is_virtual(block));

	/* Check if we can combine with a neighbouring block */
	if (shrink_at_begin(file, block,
			    hed_block_size(block), 0) > block_offset
	    || shrink_at_end(file, block,
			     hed_block_size(block), 0) >= remain) {
		kill_block_if_empty(file, block);
		dump_blocks(file);
		return 0;
	}

	/* Check if the block is already loaded elsewhere */
	data = search_data(file, curs, 0);
	return data
		? reuse_loaded_data(file, curs, data)
		: load_blocks(file, curs);
}
/* Unless the block at @curs is already dirty, replace at most
 * @len bytes at @curs with a newly allocated out-of-cache block.
 * The block is marked dirty and its data is left uninitialized.
 * Note that this function may devirtualize less than @len bytes.
 * In the worst case only 1 byte at @curs will be available.
 */
static int
prepare_modify(struct hed_file *file, hed_cursor_t *curs, size_t len)
{
	struct hed_block *block = curs->block;
	hed_uoff_t block_offset = curs->off;
	hed_uoff_t remain = block->t.size - block_offset;
	struct hed_block *newblock;

	if (hed_block_is_dirty(block))
		return 0;

	if (len > remain)
		len = remain;

	BDEBUG("punch a dirty hole at %llx:%lx into %llx:%llx\n",
	       block_offset, len,
	       block_offset(block), block->t.size);

	/* Check if we can combine with a neighbouring block */
	if ((block_offset == 0 &&
	     shrink_at_begin(file, block, len, HED_BLOCK_DIRTY)) ||
	    (remain == len &&
	     shrink_at_end(file, block, len, HED_BLOCK_DIRTY) >= len)) {
		kill_block_if_empty(file, block);
		dump_blocks(file);
		return 0;
	}

	/* Initialize a new block */
	newblock = new_block(file, HED_BLOCK_EXCACHE | HED_BLOCK_DIRTY);
	if (!newblock)
		goto out_err;

	/* Allocate data */
	if ( (newblock->dataobj = search_data(file, curs, HED_BLOCK_DIRTY)) )
		cache_get(newblock->dataobj);
	else if (! (newblock->dataobj = block_data_new(file->cache,
						       FILE_BLOCK_SIZE)) )
		goto out_err_free;

	newblock->phys_pos = block->phys_pos + block_offset;
	newblock->dataoff = FILE_BLOCK_OFF(newblock->phys_pos);
	if (len > FILE_BLOCK_SIZE - newblock->dataoff)
		len = FILE_BLOCK_SIZE - newblock->dataoff;
	newblock->t.size = len;

	if (replace_chunk(file, block, block_offset, newblock))
		goto out_err_free;

	dump_blocks(file);
	return 0;

out_err_free:
	file_free_block(file, newblock);
out_err:
	return -1;
}
/* Ensure that @curs points to an up-to-date non-virtual block.
 * Load the data from disk if necessary, return zero on failure. */
size_t
hed_prepare_read(struct hed_file *file, const hed_cursor_t *curs, size_t len)
{
	struct hed_block *block = curs->block;
	if (hed_block_is_inner_virtual(block) &&
	    devirtualize_clean(file, curs) < 0)
		return 0;

	return hed_cursor_chunk_len(curs, len);
}
/* Move the block pointer to the next block */
static struct hed_block *
cursor_next_block(hed_cursor_t *curs)
{
	struct hed_block *block = next_nonzero_block(curs->block);

	if (block) {
		curs->block = block;
		curs->off = 0;
		list_move(&curs->list, &block->refs);
	}
	return block;
}

/* This is an optimized way of doing:
 *
 *	hed_move_relative(curs, curs->block->t.size);
 *
 * for the case when curs->off == 0.
 */
static struct hed_block *
move_to_next(hed_cursor_t *curs)
{
	curs->pos += hed_block_size(curs->block);
	return cursor_next_block(curs);
}
/* Copy in @count bytes from @pos.
 * Returns the number of bytes that were not read (i.e. zero on success).
 * The @pos cursor is moved by the amount of data read.
 * CAUTION: If you read up to OFF_MAX, then @pos points one byte
 * beyond the EOF block upon return.
 */
static size_t
copy_in(struct hed_file *file, void *buf, size_t count, hed_cursor_t *pos)
{
	size_t cpylen;

	pos->pos += count;
	while (count && (cpylen = hed_prepare_read(file, pos, count))) {
		if (hed_block_is_virtual(pos->block))
			memset(buf, 0, cpylen);
		else
			memcpy(buf, hed_cursor_data(pos), cpylen);

		buf += cpylen;
		count -= cpylen;
		if ( (pos->off += cpylen) >= hed_block_size(pos->block) )
			if (!cursor_next_block(pos))
				break;
	}
	pos->pos -= count;
	return count;
}

size_t
hed_file_cpin(struct hed_file *file, void *buf, size_t count,
	      const hed_cursor_t *pos)
{
	hed_cursor_t mypos;
	size_t ret;

	hed_dup_cursor(pos, &mypos);
	ret = copy_in(file, buf, count, &mypos);
	put_cursor(&mypos);
	return ret;
}
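/* Example (sketch, illustrative only): reading 16 bytes without
 * disturbing the caller's cursor:
 *
 *	unsigned char buf[16];
 *	size_t missing = hed_file_cpin(file, buf, sizeof buf, pos);
 *
 * A non-zero return means that many trailing bytes could not be read;
 * virtual (never-written) ranges read back as zero bytes. */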
/* Set the modified flag */
static inline void
set_modified(struct hed_file *file)
{
	file->modified = true;
}

/* Clear the modified flag */
static inline void
clear_modified(struct hed_file *file)
{
	file->modified = false;
}

int
hed_file_set_byte(struct hed_file *file, hed_cursor_t *curs,
		  unsigned char byte)
{
	hed_uoff_t offset = curs->pos;

	if (prepare_modify(file, curs, 1))
		return -1;
	set_modified(file);

	if (offset >= file->size)
		file->size = offset + 1;

	hed_block_data(curs->block)[curs->off] = byte;
	return 0;
}
size_t
hed_file_set_block(struct hed_file *file, hed_cursor_t *curs,
		   unsigned char *buf, size_t len)
{
	while (len) {
		size_t span;

		if (prepare_modify(file, curs, len))
			break;
		set_modified(file);

		span = hed_cursor_chunk_len(curs, len);
		memcpy(hed_cursor_data(curs), buf, span);
		buf += span;
		len -= span;
		move_rel_fast(curs, span);
	}
	if (curs->pos > file->size)
		file->size = curs->pos;

	return len;
}

hed_uoff_t
hed_file_set_bytes(struct hed_file *file, hed_cursor_t *curs,
		   unsigned char byte, hed_uoff_t rep)
{
	while (rep) {
		size_t span;

		if (prepare_modify(file, curs, rep))
			break;
		set_modified(file);

		span = hed_cursor_chunk_len(curs, rep);
		memset(hed_cursor_data(curs), byte, span);
		rep -= span;
		move_rel_fast(curs, span);
	}
	if (curs->pos > file->size)
		file->size = curs->pos;

	return rep;
}
static int
file_erase_continuous(struct hed_file *file, hed_cursor_t *curs, size_t len)
{
	struct hed_block *block = curs->block;
	hed_uoff_t block_offset = curs->off;

	/* Find the new position */
	hed_move_relative(curs, len);

	/* Move all other cursors in the erased range to the new position */
	assert(len > 0);
	move_cursors_abs(block, block_offset,
			 block_offset + len - 1, curs);

	if (!block_offset) {
		block->dataoff += len;
		if (!hed_block_is_inserted(block))
			block->phys_pos += len;
	} else if (block_offset + len < block->t.size) {
		block = split_block(file, block, block_offset + len);
		if (!block)
			return -1;
	}

	move_cursors(block, block, block_offset, UOFF_MAX, -(hed_uoff_t)len);

	block->t.size -= len;
	recalc_block_recursive(block);

	kill_block_if_empty(file, block);
	return 0;
}
size_t
hed_file_erase_block(struct hed_file *file, hed_cursor_t *curs, hed_uoff_t len)
{
	hed_uoff_t offset;
	hed_uoff_t todo;

	offset = curs->pos;
	if (offset > hed_file_size(file))
		return 0;
	else if (len > hed_file_size(file) - offset)
		len = hed_file_size(file) - offset;

	todo = len;
	while (todo) {
		size_t span = hed_cursor_chunk_len(curs, todo);

		if (file_erase_continuous(file, curs, span))
			break;

		todo -= span;
	}
	len -= todo;

	file->size -= len;
	set_modified(file);

	file->eof_block.t.size += len;
	recalc_block_recursive(&file->eof_block);

	struct hed_block *slideblock = prev_block(curs->block);
	if (hed_block_is_eof(slideblock))
		slideblock = curs->block;
	slide_cursors(file, slideblock, curs->pos, -len);

	return todo;
}
int
hed_file_insert_begin(struct hed_file *file, const hed_cursor_t *curs,
		      hed_cursor_t *curs_ins)
{
	struct hed_block *newblock;

	BDEBUG("Starting insert at %llx\n", curs->pos);

	newblock = new_block(file,
			     HED_BLOCK_EXCACHE | HED_BLOCK_INSERTED);
	if (!newblock)
		return -1;

	newblock->phys_pos = hed_cursor_phys_pos(curs);
	newblock->dataobj = block_data_new(file->cache, FILE_BLOCK_SIZE);
	if (!newblock->dataobj) {
		file_free_block(file, newblock);
		return -1;
	}

	if (curs->off && !split_block(file, curs->block, curs->off)) {
		file_free_block(file, newblock);
		return -1;
	}

	chain_block_before(hed_file_blocks(file), newblock, curs->block);

	curs_ins->pos = curs->pos;
	curs_ins->off = newblock->t.size;
	curs_ins->block = newblock;
	list_add(&curs_ins->list, &newblock->refs);

	dump_blocks(file);
	return 0;
}

void
hed_file_insert_end(struct hed_file *file, hed_cursor_t *curs_ins)
{
	struct hed_block *block = curs_ins->block;

	BDEBUG("End insert at %llx\n", curs_ins->pos);

	curs_ins->block = NULL;
	list_del(&curs_ins->list);
	if (!kill_block_if_empty(file, block))
		block_data_shrink(file->cache, block->dataobj,
				  block->dataoff + block->t.size);

	dump_blocks(file);
}
static void
insert_block(struct hed_file *file, hed_cursor_t *curs,
	     unsigned char *buf, size_t len)
{
	struct hed_block *block = curs->block;
	hed_uoff_t offset = curs->pos;

	assert(file && offset >= 0);

	assert(hed_block_is_excache(block));
	hed_block_set_dirty(block);
	set_modified(file);

	memcpy(hed_block_data(block) + curs->off, buf, len);
	block->t.size += len;
	recalc_block_recursive(block);
	curs->off += len;
	curs->pos += len;

	if (curs->pos > file->size)
		file->size = curs->pos;
	else
		file->size += len;

	slide_cursors(file, next_block(block), offset, len);
}

size_t
hed_file_insert_block(struct hed_file *file, hed_cursor_t *curs,
		      unsigned char *buf, size_t len)
{
	while (len) {
		struct hed_block *block = curs->block;
		size_t remain = block->dataobj->size - curs->off;

		if (!remain) {
			list_del(&curs->list);
			curs->block = next_block(block);
			curs->off = 0;

			if (!hed_file_insert_begin(file, curs, curs))
				continue;

			curs->block = block;
			curs->off = block->t.size;
			list_add(&curs->list, &block->refs);
			break;
		}

		if (remain > len)
			remain = len;

		BDEBUG("Append %ld bytes to the insert block\n",
		       (long) remain);
		insert_block(file, curs, buf, remain);
		buf += remain;
		len -= remain;
	}
	return len;
}
int
hed_file_insert_byte(struct hed_file *file, hed_cursor_t *curs,
		     unsigned char byte)
{
	return hed_file_insert_block(file, curs, &byte, 1);
}

size_t
hed_file_insert_once(struct hed_file *file, hed_cursor_t *curs,
		     unsigned char *buf, size_t len)
{
	hed_cursor_t insert;

	if (!hed_file_insert_begin(file, curs, &insert)) {
		len = hed_file_insert_block(file, &insert, buf, len);
		hed_file_insert_end(file, &insert);
	}
	return len;
}
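/* Example (sketch): hed_file_insert_once() above is also the template
 * for interactive insert mode:
 *
 *	hed_cursor_t ins;
 *	if (!hed_file_insert_begin(file, curs, &ins)) {
 *		... zero or more hed_file_insert_block(file, &ins, ...) ...
 *		hed_file_insert_end(file, &ins);
 *	}
 */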
struct commit_control {
	struct hed_file *file;
	int wfd;		/* file descriptor for writing */
	int needwrite;		/* non-zero if write is needed */
	hed_cursor_t begoff, endoff;
	hed_off_t shift;
	void *partbuf;		/* allocated 3*FILE_BLOCK_SIZE */
	void *partial;		/* pointer into partbuf */
};
/* Get the logical<->physical shift value after the specified block.
 * It is essential to get the value _AFTER_ the block, because the
 * shift value is used to decide how the current block will be handled.
 */
static hed_off_t
get_shift(const hed_cursor_t *curs)
{
	struct hed_block *block = curs->block;
	size_t curshift = hed_block_is_inserted(block) ? block->t.size : 0;
	return curshift + curs->pos - curs->off - block->phys_pos;
}
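/* Example (illustrative): if 0x10 bytes were erased somewhere before
 * the current position, the logical offset runs 0x10 behind the
 * physical one and get_shift() returns -0x10, so data can safely be
 * committed front-to-back; pending insertions give a positive shift,
 * which forces a back-to-front pass (see commit_backwards()) so that
 * bytes still waiting to be copied out are not overwritten first. */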
static void
switch_partial(struct commit_control *cc)
{
	cc->partial += FILE_BLOCK_SIZE;
	if (cc->partial >= cc->partbuf + 3*FILE_BLOCK_SIZE)
		cc->partial = cc->partbuf;
}
/* Write @len bytes from the partial buffer at @cc->begoff. */
static int
commit_block(struct commit_control *cc, size_t len)
{
	ssize_t written;

	assert(len > 0);
	BDEBUG(" -> write %lx bytes at %llx\n",
	       (unsigned long)len, cc->begoff.pos - len);
	written = pwrite(cc->wfd, cc->partial, len, cc->begoff.pos - len);
	if (written < len)
		/* TODO: keep data in a new list of dirty blocks */
		return -1;
	return 0;
}
static int
commit_partial(struct commit_control *cc)
{
	size_t partoff, remain, left;
	size_t writelen;

	partoff = FILE_BLOCK_OFF(cc->begoff.pos);
	remain = FILE_BLOCK_SIZE - partoff;
	if (remain > cc->endoff.pos - cc->begoff.pos)
		remain = cc->endoff.pos - cc->begoff.pos;
	if ((writelen = partoff + remain) == 0)
		return 0;

	BDEBUG("Fill partial %llx-%llx\n",
	       cc->begoff.pos, cc->begoff.pos + remain);

	left = copy_in(cc->file, cc->partial + partoff, remain, &cc->begoff);
	if (left) {
		hed_move_relative(&cc->begoff, left);
		return -1;
	}

	if (FILE_BLOCK_OFF(cc->begoff.pos) &&
	    !hed_block_is_eof(cc->begoff.block))
		return 0;

	return commit_block(cc, writelen);
}
/* Commit forwards.
 * Beware, cc->begoff is undefined upon return!
 */
static int
commit_forwards(struct commit_control *cc)
{
	hed_uoff_t endpos = cc->endoff.pos;
	int ret = 0;

	BDEBUG("Writing forwards %llx-%llx\n",
	       cc->begoff.pos, cc->endoff.pos);

	if (!cc->needwrite)
		return 0;

	while (cc->begoff.pos < endpos)
		ret |= commit_partial(cc);

	return ret;
}
/* Commit backwards.
 * Beware, cc->begoff is undefined upon return!
 */
static int
commit_backwards(struct commit_control *cc)
{
	void *retpartial = cc->partial;
	hed_uoff_t begpos = cc->begoff.pos;
	hed_uoff_t blkpos; /* start of current partial block */
	int ret = 0;

	BDEBUG("Writing backwards %llx-%llx\n",
	       cc->begoff.pos, cc->endoff.pos);

	if (!cc->needwrite)
		return 0;

	blkpos = FILE_BLOCK_ROUND(cc->endoff.pos);
	if (blkpos <= begpos)
		goto final;

	/* Handle the trailing partial block */
	hed_update_cursor(cc->file, blkpos, &cc->begoff);
	switch_partial(cc);
	ret |= commit_partial(cc);
	retpartial = cc->partial;

	/* Handle the middle part */
	switch_partial(cc);
	while ( (blkpos -= FILE_BLOCK_SIZE) > begpos) {
		hed_update_cursor(cc->file, blkpos, &cc->begoff);
		ret |= commit_partial(cc);
	}
	switch_partial(cc);	/* wrap around */

final:
	/* Handle the first block (partial or not) */
	hed_update_cursor(cc->file, begpos, &cc->begoff);
	ret |= commit_partial(cc);

	cc->partial = retpartial;
	return ret;
}
/* Handle the partial block before a skipped one. */
static int
begin_skip(struct commit_control *cc)
{
	size_t minsize = FILE_BLOCK_SIZE - FILE_BLOCK_OFF(cc->endoff.pos);
	size_t remain;
	int ret = 0;

	/* Check if at least one complete physical block can be skipped */
	if (cc->endoff.block->t.size < minsize)
		return 0;

	/* Write out the partially dirty block */
	remain = FILE_BLOCK_OFF(minsize);
	hed_move_relative(&cc->endoff, remain);
	if (cc->shift <= 0)
		ret |= commit_forwards(cc);
	else
		ret |= commit_backwards(cc);
	hed_move_relative(&cc->endoff, -(hed_off_t)remain);
	hed_dup2_cursor(&cc->endoff, &cc->begoff);

	cc->needwrite = 0;
	return ret;
}
/* Handle the last partially skipped physical block. */
static int
end_skip(struct commit_control *cc)
{
	size_t partlen;
	int ret = 0;

	/* Find the beginning of the physical block */
	hed_dup2_cursor(&cc->endoff, &cc->begoff);
	partlen = FILE_BLOCK_OFF(cc->begoff.pos);
	hed_move_relative(&cc->begoff, -(hed_off_t)partlen);

	/* Read the partial data before this block */
	if (hed_file_cpin(cc->file, cc->partial, partlen, &cc->begoff))
		ret = -1;

	cc->needwrite = 1;
	return ret;
}
static void
undirty_blocks(struct hed_file *file)
{
	struct hed_block *block, *next;
	hed_uoff_t pos = 0;

	BDEBUG("Undirtying blocks:\n");
	dump_blocks(file);

	next = first_block(file);
	while (!hed_block_is_eof(next)) {
		block = next;
		next = next_block(block);

		if (kill_block_if_empty(file, block))
			continue;

		if (!hed_block_is_virtual(block)) {
			cache_put(file->cache, block->dataobj);
			block->dataobj = NULL;
			list_del_init(&block->lru);
			block->flags = HED_BLOCK_EXCACHE | HED_BLOCK_VIRTUAL;
		}

		block->phys_pos = pos;
		pos += block->t.size;
	}

	block = first_block(file);
	while (!hed_block_is_eof(block)) {
		next = next_block(block);
		file_kill_block(file, block);
		block = next;
	}

	BDEBUG("After undirtying\n");
	dump_blocks(file);
}
static int
commit_init(struct commit_control *cc, struct hed_file *file)
{
	cc->file = file;

	cc->partbuf = malloc(3*FILE_BLOCK_SIZE);
	if (!cc->partbuf)
		goto err;

	cc->wfd = open(file->name,
		       O_RDWR | (file->fd < 0 ? O_CREAT : 0), 0666);
	if (cc->wfd < 0)
		goto err_free;

	if (file->fd < 0 &&
	    (file->fd = open(file->name, O_RDONLY)) < 0)
		goto err_close;

	return 0;

err_close:
	close(cc->wfd);
err_free:
	free(cc->partbuf);
err:
	return -1;
}
int
hed_file_commit(struct hed_file *file)
{
	struct commit_control cc;
	int ret = 0;

	if (commit_init(&cc, file))
		return -1;

	dump_blocks(file);

	cc.partial = cc.partbuf;
	get_cursor(file, 0, &cc.begoff);
	hed_dup_cursor(&cc.begoff, &cc.endoff);
	cc.shift = -cc.begoff.block->phys_pos;
	cc.needwrite = 0;
	while (!hed_block_is_eof(cc.endoff.block)) {
		hed_off_t newshift = cc.endoff.pos < file->phys_size
			? get_shift(&cc.endoff)
			: 0;

		if (cc.shift <= 0 && newshift > 0) {
			ret |= commit_forwards(&cc);
			hed_dup2_cursor(&cc.endoff, &cc.begoff);
		} else if (cc.shift > 0 && newshift <= 0) {
			ret |= commit_backwards(&cc);
			hed_dup2_cursor(&cc.endoff, &cc.begoff);
		}
		cc.shift = newshift;

		if (!newshift && !hed_block_is_dirty(cc.endoff.block)) {
			if (cc.needwrite)
				ret |= begin_skip(&cc);
		} else if (!cc.needwrite)
			ret |= end_skip(&cc);

		if (!move_to_next(&cc.endoff))
			break;
	}
	assert(cc.endoff.pos == hed_file_size(file));

	if (cc.begoff.pos < hed_file_size(file)) {
		if (cc.shift <= 0)
			ret |= commit_forwards(&cc);
		else
			ret |= commit_backwards(&cc);
	}

	put_cursor(&cc.begoff);
	put_cursor(&cc.endoff);

	ftruncate(cc.wfd, hed_file_size(file));
	file->phys_size = hed_file_size(file);

	ret |= close(cc.wfd);
	free(cc.partbuf);

	undirty_blocks(file);

	if (!ret)
		clear_modified(file);

	return ret;
}
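/* Commit strategy in short (explanatory note, not in the original
 * source): the file is scanned once; each stretch is flushed
 * front-to-back while the logical-to-physical shift stays <= 0 and
 * back-to-front while it is positive, so a write never clobbers source
 * bytes that still have to be copied. Clean stretches with zero shift
 * are skipped via begin_skip()/end_skip(), and the partial-block
 * buffers in cc.partbuf carry the boundary bytes across writes. */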
#ifdef HED_CONFIG_SWAP

int
hed_file_write_swap(struct hed_file *file)
{
	return swp_write(file_swp(file));
}
2144 static int
2145 do_read_swap(struct hed_file *file, struct swp_file *swp, hed_cursor_t *pos)
2147 struct hed_file *swpfile = swp_private(swp);
2148 struct hed_block *cur, block;
2149 hed_uoff_t phys_pos;
2151 if (file_stat(swpfile)->st_size != file_stat(file)->st_size ||
2152 file_stat(swpfile)->st_mtime != file_stat(file)->st_mtime) {
2153 fprintf(stderr, "stat info mismatch (you modified the file since hed ran on it; refusing to touch it)\n");
2154 return -1;
2157 BDEBUG("Swap header match\n");
2159 phys_pos = 0;
2160 cur = first_block(swpfile);
2161 do {
2162 struct hed_block_data dataobj;
2163 size_t (*mergefn)(struct hed_file*, hed_cursor_t*,
2164 unsigned char*, size_t);
2165 void *data;
2166 size_t res;
2168 if (swp_cpin(swp, &block, cur, sizeof(struct hed_block))) {
2169 perror("Cannot read block descriptor");
2170 return -1;
2172 BDEBUG("BLOCK %p: flags %02lx phys 0x%02llx size 0x%llx\n",
2173 cur, block.flags, (long long)block.phys_pos,
2174 (long long)hed_block_size(&block));
2176 if (block.phys_pos - phys_pos) {
2177 if (hed_file_erase_block(file, pos,
2178 block.phys_pos - phys_pos)) {
2179 perror("Cannot erase");
2180 return -1;
2182 phys_pos = block.phys_pos;
2185 if (!hed_block_is_inserted(&block))
2186 phys_pos += hed_block_size(&block);
2188 if (!hed_block_is_dirty(&block)) {
2189 hed_move_relative(pos, hed_block_size(&block));
2190 continue;
2191 }
2193 if (swp_cpin(swp, &dataobj, block.dataobj,
2194 sizeof(struct hed_block_data))) {
2195 perror("Cannot read data descriptor");
2196 return -1;
2197 }
2198 BDEBUG("DATA %p: size 0x%lx\n",
2199 block.dataobj, (long)dataobj.size);
2201 if (! (data = malloc(hed_block_size(&block))) ) {
2202 perror("Cannot allocate data");
2203 return -1;
2204 }
2206 if (swp_cpin(swp, data, dataobj.data + block.dataoff,
2207 hed_block_size(&block))) {
2208 perror("Cannot read data");
2209 return -1;
2210 }
2212 mergefn = hed_block_is_inserted(&block)
2213 ? hed_file_insert_once
2214 : hed_file_set_block;
2215 res = mergefn(file, pos, data, hed_block_size(&block));
2216 free(data);
2217 if (res) {
2218 perror("Cannot merge data");
2219 return -1;
2220 }
2221 } while (cur = next_block(&block), !hed_block_is_eof(&block));
2223 dump_blocks(file);
2224 return 0;
2225 }
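/* Recover an editing session: open the swap file, replay it, clean up. */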
2227 int
2228 hed_file_read_swap(struct hed_file *file)
2229 {
2230 struct swp_file *swp;
2231 hed_cursor_t pos;
2232 int ret;
2234 if (! (swp = swp_init_read(file->swpname)) )
2235 return -1;
2237 get_cursor(file, 0, &pos);
2238 ret = do_read_swap(file, swp, &pos);
2239 put_cursor(&pos);
2241 swp_done(swp);
2242 return ret;
2243 }
2245 #endif /* HED_CONFIG_SWAP */
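/*
 * Find-by-expression machinery.  ffb_hookdata carries the state that
 * eval_reg_cb() needs when the expression evaluator asks for a
 * register value in the middle of a search.
 */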
2247 struct ffb_hookdata {
2248 struct hed_file *file;
2249 hed_cursor_t *pos;
2250 hed_expr_reg_cb base_ecb;
2251 void *base_ecb_data;
2252 };
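/*
 * Expression-register callback for searches: the '.' register reads
 * bytes from the searched file at @ofs relative to the current search
 * position and returns HED_AEF_DYNAMIC (the expression must then be
 * re-evaluated at every candidate offset), or HED_AEF_ERROR if the
 * read fails.  Any other register is passed to the caller's original
 * callback.
 */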
2254 static long
2255 eval_reg_cb(void *hookdata, char reg, hed_off_t ofs,
2256 unsigned char *scramble, size_t len)
2257 {
2258 struct ffb_hookdata *data = hookdata;
2259 if (reg == '.') {
2260 hed_cursor_t pos;
2261 long ret = HED_AEF_DYNAMIC;
2263 hed_dup_cursor(data->pos, &pos);
2264 hed_move_relative(&pos, ofs);
2265 if (copy_in(data->file, scramble, len, &pos))
2266 ret = HED_AEF_ERROR;
2267 put_cursor(&pos);
2268 return ret;
2269 }
2271 return data->base_ecb(data->base_ecb_data, reg, ofs, scramble, len);
2272 }
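/* Reverse @p in place; find_bytestr() reverses the needle so the same
 * Boyer-Moore tables work for a backward search. */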
2274 static void
2275 reverse(unsigned char *p, size_t len)
2276 {
2277 unsigned char *q = p + len;
2278 while (p < q) {
2279 unsigned char x = *p;
2280 *p++ = *--q;
2281 *q = x;
2282 }
2283 }
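/*
 * Boyer-Moore preprocessing.  compute_badchar() records, for each byte
 * value, the 1-based index of its last occurrence in needle[0..len-2];
 * bytes that never occur keep the 0 from calloc().  Illustrative
 * values, worked out by hand for a hypothetical needle "ABCAB":
 * badchar['A'] = 4, badchar['B'] = 2, badchar['C'] = 3, all others 0.
 */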
2285 static void
2286 compute_badchar(ssize_t *badchar, const unsigned char *s, ssize_t len)
2287 {
2288 size_t i = 1;
2289 while (i < len)
2290 badchar[*s++] = i++;
2291 }
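/*
 * compute_sfx() fills sfx[i] with the length of the longest suffix of
 * the needle ending at position i that is also a suffix of the whole
 * needle; compute_goodsfx() turns that into the classic good-suffix
 * shift table.  The shift table lives in the first half of the
 * goodsfx buffer, the sfx scratch space in the second half.
 */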
2293 static void
2294 compute_sfx(ssize_t *sfx, const unsigned char *s, ssize_t len)
2295 {
2296 ssize_t f, g, i;
2298 sfx[len - 1] = len;
2299 g = len - 1;
2300 for (i = len - 2; i >= 0; --i) {
2301 if (i > g && sfx[i + len - 1 - f] < i - g)
2302 sfx[i] = sfx[i + len - 1 - f];
2303 else {
2304 if (i < g)
2305 g = i;
2306 f = i;
2307 while (g >= 0 && s[g] == s[g + len - 1 - f])
2308 --g;
2309 sfx[i] = f - g;
2310 }
2311 }
2312 }
2314 static void
2315 compute_goodsfx(ssize_t *goodsfx, const unsigned char *s, ssize_t len)
2316 {
2317 ssize_t i, j, *sfx = goodsfx + len;
2319 compute_sfx(sfx, s, len);
2321 for (i = 0; i < len; ++i)
2322 goodsfx[i] = len;
2323 j = 0;
2324 for (i = len - 1; i >= 0; --i)
2325 if (sfx[i] == i + 1)
2326 for (; j < len - 1 - i; ++j)
2327 if (goodsfx[j] == len)
2328 goodsfx[j] = len - 1 - i;
2329 for (i = 0; i <= len - 2; ++i)
2330 goodsfx[len - 1 - sfx[i]] = len - 1 - i;
2331 }
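/*
 * A hand-worked sketch of how the two tables drive bm_find() below
 * (illustrative, again assuming the hypothetical needle "ABCAB"):
 * after a mismatch at needle index i against text byte *p, the window
 * advances by max(i + 1 - badchar[*p], goodsfx[i]); e.g. a mismatching
 * 'C' at i = 4 gives a bad-character shift of 4 + 1 - 3 = 2.
 */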
2333 /* Search for a constant byte string using the Boyer-Moore algorithm. */
2334 static inline unsigned char*
2335 bm_find(unsigned char *buf, size_t buflen, unsigned char *needle,
2336 size_t maxidx, ssize_t *badchar, ssize_t *goodsfx)
2337 {
2338 while (buflen > maxidx) {
2339 unsigned char *p;
2340 size_t i;
2341 ssize_t shift;
2343 for (p = buf + maxidx, i = maxidx; p >= buf; --p, --i)
2344 if (needle[i] != *p)
2345 break;
2346 if (p < buf)
2347 return buf;
2349 shift = i + 1 - badchar[*p];
2350 if (shift < goodsfx[i])
2351 shift = goodsfx[i];
2353 buf += shift;
2354 buflen -= shift;
2355 }
2356 return NULL;
2357 }
2359 /* Search for a constant byte string backwards. */
2360 static inline unsigned char*
2361 bm_find_rev(unsigned char *buf, size_t buflen, unsigned char *needle,
2362 size_t maxidx, ssize_t *badchar, ssize_t *goodsfx)
2363 {
2364 buf -= maxidx;
2365 while (buflen > maxidx) {
2366 unsigned char *p;
2367 size_t i;
2368 ssize_t shift;
2370 for (p = buf, i = maxidx; p <= buf + maxidx; ++p, --i)
2371 if (needle[i] != *p)
2372 break;
2373 if (p > buf + maxidx)
2374 return buf;
2376 shift = i + 1 - badchar[*p];
2377 if (shift < goodsfx[i])
2378 shift = goodsfx[i];
2380 buf -= shift;
2381 buflen -= shift;
2382 }
2383 return NULL;
2384 }
2386 /* Search for a constant byte string in @buf.
2387 * If @buflen is negative, search backwards, otherwise search forwards.
2388 */
2389 static inline unsigned char*
2390 find_bytestr_buf(unsigned char *buf, ssize_t buflen,
2391 unsigned char *needle, ssize_t maxidx,
2392 ssize_t *badchar, ssize_t *goodsfx)
2393 {
2394 if (buflen < 0) {
2395 buflen = -buflen;
2396 if (!maxidx)
2397 return memrchr(buf - buflen + 1, *needle, buflen);
2398 return bm_find_rev(buf, buflen, needle, maxidx,
2399 badchar, goodsfx);
2400 } else {
2401 if (!maxidx)
2402 return memchr(buf, *needle, buflen);
2403 return bm_find(buf, buflen, needle, maxidx,
2404 badchar, goodsfx);
2405 }
2406 }
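/*
 * find_bytestr() below scans block by block.  Runs of plain data are
 * searched in place; around block boundaries (and on the backward
 * path) up to needle-length-minus-one extra bytes are copied into
 * readbuf so that a match straddling two blocks is still found.  Bad
 * blocks are skipped wholesale, since their contents are undefined.
 */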
2408 /* Search for a constant byte string using the Boyer-Moore algorithm. */
2409 static int
2410 find_bytestr(struct hed_file *file, hed_cursor_t *from, int dir,
2411 unsigned char *needle, ssize_t len)
2412 {
2413 void *dynalloc;
2414 ssize_t *badchar, *goodsfx;
2415 unsigned char *readbuf;
2416 ssize_t slen;
2417 int ret;
2419 if (len > 1) {
2420 dynalloc = calloc(sizeof(ssize_t) * (256 + 2*len)
2421 + 2*(len-1), 1);
2422 if (!dynalloc)
2423 return HED_FINDOFF_ERROR;
2424 badchar = dynalloc;
2425 goodsfx = badchar + 256;
2426 readbuf = dynalloc + sizeof(ssize_t) * (256 + 2*len);
2428 if (dir < 0)
2429 reverse(needle, len);
2430 compute_badchar(badchar, needle, len);
2431 compute_goodsfx(goodsfx, needle, len);
2432 } else {
2433 dynalloc = NULL;
2434 badchar = goodsfx = NULL;
2435 readbuf = NULL;
2436 }
2438 --len; /* simplify offset computing */
2439 if (dir < 0) {
2440 from->off += len;
2441 from->pos += len;
2442 slen = -len;
2443 } else
2444 slen = len;
2446 ret = HED_FINDOFF_NO_MATCH;
2447 while (from->pos >= 0) {
2448 ssize_t remain;
2450 fixup_cursor(from);
2451 if (hed_block_is_eof(from->block))
2452 break;
2454 remain = hed_prepare_read(file, from, SSIZE_MAX);
2455 if (!remain) {
2456 ret = HED_FINDOFF_ERROR;
2457 break;
2458 }
2459 if (dir < 0)
2460 remain = -(from->off + 1);
2462 if (!hed_block_is_bad(from->block)) {
2463 unsigned char *p, *q;
2465 if ((dir >= 0 && remain > slen) ||
2466 (dir < 0 && remain < slen)) {
2467 assert(!hed_block_is_virtual(from->block));
2468 assert(from->block->dataobj);
2469 p = from->block->dataobj->data +
2470 from->block->dataoff + from->off;
2471 from->off += remain;
2472 from->pos += remain;
2473 } else if (dir >= 0) {
2474 remain += slen;
2475 if (copy_in(file, readbuf, remain, from)) {
2476 ret = HED_FINDOFF_ERROR;
2477 break;
2478 }
2479 p = readbuf;
2480 } else {
2481 remain += slen;
2482 from->off += remain + 1;
2483 from->pos += remain + 1;
2484 if (from->pos < 0)
2485 break;
2486 fixup_cursor_slow(from);
2487 if (copy_in(file, readbuf, -remain, from)) {
2488 ret = HED_FINDOFF_ERROR;
2489 break;
2490 }
2491 from->off -= -remain + 1;
2492 from->pos -= -remain + 1;
2493 p = readbuf + (-remain - 1);
2494 }
2496 q = find_bytestr_buf(p, remain, needle, len,
2497 badchar, goodsfx);
2498 if (q) {
2499 move_rel_fast(from, q - p - remain);
2500 ret = 0;
2501 break;
2502 }
2503 from->off -= slen;
2504 from->pos -= slen;
2505 } else {
2506 /* bad blocks cannot match anything */
2507 from->off += remain;
2508 from->pos += remain;
2509 }
2510 }
2512 if (dynalloc)
2513 free(dynalloc);
2514 return ret;
2515 }
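/*
 * find_expr() handles searches whose pattern is a hed expression.  A
 * static expression is evaluated once and handed off to find_bytestr();
 * a dynamic one (HED_AEF_DYNAMIC, e.g. referencing the '.' register)
 * must be re-evaluated and compared byte by byte at every candidate
 * offset, which is what the slow for(;;) loop below does.
 */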
2517 static int
2518 find_expr(struct hed_file *file, hed_cursor_t *from, int dir,
2519 struct hed_expr *expr, struct ffb_hookdata *data)
2521 size_t len = hed_expr_len(expr);
2522 unsigned char *buf;
2524 if (len > hed_file_size(file))
2525 return HED_FINDOFF_NO_MATCH;
2526 if ((hed_off_t)hed_file_size(file) - from->pos - len < 0)
2527 hed_move_relative(from, ((hed_off_t)hed_file_size(file) -
2528 from->pos - len));
2530 for (;;) {
2531 hed_cursor_t match;
2532 size_t remain;
2533 unsigned char *p;
2534 size_t pos;
2536 buf = hed_expr_eval(expr, eval_reg_cb, NULL, data);
2537 if (!buf)
2538 return HED_FINDOFF_ERROR;
2540 hed_dup_cursor(from, &match);
2541 remain = 0;
2542 for (pos = 0; pos < len; pos++) {
2543 if (!remain) {
2544 remain = hed_prepare_read(file, &match,
2545 SIZE_MAX);
2546 if (!remain ||
2547 hed_block_is_bad(match.block))
2548 break;
2549 p = hed_cursor_data(&match);
2550 cursor_next_block(&match);
2551 }
2552 if (*p++ != buf[pos])
2553 break;
2554 remain--;
2555 }
2556 put_cursor(&match);
2558 if (pos == len)
2559 return 0;
2560 if (!remain)
2561 return HED_FINDOFF_ERROR;
2563 from->pos += dir;
2564 from->off += dir;
2565 if (0 > from->pos || from->pos > hed_file_size(file) - len)
2566 break;
2567 fixup_cursor(from);
2569 if (! (hed_expr_flags(expr) & HED_AEF_DYNAMIC) )
2570 return find_bytestr(file, from, dir, buf, len);
2571 }
2573 return HED_FINDOFF_NO_MATCH;
2574 }
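/*
 * Public entry point for expression search; readahead is hinted in the
 * search direction for the duration of the call and reset afterwards.
 * A minimal usage sketch (hypothetical caller, error paths elided;
 * "expr", "my_reg_cb" and "my_data" are stand-ins):
 *
 *	hed_cursor_t pos;
 *	get_cursor(file, 0, &pos);
 *	if (!hed_file_find_expr(file, &pos, 1, expr, my_reg_cb, my_data))
 *		;	/- pos now rests on the first match -/
 *	put_cursor(&pos);
 */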
2576 int
2577 hed_file_find_expr(struct hed_file *file, hed_cursor_t *pos, int dir,
2578 struct hed_expr *expr,
2579 hed_expr_reg_cb expr_cb, void *expr_cb_data)
2580 {
2581 struct ffb_hookdata data;
2582 int res;
2584 assert(dir == 1 || dir == -1);
2586 data.file = file;
2587 data.pos = pos;
2588 data.base_ecb = expr_cb;
2589 data.base_ecb_data = expr_cb_data;
2591 hed_file_set_readahead(file,
2592 dir > 0 ? HED_RA_FORWARD : HED_RA_BACKWARD);
2593 res = find_expr(file, pos, dir, expr, &data);
2594 hed_file_set_readahead(file, HED_RA_NONE);
2596 return res;
2597 }