mm-only debug patch...
[mmotm.git] / fs / reiser4 / block_alloc.c
blob6aa8418271e7e0d97d15530f00ee9a12cc804d51
1 /* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
2 reiser4/README */
4 #include "debug.h"
5 #include "dformat.h"
6 #include "plugin/plugin.h"
7 #include "txnmgr.h"
8 #include "znode.h"
9 #include "block_alloc.h"
10 #include "tree.h"
11 #include "super.h"
13 #include <linux/types.h> /* for __u?? */
14 #include <linux/fs.h> /* for struct super_block */
15 #include <linux/spinlock.h>
17 /* THE REISER4 DISK SPACE RESERVATION SCHEME. */
19 /* We need to be able to reserve enough disk space to ensure that an atomic
20 operation will have enough disk space to flush (see flush.c and
21 http://namesys.com/v4/v4.html) and commit it once it is started.
23 In our design a call for reserving disk space may fail but not an actual
24 block allocation.
26 All free blocks, already allocated blocks, and all kinds of reserved blocks
27 are counted in different per-fs block counters.
29 A reiser4 super block's set of block counters currently is:
31 free -- free blocks,
32 used -- already allocated blocks,
34 grabbed -- initially reserved for performing an fs operation, those blocks
35 are taken from free blocks, then grabbed disk space leaks from grabbed
36 blocks counter to other counters like "fake allocated", "flush
37 reserved", "used", the rest of not used grabbed space is returned to
38 free space at the end of fs operation;
40 fake allocated -- counts all nodes without real disk block numbers assigned,
41 we have separate accounting for formatted and unformatted
42 nodes (for easier debugging);
44 flush reserved -- disk space needed for flushing and committing an atom.
45 Each dirty already allocated block could be written as a
46 part of atom's overwrite set or as a part of atom's
47 relocate set. In both cases one additional block is needed,
48 it is used as a wandered block if we do overwrite or as a
49 new location for a relocated block.
51 In addition, blocks in some states are counted on per-thread and per-atom
52 basis. A reiser4 context has a counter of blocks grabbed by this transaction
53 and the sb's grabbed blocks counter is a sum of grabbed blocks counter values
54 of each reiser4 context. Each reiser4 atom has a counter of "flush reserved"
55 blocks, which are reserved for flush processing and atom commit. */
57 /* AN EXAMPLE: suppose we insert new item to the reiser4 tree. We estimate
58 number of blocks to grab for most expensive case of balancing when the leaf
59 node we insert new item to gets split and new leaf node is allocated.
61 So, we need to grab blocks for
63 1) one block for possible dirtying the node we insert an item to. That block
64 would be used for node relocation at flush time or for allocating of a
65 wandered one, it depends what will be a result (what set, relocate or
66 overwrite the node gets assigned to) of the node processing by the flush
67 algorithm.
69 2) one block for either allocating a new node, or dirtying of right or left
70 clean neighbor, only one case may happen.
72 VS-FIXME-HANS: why can only one case happen? I would expect to see dirtying
73 of left neighbor, right neighbor, current node, and creation of new node.
74 Have I forgotten something? email me.
76 These grabbed blocks are counted in both reiser4 context "grabbed blocks"
77 counter and in the fs-wide one (both ctx->grabbed_blocks and
78 sbinfo->blocks_grabbed get incremented by 2), sb's free blocks counter is
79 decremented by 2.
81 Suppose both two blocks were spent for dirtying of an already allocated clean
82 node (one block went from "grabbed" to "flush reserved") and for new block
83 allocating (one block went from "grabbed" to "fake allocated formatted").
85 Inserting of a child pointer to the parent node caused parent node to be
86 split, the balancing code takes care about this grabbing necessary space
87 immediately by calling reiser4_grab with BA_RESERVED flag set which means
88 "can use the 5% reserved disk space".
90 At this moment insertion completes and grabbed blocks (if they were not used)
91 should be returned to the free space counter.
93 However the atom life-cycle is not completed. The atom had one "flush
94 reserved" block added by our insertion and the new fake allocated node is
95 counted as a "fake allocated formatted" one. The atom has to be fully
96 processed by flush before commit. Suppose that the flush moved the first,
97 already allocated node to the atom's overwrite list, the new fake allocated
98 node, obviously, went into the atom relocate set. The reiser4 flush
99 allocates the new node using one unit from "fake allocated formatted"
100 counter, the log writer uses one from "flush reserved" for wandered block
101 allocation.
103 And, it is not the end. When the wandered block is deallocated after the
104 atom gets fully played (see wander.c for term description), the disk space
105 occupied for it is returned to free blocks. */
107 /* BLOCK NUMBERS */
109 /* Any reiser4 node has a block number assigned to it. We use these numbers for
110 indexing in hash tables, so if a block has not yet been assigned a location
111 on disk we need to give it a temporary fake block number.
113 Current implementation of reiser4 uses 64-bit integers for block numbers. We
114 use highest bit in 64-bit block number to distinguish fake and real block
115 numbers. So, only 63 bits may be used for addressing real device
116 blocks. That "fake" block numbers space is divided into subspaces of fake
117 block numbers for data blocks and for shadow (working) bitmap blocks.
119 Fake block numbers for data blocks are generated by a cyclic counter, which
120 gets incremented after each real block allocation. We assume that it is
121 impossible to overload this counter during one transaction life. */
123 /* Initialize a blocknr hint. */
124 void reiser4_blocknr_hint_init(reiser4_blocknr_hint * hint)
126 memset(hint, 0, sizeof(reiser4_blocknr_hint));
129 /* Release any resources of a blocknr hint. */
130 void reiser4_blocknr_hint_done(reiser4_blocknr_hint * hint UNUSED_ARG)
132 /* No resources should be freed in current blocknr_hint implementation. */
135 /* see above for explanation of fake block number. */
136 /* Audited by: green(2002.06.11) */
137 int reiser4_blocknr_is_fake(const reiser4_block_nr * da)
139 /* The reason for not simply returning result of '&' operation is that
140 while return value is (possibly 32bit) int, the reiser4_block_nr is
141 at least 64 bits long, and high bit (which is the only possible
142 non zero bit after the masking) would be stripped off */
143 return (*da & REISER4_FAKE_BLOCKNR_BIT_MASK) ? 1 : 0;
146 /* Static functions for <reiser4 super block>/<reiser4 context> block counters
147 arithmetic. Mostly, they are isolated so that the same assertions are not coded in
148 several places. */
149 static void sub_from_ctx_grabbed(reiser4_context * ctx, __u64 count)
151 BUG_ON(ctx->grabbed_blocks < count);
152 assert("zam-527", ctx->grabbed_blocks >= count);
153 ctx->grabbed_blocks -= count;
156 static void add_to_ctx_grabbed(reiser4_context * ctx, __u64 count)
158 ctx->grabbed_blocks += count;
161 static void sub_from_sb_grabbed(reiser4_super_info_data * sbinfo, __u64 count)
163 assert("zam-525", sbinfo->blocks_grabbed >= count);
164 sbinfo->blocks_grabbed -= count;
167 /* Decrease the counter of block reserved for flush in super block. */
168 static void
169 sub_from_sb_flush_reserved(reiser4_super_info_data * sbinfo, __u64 count)
171 assert("vpf-291", sbinfo->blocks_flush_reserved >= count);
172 sbinfo->blocks_flush_reserved -= count;
175 static void
176 sub_from_sb_fake_allocated(reiser4_super_info_data * sbinfo, __u64 count,
177 reiser4_ba_flags_t flags)
179 if (flags & BA_FORMATTED) {
180 assert("zam-806", sbinfo->blocks_fake_allocated >= count);
181 sbinfo->blocks_fake_allocated -= count;
182 } else {
183 assert("zam-528",
184 sbinfo->blocks_fake_allocated_unformatted >= count);
185 sbinfo->blocks_fake_allocated_unformatted -= count;
189 static void sub_from_sb_used(reiser4_super_info_data * sbinfo, __u64 count)
191 assert("zam-530",
192 sbinfo->blocks_used >= count + sbinfo->min_blocks_used);
193 sbinfo->blocks_used -= count;
196 static void
197 sub_from_cluster_reserved(reiser4_super_info_data * sbinfo, __u64 count)
199 assert("edward-501", sbinfo->blocks_clustered >= count);
200 sbinfo->blocks_clustered -= count;
203 /* Increase the counter of block reserved for flush in atom. */
204 static void add_to_atom_flush_reserved_nolock(txn_atom * atom, __u32 count)
206 assert("zam-772", atom != NULL);
207 assert_spin_locked(&(atom->alock));
208 atom->flush_reserved += count;
211 /* Decrease the counter of block reserved for flush in atom. */
212 static void sub_from_atom_flush_reserved_nolock(txn_atom * atom, __u32 count)
214 assert("zam-774", atom != NULL);
215 assert_spin_locked(&(atom->alock));
216 assert("nikita-2790", atom->flush_reserved >= count);
217 atom->flush_reserved -= count;
220 /* super block has 6 counters: free, used, grabbed, fake allocated
221 (formatted and unformatted) and flush reserved. Their sum must be
222 number of blocks on a device. This function checks this */
223 int reiser4_check_block_counters(const struct super_block *super)
225 __u64 sum;
227 sum = reiser4_grabbed_blocks(super) + reiser4_free_blocks(super) +
228 reiser4_data_blocks(super) + reiser4_fake_allocated(super) +
229 reiser4_fake_allocated_unformatted(super) + reiser4_flush_reserved(super) +
230 reiser4_clustered_blocks(super);
231 if (reiser4_block_count(super) != sum) {
232 printk("super block counters: "
233 "used %llu, free %llu, "
234 "grabbed %llu, fake allocated (formatetd %llu, unformatted %llu), "
235 "reserved %llu, clustered %llu, sum %llu, must be (block count) %llu\n",
236 (unsigned long long)reiser4_data_blocks(super),
237 (unsigned long long)reiser4_free_blocks(super),
238 (unsigned long long)reiser4_grabbed_blocks(super),
239 (unsigned long long)reiser4_fake_allocated(super),
240 (unsigned long long)
241 reiser4_fake_allocated_unformatted(super),
242 (unsigned long long)reiser4_flush_reserved(super),
243 (unsigned long long)reiser4_clustered_blocks(super),
244 (unsigned long long)sum,
245 (unsigned long long)reiser4_block_count(super));
246 return 0;
248 return 1;
251 /* Adjust "working" free blocks counter for number of blocks we are going to
252 allocate. Record number of grabbed blocks in fs-wide and per-thread
253 counters. This function should be called before bitmap scanning or
254 allocating fake block numbers
256 @super -- pointer to reiser4 super block;
257 @count -- number of blocks we reserve;
259 @return -- 0 if success, -ENOSPC, if all
260 free blocks are preserved or already allocated.
263 static int
264 reiser4_grab(reiser4_context * ctx, __u64 count, reiser4_ba_flags_t flags)
266 __u64 free_blocks;
267 int ret = 0, use_reserved = flags & BA_RESERVED;
268 reiser4_super_info_data *sbinfo;
270 assert("vs-1276", ctx == get_current_context());
272 /* Do not grab anything on ro-mounted fs. */
273 if (rofs_super(ctx->super)) {
274 ctx->grab_enabled = 0;
275 return 0;
278 sbinfo = get_super_private(ctx->super);
280 spin_lock_reiser4_super(sbinfo);
282 free_blocks = sbinfo->blocks_free;
284 if ((use_reserved && free_blocks < count) ||
285 (!use_reserved && free_blocks < count + sbinfo->blocks_reserved)) {
286 ret = RETERR(-ENOSPC);
287 goto unlock_and_ret;
290 add_to_ctx_grabbed(ctx, count);
292 sbinfo->blocks_grabbed += count;
293 sbinfo->blocks_free -= count;
295 #if REISER4_DEBUG
296 if (ctx->grabbed_initially == 0)
297 ctx->grabbed_initially = count;
298 #endif
300 assert("nikita-2986", reiser4_check_block_counters(ctx->super));
302 /* disable grab space in current context */
303 ctx->grab_enabled = 0;
305 unlock_and_ret:
306 spin_unlock_reiser4_super(sbinfo);
308 return ret;
311 int reiser4_grab_space(__u64 count, reiser4_ba_flags_t flags)
313 int ret;
314 reiser4_context *ctx;
316 assert("nikita-2964", ergo(flags & BA_CAN_COMMIT,
317 lock_stack_isclean(get_current_lock_stack
318 ())));
319 ctx = get_current_context();
320 if (!(flags & BA_FORCE) && !is_grab_enabled(ctx))
321 return 0;
323 ret = reiser4_grab(ctx, count, flags);
324 if (ret == -ENOSPC) {
326 /* Trying to commit the all transactions if BA_CAN_COMMIT flag
327 present */
328 if (flags & BA_CAN_COMMIT) {
329 txnmgr_force_commit_all(ctx->super, 0);
330 ctx->grab_enabled = 1;
331 ret = reiser4_grab(ctx, count, flags);
335 * allocation from reserved pool cannot fail. This is severe error.
337 assert("nikita-3005", ergo(flags & BA_RESERVED, ret == 0));
338 return ret;
342 * SPACE RESERVED FOR UNLINK/TRUNCATE
344 * Unlink and truncate require space in transaction (to update stat data, at
345 * least). But we don't want rm(1) to fail with "No space on device" error.
347 * Solution is to reserve 5% of disk space for truncates and
348 * unlinks. Specifically, normal space grabbing requests don't grab space from
349 * reserved area. Only requests with BA_RESERVED bit in flags are allowed to
350 * drain it. Per super block delete mutex is used to allow only one
351 * thread at a time to grab from reserved area.
353 * Grabbing from reserved area should always be performed with BA_CAN_COMMIT
354 * flag.
358 int reiser4_grab_reserved(struct super_block *super,
359 __u64 count, reiser4_ba_flags_t flags)
361 reiser4_super_info_data *sbinfo = get_super_private(super);
363 assert("nikita-3175", flags & BA_CAN_COMMIT);
365 /* Check the delete mutex already taken by us, we assume that
366 * reading of machine word is atomic. */
367 if (sbinfo->delete_mutex_owner == current) {
368 if (reiser4_grab_space
369 (count, (flags | BA_RESERVED) & ~BA_CAN_COMMIT)) {
370 warning("zam-1003",
371 "nested call of grab_reserved fails count=(%llu)",
372 (unsigned long long)count);
373 reiser4_release_reserved(super);
374 return RETERR(-ENOSPC);
376 return 0;
379 if (reiser4_grab_space(count, flags)) {
380 mutex_lock(&sbinfo->delete_mutex);
381 assert("nikita-2929", sbinfo->delete_mutex_owner == NULL);
382 sbinfo->delete_mutex_owner = current;
384 if (reiser4_grab_space(count, flags | BA_RESERVED)) {
385 warning("zam-833",
386 "reserved space is not enough (%llu)",
387 (unsigned long long)count);
388 reiser4_release_reserved(super);
389 return RETERR(-ENOSPC);
392 return 0;
395 void reiser4_release_reserved(struct super_block *super)
397 reiser4_super_info_data *info;
399 info = get_super_private(super);
400 if (info->delete_mutex_owner == current) {
401 info->delete_mutex_owner = NULL;
402 mutex_unlock(&info->delete_mutex);
406 static reiser4_super_info_data *grabbed2fake_allocated_head(int count)
408 reiser4_context *ctx;
409 reiser4_super_info_data *sbinfo;
411 ctx = get_current_context();
412 sub_from_ctx_grabbed(ctx, count);
414 sbinfo = get_super_private(ctx->super);
415 spin_lock_reiser4_super(sbinfo);
417 sub_from_sb_grabbed(sbinfo, count);
418 /* return sbinfo locked */
419 return sbinfo;
422 /* is called after @count fake block numbers are allocated and pointer to
423 those blocks are inserted into tree. */
424 static void grabbed2fake_allocated_formatted(void)
426 reiser4_super_info_data *sbinfo;
428 sbinfo = grabbed2fake_allocated_head(1);
429 sbinfo->blocks_fake_allocated++;
431 assert("vs-922", reiser4_check_block_counters(reiser4_get_current_sb()));
433 spin_unlock_reiser4_super(sbinfo);
437 * grabbed2fake_allocated_unformatted
438 * @count:
441 static void grabbed2fake_allocated_unformatted(int count)
443 reiser4_super_info_data *sbinfo;
445 sbinfo = grabbed2fake_allocated_head(count);
446 sbinfo->blocks_fake_allocated_unformatted += count;
448 assert("vs-9221", reiser4_check_block_counters(reiser4_get_current_sb()));
450 spin_unlock_reiser4_super(sbinfo);
453 void grabbed2cluster_reserved(int count)
455 reiser4_context *ctx;
456 reiser4_super_info_data *sbinfo;
458 ctx = get_current_context();
459 sub_from_ctx_grabbed(ctx, count);
461 sbinfo = get_super_private(ctx->super);
462 spin_lock_reiser4_super(sbinfo);
464 sub_from_sb_grabbed(sbinfo, count);
465 sbinfo->blocks_clustered += count;
467 assert("edward-504", reiser4_check_block_counters(ctx->super));
469 spin_unlock_reiser4_super(sbinfo);
472 void cluster_reserved2grabbed(int count)
474 reiser4_context *ctx;
475 reiser4_super_info_data *sbinfo;
477 ctx = get_current_context();
479 sbinfo = get_super_private(ctx->super);
480 spin_lock_reiser4_super(sbinfo);
482 sub_from_cluster_reserved(sbinfo, count);
483 sbinfo->blocks_grabbed += count;
485 assert("edward-505", reiser4_check_block_counters(ctx->super));
487 spin_unlock_reiser4_super(sbinfo);
488 add_to_ctx_grabbed(ctx, count);
491 void cluster_reserved2free(int count)
493 reiser4_context *ctx;
494 reiser4_super_info_data *sbinfo;
496 ctx = get_current_context();
497 sbinfo = get_super_private(ctx->super);
499 cluster_reserved2grabbed(count);
500 grabbed2free(ctx, sbinfo, count);
503 static DEFINE_SPINLOCK(fake_lock);
504 static reiser4_block_nr fake_gen = 0;
507 * assign_fake_blocknr
508 * @blocknr:
509 * @count:
511 * Obtain a fake block number for new node which will be used to refer to
512 * this newly allocated node until real allocation is done.
514 static void assign_fake_blocknr(reiser4_block_nr *blocknr, int count)
516 spin_lock(&fake_lock);
517 *blocknr = fake_gen;
518 fake_gen += count;
519 spin_unlock(&fake_lock);
521 BUG_ON(*blocknr & REISER4_BLOCKNR_STATUS_BIT_MASK);
522 /**blocknr &= ~REISER4_BLOCKNR_STATUS_BIT_MASK;*/
523 *blocknr |= REISER4_UNALLOCATED_STATUS_VALUE;
524 assert("zam-394", zlook(current_tree, blocknr) == NULL);
527 int assign_fake_blocknr_formatted(reiser4_block_nr * blocknr)
529 assign_fake_blocknr(blocknr, 1);
530 grabbed2fake_allocated_formatted();
531 return 0;
535 * fake_blocknrs_unformatted
536 * @count: number of fake numbers to get
538 * Allocates @count fake block numbers which will be assigned to jnodes
540 reiser4_block_nr fake_blocknr_unformatted(int count)
542 reiser4_block_nr blocknr;
544 assign_fake_blocknr(&blocknr, count);
545 grabbed2fake_allocated_unformatted(count);
547 return blocknr;
550 /* adjust sb block counters, if real (on-disk) block allocation immediately
551 follows grabbing of free disk space. */
552 static void grabbed2used(reiser4_context *ctx, reiser4_super_info_data *sbinfo,
553 __u64 count)
555 sub_from_ctx_grabbed(ctx, count);
557 spin_lock_reiser4_super(sbinfo);
559 sub_from_sb_grabbed(sbinfo, count);
560 sbinfo->blocks_used += count;
562 assert("nikita-2679", reiser4_check_block_counters(ctx->super));
564 spin_unlock_reiser4_super(sbinfo);
567 /* adjust sb block counters when @count unallocated blocks get mapped to disk */
568 static void fake_allocated2used(reiser4_super_info_data *sbinfo, __u64 count,
569 reiser4_ba_flags_t flags)
571 spin_lock_reiser4_super(sbinfo);
573 sub_from_sb_fake_allocated(sbinfo, count, flags);
574 sbinfo->blocks_used += count;
576 assert("nikita-2680",
577 reiser4_check_block_counters(reiser4_get_current_sb()));
579 spin_unlock_reiser4_super(sbinfo);
582 static void flush_reserved2used(txn_atom * atom, __u64 count)
584 reiser4_super_info_data *sbinfo;
586 assert("zam-787", atom != NULL);
587 assert_spin_locked(&(atom->alock));
589 sub_from_atom_flush_reserved_nolock(atom, (__u32) count);
591 sbinfo = get_current_super_private();
592 spin_lock_reiser4_super(sbinfo);
594 sub_from_sb_flush_reserved(sbinfo, count);
595 sbinfo->blocks_used += count;
597 assert("zam-789",
598 reiser4_check_block_counters(reiser4_get_current_sb()));
600 spin_unlock_reiser4_super(sbinfo);
603 /* update the per fs blocknr hint default value. */
604 void
605 update_blocknr_hint_default(const struct super_block *s,
606 const reiser4_block_nr * block)
608 reiser4_super_info_data *sbinfo = get_super_private(s);
610 assert("nikita-3342", !reiser4_blocknr_is_fake(block));
612 spin_lock_reiser4_super(sbinfo);
613 if (*block < sbinfo->block_count) {
614 sbinfo->blocknr_hint_default = *block;
615 } else {
616 warning("zam-676",
617 "block number %llu is too large to be used in a blocknr hint\n",
618 (unsigned long long)*block);
619 dump_stack();
620 DEBUGON(1);
622 spin_unlock_reiser4_super(sbinfo);
625 /* get current value of the default blocknr hint. */
626 void get_blocknr_hint_default(reiser4_block_nr * result)
628 reiser4_super_info_data *sbinfo = get_current_super_private();
630 spin_lock_reiser4_super(sbinfo);
631 *result = sbinfo->blocknr_hint_default;
632 assert("zam-677", *result < sbinfo->block_count);
633 spin_unlock_reiser4_super(sbinfo);
636 /* Allocate "real" disk blocks by calling a proper space allocation plugin
637 * method. Blocks are allocated in one contiguous disk region. The plugin
638 * independent part accounts blocks by subtracting allocated amount from grabbed
639 * or fake block counter and add the same amount to the counter of allocated
640 * blocks.
642 * @hint -- a reiser4 blocknr hint object which contains further block
643 * allocation hints and parameters (search start, a stage of block
644 * which will be mapped to disk, etc.),
645 * @blk -- an out parameter for the beginning of the allocated region,
646 * @len -- in/out parameter, it should contain the maximum number of allocated
647 * blocks, after block allocation completes, it contains the length of
648 * allocated disk region.
649 * @flags -- see reiser4_ba_flags_t description.
651 * @return -- 0 if success, error code otherwise.
654 reiser4_alloc_blocks(reiser4_blocknr_hint * hint, reiser4_block_nr * blk,
655 reiser4_block_nr * len, reiser4_ba_flags_t flags)
657 __u64 needed = *len;
658 reiser4_context *ctx;
659 reiser4_super_info_data *sbinfo;
660 int ret;
662 assert("zam-986", hint != NULL);
664 ctx = get_current_context();
665 sbinfo = get_super_private(ctx->super);
667 /* For write-optimized data we use default search start value, which is
668 * close to last write location. */
669 if (flags & BA_USE_DEFAULT_SEARCH_START)
670 get_blocknr_hint_default(&hint->blk);
672 /* VITALY: allocator should grab this for internal/tx-lists/similar
673 only. */
674 /* VS-FIXME-HANS: why is this comment above addressed to vitaly (from vitaly)?*/
675 if (hint->block_stage == BLOCK_NOT_COUNTED) {
676 ret = reiser4_grab_space_force(*len, flags);
677 if (ret != 0)
678 return ret;
681 ret =
682 sa_alloc_blocks(reiser4_get_space_allocator(ctx->super),
683 hint, (int)needed, blk, len);
685 if (!ret) {
686 assert("zam-680", *blk < reiser4_block_count(ctx->super));
687 assert("zam-681",
688 *blk + *len <= reiser4_block_count(ctx->super));
690 if (flags & BA_PERMANENT) {
691 /* we assume that current atom exists at this moment */
692 txn_atom *atom = get_current_atom_locked();
693 atom->nr_blocks_allocated += *len;
694 spin_unlock_atom(atom);
697 switch (hint->block_stage) {
698 case BLOCK_NOT_COUNTED:
699 case BLOCK_GRABBED:
700 grabbed2used(ctx, sbinfo, *len);
701 break;
702 case BLOCK_UNALLOCATED:
703 fake_allocated2used(sbinfo, *len, flags);
704 break;
705 case BLOCK_FLUSH_RESERVED:
707 txn_atom *atom = get_current_atom_locked();
708 flush_reserved2used(atom, *len);
709 spin_unlock_atom(atom);
711 break;
712 default:
713 impossible("zam-531", "wrong block stage");
715 } else {
716 assert("zam-821",
717 ergo(hint->max_dist == 0
718 && !hint->backward, ret != -ENOSPC));
719 if (hint->block_stage == BLOCK_NOT_COUNTED)
720 grabbed2free(ctx, sbinfo, needed);
723 return ret;
726 /* used -> fake_allocated -> grabbed -> free */
728 /* adjust sb block counters when @count unallocated blocks get unmapped from
729 disk */
730 static void
731 used2fake_allocated(reiser4_super_info_data * sbinfo, __u64 count,
732 int formatted)
734 spin_lock_reiser4_super(sbinfo);
736 if (formatted)
737 sbinfo->blocks_fake_allocated += count;
738 else
739 sbinfo->blocks_fake_allocated_unformatted += count;
741 sub_from_sb_used(sbinfo, count);
743 assert("nikita-2681",
744 reiser4_check_block_counters(reiser4_get_current_sb()));
746 spin_unlock_reiser4_super(sbinfo);
749 static void
750 used2flush_reserved(reiser4_super_info_data * sbinfo, txn_atom * atom,
751 __u64 count, reiser4_ba_flags_t flags UNUSED_ARG)
753 assert("nikita-2791", atom != NULL);
754 assert_spin_locked(&(atom->alock));
756 add_to_atom_flush_reserved_nolock(atom, (__u32) count);
758 spin_lock_reiser4_super(sbinfo);
760 sbinfo->blocks_flush_reserved += count;
761 /*add_to_sb_flush_reserved(sbinfo, count); */
762 sub_from_sb_used(sbinfo, count);
764 assert("nikita-2681",
765 reiser4_check_block_counters(reiser4_get_current_sb()));
767 spin_unlock_reiser4_super(sbinfo);
770 /* disk space, virtually used by fake block numbers is counted as "grabbed"
771 again. */
772 static void
773 fake_allocated2grabbed(reiser4_context * ctx, reiser4_super_info_data * sbinfo,
774 __u64 count, reiser4_ba_flags_t flags)
776 add_to_ctx_grabbed(ctx, count);
778 spin_lock_reiser4_super(sbinfo);
780 assert("nikita-2682", reiser4_check_block_counters(ctx->super));
782 sbinfo->blocks_grabbed += count;
783 sub_from_sb_fake_allocated(sbinfo, count, flags & BA_FORMATTED);
785 assert("nikita-2683", reiser4_check_block_counters(ctx->super));
787 spin_unlock_reiser4_super(sbinfo);
790 void fake_allocated2free(__u64 count, reiser4_ba_flags_t flags)
792 reiser4_context *ctx;
793 reiser4_super_info_data *sbinfo;
795 ctx = get_current_context();
796 sbinfo = get_super_private(ctx->super);
798 fake_allocated2grabbed(ctx, sbinfo, count, flags);
799 grabbed2free(ctx, sbinfo, count);
802 void grabbed2free_mark(__u64 mark)
804 reiser4_context *ctx;
805 reiser4_super_info_data *sbinfo;
807 ctx = get_current_context();
808 sbinfo = get_super_private(ctx->super);
810 assert("nikita-3007", (__s64) mark >= 0);
811 assert("nikita-3006", ctx->grabbed_blocks >= mark);
812 grabbed2free(ctx, sbinfo, ctx->grabbed_blocks - mark);
816 * grabbed2free - adjust grabbed and free block counters
817 * @ctx: context to update grabbed block counter of
818 * @sbinfo: super block to update grabbed and free block counters of
819 * @count: number of blocks to adjust counters by
821 * Decreases context's and per filesystem's counters of grabbed
822 * blocks. Increases per filesystem's counter of free blocks.
824 void grabbed2free(reiser4_context *ctx, reiser4_super_info_data *sbinfo,
825 __u64 count)
827 sub_from_ctx_grabbed(ctx, count);
829 spin_lock_reiser4_super(sbinfo);
831 sub_from_sb_grabbed(sbinfo, count);
832 sbinfo->blocks_free += count;
833 assert("nikita-2684", reiser4_check_block_counters(ctx->super));
835 spin_unlock_reiser4_super(sbinfo);
838 void grabbed2flush_reserved_nolock(txn_atom * atom, __u64 count)
840 reiser4_context *ctx;
841 reiser4_super_info_data *sbinfo;
843 assert("vs-1095", atom);
845 ctx = get_current_context();
846 sbinfo = get_super_private(ctx->super);
848 sub_from_ctx_grabbed(ctx, count);
850 add_to_atom_flush_reserved_nolock(atom, count);
852 spin_lock_reiser4_super(sbinfo);
854 sbinfo->blocks_flush_reserved += count;
855 sub_from_sb_grabbed(sbinfo, count);
857 assert("vpf-292", reiser4_check_block_counters(ctx->super));
859 spin_unlock_reiser4_super(sbinfo);
862 void grabbed2flush_reserved(__u64 count)
864 txn_atom *atom = get_current_atom_locked();
866 grabbed2flush_reserved_nolock(atom, count);
868 spin_unlock_atom(atom);
871 void flush_reserved2grabbed(txn_atom * atom, __u64 count)
873 reiser4_context *ctx;
874 reiser4_super_info_data *sbinfo;
876 assert("nikita-2788", atom != NULL);
877 assert_spin_locked(&(atom->alock));
879 ctx = get_current_context();
880 sbinfo = get_super_private(ctx->super);
882 add_to_ctx_grabbed(ctx, count);
884 sub_from_atom_flush_reserved_nolock(atom, (__u32) count);
886 spin_lock_reiser4_super(sbinfo);
888 sbinfo->blocks_grabbed += count;
889 sub_from_sb_flush_reserved(sbinfo, count);
891 assert("vpf-292", reiser4_check_block_counters(ctx->super));
893 spin_unlock_reiser4_super(sbinfo);
897 * all_grabbed2free - releases all blocks grabbed in context
899 * Decreases context's and super block's grabbed block counters by number of
900 * blocks grabbed by current context and increases super block's free block
901 * counter correspondingly.
903 void all_grabbed2free(void)
905 reiser4_context *ctx = get_current_context();
907 grabbed2free(ctx, get_super_private(ctx->super), ctx->grabbed_blocks);
910 /* adjust sb block counters if real (on-disk) blocks do not become unallocated
911 after freeing, @count blocks become "grabbed". */
912 static void
913 used2grabbed(reiser4_context * ctx, reiser4_super_info_data * sbinfo,
914 __u64 count)
916 add_to_ctx_grabbed(ctx, count);
918 spin_lock_reiser4_super(sbinfo);
920 sbinfo->blocks_grabbed += count;
921 sub_from_sb_used(sbinfo, count);
923 assert("nikita-2685", reiser4_check_block_counters(ctx->super));
925 spin_unlock_reiser4_super(sbinfo);
928 /* this used to be done through used2grabbed and grabbed2free*/
929 static void used2free(reiser4_super_info_data * sbinfo, __u64 count)
931 spin_lock_reiser4_super(sbinfo);
933 sbinfo->blocks_free += count;
934 sub_from_sb_used(sbinfo, count);
936 assert("nikita-2685",
937 reiser4_check_block_counters(reiser4_get_current_sb()));
939 spin_unlock_reiser4_super(sbinfo);
942 #if REISER4_DEBUG
944 /* check "allocated" state of given block range */
945 static void
946 reiser4_check_blocks(const reiser4_block_nr * start,
947 const reiser4_block_nr * len, int desired)
949 sa_check_blocks(start, len, desired);
952 /* check "allocated" state of given block */
953 void reiser4_check_block(const reiser4_block_nr * block, int desired)
955 const reiser4_block_nr one = 1;
957 reiser4_check_blocks(block, &one, desired);
960 #endif
962 /* Blocks deallocation function may do an actual deallocation through space
963 plugin allocation or store deleted block numbers in atom's delete_set data
964 structure depend on @defer parameter. */
966 /* if BA_DEFER bit is not turned on, @target_stage means the stage of blocks
967 which will be deleted from WORKING bitmap. They might be just unmapped from
968 disk, or freed but disk space is still grabbed by current thread, or these
969 blocks must not be counted in any reiser4 sb block counters,
970 see block_stage_t comment */
972 /* BA_FORMATTED bit is only used when BA_DEFER is not present: it is used to
973 distinguish blocks allocated for unformatted and formatted nodes */
976 reiser4_dealloc_blocks(const reiser4_block_nr * start,
977 const reiser4_block_nr * len,
978 block_stage_t target_stage, reiser4_ba_flags_t flags)
980 txn_atom *atom = NULL;
981 int ret;
982 reiser4_context *ctx;
983 reiser4_super_info_data *sbinfo;
985 ctx = get_current_context();
986 sbinfo = get_super_private(ctx->super);
988 if (REISER4_DEBUG) {
989 assert("zam-431", *len != 0);
990 assert("zam-432", *start != 0);
991 assert("zam-558", !reiser4_blocknr_is_fake(start));
993 spin_lock_reiser4_super(sbinfo);
994 assert("zam-562", *start < sbinfo->block_count);
995 spin_unlock_reiser4_super(sbinfo);
998 if (flags & BA_DEFER) {
999 blocknr_set_entry *bsep = NULL;
1001 /* storing deleted block numbers in a blocknr set
1002 datastructure for further actual deletion */
1003 do {
1004 atom = get_current_atom_locked();
1005 assert("zam-430", atom != NULL);
1007 ret =
1008 blocknr_set_add_extent(atom, &atom->delete_set,
1009 &bsep, start, len);
1011 if (ret == -ENOMEM)
1012 return ret;
1014 /* This loop might spin at most two times */
1015 } while (ret == -E_REPEAT);
1017 assert("zam-477", ret == 0);
1018 assert("zam-433", atom != NULL);
1020 spin_unlock_atom(atom);
1022 } else {
1023 assert("zam-425", get_current_super_private() != NULL);
1024 sa_dealloc_blocks(reiser4_get_space_allocator(ctx->super),
1025 *start, *len);
1027 if (flags & BA_PERMANENT) {
1028 /* These blocks were counted as allocated, we have to
1029 * revert it back if allocation is discarded. */
1030 txn_atom *atom = get_current_atom_locked();
1031 atom->nr_blocks_allocated -= *len;
1032 spin_unlock_atom(atom);
1035 switch (target_stage) {
1036 case BLOCK_NOT_COUNTED:
1037 assert("vs-960", flags & BA_FORMATTED);
1038 /* VITALY: This is what was grabbed for
1039 internal/tx-lists/similar only */
1040 used2free(sbinfo, *len);
1041 break;
1043 case BLOCK_GRABBED:
1044 used2grabbed(ctx, sbinfo, *len);
1045 break;
1047 case BLOCK_UNALLOCATED:
1048 used2fake_allocated(sbinfo, *len, flags & BA_FORMATTED);
1049 break;
1051 case BLOCK_FLUSH_RESERVED:{
1052 txn_atom *atom;
1054 atom = get_current_atom_locked();
1055 used2flush_reserved(sbinfo, atom, *len,
1056 flags & BA_FORMATTED);
1057 spin_unlock_atom(atom);
1058 break;
1060 default:
1061 impossible("zam-532", "wrong block stage");
1065 return 0;
1068 /* wrappers for block allocator plugin methods */
1069 int reiser4_pre_commit_hook(void)
1071 assert("zam-502", get_current_super_private() != NULL);
1072 sa_pre_commit_hook();
1073 return 0;
1076 /* an actor which applies delete set to block allocator data */
1077 static int
1078 apply_dset(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a,
1079 const reiser4_block_nr * b, void *data UNUSED_ARG)
1081 reiser4_context *ctx;
1082 reiser4_super_info_data *sbinfo;
1084 __u64 len = 1;
1086 ctx = get_current_context();
1087 sbinfo = get_super_private(ctx->super);
1089 assert("zam-877", atom->stage >= ASTAGE_PRE_COMMIT);
1090 assert("zam-552", sbinfo != NULL);
1092 if (b != NULL)
1093 len = *b;
1095 if (REISER4_DEBUG) {
1096 spin_lock_reiser4_super(sbinfo);
1098 assert("zam-554", *a < reiser4_block_count(ctx->super));
1099 assert("zam-555", *a + len <= reiser4_block_count(ctx->super));
1101 spin_unlock_reiser4_super(sbinfo);
1104 sa_dealloc_blocks(&sbinfo->space_allocator, *a, len);
1105 /* adjust sb block counters */
1106 used2free(sbinfo, len);
1107 return 0;
1110 void reiser4_post_commit_hook(void)
1112 txn_atom *atom;
1114 atom = get_current_atom_locked();
1115 assert("zam-452", atom->stage == ASTAGE_POST_COMMIT);
1116 spin_unlock_atom(atom);
1118 /* do the block deallocation which was deferred
1119 until commit is done */
1120 blocknr_set_iterator(atom, &atom->delete_set, apply_dset, NULL, 1);
1122 assert("zam-504", get_current_super_private() != NULL);
1123 sa_post_commit_hook();
1126 void reiser4_post_write_back_hook(void)
1128 assert("zam-504", get_current_super_private() != NULL);
1130 sa_post_commit_hook();
1134 Local variables:
1135 c-indentation-style: "K&R"
1136 mode-name: "LC"
1137 c-basic-offset: 8
1138 tab-width: 8
1139 fill-column: 120
1140 scroll-step: 1
1141 End: