/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
#include "../../debug.h"
#include "../../dformat.h"
#include "../../txnmgr.h"
#include "../../jnode.h"
#include "../../block_alloc.h"
#include "../../tree.h"
#include "../../super.h"
#include "../plugin.h"
#include "space_allocator.h"

#include <linux/types.h>
#include <linux/fs.h>		/* for struct super_block */
#include <linux/mutex.h>
#include <asm/div64.h>
/* Proposed (but discarded) optimization: dynamic loading/unloading of bitmap
   blocks.

   A useful optimization of reiser4 bitmap handling would be dynamic loading
   and unloading of bitmap blocks, unlike v3.x where all bitmap blocks are
   loaded at mount time.

   To implement bitmap block unloading we need to count bitmap block usage
   and detect currently unused blocks, allowing them to be unloaded. It is
   not a simple task since we allow several threads to modify one bitmap
   block simultaneously.

   Briefly speaking, the following scheme was proposed: a counter is
   associated with each bitmap block to count block alloc/dealloc operations
   on that bitmap block. With the deferred block deallocation feature of
   reiser4, all those operations will be represented in atom dirty/deleted
   lists as jnodes for freshly allocated or deleted nodes.

   So, we increment the usage counter for each new node allocated or deleted,
   and decrement it at atom commit once for each node from the atom's
   dirty/deleted list. Of course, deletion of a freshly allocated node and
   node reuse from the atom's deleted list (if we do so) should decrement the
   bitmap usage counter as well.

   This scheme seems workable, but such reference counting is not easy to
   debug, so we agreed with Hans not to implement it in v4.0. Current code
   implements "on-demand" bitmap block loading only.

   For simplicity, all bitmap nodes (both commit and working bitmap blocks)
   are either loaded into memory at fs mount time, or each bitmap node is
   loaded at the first access to it; the "dont_load_bitmap" mount option
   controls whether bitmap nodes should be loaded at mount time. Dynamic
   unloading of bitmap nodes is currently not supported. */

#define CHECKSUM_SIZE 4

#define BYTES_PER_LONG (sizeof(long))

#if BITS_PER_LONG == 64
#  define LONG_INT_SHIFT (6)
#else
#  define LONG_INT_SHIFT (5)
#endif

#define LONG_INT_MASK (BITS_PER_LONG - 1UL)

typedef unsigned long ulong_t;

#define bmap_size(blocksize)	  ((blocksize) - CHECKSUM_SIZE)
#define bmap_bit_count(blocksize) (bmap_size(blocksize) << 3)

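/* Example (for a 4096-byte blocksize): the first CHECKSUM_SIZE bytes of each
 * bitmap block hold an adler32 checksum, so bmap_size() = 4092 bytes of
 * bitmap data remain and bmap_bit_count() = 4092 * 8 = 32736 blocks are
 * addressed by one bitmap block. */
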
/* Block allocation/deallocation are done through special bitmap objects which
   are allocated in an array at fs mount. */
struct bitmap_node {
	struct mutex mutex;	/* long term lock object */

	jnode *wjnode;		/* j-nodes for WORKING ... */
	jnode *cjnode;		/* ... and COMMIT bitmap blocks */

	bmap_off_t first_zero_bit;	/* for skip_busy option implementation */

	atomic_t loaded;	/* a flag which shows that bnode is loaded
				 * already */
};

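/* Note on the two copies: the WORKING bitmap reflects the current in-memory
 * state of the allocator (searches and immediate deallocations operate on
 * it), while the COMMIT bitmap holds the state that goes to disk;
 * reiser4_pre_commit_hook_bitmap() below folds a committing atom's changes
 * into the COMMIT bitmap. */
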
static inline char *bnode_working_data(struct bitmap_node *bnode)
{
	char *data;

	data = jdata(bnode->wjnode);
	assert("zam-429", data != NULL);

	return data + CHECKSUM_SIZE;
}

static inline char *bnode_commit_data(const struct bitmap_node *bnode)
{
	char *data;

	data = jdata(bnode->cjnode);
	assert("zam-430", data != NULL);

	return data + CHECKSUM_SIZE;
}

static inline __u32 bnode_commit_crc(const struct bitmap_node *bnode)
{
	char *data;

	data = jdata(bnode->cjnode);
	assert("vpf-261", data != NULL);

	return le32_to_cpu(get_unaligned((d32 *)data));
}

static inline void bnode_set_commit_crc(struct bitmap_node *bnode, __u32 crc)
{
	char *data;

	data = jdata(bnode->cjnode);
	assert("vpf-261", data != NULL);

	put_unaligned(cpu_to_le32(crc), (d32 *)data);
}

/* ZAM-FIXME-HANS: is the idea that this might be a union someday? having
 * written the code, does this added abstraction still have value? */
/* ANSWER(Zam): No, the abstraction is in the level above (the exact place is
 * the reiser4_space_allocator structure) */
/* ZAM-FIXME-HANS: I don't understand your english in the comment above. */
/* FIXME-HANS(Zam): I don't understand questions like "might be a union
 * someday?". What are they about? If there is a reason to have a union, it
 * should be a union; if not, it should not be a union. "..might be someday"
 * means no. */
struct bitmap_allocator_data {
	/* an array for bitmap blocks direct access */
	struct bitmap_node *bitmap;
};

#define get_barray(super) \
(((struct bitmap_allocator_data *)(get_super_private(super)->space_allocator.u.generic))->bitmap)

#define get_bnode(super, i) (get_barray(super) + i)

/* allocate and initialize jnode with JNODE_BITMAP type */
static jnode *bnew(void)
{
	jnode *jal = jalloc();

	if (jal)
		jnode_init(jal, current_tree, JNODE_BITMAP);

	return jal;
}

/* this file contains:
   - bitmap based implementation of space allocation plugin
   - all the helper functions like set bit, find_first_zero_bit, etc */

/* Audited by: green(2002.06.12) */
static int find_next_zero_bit_in_word(ulong_t word, int start_bit)
{
	unsigned int i = start_bit;
	ulong_t mask = 1UL << start_bit;

	while ((word & mask) != 0) {
		mask <<= 1;
		if (++i >= BITS_PER_LONG)
			break;
	}

	return i;
}

#include <linux/bitops.h>

#if BITS_PER_LONG == 64

#define OFF(addr) (((ulong_t)(addr) & (BYTES_PER_LONG - 1)) << 3)
#define BASE(addr) ((ulong_t *)((ulong_t)(addr) & ~(BYTES_PER_LONG - 1)))

static inline void reiser4_set_bit(int nr, void *addr)
{
	ext2_set_bit(nr + OFF(addr), BASE(addr));
}

static inline void reiser4_clear_bit(int nr, void *addr)
{
	ext2_clear_bit(nr + OFF(addr), BASE(addr));
}

static inline int reiser4_test_bit(int nr, void *addr)
{
	return ext2_test_bit(nr + OFF(addr), BASE(addr));
}

static inline int reiser4_find_next_zero_bit(void *addr, int maxoffset,
					     int offset)
{
	int off = OFF(addr);

	return ext2_find_next_zero_bit(BASE(addr), maxoffset + off,
				       offset + off) - off;
}

#else

#define reiser4_set_bit(nr, addr)   ext2_set_bit(nr, addr)
#define reiser4_clear_bit(nr, addr) ext2_clear_bit(nr, addr)
#define reiser4_test_bit(nr, addr)  ext2_test_bit(nr, addr)

#define reiser4_find_next_zero_bit(addr, maxoffset, offset) \
	ext2_find_next_zero_bit(addr, maxoffset, offset)

#endif

/* Search for a set bit in the bit array [@start_offset, @max_offset), where
 * offsets are counted from @addr; return the offset of the first set bit if
 * one is found, @max_offset otherwise. */
static bmap_off_t
__reiser4_find_next_set_bit(void *addr, bmap_off_t max_offset,
			    bmap_off_t start_offset)
{
	ulong_t *base = addr;
	/* start_offset is in bits, convert it to a word index within the
	 * bitmap. */
	int word_nr = start_offset >> LONG_INT_SHIFT;
	/* bit number within the word. */
	int bit_nr = start_offset & LONG_INT_MASK;
	int max_word_nr = (max_offset - 1) >> LONG_INT_SHIFT;

	assert("zam-387", max_offset != 0);

	/* Unaligned @start_offset case. */
	if (bit_nr != 0) {
		bmap_nr_t nr;

		nr = find_next_zero_bit_in_word(~(base[word_nr]), bit_nr);

		if (nr < BITS_PER_LONG)
			return (word_nr << LONG_INT_SHIFT) + nr;

		++word_nr;
	}

	/* Fast scan through aligned words. */
	while (word_nr <= max_word_nr) {
		if (base[word_nr] != 0) {
			return (word_nr << LONG_INT_SHIFT)
			    + find_next_zero_bit_in_word(~(base[word_nr]), 0);
		}

		++word_nr;
	}

	return max_offset;
}

#if BITS_PER_LONG == 64
static bmap_off_t reiser4_find_next_set_bit(void *addr, bmap_off_t max_offset,
					    bmap_off_t start_offset)
{
	bmap_off_t off = OFF(addr);

	return __reiser4_find_next_set_bit(BASE(addr), max_offset + off,
					   start_offset + off) - off;
}
#else
#define reiser4_find_next_set_bit(addr, max_offset, start_offset) \
	__reiser4_find_next_set_bit(addr, max_offset, start_offset)
#endif

/* search for the last set bit in a single word, scanning downward from
 * @start_bit. */
static int find_last_set_bit_in_word(ulong_t word, int start_bit)
{
	ulong_t bit_mask;
	int nr = start_bit;

	assert("zam-965", start_bit < BITS_PER_LONG);
	assert("zam-966", start_bit >= 0);

	bit_mask = (1UL << nr);

	while (bit_mask != 0) {
		if (bit_mask & word)
			return nr;
		bit_mask >>= 1;
		nr--;
	}
	return BITS_PER_LONG;
}

/* Search bitmap for a set bit in backward direction from the end to the
 * beginning of given region
 *
 * @result: result offset of the last set bit
 * @addr: base memory address,
 * @low_off: low end of the search region, edge bit included into the region,
 * @high_off: high end of the search region, edge bit included into the region,
 *
 * @return: 0 - set bit was found, -1 otherwise.
 */
static int
reiser4_find_last_set_bit(bmap_off_t * result, void *addr, bmap_off_t low_off,
			  bmap_off_t high_off)
{
	ulong_t *base = addr;
	int last_word;
	int first_word;
	int last_bit;
	int nr;

	assert("zam-962", high_off >= low_off);

	last_word = high_off >> LONG_INT_SHIFT;
	last_bit = high_off & LONG_INT_MASK;
	first_word = low_off >> LONG_INT_SHIFT;

	if (last_bit < BITS_PER_LONG) {
		nr = find_last_set_bit_in_word(base[last_word], last_bit);
		if (nr < BITS_PER_LONG) {
			*result = (last_word << LONG_INT_SHIFT) + nr;
			return 0;
		}
		--last_word;
	}
	while (last_word >= first_word) {
		if (base[last_word] != 0x0) {
			last_bit =
			    find_last_set_bit_in_word(base[last_word],
						      BITS_PER_LONG - 1);
			assert("zam-972", last_bit < BITS_PER_LONG);
			*result = (last_word << LONG_INT_SHIFT) + last_bit;
			return 0;
		}
		--last_word;
	}

	return -1;	/* set bit not found */
}

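/* Example: with low_off == 0 and high_off == 4 over a word whose bits 2 and
 * 5 are set (0b100100), the downward scan from bit 4 finds bit 2, so *result
 * is set to 2 and 0 is returned; bit 5 lies outside the region and is
 * ignored. */
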
/* Search bitmap for a clear bit in backward direction from the end to the
 * beginning of given region */
static int
reiser4_find_last_zero_bit(bmap_off_t * result, void *addr, bmap_off_t low_off,
			   bmap_off_t high_off)
{
	ulong_t *base = addr;
	int last_word;
	int first_word;
	int last_bit;
	int nr;

	last_word = high_off >> LONG_INT_SHIFT;
	last_bit = high_off & LONG_INT_MASK;
	first_word = low_off >> LONG_INT_SHIFT;

	if (last_bit < BITS_PER_LONG) {
		nr = find_last_set_bit_in_word(~base[last_word], last_bit);
		if (nr < BITS_PER_LONG) {
			*result = (last_word << LONG_INT_SHIFT) + nr;
			return 0;
		}
		--last_word;
	}
	while (last_word >= first_word) {
		if (base[last_word] != (ulong_t) (-1)) {
			*result = (last_word << LONG_INT_SHIFT) +
			    find_last_set_bit_in_word(~base[last_word],
						      BITS_PER_LONG - 1);
			return 0;
		}
		--last_word;
	}

	return -1;	/* zero bit not found */
}

/* Audited by: green(2002.06.12) */
static void reiser4_clear_bits(char *addr, bmap_off_t start, bmap_off_t end)
{
	int first_byte;
	int last_byte;

	unsigned char first_byte_mask = 0xFF;
	unsigned char last_byte_mask = 0xFF;

	assert("zam-410", start < end);

	first_byte = start >> 3;
	last_byte = (end - 1) >> 3;

	if (last_byte > first_byte + 1)
		memset(addr + first_byte + 1, 0,
		       (size_t) (last_byte - first_byte - 1));

	first_byte_mask >>= 8 - (start & 0x7);
	last_byte_mask <<= ((end - 1) & 0x7) + 1;

	if (first_byte == last_byte) {
		addr[first_byte] &= (first_byte_mask | last_byte_mask);
	} else {
		addr[first_byte] &= first_byte_mask;
		addr[last_byte] &= last_byte_mask;
	}
}

/* Audited by: green(2002.06.12) */
/* Set all bits in the [@start, @end) interval of the byte-addressed bitmap
   at @addr: whole bytes in the middle are filled with memset(), the partial
   first and last bytes are patched with bit masks. */
static void reiser4_set_bits(char *addr, bmap_off_t start, bmap_off_t end)
{
	int first_byte;
	int last_byte;

	unsigned char first_byte_mask = 0xFF;
	unsigned char last_byte_mask = 0xFF;

	assert("zam-386", start < end);

	first_byte = start >> 3;
	last_byte = (end - 1) >> 3;

	if (last_byte > first_byte + 1)
		memset(addr + first_byte + 1, 0xFF,
		       (size_t) (last_byte - first_byte - 1));

	first_byte_mask <<= start & 0x7;
	last_byte_mask >>= 7 - ((end - 1) & 0x7);

	if (first_byte == last_byte) {
		addr[first_byte] |= (first_byte_mask & last_byte_mask);
	} else {
		addr[first_byte] |= first_byte_mask;
		addr[last_byte] |= last_byte_mask;
	}
}

#define ADLER_BASE    65521
#define ADLER_NMAX    5552

/* Calculates the adler32 checksum for the data pointed by `data` of the
    length `len`. This function was originally taken from zlib, version 1.1.3,
    July 9th, 1998.

    Copyright (C) 1995-1998 Jean-loup Gailly and Mark Adler

    This software is provided 'as-is', without any express or implied
    warranty. In no event will the authors be held liable for any damages
    arising from the use of this software.

    Permission is granted to anyone to use this software for any purpose,
    including commercial applications, and to alter it and redistribute it
    freely, subject to the following restrictions:

    1. The origin of this software must not be misrepresented; you must not
       claim that you wrote the original software. If you use this software
       in a product, an acknowledgment in the product documentation would be
       appreciated but is not required.
    2. Altered source versions must be plainly marked as such, and must not be
       misrepresented as being the original software.
    3. This notice may not be removed or altered from any source distribution.

    Jean-loup Gailly        Mark Adler
    jloup@gzip.org          madler@alumni.caltech.edu

    The above comment applies only to the reiser4_adler32 function.
*/

__u32 reiser4_adler32(char *data, __u32 len)
{
	unsigned char *t = data;
	__u32 s1 = 1;
	__u32 s2 = 0;
	int k;

	while (len > 0) {
		k = len < ADLER_NMAX ? len : ADLER_NMAX;
		len -= k;

		while (k--) {
			s1 += *t++;
			s2 += s1;
		}

		s1 %= ADLER_BASE;
		s2 %= ADLER_BASE;
	}
	return (s2 << 16) | s1;
}

#define sb_by_bnode(bnode) \
	((struct super_block *)jnode_get_tree(bnode->wjnode)->super)

static __u32 bnode_calc_crc(const struct bitmap_node *bnode,
			    unsigned long size)
{
	return reiser4_adler32(bnode_commit_data(bnode), bmap_size(size));
}

static int
bnode_check_adler32(const struct bitmap_node *bnode, unsigned long size)
{
	if (bnode_calc_crc(bnode, size) != bnode_commit_crc(bnode)) {
		bmap_nr_t bmap;

		bmap = bnode - get_bnode(sb_by_bnode(bnode), 0);

		warning("vpf-263",
			"Checksum for the bitmap block %llu is incorrect",
			bmap);

		return RETERR(-EIO);
	}

	return 0;
}

#define REISER4_CHECK_BMAP_CRC (0)

#if REISER4_CHECK_BMAP_CRC
static int bnode_check_crc(const struct bitmap_node *bnode)
{
	return bnode_check_adler32(bnode,
				   bmap_size(sb_by_bnode(bnode)->s_blocksize));
}
#else
#define bnode_check_crc(bnode) (0)
#endif	/* REISER4_CHECK_BMAP_CRC */

/* Recalculates the adler32 checksum after a change to a single byte.
    adler - previous adler checksum
    old_data, data - old and new byte values.
    tail == (chunk_len - offset): the length the checksum was calculated
    over, minus the offset of the changed byte within that chunk.
    This function can be used for checksum calculation optimisation.
*/
static __u32
adler32_recalc(__u32 adler, unsigned char old_data, unsigned char data,
	       __u32 tail)
{
	__u32 delta = data - old_data + 2 * ADLER_BASE;
	__u32 s1 = adler & 0xffff;
	__u32 s2 = (adler >> 16) & 0xffff;

	s1 = (delta + s1) % ADLER_BASE;
	s2 = (delta * tail + s2) % ADLER_BASE;

	return (s2 << 16) | s1;
}

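/* Why this works (a sketch): for adler32 over bytes b[0..n-1], s1 = 1 +
 * sum(b[i]) and s2 = n + sum((n - i) * b[i]), both mod ADLER_BASE. Changing
 * the byte at offset off by delta therefore shifts s1 by delta and s2 by
 * (n - off) * delta == tail * delta; the 2 * ADLER_BASE term only keeps the
 * unsigned arithmetic non-negative before the modulo. Worked example:
 * adler32("ab") has s1 = 196, s2 = 294; replacing 'a' (97) with 'c' (99)
 * at offset 0 gives delta = 2, tail = 2, hence s1 = 198, s2 = 298, which
 * matches adler32("cb") computed directly. */
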
#define LIMIT(val, boundary) ((val) > (boundary) ? (boundary) : (val))

/**
 * get_nr_bmap - calculate number of bitmap blocks
 * @super: super block with initialized blocksize and block count
 *
 * Calculates the number of bitmap blocks of a filesystem which uses bitmaps
 * to maintain free disk space. It assumes that each bitmap addresses the
 * same number of blocks, which is calculated by the bmap_bit_count macro
 * defined above. The number of blocks in the filesystem has to be
 * initialized in the reiser4 private data of the super block already, so
 * that it can be obtained via reiser4_block_count(). Unfortunately, the
 * number of blocks addressed by a bitmap is not a power of 2 because 4
 * bytes are used for the checksum. Therefore, we have to use a special
 * function (do_div()) to divide and modulo 64-bit filesystem block numbers.
 *
 * Example: suppose a filesystem has 32768 blocks. Blocksize is 4096. Each
 * bitmap block addresses (4096 - 4) * 8 = 32736 blocks. The number of
 * bitmaps to address all 32768 blocks is calculated as
 * (32768 - 1) / 32736 + 1 = 2.
 */
static bmap_nr_t get_nr_bmap(const struct super_block *super)
{
	u64 quotient;

	assert("zam-393", reiser4_block_count(super) != 0);

	quotient = reiser4_block_count(super) - 1;
	do_div(quotient, bmap_bit_count(super->s_blocksize));

	return quotient + 1;
}

/**
 * parse_blocknr - calculate bitmap number and offset in it by block number
 * @block: pointer to block number to calculate location in bitmap of
 * @bmap: pointer where to store bitmap block number
 * @offset: pointer where to store offset within bitmap block
 *
 * Calculates the location of the bit which is responsible for
 * allocation/freeing of block @*block. That location is represented by a
 * bitmap block number and an offset within that bitmap block.
 */
static void
parse_blocknr(const reiser4_block_nr *block, bmap_nr_t *bmap,
	      bmap_off_t *offset)
{
	struct super_block *super = get_current_context()->super;
	u64 quotient = *block;

	*offset = do_div(quotient, bmap_bit_count(super->s_blocksize));
	*bmap = quotient;

	assert("zam-433", *bmap < get_nr_bmap(super));
	assert("", *offset < bmap_bit_count(super->s_blocksize));
}

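/* Example (4096-byte blocksize, 32736 bits per bitmap block): block 40000
 * parses to *bmap = 40000 / 32736 = 1 and *offset = 40000 % 32736 = 7264. */
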
#if REISER4_DEBUG
/* Audited by: green(2002.06.12) */
static void
check_block_range(const reiser4_block_nr * start, const reiser4_block_nr * len)
{
	struct super_block *sb = reiser4_get_current_sb();

	assert("zam-436", sb != NULL);

	assert("zam-455", start != NULL);
	assert("zam-437", *start != 0);
	assert("zam-541", !reiser4_blocknr_is_fake(start));
	assert("zam-441", *start < reiser4_block_count(sb));

	if (len != NULL) {
		assert("zam-438", *len != 0);
		assert("zam-442", *start + *len <= reiser4_block_count(sb));
	}
}

static void check_bnode_loaded(const struct bitmap_node *bnode)
{
	assert("zam-485", bnode != NULL);
	assert("zam-483", jnode_page(bnode->wjnode) != NULL);
	assert("zam-484", jnode_page(bnode->cjnode) != NULL);
	assert("nikita-2820", jnode_is_loaded(bnode->wjnode));
	assert("nikita-2821", jnode_is_loaded(bnode->cjnode));
}

#else

#  define check_block_range(start, len) do { /* nothing */ } while (0)
#  define check_bnode_loaded(bnode)     do { /* nothing */ } while (0)

#endif

/* modify bnode->first_zero_bit (if we free bits before it); bnode should be
   locked */
static inline void
adjust_first_zero_bit(struct bitmap_node *bnode, bmap_off_t offset)
{
	if (offset < bnode->first_zero_bit)
		bnode->first_zero_bit = offset;
}

/* return a physical disk address for logical bitmap number @bmap */
/* FIXME-VS: this is somehow related to disk layout? */
/* ZAM-FIXME-HANS: your answer is? Use not more than one function dereference
 * per block allocation so that performance is not affected. Probably this
 * whole file should be considered part of the disk layout plugin, and other
 * disk layouts can use other defines and efficiency will not be significantly
 * affected. */
#define REISER4_FIRST_BITMAP_BLOCK \
	((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 2)

/* Audited by: green(2002.06.12) */
static void
get_bitmap_blocknr(struct super_block *super, bmap_nr_t bmap,
		   reiser4_block_nr * bnr)
{
	assert("zam-390", bmap < get_nr_bmap(super));

#ifdef CONFIG_REISER4_BADBLOCKS
#define BITMAP_PLUGIN_DISKMAP_ID ((0xc0e1<<16) | (0xe0ff))
	/* Check if the diskmap has this already, first. */
	if (reiser4_get_diskmap_value(BITMAP_PLUGIN_DISKMAP_ID, bmap, bnr) == 0)
		return;		/* Found it in diskmap */
#endif
	/* FIXME_ZAM: before discussing of disk layouts and disk format
	   plugins I implement a bitmap location scheme which is close to the
	   scheme used in reiser 3.6 */
	if (bmap == 0)
		*bnr = REISER4_FIRST_BITMAP_BLOCK;
	else
		*bnr = bmap * bmap_bit_count(super->s_blocksize);
}

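/* Placement example: bitmap 0 lives at the fixed location
 * REISER4_FIRST_BITMAP_BLOCK; every other bitmap N is stored at block
 * N * bmap_bit_count(blocksize), i.e. at the first block of the very region
 * of blocks it describes. */
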
/* construct a fake block number for shadow bitmap (WORKING BITMAP) block */
/* Audited by: green(2002.06.12) */
static void get_working_bitmap_blocknr(bmap_nr_t bmap, reiser4_block_nr * bnr)
{
	*bnr =
	    (reiser4_block_nr) ((bmap & ~REISER4_BLOCKNR_STATUS_BIT_MASK) |
				REISER4_BITMAP_BLOCKS_STATUS_VALUE);
}

/* bnode structure initialization */
static void
init_bnode(struct bitmap_node *bnode,
	   struct super_block *super UNUSED_ARG, bmap_nr_t bmap UNUSED_ARG)
{
	memset(bnode, 0, sizeof(struct bitmap_node));

	mutex_init(&bnode->mutex);
	atomic_set(&bnode->loaded, 0);
}

static void release(jnode * node)
{
	jrelse(node);
	JF_SET(node, JNODE_HEARD_BANSHEE);
	jput(node);
}

/* This function is for internal bitmap.c use because it assumes that the
   jnode is under full control of this thread */
static void done_bnode(struct bitmap_node *bnode)
{
	if (bnode) {
		atomic_set(&bnode->loaded, 0);
		if (bnode->wjnode != NULL)
			release(bnode->wjnode);
		if (bnode->cjnode != NULL)
			release(bnode->cjnode);
		bnode->wjnode = bnode->cjnode = NULL;
	}
}

/* Allocate the commit and working jnodes backing one bitmap block and load
   the commit bitmap from disk. Called only by load_and_lock_bnode(). */
static int prepare_bnode(struct bitmap_node *bnode, jnode **cjnode_ret,
			 jnode **wjnode_ret)
{
	struct super_block *super;
	jnode *cjnode;
	jnode *wjnode;
	bmap_nr_t bmap;
	int ret;

	super = reiser4_get_current_sb();

	*wjnode_ret = wjnode = bnew();
	if (wjnode == NULL) {
		*cjnode_ret = NULL;
		return RETERR(-ENOMEM);
	}

	*cjnode_ret = cjnode = bnew();
	if (cjnode == NULL)
		return RETERR(-ENOMEM);

	bmap = bnode - get_bnode(super, 0);

	get_working_bitmap_blocknr(bmap, &wjnode->blocknr);
	get_bitmap_blocknr(super, bmap, &cjnode->blocknr);

	jref(cjnode);
	jref(wjnode);

	/* load commit bitmap */
	ret = jload_gfp(cjnode, GFP_NOFS, 1);
	if (ret)
		goto error;

	/* allocate memory for working bitmap block. Note that for
	 * bitmaps jinit_new() doesn't actually modify node content,
	 * so parallel calls to this are ok. */
	ret = jinit_new(wjnode, GFP_NOFS);
	if (ret) {
		jrelse(cjnode);
		goto error;
	}

	return 0;

error:
	jput(cjnode);
	jput(wjnode);
	*wjnode_ret = *cjnode_ret = NULL;
	return ret;
}

/* Check the bnode data on read. */
static int check_struct_bnode(struct bitmap_node *bnode, __u32 blksize)
{
	void *data;
	int ret;

	/* Check CRC */
	ret = bnode_check_adler32(bnode, blksize);
	if (ret)
		return ret;

	data = jdata(bnode->cjnode) + CHECKSUM_SIZE;

	/* Check the very first bit -- it must be busy. */
	if (!reiser4_test_bit(0, data)) {
		warning("vpf-1362", "The allocator block %llu is not marked "
			"as used.", (unsigned long long)bnode->cjnode->blocknr);

		return -EINVAL;
	}

	return 0;
}

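/* The first-bit check above works because bit 0 of a bitmap always describes
 * a block that is in use: for bitmap 0 that is the beginning of the disk
 * (the master superblock area), and for any other bitmap it is the bitmap
 * block itself (see get_bitmap_blocknr()). */
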
/* load bitmap blocks "on-demand" */
static int load_and_lock_bnode(struct bitmap_node *bnode)
{
	int ret;

	jnode *cjnode;
	jnode *wjnode;

	assert("nikita-3040", reiser4_schedulable());

/* ZAM-FIXME-HANS: since bitmaps are never unloaded, this does not
 * need to be atomic, right? Just leave a comment that if bitmaps were
 * unloadable, this would need to be atomic. */
	if (atomic_read(&bnode->loaded)) {
		/* bitmap is already loaded, nothing to do */
		check_bnode_loaded(bnode);
		mutex_lock(&bnode->mutex);
		assert("nikita-2827", atomic_read(&bnode->loaded));
		return 0;
	}

	ret = prepare_bnode(bnode, &cjnode, &wjnode);
	if (ret == 0) {
		mutex_lock(&bnode->mutex);

		if (!atomic_read(&bnode->loaded)) {
			assert("nikita-2822", cjnode != NULL);
			assert("nikita-2823", wjnode != NULL);
			assert("nikita-2824", jnode_is_loaded(cjnode));
			assert("nikita-2825", jnode_is_loaded(wjnode));

			bnode->wjnode = wjnode;
			bnode->cjnode = cjnode;

			ret = check_struct_bnode(bnode, current_blocksize);
			if (ret == 0) {
				cjnode = wjnode = NULL;
				atomic_set(&bnode->loaded, 1);
				/* working bitmap is initialized by on-disk
				 * commit bitmap. This should be performed
				 * under mutex. */
				memcpy(bnode_working_data(bnode),
				       bnode_commit_data(bnode),
				       bmap_size(current_blocksize));
			} else
				mutex_unlock(&bnode->mutex);
		} else
			/* race: someone already loaded bitmap while we were
			 * busy initializing data. */
			check_bnode_loaded(bnode);
	}

	if (wjnode != NULL) {
		release(wjnode);
		bnode->wjnode = NULL;
	}
	if (cjnode != NULL) {
		release(cjnode);
		bnode->cjnode = NULL;
	}

	return ret;
}

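/* Locking protocol note: on success load_and_lock_bnode() returns with
 * bnode->mutex held (whether the bitmap was already loaded or has just been
 * read in); release_and_unlock_bnode() is its counterpart. On error the
 * mutex is not held. */
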
static void release_and_unlock_bnode(struct bitmap_node *bnode)
{
	check_bnode_loaded(bnode);
	mutex_unlock(&bnode->mutex);
}

/* This function does all block allocation work but only for one bitmap
   block. */
/* FIXME_ZAM: It does not allow us to allocate block ranges across bitmap
   block responsibility zone boundaries. This had no sense in v3.6 but may
   be useful in v4.x */
/* ZAM-FIXME-HANS: do you mean search one bitmap block forward? */
static int
search_one_bitmap_forward(bmap_nr_t bmap, bmap_off_t * offset,
			  bmap_off_t max_offset, int min_len, int max_len)
{
	struct super_block *super = get_current_context()->super;
	struct bitmap_node *bnode = get_bnode(super, bmap);

	char *data;

	bmap_off_t search_end;
	bmap_off_t start;
	bmap_off_t end;

	int set_first_zero_bit = 0;

	int ret;

	assert("zam-364", min_len > 0);
	assert("zam-365", max_len >= min_len);
	assert("zam-366", *offset <= max_offset);

	ret = load_and_lock_bnode(bnode);
	if (ret)
		return ret;

	data = bnode_working_data(bnode);

	start = *offset;

	if (bnode->first_zero_bit >= start) {
		start = bnode->first_zero_bit;
		set_first_zero_bit = 1;
	}

	while (start + min_len < max_offset) {
		start =
		    reiser4_find_next_zero_bit((long *)data, max_offset, start);
		if (set_first_zero_bit) {
			bnode->first_zero_bit = start;
			set_first_zero_bit = 0;
		}
		if (start >= max_offset)
			break;

		search_end = LIMIT(start + max_len, max_offset);
		end =
		    reiser4_find_next_set_bit((long *)data, search_end, start);
		if (end >= start + min_len) {
			/* we can't trust the find_next_set_bit result if a
			   set bit was not found, the result may be bigger
			   than max_offset */
			if (end > search_end)
				end = search_end;

			ret = end - start;
			*offset = start;

			reiser4_set_bits(data, start, end);

			/* FIXME: we may advance first_zero_bit if [start,
			   end] region overlaps the first_zero_bit point */

			break;
		}

		start = end + 1;
	}

	release_and_unlock_bnode(bnode);

	return ret;
}

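/* Search sketch: each loop iteration locates the next free extent with
 * reiser4_find_next_zero_bit(), measures it with reiser4_find_next_set_bit()
 * up to @max_len bits and, if at least @min_len bits are free, marks the
 * extent busy in the WORKING bitmap and returns its length (with the start
 * offset stored back into *offset). With min_len == max_len == 1 this
 * degenerates to a plain find-first-zero-bit allocation. */
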
static int
search_one_bitmap_backward(bmap_nr_t bmap, bmap_off_t * start_offset,
			   bmap_off_t end_offset, int min_len, int max_len)
{
	struct super_block *super = get_current_context()->super;
	struct bitmap_node *bnode = get_bnode(super, bmap);
	char *data;
	bmap_off_t start;
	int ret;

	assert("zam-958", min_len > 0);
	assert("zam-959", max_len >= min_len);
	assert("zam-960", *start_offset >= end_offset);

	ret = load_and_lock_bnode(bnode);
	if (ret)
		return ret;

	data = bnode_working_data(bnode);
	start = *start_offset;

	while (1) {
		bmap_off_t end, search_end;

		/* Find the beginning of the zero filled region */
		if (reiser4_find_last_zero_bit(&start, data, end_offset, start))
			break;
		/* Is there more than `min_len' bits from `start' to
		 * `end_offset'? */
		if (start < end_offset + min_len - 1)
			break;

		/* Do not search to `end_offset' if we need to find less than
		 * `max_len' zero bits. */
		if (end_offset + max_len - 1 < start)
			search_end = start - max_len + 1;
		else
			search_end = end_offset;

		if (reiser4_find_last_set_bit(&end, data, search_end, start))
			end = search_end;
		else
			end++;

		if (end + min_len <= start + 1) {
			if (end < search_end)
				end = search_end;
			ret = start - end + 1;
			*start_offset = end;	/* `end' is lowest offset */
			assert("zam-987",
			       reiser4_find_next_set_bit(data, start + 1,
							 end) >= start + 1);
			reiser4_set_bits(data, end, start + 1);
			break;
		}

		if (end <= end_offset)
			/* left search boundary reached. */
			break;
		start = end - 1;
	}

	release_and_unlock_bnode(bnode);
	return ret;
}

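/* Unlike the forward variant, this one scans from high offsets down; on
 * success *start_offset is set to the lowest offset of the allocated extent
 * and the extent length is returned, so the caller still receives a
 * conventional [start, start + len) range. */
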
/* allocate contiguous range of blocks in bitmap */
static int bitmap_alloc_forward(reiser4_block_nr * start,
				const reiser4_block_nr * end, int min_len,
				int max_len)
{
	bmap_nr_t bmap, end_bmap;
	bmap_off_t offset, end_offset;
	int len;

	reiser4_block_nr tmp;

	struct super_block *super = get_current_context()->super;
	const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);

	parse_blocknr(start, &bmap, &offset);

	tmp = *end - 1;
	parse_blocknr(&tmp, &end_bmap, &end_offset);
	++end_offset;

	assert("zam-358", end_bmap >= bmap);
	assert("zam-359", ergo(end_bmap == bmap, end_offset >= offset));

	for (; bmap < end_bmap; bmap++, offset = 0) {
		len =
		    search_one_bitmap_forward(bmap, &offset, max_offset,
					      min_len, max_len);
		if (len != 0)
			goto out;
	}

	len =
	    search_one_bitmap_forward(bmap, &offset, end_offset, min_len,
				      max_len);
out:
	*start = bmap * max_offset + offset;

	return len;
}

/* allocate contiguous range of blocks in bitmap (from @start to @end in
 * backward direction) */
static int bitmap_alloc_backward(reiser4_block_nr * start,
				 const reiser4_block_nr * end, int min_len,
				 int max_len)
{
	bmap_nr_t bmap, end_bmap;
	bmap_off_t offset, end_offset;
	int len;

	struct super_block *super = get_current_context()->super;
	const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);

	parse_blocknr(start, &bmap, &offset);
	parse_blocknr(end, &end_bmap, &end_offset);

	assert("zam-961", end_bmap <= bmap);
	assert("zam-962", ergo(end_bmap == bmap, end_offset <= offset));

	for (; bmap > end_bmap; bmap--, offset = max_offset - 1) {
		len =
		    search_one_bitmap_backward(bmap, &offset, 0, min_len,
					       max_len);
		if (len != 0)
			goto out;
	}

	len =
	    search_one_bitmap_backward(bmap, &offset, end_offset, min_len,
				       max_len);
out:
	*start = bmap * max_offset + offset;

	return len;
}

/* plugin->u.space_allocator.alloc_blocks() */
static int alloc_blocks_forward(reiser4_blocknr_hint *hint, int needed,
				reiser4_block_nr *start, reiser4_block_nr *len)
{
	struct super_block *super = get_current_context()->super;

	int actual_len;

	reiser4_block_nr search_start;
	reiser4_block_nr search_end;

	assert("zam-398", super != NULL);
	assert("zam-412", hint != NULL);
	assert("zam-397", hint->blk <= reiser4_block_count(super));

	if (hint->max_dist == 0)
		search_end = reiser4_block_count(super);
	else
		search_end =
		    LIMIT(hint->blk + hint->max_dist,
			  reiser4_block_count(super));

	/* We use @hint -> blk as a search start and search from it to the end
	   of the disk, or in a given region if @hint -> max_dist is not
	   zero */
	search_start = hint->blk;

	actual_len =
	    bitmap_alloc_forward(&search_start, &search_end, 1, needed);

	/* There is only one bitmap search if max_dist was specified or the
	   first pass was from the beginning of the bitmap. We also do one
	   pass for scanning bitmap in backward direction. */
	if (!(actual_len != 0 || hint->max_dist != 0 || search_start == 0)) {
		/* next step is a scanning from 0 to search_start */
		search_end = search_start;
		search_start = 0;
		actual_len =
		    bitmap_alloc_forward(&search_start, &search_end, 1,
					 needed);
	}
	if (actual_len == 0)
		return RETERR(-ENOSPC);
	if (actual_len < 0)
		return RETERR(actual_len);
	*len = actual_len;
	*start = search_start;
	return 0;
}

static int alloc_blocks_backward(reiser4_blocknr_hint * hint, int needed,
				 reiser4_block_nr * start,
				 reiser4_block_nr * len)
{
	reiser4_block_nr search_start;
	reiser4_block_nr search_end;
	int actual_len;

	ON_DEBUG(struct super_block *super = reiser4_get_current_sb());

	assert("zam-969", super != NULL);
	assert("zam-970", hint != NULL);
	assert("zam-971", hint->blk <= reiser4_block_count(super));

	search_start = hint->blk;
	if (hint->max_dist == 0 || search_start <= hint->max_dist)
		search_end = 0;
	else
		search_end = search_start - hint->max_dist;

	actual_len =
	    bitmap_alloc_backward(&search_start, &search_end, 1, needed);
	if (actual_len == 0)
		return RETERR(-ENOSPC);
	if (actual_len < 0)
		return RETERR(actual_len);
	*len = actual_len;
	*start = search_start;
	return 0;
}

/* plugin->u.space_allocator.alloc_blocks() */
int reiser4_alloc_blocks_bitmap(reiser4_space_allocator * allocator,
				reiser4_blocknr_hint * hint, int needed,
				reiser4_block_nr * start,
				reiser4_block_nr * len)
{
	if (hint->backward)
		return alloc_blocks_backward(hint, needed, start, len);
	return alloc_blocks_forward(hint, needed, start, len);
}

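/* Usage sketch (assuming a caller that has filled a reiser4_blocknr_hint):
 * set hint.blk to the preferred starting block, optionally bound the search
 * with hint.max_dist, and set hint.backward to scan toward lower block
 * numbers; on success *start/*len describe the allocated extent and 0 is
 * returned, otherwise -ENOSPC. */
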
/* plugin->u.space_allocator.dealloc_blocks(). */
/* It just frees blocks in WORKING BITMAP. Usually formatted and unformatted
   node deletion is deferred until transaction commit. However, deallocation
   of temporary objects like wandered blocks and transaction commit records
   requires immediate node deletion from WORKING BITMAP. */
void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator * allocator,
				   reiser4_block_nr start, reiser4_block_nr len)
{
	struct super_block *super = reiser4_get_current_sb();

	bmap_nr_t bmap;
	bmap_off_t offset;

	struct bitmap_node *bnode;
	int ret;

	assert("zam-468", len != 0);
	check_block_range(&start, &len);

	parse_blocknr(&start, &bmap, &offset);

	assert("zam-469", offset + len <= bmap_bit_count(super->s_blocksize));

	bnode = get_bnode(super, bmap);

	assert("zam-470", bnode != NULL);

	ret = load_and_lock_bnode(bnode);
	assert("zam-481", ret == 0);

	reiser4_clear_bits(bnode_working_data(bnode), offset,
			   (bmap_off_t) (offset + len));

	adjust_first_zero_bit(bnode, offset);

	release_and_unlock_bnode(bnode);
}

/* plugin->u.space_allocator.check_blocks(). */
void reiser4_check_blocks_bitmap(const reiser4_block_nr * start,
				 const reiser4_block_nr * len, int desired)
{
#if REISER4_DEBUG
	struct super_block *super = reiser4_get_current_sb();

	bmap_nr_t bmap;
	bmap_off_t start_offset;
	bmap_off_t end_offset;

	struct bitmap_node *bnode;
	int ret;

	assert("zam-622", len != NULL);
	check_block_range(start, len);
	parse_blocknr(start, &bmap, &start_offset);

	end_offset = start_offset + *len;
	assert("nikita-2214", end_offset <= bmap_bit_count(super->s_blocksize));

	bnode = get_bnode(super, bmap);

	assert("nikita-2215", bnode != NULL);

	ret = load_and_lock_bnode(bnode);
	assert("zam-626", ret == 0);

	assert("nikita-2216", jnode_is_loaded(bnode->wjnode));

	if (desired) {
		assert("zam-623",
		       reiser4_find_next_zero_bit(bnode_working_data(bnode),
						  end_offset, start_offset)
		       >= end_offset);
	} else {
		assert("zam-624",
		       reiser4_find_next_set_bit(bnode_working_data(bnode),
						 end_offset, start_offset)
		       >= end_offset);
	}

	release_and_unlock_bnode(bnode);
#endif
}

/* conditional insertion of @node into atom's overwrite set if it was not
   there */
static void cond_add_to_overwrite_set(txn_atom * atom, jnode * node)
{
	assert("zam-546", atom != NULL);
	assert("zam-547", atom->stage == ASTAGE_PRE_COMMIT);
	assert("zam-548", node != NULL);

	spin_lock_atom(atom);
	spin_lock_jnode(node);

	if (node->atom == NULL) {
		JF_SET(node, JNODE_OVRWR);
		insert_into_atom_ovrwr_list(atom, node);
	} else {
		assert("zam-549", node->atom == atom);
	}

	spin_unlock_jnode(node);
	spin_unlock_atom(atom);
}

/* an actor which applies the delete set to COMMIT bitmap pages and links
   modified pages into a single-linked list */
static int
apply_dset_to_commit_bmap(txn_atom * atom, const reiser4_block_nr * start,
			  const reiser4_block_nr * len, void *data)
{
	bmap_nr_t bmap;
	bmap_off_t offset;
	int ret;

	long long *blocks_freed_p = data;

	struct bitmap_node *bnode;

	struct super_block *sb = reiser4_get_current_sb();

	check_block_range(start, len);

	parse_blocknr(start, &bmap, &offset);

	/* FIXME-ZAM: we assume that all block ranges are allocated by this
	   bitmap-based allocator and each block range can't go over a zone of
	   responsibility of one bitmap block; same assumption is used in
	   other journal hooks in bitmap code. */
	bnode = get_bnode(sb, bmap);
	assert("zam-448", bnode != NULL);

	/* it is safe to unlock atom which is in ASTAGE_PRE_COMMIT */
	assert("zam-767", atom->stage == ASTAGE_PRE_COMMIT);
	ret = load_and_lock_bnode(bnode);
	if (ret)
		return ret;

	/* put bnode into atom's overwrite set */
	cond_add_to_overwrite_set(atom, bnode->cjnode);

	data = bnode_commit_data(bnode);

	ret = bnode_check_crc(bnode);
	if (ret != 0)
		return ret;

	if (len != NULL) {
		/* FIXME-ZAM: a check that all bits are set should be there */
		assert("zam-443",
		       offset + *len <= bmap_bit_count(sb->s_blocksize));
		reiser4_clear_bits(data, offset, (bmap_off_t) (offset + *len));

		(*blocks_freed_p) += *len;
	} else {
		reiser4_clear_bit(offset, data);
		(*blocks_freed_p)++;
	}

	bnode_set_commit_crc(bnode, bnode_calc_crc(bnode, sb->s_blocksize));

	release_and_unlock_bnode(bnode);

	return 0;
}

/* plugin->u.space_allocator.pre_commit_hook(). */
/* It just applies transaction changes to the fs-wide COMMIT BITMAP, hoping
   the rest is done by the transaction manager (allocate wandered locations
   for COMMIT BITMAP blocks, copy COMMIT BITMAP blocks data). */
/* Only one instance of this function can be running at a given time, because
   only one transaction can be committed at a time, therefore it is safe to
   access some global variables without any locking */

int reiser4_pre_commit_hook_bitmap(void)
{
	struct super_block *super = reiser4_get_current_sb();
	txn_atom *atom;

	long long blocks_freed = 0;

	atom = get_current_atom_locked();
	assert("zam-876", atom->stage == ASTAGE_PRE_COMMIT);
	spin_unlock_atom(atom);

	{	/* scan atom's captured list and find all freshly allocated
		 * nodes, mark corresponding bits in COMMIT BITMAP as used */
		struct list_head *head = ATOM_CLEAN_LIST(atom);
		jnode *node = list_entry(head->next, jnode, capture_link);

		while (head != &node->capture_link) {
			/* we detect freshly allocated jnodes */
			if (JF_ISSET(node, JNODE_RELOC)) {
				int ret;
				bmap_nr_t bmap;

				bmap_off_t offset;
				bmap_off_t index;
				struct bitmap_node *bn;
				__u32 size = bmap_size(super->s_blocksize);
				__u32 crc;
				char byte;

				assert("zam-559", !JF_ISSET(node, JNODE_OVRWR));
				assert("zam-460",
				       !reiser4_blocknr_is_fake(&node->blocknr));

				parse_blocknr(&node->blocknr, &bmap, &offset);
				bn = get_bnode(super, bmap);

				index = offset >> 3;
				assert("vpf-276", index < size);

				ret = bnode_check_crc(bn);
				if (ret != 0)
					return ret;

				check_bnode_loaded(bn);
				load_and_lock_bnode(bn);

				byte = *(bnode_commit_data(bn) + index);
				reiser4_set_bit(offset, bnode_commit_data(bn));

				crc = adler32_recalc(bnode_commit_crc(bn), byte,
						     *(bnode_commit_data(bn) +
						       index),
						     size - index);
				bnode_set_commit_crc(bn, crc);

				release_and_unlock_bnode(bn);

				ret = bnode_check_crc(bn);
				if (ret != 0)
					return ret;

				/* correct operation of this depends on how
				   the j-node insertion into the clean list is
				   done, because we are scanning the same list
				   now. It is OK if insertion is done to the
				   list front */
				cond_add_to_overwrite_set(atom, bn->cjnode);
			}

			node = list_entry(node->capture_link.next, jnode,
					  capture_link);
		}
	}

	blocknr_set_iterator(atom, &atom->delete_set, apply_dset_to_commit_bmap,
			     &blocks_freed, 0);

	blocks_freed -= atom->nr_blocks_allocated;

	{
		reiser4_super_info_data *sbinfo;

		sbinfo = get_super_private(super);

		spin_lock_reiser4_super(sbinfo);
		sbinfo->blocks_free_committed += blocks_freed;
		spin_unlock_reiser4_super(sbinfo);
	}

	return 0;
}

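/* Accounting note: blocks_freed first accumulates everything released via
 * the delete set, then atom->nr_blocks_allocated is subtracted, so the value
 * folded into sbinfo->blocks_free_committed is the atom's net effect on the
 * committed free-block count (it may well be negative). */
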
/* plugin->u.space_allocator.init_allocator
   constructor of reiser4_space_allocator object. It is called on fs mount */
int reiser4_init_allocator_bitmap(reiser4_space_allocator * allocator,
				  struct super_block *super, void *arg)
{
	struct bitmap_allocator_data *data = NULL;
	bmap_nr_t bitmap_blocks_nr;
	bmap_nr_t i;

	assert("nikita-3039", reiser4_schedulable());

	/* getting memory for bitmap allocator private data holder */
	data =
	    kmalloc(sizeof(struct bitmap_allocator_data),
		    reiser4_ctx_gfp_mask_get());

	if (data == NULL)
		return RETERR(-ENOMEM);

	/* allocation and initialization for the array of bnodes */
	bitmap_blocks_nr = get_nr_bmap(super);

	/* FIXME-ZAM: it is not clear what to do with a huge number of bitmaps
	   which is bigger than 2^32 (= 8 * 4096 * 4096 * 2^32 bytes = 5.76e+17,
	   may I never meet someone who still uses the ia32 architecture when
	   storage devices of that size enter the market, and wants to use ia32
	   with that storage device, much less reiser4. ;-) -Hans). Kmalloc is
	   not possible and, probably, another dynamic data structure should
	   replace a static array of bnodes. */
	/*data->bitmap = reiser4_kmalloc((size_t) (sizeof (struct bitmap_node) * bitmap_blocks_nr), GFP_KERNEL); */
	data->bitmap = reiser4_vmalloc(sizeof(struct bitmap_node) *
				       bitmap_blocks_nr);
	if (data->bitmap == NULL) {
		kfree(data);
		return RETERR(-ENOMEM);
	}

	for (i = 0; i < bitmap_blocks_nr; i++)
		init_bnode(data->bitmap + i, super, i);

	allocator->u.generic = data;

#if REISER4_DEBUG
	get_super_private(super)->min_blocks_used += bitmap_blocks_nr;
#endif

	/* Load all bitmap blocks at mount time. */
	if (!test_bit
	    (REISER4_DONT_LOAD_BITMAP, &get_super_private(super)->fs_flags)) {
		__u64 start_time, elapsed_time;
		struct bitmap_node *bnode;
		int ret;

		printk(KERN_INFO "loading reiser4 bitmap...");
		start_time = jiffies;

		for (i = 0; i < bitmap_blocks_nr; i++) {
			bnode = data->bitmap + i;
			ret = load_and_lock_bnode(bnode);
			if (ret) {
				reiser4_destroy_allocator_bitmap(allocator,
								 super);
				return ret;
			}
			release_and_unlock_bnode(bnode);
		}

		elapsed_time = jiffies - start_time;

		printk("...done (%llu jiffies)\n",
		       (unsigned long long)elapsed_time);
	}

	return 0;
}

/* plugin->u.space_allocator.destroy_allocator
   destructor. It is called on fs unmount */
int reiser4_destroy_allocator_bitmap(reiser4_space_allocator * allocator,
				     struct super_block *super)
{
	bmap_nr_t bitmap_blocks_nr;
	bmap_nr_t i;

	struct bitmap_allocator_data *data = allocator->u.generic;

	assert("zam-414", data != NULL);
	assert("zam-376", data->bitmap != NULL);

	bitmap_blocks_nr = get_nr_bmap(super);

	for (i = 0; i < bitmap_blocks_nr; i++) {
		struct bitmap_node *bnode = data->bitmap + i;

		mutex_lock(&bnode->mutex);

#if REISER4_DEBUG
		if (atomic_read(&bnode->loaded)) {
			jnode *wj = bnode->wjnode;
			jnode *cj = bnode->cjnode;

			assert("zam-480", jnode_page(cj) != NULL);
			assert("zam-633", jnode_page(wj) != NULL);

			assert("zam-634",
			       memcmp(jdata(wj), jdata(cj),
				      bmap_size(super->s_blocksize)) == 0);
		}
#endif

		done_bnode(bnode);
		mutex_unlock(&bnode->mutex);
	}

	vfree(data->bitmap);
	kfree(data);

	allocator->u.generic = NULL;

	return 0;
}
