/*
 *  linux/fs/ext2/balloc.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
 * Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */
14 #include <linux/config.h>
16 #include <linux/quotaops.h>
17 #include <linux/sched.h>
18 #include <linux/buffer_head.h>
19 #include <linux/capability.h>
/*
 * balloc.c contains the blocks allocation and deallocation routines
 */

/*
 * The free blocks are managed by bitmaps.  A file system contains several
 * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count in the block.  The descriptors are loaded in memory
 * when a file system is mounted (see ext2_read_super).
 */
/*
 * Test whether block @b lies inside the @len-block range starting at
 * @first.  NOTE: arguments are evaluated more than once, so callers must
 * not pass expressions with side effects.
 */
#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
39 struct ext2_group_desc
* ext2_get_group_desc(struct super_block
* sb
,
40 unsigned int block_group
,
41 struct buffer_head
** bh
)
43 unsigned long group_desc
;
45 struct ext2_group_desc
* desc
;
46 struct ext2_sb_info
*sbi
= EXT2_SB(sb
);
48 if (block_group
>= sbi
->s_groups_count
) {
49 ext2_error (sb
, "ext2_get_group_desc",
50 "block_group >= groups_count - "
51 "block_group = %d, groups_count = %lu",
52 block_group
, sbi
->s_groups_count
);
57 group_desc
= block_group
>> EXT2_DESC_PER_BLOCK_BITS(sb
);
58 offset
= block_group
& (EXT2_DESC_PER_BLOCK(sb
) - 1);
59 if (!sbi
->s_group_desc
[group_desc
]) {
60 ext2_error (sb
, "ext2_get_group_desc",
61 "Group descriptor not loaded - "
62 "block_group = %d, group_desc = %lu, desc = %lu",
63 block_group
, group_desc
, offset
);
67 desc
= (struct ext2_group_desc
*) sbi
->s_group_desc
[group_desc
]->b_data
;
69 *bh
= sbi
->s_group_desc
[group_desc
];
74 * Read the bitmap for a given block_group, reading into the specified
75 * slot in the superblock's bitmap cache.
77 * Return buffer_head on success or NULL in case of failure.
79 static struct buffer_head
*
80 read_block_bitmap(struct super_block
*sb
, unsigned int block_group
)
82 struct ext2_group_desc
* desc
;
83 struct buffer_head
* bh
= NULL
;
85 desc
= ext2_get_group_desc (sb
, block_group
, NULL
);
88 bh
= sb_bread(sb
, le32_to_cpu(desc
->bg_block_bitmap
));
90 ext2_error (sb
, "read_block_bitmap",
91 "Cannot read block bitmap - "
92 "block_group = %d, block_bitmap = %u",
93 block_group
, le32_to_cpu(desc
->bg_block_bitmap
));
99 * Set sb->s_dirt here because the superblock was "logically" altered. We
100 * need to recalculate its free blocks count and flush it out.
102 static int reserve_blocks(struct super_block
*sb
, int count
)
104 struct ext2_sb_info
*sbi
= EXT2_SB(sb
);
105 struct ext2_super_block
*es
= sbi
->s_es
;
106 unsigned free_blocks
;
107 unsigned root_blocks
;
109 free_blocks
= percpu_counter_read_positive(&sbi
->s_freeblocks_counter
);
110 root_blocks
= le32_to_cpu(es
->s_r_blocks_count
);
112 if (free_blocks
< count
)
115 if (free_blocks
< root_blocks
+ count
&& !capable(CAP_SYS_RESOURCE
) &&
116 sbi
->s_resuid
!= current
->fsuid
&&
117 (sbi
->s_resgid
== 0 || !in_group_p (sbi
->s_resgid
))) {
119 * We are too close to reserve and we are not privileged.
120 * Can we allocate anything at all?
122 if (free_blocks
> root_blocks
)
123 count
= free_blocks
- root_blocks
;
128 percpu_counter_mod(&sbi
->s_freeblocks_counter
, -count
);
133 static void release_blocks(struct super_block
*sb
, int count
)
136 struct ext2_sb_info
*sbi
= EXT2_SB(sb
);
138 percpu_counter_mod(&sbi
->s_freeblocks_counter
, count
);
143 static int group_reserve_blocks(struct ext2_sb_info
*sbi
, int group_no
,
144 struct ext2_group_desc
*desc
, struct buffer_head
*bh
, int count
)
146 unsigned free_blocks
;
148 if (!desc
->bg_free_blocks_count
)
151 spin_lock(sb_bgl_lock(sbi
, group_no
));
152 free_blocks
= le16_to_cpu(desc
->bg_free_blocks_count
);
153 if (free_blocks
< count
)
155 desc
->bg_free_blocks_count
= cpu_to_le16(free_blocks
- count
);
156 spin_unlock(sb_bgl_lock(sbi
, group_no
));
157 mark_buffer_dirty(bh
);
161 static void group_release_blocks(struct super_block
*sb
, int group_no
,
162 struct ext2_group_desc
*desc
, struct buffer_head
*bh
, int count
)
165 struct ext2_sb_info
*sbi
= EXT2_SB(sb
);
166 unsigned free_blocks
;
168 spin_lock(sb_bgl_lock(sbi
, group_no
));
169 free_blocks
= le16_to_cpu(desc
->bg_free_blocks_count
);
170 desc
->bg_free_blocks_count
= cpu_to_le16(free_blocks
+ count
);
171 spin_unlock(sb_bgl_lock(sbi
, group_no
));
173 mark_buffer_dirty(bh
);
177 /* Free given blocks, update quota and i_blocks field */
178 void ext2_free_blocks (struct inode
* inode
, unsigned long block
,
181 struct buffer_head
*bitmap_bh
= NULL
;
182 struct buffer_head
* bh2
;
183 unsigned long block_group
;
186 unsigned long overflow
;
187 struct super_block
* sb
= inode
->i_sb
;
188 struct ext2_sb_info
* sbi
= EXT2_SB(sb
);
189 struct ext2_group_desc
* desc
;
190 struct ext2_super_block
* es
= sbi
->s_es
;
191 unsigned freed
= 0, group_freed
;
193 if (block
< le32_to_cpu(es
->s_first_data_block
) ||
194 block
+ count
< block
||
195 block
+ count
> le32_to_cpu(es
->s_blocks_count
)) {
196 ext2_error (sb
, "ext2_free_blocks",
197 "Freeing blocks not in datazone - "
198 "block = %lu, count = %lu", block
, count
);
202 ext2_debug ("freeing block(s) %lu-%lu\n", block
, block
+ count
- 1);
206 block_group
= (block
- le32_to_cpu(es
->s_first_data_block
)) /
207 EXT2_BLOCKS_PER_GROUP(sb
);
208 bit
= (block
- le32_to_cpu(es
->s_first_data_block
)) %
209 EXT2_BLOCKS_PER_GROUP(sb
);
211 * Check to see if we are freeing blocks across a group
214 if (bit
+ count
> EXT2_BLOCKS_PER_GROUP(sb
)) {
215 overflow
= bit
+ count
- EXT2_BLOCKS_PER_GROUP(sb
);
219 bitmap_bh
= read_block_bitmap(sb
, block_group
);
223 desc
= ext2_get_group_desc (sb
, block_group
, &bh2
);
227 if (in_range (le32_to_cpu(desc
->bg_block_bitmap
), block
, count
) ||
228 in_range (le32_to_cpu(desc
->bg_inode_bitmap
), block
, count
) ||
229 in_range (block
, le32_to_cpu(desc
->bg_inode_table
),
230 sbi
->s_itb_per_group
) ||
231 in_range (block
+ count
- 1, le32_to_cpu(desc
->bg_inode_table
),
232 sbi
->s_itb_per_group
))
233 ext2_error (sb
, "ext2_free_blocks",
234 "Freeing blocks in system zones - "
235 "Block = %lu, count = %lu",
238 for (i
= 0, group_freed
= 0; i
< count
; i
++) {
239 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi
, block_group
),
240 bit
+ i
, bitmap_bh
->b_data
)) {
241 ext2_error(sb
, __FUNCTION__
,
242 "bit already cleared for block %lu", block
+ i
);
248 mark_buffer_dirty(bitmap_bh
);
249 if (sb
->s_flags
& MS_SYNCHRONOUS
)
250 sync_dirty_buffer(bitmap_bh
);
252 group_release_blocks(sb
, block_group
, desc
, bh2
, group_freed
);
253 freed
+= group_freed
;
262 release_blocks(sb
, freed
);
263 DQUOT_FREE_BLOCK(inode
, freed
);
266 static int grab_block(spinlock_t
*lock
, char *map
, unsigned size
, int goal
)
271 if (!ext2_test_bit(goal
, map
))
277 * The goal was occupied; search forward for a free
278 * block within the next XX blocks.
280 * end_goal is more or less random, but it has to be
281 * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the
282 * next 64-bit boundary is simple..
284 k
= (goal
+ 63) & ~63;
285 goal
= ext2_find_next_zero_bit(map
, k
, goal
);
289 * Search in the remainder of the current group.
293 p
= map
+ (goal
>> 3);
294 r
= memscan(p
, 0, (size
- goal
+ 7) >> 3);
298 * We have succeeded in finding a free byte in the block
299 * bitmap. Now search backwards to find the start of this
300 * group of free blocks - won't take more than 7 iterations.
302 for (goal
= k
; goal
&& !ext2_test_bit (goal
- 1, map
); goal
--)
307 k
= ext2_find_next_zero_bit ((u32
*)map
, size
, goal
);
314 if (ext2_set_bit_atomic(lock
, goal
, (void *) map
))
320 * ext2_new_block uses a goal block to assist allocation. If the goal is
321 * free, or there is a free block within 32 blocks of the goal, that block
322 * is allocated. Otherwise a forward search is made for a free block; within
323 * each block group the search first looks for an entire free byte in the block
324 * bitmap, and then for any free bit if that fails.
325 * This function also updates quota and i_blocks field.
327 int ext2_new_block(struct inode
*inode
, unsigned long goal
,
328 u32
*prealloc_count
, u32
*prealloc_block
, int *err
)
330 struct buffer_head
*bitmap_bh
= NULL
;
331 struct buffer_head
*gdp_bh
; /* bh2 */
332 struct ext2_group_desc
*desc
;
333 int group_no
; /* i */
334 int ret_block
; /* j */
335 int group_idx
; /* k */
336 int target_block
; /* tmp */
338 struct super_block
*sb
= inode
->i_sb
;
339 struct ext2_sb_info
*sbi
= EXT2_SB(sb
);
340 struct ext2_super_block
*es
= sbi
->s_es
;
341 unsigned group_size
= EXT2_BLOCKS_PER_GROUP(sb
);
342 unsigned prealloc_goal
= es
->s_prealloc_blocks
;
343 unsigned group_alloc
= 0, es_alloc
, dq_alloc
;
344 int nr_scanned_groups
;
346 if (!prealloc_goal
--)
347 prealloc_goal
= EXT2_DEFAULT_PREALLOC_BLOCKS
- 1;
348 if (!prealloc_count
|| *prealloc_count
)
351 if (DQUOT_ALLOC_BLOCK(inode
, 1)) {
356 while (prealloc_goal
&& DQUOT_PREALLOC_BLOCK(inode
, prealloc_goal
))
359 dq_alloc
= prealloc_goal
+ 1;
360 es_alloc
= reserve_blocks(sb
, dq_alloc
);
366 ext2_debug ("goal=%lu.\n", goal
);
368 if (goal
< le32_to_cpu(es
->s_first_data_block
) ||
369 goal
>= le32_to_cpu(es
->s_blocks_count
))
370 goal
= le32_to_cpu(es
->s_first_data_block
);
371 group_no
= (goal
- le32_to_cpu(es
->s_first_data_block
)) / group_size
;
372 desc
= ext2_get_group_desc (sb
, group_no
, &gdp_bh
);
375 * gdp_bh may still be uninitialised. But group_release_blocks
376 * will not touch it because group_alloc is zero.
381 group_alloc
= group_reserve_blocks(sbi
, group_no
, desc
,
384 ret_block
= ((goal
- le32_to_cpu(es
->s_first_data_block
)) %
387 bitmap_bh
= read_block_bitmap(sb
, group_no
);
391 ext2_debug("goal is at %d:%d.\n", group_no
, ret_block
);
393 ret_block
= grab_block(sb_bgl_lock(sbi
, group_no
),
394 bitmap_bh
->b_data
, group_size
, ret_block
);
397 group_release_blocks(sb
, group_no
, desc
, gdp_bh
, group_alloc
);
401 ext2_debug ("Bit not found in block group %d.\n", group_no
);
404 * Now search the rest of the groups. We assume that
405 * i and desc correctly point to the last group visited.
407 nr_scanned_groups
= 0;
409 for (group_idx
= 0; !group_alloc
&&
410 group_idx
< sbi
->s_groups_count
; group_idx
++) {
412 if (group_no
>= sbi
->s_groups_count
)
414 desc
= ext2_get_group_desc(sb
, group_no
, &gdp_bh
);
417 group_alloc
= group_reserve_blocks(sbi
, group_no
, desc
,
425 bitmap_bh
= read_block_bitmap(sb
, group_no
);
429 ret_block
= grab_block(sb_bgl_lock(sbi
, group_no
), bitmap_bh
->b_data
,
433 * If a free block counter is corrupted we can loop inifintely.
437 if (nr_scanned_groups
> 2 * sbi
->s_groups_count
) {
438 ext2_error(sb
, "ext2_new_block",
439 "corrupted free blocks counters");
443 * Someone else grabbed the last free block in this blockgroup
444 * before us. Retry the scan.
446 group_release_blocks(sb
, group_no
, desc
, gdp_bh
, group_alloc
);
452 ext2_debug("using block group %d(%d)\n",
453 group_no
, desc
->bg_free_blocks_count
);
455 target_block
= ret_block
+ group_no
* group_size
+
456 le32_to_cpu(es
->s_first_data_block
);
458 if (target_block
== le32_to_cpu(desc
->bg_block_bitmap
) ||
459 target_block
== le32_to_cpu(desc
->bg_inode_bitmap
) ||
460 in_range(target_block
, le32_to_cpu(desc
->bg_inode_table
),
461 sbi
->s_itb_per_group
))
462 ext2_error (sb
, "ext2_new_block",
463 "Allocating block in system zone - "
464 "block = %u", target_block
);
466 if (target_block
>= le32_to_cpu(es
->s_blocks_count
)) {
467 ext2_error (sb
, "ext2_new_block",
468 "block(%d) >= blocks count(%d) - "
469 "block_group = %d, es == %p ", ret_block
,
470 le32_to_cpu(es
->s_blocks_count
), group_no
, es
);
473 block
= target_block
;
475 /* OK, we _had_ allocated something */
476 ext2_debug("found bit %d\n", ret_block
);
483 * Do block preallocation now if required.
485 write_lock(&EXT2_I(inode
)->i_meta_lock
);
486 if (group_alloc
&& !*prealloc_count
) {
489 for (n
= 0; n
< group_alloc
&& ++ret_block
< group_size
; n
++) {
490 if (ext2_set_bit_atomic(sb_bgl_lock(sbi
, group_no
),
492 (void*) bitmap_bh
->b_data
))
495 *prealloc_block
= block
+ 1;
501 write_unlock(&EXT2_I(inode
)->i_meta_lock
);
503 mark_buffer_dirty(bitmap_bh
);
504 if (sb
->s_flags
& MS_SYNCHRONOUS
)
505 sync_dirty_buffer(bitmap_bh
);
507 ext2_debug ("allocating block %d. ", block
);
511 group_release_blocks(sb
, group_no
, desc
, gdp_bh
, group_alloc
);
512 release_blocks(sb
, es_alloc
);
514 DQUOT_FREE_BLOCK(inode
, dq_alloc
);
524 unsigned long ext2_count_free_blocks (struct super_block
* sb
)
526 struct ext2_group_desc
* desc
;
527 unsigned long desc_count
= 0;
530 unsigned long bitmap_count
, x
;
531 struct ext2_super_block
*es
;
534 es
= EXT2_SB(sb
)->s_es
;
538 for (i
= 0; i
< EXT2_SB(sb
)->s_groups_count
; i
++) {
539 struct buffer_head
*bitmap_bh
;
540 desc
= ext2_get_group_desc (sb
, i
, NULL
);
543 desc_count
+= le16_to_cpu(desc
->bg_free_blocks_count
);
544 bitmap_bh
= read_block_bitmap(sb
, i
);
548 x
= ext2_count_free(bitmap_bh
, sb
->s_blocksize
);
549 printk ("group %d: stored = %d, counted = %lu\n",
550 i
, le16_to_cpu(desc
->bg_free_blocks_count
), x
);
554 printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
555 (long)le32_to_cpu(es
->s_free_blocks_count
),
556 desc_count
, bitmap_count
);
560 for (i
= 0; i
< EXT2_SB(sb
)->s_groups_count
; i
++) {
561 desc
= ext2_get_group_desc (sb
, i
, NULL
);
564 desc_count
+= le16_to_cpu(desc
->bg_free_blocks_count
);
571 block_in_use(unsigned long block
, struct super_block
*sb
, unsigned char *map
)
573 return ext2_test_bit ((block
-
574 le32_to_cpu(EXT2_SB(sb
)->s_es
->s_first_data_block
)) %
575 EXT2_BLOCKS_PER_GROUP(sb
), map
);
/*
 * test_root() - return non-zero iff @a is a positive power of @b
 * (b, b*b, b*b*b, ...).  Helper for the sparse-superblock layout test.
 */
static inline int test_root(int a, int b)
{
	int num = b;

	while (a > num)
		num *= b;
	return num == a;
}

/*
 * ext2_group_sparse() - with the SPARSE_SUPER feature, superblock and
 * group-descriptor backups live only in groups 0, 1 and powers of
 * 3, 5 and 7.
 */
static int ext2_group_sparse(int group)
{
	if (group <= 1)
		return 1;
	return (test_root(group, 3) || test_root(group, 5) ||
		test_root(group, 7));
}
596 * ext2_bg_has_super - number of blocks used by the superblock in group
597 * @sb: superblock for filesystem
598 * @group: group number to check
600 * Return the number of blocks used by the superblock (primary or backup)
601 * in this group. Currently this will be only 0 or 1.
603 int ext2_bg_has_super(struct super_block
*sb
, int group
)
605 if (EXT2_HAS_RO_COMPAT_FEATURE(sb
,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER
)&&
606 !ext2_group_sparse(group
))
612 * ext2_bg_num_gdb - number of blocks used by the group table in group
613 * @sb: superblock for filesystem
614 * @group: group number to check
616 * Return the number of blocks used by the group descriptor table
617 * (primary or backup) in this group. In the future there may be a
618 * different number of descriptor blocks in each group.
620 unsigned long ext2_bg_num_gdb(struct super_block
*sb
, int group
)
622 if (EXT2_HAS_RO_COMPAT_FEATURE(sb
,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER
)&&
623 !ext2_group_sparse(group
))
625 return EXT2_SB(sb
)->s_gdb_count
;