2 * linux/fs/ext2/balloc.c
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
9 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10 * Big-endian to little-endian byte-swapping/bitmaps by
11 * David S. Miller (davem@caip.rutgers.edu), 1995
15 #include <linux/quotaops.h>
16 #include <linux/sched.h>
17 #include <linux/buffer_head.h>
18 #include <linux/capability.h>
21 * balloc.c contains the blocks allocation and deallocation routines
25 * The free blocks are managed by bitmaps. A file system contains several
26 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
27 * block for inodes, N blocks for the inode table and data blocks.
29 * The file system contains group descriptors which are located after the
30 * super block. Each descriptor contains the number of the bitmap block and
31 * the free blocks count in the block. The descriptors are loaded in memory
32 * when a file system is mounted (see ext2_read_super).
36 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
38 struct ext2_group_desc
* ext2_get_group_desc(struct super_block
* sb
,
39 unsigned int block_group
,
40 struct buffer_head
** bh
)
42 unsigned long group_desc
;
44 struct ext2_group_desc
* desc
;
45 struct ext2_sb_info
*sbi
= EXT2_SB(sb
);
47 if (block_group
>= sbi
->s_groups_count
) {
48 ext2_error (sb
, "ext2_get_group_desc",
49 "block_group >= groups_count - "
50 "block_group = %d, groups_count = %lu",
51 block_group
, sbi
->s_groups_count
);
56 group_desc
= block_group
>> EXT2_DESC_PER_BLOCK_BITS(sb
);
57 offset
= block_group
& (EXT2_DESC_PER_BLOCK(sb
) - 1);
58 if (!sbi
->s_group_desc
[group_desc
]) {
59 ext2_error (sb
, "ext2_get_group_desc",
60 "Group descriptor not loaded - "
61 "block_group = %d, group_desc = %lu, desc = %lu",
62 block_group
, group_desc
, offset
);
66 desc
= (struct ext2_group_desc
*) sbi
->s_group_desc
[group_desc
]->b_data
;
68 *bh
= sbi
->s_group_desc
[group_desc
];
73 * Read the bitmap for a given block_group, reading into the specified
74 * slot in the superblock's bitmap cache.
76 * Return buffer_head on success or NULL in case of failure.
78 static struct buffer_head
*
79 read_block_bitmap(struct super_block
*sb
, unsigned int block_group
)
81 struct ext2_group_desc
* desc
;
82 struct buffer_head
* bh
= NULL
;
84 desc
= ext2_get_group_desc (sb
, block_group
, NULL
);
87 bh
= sb_bread(sb
, le32_to_cpu(desc
->bg_block_bitmap
));
89 ext2_error (sb
, "read_block_bitmap",
90 "Cannot read block bitmap - "
91 "block_group = %d, block_bitmap = %u",
92 block_group
, le32_to_cpu(desc
->bg_block_bitmap
));
98 * Set sb->s_dirt here because the superblock was "logically" altered. We
99 * need to recalculate its free blocks count and flush it out.
101 static int reserve_blocks(struct super_block
*sb
, int count
)
103 struct ext2_sb_info
*sbi
= EXT2_SB(sb
);
104 struct ext2_super_block
*es
= sbi
->s_es
;
105 unsigned free_blocks
;
106 unsigned root_blocks
;
108 free_blocks
= percpu_counter_read_positive(&sbi
->s_freeblocks_counter
);
109 root_blocks
= le32_to_cpu(es
->s_r_blocks_count
);
111 if (free_blocks
< count
)
114 if (free_blocks
< root_blocks
+ count
&& !capable(CAP_SYS_RESOURCE
) &&
115 sbi
->s_resuid
!= current
->fsuid
&&
116 (sbi
->s_resgid
== 0 || !in_group_p (sbi
->s_resgid
))) {
118 * We are too close to reserve and we are not privileged.
119 * Can we allocate anything at all?
121 if (free_blocks
> root_blocks
)
122 count
= free_blocks
- root_blocks
;
127 percpu_counter_mod(&sbi
->s_freeblocks_counter
, -count
);
132 static void release_blocks(struct super_block
*sb
, int count
)
135 struct ext2_sb_info
*sbi
= EXT2_SB(sb
);
137 percpu_counter_mod(&sbi
->s_freeblocks_counter
, count
);
142 static int group_reserve_blocks(struct ext2_sb_info
*sbi
, int group_no
,
143 struct ext2_group_desc
*desc
, struct buffer_head
*bh
, int count
)
145 unsigned free_blocks
;
147 if (!desc
->bg_free_blocks_count
)
150 spin_lock(sb_bgl_lock(sbi
, group_no
));
151 free_blocks
= le16_to_cpu(desc
->bg_free_blocks_count
);
152 if (free_blocks
< count
)
154 desc
->bg_free_blocks_count
= cpu_to_le16(free_blocks
- count
);
155 spin_unlock(sb_bgl_lock(sbi
, group_no
));
156 mark_buffer_dirty(bh
);
160 static void group_release_blocks(struct super_block
*sb
, int group_no
,
161 struct ext2_group_desc
*desc
, struct buffer_head
*bh
, int count
)
164 struct ext2_sb_info
*sbi
= EXT2_SB(sb
);
165 unsigned free_blocks
;
167 spin_lock(sb_bgl_lock(sbi
, group_no
));
168 free_blocks
= le16_to_cpu(desc
->bg_free_blocks_count
);
169 desc
->bg_free_blocks_count
= cpu_to_le16(free_blocks
+ count
);
170 spin_unlock(sb_bgl_lock(sbi
, group_no
));
172 mark_buffer_dirty(bh
);
176 /* Free given blocks, update quota and i_blocks field */
177 void ext2_free_blocks (struct inode
* inode
, unsigned long block
,
180 struct buffer_head
*bitmap_bh
= NULL
;
181 struct buffer_head
* bh2
;
182 unsigned long block_group
;
185 unsigned long overflow
;
186 struct super_block
* sb
= inode
->i_sb
;
187 struct ext2_sb_info
* sbi
= EXT2_SB(sb
);
188 struct ext2_group_desc
* desc
;
189 struct ext2_super_block
* es
= sbi
->s_es
;
190 unsigned freed
= 0, group_freed
;
192 if (block
< le32_to_cpu(es
->s_first_data_block
) ||
193 block
+ count
< block
||
194 block
+ count
> le32_to_cpu(es
->s_blocks_count
)) {
195 ext2_error (sb
, "ext2_free_blocks",
196 "Freeing blocks not in datazone - "
197 "block = %lu, count = %lu", block
, count
);
201 ext2_debug ("freeing block(s) %lu-%lu\n", block
, block
+ count
- 1);
205 block_group
= (block
- le32_to_cpu(es
->s_first_data_block
)) /
206 EXT2_BLOCKS_PER_GROUP(sb
);
207 bit
= (block
- le32_to_cpu(es
->s_first_data_block
)) %
208 EXT2_BLOCKS_PER_GROUP(sb
);
210 * Check to see if we are freeing blocks across a group
213 if (bit
+ count
> EXT2_BLOCKS_PER_GROUP(sb
)) {
214 overflow
= bit
+ count
- EXT2_BLOCKS_PER_GROUP(sb
);
218 bitmap_bh
= read_block_bitmap(sb
, block_group
);
222 desc
= ext2_get_group_desc (sb
, block_group
, &bh2
);
226 if (in_range (le32_to_cpu(desc
->bg_block_bitmap
), block
, count
) ||
227 in_range (le32_to_cpu(desc
->bg_inode_bitmap
), block
, count
) ||
228 in_range (block
, le32_to_cpu(desc
->bg_inode_table
),
229 sbi
->s_itb_per_group
) ||
230 in_range (block
+ count
- 1, le32_to_cpu(desc
->bg_inode_table
),
231 sbi
->s_itb_per_group
))
232 ext2_error (sb
, "ext2_free_blocks",
233 "Freeing blocks in system zones - "
234 "Block = %lu, count = %lu",
237 for (i
= 0, group_freed
= 0; i
< count
; i
++) {
238 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi
, block_group
),
239 bit
+ i
, bitmap_bh
->b_data
)) {
240 ext2_error(sb
, __FUNCTION__
,
241 "bit already cleared for block %lu", block
+ i
);
247 mark_buffer_dirty(bitmap_bh
);
248 if (sb
->s_flags
& MS_SYNCHRONOUS
)
249 sync_dirty_buffer(bitmap_bh
);
251 group_release_blocks(sb
, block_group
, desc
, bh2
, group_freed
);
252 freed
+= group_freed
;
261 release_blocks(sb
, freed
);
262 DQUOT_FREE_BLOCK(inode
, freed
);
265 static int grab_block(spinlock_t
*lock
, char *map
, unsigned size
, int goal
)
270 if (!ext2_test_bit(goal
, map
))
276 * The goal was occupied; search forward for a free
277 * block within the next XX blocks.
279 * end_goal is more or less random, but it has to be
280 * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the
281 * next 64-bit boundary is simple..
283 k
= (goal
+ 63) & ~63;
284 goal
= ext2_find_next_zero_bit(map
, k
, goal
);
288 * Search in the remainder of the current group.
292 p
= map
+ (goal
>> 3);
293 r
= memscan(p
, 0, (size
- goal
+ 7) >> 3);
297 * We have succeeded in finding a free byte in the block
298 * bitmap. Now search backwards to find the start of this
299 * group of free blocks - won't take more than 7 iterations.
301 for (goal
= k
; goal
&& !ext2_test_bit (goal
- 1, map
); goal
--)
306 k
= ext2_find_next_zero_bit ((u32
*)map
, size
, goal
);
313 if (ext2_set_bit_atomic(lock
, goal
, (void *) map
))
319 * ext2_new_block uses a goal block to assist allocation. If the goal is
320 * free, or there is a free block within 32 blocks of the goal, that block
321 * is allocated. Otherwise a forward search is made for a free block; within
322 * each block group the search first looks for an entire free byte in the block
323 * bitmap, and then for any free bit if that fails.
324 * This function also updates quota and i_blocks field.
326 int ext2_new_block(struct inode
*inode
, unsigned long goal
,
327 u32
*prealloc_count
, u32
*prealloc_block
, int *err
)
329 struct buffer_head
*bitmap_bh
= NULL
;
330 struct buffer_head
*gdp_bh
; /* bh2 */
331 struct ext2_group_desc
*desc
;
332 int group_no
; /* i */
333 int ret_block
; /* j */
334 int group_idx
; /* k */
335 int target_block
; /* tmp */
337 struct super_block
*sb
= inode
->i_sb
;
338 struct ext2_sb_info
*sbi
= EXT2_SB(sb
);
339 struct ext2_super_block
*es
= sbi
->s_es
;
340 unsigned group_size
= EXT2_BLOCKS_PER_GROUP(sb
);
341 unsigned prealloc_goal
= es
->s_prealloc_blocks
;
342 unsigned group_alloc
= 0, es_alloc
, dq_alloc
;
343 int nr_scanned_groups
;
345 if (!prealloc_goal
--)
346 prealloc_goal
= EXT2_DEFAULT_PREALLOC_BLOCKS
- 1;
347 if (!prealloc_count
|| *prealloc_count
)
350 if (DQUOT_ALLOC_BLOCK(inode
, 1)) {
355 while (prealloc_goal
&& DQUOT_PREALLOC_BLOCK(inode
, prealloc_goal
))
358 dq_alloc
= prealloc_goal
+ 1;
359 es_alloc
= reserve_blocks(sb
, dq_alloc
);
365 ext2_debug ("goal=%lu.\n", goal
);
367 if (goal
< le32_to_cpu(es
->s_first_data_block
) ||
368 goal
>= le32_to_cpu(es
->s_blocks_count
))
369 goal
= le32_to_cpu(es
->s_first_data_block
);
370 group_no
= (goal
- le32_to_cpu(es
->s_first_data_block
)) / group_size
;
371 desc
= ext2_get_group_desc (sb
, group_no
, &gdp_bh
);
374 * gdp_bh may still be uninitialised. But group_release_blocks
375 * will not touch it because group_alloc is zero.
380 group_alloc
= group_reserve_blocks(sbi
, group_no
, desc
,
383 ret_block
= ((goal
- le32_to_cpu(es
->s_first_data_block
)) %
386 bitmap_bh
= read_block_bitmap(sb
, group_no
);
390 ext2_debug("goal is at %d:%d.\n", group_no
, ret_block
);
392 ret_block
= grab_block(sb_bgl_lock(sbi
, group_no
),
393 bitmap_bh
->b_data
, group_size
, ret_block
);
396 group_release_blocks(sb
, group_no
, desc
, gdp_bh
, group_alloc
);
400 ext2_debug ("Bit not found in block group %d.\n", group_no
);
403 * Now search the rest of the groups. We assume that
404 * i and desc correctly point to the last group visited.
406 nr_scanned_groups
= 0;
408 for (group_idx
= 0; !group_alloc
&&
409 group_idx
< sbi
->s_groups_count
; group_idx
++) {
411 if (group_no
>= sbi
->s_groups_count
)
413 desc
= ext2_get_group_desc(sb
, group_no
, &gdp_bh
);
416 group_alloc
= group_reserve_blocks(sbi
, group_no
, desc
,
424 bitmap_bh
= read_block_bitmap(sb
, group_no
);
428 ret_block
= grab_block(sb_bgl_lock(sbi
, group_no
), bitmap_bh
->b_data
,
432 * If a free block counter is corrupted we can loop inifintely.
436 if (nr_scanned_groups
> 2 * sbi
->s_groups_count
) {
437 ext2_error(sb
, "ext2_new_block",
438 "corrupted free blocks counters");
442 * Someone else grabbed the last free block in this blockgroup
443 * before us. Retry the scan.
445 group_release_blocks(sb
, group_no
, desc
, gdp_bh
, group_alloc
);
451 ext2_debug("using block group %d(%d)\n",
452 group_no
, desc
->bg_free_blocks_count
);
454 target_block
= ret_block
+ group_no
* group_size
+
455 le32_to_cpu(es
->s_first_data_block
);
457 if (target_block
== le32_to_cpu(desc
->bg_block_bitmap
) ||
458 target_block
== le32_to_cpu(desc
->bg_inode_bitmap
) ||
459 in_range(target_block
, le32_to_cpu(desc
->bg_inode_table
),
460 sbi
->s_itb_per_group
))
461 ext2_error (sb
, "ext2_new_block",
462 "Allocating block in system zone - "
463 "block = %u", target_block
);
465 if (target_block
>= le32_to_cpu(es
->s_blocks_count
)) {
466 ext2_error (sb
, "ext2_new_block",
467 "block(%d) >= blocks count(%d) - "
468 "block_group = %d, es == %p ", ret_block
,
469 le32_to_cpu(es
->s_blocks_count
), group_no
, es
);
472 block
= target_block
;
474 /* OK, we _had_ allocated something */
475 ext2_debug("found bit %d\n", ret_block
);
482 * Do block preallocation now if required.
484 write_lock(&EXT2_I(inode
)->i_meta_lock
);
485 if (group_alloc
&& !*prealloc_count
) {
488 for (n
= 0; n
< group_alloc
&& ++ret_block
< group_size
; n
++) {
489 if (ext2_set_bit_atomic(sb_bgl_lock(sbi
, group_no
),
491 (void*) bitmap_bh
->b_data
))
494 *prealloc_block
= block
+ 1;
500 write_unlock(&EXT2_I(inode
)->i_meta_lock
);
502 mark_buffer_dirty(bitmap_bh
);
503 if (sb
->s_flags
& MS_SYNCHRONOUS
)
504 sync_dirty_buffer(bitmap_bh
);
506 ext2_debug ("allocating block %d. ", block
);
510 group_release_blocks(sb
, group_no
, desc
, gdp_bh
, group_alloc
);
511 release_blocks(sb
, es_alloc
);
513 DQUOT_FREE_BLOCK(inode
, dq_alloc
);
525 static int nibblemap
[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
527 unsigned long ext2_count_free (struct buffer_head
* map
, unsigned int numchars
)
530 unsigned long sum
= 0;
534 for (i
= 0; i
< numchars
; i
++)
535 sum
+= nibblemap
[map
->b_data
[i
] & 0xf] +
536 nibblemap
[(map
->b_data
[i
] >> 4) & 0xf];
540 #endif /* EXT2FS_DEBUG */
542 unsigned long ext2_count_free_blocks (struct super_block
* sb
)
544 struct ext2_group_desc
* desc
;
545 unsigned long desc_count
= 0;
548 unsigned long bitmap_count
, x
;
549 struct ext2_super_block
*es
;
551 es
= EXT2_SB(sb
)->s_es
;
555 for (i
= 0; i
< EXT2_SB(sb
)->s_groups_count
; i
++) {
556 struct buffer_head
*bitmap_bh
;
557 desc
= ext2_get_group_desc (sb
, i
, NULL
);
560 desc_count
+= le16_to_cpu(desc
->bg_free_blocks_count
);
561 bitmap_bh
= read_block_bitmap(sb
, i
);
565 x
= ext2_count_free(bitmap_bh
, sb
->s_blocksize
);
566 printk ("group %d: stored = %d, counted = %lu\n",
567 i
, le16_to_cpu(desc
->bg_free_blocks_count
), x
);
571 printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
572 (long)le32_to_cpu(es
->s_free_blocks_count
),
573 desc_count
, bitmap_count
);
576 for (i
= 0; i
< EXT2_SB(sb
)->s_groups_count
; i
++) {
577 desc
= ext2_get_group_desc (sb
, i
, NULL
);
580 desc_count
+= le16_to_cpu(desc
->bg_free_blocks_count
);
587 block_in_use(unsigned long block
, struct super_block
*sb
, unsigned char *map
)
589 return ext2_test_bit ((block
-
590 le32_to_cpu(EXT2_SB(sb
)->s_es
->s_first_data_block
)) %
591 EXT2_BLOCKS_PER_GROUP(sb
), map
);
594 static inline int test_root(int a
, int b
)
603 static int ext2_group_sparse(int group
)
607 return (test_root(group
, 3) || test_root(group
, 5) ||
608 test_root(group
, 7));
612 * ext2_bg_has_super - number of blocks used by the superblock in group
613 * @sb: superblock for filesystem
614 * @group: group number to check
616 * Return the number of blocks used by the superblock (primary or backup)
617 * in this group. Currently this will be only 0 or 1.
619 int ext2_bg_has_super(struct super_block
*sb
, int group
)
621 if (EXT2_HAS_RO_COMPAT_FEATURE(sb
,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER
)&&
622 !ext2_group_sparse(group
))
628 * ext2_bg_num_gdb - number of blocks used by the group table in group
629 * @sb: superblock for filesystem
630 * @group: group number to check
632 * Return the number of blocks used by the group descriptor table
633 * (primary or backup) in this group. In the future there may be a
634 * different number of descriptor blocks in each group.
636 unsigned long ext2_bg_num_gdb(struct super_block
*sb
, int group
)
638 if (EXT2_HAS_RO_COMPAT_FEATURE(sb
,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER
)&&
639 !ext2_group_sparse(group
))
641 return EXT2_SB(sb
)->s_gdb_count
;