1 /* $NetBSD: ext2fs_alloc.c,v 1.42 2011/03/06 04:46:26 rmind Exp $ */
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94
32 * Modified for ext2fs by Manuel Bouyer.
36 * Copyright (c) 1997 Manuel Bouyer.
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
47 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
48 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
49 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
50 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
51 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
56 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94
59 * Modified for ext2fs by Manuel Bouyer.
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.42 2011/03/06 04:46:26 rmind Exp $");
65 #include <sys/param.h>
66 #include <sys/systm.h>
69 #include <sys/vnode.h>
70 #include <sys/mount.h>
71 #include <sys/kernel.h>
72 #include <sys/syslog.h>
73 #include <sys/kauth.h>
75 #include <ufs/ufs/inode.h>
76 #include <ufs/ufs/ufs_extern.h>
77 #include <ufs/ufs/ufsmount.h>
79 #include <ufs/ext2fs/ext2fs.h>
80 #include <ufs/ext2fs/ext2fs_extern.h>
84 static daddr_t
ext2fs_alloccg(struct inode
*, int, daddr_t
, int);
85 static u_long
ext2fs_dirpref(struct m_ext2fs
*);
86 static void ext2fs_fserr(struct m_ext2fs
*, u_int
, const char *);
87 static u_long
ext2fs_hashalloc(struct inode
*, int, long, int,
88 daddr_t (*)(struct inode
*, int, daddr_t
, int));
89 static daddr_t
ext2fs_nodealloccg(struct inode
*, int, daddr_t
, int);
90 static daddr_t
ext2fs_mapsearch(struct m_ext2fs
*, char *, daddr_t
);
93 * Allocate a block in the file system.
95 * A preference may be optionally specified. If a preference is given
96 * the following hierarchy is used to allocate a block:
97 * 1) allocate the requested block.
98 * 2) allocate a rotationally optimal block in the same cylinder.
99 * 3) allocate a block in the same cylinder group.
100 * 4) quadradically rehash into other cylinder groups, until an
101 * available block is located.
102 * If no block preference is given the following hierarchy is used
103 * to allocate a block:
104 * 1) allocate a block in the cylinder group that contains the
105 * inode for the file.
106 * 2) quadradically rehash into other cylinder groups, until an
107 * available block is located.
110 ext2fs_alloc(struct inode
*ip
, daddr_t lbn
, daddr_t bpref
,
111 kauth_cred_t cred
, daddr_t
*bnp
)
121 panic("ext2fs_alloc: missing credential");
122 #endif /* DIAGNOSTIC */
123 if (fs
->e2fs
.e2fs_fbcount
== 0)
125 if (kauth_authorize_system(cred
, KAUTH_SYSTEM_FS_RESERVEDSPACE
, 0, NULL
,
129 if (bpref
>= fs
->e2fs
.e2fs_bcount
)
132 cg
= ino_to_cg(fs
, ip
->i_number
);
134 cg
= dtog(fs
, bpref
);
135 bno
= (daddr_t
)ext2fs_hashalloc(ip
, cg
, bpref
, fs
->e2fs_bsize
,
138 ip
->i_e2fs_nblock
+= btodb(fs
->e2fs_bsize
);
139 ip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
144 ext2fs_fserr(fs
, kauth_cred_geteuid(cred
), "file system full");
145 uprintf("\n%s: write failed, file system is full\n", fs
->e2fs_fsmnt
);
150 * Allocate an inode in the file system.
152 * If allocating a directory, use ext2fs_dirpref to select the inode.
153 * If allocating in a directory, the following hierarchy is followed:
154 * 1) allocate the preferred inode.
155 * 2) allocate an inode in the same cylinder group.
156 * 3) quadradically rehash into other cylinder groups, until an
157 * available inode is located.
158 * If no inode preference is given the following hierarchy is used
159 * to allocate an inode:
160 * 1) allocate an inode in cylinder group 0.
161 * 2) quadradically rehash into other cylinder groups, until an
162 * available inode is located.
165 ext2fs_valloc(struct vnode
*pvp
, int mode
, kauth_cred_t cred
,
177 if (fs
->e2fs
.e2fs_ficount
== 0)
180 if ((mode
& IFMT
) == IFDIR
)
181 cg
= ext2fs_dirpref(fs
);
183 cg
= ino_to_cg(fs
, pip
->i_number
);
184 ipref
= cg
* fs
->e2fs
.e2fs_ipg
+ 1;
185 ino
= (ino_t
)ext2fs_hashalloc(pip
, cg
, (long)ipref
, mode
, ext2fs_nodealloccg
);
188 error
= VFS_VGET(pvp
->v_mount
, ino
, vpp
);
190 ext2fs_vfree(pvp
, ino
, mode
);
194 if (ip
->i_e2fs_mode
&& ip
->i_e2fs_nlink
!= 0) {
195 printf("mode = 0%o, nlinks %d, inum = %llu, fs = %s\n",
196 ip
->i_e2fs_mode
, ip
->i_e2fs_nlink
,
197 (unsigned long long)ip
->i_number
, fs
->e2fs_fsmnt
);
198 panic("ext2fs_valloc: dup alloc");
201 memset(ip
->i_din
.e2fs_din
, 0, sizeof(struct ext2fs_dinode
));
204 * Set up a new generation number for this inode.
206 if (++ext2gennumber
< time_second
)
207 ext2gennumber
= time_second
;
208 ip
->i_e2fs_gen
= ext2gennumber
;
211 ext2fs_fserr(fs
, kauth_cred_geteuid(cred
), "out of inodes");
212 uprintf("\n%s: create/symlink failed, no inodes free\n", fs
->e2fs_fsmnt
);
217 * Find a cylinder to place a directory.
219 * The policy implemented by this algorithm is to select from
220 * among those cylinder groups with above the average number of
221 * free inodes, the one with the smallest number of directories.
224 ext2fs_dirpref(struct m_ext2fs
*fs
)
226 int cg
, maxspace
, mincg
, avgifree
;
228 avgifree
= fs
->e2fs
.e2fs_ficount
/ fs
->e2fs_ncg
;
231 for (cg
= 0; cg
< fs
->e2fs_ncg
; cg
++)
232 if ( fs
->e2fs_gd
[cg
].ext2bgd_nifree
>= avgifree
) {
233 if (mincg
== -1 || fs
->e2fs_gd
[cg
].ext2bgd_nbfree
> maxspace
) {
235 maxspace
= fs
->e2fs_gd
[cg
].ext2bgd_nbfree
;
242 * Select the desired position for the next block in a file. The file is
243 * logically divided into sections. The first section is composed of the
244 * direct blocks. Each additional section contains fs_maxbpg blocks.
246 * If no blocks have been allocated in the first section, the policy is to
247 * request a block in the same cylinder group as the inode that describes
248 * the file. Otherwise, the policy is to try to allocate the blocks
249 * contigously. The two fields of the ext2 inode extension (see
250 * ufs/ufs/inode.h) help this.
253 ext2fs_blkpref(struct inode
*ip
, daddr_t lbn
, int indx
,
254 int32_t *bap
/* XXX ondisk32 */)
261 * if we are doing contigous lbn allocation, try to alloc blocks
262 * contigously on disk
265 if ( ip
->i_e2fs_last_blk
&& lbn
== ip
->i_e2fs_last_lblk
+ 1) {
266 return ip
->i_e2fs_last_blk
+ 1;
270 * bap, if provided, gives us a list of blocks to which we want to
275 for (i
= indx
; i
>= 0 ; i
--) {
277 return fs2h32(bap
[i
]) + 1;
282 /* fall back to the first block of the cylinder containing the inode */
284 cg
= ino_to_cg(fs
, ip
->i_number
);
285 return fs
->e2fs
.e2fs_bpg
* cg
+ fs
->e2fs
.e2fs_first_dblock
+ 1;
289 * Implement the cylinder overflow algorithm.
291 * The policy implemented by this algorithm is:
292 * 1) allocate the block in its requested cylinder group.
293 * 2) quadradically rehash on the cylinder group number.
294 * 3) brute force search for a free block.
297 ext2fs_hashalloc(struct inode
*ip
, int cg
, long pref
, int size
,
298 daddr_t (*allocator
)(struct inode
*, int, daddr_t
, int))
306 * 1: preferred cylinder group
308 result
= (*allocator
)(ip
, cg
, pref
, size
);
312 * 2: quadratic rehash
314 for (i
= 1; i
< fs
->e2fs_ncg
; i
*= 2) {
316 if (cg
>= fs
->e2fs_ncg
)
318 result
= (*allocator
)(ip
, cg
, 0, size
);
323 * 3: brute force search
324 * Note that we start at i == 2, since 0 was checked initially,
325 * and 1 is always checked in the quadratic rehash.
327 cg
= (icg
+ 2) % fs
->e2fs_ncg
;
328 for (i
= 2; i
< fs
->e2fs_ncg
; i
++) {
329 result
= (*allocator
)(ip
, cg
, 0, size
);
333 if (cg
== fs
->e2fs_ncg
)
340 * Determine whether a block can be allocated.
342 * Check to see if a block of the appropriate size is available,
343 * and if it is, allocate it.
347 ext2fs_alloccg(struct inode
*ip
, int cg
, daddr_t bpref
, int size
)
353 int error
, bno
, start
, end
, loc
;
356 if (fs
->e2fs_gd
[cg
].ext2bgd_nbfree
== 0)
358 error
= bread(ip
->i_devvp
, fsbtodb(fs
,
359 fs
->e2fs_gd
[cg
].ext2bgd_b_bitmap
),
360 (int)fs
->e2fs_bsize
, NOCRED
, B_MODIFY
, &bp
);
365 bbp
= (char *)bp
->b_data
;
367 if (dtog(fs
, bpref
) != cg
)
370 bpref
= dtogd(fs
, bpref
);
372 * if the requested block is available, use it
374 if (isclr(bbp
, bpref
)) {
380 * no blocks in the requested cylinder, so take next
381 * available one in this cylinder group.
382 * first try to get 8 contigous blocks, then fall back to a single
386 start
= dtogd(fs
, bpref
) / NBBY
;
389 end
= howmany(fs
->e2fs
.e2fs_fpg
, NBBY
) - start
;
390 for (loc
= start
; loc
< end
; loc
++) {
396 for (loc
= 0; loc
< start
; loc
++) {
403 bno
= ext2fs_mapsearch(fs
, bbp
, bpref
);
408 if (isset(bbp
, (daddr_t
)bno
)) {
409 printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
410 cg
, bno
, fs
->e2fs_fsmnt
);
411 panic("ext2fs_alloccg: dup alloc");
414 setbit(bbp
, (daddr_t
)bno
);
415 fs
->e2fs
.e2fs_fbcount
--;
416 fs
->e2fs_gd
[cg
].ext2bgd_nbfree
--;
419 return (cg
* fs
->e2fs
.e2fs_fpg
+ fs
->e2fs
.e2fs_first_dblock
+ bno
);
423 * Determine whether an inode can be allocated.
425 * Check to see if an inode is available, and if it is,
426 * allocate it using the following policy:
427 * 1) allocate the requested inode.
428 * 2) allocate the next available inode after the requested
429 * inode in the specified cylinder group.
432 ext2fs_nodealloccg(struct inode
*ip
, int cg
, daddr_t ipref
, int mode
)
437 int error
, start
, len
, loc
, map
, i
;
439 ipref
--; /* to avoid a lot of (ipref -1) */
443 if (fs
->e2fs_gd
[cg
].ext2bgd_nifree
== 0)
445 error
= bread(ip
->i_devvp
, fsbtodb(fs
,
446 fs
->e2fs_gd
[cg
].ext2bgd_i_bitmap
),
447 (int)fs
->e2fs_bsize
, NOCRED
, B_MODIFY
, &bp
);
452 ibp
= (char *)bp
->b_data
;
454 ipref
%= fs
->e2fs
.e2fs_ipg
;
455 if (isclr(ibp
, ipref
))
458 start
= ipref
/ NBBY
;
459 len
= howmany(fs
->e2fs
.e2fs_ipg
- ipref
, NBBY
);
460 loc
= skpc(0xff, len
, &ibp
[start
]);
464 loc
= skpc(0xff, len
, &ibp
[0]);
466 printf("cg = %d, ipref = %lld, fs = %s\n",
467 cg
, (long long)ipref
, fs
->e2fs_fsmnt
);
468 panic("ext2fs_nodealloccg: map corrupted");
472 i
= start
+ len
- loc
;
475 printf("fs = %s\n", fs
->e2fs_fsmnt
);
476 panic("ext2fs_nodealloccg: block not in map");
478 ipref
= i
* NBBY
+ ffs(map
) - 1;
481 fs
->e2fs
.e2fs_ficount
--;
482 fs
->e2fs_gd
[cg
].ext2bgd_nifree
--;
484 if ((mode
& IFMT
) == IFDIR
) {
485 fs
->e2fs_gd
[cg
].ext2bgd_ndirs
++;
488 return (cg
* fs
->e2fs
.e2fs_ipg
+ ipref
+1);
494 * The specified block is placed back in the
498 ext2fs_blkfree(struct inode
*ip
, daddr_t bno
)
507 if ((u_int
)bno
>= fs
->e2fs
.e2fs_bcount
) {
508 printf("bad block %lld, ino %llu\n", (long long)bno
,
509 (unsigned long long)ip
->i_number
);
510 ext2fs_fserr(fs
, ip
->i_uid
, "bad block");
513 error
= bread(ip
->i_devvp
,
514 fsbtodb(fs
, fs
->e2fs_gd
[cg
].ext2bgd_b_bitmap
),
515 (int)fs
->e2fs_bsize
, NOCRED
, B_MODIFY
, &bp
);
520 bbp
= (char *)bp
->b_data
;
521 bno
= dtogd(fs
, bno
);
522 if (isclr(bbp
, bno
)) {
523 printf("dev = 0x%llx, block = %lld, fs = %s\n",
524 (unsigned long long)ip
->i_dev
, (long long)bno
,
526 panic("blkfree: freeing free block");
529 fs
->e2fs
.e2fs_fbcount
++;
530 fs
->e2fs_gd
[cg
].ext2bgd_nbfree
++;
539 * The specified inode is placed back in the free map.
542 ext2fs_vfree(struct vnode
*pvp
, ino_t ino
, int mode
)
552 if ((u_int
)ino
> fs
->e2fs
.e2fs_icount
|| (u_int
)ino
< EXT2_FIRSTINO
)
553 panic("ifree: range: dev = 0x%llx, ino = %llu, fs = %s",
554 (unsigned long long)pip
->i_dev
, (unsigned long long)ino
,
556 cg
= ino_to_cg(fs
, ino
);
557 error
= bread(pip
->i_devvp
,
558 fsbtodb(fs
, fs
->e2fs_gd
[cg
].ext2bgd_i_bitmap
),
559 (int)fs
->e2fs_bsize
, NOCRED
, B_MODIFY
, &bp
);
564 ibp
= (char *)bp
->b_data
;
565 ino
= (ino
- 1) % fs
->e2fs
.e2fs_ipg
;
566 if (isclr(ibp
, ino
)) {
567 printf("dev = 0x%llx, ino = %llu, fs = %s\n",
568 (unsigned long long)pip
->i_dev
,
569 (unsigned long long)ino
, fs
->e2fs_fsmnt
);
570 if (fs
->e2fs_ronly
== 0)
571 panic("ifree: freeing free inode");
574 fs
->e2fs
.e2fs_ficount
++;
575 fs
->e2fs_gd
[cg
].ext2bgd_nifree
++;
576 if ((mode
& IFMT
) == IFDIR
) {
577 fs
->e2fs_gd
[cg
].ext2bgd_ndirs
--;
585 * Find a block in the specified cylinder group.
587 * It is a panic if a request is made to find a block if none are
592 ext2fs_mapsearch(struct m_ext2fs
*fs
, char *bbp
, daddr_t bpref
)
594 int start
, len
, loc
, i
, map
;
597 * find the fragment by searching through the free block
598 * map for an appropriate bit pattern
601 start
= dtogd(fs
, bpref
) / NBBY
;
604 len
= howmany(fs
->e2fs
.e2fs_fpg
, NBBY
) - start
;
605 loc
= skpc(0xff, len
, &bbp
[start
]);
609 loc
= skpc(0xff, len
, &bbp
[start
]);
611 printf("start = %d, len = %d, fs = %s\n",
612 start
, len
, fs
->e2fs_fsmnt
);
613 panic("ext2fs_alloccg: map corrupted");
617 i
= start
+ len
- loc
;
620 printf("fs = %s\n", fs
->e2fs_fsmnt
);
621 panic("ext2fs_mapsearch: block not in map");
623 return i
* NBBY
+ ffs(map
) - 1;
627 * Fserr prints the name of a file system with an error diagnostic.
629 * The form of the error message is:
633 ext2fs_fserr(struct m_ext2fs
*fs
, u_int uid
, const char *cp
)
636 log(LOG_ERR
, "uid %d on %s: %s\n", uid
, fs
->e2fs_fsmnt
, cp
);