1 /* $NetBSD: efs_subr.c,v 1.6 2007/10/08 18:04:03 ad Exp $ */
4 * Copyright (c) 2006 Stephen M. Rumble <rumble@ephemeral.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 #include <sys/cdefs.h>
20 __KERNEL_RCSID(0, "$NetBSD: efs_subr.c,v 1.6 2007/10/08 18:04:03 ad Exp $");
22 #include <sys/param.h>
23 #include <sys/kauth.h>
27 #include <sys/mount.h>
28 #include <sys/vnode.h>
29 #include <sys/namei.h>
31 #include <sys/malloc.h>
33 #include <miscfs/genfs/genfs_node.h>
35 #include <fs/efs/efs.h>
36 #include <fs/efs/efs_sb.h>
37 #include <fs/efs/efs_dir.h>
38 #include <fs/efs/efs_genfs.h>
39 #include <fs/efs/efs_mount.h>
40 #include <fs/efs/efs_extent.h>
41 #include <fs/efs/efs_dinode.h>
42 #include <fs/efs/efs_inode.h>
43 #include <fs/efs/efs_subr.h>
45 struct pool efs_inode_pool
;
48 * Calculate a checksum for the provided superblock in __host byte order__.
50 * At some point SGI changed the checksum algorithm slightly, which can be
51 * enabled with the 'new' flag.
53 * Presumably this change occured on or before 24 Oct 1988 (around IRIX 3.1),
54 * so we're pretty unlikely to ever actually see an old checksum. Further, it
55 * means that EFS_NEWMAGIC filesystems (IRIX >= 3.3) must match the new
56 * checksum whereas EFS_MAGIC filesystems could potentially use either
59 * See comp.sys.sgi <1991Aug9.050838.16876@odin.corp.sgi.com>
62 efs_sb_checksum(struct efs_sb
*esb
, int new)
66 uint16_t *sbarray
= (uint16_t *)esb
;
68 KASSERT((EFS_SB_CHECKSUM_SIZE
% 2) == 0);
70 for (i
= cksum
= 0; i
< (EFS_SB_CHECKSUM_SIZE
/ 2); i
++) {
71 cksum
^= be16toh(sbarray
[i
]);
72 cksum
= (cksum
<< 1) | (new && cksum
< 0);
79 * Determine if the superblock is valid.
81 * Returns 0 if valid, else invalid. If invalid, 'why' is set to an
85 efs_sb_validate(struct efs_sb
*esb
, const char **why
)
87 uint32_t ocksum
, ncksum
;
91 if (be32toh(esb
->sb_magic
) != EFS_SB_MAGIC
&&
92 be32toh(esb
->sb_magic
) != EFS_SB_NEWMAGIC
) {
93 *why
= "sb_magic invalid";
97 ocksum
= htobe32(efs_sb_checksum(esb
, 0));
98 ncksum
= htobe32(efs_sb_checksum(esb
, 1));
99 if (esb
->sb_checksum
!= ocksum
&& esb
->sb_checksum
!= ncksum
) {
100 *why
= "sb_checksum invalid";
104 if (be32toh(esb
->sb_size
) > EFS_SIZE_MAX
) {
105 *why
= "sb_size > EFS_SIZE_MAX";
109 if (be32toh(esb
->sb_firstcg
) <= EFS_BB_BITMAP
) {
110 *why
= "sb_firstcg <= EFS_BB_BITMAP";
114 /* XXX - add better sb consistency checks here */
115 if (esb
->sb_cgfsize
== 0 ||
116 esb
->sb_cgisize
== 0 ||
118 esb
->sb_bmsize
== 0) {
119 *why
= "something bad happened";
127 * Determine the basic block offset and inode index within that block, given
128 * the inode 'ino' and filesystem parameters _in host byte order_. The inode
129 * will live at byte address 'bboff' * EFS_BB_SIZE + 'index' * EFS_DINODE_SIZE.
132 efs_locate_inode(ino_t ino
, struct efs_sb
*sbp
, uint32_t *bboff
, int *index
)
134 uint32_t cgfsize
, firstcg
;
137 cgisize
= be16toh(sbp
->sb_cgisize
);
138 cgfsize
= be32toh(sbp
->sb_cgfsize
);
139 firstcg
= be32toh(sbp
->sb_firstcg
),
141 *bboff
= firstcg
+ ((ino
/ (cgisize
* EFS_DINODES_PER_BB
)) * cgfsize
) +
142 ((ino
% (cgisize
* EFS_DINODES_PER_BB
)) / EFS_DINODES_PER_BB
);
143 *index
= ino
& (EFS_DINODES_PER_BB
- 1);
147 * Read in an inode from disk.
149 * We actually take in four inodes at a time. Hopefully these will stick
150 * around in the buffer cache and get used without going to disk.
152 * Returns 0 on success.
155 efs_read_inode(struct efs_mount
*emp
, ino_t ino
, struct lwp
*l
,
156 struct efs_dinode
*di
)
164 efs_locate_inode(ino
, sbp
, &bboff
, &index
);
166 err
= efs_bread(emp
, bboff
, l
, &bp
);
171 memcpy(di
, ((struct efs_dinode
*)bp
->b_data
) + index
, sizeof(*di
));
178 * Perform a read from our device handling the potential DEV_BSIZE
179 * messiness (although as of 19.2.2006, all ports appear to use 512) as
180 * we as EFS block sizing.
182 * bboff: basic block offset
184 * Returns 0 on success.
187 efs_bread(struct efs_mount
*emp
, uint32_t bboff
, struct lwp
*l
, struct buf
**bp
)
189 KASSERT(bboff
< EFS_SIZE_MAX
);
191 return (bread(emp
->em_devvp
, (daddr_t
)bboff
* (EFS_BB_SIZE
/ DEV_BSIZE
),
192 EFS_BB_SIZE
, (l
== NULL
) ? NOCRED
: l
->l_cred
, 0, bp
));
196 * Synchronise the in-core, host ordered and typed inode fields with their
197 * corresponding on-disk, EFS ordered and typed copies.
199 * This is the inverse of efs_dinode_sync_inode(), and should be called when
200 * an inode is loaded from disk.
203 efs_sync_dinode_to_inode(struct efs_inode
*ei
)
206 ei
->ei_mode
= be16toh(ei
->ei_di
.di_mode
); /*same as nbsd*/
207 ei
->ei_nlink
= be16toh(ei
->ei_di
.di_nlink
);
208 ei
->ei_uid
= be16toh(ei
->ei_di
.di_uid
);
209 ei
->ei_gid
= be16toh(ei
->ei_di
.di_gid
);
210 ei
->ei_size
= be32toh(ei
->ei_di
.di_size
);
211 ei
->ei_atime
= be32toh(ei
->ei_di
.di_atime
);
212 ei
->ei_mtime
= be32toh(ei
->ei_di
.di_mtime
);
213 ei
->ei_ctime
= be32toh(ei
->ei_di
.di_ctime
);
214 ei
->ei_gen
= be32toh(ei
->ei_di
.di_gen
);
215 ei
->ei_numextents
= be16toh(ei
->ei_di
.di_numextents
);
216 ei
->ei_version
= ei
->ei_di
.di_version
;
/*
 * Synchronise the on-disk, EFS ordered and typed inode fields with their
 * corresponding in-core, host ordered and typed copies.
 *
 * This is the inverse of efs_sync_dinode_to_inode(), and should be called
 * before an inode is flushed to disk.
 *
 * Not implemented: the only statement is a panic(), so calling this
 * function is treated as a bug.
 */
void
efs_sync_inode_to_dinode(struct efs_inode *ei)
{

	panic("readonly -- no need to call me");
}
235 * Ensure that the in-core inode's host cached fields match its on-disk copy.
237 * Returns 0 if they match.
240 efs_is_inode_synced(struct efs_inode
*ei
)
245 /* XXX -- see above remarks about assumption */
246 s
+= (ei
->ei_mode
!= be16toh(ei
->ei_di
.di_mode
));
247 s
+= (ei
->ei_nlink
!= be16toh(ei
->ei_di
.di_nlink
));
248 s
+= (ei
->ei_uid
!= be16toh(ei
->ei_di
.di_uid
));
249 s
+= (ei
->ei_gid
!= be16toh(ei
->ei_di
.di_gid
));
250 s
+= (ei
->ei_size
!= be32toh(ei
->ei_di
.di_size
));
251 s
+= (ei
->ei_atime
!= be32toh(ei
->ei_di
.di_atime
));
252 s
+= (ei
->ei_mtime
!= be32toh(ei
->ei_di
.di_mtime
));
253 s
+= (ei
->ei_ctime
!= be32toh(ei
->ei_di
.di_ctime
));
254 s
+= (ei
->ei_gen
!= be32toh(ei
->ei_di
.di_gen
));
255 s
+= (ei
->ei_numextents
!= be16toh(ei
->ei_di
.di_numextents
));
256 s
+= (ei
->ei_version
!= ei
->ei_di
.di_version
);
263 * Given an efs_dirblk structure and a componentname to search for, return the
264 * corresponding inode if it is found.
266 * Returns 0 on success.
269 efs_dirblk_lookup(struct efs_dirblk
*dir
, struct componentname
*cn
,
272 struct efs_dirent
*de
;
275 KASSERT(cn
->cn_namelen
<= EFS_DIRENT_NAMELEN_MAX
);
279 for (i
= 0; i
< dir
->db_slots
; i
++) {
280 offset
= EFS_DIRENT_OFF_EXPND(dir
->db_space
[i
]);
282 if (offset
== EFS_DIRBLK_SLOT_FREE
)
285 de
= (struct efs_dirent
*)((char *)dir
+ offset
);
286 if (de
->de_namelen
== cn
->cn_namelen
&&
287 (strncmp(cn
->cn_nameptr
, de
->de_name
, cn
->cn_namelen
) == 0)){
292 if (i
== dir
->db_slots
)
295 KASSERT(slot
< offset
&& offset
< EFS_DIRBLK_SPACE_SIZE
);
296 de
= (struct efs_dirent
*)((char *)dir
+ offset
);
297 *inode
= be32toh(de
->de_inumber
);
303 * Given an extent descriptor that represents a directory, look up
304 * componentname within its efs_dirblk's. If it is found, return the
305 * corresponding inode in 'ino'.
307 * Returns 0 on success.
310 efs_extent_lookup(struct efs_mount
*emp
, struct efs_extent
*ex
,
311 struct componentname
*cn
, ino_t
*ino
)
313 struct efs_dirblk
*db
;
318 * Read in each of the dirblks until we find our entry.
319 * If we don't, return ENOENT.
321 for (i
= 0; i
< ex
->ex_length
; i
++) {
322 err
= efs_bread(emp
, ex
->ex_bn
+ i
, NULL
, &bp
);
324 printf("efs: warning: invalid extent descriptor\n");
329 db
= (struct efs_dirblk
*)bp
->b_data
;
330 if (efs_dirblk_lookup(db
, cn
, ino
) == 0) {
341 * Given the provided in-core inode, look up the pathname requested. If
342 * we find it, 'ino' reflects its corresponding on-disk inode number.
344 * Returns 0 on success.
347 efs_inode_lookup(struct efs_mount
*emp
, struct efs_inode
*ei
,
348 struct componentname
*cn
, ino_t
*ino
)
350 struct efs_extent ex
;
351 struct efs_extent_iterator exi
;
354 KASSERT(VOP_ISLOCKED(ei
->ei_vp
));
355 KASSERT(efs_is_inode_synced(ei
) == 0);
356 KASSERT((ei
->ei_mode
& S_IFMT
) == S_IFDIR
);
358 efs_extent_iterator_init(&exi
, ei
, 0);
359 while ((ret
= efs_extent_iterator_next(&exi
, &ex
)) == 0) {
360 if (efs_extent_lookup(emp
, &ex
, cn
, ino
) == 0) {
365 return ((ret
== -1) ? ENOENT
: ret
);
369 * Convert on-disk extent structure to in-core format.
372 efs_dextent_to_extent(struct efs_dextent
*dex
, struct efs_extent
*ex
)
375 KASSERT(dex
!= NULL
&& ex
!= NULL
);
377 ex
->ex_magic
= dex
->ex_bytes
[0];
378 ex
->ex_bn
= be32toh(dex
->ex_words
[0]) & 0x00ffffff;
379 ex
->ex_length
= dex
->ex_bytes
[4];
380 ex
->ex_offset
= be32toh(dex
->ex_words
[1]) & 0x00ffffff;
384 * Convert in-core extent format to on-disk structure.
387 efs_extent_to_dextent(struct efs_extent
*ex
, struct efs_dextent
*dex
)
390 KASSERT(ex
!= NULL
&& dex
!= NULL
);
391 KASSERT(ex
->ex_magic
== EFS_EXTENT_MAGIC
);
392 KASSERT((ex
->ex_bn
& ~EFS_EXTENT_BN_MASK
) == 0);
393 KASSERT((ex
->ex_offset
& ~EFS_EXTENT_OFFSET_MASK
) == 0);
395 dex
->ex_words
[0] = htobe32(ex
->ex_bn
);
396 dex
->ex_bytes
[0] = ex
->ex_magic
;
397 dex
->ex_words
[1] = htobe32(ex
->ex_offset
);
398 dex
->ex_bytes
[4] = ex
->ex_length
;
402 * Initialise an extent iterator.
404 * If start_hint is non-0, attempt to set up the iterator beginning with the
405 * extent descriptor in which the start_hint'th byte exists. Callers must not
406 * expect success (this is simply an optimisation), so we reserve the right
407 * to start from the beginning.
410 efs_extent_iterator_init(struct efs_extent_iterator
*exi
, struct efs_inode
*eip
,
413 struct efs_extent ex
, ex2
;
415 struct efs_mount
*emp
= VFSTOEFS(eip
->ei_vp
->v_mount
);
416 off_t offset
, length
, next
;
417 int i
, err
, numextents
, numinextents
;
429 /* force iterator to end if hint is too big */
430 if (start_hint
>= eip
->ei_size
) {
431 exi
->exi_next
= eip
->ei_numextents
;
436 * Use start_hint to jump to the right extent descriptor. We'll
437 * iterate over the 12 indirect extents because it's cheap, then
438 * bring the appropriate vector into core and binary search it.
442 * Handle the small file case separately first...
444 if (eip
->ei_numextents
<= EFS_DIRECTEXTENTS
) {
445 for (i
= 0; i
< eip
->ei_numextents
; i
++) {
446 efs_dextent_to_extent(&eip
->ei_di
.di_extents
[i
], &ex
);
448 offset
= ex
.ex_offset
* EFS_BB_SIZE
;
449 length
= ex
.ex_length
* EFS_BB_SIZE
;
451 if (start_hint
>= offset
&&
452 start_hint
< (offset
+ length
)) {
453 exi
->exi_next
= exi
->exi_dnext
= i
;
458 /* shouldn't get here, no? */
459 EFS_DPRINTF(("efs_extent_iterator_init: bad direct extents\n"));
464 * Now do the large files with indirect extents...
466 * The first indirect extent's ex_offset field contains the
467 * number of indirect extents used.
469 efs_dextent_to_extent(&eip
->ei_di
.di_extents
[0], &ex
);
471 numinextents
= ex
.ex_offset
;
472 if (numinextents
< 1 || numinextents
>= EFS_DIRECTEXTENTS
) {
473 EFS_DPRINTF(("efs_extent_iterator_init: bad ex.ex_offset\n"));
480 for (i
= 0; i
< numinextents
; i
++) {
481 efs_dextent_to_extent(&eip
->ei_di
.di_extents
[i
], &ex
);
483 err
= efs_bread(emp
, ex
.ex_bn
, NULL
, &bp
);
489 efs_dextent_to_extent((struct efs_dextent
*)bp
->b_data
, &ex2
);
492 offset
= ex2
.ex_offset
* EFS_BB_SIZE
;
494 if (offset
> start_hint
) {
495 indir
= MAX(0, i
- 1);
499 /* number of extents prior to this indirect vector of extents */
502 /* number of extents within this indirect vector of extents */
503 numextents
= ex
.ex_length
* EFS_EXTENTS_PER_BB
;
504 numextents
= MIN(numextents
, eip
->ei_numextents
- next
);
508 * We hit the end, so assume it's in the last extent.
511 indir
= numinextents
- 1;
514 * Binary search to find our desired direct extent.
519 efs_dextent_to_extent(&eip
->ei_di
.di_extents
[indir
], &ex
);
526 bboff
= mid
/ EFS_EXTENTS_PER_BB
;
527 index
= mid
% EFS_EXTENTS_PER_BB
;
529 err
= efs_bread(emp
, ex
.ex_bn
+ bboff
, NULL
, &bp
);
532 EFS_DPRINTF(("efs_extent_iterator_init: bsrch read\n"));
536 efs_dextent_to_extent((struct efs_dextent
*)bp
->b_data
+ index
,
540 offset
= ex2
.ex_offset
* EFS_BB_SIZE
;
541 length
= ex2
.ex_length
* EFS_BB_SIZE
;
543 if (start_hint
>= offset
&& start_hint
< (offset
+ length
))
546 if (start_hint
< offset
)
553 * This is bad. Either the hint is bogus (which shouldn't
554 * happen) or the extent list must be screwed up. We
558 EFS_DPRINTF(("efs_extent_iterator_init: bsearch "
559 "failed to find extent\n"));
563 exi
->exi_next
= next
+ mid
;
564 exi
->exi_dnext
= indir
;
565 exi
->exi_innext
= mid
;
569 * Return the next EFS extent.
571 * Returns 0 if another extent was iterated, -1 if we've exhausted all
572 * extents, or an error number. If 'exi' is non-NULL, the next extent is
573 * written to it (should it exist).
576 efs_extent_iterator_next(struct efs_extent_iterator
*exi
,
577 struct efs_extent
*exp
)
579 struct efs_extent ex
;
580 struct efs_dextent
*dexp
;
581 struct efs_inode
*eip
= exi
->exi_eip
;
583 int err
, bboff
, index
;
585 if (exi
->exi_next
++ >= eip
->ei_numextents
)
588 /* direct or indirect extents? */
589 if (eip
->ei_numextents
<= EFS_DIRECTEXTENTS
) {
591 dexp
= &eip
->ei_di
.di_extents
[exi
->exi_dnext
++];
592 efs_dextent_to_extent(dexp
, exp
);
595 efs_dextent_to_extent(
596 &eip
->ei_di
.di_extents
[exi
->exi_dnext
], &ex
);
598 bboff
= exi
->exi_innext
/ EFS_EXTENTS_PER_BB
;
599 index
= exi
->exi_innext
% EFS_EXTENTS_PER_BB
;
601 err
= efs_bread(VFSTOEFS(eip
->ei_vp
->v_mount
),
602 ex
.ex_bn
+ bboff
, NULL
, &bp
);
604 EFS_DPRINTF(("efs_extent_iterator_next: "
605 "efs_bread failed: %d\n", err
));
611 dexp
= (struct efs_dextent
*)bp
->b_data
+ index
;
612 efs_dextent_to_extent(dexp
, exp
);
616 bboff
= exi
->exi_innext
++ / EFS_EXTENTS_PER_BB
;
617 if (bboff
>= ex
.ex_length
) {