1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
9 #include "xfs_shared.h"
10 #include "xfs_format.h"
11 #include "xfs_log_format.h"
12 #include "xfs_trans_resv.h"
13 #include "xfs_mount.h"
14 #include "xfs_inode.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode_item.h"
17 #include "xfs_btree.h"
18 #include "xfs_bmap_btree.h"
20 #include "xfs_error.h"
21 #include "xfs_trace.h"
22 #include "xfs_da_format.h"
23 #include "xfs_da_btree.h"
24 #include "xfs_dir2_priv.h"
25 #include "xfs_attr_leaf.h"
/*
 * Allocation zone for in-core fork structures.  Elsewhere in this file it is
 * passed to kmem_zone_zalloc() to allocate i_afp and i_cowfp, and to
 * kmem_cache_free() to release them.
 */
27 kmem_zone_t
*xfs_ifork_zone
;
/*
 * Forward declarations of the three per-format fork readers used by the
 * dispatch code below.  Going by the call sites in this file, the arguments
 * are the in-core inode, the on-disk inode and a fork selector
 * (XFS_DATA_FORK or XFS_ATTR_FORK); xfs_iformat_local also takes the fork
 * size.  Each returns an int that callers store in error.
 * NOTE(review): STATIC presumably expands to static -- confirm.
 */
29 STATIC
int xfs_iformat_local(xfs_inode_t
*, xfs_dinode_t
*, int, int);
30 STATIC
int xfs_iformat_extents(xfs_inode_t
*, xfs_dinode_t
*, int);
31 STATIC
int xfs_iformat_btree(xfs_inode_t
*, xfs_dinode_t
*, int);
34 * Copy inode type and data and attr format specific information from the
35 * on-disk inode to the in-core inode and fork structures. For fifos, devices,
36 * and sockets this means set i_rdev to the proper value. For files,
37 * directories, and symlinks this means to bring in the in-line data or extent
38 * pointers as well as the attribute fork. For a fork in B-tree format, only
39 * the root is immediately brought in-core. The rest will be read in later when
40 * first referenced (see xfs_iread_extents()).
/*
 * NOTE(review): the lines carrying this function name and its first
 * parameter are not visible in this view (presumably xfs_iformat_fork --
 * confirm against the full file).
 *
 * Visible behaviour: switch on the file-type bits of the VFS inode mode;
 * read the data fork through the local/extents/btree reader selected by
 * dip->di_format; give reflink inodes a COW fork; allocate ip->i_afp and
 * read the attr fork through the reader selected by dip->di_aformat.
 * Unrecognised cases are reported with xfs_inode_verifier_error().
 */
45 struct xfs_dinode
*dip
)
47 struct inode
*inode
= VFS_I(ip
);
48 struct xfs_attr_shortform
*atp
;
/* Outer dispatch is on the file-type bits of the VFS inode mode. */
53 switch (inode
->i_mode
& S_IFMT
) {
/* The on-disk rdev is converted to the Linux dev_t representation. */
59 inode
->i_rdev
= xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip
));
/* Data fork: pick the reader that matches the on-disk data fork format. */
65 switch (dip
->di_format
) {
66 case XFS_DINODE_FMT_LOCAL
:
/*
 * NOTE(review): di_size is read here but the call below passes size; the
 * statement that derives size from di_size is not visible in this view.
 */
67 di_size
= be64_to_cpu(dip
->di_size
);
69 error
= xfs_iformat_local(ip
, dip
, XFS_DATA_FORK
, size
);
71 case XFS_DINODE_FMT_EXTENTS
:
72 error
= xfs_iformat_extents(ip
, dip
, XFS_DATA_FORK
);
74 case XFS_DINODE_FMT_BTREE
:
75 error
= xfs_iformat_btree(ip
, dip
, XFS_DATA_FORK
);
/*
 * Both calls below report -EFSCORRUPTED against the raw on-disk inode.  The
 * labels that select them are not visible here; presumably they are the
 * default arms of the two switches above -- confirm.
 */
78 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
, __func__
,
79 dip
, sizeof(*dip
), __this_address
);
85 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
, __func__
, dip
,
86 sizeof(*dip
), __this_address
);
/* Reflink inodes get a COW fork; the assert states none exists yet. */
92 if (xfs_is_reflink_inode(ip
)) {
93 ASSERT(ip
->i_cowfp
== NULL
);
94 xfs_ifork_init_cow(ip
);
/*
 * NOTE(review): the statement guarded by this test is not visible;
 * presumably the function finishes here when the inode has no attr fork.
 */
97 if (!XFS_DFORK_Q(dip
))
/* Allocate a zeroed in-core attr fork before reading into it. */
100 ASSERT(ip
->i_afp
== NULL
);
101 ip
->i_afp
= kmem_zone_zalloc(xfs_ifork_zone
, KM_NOFS
);
103 switch (dip
->di_aformat
) {
104 case XFS_DINODE_FMT_LOCAL
:
/* Local attr fork: its size comes from the totsize field of its header. */
105 atp
= (xfs_attr_shortform_t
*)XFS_DFORK_APTR(dip
);
106 size
= be16_to_cpu(atp
->hdr
.totsize
);
108 error
= xfs_iformat_local(ip
, dip
, XFS_ATTR_FORK
, size
);
110 case XFS_DINODE_FMT_EXTENTS
:
111 error
= xfs_iformat_extents(ip
, dip
, XFS_ATTR_FORK
);
113 case XFS_DINODE_FMT_BTREE
:
114 error
= xfs_iformat_btree(ip
, dip
, XFS_ATTR_FORK
);
/*
 * NOTE(review): unlike the data-fork reports above, this call passes error
 * rather than -EFSCORRUPTED, and error is only set to -EFSCORRUPTED on the
 * following statement.  Confirm that the value reported here is intended.
 */
117 xfs_inode_verifier_error(ip
, error
, __func__
, dip
,
118 sizeof(*dip
), __this_address
);
119 error
= -EFSCORRUPTED
;
/*
 * Cleanup calls: release the attr fork, release the COW fork and destroy the
 * data fork.  NOTE(review): the conditions around these calls are not
 * visible; presumably this is the error unwind -- confirm.
 */
123 kmem_cache_free(xfs_ifork_zone
, ip
->i_afp
);
126 kmem_cache_free(xfs_ifork_zone
, ip
->i_cowfp
);
128 xfs_idestroy_fork(ip
, XFS_DATA_FORK
);
/*
 * NOTE(review): the line naming this function is not visible in this view.
 * It is presumably xfs_init_local_fork, which is called later in this file
 * as xfs_init_local_fork(ip, whichfork, data, size) -- confirm.
 *
 * Visible behaviour: copy size bytes from data into a newly allocated
 * if_data buffer (or leave if_data NULL), record the byte count in if_bytes,
 * clear XFS_IFEXTENTS and XFS_IFBROOT, and set XFS_IFINLINE.
 */
135 struct xfs_inode
*ip
,
140 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
141 int mem_size
= size
, real_size
= 0;
145 * If we are using the local fork to store a symlink body we need to
146 * zero-terminate it so that we can pass it back to the VFS directly.
147 * Overallocate the in-memory fork by one for that and add a zero
148 * to terminate it below.
/* Termination is requested only when the inode mode says symlink. */
150 zero_terminate
= S_ISLNK(VFS_I(ip
)->i_mode
);
/*
 * The allocation is rounded up to a multiple of 4 bytes.
 * NOTE(review): the statement that enlarges mem_size for the terminator and
 * the condition selecting this branch are not visible in this view.
 */
155 real_size
= roundup(mem_size
, 4);
156 ifp
->if_u1
.if_data
= kmem_alloc(real_size
, KM_NOFS
);
157 memcpy(ifp
->if_u1
.if_data
, data
, size
);
/* Terminator goes at index size, one past the copied bytes. */
159 ifp
->if_u1
.if_data
[size
] = '\0';
/* NOTE(review): presumably the no-data branch (size of zero) -- confirm. */
161 ifp
->if_u1
.if_data
= NULL
;
/* if_bytes records the caller-supplied size, not the rounded allocation. */
164 ifp
->if_bytes
= size
;
165 ifp
->if_flags
&= ~(XFS_IFEXTENTS
| XFS_IFBROOT
);
166 ifp
->if_flags
|= XFS_IFINLINE
;
170 * The file is in-lined in the on-disk inode.
180 * If the size is unreasonable, then something
181 * is wrong and we just bail out rather than crash in
182 * kmem_alloc() or memcpy() below.
/*
 * Body fragment of xfs_iformat_local (the signature lines are not visible in
 * this view; the name is taken from the string passed to
 * xfs_inode_verifier_error below).
 *
 * A size larger than XFS_DFORK_SIZE() for this fork is logged, reported as
 * corruption and rejected with -EFSCORRUPTED.  Otherwise the on-disk fork
 * contents are handed to xfs_init_local_fork().
 */
184 if (unlikely(size
> XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
))) {
185 xfs_warn(ip
->i_mount
,
186 "corrupt inode %Lu (bad size %d for local fork, size = %d).",
187 (unsigned long long) ip
->i_ino
, size
,
188 XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
));
189 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
190 "xfs_iformat_local", dip
, sizeof(*dip
),
192 return -EFSCORRUPTED
;
/* Size is within the fork: build the in-core copy from the on-disk bytes. */
195 xfs_init_local_fork(ip
, whichfork
, XFS_DFORK_PTR(dip
, whichfork
), size
);
200 * The file consists of a set of extents all of which fit into the on-disk
/*
 * Parameter and body fragment of xfs_iformat_extents (the name line is not
 * visible in this view; the name is taken from the strings passed to
 * xfs_inode_verifier_error below).
 *
 * Visible behaviour: take the extent count from the on-disk inode, reject
 * the fork with -EFSCORRUPTED if count * record size is negative or exceeds
 * the fork size, then decode each on-disk record, validate it and insert it
 * into the in-core extent list.  XFS_IFEXTENTS is set at the end.
 */
205 struct xfs_inode
*ip
,
206 struct xfs_dinode
*dip
,
209 struct xfs_mount
*mp
= ip
->i_mount
;
210 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
211 int state
= xfs_bmap_fork_to_state(whichfork
);
212 int nex
= XFS_DFORK_NEXTENTS(dip
, whichfork
);
/*
 * size is an int; NOTE(review): the size < 0 test below presumably catches a
 * negative nex or a product that does not fit -- confirm.
 */
213 int size
= nex
* sizeof(xfs_bmbt_rec_t
);
214 struct xfs_iext_cursor icur
;
215 struct xfs_bmbt_rec
*dp
;
216 struct xfs_bmbt_irec
new;
220 * If the number of extents is unreasonable, then something is wrong and
221 * we just bail out rather than crash in kmem_alloc() or memcpy() below.
223 if (unlikely(size
< 0 || size
> XFS_DFORK_SIZE(dip
, mp
, whichfork
))) {
224 xfs_warn(ip
->i_mount
, "corrupt inode %Lu ((a)extents = %d).",
225 (unsigned long long) ip
->i_ino
, nex
);
226 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
227 "xfs_iformat_extents(1)", dip
, sizeof(*dip
),
229 return -EFSCORRUPTED
;
233 ifp
->if_u1
.if_root
= NULL
;
/* dp walks the on-disk records stored in the fork area of the inode. */
236 dp
= (xfs_bmbt_rec_t
*) XFS_DFORK_PTR(dip
, whichfork
);
238 xfs_iext_first(ifp
, &icur
);
239 for (i
= 0; i
< nex
; i
++, dp
++) {
/* Decode the on-disk record into new, then validate before inserting. */
242 xfs_bmbt_disk_get_all(dp
, &new);
243 fa
= xfs_bmap_validate_extent(ip
, whichfork
, &new);
/*
 * NOTE(review): the test on fa that guards this report is not visible here;
 * presumably a non-NULL fa marks a bad record -- confirm.
 */
245 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
246 "xfs_iformat_extents(2)",
247 dp
, sizeof(*dp
), fa
);
248 return -EFSCORRUPTED
;
/* Insert at the cursor, trace it, then advance the cursor. */
251 xfs_iext_insert(ip
, &icur
, &new, state
);
252 trace_xfs_read_extent(ip
, &icur
, state
, _THIS_IP_
);
253 xfs_iext_next(ifp
, &icur
);
256 ifp
->if_flags
|= XFS_IFEXTENTS
;
261 * The file has too many extents to fit into
262 * the inode, so they are in B-tree format.
263 * Allocate a buffer for the root of the B-tree
264 * and copy the root into it. The i_extents
265 * field will remain NULL until all of the
266 * extents are read in (when they are needed).
/*
 * Body fragment of xfs_iformat_btree (the signature lines are not visible in
 * this view; the name is taken from the string passed to
 * xfs_inode_verifier_error below).
 *
 * Visible behaviour: locate the on-disk btree root in the fork, compute the
 * in-memory root size with XFS_BMAP_BROOT_SPACE(), and read the record count
 * and level from the root header.  The fork is rejected with -EFSCORRUPTED
 * when any visible term of the sanity test holds: extent count not above
 * XFS_IFORK_MAXEXT(), XFS_BMDR_SPACE_CALC(nrecs) larger than the fork,
 * extent count above di_nblocks, or level zero or above
 * XFS_BTREE_MAXLEVELS.  Otherwise the root is allocated, converted with
 * xfs_bmdr_to_bmbt(), XFS_IFEXTENTS is cleared, XFS_IFBROOT is set and
 * if_u1.if_root is set to NULL.
 * NOTE(review): one term of the sanity test sits on a line that is not
 * visible in this view.
 */
274 struct xfs_mount
*mp
= ip
->i_mount
;
275 xfs_bmdr_block_t
*dfp
;
276 struct xfs_ifork
*ifp
;
282 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
/* dfp points at the btree root as stored inside the on-disk inode fork. */
283 dfp
= (xfs_bmdr_block_t
*)XFS_DFORK_PTR(dip
, whichfork
);
284 size
= XFS_BMAP_BROOT_SPACE(mp
, dfp
);
/* Header fields are converted from big-endian on-disk order. */
285 nrecs
= be16_to_cpu(dfp
->bb_numrecs
);
286 level
= be16_to_cpu(dfp
->bb_level
);
289 * blow out if -- fork has less extents than can fit in
290 * fork (fork shouldn't be a btree format), root btree
291 * block has more records than can fit into the fork,
292 * or the number of extents is greater than the number of
295 if (unlikely(XFS_IFORK_NEXTENTS(ip
, whichfork
) <=
296 XFS_IFORK_MAXEXT(ip
, whichfork
) ||
298 XFS_BMDR_SPACE_CALC(nrecs
) >
299 XFS_DFORK_SIZE(dip
, mp
, whichfork
) ||
300 XFS_IFORK_NEXTENTS(ip
, whichfork
) > ip
->i_d
.di_nblocks
) ||
301 level
== 0 || level
> XFS_BTREE_MAXLEVELS
) {
302 xfs_warn(mp
, "corrupt inode %Lu (btree).",
303 (unsigned long long) ip
->i_ino
);
304 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
305 "xfs_iformat_btree", dfp
, size
,
307 return -EFSCORRUPTED
;
310 ifp
->if_broot_bytes
= size
;
311 ifp
->if_broot
= kmem_alloc(size
, KM_NOFS
);
312 ASSERT(ifp
->if_broot
!= NULL
);
314 * Copy and convert from the on-disk structure
315 * to the in-memory structure.
317 xfs_bmdr_to_bmbt(ip
, dfp
, XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
),
318 ifp
->if_broot
, size
);
319 ifp
->if_flags
&= ~XFS_IFEXTENTS
;
320 ifp
->if_flags
|= XFS_IFBROOT
;
323 ifp
->if_u1
.if_root
= NULL
;
329 * Reallocate the space for if_broot based on the number of records
330 * being added or deleted as indicated in rec_diff. Move the records
331 * and pointers in if_broot to fit the new size. When shrinking this
332 * will eliminate holes between the records and pointers created by
333 * the caller. When growing this will create holes to be filled in
336 * The caller must not request to add more records than would fit in
337 * the on-disk inode root. If the if_broot is currently NULL, then
338 * if we are adding records, one will be allocated. The caller must also
339 * not request that the number of records go below zero, although
342 * ip -- the inode whose if_broot area is changing
343 * rec_diff -- the change in the number of records, positive or negative,
344 * requested for the if_broot array.
/*
 * NOTE(review): the signature lines are not visible in this view (presumably
 * xfs_iroot_realloc -- confirm).  rec_diff and whichfork are used below
 * without a visible declaration, so they are presumably parameters.
 *
 * Visible behaviour: resize ifp->if_broot by rec_diff records.  With no
 * existing root (if_broot_bytes of zero) a new buffer sized for rec_diff
 * records is allocated.  When growing, the buffer is reallocated and the
 * pointer area is moved to its new offset.  When shrinking, a new buffer is
 * allocated, the block header, records and pointers are copied across, and
 * the old buffer is freed.
 */
352 struct xfs_mount
*mp
= ip
->i_mount
;
354 struct xfs_ifork
*ifp
;
355 struct xfs_btree_block
*new_broot
;
362 * Handle the degenerate case quietly.
368 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
371 * If there wasn't any memory allocated before, just
372 * allocate it now and get out.
374 if (ifp
->if_broot_bytes
== 0) {
375 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, rec_diff
);
376 ifp
->if_broot
= kmem_alloc(new_size
, KM_NOFS
);
377 ifp
->if_broot_bytes
= (int)new_size
;
382 * If there is already an existing if_broot, then we need
383 * to realloc() it and shift the pointers to their new
384 * location. The records don't change location because
385 * they are kept butted up against the btree block header.
/*
 * Grow path: derive the current record capacity from the buffer size, then
 * size the new buffer for cur_max + rec_diff records.
 */
387 cur_max
= xfs_bmbt_maxrecs(mp
, ifp
->if_broot_bytes
, 0);
388 new_max
= cur_max
+ rec_diff
;
389 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, new_max
);
/*
 * NOTE(review): the result is assigned straight back to if_broot.  The flags
 * argument of this call is not visible here, so whether the call can return
 * NULL cannot be determined from this view.
 */
390 ifp
->if_broot
= kmem_realloc(ifp
->if_broot
, new_size
,
/*
 * op is the first pointer slot for the old buffer size.  np is the matching
 * slot in the new layout; its size argument is not visible in this view.
 */
392 op
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
393 ifp
->if_broot_bytes
);
394 np
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
396 ifp
->if_broot_bytes
= (int)new_size
;
397 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
398 XFS_IFORK_SIZE(ip
, whichfork
));
/* memmove because op and np both point into the same if_broot buffer. */
399 memmove(np
, op
, cur_max
* (uint
)sizeof(xfs_fsblock_t
));
404 * rec_diff is less than 0. In this case, we are shrinking the
405 * if_broot buffer. It must already exist. If we go to zero
406 * records, just get rid of the root and clear the status bit.
408 ASSERT((ifp
->if_broot
!= NULL
) && (ifp
->if_broot_bytes
> 0));
409 cur_max
= xfs_bmbt_maxrecs(mp
, ifp
->if_broot_bytes
, 0);
410 new_max
= cur_max
+ rec_diff
;
411 ASSERT(new_max
>= 0);
/*
 * NOTE(review): the conditions around the next statements (new_max above
 * zero versus equal to zero) are not visible in this view.
 */
413 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, new_max
);
/* The shrink path copies into a separate, newly allocated buffer. */
417 new_broot
= kmem_alloc(new_size
, KM_NOFS
);
419 * First copy over the btree block header.
421 memcpy(new_broot
, ifp
->if_broot
,
422 XFS_BMBT_BLOCK_LEN(ip
->i_mount
));
425 ifp
->if_flags
&= ~XFS_IFBROOT
;
429 * Only copy the records and pointers if there are any.
433 * First copy the records.
/* Only the first new_max records are carried over. */
435 op
= (char *)XFS_BMBT_REC_ADDR(mp
, ifp
->if_broot
, 1);
436 np
= (char *)XFS_BMBT_REC_ADDR(mp
, new_broot
, 1);
437 memcpy(np
, op
, new_max
* (uint
)sizeof(xfs_bmbt_rec_t
));
440 * Then copy the pointers.
/* The source offset uses the old buffer size; new_max pointers are copied. */
442 op
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
443 ifp
->if_broot_bytes
);
444 np
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, new_broot
, 1,
446 memcpy(np
, op
, new_max
* (uint
)sizeof(xfs_fsblock_t
));
/* Release the old root and install the replacement and its size. */
448 kmem_free(ifp
->if_broot
);
449 ifp
->if_broot
= new_broot
;
450 ifp
->if_broot_bytes
= (int)new_size
;
452 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
453 XFS_IFORK_SIZE(ip
, whichfork
));
459 * This is called when the amount of space needed for if_data
460 * is increased or decreased. The change in size is indicated by
461 * the number of bytes that need to be added or deleted in the
462 * byte_diff parameter.
464 * If the amount of space needed has decreased below the size of the
465 * inline buffer, then switch to using the inline buffer. Otherwise,
466 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
469 * ip -- the inode whose if_data area is changing
470 * byte_diff -- the change in the number of bytes, positive or negative,
471 * requested for the if_data array.
/*
 * NOTE(review): the line naming this function is not visible in this view
 * (presumably xfs_idata_realloc -- confirm).  byte_diff and whichfork are
 * used below without a visible declaration, so they are presumably
 * parameters.
 *
 * Visible behaviour: compute the new size as if_bytes + byte_diff, assert it
 * lies between zero and XFS_IFORK_SIZE(), then either free if_data and set
 * it to NULL, or reallocate if_data to the new size rounded up to a multiple
 * of 4.  if_bytes is updated to the unrounded new size.
 */
475 struct xfs_inode
*ip
,
479 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
/* new_size is 64 bits wide, so the range asserts below see the full sum. */
480 int64_t new_size
= ifp
->if_bytes
+ byte_diff
;
482 ASSERT(new_size
>= 0);
483 ASSERT(new_size
<= XFS_IFORK_SIZE(ip
, whichfork
));
/*
 * NOTE(review): the condition guarding this free is not visible here;
 * presumably it is the case where new_size is zero -- confirm.
 */
489 kmem_free(ifp
->if_u1
.if_data
);
490 ifp
->if_u1
.if_data
= NULL
;
496 * For inline data, the underlying buffer must be a multiple of 4 bytes
497 * in size so that it can be logged and stay on word boundaries.
498 * We enforce that here.
/* The allocation is rounded up; if_bytes keeps the exact byte count. */
500 ifp
->if_u1
.if_data
= kmem_realloc(ifp
->if_u1
.if_data
,
501 roundup(new_size
, 4), KM_NOFS
);
502 ifp
->if_bytes
= new_size
;
/*
 * NOTE(review): the signature lines are not visible in this view.  This is
 * presumably xfs_idestroy_fork, which is called earlier in this file as
 * xfs_idestroy_fork(ip, XFS_DATA_FORK) -- confirm.
 *
 * Visible behaviour: free the btree root if present; for a local-format fork
 * free the inline data buffer, otherwise destroy the in-core extent list
 * when XFS_IFEXTENTS is set and if_height is non-zero; finally, for the attr
 * and COW forks, return the fork structure itself to xfs_ifork_zone.
 */
510 struct xfs_ifork
*ifp
;
512 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
/* Pointers are reset to NULL after freeing. */
513 if (ifp
->if_broot
!= NULL
) {
514 kmem_free(ifp
->if_broot
);
515 ifp
->if_broot
= NULL
;
519 * If the format is local, then we can't have an extents
520 * array so just look for an inline data array. If we're
521 * not local then we may or may not have an extents list,
522 * so check and free it up if we do.
524 if (XFS_IFORK_FORMAT(ip
, whichfork
) == XFS_DINODE_FMT_LOCAL
) {
525 if (ifp
->if_u1
.if_data
!= NULL
) {
526 kmem_free(ifp
->if_u1
.if_data
);
527 ifp
->if_u1
.if_data
= NULL
;
529 } else if ((ifp
->if_flags
& XFS_IFEXTENTS
) && ifp
->if_height
) {
530 xfs_iext_destroy(ifp
);
/*
 * Only the attr and COW fork structures are freed back to the zone here;
 * no such call is visible for the data fork.
 * NOTE(review): the statements after each free are not visible in this view.
 */
533 if (whichfork
== XFS_ATTR_FORK
) {
534 kmem_cache_free(xfs_ifork_zone
, ip
->i_afp
);
536 } else if (whichfork
== XFS_COW_FORK
) {
537 kmem_cache_free(xfs_ifork_zone
, ip
->i_cowfp
);
543 * Convert in-core extents to on-disk form
545 * In the case of the data fork, the in-core and on-disk fork sizes can be
546 * different due to delayed allocation extents. We only copy on-disk extents
547 * here, so callers must always use the physical fork size to determine the
548 * size of the buffer passed to this routine. We will return the size actually
/*
 * NOTE(review): the line naming this function is not visible in this view.
 * It is presumably xfs_iextents_copy, which is called later in this file
 * with (ip, record buffer, ...) -- confirm.
 *
 * Visible behaviour: walk the in-core extent list of the selected fork,
 * write each record to dp in on-disk form, and add the size of one on-disk
 * record to copied for each record written.  The inode lock must be held,
 * as the first assert checks.
 */
553 struct xfs_inode
*ip
,
554 struct xfs_bmbt_rec
*dp
,
557 int state
= xfs_bmap_fork_to_state(whichfork
);
558 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
559 struct xfs_iext_cursor icur
;
560 struct xfs_bmbt_irec rec
;
563 ASSERT(xfs_isilocked(ip
, XFS_ILOCK_EXCL
| XFS_ILOCK_SHARED
));
564 ASSERT(ifp
->if_bytes
> 0);
566 for_each_xfs_iext(ifp
, &icur
, &rec
) {
/*
 * NOTE(review): the statement guarded by this test is not visible here;
 * presumably such records are skipped, since the comment before this
 * function says only on-disk extents are copied -- confirm.
 */
567 if (isnullstartblock(rec
.br_startblock
))
569 ASSERT(xfs_bmap_validate_extent(ip
, whichfork
, &rec
) == NULL
);
570 xfs_bmbt_disk_set_all(dp
, &rec
);
571 trace_xfs_write_extent(ip
, &icur
, state
, _RET_IP_
);
/* NOTE(review): the statement that advances dp is not visible here. */
572 copied
+= sizeof(struct xfs_bmbt_rec
);
/* The bytes written must not exceed the in-core fork size. */
577 ASSERT(copied
<= ifp
->if_bytes
);
582 * Each of the following cases stores data into the same region
583 * of the on-disk inode, so only one of them can be valid at
584 * any given time. While it is possible to have conflicting formats
585 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
586 * in EXTENTS format, this can only happen when the fork has
587 * changed formats after being modified but before being flushed.
588 * In these cases, the format always takes precedence, because the
589 * format indicates the current state of the fork.
/*
 * NOTE(review): the line naming this function is not visible in this view
 * (presumably xfs_iflush_fork -- confirm).  ip, dip, mp, cp and whichfork
 * are used below without a visible declaration.
 *
 * Visible behaviour: copy the in-core fork into the fork area of the on-disk
 * inode dip.  The copy is selected by the current fork format and only made
 * when the matching flag is set in iip->ili_fields: inline data for LOCAL,
 * extent records for EXTENTS, the btree root for BTREE, and the device
 * number for DEV.
 */
595 xfs_inode_log_item_t
*iip
,
599 struct xfs_ifork
*ifp
;
/*
 * Per-fork log flag tables indexed by whichfork.
 * NOTE(review): this assumes XFS_DATA_FORK is 0 and XFS_ATTR_FORK is 1 --
 * confirm against the header that defines them.
 */
601 static const short brootflag
[2] =
602 { XFS_ILOG_DBROOT
, XFS_ILOG_ABROOT
};
603 static const short dataflag
[2] =
604 { XFS_ILOG_DDATA
, XFS_ILOG_ADATA
};
605 static const short extflag
[2] =
606 { XFS_ILOG_DEXT
, XFS_ILOG_AEXT
};
610 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
612 * This can happen if we gave up in iformat in an error path,
613 * for the attribute fork.
/*
 * NOTE(review): the test guarding this assert is not visible here;
 * presumably it is the case where ifp is NULL -- confirm.
 */
616 ASSERT(whichfork
== XFS_ATTR_FORK
);
/* cp is the destination: the fork area inside the on-disk inode. */
619 cp
= XFS_DFORK_PTR(dip
, whichfork
);
621 switch (XFS_IFORK_FORMAT(ip
, whichfork
)) {
622 case XFS_DINODE_FMT_LOCAL
:
/* Inline data is copied only if logged for this fork and non-empty. */
623 if ((iip
->ili_fields
& dataflag
[whichfork
]) &&
624 (ifp
->if_bytes
> 0)) {
625 ASSERT(ifp
->if_u1
.if_data
!= NULL
);
626 ASSERT(ifp
->if_bytes
<= XFS_IFORK_SIZE(ip
, whichfork
));
627 memcpy(cp
, ifp
->if_u1
.if_data
, ifp
->if_bytes
);
631 case XFS_DINODE_FMT_EXTENTS
:
/* If the extent flag is logged, the in-core extent list must be present. */
632 ASSERT((ifp
->if_flags
& XFS_IFEXTENTS
) ||
633 !(iip
->ili_fields
& extflag
[whichfork
]));
634 if ((iip
->ili_fields
& extflag
[whichfork
]) &&
635 (ifp
->if_bytes
> 0)) {
636 ASSERT(XFS_IFORK_NEXTENTS(ip
, whichfork
) > 0);
/* The return value of the copy is deliberately discarded. */
637 (void)xfs_iextents_copy(ip
, (xfs_bmbt_rec_t
*)cp
,
642 case XFS_DINODE_FMT_BTREE
:
/* The btree root is converted back to its on-disk form in place at cp. */
643 if ((iip
->ili_fields
& brootflag
[whichfork
]) &&
644 (ifp
->if_broot_bytes
> 0)) {
645 ASSERT(ifp
->if_broot
!= NULL
);
646 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
647 XFS_IFORK_SIZE(ip
, whichfork
));
648 xfs_bmbt_to_bmdr(mp
, ifp
->if_broot
, ifp
->if_broot_bytes
,
649 (xfs_bmdr_block_t
*)cp
,
650 XFS_DFORK_SIZE(dip
, mp
, whichfork
));
654 case XFS_DINODE_FMT_DEV
:
/* The device number is only ever stored for the data fork. */
655 if (iip
->ili_fields
& XFS_ILOG_DEV
) {
656 ASSERT(whichfork
== XFS_DATA_FORK
);
657 xfs_dinode_put_rdev(dip
,
658 linux_to_xfs_dev_t(VFS_I(ip
)->i_rdev
));
668 /* Convert bmap state flags to an inode fork. */
/*
 * Select a fork of ip from the bmap state flags.  BMAP_COWFORK is tested
 * before BMAP_ATTRFORK, so it takes precedence if both bits are set.
 * NOTE(review): the return type, the state parameter line and the value
 * returned on each branch are not visible in this view.
 */
670 xfs_iext_state_to_fork(
671 struct xfs_inode
*ip
,
674 if (state
& BMAP_COWFORK
)
676 else if (state
& BMAP_ATTRFORK
)
682 * Initialize an inode's copy-on-write fork.
/*
 * NOTE(review): the line naming this function is not visible in this view.
 * It is presumably xfs_ifork_init_cow, which is called earlier in this file
 * as xfs_ifork_init_cow(ip) -- confirm.
 *
 * Visible behaviour: allocate a zeroed fork structure from xfs_ifork_zone
 * for ip->i_cowfp, mark it XFS_IFEXTENTS and set the COW fork format to
 * XFS_DINODE_FMT_EXTENTS.
 * NOTE(review): any early-return test and the allocation flags are on lines
 * that are not visible here.
 */
686 struct xfs_inode
*ip
)
691 ip
->i_cowfp
= kmem_zone_zalloc(xfs_ifork_zone
,
/* The new fork starts in extents format with the extents flag set. */
693 ip
->i_cowfp
->if_flags
= XFS_IFEXTENTS
;
694 ip
->i_cformat
= XFS_DINODE_FMT_EXTENTS
;
698 /* Default fork content verifiers. */
/*
 * Table of verifier callbacks for inline fork contents, one each for attr
 * forks, directories and symlinks.  The verify functions later in this file
 * call through these members as ops->verify_attr(ip), ops->verify_dir(ip)
 * and ops->verify_symlink(ip).
 */
699 struct xfs_ifork_ops xfs_default_ifork_ops
= {
700 .verify_attr
= xfs_attr_shortform_verify
,
701 .verify_dir
= xfs_dir2_sf_verify
,
702 .verify_symlink
= xfs_symlink_shortform_verify
,
705 /* Verify the inline contents of the data fork of an inode. */
/*
 * Run the verifier from ops that matches the inode type against an inline
 * (local format) data fork.  Only XFS_DINODE_FMT_LOCAL data forks are
 * examined; the switch on the file-type bits picks verify_dir or
 * verify_symlink.
 * NOTE(review): the return type, the case labels and the values returned on
 * the other paths are not visible in this view.
 */
707 xfs_ifork_verify_data(
708 struct xfs_inode
*ip
,
709 struct xfs_ifork_ops
*ops
)
711 /* Non-local data fork, we're done. */
712 if (ip
->i_d
.di_format
!= XFS_DINODE_FMT_LOCAL
)
715 /* Check the inline data fork if there is one. */
716 switch (VFS_I(ip
)->i_mode
& S_IFMT
) {
718 return ops
->verify_dir(ip
);
720 return ops
->verify_symlink(ip
);
726 /* Verify the inline contents of the attr fork of an inode. */
/*
 * Run ops->verify_attr against an inline (local format) attr fork.  If the
 * attr format is local but the inode has no in-core attr fork, the address
 * of the failing check (__this_address) is returned instead of calling the
 * verifier.
 * NOTE(review): the return type and the value returned for a non-local attr
 * fork are not visible in this view.
 */
728 xfs_ifork_verify_attr(
729 struct xfs_inode
*ip
,
730 struct xfs_ifork_ops
*ops
)
732 /* There has to be an attr fork allocated if aformat is local. */
733 if (ip
->i_d
.di_aformat
!= XFS_DINODE_FMT_LOCAL
)
735 if (!XFS_IFORK_PTR(ip
, XFS_ATTR_FORK
))
736 return __this_address
;
737 return ops
->verify_attr(ip
);