// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 */
#include <linux/log2.h>

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_attr_sf.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
#include "xfs_attr_leaf.h"
#include "xfs_shared.h"

29 kmem_zone_t
*xfs_ifork_zone
;
31 STATIC
int xfs_iformat_local(xfs_inode_t
*, xfs_dinode_t
*, int, int);
32 STATIC
int xfs_iformat_extents(xfs_inode_t
*, xfs_dinode_t
*, int);
33 STATIC
int xfs_iformat_btree(xfs_inode_t
*, xfs_dinode_t
*, int);
36 * Copy inode type and data and attr format specific information from the
37 * on-disk inode to the in-core inode and fork structures. For fifos, devices,
38 * and sockets this means set i_rdev to the proper value. For files,
39 * directories, and symlinks this means to bring in the in-line data or extent
40 * pointers as well as the attribute fork. For a fork in B-tree format, only
41 * the root is immediately brought in-core. The rest will be read in later when
42 * first referenced (see xfs_iread_extents()).
47 struct xfs_dinode
*dip
)
49 struct inode
*inode
= VFS_I(ip
);
50 struct xfs_attr_shortform
*atp
;
55 switch (inode
->i_mode
& S_IFMT
) {
61 inode
->i_rdev
= xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip
));
67 switch (dip
->di_format
) {
68 case XFS_DINODE_FMT_LOCAL
:
69 di_size
= be64_to_cpu(dip
->di_size
);
71 error
= xfs_iformat_local(ip
, dip
, XFS_DATA_FORK
, size
);
73 case XFS_DINODE_FMT_EXTENTS
:
74 error
= xfs_iformat_extents(ip
, dip
, XFS_DATA_FORK
);
76 case XFS_DINODE_FMT_BTREE
:
77 error
= xfs_iformat_btree(ip
, dip
, XFS_DATA_FORK
);
90 if (xfs_is_reflink_inode(ip
)) {
91 ASSERT(ip
->i_cowfp
== NULL
);
92 xfs_ifork_init_cow(ip
);
95 if (!XFS_DFORK_Q(dip
))
98 ASSERT(ip
->i_afp
== NULL
);
99 ip
->i_afp
= kmem_zone_zalloc(xfs_ifork_zone
, KM_SLEEP
| KM_NOFS
);
101 switch (dip
->di_aformat
) {
102 case XFS_DINODE_FMT_LOCAL
:
103 atp
= (xfs_attr_shortform_t
*)XFS_DFORK_APTR(dip
);
104 size
= be16_to_cpu(atp
->hdr
.totsize
);
106 error
= xfs_iformat_local(ip
, dip
, XFS_ATTR_FORK
, size
);
108 case XFS_DINODE_FMT_EXTENTS
:
109 error
= xfs_iformat_extents(ip
, dip
, XFS_ATTR_FORK
);
111 case XFS_DINODE_FMT_BTREE
:
112 error
= xfs_iformat_btree(ip
, dip
, XFS_ATTR_FORK
);
115 error
= -EFSCORRUPTED
;
119 kmem_zone_free(xfs_ifork_zone
, ip
->i_afp
);
122 kmem_zone_free(xfs_ifork_zone
, ip
->i_cowfp
);
124 xfs_idestroy_fork(ip
, XFS_DATA_FORK
);
131 struct xfs_inode
*ip
,
136 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
137 int mem_size
= size
, real_size
= 0;
141 * If we are using the local fork to store a symlink body we need to
142 * zero-terminate it so that we can pass it back to the VFS directly.
143 * Overallocate the in-memory fork by one for that and add a zero
144 * to terminate it below.
146 zero_terminate
= S_ISLNK(VFS_I(ip
)->i_mode
);
151 real_size
= roundup(mem_size
, 4);
152 ifp
->if_u1
.if_data
= kmem_alloc(real_size
, KM_SLEEP
| KM_NOFS
);
153 memcpy(ifp
->if_u1
.if_data
, data
, size
);
155 ifp
->if_u1
.if_data
[size
] = '\0';
157 ifp
->if_u1
.if_data
= NULL
;
160 ifp
->if_bytes
= size
;
161 ifp
->if_flags
&= ~(XFS_IFEXTENTS
| XFS_IFBROOT
);
162 ifp
->if_flags
|= XFS_IFINLINE
;
166 * The file is in-lined in the on-disk inode.
176 * If the size is unreasonable, then something
177 * is wrong and we just bail out rather than crash in
178 * kmem_alloc() or memcpy() below.
180 if (unlikely(size
> XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
))) {
181 xfs_warn(ip
->i_mount
,
182 "corrupt inode %Lu (bad size %d for local fork, size = %d).",
183 (unsigned long long) ip
->i_ino
, size
,
184 XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
));
185 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
186 "xfs_iformat_local", dip
, sizeof(*dip
),
188 return -EFSCORRUPTED
;
191 xfs_init_local_fork(ip
, whichfork
, XFS_DFORK_PTR(dip
, whichfork
), size
);
196 * The file consists of a set of extents all of which fit into the on-disk
201 struct xfs_inode
*ip
,
202 struct xfs_dinode
*dip
,
205 struct xfs_mount
*mp
= ip
->i_mount
;
206 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
207 int state
= xfs_bmap_fork_to_state(whichfork
);
208 int nex
= XFS_DFORK_NEXTENTS(dip
, whichfork
);
209 int size
= nex
* sizeof(xfs_bmbt_rec_t
);
210 struct xfs_iext_cursor icur
;
211 struct xfs_bmbt_rec
*dp
;
212 struct xfs_bmbt_irec
new;
216 * If the number of extents is unreasonable, then something is wrong and
217 * we just bail out rather than crash in kmem_alloc() or memcpy() below.
219 if (unlikely(size
< 0 || size
> XFS_DFORK_SIZE(dip
, mp
, whichfork
))) {
220 xfs_warn(ip
->i_mount
, "corrupt inode %Lu ((a)extents = %d).",
221 (unsigned long long) ip
->i_ino
, nex
);
222 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
223 "xfs_iformat_extents(1)", dip
, sizeof(*dip
),
225 return -EFSCORRUPTED
;
229 ifp
->if_u1
.if_root
= NULL
;
232 dp
= (xfs_bmbt_rec_t
*) XFS_DFORK_PTR(dip
, whichfork
);
234 xfs_iext_first(ifp
, &icur
);
235 for (i
= 0; i
< nex
; i
++, dp
++) {
238 xfs_bmbt_disk_get_all(dp
, &new);
239 fa
= xfs_bmap_validate_extent(ip
, whichfork
, &new);
241 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
242 "xfs_iformat_extents(2)",
243 dp
, sizeof(*dp
), fa
);
244 return -EFSCORRUPTED
;
247 xfs_iext_insert(ip
, &icur
, &new, state
);
248 trace_xfs_read_extent(ip
, &icur
, state
, _THIS_IP_
);
249 xfs_iext_next(ifp
, &icur
);
252 ifp
->if_flags
|= XFS_IFEXTENTS
;
257 * The file has too many extents to fit into
258 * the inode, so they are in B-tree format.
259 * Allocate a buffer for the root of the B-tree
260 * and copy the root into it. The i_extents
261 * field will remain NULL until all of the
262 * extents are read in (when they are needed).
270 struct xfs_mount
*mp
= ip
->i_mount
;
271 xfs_bmdr_block_t
*dfp
;
272 struct xfs_ifork
*ifp
;
278 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
279 dfp
= (xfs_bmdr_block_t
*)XFS_DFORK_PTR(dip
, whichfork
);
280 size
= XFS_BMAP_BROOT_SPACE(mp
, dfp
);
281 nrecs
= be16_to_cpu(dfp
->bb_numrecs
);
282 level
= be16_to_cpu(dfp
->bb_level
);
285 * blow out if -- fork has less extents than can fit in
286 * fork (fork shouldn't be a btree format), root btree
287 * block has more records than can fit into the fork,
288 * or the number of extents is greater than the number of
291 if (unlikely(XFS_IFORK_NEXTENTS(ip
, whichfork
) <=
292 XFS_IFORK_MAXEXT(ip
, whichfork
) ||
294 XFS_BMDR_SPACE_CALC(nrecs
) >
295 XFS_DFORK_SIZE(dip
, mp
, whichfork
) ||
296 XFS_IFORK_NEXTENTS(ip
, whichfork
) > ip
->i_d
.di_nblocks
) ||
297 level
== 0 || level
> XFS_BTREE_MAXLEVELS
) {
298 xfs_warn(mp
, "corrupt inode %Lu (btree).",
299 (unsigned long long) ip
->i_ino
);
300 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
301 "xfs_iformat_btree", dfp
, size
,
303 return -EFSCORRUPTED
;
306 ifp
->if_broot_bytes
= size
;
307 ifp
->if_broot
= kmem_alloc(size
, KM_SLEEP
| KM_NOFS
);
308 ASSERT(ifp
->if_broot
!= NULL
);
310 * Copy and convert from the on-disk structure
311 * to the in-memory structure.
313 xfs_bmdr_to_bmbt(ip
, dfp
, XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
),
314 ifp
->if_broot
, size
);
315 ifp
->if_flags
&= ~XFS_IFEXTENTS
;
316 ifp
->if_flags
|= XFS_IFBROOT
;
319 ifp
->if_u1
.if_root
= NULL
;
325 * Reallocate the space for if_broot based on the number of records
326 * being added or deleted as indicated in rec_diff. Move the records
327 * and pointers in if_broot to fit the new size. When shrinking this
328 * will eliminate holes between the records and pointers created by
329 * the caller. When growing this will create holes to be filled in
332 * The caller must not request to add more records than would fit in
333 * the on-disk inode root. If the if_broot is currently NULL, then
334 * if we are adding records, one will be allocated. The caller must also
335 * not request that the number of records go below zero, although
338 * ip -- the inode whose if_broot area is changing
339 * ext_diff -- the change in the number of records, positive or negative,
340 * requested for the if_broot array.
348 struct xfs_mount
*mp
= ip
->i_mount
;
350 struct xfs_ifork
*ifp
;
351 struct xfs_btree_block
*new_broot
;
358 * Handle the degenerate case quietly.
364 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
367 * If there wasn't any memory allocated before, just
368 * allocate it now and get out.
370 if (ifp
->if_broot_bytes
== 0) {
371 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, rec_diff
);
372 ifp
->if_broot
= kmem_alloc(new_size
, KM_SLEEP
| KM_NOFS
);
373 ifp
->if_broot_bytes
= (int)new_size
;
378 * If there is already an existing if_broot, then we need
379 * to realloc() it and shift the pointers to their new
380 * location. The records don't change location because
381 * they are kept butted up against the btree block header.
383 cur_max
= xfs_bmbt_maxrecs(mp
, ifp
->if_broot_bytes
, 0);
384 new_max
= cur_max
+ rec_diff
;
385 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, new_max
);
386 ifp
->if_broot
= kmem_realloc(ifp
->if_broot
, new_size
,
388 op
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
389 ifp
->if_broot_bytes
);
390 np
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
392 ifp
->if_broot_bytes
= (int)new_size
;
393 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
394 XFS_IFORK_SIZE(ip
, whichfork
));
395 memmove(np
, op
, cur_max
* (uint
)sizeof(xfs_fsblock_t
));
400 * rec_diff is less than 0. In this case, we are shrinking the
401 * if_broot buffer. It must already exist. If we go to zero
402 * records, just get rid of the root and clear the status bit.
404 ASSERT((ifp
->if_broot
!= NULL
) && (ifp
->if_broot_bytes
> 0));
405 cur_max
= xfs_bmbt_maxrecs(mp
, ifp
->if_broot_bytes
, 0);
406 new_max
= cur_max
+ rec_diff
;
407 ASSERT(new_max
>= 0);
409 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, new_max
);
413 new_broot
= kmem_alloc(new_size
, KM_SLEEP
| KM_NOFS
);
415 * First copy over the btree block header.
417 memcpy(new_broot
, ifp
->if_broot
,
418 XFS_BMBT_BLOCK_LEN(ip
->i_mount
));
421 ifp
->if_flags
&= ~XFS_IFBROOT
;
425 * Only copy the records and pointers if there are any.
429 * First copy the records.
431 op
= (char *)XFS_BMBT_REC_ADDR(mp
, ifp
->if_broot
, 1);
432 np
= (char *)XFS_BMBT_REC_ADDR(mp
, new_broot
, 1);
433 memcpy(np
, op
, new_max
* (uint
)sizeof(xfs_bmbt_rec_t
));
436 * Then copy the pointers.
438 op
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
439 ifp
->if_broot_bytes
);
440 np
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, new_broot
, 1,
442 memcpy(np
, op
, new_max
* (uint
)sizeof(xfs_fsblock_t
));
444 kmem_free(ifp
->if_broot
);
445 ifp
->if_broot
= new_broot
;
446 ifp
->if_broot_bytes
= (int)new_size
;
448 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
449 XFS_IFORK_SIZE(ip
, whichfork
));
455 * This is called when the amount of space needed for if_data
456 * is increased or decreased. The change in size is indicated by
457 * the number of bytes that need to be added or deleted in the
458 * byte_diff parameter.
460 * If the amount of space needed has decreased below the size of the
461 * inline buffer, then switch to using the inline buffer. Otherwise,
462 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
465 * ip -- the inode whose if_data area is changing
466 * byte_diff -- the change in the number of bytes, positive or negative,
467 * requested for the if_data array.
471 struct xfs_inode
*ip
,
475 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
476 int new_size
= (int)ifp
->if_bytes
+ byte_diff
;
478 ASSERT(new_size
>= 0);
479 ASSERT(new_size
<= XFS_IFORK_SIZE(ip
, whichfork
));
485 kmem_free(ifp
->if_u1
.if_data
);
486 ifp
->if_u1
.if_data
= NULL
;
492 * For inline data, the underlying buffer must be a multiple of 4 bytes
493 * in size so that it can be logged and stay on word boundaries.
494 * We enforce that here.
496 ifp
->if_u1
.if_data
= kmem_realloc(ifp
->if_u1
.if_data
,
497 roundup(new_size
, 4), KM_SLEEP
| KM_NOFS
);
498 ifp
->if_bytes
= new_size
;
506 struct xfs_ifork
*ifp
;
508 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
509 if (ifp
->if_broot
!= NULL
) {
510 kmem_free(ifp
->if_broot
);
511 ifp
->if_broot
= NULL
;
515 * If the format is local, then we can't have an extents
516 * array so just look for an inline data array. If we're
517 * not local then we may or may not have an extents list,
518 * so check and free it up if we do.
520 if (XFS_IFORK_FORMAT(ip
, whichfork
) == XFS_DINODE_FMT_LOCAL
) {
521 if (ifp
->if_u1
.if_data
!= NULL
) {
522 kmem_free(ifp
->if_u1
.if_data
);
523 ifp
->if_u1
.if_data
= NULL
;
525 } else if ((ifp
->if_flags
& XFS_IFEXTENTS
) && ifp
->if_height
) {
526 xfs_iext_destroy(ifp
);
529 if (whichfork
== XFS_ATTR_FORK
) {
530 kmem_zone_free(xfs_ifork_zone
, ip
->i_afp
);
532 } else if (whichfork
== XFS_COW_FORK
) {
533 kmem_zone_free(xfs_ifork_zone
, ip
->i_cowfp
);
539 * Convert in-core extents to on-disk form
541 * In the case of the data fork, the in-core and on-disk fork sizes can be
542 * different due to delayed allocation extents. We only copy on-disk extents
543 * here, so callers must always use the physical fork size to determine the
544 * size of the buffer passed to this routine. We will return the size actually
549 struct xfs_inode
*ip
,
550 struct xfs_bmbt_rec
*dp
,
553 int state
= xfs_bmap_fork_to_state(whichfork
);
554 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
555 struct xfs_iext_cursor icur
;
556 struct xfs_bmbt_irec rec
;
559 ASSERT(xfs_isilocked(ip
, XFS_ILOCK_EXCL
| XFS_ILOCK_SHARED
));
560 ASSERT(ifp
->if_bytes
> 0);
562 for_each_xfs_iext(ifp
, &icur
, &rec
) {
563 if (isnullstartblock(rec
.br_startblock
))
565 ASSERT(xfs_bmap_validate_extent(ip
, whichfork
, &rec
) == NULL
);
566 xfs_bmbt_disk_set_all(dp
, &rec
);
567 trace_xfs_write_extent(ip
, &icur
, state
, _RET_IP_
);
568 copied
+= sizeof(struct xfs_bmbt_rec
);
573 ASSERT(copied
<= ifp
->if_bytes
);
578 * Each of the following cases stores data into the same region
579 * of the on-disk inode, so only one of them can be valid at
580 * any given time. While it is possible to have conflicting formats
581 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
582 * in EXTENTS format, this can only happen when the fork has
583 * changed formats after being modified but before being flushed.
584 * In these cases, the format always takes precedence, because the
585 * format indicates the current state of the fork.
591 xfs_inode_log_item_t
*iip
,
595 struct xfs_ifork
*ifp
;
597 static const short brootflag
[2] =
598 { XFS_ILOG_DBROOT
, XFS_ILOG_ABROOT
};
599 static const short dataflag
[2] =
600 { XFS_ILOG_DDATA
, XFS_ILOG_ADATA
};
601 static const short extflag
[2] =
602 { XFS_ILOG_DEXT
, XFS_ILOG_AEXT
};
606 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
608 * This can happen if we gave up in iformat in an error path,
609 * for the attribute fork.
612 ASSERT(whichfork
== XFS_ATTR_FORK
);
615 cp
= XFS_DFORK_PTR(dip
, whichfork
);
617 switch (XFS_IFORK_FORMAT(ip
, whichfork
)) {
618 case XFS_DINODE_FMT_LOCAL
:
619 if ((iip
->ili_fields
& dataflag
[whichfork
]) &&
620 (ifp
->if_bytes
> 0)) {
621 ASSERT(ifp
->if_u1
.if_data
!= NULL
);
622 ASSERT(ifp
->if_bytes
<= XFS_IFORK_SIZE(ip
, whichfork
));
623 memcpy(cp
, ifp
->if_u1
.if_data
, ifp
->if_bytes
);
627 case XFS_DINODE_FMT_EXTENTS
:
628 ASSERT((ifp
->if_flags
& XFS_IFEXTENTS
) ||
629 !(iip
->ili_fields
& extflag
[whichfork
]));
630 if ((iip
->ili_fields
& extflag
[whichfork
]) &&
631 (ifp
->if_bytes
> 0)) {
632 ASSERT(XFS_IFORK_NEXTENTS(ip
, whichfork
) > 0);
633 (void)xfs_iextents_copy(ip
, (xfs_bmbt_rec_t
*)cp
,
638 case XFS_DINODE_FMT_BTREE
:
639 if ((iip
->ili_fields
& brootflag
[whichfork
]) &&
640 (ifp
->if_broot_bytes
> 0)) {
641 ASSERT(ifp
->if_broot
!= NULL
);
642 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
643 XFS_IFORK_SIZE(ip
, whichfork
));
644 xfs_bmbt_to_bmdr(mp
, ifp
->if_broot
, ifp
->if_broot_bytes
,
645 (xfs_bmdr_block_t
*)cp
,
646 XFS_DFORK_SIZE(dip
, mp
, whichfork
));
650 case XFS_DINODE_FMT_DEV
:
651 if (iip
->ili_fields
& XFS_ILOG_DEV
) {
652 ASSERT(whichfork
== XFS_DATA_FORK
);
653 xfs_dinode_put_rdev(dip
,
654 linux_to_xfs_dev_t(VFS_I(ip
)->i_rdev
));
664 /* Convert bmap state flags to an inode fork. */
666 xfs_iext_state_to_fork(
667 struct xfs_inode
*ip
,
670 if (state
& BMAP_COWFORK
)
672 else if (state
& BMAP_ATTRFORK
)
678 * Initialize an inode's copy-on-write fork.
682 struct xfs_inode
*ip
)
687 ip
->i_cowfp
= kmem_zone_zalloc(xfs_ifork_zone
,
689 ip
->i_cowfp
->if_flags
= XFS_IFEXTENTS
;
690 ip
->i_cformat
= XFS_DINODE_FMT_EXTENTS
;
694 /* Default fork content verifiers. */
695 struct xfs_ifork_ops xfs_default_ifork_ops
= {
696 .verify_attr
= xfs_attr_shortform_verify
,
697 .verify_dir
= xfs_dir2_sf_verify
,
698 .verify_symlink
= xfs_symlink_shortform_verify
,
701 /* Verify the inline contents of the data fork of an inode. */
703 xfs_ifork_verify_data(
704 struct xfs_inode
*ip
,
705 struct xfs_ifork_ops
*ops
)
707 /* Non-local data fork, we're done. */
708 if (ip
->i_d
.di_format
!= XFS_DINODE_FMT_LOCAL
)
711 /* Check the inline data fork if there is one. */
712 switch (VFS_I(ip
)->i_mode
& S_IFMT
) {
714 return ops
->verify_dir(ip
);
716 return ops
->verify_symlink(ip
);
722 /* Verify the inline contents of the attr fork of an inode. */
724 xfs_ifork_verify_attr(
725 struct xfs_inode
*ip
,
726 struct xfs_ifork_ops
*ops
)
728 /* There has to be an attr fork allocated if aformat is local. */
729 if (ip
->i_d
.di_aformat
!= XFS_DINODE_FMT_LOCAL
)
731 if (!XFS_IFORK_PTR(ip
, XFS_ATTR_FORK
))
732 return __this_address
;
733 return ops
->verify_attr(ip
);