1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
9 #include "xfs_shared.h"
10 #include "xfs_format.h"
11 #include "xfs_log_format.h"
12 #include "xfs_trans_resv.h"
13 #include "xfs_mount.h"
14 #include "xfs_inode.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode_item.h"
17 #include "xfs_btree.h"
18 #include "xfs_bmap_btree.h"
20 #include "xfs_error.h"
21 #include "xfs_trace.h"
22 #include "xfs_da_format.h"
23 #include "xfs_da_btree.h"
24 #include "xfs_dir2_priv.h"
25 #include "xfs_attr_leaf.h"
27 kmem_zone_t
*xfs_ifork_zone
;
36 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
37 int mem_size
= size
, real_size
= 0;
41 * If we are using the local fork to store a symlink body we need to
42 * zero-terminate it so that we can pass it back to the VFS directly.
43 * Overallocate the in-memory fork by one for that and add a zero
44 * to terminate it below.
46 zero_terminate
= S_ISLNK(VFS_I(ip
)->i_mode
);
51 real_size
= roundup(mem_size
, 4);
52 ifp
->if_u1
.if_data
= kmem_alloc(real_size
, KM_NOFS
);
53 memcpy(ifp
->if_u1
.if_data
, data
, size
);
55 ifp
->if_u1
.if_data
[size
] = '\0';
57 ifp
->if_u1
.if_data
= NULL
;
61 ifp
->if_flags
&= ~(XFS_IFEXTENTS
| XFS_IFBROOT
);
62 ifp
->if_flags
|= XFS_IFINLINE
;
66 * The file is in-lined in the on-disk inode.
76 * If the size is unreasonable, then something
77 * is wrong and we just bail out rather than crash in
78 * kmem_alloc() or memcpy() below.
80 if (unlikely(size
> XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
))) {
82 "corrupt inode %Lu (bad size %d for local fork, size = %zd).",
83 (unsigned long long) ip
->i_ino
, size
,
84 XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
));
85 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
86 "xfs_iformat_local", dip
, sizeof(*dip
),
91 xfs_init_local_fork(ip
, whichfork
, XFS_DFORK_PTR(dip
, whichfork
), size
);
96 * The file consists of a set of extents all of which fit into the on-disk
101 struct xfs_inode
*ip
,
102 struct xfs_dinode
*dip
,
105 struct xfs_mount
*mp
= ip
->i_mount
;
106 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
107 int state
= xfs_bmap_fork_to_state(whichfork
);
108 int nex
= XFS_DFORK_NEXTENTS(dip
, whichfork
);
109 int size
= nex
* sizeof(xfs_bmbt_rec_t
);
110 struct xfs_iext_cursor icur
;
111 struct xfs_bmbt_rec
*dp
;
112 struct xfs_bmbt_irec
new;
116 * If the number of extents is unreasonable, then something is wrong and
117 * we just bail out rather than crash in kmem_alloc() or memcpy() below.
119 if (unlikely(size
< 0 || size
> XFS_DFORK_SIZE(dip
, mp
, whichfork
))) {
120 xfs_warn(ip
->i_mount
, "corrupt inode %Lu ((a)extents = %d).",
121 (unsigned long long) ip
->i_ino
, nex
);
122 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
123 "xfs_iformat_extents(1)", dip
, sizeof(*dip
),
125 return -EFSCORRUPTED
;
129 ifp
->if_u1
.if_root
= NULL
;
132 dp
= (xfs_bmbt_rec_t
*) XFS_DFORK_PTR(dip
, whichfork
);
134 xfs_iext_first(ifp
, &icur
);
135 for (i
= 0; i
< nex
; i
++, dp
++) {
138 xfs_bmbt_disk_get_all(dp
, &new);
139 fa
= xfs_bmap_validate_extent(ip
, whichfork
, &new);
141 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
142 "xfs_iformat_extents(2)",
143 dp
, sizeof(*dp
), fa
);
144 return -EFSCORRUPTED
;
147 xfs_iext_insert(ip
, &icur
, &new, state
);
148 trace_xfs_read_extent(ip
, &icur
, state
, _THIS_IP_
);
149 xfs_iext_next(ifp
, &icur
);
152 ifp
->if_flags
|= XFS_IFEXTENTS
;
157 * The file has too many extents to fit into
158 * the inode, so they are in B-tree format.
159 * Allocate a buffer for the root of the B-tree
160 * and copy the root into it. The i_extents
161 * field will remain NULL until all of the
162 * extents are read in (when they are needed).
170 struct xfs_mount
*mp
= ip
->i_mount
;
171 xfs_bmdr_block_t
*dfp
;
172 struct xfs_ifork
*ifp
;
178 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
179 dfp
= (xfs_bmdr_block_t
*)XFS_DFORK_PTR(dip
, whichfork
);
180 size
= XFS_BMAP_BROOT_SPACE(mp
, dfp
);
181 nrecs
= be16_to_cpu(dfp
->bb_numrecs
);
182 level
= be16_to_cpu(dfp
->bb_level
);
185 * blow out if -- fork has less extents than can fit in
186 * fork (fork shouldn't be a btree format), root btree
187 * block has more records than can fit into the fork,
188 * or the number of extents is greater than the number of
191 if (unlikely(ifp
->if_nextents
<= XFS_IFORK_MAXEXT(ip
, whichfork
) ||
193 XFS_BMDR_SPACE_CALC(nrecs
) >
194 XFS_DFORK_SIZE(dip
, mp
, whichfork
) ||
195 ifp
->if_nextents
> ip
->i_d
.di_nblocks
) ||
196 level
== 0 || level
> XFS_BTREE_MAXLEVELS
) {
197 xfs_warn(mp
, "corrupt inode %Lu (btree).",
198 (unsigned long long) ip
->i_ino
);
199 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
200 "xfs_iformat_btree", dfp
, size
,
202 return -EFSCORRUPTED
;
205 ifp
->if_broot_bytes
= size
;
206 ifp
->if_broot
= kmem_alloc(size
, KM_NOFS
);
207 ASSERT(ifp
->if_broot
!= NULL
);
209 * Copy and convert from the on-disk structure
210 * to the in-memory structure.
212 xfs_bmdr_to_bmbt(ip
, dfp
, XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
),
213 ifp
->if_broot
, size
);
214 ifp
->if_flags
&= ~XFS_IFEXTENTS
;
215 ifp
->if_flags
|= XFS_IFBROOT
;
218 ifp
->if_u1
.if_root
= NULL
;
224 xfs_iformat_data_fork(
225 struct xfs_inode
*ip
,
226 struct xfs_dinode
*dip
)
228 struct inode
*inode
= VFS_I(ip
);
232 * Initialize the extent count early, as the per-format routines may
235 ip
->i_df
.if_format
= dip
->di_format
;
236 ip
->i_df
.if_nextents
= be32_to_cpu(dip
->di_nextents
);
238 switch (inode
->i_mode
& S_IFMT
) {
244 inode
->i_rdev
= xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip
));
249 switch (ip
->i_df
.if_format
) {
250 case XFS_DINODE_FMT_LOCAL
:
251 error
= xfs_iformat_local(ip
, dip
, XFS_DATA_FORK
,
252 be64_to_cpu(dip
->di_size
));
254 error
= xfs_ifork_verify_local_data(ip
);
256 case XFS_DINODE_FMT_EXTENTS
:
257 return xfs_iformat_extents(ip
, dip
, XFS_DATA_FORK
);
258 case XFS_DINODE_FMT_BTREE
:
259 return xfs_iformat_btree(ip
, dip
, XFS_DATA_FORK
);
261 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
, __func__
,
262 dip
, sizeof(*dip
), __this_address
);
263 return -EFSCORRUPTED
;
267 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
, __func__
, dip
,
268 sizeof(*dip
), __this_address
);
269 return -EFSCORRUPTED
;
274 xfs_dfork_attr_shortform_size(
275 struct xfs_dinode
*dip
)
277 struct xfs_attr_shortform
*atp
=
278 (struct xfs_attr_shortform
*)XFS_DFORK_APTR(dip
);
280 return be16_to_cpu(atp
->hdr
.totsize
);
284 xfs_iformat_attr_fork(
285 struct xfs_inode
*ip
,
286 struct xfs_dinode
*dip
)
291 * Initialize the extent count early, as the per-format routines may
294 ip
->i_afp
= kmem_cache_zalloc(xfs_ifork_zone
, GFP_NOFS
| __GFP_NOFAIL
);
295 ip
->i_afp
->if_format
= dip
->di_aformat
;
296 if (unlikely(ip
->i_afp
->if_format
== 0)) /* pre IRIX 6.2 file system */
297 ip
->i_afp
->if_format
= XFS_DINODE_FMT_EXTENTS
;
298 ip
->i_afp
->if_nextents
= be16_to_cpu(dip
->di_anextents
);
300 switch (ip
->i_afp
->if_format
) {
301 case XFS_DINODE_FMT_LOCAL
:
302 error
= xfs_iformat_local(ip
, dip
, XFS_ATTR_FORK
,
303 xfs_dfork_attr_shortform_size(dip
));
305 error
= xfs_ifork_verify_local_attr(ip
);
307 case XFS_DINODE_FMT_EXTENTS
:
308 error
= xfs_iformat_extents(ip
, dip
, XFS_ATTR_FORK
);
310 case XFS_DINODE_FMT_BTREE
:
311 error
= xfs_iformat_btree(ip
, dip
, XFS_ATTR_FORK
);
314 xfs_inode_verifier_error(ip
, error
, __func__
, dip
,
315 sizeof(*dip
), __this_address
);
316 error
= -EFSCORRUPTED
;
321 kmem_cache_free(xfs_ifork_zone
, ip
->i_afp
);
328 * Reallocate the space for if_broot based on the number of records
329 * being added or deleted as indicated in rec_diff. Move the records
330 * and pointers in if_broot to fit the new size. When shrinking this
331 * will eliminate holes between the records and pointers created by
332 * the caller. When growing this will create holes to be filled in
335 * The caller must not request to add more records than would fit in
336 * the on-disk inode root. If the if_broot is currently NULL, then
337 * if we are adding records, one will be allocated. The caller must also
338 * not request that the number of records go below zero, although
341 * ip -- the inode whose if_broot area is changing
342 * ext_diff -- the change in the number of records, positive or negative,
343 * requested for the if_broot array.
351 struct xfs_mount
*mp
= ip
->i_mount
;
353 struct xfs_ifork
*ifp
;
354 struct xfs_btree_block
*new_broot
;
361 * Handle the degenerate case quietly.
367 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
370 * If there wasn't any memory allocated before, just
371 * allocate it now and get out.
373 if (ifp
->if_broot_bytes
== 0) {
374 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, rec_diff
);
375 ifp
->if_broot
= kmem_alloc(new_size
, KM_NOFS
);
376 ifp
->if_broot_bytes
= (int)new_size
;
381 * If there is already an existing if_broot, then we need
382 * to realloc() it and shift the pointers to their new
383 * location. The records don't change location because
384 * they are kept butted up against the btree block header.
386 cur_max
= xfs_bmbt_maxrecs(mp
, ifp
->if_broot_bytes
, 0);
387 new_max
= cur_max
+ rec_diff
;
388 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, new_max
);
389 ifp
->if_broot
= krealloc(ifp
->if_broot
, new_size
,
390 GFP_NOFS
| __GFP_NOFAIL
);
391 op
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
392 ifp
->if_broot_bytes
);
393 np
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
395 ifp
->if_broot_bytes
= (int)new_size
;
396 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
397 XFS_IFORK_SIZE(ip
, whichfork
));
398 memmove(np
, op
, cur_max
* (uint
)sizeof(xfs_fsblock_t
));
403 * rec_diff is less than 0. In this case, we are shrinking the
404 * if_broot buffer. It must already exist. If we go to zero
405 * records, just get rid of the root and clear the status bit.
407 ASSERT((ifp
->if_broot
!= NULL
) && (ifp
->if_broot_bytes
> 0));
408 cur_max
= xfs_bmbt_maxrecs(mp
, ifp
->if_broot_bytes
, 0);
409 new_max
= cur_max
+ rec_diff
;
410 ASSERT(new_max
>= 0);
412 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, new_max
);
416 new_broot
= kmem_alloc(new_size
, KM_NOFS
);
418 * First copy over the btree block header.
420 memcpy(new_broot
, ifp
->if_broot
,
421 XFS_BMBT_BLOCK_LEN(ip
->i_mount
));
424 ifp
->if_flags
&= ~XFS_IFBROOT
;
428 * Only copy the records and pointers if there are any.
432 * First copy the records.
434 op
= (char *)XFS_BMBT_REC_ADDR(mp
, ifp
->if_broot
, 1);
435 np
= (char *)XFS_BMBT_REC_ADDR(mp
, new_broot
, 1);
436 memcpy(np
, op
, new_max
* (uint
)sizeof(xfs_bmbt_rec_t
));
439 * Then copy the pointers.
441 op
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
442 ifp
->if_broot_bytes
);
443 np
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, new_broot
, 1,
445 memcpy(np
, op
, new_max
* (uint
)sizeof(xfs_fsblock_t
));
447 kmem_free(ifp
->if_broot
);
448 ifp
->if_broot
= new_broot
;
449 ifp
->if_broot_bytes
= (int)new_size
;
451 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
452 XFS_IFORK_SIZE(ip
, whichfork
));
458 * This is called when the amount of space needed for if_data
459 * is increased or decreased. The change in size is indicated by
460 * the number of bytes that need to be added or deleted in the
461 * byte_diff parameter.
463 * If the amount of space needed has decreased below the size of the
464 * inline buffer, then switch to using the inline buffer. Otherwise,
465 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
468 * ip -- the inode whose if_data area is changing
469 * byte_diff -- the change in the number of bytes, positive or negative,
470 * requested for the if_data array.
474 struct xfs_inode
*ip
,
478 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
479 int64_t new_size
= ifp
->if_bytes
+ byte_diff
;
481 ASSERT(new_size
>= 0);
482 ASSERT(new_size
<= XFS_IFORK_SIZE(ip
, whichfork
));
488 kmem_free(ifp
->if_u1
.if_data
);
489 ifp
->if_u1
.if_data
= NULL
;
495 * For inline data, the underlying buffer must be a multiple of 4 bytes
496 * in size so that it can be logged and stay on word boundaries.
497 * We enforce that here.
499 ifp
->if_u1
.if_data
= krealloc(ifp
->if_u1
.if_data
, roundup(new_size
, 4),
500 GFP_NOFS
| __GFP_NOFAIL
);
501 ifp
->if_bytes
= new_size
;
506 struct xfs_ifork
*ifp
)
508 if (ifp
->if_broot
!= NULL
) {
509 kmem_free(ifp
->if_broot
);
510 ifp
->if_broot
= NULL
;
514 * If the format is local, then we can't have an extents array so just
515 * look for an inline data array. If we're not local then we may or may
516 * not have an extents list, so check and free it up if we do.
518 if (ifp
->if_format
== XFS_DINODE_FMT_LOCAL
) {
519 kmem_free(ifp
->if_u1
.if_data
);
520 ifp
->if_u1
.if_data
= NULL
;
521 } else if (ifp
->if_flags
& XFS_IFEXTENTS
) {
523 xfs_iext_destroy(ifp
);
528 * Convert in-core extents to on-disk form
530 * In the case of the data fork, the in-core and on-disk fork sizes can be
531 * different due to delayed allocation extents. We only copy on-disk extents
532 * here, so callers must always use the physical fork size to determine the
533 * size of the buffer passed to this routine. We will return the size actually
538 struct xfs_inode
*ip
,
539 struct xfs_bmbt_rec
*dp
,
542 int state
= xfs_bmap_fork_to_state(whichfork
);
543 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
544 struct xfs_iext_cursor icur
;
545 struct xfs_bmbt_irec rec
;
548 ASSERT(xfs_isilocked(ip
, XFS_ILOCK_EXCL
| XFS_ILOCK_SHARED
));
549 ASSERT(ifp
->if_bytes
> 0);
551 for_each_xfs_iext(ifp
, &icur
, &rec
) {
552 if (isnullstartblock(rec
.br_startblock
))
554 ASSERT(xfs_bmap_validate_extent(ip
, whichfork
, &rec
) == NULL
);
555 xfs_bmbt_disk_set_all(dp
, &rec
);
556 trace_xfs_write_extent(ip
, &icur
, state
, _RET_IP_
);
557 copied
+= sizeof(struct xfs_bmbt_rec
);
562 ASSERT(copied
<= ifp
->if_bytes
);
567 * Each of the following cases stores data into the same region
568 * of the on-disk inode, so only one of them can be valid at
569 * any given time. While it is possible to have conflicting formats
570 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
571 * in EXTENTS format, this can only happen when the fork has
572 * changed formats after being modified but before being flushed.
573 * In these cases, the format always takes precedence, because the
574 * format indicates the current state of the fork.
580 struct xfs_inode_log_item
*iip
,
584 struct xfs_ifork
*ifp
;
586 static const short brootflag
[2] =
587 { XFS_ILOG_DBROOT
, XFS_ILOG_ABROOT
};
588 static const short dataflag
[2] =
589 { XFS_ILOG_DDATA
, XFS_ILOG_ADATA
};
590 static const short extflag
[2] =
591 { XFS_ILOG_DEXT
, XFS_ILOG_AEXT
};
595 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
597 * This can happen if we gave up in iformat in an error path,
598 * for the attribute fork.
601 ASSERT(whichfork
== XFS_ATTR_FORK
);
604 cp
= XFS_DFORK_PTR(dip
, whichfork
);
606 switch (ifp
->if_format
) {
607 case XFS_DINODE_FMT_LOCAL
:
608 if ((iip
->ili_fields
& dataflag
[whichfork
]) &&
609 (ifp
->if_bytes
> 0)) {
610 ASSERT(ifp
->if_u1
.if_data
!= NULL
);
611 ASSERT(ifp
->if_bytes
<= XFS_IFORK_SIZE(ip
, whichfork
));
612 memcpy(cp
, ifp
->if_u1
.if_data
, ifp
->if_bytes
);
616 case XFS_DINODE_FMT_EXTENTS
:
617 ASSERT((ifp
->if_flags
& XFS_IFEXTENTS
) ||
618 !(iip
->ili_fields
& extflag
[whichfork
]));
619 if ((iip
->ili_fields
& extflag
[whichfork
]) &&
620 (ifp
->if_bytes
> 0)) {
621 ASSERT(ifp
->if_nextents
> 0);
622 (void)xfs_iextents_copy(ip
, (xfs_bmbt_rec_t
*)cp
,
627 case XFS_DINODE_FMT_BTREE
:
628 if ((iip
->ili_fields
& brootflag
[whichfork
]) &&
629 (ifp
->if_broot_bytes
> 0)) {
630 ASSERT(ifp
->if_broot
!= NULL
);
631 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
632 XFS_IFORK_SIZE(ip
, whichfork
));
633 xfs_bmbt_to_bmdr(mp
, ifp
->if_broot
, ifp
->if_broot_bytes
,
634 (xfs_bmdr_block_t
*)cp
,
635 XFS_DFORK_SIZE(dip
, mp
, whichfork
));
639 case XFS_DINODE_FMT_DEV
:
640 if (iip
->ili_fields
& XFS_ILOG_DEV
) {
641 ASSERT(whichfork
== XFS_DATA_FORK
);
642 xfs_dinode_put_rdev(dip
,
643 linux_to_xfs_dev_t(VFS_I(ip
)->i_rdev
));
653 /* Convert bmap state flags to an inode fork. */
655 xfs_iext_state_to_fork(
656 struct xfs_inode
*ip
,
659 if (state
& BMAP_COWFORK
)
661 else if (state
& BMAP_ATTRFORK
)
667 * Initialize an inode's copy-on-write fork.
671 struct xfs_inode
*ip
)
676 ip
->i_cowfp
= kmem_cache_zalloc(xfs_ifork_zone
,
677 GFP_NOFS
| __GFP_NOFAIL
);
678 ip
->i_cowfp
->if_flags
= XFS_IFEXTENTS
;
679 ip
->i_cowfp
->if_format
= XFS_DINODE_FMT_EXTENTS
;
682 /* Verify the inline contents of the data fork of an inode. */
684 xfs_ifork_verify_local_data(
685 struct xfs_inode
*ip
)
687 xfs_failaddr_t fa
= NULL
;
689 switch (VFS_I(ip
)->i_mode
& S_IFMT
) {
691 fa
= xfs_dir2_sf_verify(ip
);
694 fa
= xfs_symlink_shortform_verify(ip
);
701 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
, "data fork",
702 ip
->i_df
.if_u1
.if_data
, ip
->i_df
.if_bytes
, fa
);
703 return -EFSCORRUPTED
;
709 /* Verify the inline contents of the attr fork of an inode. */
711 xfs_ifork_verify_local_attr(
712 struct xfs_inode
*ip
)
714 struct xfs_ifork
*ifp
= ip
->i_afp
;
720 fa
= xfs_attr_shortform_verify(ip
);
723 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
, "attr fork",
724 ifp
? ifp
->if_u1
.if_data
: NULL
,
725 ifp
? ifp
->if_bytes
: 0, fa
);
726 return -EFSCORRUPTED
;