1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_btree.h"
15 #include "xfs_ialloc.h"
16 #include "xfs_ialloc_btree.h"
17 #include "xfs_iwalk.h"
18 #include "xfs_itable.h"
19 #include "xfs_error.h"
20 #include "xfs_icache.h"
21 #include "xfs_health.h"
22 #include "xfs_trans.h"
28 * Use the inode walking functions to fill out struct xfs_bulkstat for every
29 * allocated inode, then pass the stat information to some externally provided
/*
 * Per-call bulkstat state: the caller's formatter, the request being
 * serviced, and a scratch record that is filled in one inode at a time.
 */
33 struct xfs_bstat_chunk
{
/* Invoked as formatter(breq, buf) once a record has been filled out. */
34 bulkstat_one_fmt_pf formatter
;
/* Request descriptor; its flags, idmap and startino cursor are used. */
35 struct xfs_ibulk
*breq
;
/* Scratch bulkstat record handed to the formatter. */
36 struct xfs_bulkstat
*buf
;
40 * Fill out the bulkstat info for a single inode and report it somewhere.
42 * bc->breq->startino is effectively the inode cursor as we walk through the
43 * filesystem. Therefore, we update it any time we need to move the cursor
44 * forward, regardless of whether or not we're sending any bstat information
45 * back to userspace. If the inode is internal metadata or has been freed
46 * out from under us, we simply keep going.
48 * However, if any other type of error happens we want to stop right where we
49 * are so that userspace will call back with the exact number of the bad inode and
50 * we can send back an error code.
52 * Note that if the formatter tells us there's no space left in the buffer we
53 * move the cursor forward and abort the walk.
/* idmap: mapping used below to translate the inode's owner ids. */
58 struct mnt_idmap
*idmap
,
/* bc: chunk state supplying the scratch record, request and formatter. */
61 struct xfs_bstat_chunk
*bc
)
/* Superblock user namespace; bs_uid/bs_gid are expressed relative to it. */
63 struct user_namespace
*sb_userns
= mp
->m_super
->s_user_ns
;
64 struct xfs_inode
*ip
; /* incore inode pointer */
66 struct xfs_bulkstat
*buf
= bc
->buf
;
67 xfs_extnum_t nextents
;
/*
 * Internal inode numbers are tested for before any inode lookup.
 * NOTE(review): the branch taken is not visible in this view; the cursor
 * comment near the end of this function says internal metadata only
 * advances the cursor.
 */
72 if (xfs_internal_inum(mp
, ino
))
/*
 * Look up the inode with the DONTCACHE and UNTRUSTED iget flags and ask
 * for XFS_ILOCK_SHARED; the paths visible below release that lock with
 * xfs_iunlock() before leaving.
 */
75 error
= xfs_iget(mp
, tp
, ino
,
76 (XFS_IGET_DONTCACHE
| XFS_IGET_UNTRUSTED
),
77 XFS_ILOCK_SHARED
, &ip
);
/* -ENOENT and -EINVAL from the lookup are singled out from other errors. */
78 if (error
== -ENOENT
|| error
== -EINVAL
)
83 /* Reload the incore unlinked list to avoid failure in inodegc. */
84 if (xfs_inode_unlinked_incomplete(ip
)) {
85 error
= xfs_inode_reload_unlinked_bucket(tp
, ip
);
/*
 * Unlock and shut the filesystem down as corrupt-incore.
 * NOTE(review): the condition guarding these two calls is not visible
 * here; presumably it is a failed reload -- confirm.
 */
87 xfs_iunlock(ip
, XFS_ILOCK_SHARED
);
88 xfs_force_shutdown(mp
, SHUTDOWN_CORRUPT_INCORE
);
95 ASSERT(ip
->i_imap
.im_blkno
!= 0);
/* Translate the inode's owner ids through the caller-supplied idmap. */
97 vfsuid
= i_uid_into_vfsuid(idmap
, inode
);
98 vfsgid
= i_gid_into_vfsgid(idmap
, inode
);
100 /* If this is a private inode, don't leak its details to userspace. */
101 if (IS_PRIVATE(inode
)) {
102 xfs_iunlock(ip
, XFS_ILOCK_SHARED
);
108 /* xfs_iget returns the following without needing
111 buf
->bs_projectid
= ip
->i_projid
;
/* Owner ids are converted back to raw values in the superblock's namespace. */
113 buf
->bs_uid
= from_kuid(sb_userns
, vfsuid_into_kuid(vfsuid
));
114 buf
->bs_gid
= from_kgid(sb_userns
, vfsgid_into_kgid(vfsgid
));
115 buf
->bs_size
= ip
->i_disk_size
;
/* Link count, timestamps, generation and mode come from the VFS inode. */
117 buf
->bs_nlink
= inode
->i_nlink
;
118 buf
->bs_atime
= inode_get_atime_sec(inode
);
119 buf
->bs_atime_nsec
= inode_get_atime_nsec(inode
);
120 buf
->bs_mtime
= inode_get_mtime_sec(inode
);
121 buf
->bs_mtime_nsec
= inode_get_mtime_nsec(inode
);
122 buf
->bs_ctime
= inode_get_ctime_sec(inode
);
123 buf
->bs_ctime_nsec
= inode_get_ctime_nsec(inode
);
124 buf
->bs_gen
= inode
->i_generation
;
125 buf
->bs_mode
= inode
->i_mode
;
127 buf
->bs_xflags
= xfs_ip2xflags(ip
);
128 buf
->bs_extsize_blks
= ip
->i_extsize
;
/*
 * Data fork extent count: requests without XFS_IBULK_NREXT64 get the
 * count clamped to XFS_MAX_EXTCNT_DATA_FORK_SMALL in bs_extents, while
 * bs_extents64 takes the full count.
 */
130 nextents
= xfs_ifork_nextents(&ip
->i_df
);
131 if (!(bc
->breq
->flags
& XFS_IBULK_NREXT64
))
132 buf
->bs_extents
= min(nextents
, XFS_MAX_EXTCNT_DATA_FORK_SMALL
);
134 buf
->bs_extents64
= nextents
;
136 xfs_bulkstat_health(ip
, buf
);
137 buf
->bs_aextents
= xfs_ifork_nextents(&ip
->i_af
);
138 buf
->bs_forkoff
= xfs_inode_fork_boff(ip
);
139 buf
->bs_version
= XFS_BULKSTAT_VERSION_V5
;
/*
 * With v3 inodes, report the creation time; the CoW extent size hint is
 * reported when XFS_DIFLAG2_COWEXTSIZE is set.
 */
141 if (xfs_has_v3inodes(mp
)) {
142 buf
->bs_btime
= ip
->i_crtime
.tv_sec
;
143 buf
->bs_btime_nsec
= ip
->i_crtime
.tv_nsec
;
144 if (ip
->i_diflags2
& XFS_DIFLAG2_COWEXTSIZE
)
145 buf
->bs_cowextsize_blks
= ip
->i_cowextsize
;
/*
 * The remaining fields depend on the data fork format: device inodes
 * report an encoded rdev with BLKDEV_IOSIZE as block size, the other
 * formats report the filesystem block size, and extents/btree forks
 * also report allocated plus delayed-allocation blocks.
 */
148 switch (ip
->i_df
.if_format
) {
149 case XFS_DINODE_FMT_DEV
:
150 buf
->bs_rdev
= sysv_encode_dev(inode
->i_rdev
);
151 buf
->bs_blksize
= BLKDEV_IOSIZE
;
154 case XFS_DINODE_FMT_LOCAL
:
156 buf
->bs_blksize
= mp
->m_sb
.sb_blocksize
;
159 case XFS_DINODE_FMT_EXTENTS
:
160 case XFS_DINODE_FMT_BTREE
:
162 buf
->bs_blksize
= mp
->m_sb
.sb_blocksize
;
163 buf
->bs_blocks
= ip
->i_nblocks
+ ip
->i_delayed_blks
;
/* All inode fields have been read; drop the lock before formatting. */
166 xfs_iunlock(ip
, XFS_ILOCK_SHARED
);
/* Pass the record to the caller's formatter; -ECANCELED is checked on its own. */
169 error
= bc
->formatter(bc
->breq
, buf
);
170 if (error
== -ECANCELED
)
177 * Advance the cursor to the inode that comes after the one we just
178 * looked at. We want the caller to move along if the bulkstat
179 * information was copied successfully; if we tried to grab the inode
180 * but it's no longer allocated; or if it's internal metadata.
182 bc
->breq
->startino
= ino
+ 1;
187 /* Bulkstat a single inode. */
/*
 * breq: request descriptor; mp, idmap, startino and icount are read below.
 * formatter: callback stored in the chunk state for the per-inode helper.
 */
190 struct xfs_ibulk
*breq
,
191 bulkstat_one_fmt_pf formatter
)
193 struct xfs_bstat_chunk bc
= {
194 .formatter
= formatter
,
197 struct xfs_trans
*tp
;
/* Any idmap other than nop_mnt_idmap produces a rate-limited warning. */
200 if (breq
->idmap
!= &nop_mnt_idmap
) {
201 xfs_warn_ratelimited(breq
->mp
,
202 "bulkstat not supported inside of idmapped mounts.");
/* This path is only meant for requests of exactly one inode. */
206 ASSERT(breq
->icount
== 1);
/* Zero-filled scratch record for the single inode being reported. */
208 bc
.buf
= kzalloc(sizeof(struct xfs_bulkstat
),
209 GFP_KERNEL
| __GFP_RETRY_MAYFAIL
);
214 * Grab an empty transaction so that we can use its recursive buffer
215 * locking abilities to detect cycles in the inobt without deadlocking.
217 error
= xfs_trans_alloc_empty(breq
->mp
, &tp
);
/* Stat the inode at breq->startino, then cancel the empty transaction. */
221 error
= xfs_bulkstat_one_int(breq
->mp
, breq
->idmap
, tp
,
222 breq
->startino
, &bc
);
223 xfs_trans_cancel(tp
);
228 * If we reported one inode to userspace then we abort because we hit
229 * the end of the buffer. Don't leak that back to userspace.
231 if (error
== -ECANCELED
)
/*
 * Per-inode callback handed to xfs_iwalk(); the opaque data pointer is the
 * struct xfs_bstat_chunk set up by the caller of the walk.
 */
239 struct xfs_mount
*mp
,
240 struct xfs_trans
*tp
,
244 struct xfs_bstat_chunk
*bc
= data
;
/* Stat this inode using the idmap recorded in the request. */
247 error
= xfs_bulkstat_one_int(mp
, bc
->breq
->idmap
, tp
, ino
, data
);
248 /* bulkstat just skips over missing inodes */
249 if (error
== -ENOENT
|| error
== -EINVAL
)
255 * Check the incoming lastino parameter.
257 * We allow any inode value that could map to physical space inside the
258 * filesystem because if there are no inodes there, bulkstat moves on to the
259 * next chunk. In other words, the magic agino value of zero takes us to the
260 * first chunk in the AG, and an agino value past the end of the AG takes us to
261 * the first chunk in the next AG.
263 * Therefore we can end early if the requested inode is beyond the end of the
264 * filesystem or doesn't map properly.
/*
 * Returns true when startino decodes to an AG number at or beyond
 * sb_agcount, or when re-encoding its (agno, agino) pair does not give
 * back startino itself.
 */
267 xfs_bulkstat_already_done(
268 struct xfs_mount
*mp
,
/* Split the inode number into its AG number and AG-relative inode number. */
271 xfs_agnumber_t agno
= XFS_INO_TO_AGNO(mp
, startino
);
272 xfs_agino_t agino
= XFS_INO_TO_AGINO(mp
, startino
);
/* Either condition alone is enough to report "done". */
274 return agno
>= mp
->m_sb
.sb_agcount
||
275 startino
!= XFS_AGINO_TO_INO(mp
, agno
, agino
);
278 /* Return stat information in bulk (by-inode) for the filesystem. */
/*
 * breq: request descriptor; mp, idmap, startino, flags, icount and ocount
 * are read below.
 * formatter: callback stored in the chunk state for the per-inode helper.
 */
281 struct xfs_ibulk
*breq
,
282 bulkstat_one_fmt_pf formatter
)
284 struct xfs_bstat_chunk bc
= {
285 .formatter
= formatter
,
288 struct xfs_trans
*tp
;
289 unsigned int iwalk_flags
= 0;
/* Any idmap other than nop_mnt_idmap produces a rate-limited warning. */
292 if (breq
->idmap
!= &nop_mnt_idmap
) {
293 xfs_warn_ratelimited(breq
->mp
,
294 "bulkstat not supported inside of idmapped mounts.");
/* Check the starting cursor before allocating anything. */
297 if (xfs_bulkstat_already_done(breq
->mp
, breq
->startino
))
/* Zero-filled scratch record reused for each inode the walk visits. */
300 bc
.buf
= kzalloc(sizeof(struct xfs_bulkstat
),
301 GFP_KERNEL
| __GFP_RETRY_MAYFAIL
);
306 * Grab an empty transaction so that we can use its recursive buffer
307 * locking abilities to detect cycles in the inobt without deadlocking.
309 error
= xfs_trans_alloc_empty(breq
->mp
, &tp
);
/* Translate the request's same-AG flag into the matching iwalk flag. */
313 if (breq
->flags
& XFS_IBULK_SAME_AG
)
314 iwalk_flags
|= XFS_IWALK_SAME_AG
;
/*
 * Walk inodes from breq->startino, calling xfs_bulkstat_iwalk with &bc
 * as its data, then cancel the empty transaction.
 */
316 error
= xfs_iwalk(breq
->mp
, tp
, breq
->startino
, iwalk_flags
,
317 xfs_bulkstat_iwalk
, breq
->icount
, &bc
);
318 xfs_trans_cancel(tp
);
/*
 * NOTE(review): the text below says "lastino", but the cursor field used
 * by the code in this view is breq->startino -- confirm the wording.
 */
323 * We found some inodes, so clear the error status and return them.
324 * The lastino pointer will point directly at the inode that triggered
325 * any error that occurred, so on the next call the error will be
326 * triggered again and propagated to userspace as there will be no
327 * formatted inodes in the buffer.
329 if (breq
->ocount
> 0)
335 /* Convert bulkstat (v5) to bstat (v1). */
/*
 * Fill the v1 record *bs1 from the v5 record *bstat.
 *
 * mp is used only for the XFS_FSB_TO_B() conversions of the extent size
 * hints. bs1 is zeroed first, so any field not assigned below stays zero.
 */
337 xfs_bulkstat_to_bstat(
338 struct xfs_mount
*mp
,
339 struct xfs_bstat
*bs1
,
340 const struct xfs_bulkstat
*bstat
)
342 /* memset is needed here because of padding holes in the structure. */
343 memset(bs1
, 0, sizeof(struct xfs_bstat
));
344 bs1
->bs_ino
= bstat
->bs_ino
;
345 bs1
->bs_mode
= bstat
->bs_mode
;
346 bs1
->bs_nlink
= bstat
->bs_nlink
;
347 bs1
->bs_uid
= bstat
->bs_uid
;
348 bs1
->bs_gid
= bstat
->bs_gid
;
349 bs1
->bs_rdev
= bstat
->bs_rdev
;
350 bs1
->bs_blksize
= bstat
->bs_blksize
;
351 bs1
->bs_size
= bstat
->bs_size
;
/* v5 keeps seconds and nanoseconds in separate fields; v1 uses tv_sec/tv_nsec pairs. */
352 bs1
->bs_atime
.tv_sec
= bstat
->bs_atime
;
353 bs1
->bs_mtime
.tv_sec
= bstat
->bs_mtime
;
354 bs1
->bs_ctime
.tv_sec
= bstat
->bs_ctime
;
355 bs1
->bs_atime
.tv_nsec
= bstat
->bs_atime_nsec
;
356 bs1
->bs_mtime
.tv_nsec
= bstat
->bs_mtime_nsec
;
357 bs1
->bs_ctime
.tv_nsec
= bstat
->bs_ctime_nsec
;
358 bs1
->bs_blocks
= bstat
->bs_blocks
;
359 bs1
->bs_xflags
= bstat
->bs_xflags
;
/* The v5 hint (bs_extsize_blks) goes through XFS_FSB_TO_B() for v1. */
360 bs1
->bs_extsize
= XFS_FSB_TO_B(mp
, bstat
->bs_extsize_blks
);
/* Taken from bs_extents, not bs_extents64. */
361 bs1
->bs_extents
= bstat
->bs_extents
;
362 bs1
->bs_gen
= bstat
->bs_gen
;
/* The project id is split: low 16 bits here, the rest in bs_projid_hi. */
363 bs1
->bs_projid_lo
= bstat
->bs_projectid
& 0xFFFF;
364 bs1
->bs_forkoff
= bstat
->bs_forkoff
;
365 bs1
->bs_projid_hi
= bstat
->bs_projectid
>> 16;
366 bs1
->bs_sick
= bstat
->bs_sick
;
367 bs1
->bs_checked
= bstat
->bs_checked
;
368 bs1
->bs_cowextsize
= XFS_FSB_TO_B(mp
, bstat
->bs_cowextsize_blks
);
/* Always reported as zero. */
369 bs1
->bs_dmevmask
= 0;
371 bs1
->bs_aextents
= bstat
->bs_aextents
;
/* Per-call inumbers state: the caller's formatter and the request. */
374 struct xfs_inumbers_chunk
{
/* Invoked as formatter(breq, &inogrp) for each inode btree record. */
375 inumbers_fmt_pf formatter
;
/* Request descriptor; its startino cursor is advanced by the walk. */
376 struct xfs_ibulk
*breq
;
382 * This is how we export inode btree records to userspace, so that XFS tools
383 * can figure out where inodes are allocated.
387 * Format the inode group structure and report it somewhere.
389 * Similar to xfs_bulkstat_one_int, startino is the inode cursor as we walk
390 * through the filesystem so we move it forward unless there was a runtime
391 * error. If the formatter tells us the buffer is now full we also move the
392 * cursor forward and abort the walk.
/*
 * Per-record callback handed to xfs_inobt_walk(); irec is the inode btree
 * record being reported and data is the caller's struct xfs_inumbers_chunk.
 */
396 struct xfs_mount
*mp
,
397 struct xfs_trans
*tp
,
399 const struct xfs_inobt_rec_incore
*irec
,
/*
 * Build the v5 record: absolute start inode of the chunk, allocated count
 * (total minus free), and an allocation mask that is the inverted free mask.
 */
402 struct xfs_inumbers inogrp
= {
403 .xi_startino
= XFS_AGINO_TO_INO(mp
, agno
, irec
->ir_startino
),
404 .xi_alloccount
= irec
->ir_count
- irec
->ir_freecount
,
405 .xi_allocmask
= ~irec
->ir_free
,
406 .xi_version
= XFS_INUMBERS_VERSION_V5
,
408 struct xfs_inumbers_chunk
*ic
= data
;
/* Any non-zero result other than -ECANCELED is singled out. */
411 error
= ic
->formatter(ic
->breq
, &inogrp
);
412 if (error
&& error
!= -ECANCELED
)
/* Move the cursor past this chunk: its first inode plus XFS_INODES_PER_CHUNK. */
415 ic
->breq
->startino
= XFS_AGINO_TO_INO(mp
, agno
, irec
->ir_startino
) +
416 XFS_INODES_PER_CHUNK
;
421 * Return inode number table for the filesystem.
/*
 * breq: request descriptor; mp, startino, flags, icount and ocount are
 * read below.
 * formatter: callback stored in the chunk state for the per-record helper.
 */
425 struct xfs_ibulk
*breq
,
426 inumbers_fmt_pf formatter
)
428 struct xfs_inumbers_chunk ic
= {
429 .formatter
= formatter
,
432 struct xfs_trans
*tp
;
/* Same starting-cursor check that the bulkstat path uses. */
435 if (xfs_bulkstat_already_done(breq
->mp
, breq
->startino
))
439 * Grab an empty transaction so that we can use its recursive buffer
440 * locking abilities to detect cycles in the inobt without deadlocking.
442 error
= xfs_trans_alloc_empty(breq
->mp
, &tp
);
/*
 * Walk inode btree records from breq->startino, calling xfs_inumbers_walk
 * with &ic as its data, then cancel the empty transaction. breq->flags is
 * passed to the walk unmodified.
 */
446 error
= xfs_inobt_walk(breq
->mp
, tp
, breq
->startino
, breq
->flags
,
447 xfs_inumbers_walk
, breq
->icount
, &ic
);
448 xfs_trans_cancel(tp
);
/*
 * NOTE(review): the text below says "lastino", but the cursor field used
 * by the code in this view is breq->startino -- confirm the wording.
 */
452 * We found some inode groups, so clear the error status and return
453 * them. The lastino pointer will point directly at the inode that
454 * triggered any error that occurred, so on the next call the error
455 * will be triggered again and propagated to userspace as there will be
456 * no formatted inode groups in the buffer.
458 if (breq
->ocount
> 0)
464 /* Convert an inumbers (v5) struct to an inogrp (v1) struct. */
/*
 * Fill the v1 record *ig1 from the v5 record *ig. Only xi_startino,
 * xi_alloccount and xi_allocmask are copied; ig1 is zeroed first.
 */
466 xfs_inumbers_to_inogrp(
467 struct xfs_inogrp
*ig1
,
468 const struct xfs_inumbers
*ig
)
470 /* memset is needed here because of padding holes in the structure. */
471 memset(ig1
, 0, sizeof(struct xfs_inogrp
));
/* Field-by-field copy of the three values both layouts share. */
472 ig1
->xi_startino
= ig
->xi_startino
;
473 ig1
->xi_alloccount
= ig
->xi_alloccount
;
474 ig1
->xi_allocmask
= ig
->xi_allocmask
;