1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2022 Fujitsu. All Rights Reserved.
7 #include "xfs_shared.h"
8 #include "xfs_format.h"
9 #include "xfs_log_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_alloc.h"
14 #include "xfs_btree.h"
15 #include "xfs_inode.h"
16 #include "xfs_icache.h"
18 #include "xfs_rmap_btree.h"
19 #include "xfs_rtalloc.h"
20 #include "xfs_trans.h"
24 #include <linux/dax.h>
27 struct xfs_failure_info
{
28 xfs_agblock_t startblock
;
29 xfs_extlen_t blockcount
;
37 const struct xfs_rmap_irec
*rec
,
38 const struct xfs_failure_info
*notify
)
40 loff_t pos
= XFS_FSB_TO_B(mp
, rec
->rm_offset
);
42 if (notify
->startblock
> rec
->rm_startblock
)
43 pos
+= XFS_FSB_TO_B(mp
,
44 notify
->startblock
- rec
->rm_startblock
);
45 return pos
>> PAGE_SHIFT
;
51 const struct xfs_rmap_irec
*rec
,
52 const struct xfs_failure_info
*notify
)
54 xfs_agblock_t end_rec
;
55 xfs_agblock_t end_notify
;
56 xfs_agblock_t start_cross
;
57 xfs_agblock_t end_cross
;
59 start_cross
= max(rec
->rm_startblock
, notify
->startblock
);
61 end_rec
= rec
->rm_startblock
+ rec
->rm_blockcount
;
62 end_notify
= notify
->startblock
+ notify
->blockcount
;
63 end_cross
= min(end_rec
, end_notify
);
65 return XFS_FSB_TO_B(mp
, end_cross
- start_cross
) >> PAGE_SHIFT
;
70 struct xfs_btree_cur
*cur
,
71 const struct xfs_rmap_irec
*rec
,
74 struct xfs_mount
*mp
= cur
->bc_mp
;
76 struct xfs_failure_info
*notify
= data
;
77 struct address_space
*mapping
;
82 if (XFS_RMAP_NON_INODE_OWNER(rec
->rm_owner
) ||
83 (rec
->rm_flags
& (XFS_RMAP_ATTR_FORK
| XFS_RMAP_BMBT_BLOCK
))) {
84 /* Continue the query because this isn't a failure. */
85 if (notify
->mf_flags
& MF_MEM_PRE_REMOVE
)
87 notify
->want_shutdown
= true;
91 /* Get files that are incore; filter out others that are not in use. */
92 error
= xfs_iget(mp
, cur
->bc_tp
, rec
->rm_owner
, XFS_IGET_INCORE
,
94 /* Continue the rmap query if the inode isn't incore */
95 if (error
== -ENODATA
)
98 notify
->want_shutdown
= true;
102 mapping
= VFS_I(ip
)->i_mapping
;
103 pgoff
= xfs_failure_pgoff(mp
, rec
, notify
);
104 pgcnt
= xfs_failure_pgcnt(mp
, rec
, notify
);
106 /* Continue the rmap query if the inode isn't a dax file. */
107 if (dax_mapping(mapping
))
108 error
= mf_dax_kill_procs(mapping
, pgoff
, pgcnt
,
111 /* Invalidate the cache in dax pages. */
112 if (notify
->mf_flags
& MF_MEM_PRE_REMOVE
)
113 invalidate_inode_pages2_range(mapping
, pgoff
,
121 xfs_dax_notify_failure_freeze(
122 struct xfs_mount
*mp
)
124 struct super_block
*sb
= mp
->m_super
;
127 error
= freeze_super(sb
, FREEZE_HOLDER_KERNEL
);
129 xfs_emerg(mp
, "already frozen by kernel, err=%d", error
);
135 xfs_dax_notify_failure_thaw(
136 struct xfs_mount
*mp
,
139 struct super_block
*sb
= mp
->m_super
;
143 error
= thaw_super(sb
, FREEZE_HOLDER_KERNEL
);
145 xfs_emerg(mp
, "still frozen after notify failure, err=%d",
150 * Also thaw the userspace-held freeze anyway because the device is
151 * about to be removed immediately.
153 thaw_super(sb
, FREEZE_HOLDER_USERSPACE
);
157 xfs_dax_notify_ddev_failure(
158 struct xfs_mount
*mp
,
163 struct xfs_failure_info notify
= { .mf_flags
= mf_flags
};
164 struct xfs_trans
*tp
= NULL
;
165 struct xfs_btree_cur
*cur
= NULL
;
166 struct xfs_buf
*agf_bp
= NULL
;
168 bool kernel_frozen
= false;
169 xfs_fsblock_t fsbno
= XFS_DADDR_TO_FSB(mp
, daddr
);
170 xfs_agnumber_t agno
= XFS_FSB_TO_AGNO(mp
, fsbno
);
171 xfs_fsblock_t end_fsbno
= XFS_DADDR_TO_FSB(mp
,
173 xfs_agnumber_t end_agno
= XFS_FSB_TO_AGNO(mp
, end_fsbno
);
175 if (mf_flags
& MF_MEM_PRE_REMOVE
) {
176 xfs_info(mp
, "Device is about to be removed!");
178 * Freeze fs to prevent new mappings from being created.
179 * - Keep going on if others already hold the kernel frozen state.
180 * - Keep going on if other errors too because this device is
182 * - If kernel frozen state is held successfully here, thaw it
183 * here as well at the end.
185 kernel_frozen
= xfs_dax_notify_failure_freeze(mp
) == 0;
188 error
= xfs_trans_alloc_empty(mp
, &tp
);
192 for (; agno
<= end_agno
; agno
++) {
193 struct xfs_rmap_irec ri_low
= { };
194 struct xfs_rmap_irec ri_high
;
196 struct xfs_perag
*pag
;
197 xfs_agblock_t range_agend
;
199 pag
= xfs_perag_get(mp
, agno
);
200 error
= xfs_alloc_read_agf(pag
, tp
, 0, &agf_bp
);
206 cur
= xfs_rmapbt_init_cursor(mp
, tp
, agf_bp
, pag
);
209 * Set the rmap range from ri_low to ri_high, which represents
210 * a [start, end] where we are looking for the files or metadata.
212 memset(&ri_high
, 0xFF, sizeof(ri_high
));
213 ri_low
.rm_startblock
= XFS_FSB_TO_AGBNO(mp
, fsbno
);
214 if (agno
== end_agno
)
215 ri_high
.rm_startblock
= XFS_FSB_TO_AGBNO(mp
, end_fsbno
);
217 agf
= agf_bp
->b_addr
;
218 range_agend
= min(be32_to_cpu(agf
->agf_length
) - 1,
219 ri_high
.rm_startblock
);
220 notify
.startblock
= ri_low
.rm_startblock
;
221 notify
.blockcount
= range_agend
+ 1 - ri_low
.rm_startblock
;
223 error
= xfs_rmap_query_range(cur
, &ri_low
, &ri_high
,
224 xfs_dax_failure_fn
, ¬ify
);
225 xfs_btree_del_cursor(cur
, error
);
226 xfs_trans_brelse(tp
, agf_bp
);
231 fsbno
= XFS_AGB_TO_FSB(mp
, agno
+ 1, 0);
234 xfs_trans_cancel(tp
);
237 * Shutdown fs from a force umount in pre-remove case which won't fail,
238 * so errors can be ignored. Otherwise, shutdown the filesystem with
239 * CORRUPT flag if error occurred or notify.want_shutdown was set during
242 if (mf_flags
& MF_MEM_PRE_REMOVE
)
243 xfs_force_shutdown(mp
, SHUTDOWN_FORCE_UMOUNT
);
244 else if (error
|| notify
.want_shutdown
) {
245 xfs_force_shutdown(mp
, SHUTDOWN_CORRUPT_ONDISK
);
247 error
= -EFSCORRUPTED
;
251 /* Thaw the fs if it has been frozen before. */
252 if (mf_flags
& MF_MEM_PRE_REMOVE
)
253 xfs_dax_notify_failure_thaw(mp
, kernel_frozen
);
259 xfs_dax_notify_failure(
260 struct dax_device
*dax_dev
,
265 struct xfs_mount
*mp
= dax_holder(dax_dev
);
269 if (!(mp
->m_super
->s_flags
& SB_BORN
)) {
270 xfs_warn(mp
, "filesystem is not ready for notify_failure()!");
274 if (mp
->m_rtdev_targp
&& mp
->m_rtdev_targp
->bt_daxdev
== dax_dev
) {
276 "notify_failure() not supported on realtime device!");
280 if (mp
->m_logdev_targp
&& mp
->m_logdev_targp
->bt_daxdev
== dax_dev
&&
281 mp
->m_logdev_targp
!= mp
->m_ddev_targp
) {
283 * In the pre-remove case the failure notification is attempting
284 * to trigger a force unmount. The expectation is that the
285 * device is still present, but its removal is in progress and
286 * can not be cancelled, proceed with accessing the log device.
288 if (mf_flags
& MF_MEM_PRE_REMOVE
)
290 xfs_err(mp
, "ondisk log corrupt, shutting down fs!");
291 xfs_force_shutdown(mp
, SHUTDOWN_CORRUPT_ONDISK
);
292 return -EFSCORRUPTED
;
295 if (!xfs_has_rmapbt(mp
)) {
296 xfs_debug(mp
, "notify_failure() needs rmapbt enabled!");
300 ddev_start
= mp
->m_ddev_targp
->bt_dax_part_off
;
301 ddev_end
= ddev_start
+ bdev_nr_bytes(mp
->m_ddev_targp
->bt_bdev
) - 1;
303 /* Notify failure on the whole device. */
304 if (offset
== 0 && len
== U64_MAX
) {
306 len
= bdev_nr_bytes(mp
->m_ddev_targp
->bt_bdev
);
309 /* Ignore the range out of filesystem area */
310 if (offset
+ len
- 1 < ddev_start
)
312 if (offset
> ddev_end
)
315 /* Calculate the real range when it touches the boundary */
316 if (offset
> ddev_start
)
317 offset
-= ddev_start
;
319 len
-= ddev_start
- offset
;
322 if (offset
+ len
- 1 > ddev_end
)
323 len
= ddev_end
- offset
+ 1;
325 return xfs_dax_notify_ddev_failure(mp
, BTOBB(offset
), BTOBB(len
),
329 const struct dax_holder_operations xfs_dax_holder_operations
= {
330 .notify_failure
= xfs_dax_notify_failure
,