netfilter: nft_set_rbtree: fix panic when destroying set by GC
[linux/fpc-iii.git] / fs / xfs / xfs_inode_item.c
blob2389c34c172dda0ab4333e0461d3727296509a1e
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_format.h"
9 #include "xfs_log_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_inode.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode_item.h"
15 #include "xfs_error.h"
16 #include "xfs_trace.h"
17 #include "xfs_trans_priv.h"
18 #include "xfs_buf_item.h"
19 #include "xfs_log.h"
21 #include <linux/iversion.h>
23 kmem_zone_t *xfs_ili_zone; /* inode log item zone */
25 static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
27 return container_of(lip, struct xfs_inode_log_item, ili_item);
30 STATIC void
31 xfs_inode_item_data_fork_size(
32 struct xfs_inode_log_item *iip,
33 int *nvecs,
34 int *nbytes)
36 struct xfs_inode *ip = iip->ili_inode;
38 switch (ip->i_d.di_format) {
39 case XFS_DINODE_FMT_EXTENTS:
40 if ((iip->ili_fields & XFS_ILOG_DEXT) &&
41 ip->i_d.di_nextents > 0 &&
42 ip->i_df.if_bytes > 0) {
43 /* worst case, doesn't subtract delalloc extents */
44 *nbytes += XFS_IFORK_DSIZE(ip);
45 *nvecs += 1;
47 break;
48 case XFS_DINODE_FMT_BTREE:
49 if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
50 ip->i_df.if_broot_bytes > 0) {
51 *nbytes += ip->i_df.if_broot_bytes;
52 *nvecs += 1;
54 break;
55 case XFS_DINODE_FMT_LOCAL:
56 if ((iip->ili_fields & XFS_ILOG_DDATA) &&
57 ip->i_df.if_bytes > 0) {
58 *nbytes += roundup(ip->i_df.if_bytes, 4);
59 *nvecs += 1;
61 break;
63 case XFS_DINODE_FMT_DEV:
64 break;
65 default:
66 ASSERT(0);
67 break;
71 STATIC void
72 xfs_inode_item_attr_fork_size(
73 struct xfs_inode_log_item *iip,
74 int *nvecs,
75 int *nbytes)
77 struct xfs_inode *ip = iip->ili_inode;
79 switch (ip->i_d.di_aformat) {
80 case XFS_DINODE_FMT_EXTENTS:
81 if ((iip->ili_fields & XFS_ILOG_AEXT) &&
82 ip->i_d.di_anextents > 0 &&
83 ip->i_afp->if_bytes > 0) {
84 /* worst case, doesn't subtract unused space */
85 *nbytes += XFS_IFORK_ASIZE(ip);
86 *nvecs += 1;
88 break;
89 case XFS_DINODE_FMT_BTREE:
90 if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
91 ip->i_afp->if_broot_bytes > 0) {
92 *nbytes += ip->i_afp->if_broot_bytes;
93 *nvecs += 1;
95 break;
96 case XFS_DINODE_FMT_LOCAL:
97 if ((iip->ili_fields & XFS_ILOG_ADATA) &&
98 ip->i_afp->if_bytes > 0) {
99 *nbytes += roundup(ip->i_afp->if_bytes, 4);
100 *nvecs += 1;
102 break;
103 default:
104 ASSERT(0);
105 break;
110 * This returns the number of iovecs needed to log the given inode item.
112 * We need one iovec for the inode log format structure, one for the
113 * inode core, and possibly one for the inode data/extents/b-tree root
114 * and one for the inode attribute data/extents/b-tree root.
116 STATIC void
117 xfs_inode_item_size(
118 struct xfs_log_item *lip,
119 int *nvecs,
120 int *nbytes)
122 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
123 struct xfs_inode *ip = iip->ili_inode;
125 *nvecs += 2;
126 *nbytes += sizeof(struct xfs_inode_log_format) +
127 xfs_log_dinode_size(ip->i_d.di_version);
129 xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
130 if (XFS_IFORK_Q(ip))
131 xfs_inode_item_attr_fork_size(iip, nvecs, nbytes);
134 STATIC void
135 xfs_inode_item_format_data_fork(
136 struct xfs_inode_log_item *iip,
137 struct xfs_inode_log_format *ilf,
138 struct xfs_log_vec *lv,
139 struct xfs_log_iovec **vecp)
141 struct xfs_inode *ip = iip->ili_inode;
142 size_t data_bytes;
144 switch (ip->i_d.di_format) {
145 case XFS_DINODE_FMT_EXTENTS:
146 iip->ili_fields &=
147 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
149 if ((iip->ili_fields & XFS_ILOG_DEXT) &&
150 ip->i_d.di_nextents > 0 &&
151 ip->i_df.if_bytes > 0) {
152 struct xfs_bmbt_rec *p;
154 ASSERT(xfs_iext_count(&ip->i_df) > 0);
156 p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
157 data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
158 xlog_finish_iovec(lv, *vecp, data_bytes);
160 ASSERT(data_bytes <= ip->i_df.if_bytes);
162 ilf->ilf_dsize = data_bytes;
163 ilf->ilf_size++;
164 } else {
165 iip->ili_fields &= ~XFS_ILOG_DEXT;
167 break;
168 case XFS_DINODE_FMT_BTREE:
169 iip->ili_fields &=
170 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV);
172 if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
173 ip->i_df.if_broot_bytes > 0) {
174 ASSERT(ip->i_df.if_broot != NULL);
175 xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT,
176 ip->i_df.if_broot,
177 ip->i_df.if_broot_bytes);
178 ilf->ilf_dsize = ip->i_df.if_broot_bytes;
179 ilf->ilf_size++;
180 } else {
181 ASSERT(!(iip->ili_fields &
182 XFS_ILOG_DBROOT));
183 iip->ili_fields &= ~XFS_ILOG_DBROOT;
185 break;
186 case XFS_DINODE_FMT_LOCAL:
187 iip->ili_fields &=
188 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
189 if ((iip->ili_fields & XFS_ILOG_DDATA) &&
190 ip->i_df.if_bytes > 0) {
192 * Round i_bytes up to a word boundary.
193 * The underlying memory is guaranteed to
194 * to be there by xfs_idata_realloc().
196 data_bytes = roundup(ip->i_df.if_bytes, 4);
197 ASSERT(ip->i_df.if_real_bytes == 0 ||
198 ip->i_df.if_real_bytes >= data_bytes);
199 ASSERT(ip->i_df.if_u1.if_data != NULL);
200 ASSERT(ip->i_d.di_size > 0);
201 xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
202 ip->i_df.if_u1.if_data, data_bytes);
203 ilf->ilf_dsize = (unsigned)data_bytes;
204 ilf->ilf_size++;
205 } else {
206 iip->ili_fields &= ~XFS_ILOG_DDATA;
208 break;
209 case XFS_DINODE_FMT_DEV:
210 iip->ili_fields &=
211 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT);
212 if (iip->ili_fields & XFS_ILOG_DEV)
213 ilf->ilf_u.ilfu_rdev = sysv_encode_dev(VFS_I(ip)->i_rdev);
214 break;
215 default:
216 ASSERT(0);
217 break;
221 STATIC void
222 xfs_inode_item_format_attr_fork(
223 struct xfs_inode_log_item *iip,
224 struct xfs_inode_log_format *ilf,
225 struct xfs_log_vec *lv,
226 struct xfs_log_iovec **vecp)
228 struct xfs_inode *ip = iip->ili_inode;
229 size_t data_bytes;
231 switch (ip->i_d.di_aformat) {
232 case XFS_DINODE_FMT_EXTENTS:
233 iip->ili_fields &=
234 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
236 if ((iip->ili_fields & XFS_ILOG_AEXT) &&
237 ip->i_d.di_anextents > 0 &&
238 ip->i_afp->if_bytes > 0) {
239 struct xfs_bmbt_rec *p;
241 ASSERT(xfs_iext_count(ip->i_afp) ==
242 ip->i_d.di_anextents);
244 p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
245 data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
246 xlog_finish_iovec(lv, *vecp, data_bytes);
248 ilf->ilf_asize = data_bytes;
249 ilf->ilf_size++;
250 } else {
251 iip->ili_fields &= ~XFS_ILOG_AEXT;
253 break;
254 case XFS_DINODE_FMT_BTREE:
255 iip->ili_fields &=
256 ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
258 if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
259 ip->i_afp->if_broot_bytes > 0) {
260 ASSERT(ip->i_afp->if_broot != NULL);
262 xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT,
263 ip->i_afp->if_broot,
264 ip->i_afp->if_broot_bytes);
265 ilf->ilf_asize = ip->i_afp->if_broot_bytes;
266 ilf->ilf_size++;
267 } else {
268 iip->ili_fields &= ~XFS_ILOG_ABROOT;
270 break;
271 case XFS_DINODE_FMT_LOCAL:
272 iip->ili_fields &=
273 ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
275 if ((iip->ili_fields & XFS_ILOG_ADATA) &&
276 ip->i_afp->if_bytes > 0) {
278 * Round i_bytes up to a word boundary.
279 * The underlying memory is guaranteed to
280 * to be there by xfs_idata_realloc().
282 data_bytes = roundup(ip->i_afp->if_bytes, 4);
283 ASSERT(ip->i_afp->if_real_bytes == 0 ||
284 ip->i_afp->if_real_bytes >= data_bytes);
285 ASSERT(ip->i_afp->if_u1.if_data != NULL);
286 xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
287 ip->i_afp->if_u1.if_data,
288 data_bytes);
289 ilf->ilf_asize = (unsigned)data_bytes;
290 ilf->ilf_size++;
291 } else {
292 iip->ili_fields &= ~XFS_ILOG_ADATA;
294 break;
295 default:
296 ASSERT(0);
297 break;
301 static void
302 xfs_inode_to_log_dinode(
303 struct xfs_inode *ip,
304 struct xfs_log_dinode *to,
305 xfs_lsn_t lsn)
307 struct xfs_icdinode *from = &ip->i_d;
308 struct inode *inode = VFS_I(ip);
310 to->di_magic = XFS_DINODE_MAGIC;
312 to->di_version = from->di_version;
313 to->di_format = from->di_format;
314 to->di_uid = from->di_uid;
315 to->di_gid = from->di_gid;
316 to->di_projid_lo = from->di_projid_lo;
317 to->di_projid_hi = from->di_projid_hi;
319 memset(to->di_pad, 0, sizeof(to->di_pad));
320 memset(to->di_pad3, 0, sizeof(to->di_pad3));
321 to->di_atime.t_sec = inode->i_atime.tv_sec;
322 to->di_atime.t_nsec = inode->i_atime.tv_nsec;
323 to->di_mtime.t_sec = inode->i_mtime.tv_sec;
324 to->di_mtime.t_nsec = inode->i_mtime.tv_nsec;
325 to->di_ctime.t_sec = inode->i_ctime.tv_sec;
326 to->di_ctime.t_nsec = inode->i_ctime.tv_nsec;
327 to->di_nlink = inode->i_nlink;
328 to->di_gen = inode->i_generation;
329 to->di_mode = inode->i_mode;
331 to->di_size = from->di_size;
332 to->di_nblocks = from->di_nblocks;
333 to->di_extsize = from->di_extsize;
334 to->di_nextents = from->di_nextents;
335 to->di_anextents = from->di_anextents;
336 to->di_forkoff = from->di_forkoff;
337 to->di_aformat = from->di_aformat;
338 to->di_dmevmask = from->di_dmevmask;
339 to->di_dmstate = from->di_dmstate;
340 to->di_flags = from->di_flags;
342 /* log a dummy value to ensure log structure is fully initialised */
343 to->di_next_unlinked = NULLAGINO;
345 if (from->di_version == 3) {
346 to->di_changecount = inode_peek_iversion(inode);
347 to->di_crtime.t_sec = from->di_crtime.t_sec;
348 to->di_crtime.t_nsec = from->di_crtime.t_nsec;
349 to->di_flags2 = from->di_flags2;
350 to->di_cowextsize = from->di_cowextsize;
351 to->di_ino = ip->i_ino;
352 to->di_lsn = lsn;
353 memset(to->di_pad2, 0, sizeof(to->di_pad2));
354 uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
355 to->di_flushiter = 0;
356 } else {
357 to->di_flushiter = from->di_flushiter;
362 * Format the inode core. Current timestamp data is only in the VFS inode
363 * fields, so we need to grab them from there. Hence rather than just copying
364 * the XFS inode core structure, format the fields directly into the iovec.
366 static void
367 xfs_inode_item_format_core(
368 struct xfs_inode *ip,
369 struct xfs_log_vec *lv,
370 struct xfs_log_iovec **vecp)
372 struct xfs_log_dinode *dic;
374 dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE);
375 xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn);
376 xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_d.di_version));
380 * This is called to fill in the vector of log iovecs for the given inode
381 * log item. It fills the first item with an inode log format structure,
382 * the second with the on-disk inode structure, and a possible third and/or
383 * fourth with the inode data/extents/b-tree root and inode attributes
384 * data/extents/b-tree root.
386 * Note: Always use the 64 bit inode log format structure so we don't
387 * leave an uninitialised hole in the format item on 64 bit systems. Log
388 * recovery on 32 bit systems handles this just fine, so there's no reason
389 * for not using an initialising the properly padded structure all the time.
391 STATIC void
392 xfs_inode_item_format(
393 struct xfs_log_item *lip,
394 struct xfs_log_vec *lv)
396 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
397 struct xfs_inode *ip = iip->ili_inode;
398 struct xfs_log_iovec *vecp = NULL;
399 struct xfs_inode_log_format *ilf;
401 ASSERT(ip->i_d.di_version > 1);
403 ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT);
404 ilf->ilf_type = XFS_LI_INODE;
405 ilf->ilf_ino = ip->i_ino;
406 ilf->ilf_blkno = ip->i_imap.im_blkno;
407 ilf->ilf_len = ip->i_imap.im_len;
408 ilf->ilf_boffset = ip->i_imap.im_boffset;
409 ilf->ilf_fields = XFS_ILOG_CORE;
410 ilf->ilf_size = 2; /* format + core */
413 * make sure we don't leak uninitialised data into the log in the case
414 * when we don't log every field in the inode.
416 ilf->ilf_dsize = 0;
417 ilf->ilf_asize = 0;
418 ilf->ilf_pad = 0;
419 memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u));
421 xlog_finish_iovec(lv, vecp, sizeof(*ilf));
423 xfs_inode_item_format_core(ip, lv, &vecp);
424 xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
425 if (XFS_IFORK_Q(ip)) {
426 xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
427 } else {
428 iip->ili_fields &=
429 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
432 /* update the format with the exact fields we actually logged */
433 ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
437 * This is called to pin the inode associated with the inode log
438 * item in memory so it cannot be written out.
440 STATIC void
441 xfs_inode_item_pin(
442 struct xfs_log_item *lip)
444 struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
446 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
448 trace_xfs_inode_pin(ip, _RET_IP_);
449 atomic_inc(&ip->i_pincount);
454 * This is called to unpin the inode associated with the inode log
455 * item which was previously pinned with a call to xfs_inode_item_pin().
457 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
459 STATIC void
460 xfs_inode_item_unpin(
461 struct xfs_log_item *lip,
462 int remove)
464 struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
466 trace_xfs_inode_unpin(ip, _RET_IP_);
467 ASSERT(atomic_read(&ip->i_pincount) > 0);
468 if (atomic_dec_and_test(&ip->i_pincount))
469 wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
473 * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
474 * have been failed during writeback
476 * This informs the AIL that the inode is already flush locked on the next push,
477 * and acquires a hold on the buffer to ensure that it isn't reclaimed before
478 * dirty data makes it to disk.
480 STATIC void
481 xfs_inode_item_error(
482 struct xfs_log_item *lip,
483 struct xfs_buf *bp)
485 ASSERT(xfs_isiflocked(INODE_ITEM(lip)->ili_inode));
486 xfs_set_li_failed(lip, bp);
489 STATIC uint
490 xfs_inode_item_push(
491 struct xfs_log_item *lip,
492 struct list_head *buffer_list)
493 __releases(&lip->li_ailp->ail_lock)
494 __acquires(&lip->li_ailp->ail_lock)
496 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
497 struct xfs_inode *ip = iip->ili_inode;
498 struct xfs_buf *bp = lip->li_buf;
499 uint rval = XFS_ITEM_SUCCESS;
500 int error;
502 if (xfs_ipincount(ip) > 0)
503 return XFS_ITEM_PINNED;
506 * The buffer containing this item failed to be written back
507 * previously. Resubmit the buffer for IO.
509 if (test_bit(XFS_LI_FAILED, &lip->li_flags)) {
510 if (!xfs_buf_trylock(bp))
511 return XFS_ITEM_LOCKED;
513 if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
514 rval = XFS_ITEM_FLUSHING;
516 xfs_buf_unlock(bp);
517 return rval;
520 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
521 return XFS_ITEM_LOCKED;
524 * Re-check the pincount now that we stabilized the value by
525 * taking the ilock.
527 if (xfs_ipincount(ip) > 0) {
528 rval = XFS_ITEM_PINNED;
529 goto out_unlock;
533 * Stale inode items should force out the iclog.
535 if (ip->i_flags & XFS_ISTALE) {
536 rval = XFS_ITEM_PINNED;
537 goto out_unlock;
541 * Someone else is already flushing the inode. Nothing we can do
542 * here but wait for the flush to finish and remove the item from
543 * the AIL.
545 if (!xfs_iflock_nowait(ip)) {
546 rval = XFS_ITEM_FLUSHING;
547 goto out_unlock;
550 ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
551 ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
553 spin_unlock(&lip->li_ailp->ail_lock);
555 error = xfs_iflush(ip, &bp);
556 if (!error) {
557 if (!xfs_buf_delwri_queue(bp, buffer_list))
558 rval = XFS_ITEM_FLUSHING;
559 xfs_buf_relse(bp);
562 spin_lock(&lip->li_ailp->ail_lock);
563 out_unlock:
564 xfs_iunlock(ip, XFS_ILOCK_SHARED);
565 return rval;
569 * Unlock the inode associated with the inode log item.
571 STATIC void
572 xfs_inode_item_unlock(
573 struct xfs_log_item *lip)
575 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
576 struct xfs_inode *ip = iip->ili_inode;
577 unsigned short lock_flags;
579 ASSERT(ip->i_itemp != NULL);
580 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
582 lock_flags = iip->ili_lock_flags;
583 iip->ili_lock_flags = 0;
584 if (lock_flags)
585 xfs_iunlock(ip, lock_flags);
589 * This is called to find out where the oldest active copy of the inode log
590 * item in the on disk log resides now that the last log write of it completed
591 * at the given lsn. Since we always re-log all dirty data in an inode, the
592 * latest copy in the on disk log is the only one that matters. Therefore,
593 * simply return the given lsn.
595 * If the inode has been marked stale because the cluster is being freed, we
596 * don't want to (re-)insert this inode into the AIL. There is a race condition
597 * where the cluster buffer may be unpinned before the inode is inserted into
598 * the AIL during transaction committed processing. If the buffer is unpinned
599 * before the inode item has been committed and inserted, then it is possible
600 * for the buffer to be written and IO completes before the inode is inserted
601 * into the AIL. In that case, we'd be inserting a clean, stale inode into the
602 * AIL which will never get removed. It will, however, get reclaimed which
603 * triggers an assert in xfs_inode_free() complaining about freein an inode
604 * still in the AIL.
606 * To avoid this, just unpin the inode directly and return a LSN of -1 so the
607 * transaction committed code knows that it does not need to do any further
608 * processing on the item.
610 STATIC xfs_lsn_t
611 xfs_inode_item_committed(
612 struct xfs_log_item *lip,
613 xfs_lsn_t lsn)
615 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
616 struct xfs_inode *ip = iip->ili_inode;
618 if (xfs_iflags_test(ip, XFS_ISTALE)) {
619 xfs_inode_item_unpin(lip, 0);
620 return -1;
622 return lsn;
625 STATIC void
626 xfs_inode_item_committing(
627 struct xfs_log_item *lip,
628 xfs_lsn_t lsn)
630 INODE_ITEM(lip)->ili_last_lsn = lsn;
634 * This is the ops vector shared by all buf log items.
636 static const struct xfs_item_ops xfs_inode_item_ops = {
637 .iop_size = xfs_inode_item_size,
638 .iop_format = xfs_inode_item_format,
639 .iop_pin = xfs_inode_item_pin,
640 .iop_unpin = xfs_inode_item_unpin,
641 .iop_unlock = xfs_inode_item_unlock,
642 .iop_committed = xfs_inode_item_committed,
643 .iop_push = xfs_inode_item_push,
644 .iop_committing = xfs_inode_item_committing,
645 .iop_error = xfs_inode_item_error
650 * Initialize the inode log item for a newly allocated (in-core) inode.
652 void
653 xfs_inode_item_init(
654 struct xfs_inode *ip,
655 struct xfs_mount *mp)
657 struct xfs_inode_log_item *iip;
659 ASSERT(ip->i_itemp == NULL);
660 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
662 iip->ili_inode = ip;
663 xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
664 &xfs_inode_item_ops);
668 * Free the inode log item and any memory hanging off of it.
670 void
671 xfs_inode_item_destroy(
672 xfs_inode_t *ip)
674 kmem_free(ip->i_itemp->ili_item.li_lv_shadow);
675 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
680 * This is the inode flushing I/O completion routine. It is called
681 * from interrupt level when the buffer containing the inode is
682 * flushed to disk. It is responsible for removing the inode item
683 * from the AIL if it has not been re-logged, and unlocking the inode's
684 * flush lock.
686 * To reduce AIL lock traffic as much as possible, we scan the buffer log item
687 * list for other inodes that will run this function. We remove them from the
688 * buffer list so we can process all the inode IO completions in one AIL lock
689 * traversal.
691 void
692 xfs_iflush_done(
693 struct xfs_buf *bp,
694 struct xfs_log_item *lip)
696 struct xfs_inode_log_item *iip;
697 struct xfs_log_item *blip, *n;
698 struct xfs_ail *ailp = lip->li_ailp;
699 int need_ail = 0;
700 LIST_HEAD(tmp);
703 * Scan the buffer IO completions for other inodes being completed and
704 * attach them to the current inode log item.
707 list_add_tail(&lip->li_bio_list, &tmp);
709 list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) {
710 if (lip->li_cb != xfs_iflush_done)
711 continue;
713 list_move_tail(&blip->li_bio_list, &tmp);
715 * while we have the item, do the unlocked check for needing
716 * the AIL lock.
718 iip = INODE_ITEM(blip);
719 if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
720 test_bit(XFS_LI_FAILED, &blip->li_flags))
721 need_ail++;
724 /* make sure we capture the state of the initial inode. */
725 iip = INODE_ITEM(lip);
726 if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
727 test_bit(XFS_LI_FAILED, &lip->li_flags))
728 need_ail++;
731 * We only want to pull the item from the AIL if it is
732 * actually there and its location in the log has not
733 * changed since we started the flush. Thus, we only bother
734 * if the ili_logged flag is set and the inode's lsn has not
735 * changed. First we check the lsn outside
736 * the lock since it's cheaper, and then we recheck while
737 * holding the lock before removing the inode from the AIL.
739 if (need_ail) {
740 bool mlip_changed = false;
742 /* this is an opencoded batch version of xfs_trans_ail_delete */
743 spin_lock(&ailp->ail_lock);
744 list_for_each_entry(blip, &tmp, li_bio_list) {
745 if (INODE_ITEM(blip)->ili_logged &&
746 blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
747 mlip_changed |= xfs_ail_delete_one(ailp, blip);
748 else {
749 xfs_clear_li_failed(blip);
753 if (mlip_changed) {
754 if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
755 xlog_assign_tail_lsn_locked(ailp->ail_mount);
756 if (list_empty(&ailp->ail_head))
757 wake_up_all(&ailp->ail_empty);
759 spin_unlock(&ailp->ail_lock);
761 if (mlip_changed)
762 xfs_log_space_wake(ailp->ail_mount);
766 * clean up and unlock the flush lock now we are done. We can clear the
767 * ili_last_fields bits now that we know that the data corresponding to
768 * them is safely on disk.
770 list_for_each_entry_safe(blip, n, &tmp, li_bio_list) {
771 list_del_init(&blip->li_bio_list);
772 iip = INODE_ITEM(blip);
773 iip->ili_logged = 0;
774 iip->ili_last_fields = 0;
775 xfs_ifunlock(iip->ili_inode);
777 list_del(&tmp);
781 * This is the inode flushing abort routine. It is called from xfs_iflush when
782 * the filesystem is shutting down to clean up the inode state. It is
783 * responsible for removing the inode item from the AIL if it has not been
784 * re-logged, and unlocking the inode's flush lock.
786 void
787 xfs_iflush_abort(
788 xfs_inode_t *ip,
789 bool stale)
791 xfs_inode_log_item_t *iip = ip->i_itemp;
793 if (iip) {
794 if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags)) {
795 xfs_trans_ail_remove(&iip->ili_item,
796 stale ? SHUTDOWN_LOG_IO_ERROR :
797 SHUTDOWN_CORRUPT_INCORE);
799 iip->ili_logged = 0;
801 * Clear the ili_last_fields bits now that we know that the
802 * data corresponding to them is safely on disk.
804 iip->ili_last_fields = 0;
806 * Clear the inode logging fields so no more flushes are
807 * attempted.
809 iip->ili_fields = 0;
810 iip->ili_fsync_fields = 0;
813 * Release the inode's flush lock since we're done with it.
815 xfs_ifunlock(ip);
818 void
819 xfs_istale_done(
820 struct xfs_buf *bp,
821 struct xfs_log_item *lip)
823 xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true);
827 * convert an xfs_inode_log_format struct from the old 32 bit version
828 * (which can have different field alignments) to the native 64 bit version
831 xfs_inode_item_format_convert(
832 struct xfs_log_iovec *buf,
833 struct xfs_inode_log_format *in_f)
835 struct xfs_inode_log_format_32 *in_f32 = buf->i_addr;
837 if (buf->i_len != sizeof(*in_f32))
838 return -EFSCORRUPTED;
840 in_f->ilf_type = in_f32->ilf_type;
841 in_f->ilf_size = in_f32->ilf_size;
842 in_f->ilf_fields = in_f32->ilf_fields;
843 in_f->ilf_asize = in_f32->ilf_asize;
844 in_f->ilf_dsize = in_f32->ilf_dsize;
845 in_f->ilf_ino = in_f32->ilf_ino;
846 memcpy(&in_f->ilf_u, &in_f32->ilf_u, sizeof(in_f->ilf_u));
847 in_f->ilf_blkno = in_f32->ilf_blkno;
848 in_f->ilf_len = in_f32->ilf_len;
849 in_f->ilf_boffset = in_f32->ilf_boffset;
850 return 0;