// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/fs.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/gfs2_ondisk.h>
#include <linux/backing-dev.h>
#include <linux/uio.h>
#include <trace/events/writeback.h>
#include <linux/sched/signal.h>

#include "gfs2.h"
#include "incore.h"
#include "aops.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "trans.h"
#include "rgrp.h"
#include "super.h"
#include "util.h"
#include "glops.h"

void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio,
			     size_t from, size_t len)
{
	struct buffer_head *head = folio_buffers(folio);
	unsigned int bsize = head->b_size;
	struct buffer_head *bh;
	size_t to = from + len;
	size_t start, end;

	/*
	 * Walk the folio's circular list of buffers and add each buffer
	 * that overlaps the byte range [from, to) to the current
	 * transaction.
	 */
	for (bh = head, start = 0; bh != head || !start;
	     bh = bh->b_this_page, start = end) {
		end = start + bsize;
		if (end <= from)
			continue;
		if (start >= to)
			break;
		set_buffer_uptodate(bh);
		gfs2_trans_add_data(ip->i_gl, bh);
	}
}

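/*
 * Illustrative arithmetic for the loop above: with a 4KiB folio carrying
 * 1KiB buffers, a call with from == 512 and len == 2048 spans bytes
 * [512, 2560), so the buffers at [0, 1024), [1024, 2048) and [2048, 3072)
 * are journaled, while the last buffer starts at 3072 >= to and is skipped.
 */
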
/**
 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Non-zero if we may add block to the file
 *
 * Returns: errno
 */

static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
				  struct buffer_head *bh_result, int create)
{
	int error;

	error = gfs2_block_map(inode, lblock, bh_result, 0);
	if (error)
		return error;
	if (!buffer_mapped(bh_result))
		return -ENODATA;
	return 0;
}

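/*
 * Note that @create is deliberately ignored above: gfs2_block_map() is
 * called with create == 0, so this helper never allocates blocks during
 * writeback; an unmapped block is reported as -ENODATA instead.
 */
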
/**
 * gfs2_write_jdata_folio - gfs2 jdata-specific version of block_write_full_folio
 * @folio: The folio to write
 * @wbc: The writeback control
 *
 * This is the same as calling block_write_full_folio, but it also
 * writes pages outside of i_size
 */
static int gfs2_write_jdata_folio(struct folio *folio,
				  struct writeback_control *wbc)
{
	struct inode * const inode = folio->mapping->host;
	loff_t i_size = i_size_read(inode);

	/*
	 * The folio straddles i_size. It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped. "A file is mapped
	 * in multiples of the page size. For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	if (folio_pos(folio) < i_size &&
	    i_size < folio_pos(folio) + folio_size(folio))
		folio_zero_segment(folio, offset_in_folio(folio, i_size),
				folio_size(folio));

	return __block_write_full_folio(inode, folio, gfs2_get_block_noalloc,
			wbc);
}

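/*
 * Worked example for the zeroing above: a 4KiB folio at file offset 0 with
 * i_size == 3000 straddles i_size, so bytes [3000, 4096) are zeroed before
 * the write so that stale data beyond EOF never reaches the media.
 */
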
/**
 * __gfs2_jdata_write_folio - The core of jdata writepage
 * @folio: The folio to write
 * @wbc: The writeback control
 *
 * Implements the core of write back. If a transaction is required then
 * the checked flag will have been set and the transaction will have
 * already been started before this is called.
 */
static int __gfs2_jdata_write_folio(struct folio *folio,
				    struct writeback_control *wbc)
{
	struct inode *inode = folio->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);

	if (folio_test_checked(folio)) {
		folio_clear_checked(folio);
		if (!folio_buffers(folio)) {
			create_empty_buffers(folio,
					inode->i_sb->s_blocksize,
					BIT(BH_Dirty)|BIT(BH_Uptodate));
		}
		gfs2_trans_add_databufs(ip, folio, 0, folio_size(folio));
	}
	return gfs2_write_jdata_folio(folio, wbc);
}

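/*
 * The checked flag is set by jdata_dirty_folio() below when a folio is
 * dirtied while a transaction is running on the current task; the folio's
 * buffers are then journaled here, in the transaction started by
 * gfs2_write_cache_jdata(), rather than at dirtying time.
 */
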
/**
 * gfs2_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: Write-back control
 *
 * Used for both ordered and writeback modes.
 */
static int gfs2_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
	struct iomap_writepage_ctx wpc = { };
	int ret;

	/*
	 * Even if we didn't write enough pages here, we might still be holding
	 * dirty pages in the ail. We forcibly flush the ail because we don't
	 * want balance_dirty_pages() to loop indefinitely trying to write out
	 * pages held in the ail that it can't find.
	 */
	ret = iomap_writepages(mapping, wbc, &wpc, &gfs2_writeback_ops);
	if (ret == 0 && wbc->nr_to_write > 0)
		set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
	return ret;
}

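/*
 * SDF_FORCE_AIL_FLUSH is picked up by the gfs2 log daemon, which then
 * flushes the ail so that the dirty pages parked there become findable
 * by the next writeback pass.
 */
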
/**
 * gfs2_write_jdata_batch - Write back a folio batch's worth of folios
 * @mapping: The mapping
 * @wbc: The writeback control
 * @fbatch: The batch of folios
 * @done_index: Page index
 *
 * Returns: non-zero if loop should terminate, zero otherwise
 */

static int gfs2_write_jdata_batch(struct address_space *mapping,
				  struct writeback_control *wbc,
				  struct folio_batch *fbatch,
				  pgoff_t *done_index)
{
	struct inode *inode = mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned nrblocks;
	int i;
	int ret;
	size_t size = 0;
	int nr_folios = folio_batch_count(fbatch);

	for (i = 0; i < nr_folios; i++)
		size += folio_size(fbatch->folios[i]);
	nrblocks = size >> inode->i_blkbits;

	ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
	if (ret < 0)
		return ret;

	for (i = 0; i < nr_folios; i++) {
		struct folio *folio = fbatch->folios[i];

		*done_index = folio->index;

		folio_lock(folio);

		if (unlikely(folio->mapping != mapping)) {
continue_unlock:
			folio_unlock(folio);
			continue;
		}

		if (!folio_test_dirty(folio)) {
			/* someone wrote it for us */
			goto continue_unlock;
		}

		if (folio_test_writeback(folio)) {
			if (wbc->sync_mode != WB_SYNC_NONE)
				folio_wait_writeback(folio);
			else
				goto continue_unlock;
		}

		BUG_ON(folio_test_writeback(folio));
		if (!folio_clear_dirty_for_io(folio))
			goto continue_unlock;

		trace_wbc_writepage(wbc, inode_to_bdi(inode));

		ret = __gfs2_jdata_write_folio(folio, wbc);
		if (unlikely(ret)) {
			if (ret == AOP_WRITEPAGE_ACTIVATE) {
				folio_unlock(folio);
				ret = 0;
			} else {
				/*
				 * done_index is set past this page,
				 * so media errors will not choke
				 * background writeout for the entire
				 * file. This has consequences for
				 * range_cyclic semantics (ie. it may
				 * not be suitable for data integrity
				 * writeout).
				 */
				*done_index = folio_next_index(folio);
				ret = 1;
				break;
			}
		}

		/*
		 * We stop writing back only if we are not doing
		 * integrity sync. In case of integrity sync we have to
		 * keep going until we have written all the pages
		 * we tagged for writeback prior to entering this loop.
		 */
		if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) {
			ret = 1;
			break;
		}
	}
	gfs2_trans_end(sdp);
	return ret;
}

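/*
 * Reservation example for the gfs2_trans_begin() call above: a batch of
 * fifteen 4KiB folios on a filesystem with 4KiB blocks gives
 * size == 61440, so nrblocks == 61440 >> 12 == 15 blocks are reserved,
 * and the same figure is used for the revoke count.
 */
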
/**
 * gfs2_write_cache_jdata - Like write_cache_pages but different
 * @mapping: The mapping to write
 * @wbc: The writeback control
 *
 * The reason that we use our own function here is that we need to
 * start transactions before we grab page locks. This allows us
 * to get the ordering right.
 */

static int gfs2_write_cache_jdata(struct address_space *mapping,
				  struct writeback_control *wbc)
{
	int ret = 0;
	int done = 0;
	struct folio_batch fbatch;
	int nr_folios;
	pgoff_t writeback_index;
	pgoff_t index;
	pgoff_t end;
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	xa_mark_t tag;

	folio_batch_init(&fbatch);
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;

retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		nr_folios = filemap_get_folios_tag(mapping, &index, end,
				tag, &fbatch);
		if (nr_folios == 0)
			break;

		ret = gfs2_write_jdata_batch(mapping, wbc, &fbatch,
				&done_index);
		if (ret)
			done = 1;
		if (ret > 0)
			ret = 0;
		folio_batch_release(&fbatch);
		cond_resched();
	}

	if (!cycled && !done) {
		/*
		 * range_cyclic:
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}

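/*
 * range_cyclic example: if the previous pass ended at writeback_index 100,
 * this pass scans [100, end of file] first; if that hits the end without
 * being told to stop, it wraps and scans [0, 99] before recording
 * mapping->writeback_index for the next writeback cycle.
 */
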
/**
 * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: The writeback control
 *
 * Returns: errno
 */
static int gfs2_jdata_writepages(struct address_space *mapping,
				 struct writeback_control *wbc)
{
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
	int ret;

	ret = gfs2_write_cache_jdata(mapping, wbc);
	if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
		gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
			       GFS2_LFC_JDATA_WPAGES);
		ret = gfs2_write_cache_jdata(mapping, wbc);
	}
	return ret;
}

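/*
 * The second gfs2_write_cache_jdata() pass is needed because journaled
 * buffers may still have been pinned in the log during the first pass;
 * the log flush unpins them, after which they can actually be written
 * back in place.
 */
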
/**
 * stuffed_read_folio - Fill in a Linux folio with stuffed file data
 * @ip: the inode
 * @folio: the folio
 *
 * Returns: errno
 */
static int stuffed_read_folio(struct gfs2_inode *ip, struct folio *folio)
{
	struct buffer_head *dibh = NULL;
	size_t dsize = i_size_read(&ip->i_inode);
	void *from = NULL;
	int error = 0;

	/*
	 * Due to the order of unstuffing files and ->fault(), we can be
	 * asked for a zero folio in the case of a stuffed file being extended,
	 * so we need to supply one here. It doesn't happen often.
	 */
	if (unlikely(folio->index)) {
		dsize = 0;
	} else {
		error = gfs2_meta_inode_buffer(ip, &dibh);
		if (error)
			goto out;
		from = dibh->b_data + sizeof(struct gfs2_dinode);
	}

	folio_fill_tail(folio, 0, from, dsize);
	brelse(dibh);
out:
	folio_end_read(folio, error == 0);

	return error;
}

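/*
 * Layout reminder for the copy above: a stuffed file keeps its entire
 * contents inside the on-disk inode block, immediately after the
 * struct gfs2_dinode header, so filling folio 0 is a plain copy of at
 * most (block size - sizeof(struct gfs2_dinode)) bytes.
 */
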
/**
 * gfs2_read_folio - read a folio from a file
 * @file: The file to read
 * @folio: The folio in the file
 */
static int gfs2_read_folio(struct file *file, struct folio *folio)
{
	struct inode *inode = folio->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int error;

	if (!gfs2_is_jdata(ip) ||
	    (i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) {
		error = iomap_read_folio(folio, &gfs2_iomap_ops);
	} else if (gfs2_is_stuffed(ip)) {
		error = stuffed_read_folio(ip, folio);
	} else {
		error = mpage_read_folio(folio, gfs2_block_map);
	}

	if (gfs2_withdrawing_or_withdrawn(sdp))
		error = -EIO;

	return error;
}

/**
 * gfs2_internal_read - read an internal file
 * @ip: The gfs2 inode
 * @buf: The buffer to fill
 * @pos: The file position
 * @size: The amount to read
 *
 * Returns: The amount of data actually copied or the error
 */
ssize_t gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
			   size_t size)
{
	struct address_space *mapping = ip->i_inode.i_mapping;
	unsigned long index = *pos >> PAGE_SHIFT;
	size_t copied = 0;

	do {
		size_t offset, chunk;
		struct folio *folio;

		folio = read_cache_folio(mapping, index, gfs2_read_folio, NULL);
		if (IS_ERR(folio)) {
			if (PTR_ERR(folio) == -EINTR)
				continue;
			return PTR_ERR(folio);
		}
		offset = *pos + copied - folio_pos(folio);
		chunk = min(size - copied, folio_size(folio) - offset);
		memcpy_from_folio(buf + copied, folio, offset, chunk);
		index = folio_next_index(folio);
		folio_put(folio);
		copied += chunk;
	} while(copied < size);
	return size;
}

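/*
 * Minimal usage sketch (hypothetical caller): read the first size bytes
 * of an internal inode such as the rindex:
 *
 *	loff_t pos = 0;
 *	ssize_t ret = gfs2_internal_read(ip, buf, &pos, size);
 *
 * The caller is expected to know that the file holds at least size
 * bytes; on success the full size is returned.
 */
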
/**
 * gfs2_readahead - Read a bunch of pages at once
 * @rac: Read-ahead control structure
 *
 * Some notes:
 * 1. This is only for readahead, so we can simply ignore any things
 *    which are slightly inconvenient (such as locking conflicts between
 *    the page lock and the glock) and return having done no I/O. It's
 *    obviously not something we'd want to do on too regular a basis.
 *    Any I/O we ignore at this time will be done via readpage later.
 * 2. We don't handle stuffed files here; we let readpage do the honours.
 * 3. mpage_readahead() does most of the heavy lifting in the common case.
 * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places.
 */

static void gfs2_readahead(struct readahead_control *rac)
{
	struct inode *inode = rac->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);

	if (gfs2_is_stuffed(ip))
		;
	else if (gfs2_is_jdata(ip))
		mpage_readahead(rac, gfs2_block_map);
	else
		iomap_readahead(rac, &gfs2_iomap_ops);
}

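/*
 * On note 4 above: BH_Boundary tells the mpage code that the next block
 * is discontiguous (typically because an indirect block must be read
 * first), so the bio built so far should be submitted immediately.
 */
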
/**
 * adjust_fs_space - Adjusts the free space available due to gfs2_grow
 * @inode: the rindex inode
 */
void adjust_fs_space(struct inode *inode)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct buffer_head *m_bh;
	u64 fs_total, new_free;

	if (gfs2_trans_begin(sdp, 2 * RES_STATFS, 0) != 0)
		return;

	/* Total up the file system space, according to the latest rindex. */
	fs_total = gfs2_ri_total(sdp);
	if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0)
		goto out;

	spin_lock(&sdp->sd_statfs_spin);
	gfs2_statfs_change_in(m_sc, m_bh->b_data +
			      sizeof(struct gfs2_dinode));
	if (fs_total > (m_sc->sc_total + l_sc->sc_total))
		new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
	else
		new_free = 0;
	spin_unlock(&sdp->sd_statfs_spin);
	fs_warn(sdp, "File system extended by %llu blocks.\n",
		(unsigned long long)new_free);
	gfs2_statfs_change(sdp, new_free, new_free, 0);

	update_statfs(sdp, m_bh);
	brelse(m_bh);
out:
	sdp->sd_rindex_uptodate = 0;
	gfs2_trans_end(sdp);
}

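/*
 * Arithmetic sketch: if gfs2_grow added 1000 blocks, gfs2_ri_total() now
 * exceeds the combined master and local statfs totals by 1000, so
 * new_free == 1000 and the free-space counters are raised accordingly.
 */
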
static bool jdata_dirty_folio(struct address_space *mapping,
		struct folio *folio)
{
	if (current->journal_info)
		folio_set_checked(folio);
	return block_dirty_folio(mapping, folio);
}

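/*
 * current->journal_info is non-NULL while a gfs2 transaction is in
 * progress on this task, so the checked flag records "dirtied inside a
 * transaction" for __gfs2_jdata_write_folio() above to act on.
 */
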
/**
 * gfs2_bmap - Block map function
 * @mapping: Address space info
 * @lblock: The block to map
 *
 * Returns: The disk address for the block or 0 on hole or error
 */

static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
{
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_holder i_gh;
	sector_t dblock = 0;
	int error;

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
	if (error)
		return 0;

	if (!gfs2_is_stuffed(ip))
		dblock = iomap_bmap(mapping, lblock, &gfs2_iomap_ops);

	gfs2_glock_dq_uninit(&i_gh);

	return dblock;
}

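/*
 * This backs the FIBMAP ioctl for gfs2. Stuffed files report 0 (a hole)
 * because their data lives inside the inode block rather than in an
 * addressable data block.
 */
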
static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

	lock_buffer(bh);
	gfs2_log_lock(sdp);
	clear_buffer_dirty(bh);
	bd = bh->b_private;
	if (bd) {
		if (!list_empty(&bd->bd_list) && !buffer_pinned(bh))
			list_del_init(&bd->bd_list);
		else {
			spin_lock(&sdp->sd_ail_lock);
			gfs2_remove_from_journal(bh, REMOVE_JDATA);
			spin_unlock(&sdp->sd_ail_lock);
		}
	}
	bh->b_bdev = NULL;
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	gfs2_log_unlock(sdp);
	unlock_buffer(bh);
}

static void gfs2_invalidate_folio(struct folio *folio, size_t offset,
				  size_t length)
{
	struct gfs2_sbd *sdp = GFS2_SB(folio->mapping->host);
	size_t stop = offset + length;
	int partial_page = (offset || length < folio_size(folio));
	struct buffer_head *bh, *head;
	unsigned long pos = 0;

	BUG_ON(!folio_test_locked(folio));
	if (!partial_page)
		folio_clear_checked(folio);
	head = folio_buffers(folio);
	if (!head)
		goto out;

	bh = head;
	do {
		if (pos + bh->b_size > stop)
			return;

		if (offset <= pos)
			gfs2_discard(sdp, bh);
		pos += bh->b_size;
		bh = bh->b_this_page;
	} while (bh != head);
out:
	if (!partial_page)
		filemap_release_folio(folio, 0);
}

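/*
 * Partial invalidation example: for a 4KiB folio with 1KiB buffers,
 * invalidating bytes [0, 2048) discards the first two buffers and leaves
 * the rest untouched; the folio itself is only released when the whole
 * folio range is invalidated.
 */
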
/**
 * gfs2_release_folio - free the metadata associated with a folio
 * @folio: the folio that's being released
 * @gfp_mask: passed from Linux VFS, ignored by us
 *
 * Calls try_to_free_buffers() to free the buffers and put the folio if the
 * buffers can be released.
 *
 * Returns: true if the folio was put or else false
 */

bool gfs2_release_folio(struct folio *folio, gfp_t gfp_mask)
{
	struct address_space *mapping = folio->mapping;
	struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
	struct buffer_head *bh, *head;
	struct gfs2_bufdata *bd;

	head = folio_buffers(folio);
	if (!head)
		return false;

	/*
	 * mm accommodates an old ext3 case where clean folios might
	 * not have had the dirty bit cleared. Thus, it can send actual
	 * dirty folios to ->release_folio() via shrink_active_list().
	 *
	 * As a workaround, we skip folios that contain dirty buffers
	 * below. Once ->release_folio isn't called on dirty folios
	 * anymore, we can warn on dirty buffers like we used to here
	 * again.
	 */

	gfs2_log_lock(sdp);
	bh = head;
	do {
		if (atomic_read(&bh->b_count))
			goto cannot_release;
		bd = bh->b_private;
		if (bd && bd->bd_tr)
			goto cannot_release;
		if (buffer_dirty(bh) || WARN_ON(buffer_pinned(bh)))
			goto cannot_release;
		bh = bh->b_this_page;
	} while (bh != head);

	bh = head;
	do {
		bd = bh->b_private;
		if (bd) {
			gfs2_assert_warn(sdp, bd->bd_bh == bh);
			bd->bd_bh = NULL;
			bh->b_private = NULL;
			/*
			 * The bd may still be queued as a revoke, in which
			 * case we must not dequeue nor free it.
			 */
			if (!bd->bd_blkno && !list_empty(&bd->bd_list))
				list_del_init(&bd->bd_list);
			if (list_empty(&bd->bd_list))
				kmem_cache_free(gfs2_bufdata_cachep, bd);
		}

		bh = bh->b_this_page;
	} while (bh != head);
	gfs2_log_unlock(sdp);

	return try_to_free_buffers(folio);

cannot_release:
	gfs2_log_unlock(sdp);
	return false;
}

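/*
 * The function works in two passes under the log lock: the first pass
 * only checks that every buffer is releasable (no references, not part
 * of a transaction, not dirty or pinned); the second pass then detaches
 * and frees the bufdata. This keeps the folio's buffers in a consistent
 * state if any one of them turns out to be busy.
 */
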
static const struct address_space_operations gfs2_aops = {
	.writepages = gfs2_writepages,
	.read_folio = gfs2_read_folio,
	.readahead = gfs2_readahead,
	.dirty_folio = iomap_dirty_folio,
	.release_folio = iomap_release_folio,
	.invalidate_folio = iomap_invalidate_folio,
	.bmap = gfs2_bmap,
	.migrate_folio = filemap_migrate_folio,
	.is_partially_uptodate = iomap_is_partially_uptodate,
	.error_remove_folio = generic_error_remove_folio,
};

static const struct address_space_operations gfs2_jdata_aops = {
	.writepages = gfs2_jdata_writepages,
	.read_folio = gfs2_read_folio,
	.readahead = gfs2_readahead,
	.dirty_folio = jdata_dirty_folio,
	.bmap = gfs2_bmap,
	.migrate_folio = buffer_migrate_folio,
	.invalidate_folio = gfs2_invalidate_folio,
	.release_folio = gfs2_release_folio,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_folio = generic_error_remove_folio,
};

void gfs2_set_aops(struct inode *inode)
{
	if (gfs2_is_jdata(GFS2_I(inode)))
		inode->i_mapping->a_ops = &gfs2_jdata_aops;
	else
		inode->i_mapping->a_ops = &gfs2_aops;
}