// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/file.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/stat.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/falloc.h>
#include <linux/types.h>
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <linux/mount.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
#include <linux/uuid.h>
#include <linux/file.h>
#include <linux/nls.h>
#include <linux/sched/signal.h>
#include <linux/fileattr.h>
#include <linux/fadvise.h>
#include <linux/iomap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "acl.h"
#include "gc.h"
#include "iostat.h"
#include <trace/events/f2fs.h>
#include <uapi/linux/f2fs.h>
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vmf->vma->vm_file);
	vm_flags_t flags = vmf->vma->vm_flags;
	vm_fault_t ret;

	ret = filemap_fault(vmf);
	if (ret & VM_FAULT_LOCKED)
		f2fs_update_iostat(F2FS_I_SB(inode), inode,
					APP_MAPPED_READ_IO, F2FS_BLKSIZE);

	trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret);

	return ret;
}
static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
{
	struct folio *folio = page_folio(vmf->page);
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	bool need_alloc = !f2fs_is_pinned_file(inode);
	vm_fault_t ret;
	int err = 0;

	if (unlikely(IS_IMMUTABLE(inode)))
		return VM_FAULT_SIGBUS;

	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
		err = -EIO;
		goto err;
	}

	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
		goto err;
	}

	if (!f2fs_is_checkpoint_ready(sbi)) {
		err = -ENOSPC;
		goto err;
	}

	err = f2fs_convert_inline_inode(inode);
	if (err)
		goto err;

#ifdef CONFIG_F2FS_FS_COMPRESSION
	if (f2fs_compressed_file(inode)) {
		int ret = f2fs_is_compressed_cluster(inode, folio->index);

		if (ret < 0) {
			err = ret;
			goto err;
		} else if (ret) {
			need_alloc = false;
		}
	}
#endif
	/* should do out of any locked page */
	if (need_alloc)
		f2fs_balance_fs(sbi, true);

	sb_start_pagefault(inode->i_sb);

	f2fs_bug_on(sbi, f2fs_has_inline_data(inode));

	file_update_time(vmf->vma->vm_file);
	filemap_invalidate_lock_shared(inode->i_mapping);
	folio_lock(folio);
	if (unlikely(folio->mapping != inode->i_mapping ||
			folio_pos(folio) > i_size_read(inode) ||
			!folio_test_uptodate(folio))) {
		folio_unlock(folio);
		err = -EFAULT;
		goto out_sem;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	if (need_alloc) {
		/* block allocation */
		err = f2fs_get_block_locked(&dn, folio->index);
	} else {
		err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
		f2fs_put_dnode(&dn);
		if (f2fs_is_pinned_file(inode) &&
		    !__is_valid_data_blkaddr(dn.data_blkaddr))
			err = -EIO;
	}

	if (err) {
		folio_unlock(folio);
		goto out_sem;
	}

	f2fs_wait_on_page_writeback(folio_page(folio, 0), DATA, false, true);

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);

	/*
	 * check to see if the page is mapped already (no holes)
	 */
	if (folio_test_mappedtodisk(folio))
		goto out_sem;

	/* page is wholly or partially inside EOF */
	if (((loff_t)(folio->index + 1) << PAGE_SHIFT) >
						i_size_read(inode)) {
		loff_t offset;

		offset = i_size_read(inode) & ~PAGE_MASK;
		folio_zero_segment(folio, offset, folio_size(folio));
	}
	folio_mark_dirty(folio);

	f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
	f2fs_update_time(sbi, REQ_TIME);

out_sem:
	filemap_invalidate_unlock_shared(inode->i_mapping);

	sb_end_pagefault(inode->i_sb);
err:
	ret = vmf_fs_error(err);

	trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret);
	return ret;
}
static const struct vm_operations_struct f2fs_file_vm_ops = {
	.fault		= f2fs_filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= f2fs_vm_page_mkwrite,
};
static int get_parent_ino(struct inode *inode, nid_t *pino)
{
	struct dentry *dentry;

	/*
	 * Make sure to get the non-deleted alias.  The alias associated with
	 * the open file descriptor being fsync()'ed may be deleted already.
	 */
	dentry = d_find_alias(inode);
	if (!dentry)
		return 0;

	*pino = d_parent_ino(dentry);
	dput(dentry);
	return 1;
}
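/*
 * Note: the parent ino recorded in i_pino is what roll-forward recovery
 * uses to re-link a fsynced inode.  need_do_checkpoint() below forces a
 * full checkpoint (CP_WRONG_PINO) when it is stale, and try_to_fix_pino()
 * refreshes it once consistency has been secured by a checkpoint.
 */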
static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum cp_reason_type cp_reason = CP_NO_NEEDED;

	if (!S_ISREG(inode->i_mode))
		cp_reason = CP_NON_REGULAR;
	else if (f2fs_compressed_file(inode))
		cp_reason = CP_COMPRESSED;
	else if (inode->i_nlink != 1)
		cp_reason = CP_HARDLINK;
	else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
		cp_reason = CP_SB_NEED_CP;
	else if (file_wrong_pino(inode))
		cp_reason = CP_WRONG_PINO;
	else if (!f2fs_space_for_roll_forward(sbi))
		cp_reason = CP_NO_SPC_ROLL;
	else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
		cp_reason = CP_NODE_NEED_CP;
	else if (test_opt(sbi, FASTBOOT))
		cp_reason = CP_FASTBOOT_MODE;
	else if (F2FS_OPTION(sbi).active_logs == 2)
		cp_reason = CP_SPEC_LOG_NUM;
	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
		f2fs_need_dentry_mark(sbi, inode->i_ino) &&
		f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
							TRANS_DIR_INO))
		cp_reason = CP_RECOVER_DIR;
	else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
							XATTR_DIR_INO))
		cp_reason = CP_XATTR_DIR;

	return cp_reason;
}
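/*
 * Any reason other than CP_NO_NEEDED makes f2fs_do_sync_file() below fall
 * back to a full checkpoint via f2fs_sync_fs(); otherwise fsync() only
 * flushes the inode's dirty node pages and relies on roll-forward
 * recovery after a crash.
 */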
static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
	bool ret = false;

	/* But we need to avoid that there are some inode updates */
	if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino))
		ret = true;
	f2fs_put_page(i, 0);
	return ret;
}
static void try_to_fix_pino(struct inode *inode)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	nid_t pino;

	f2fs_down_write(&fi->i_sem);
	if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
			get_parent_ino(inode, &pino)) {
		f2fs_i_pino_write(inode, pino);
		file_got_pino(inode);
	}
	f2fs_up_write(&fi->i_sem);
}
static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
						int datasync, bool atomic)
{
	struct inode *inode = file->f_mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t ino = inode->i_ino;
	int ret = 0;
	enum cp_reason_type cp_reason = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
	};
	unsigned int seq_id = 0;

	if (unlikely(f2fs_readonly(inode->i_sb)))
		return 0;

	trace_f2fs_sync_file_enter(inode);

	if (S_ISDIR(inode->i_mode))
		goto go_write;

	/* if fdatasync is triggered, let's do in-place-update */
	if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
		set_inode_flag(inode, FI_NEED_IPU);
	ret = file_write_and_wait_range(file, start, end);
	clear_inode_flag(inode, FI_NEED_IPU);

	if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
		trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
		return ret;
	}

	/* if the inode is dirty, let's recover all the time */
	if (!f2fs_skip_inode_update(inode, datasync)) {
		f2fs_write_inode(inode, NULL);
		goto go_write;
	}

	/*
	 * if there is no written data, don't waste time to write recovery info.
	 */
	if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
			!f2fs_exist_written_data(sbi, ino, APPEND_INO)) {

		/* it may call write_inode just prior to fsync */
		if (need_inode_page_update(sbi, ino))
			goto go_write;

		if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
				f2fs_exist_written_data(sbi, ino, UPDATE_INO))
			goto flush_out;
		goto out;
	} else {
		/*
		 * for OPU case, during fsync(), node can be persisted before
		 * data when the lower device doesn't support write barriers,
		 * resulting in data corruption after SPO.
		 * So for strict fsync mode, force to use atomic write semantics
		 * to keep write order in between data/node and last node to
		 * avoid potential data corruption.
		 */
		if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && !atomic)
			atomic = true;
	}
go_write:
	/*
	 * Both fdatasync() and fsync() can be recovered from sudden power-off.
	 */
	f2fs_down_read(&F2FS_I(inode)->i_sem);
	cp_reason = need_do_checkpoint(inode);
	f2fs_up_read(&F2FS_I(inode)->i_sem);

	if (cp_reason) {
		/* all the dirty node pages should be flushed for POR */
		ret = f2fs_sync_fs(inode->i_sb, 1);

		/*
		 * We've secured consistency through sync_fs. Following pino
		 * will be used only for fsynced inodes after checkpoint.
		 */
		try_to_fix_pino(inode);
		clear_inode_flag(inode, FI_APPEND_WRITE);
		clear_inode_flag(inode, FI_UPDATE_WRITE);
		goto out;
	}
sync_nodes:
	atomic_inc(&sbi->wb_sync_req[NODE]);
	ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
	atomic_dec(&sbi->wb_sync_req[NODE]);
	if (ret)
		goto out;

	/* if cp_error was enabled, we should avoid infinite loop */
	if (unlikely(f2fs_cp_error(sbi))) {
		ret = -EIO;
		goto out;
	}

	if (f2fs_need_inode_block_update(sbi, ino)) {
		f2fs_mark_inode_dirty_sync(inode, true);
		f2fs_write_inode(inode, NULL);
		goto sync_nodes;
	}

	/*
	 * If it's atomic_write, it's just fine to keep write ordering. So
	 * here we don't need to wait for node write completion, since we use
	 * the node chain which serializes node blocks. If one of the node
	 * writes is reordered, we can see a simply broken chain, resulting in
	 * stopping roll-forward recovery. It means we'll recover all or no
	 * node blocks given the fsync mark.
	 */
	if (!atomic) {
		ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
		if (ret)
			goto out;
	}

	/* once recovery info is written, don't need to track this */
	f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
	clear_inode_flag(inode, FI_APPEND_WRITE);
flush_out:
	if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
		ret = f2fs_issue_flush(sbi, inode->i_ino);
	if (!ret) {
		f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
		clear_inode_flag(inode, FI_UPDATE_WRITE);
		f2fs_remove_ino_entry(sbi, ino, FLUSH_INO);
	}
	f2fs_update_time(sbi, REQ_TIME);
out:
	trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
	return ret;
}
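/*
 * The "atomic" variant of the sync path above is also used by
 * f2fs_ioc_commit_atomic_write(), which calls f2fs_do_sync_file() with
 * atomic=true so the serialized node chain preserves data/node ordering.
 */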
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
		return -EIO;
	return f2fs_do_sync_file(file, start, end, datasync, false);
}
static bool __found_offset(struct address_space *mapping,
		struct dnode_of_data *dn, pgoff_t index, int whence)
{
	block_t blkaddr = f2fs_data_blkaddr(dn);
	struct inode *inode = mapping->host;
	bool compressed_cluster = false;

	if (f2fs_compressed_file(inode)) {
		block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page,
		    ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size));

		compressed_cluster = first_blkaddr == COMPRESS_ADDR;
	}

	switch (whence) {
	case SEEK_DATA:
		if (__is_valid_data_blkaddr(blkaddr))
			return true;
		if (blkaddr == NEW_ADDR &&
		    xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
			return true;
		if (compressed_cluster)
			return true;
		break;
	case SEEK_HOLE:
		if (compressed_cluster)
			return false;
		if (blkaddr == NULL_ADDR)
			return true;
		break;
	}
	return false;
}
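/*
 * Summary of the check above: a valid on-disk block address counts as
 * data, as does a NEW_ADDR block whose page is dirty in the page cache;
 * NULL_ADDR counts as a hole.  A compressed cluster is treated as data
 * for SEEK_DATA and as non-hole for SEEK_HOLE.
 */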
static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
	struct dnode_of_data dn;
	pgoff_t pgofs, end_offset;
	loff_t data_ofs = offset;
	loff_t isize;
	int err = 0;

	inode_lock_shared(inode);

	isize = i_size_read(inode);
	if (offset >= isize)
		goto fail;

	/* handle inline data case */
	if (f2fs_has_inline_data(inode)) {
		if (whence == SEEK_HOLE) {
			data_ofs = isize;
			goto found;
		} else if (whence == SEEK_DATA) {
			goto found;
		}
	}

	pgofs = (pgoff_t)(offset >> PAGE_SHIFT);

	for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
		if (err && err != -ENOENT) {
			goto fail;
		} else if (err == -ENOENT) {
			/* direct node does not exist */
			if (whence == SEEK_DATA) {
				pgofs = f2fs_get_next_page_offset(&dn, pgofs);
				continue;
			} else {
				goto found;
			}
		}

		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

		/* find data/hole in dnode block */
		for (; dn.ofs_in_node < end_offset;
				dn.ofs_in_node++, pgofs++,
				data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
			block_t blkaddr;

			blkaddr = f2fs_data_blkaddr(&dn);

			if (__is_valid_data_blkaddr(blkaddr) &&
				!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
					blkaddr, DATA_GENERIC_ENHANCE)) {
				f2fs_put_dnode(&dn);
				goto fail;
			}

			if (__found_offset(file->f_mapping, &dn,
							pgofs, whence)) {
				f2fs_put_dnode(&dn);
				goto found;
			}
		}
		f2fs_put_dnode(&dn);
	}

	if (whence == SEEK_DATA)
		goto fail;
found:
	if (whence == SEEK_HOLE && data_ofs > isize)
		data_ofs = isize;
	inode_unlock_shared(inode);
	return vfs_setpos(file, data_ofs, maxbytes);
fail:
	inode_unlock_shared(inode);
	return -ENXIO;
}
static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));

	switch (whence) {
	case SEEK_SET:
	case SEEK_CUR:
	case SEEK_END:
		return generic_file_llseek_size(file, offset, whence,
						maxbytes, i_size_read(inode));
	case SEEK_DATA:
	case SEEK_HOLE:
		if (offset < 0)
			return -ENXIO;
		return f2fs_seek_block(file, offset, whence);
	}

	return -EINVAL;
}
static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file_inode(file);

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
		return -EIO;

	if (!f2fs_is_compress_backend_ready(inode))
		return -EOPNOTSUPP;

	file_accessed(file);
	vma->vm_ops = &f2fs_file_vm_ops;

	f2fs_down_read(&F2FS_I(inode)->i_sem);
	set_inode_flag(inode, FI_MMAP_FILE);
	f2fs_up_read(&F2FS_I(inode)->i_sem);

	return 0;
}
static int finish_preallocate_blocks(struct inode *inode)
{
	int ret;

	inode_lock(inode);
	if (is_inode_flag_set(inode, FI_OPENED_FILE)) {
		inode_unlock(inode);
		return 0;
	}

	if (!file_should_truncate(inode)) {
		set_inode_flag(inode, FI_OPENED_FILE);
		inode_unlock(inode);
		return 0;
	}

	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(inode->i_mapping);

	truncate_setsize(inode, i_size_read(inode));
	ret = f2fs_truncate(inode);

	filemap_invalidate_unlock(inode->i_mapping);
	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	if (!ret)
		set_inode_flag(inode, FI_OPENED_FILE);

	inode_unlock(inode);
	if (ret)
		return ret;

	file_dont_truncate(inode);
	return 0;
}
static int f2fs_file_open(struct inode *inode, struct file *filp)
{
	int err = fscrypt_file_open(inode, filp);

	if (err)
		return err;

	if (!f2fs_is_compress_backend_ready(inode))
		return -EOPNOTSUPP;

	err = fsverity_file_open(inode, filp);
	if (err)
		return err;

	filp->f_mode |= FMODE_NOWAIT;
	filp->f_mode |= FMODE_CAN_ODIRECT;

	err = dquot_file_open(inode, filp);
	if (err)
		return err;

	return finish_preallocate_blocks(inode);
}
void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int nr_free = 0, ofs = dn->ofs_in_node, len = count;
	__le32 *addr;
	bool compressed_cluster = false;
	int cluster_index = 0, valid_blocks = 0;
	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
	bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);

	addr = get_dnode_addr(dn->inode, dn->node_page) + ofs;

	/* Assumption: truncation starts with cluster */
	for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) {
		block_t blkaddr = le32_to_cpu(*addr);

		if (f2fs_compressed_file(dn->inode) &&
					!(cluster_index & (cluster_size - 1))) {
			if (compressed_cluster)
				f2fs_i_compr_blocks_update(dn->inode,
							valid_blocks, false);
			compressed_cluster = (blkaddr == COMPRESS_ADDR);
			valid_blocks = 0;
		}

		if (blkaddr == NULL_ADDR)
			continue;

		f2fs_set_data_blkaddr(dn, NULL_ADDR);

		if (__is_valid_data_blkaddr(blkaddr)) {
			if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
				continue;
			if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
						DATA_GENERIC_ENHANCE))
				continue;
			if (compressed_cluster)
				valid_blocks++;
		}

		f2fs_invalidate_blocks(sbi, blkaddr);

		if (!released || blkaddr != COMPRESS_ADDR)
			nr_free++;
	}

	if (compressed_cluster)
		f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false);

	if (nr_free) {
		pgoff_t fofs;
		/*
		 * once we invalidate valid blkaddr in range [ofs, ofs + count],
		 * we will invalidate all blkaddr in the whole range.
		 */
		fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
							dn->inode) + ofs;
		f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
		f2fs_update_age_extent_cache_range(dn, fofs, len);
		dec_valid_block_count(sbi, dn->inode, nr_free);
	}
	dn->ofs_in_node = ofs;

	f2fs_update_time(sbi, REQ_TIME);
	trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
					 dn->ofs_in_node, nr_free);
}
static int truncate_partial_data_page(struct inode *inode, u64 from,
								bool cache_only)
{
	loff_t offset = from & (PAGE_SIZE - 1);
	pgoff_t index = from >> PAGE_SHIFT;
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	if (!offset && !cache_only)
		return 0;

	if (cache_only) {
		page = find_lock_page(mapping, index);
		if (page && PageUptodate(page))
			goto truncate_out;
		f2fs_put_page(page, 1);
		return 0;
	}

	page = f2fs_get_lock_data_page(inode, index, true);
	if (IS_ERR(page))
		return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
truncate_out:
	f2fs_wait_on_page_writeback(page, DATA, true, true);
	zero_user(page, offset, PAGE_SIZE - offset);

	/* An encrypted inode should have a key and truncate the last page. */
	f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode));
	if (!cache_only)
		set_page_dirty(page);
	f2fs_put_page(page, 1);
	return 0;
}
int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	pgoff_t free_from;
	int count = 0, err = 0;
	struct page *ipage;
	bool truncate_page = false;

	trace_f2fs_truncate_blocks_enter(inode, from);

	if (IS_DEVICE_ALIASING(inode) && from) {
		err = -EINVAL;
		goto out_err;
	}

	free_from = (pgoff_t)F2FS_BLK_ALIGN(from);

	if (free_from >= max_file_blocks(inode))
		goto free_partial;

	if (lock)
		f2fs_lock_op(sbi);

	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto out;
	}

	if (IS_DEVICE_ALIASING(inode)) {
		struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
		struct extent_info ei = et->largest;
		unsigned int i;

		for (i = 0; i < ei.len; i++)
			f2fs_invalidate_blocks(sbi, ei.blk + i);

		dec_valid_block_count(sbi, inode, ei.len);
		f2fs_update_time(sbi, REQ_TIME);

		f2fs_put_page(ipage, 1);
		goto out;
	}

	if (f2fs_has_inline_data(inode)) {
		f2fs_truncate_inline_inode(inode, ipage, from);
		f2fs_put_page(ipage, 1);
		truncate_page = true;
		goto out;
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
	if (err) {
		if (err == -ENOENT)
			goto free_next;
		goto out;
	}

	count = ADDRS_PER_PAGE(dn.node_page, inode);

	count -= dn.ofs_in_node;
	f2fs_bug_on(sbi, count < 0);

	if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
		f2fs_truncate_data_blocks_range(&dn, count);
		free_from += count;
	}

	f2fs_put_dnode(&dn);
free_next:
	err = f2fs_truncate_inode_blocks(inode, free_from);
out:
	if (lock)
		f2fs_unlock_op(sbi);
free_partial:
	/* lastly zero out the first data page */
	if (!err)
		err = truncate_partial_data_page(inode, from, truncate_page);
out_err:
	trace_f2fs_truncate_blocks_exit(inode, err);
	return err;
}
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
	u64 free_from = from;
	int err;

#ifdef CONFIG_F2FS_FS_COMPRESSION
	/*
	 * for compressed file, only support cluster size
	 * aligned truncation.
	 */
	if (f2fs_compressed_file(inode))
		free_from = round_up(from,
				F2FS_I(inode)->i_cluster_size << PAGE_SHIFT);
#endif

	err = f2fs_do_truncate_blocks(inode, free_from, lock);
	if (err)
		return err;

#ifdef CONFIG_F2FS_FS_COMPRESSION
	/*
	 * For compressed file, after release compress blocks, don't allow write
	 * direct, but we should allow write direct after truncate to zero.
	 */
	if (f2fs_compressed_file(inode) && !free_from
			&& is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
		clear_inode_flag(inode, FI_COMPRESS_RELEASED);

	if (from != free_from) {
		err = f2fs_truncate_partial_cluster(inode, from, lock);
		if (err)
			return err;
	}
#endif

	return err;
}
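/*
 * Rough example (assuming a 4-block cluster and 4KB blocks): truncating a
 * compressed file to byte offset 6144 first truncates at the next cluster
 * boundary (free_from = round_up(6144, 16384) = 16384) and then lets
 * f2fs_truncate_partial_cluster() handle the partially-truncated cluster.
 */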
int f2fs_truncate(struct inode *inode)
{
	int err;

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
		return -EIO;

	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
				S_ISLNK(inode->i_mode)))
		return 0;

	trace_f2fs_truncate(inode);

	if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE))
		return -EIO;

	err = f2fs_dquot_initialize(inode);
	if (err)
		return err;

	/* we should check inline_data size */
	if (!f2fs_may_inline_data(inode)) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}

	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
	if (err)
		return err;

	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	f2fs_mark_inode_dirty_sync(inode, false);
	return 0;
}
static bool f2fs_force_buffered_io(struct inode *inode, int rw)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (!fscrypt_dio_supported(inode))
		return true;
	if (fsverity_active(inode))
		return true;
	if (f2fs_compressed_file(inode))
		return true;

	/*
	 * only force direct read to use buffered IO, for direct write,
	 * it expects inline data conversion before committing IO.
	 */
	if (f2fs_has_inline_data(inode) && rw == READ)
		return true;

	/* disallow direct IO if any of devices has unaligned blksize */
	if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
		return true;
	/*
	 * for blkzoned device, fallback direct IO to buffered IO, so
	 * all IOs can be serialized by log-structured write.
	 */
	if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) &&
	    !f2fs_is_pinned_file(inode))
		return true;
	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
		return true;

	return false;
}
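/*
 * f2fs_getattr() below only advertises STATX_DIOALIGN when this helper
 * returns false for writes, i.e. when direct I/O is actually usable.
 */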
int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path,
		 struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
	struct inode *inode = d_inode(path->dentry);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_inode *ri = NULL;
	unsigned int flags;

	if (f2fs_has_extra_attr(inode) &&
			f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
			F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
		stat->result_mask |= STATX_BTIME;
		stat->btime.tv_sec = fi->i_crtime.tv_sec;
		stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
	}

	/*
	 * Return the DIO alignment restrictions if requested.  We only return
	 * this information when requested, since on encrypted files it might
	 * take a fair bit of work to get if the file wasn't opened recently.
	 *
	 * f2fs sometimes supports DIO reads but not DIO writes.  STATX_DIOALIGN
	 * cannot represent that, so in that case we report no DIO support.
	 */
	if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
		unsigned int bsize = i_blocksize(inode);

		stat->result_mask |= STATX_DIOALIGN;
		if (!f2fs_force_buffered_io(inode, WRITE)) {
			stat->dio_mem_align = bsize;
			stat->dio_offset_align = bsize;
		}
	}

	flags = fi->i_flags;
	if (flags & F2FS_COMPR_FL)
		stat->attributes |= STATX_ATTR_COMPRESSED;
	if (flags & F2FS_APPEND_FL)
		stat->attributes |= STATX_ATTR_APPEND;
	if (IS_ENCRYPTED(inode))
		stat->attributes |= STATX_ATTR_ENCRYPTED;
	if (flags & F2FS_IMMUTABLE_FL)
		stat->attributes |= STATX_ATTR_IMMUTABLE;
	if (flags & F2FS_NODUMP_FL)
		stat->attributes |= STATX_ATTR_NODUMP;
	if (IS_VERITY(inode))
		stat->attributes |= STATX_ATTR_VERITY;

	stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
				  STATX_ATTR_APPEND |
				  STATX_ATTR_ENCRYPTED |
				  STATX_ATTR_IMMUTABLE |
				  STATX_ATTR_NODUMP |
				  STATX_ATTR_VERITY);

	generic_fillattr(idmap, request_mask, inode, stat);

	/* we need to show initial sectors used for inline_data/dentries */
	if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
					f2fs_has_inline_dentry(inode))
		stat->blocks += (stat->size + 511) >> 9;

	return 0;
}
#ifdef CONFIG_F2FS_FS_POSIX_ACL
static void __setattr_copy(struct mnt_idmap *idmap,
			   struct inode *inode, const struct iattr *attr)
{
	unsigned int ia_valid = attr->ia_valid;

	i_uid_update(idmap, attr, inode);
	i_gid_update(idmap, attr, inode);
	if (ia_valid & ATTR_ATIME)
		inode_set_atime_to_ts(inode, attr->ia_atime);
	if (ia_valid & ATTR_MTIME)
		inode_set_mtime_to_ts(inode, attr->ia_mtime);
	if (ia_valid & ATTR_CTIME)
		inode_set_ctime_to_ts(inode, attr->ia_ctime);
	if (ia_valid & ATTR_MODE) {
		umode_t mode = attr->ia_mode;

		if (!in_group_or_capable(idmap, inode,
					 i_gid_into_vfsgid(idmap, inode)))
			mode &= ~S_ISGID;
		set_acl_inode(inode, mode);
	}
}
#else
#define __setattr_copy setattr_copy
#endif
int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		 struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int err;

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
		return -EIO;

	if (unlikely(IS_IMMUTABLE(inode)))
		return -EPERM;

	if (unlikely(IS_APPEND(inode) &&
			(attr->ia_valid & (ATTR_MODE | ATTR_UID |
					   ATTR_GID | ATTR_TIMES_SET))))
		return -EPERM;

	if ((attr->ia_valid & ATTR_SIZE)) {
		if (!f2fs_is_compress_backend_ready(inode) ||
				IS_DEVICE_ALIASING(inode))
			return -EOPNOTSUPP;
		if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) &&
			!IS_ALIGNED(attr->ia_size,
			F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
			return -EINVAL;
	}

	err = setattr_prepare(idmap, dentry, attr);
	if (err)
		return err;

	err = fscrypt_prepare_setattr(dentry, attr);
	if (err)
		return err;

	err = fsverity_prepare_setattr(dentry, attr);
	if (err)
		return err;

	if (is_quota_modification(idmap, inode, attr)) {
		err = f2fs_dquot_initialize(inode);
		if (err)
			return err;
	}
	if (i_uid_needs_update(idmap, attr, inode) ||
	    i_gid_needs_update(idmap, attr, inode)) {
		f2fs_lock_op(F2FS_I_SB(inode));
		err = dquot_transfer(idmap, inode, attr);
		if (err) {
			set_sbi_flag(F2FS_I_SB(inode),
					SBI_QUOTA_NEED_REPAIR);
			f2fs_unlock_op(F2FS_I_SB(inode));
			return err;
		}
		/*
		 * update uid/gid under lock_op(), so that dquot and inode can
		 * be updated atomically.
		 */
		i_uid_update(idmap, attr, inode);
		i_gid_update(idmap, attr, inode);
		f2fs_mark_inode_dirty_sync(inode, true);
		f2fs_unlock_op(F2FS_I_SB(inode));
	}

	if (attr->ia_valid & ATTR_SIZE) {
		loff_t old_size = i_size_read(inode);

		if (attr->ia_size > MAX_INLINE_DATA(inode)) {
			/*
			 * should convert inline inode before i_size_write to
			 * keep smaller than inline_data size with inline flag.
			 */
			err = f2fs_convert_inline_inode(inode);
			if (err)
				return err;
		}

		/*
		 * wait for inflight dio, blocks should be removed after
		 * IO completion.
		 */
		if (attr->ia_size < old_size)
			inode_dio_wait(inode);

		f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
		filemap_invalidate_lock(inode->i_mapping);

		truncate_setsize(inode, attr->ia_size);

		if (attr->ia_size <= old_size)
			err = f2fs_truncate(inode);
		/*
		 * do not trim all blocks after i_size if target size is
		 * larger than i_size.
		 */
		filemap_invalidate_unlock(inode->i_mapping);
		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
		if (err)
			return err;

		spin_lock(&fi->i_size_lock);
		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
		fi->last_disk_size = i_size_read(inode);
		spin_unlock(&fi->i_size_lock);
	}

	__setattr_copy(idmap, inode, attr);

	if (attr->ia_valid & ATTR_MODE) {
		err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode));

		if (is_inode_flag_set(inode, FI_ACL_MODE)) {
			if (!err)
				inode->i_mode = fi->i_acl_mode;
			clear_inode_flag(inode, FI_ACL_MODE);
		}
	}

	/* file size may have changed here */
	f2fs_mark_inode_dirty_sync(inode, true);

	/* inode change will produce dirty node pages flushed by checkpoint */
	f2fs_balance_fs(F2FS_I_SB(inode), true);

	return err;
}
const struct inode_operations f2fs_file_inode_operations = {
	.getattr	= f2fs_getattr,
	.setattr	= f2fs_setattr,
	.get_inode_acl	= f2fs_get_acl,
	.set_acl	= f2fs_set_acl,
	.listxattr	= f2fs_listxattr,
	.fiemap		= f2fs_fiemap,
	.fileattr_get	= f2fs_fileattr_get,
	.fileattr_set	= f2fs_fileattr_set,
};
static int fill_zero(struct inode *inode, pgoff_t index,
					loff_t start, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;

	if (!len)
		return 0;

	f2fs_balance_fs(sbi, true);

	f2fs_lock_op(sbi);
	page = f2fs_get_new_data_page(inode, NULL, index, false);
	f2fs_unlock_op(sbi);

	if (IS_ERR(page))
		return PTR_ERR(page);

	f2fs_wait_on_page_writeback(page, DATA, true, true);
	zero_user(page, start, len);
	set_page_dirty(page);
	f2fs_put_page(page, 1);
	return 0;
}
int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
{
	int err;

	while (pg_start < pg_end) {
		struct dnode_of_data dn;
		pgoff_t end_offset, count;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
		if (err) {
			if (err == -ENOENT) {
				pg_start = f2fs_get_next_page_offset(&dn,
								pg_start);
				continue;
			}
			return err;
		}

		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
		count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);

		f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);

		f2fs_truncate_data_blocks_range(&dn, count);
		f2fs_put_dnode(&dn);

		pg_start += count;
	}
	return 0;
}
static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	pgoff_t pg_start, pg_end;
	loff_t off_start, off_end;
	int ret;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;

	off_start = offset & (PAGE_SIZE - 1);
	off_end = (offset + len) & (PAGE_SIZE - 1);

	if (pg_start == pg_end) {
		ret = fill_zero(inode, pg_start, off_start,
						off_end - off_start);
		if (ret)
			return ret;
	} else {
		if (off_start) {
			ret = fill_zero(inode, pg_start++, off_start,
						PAGE_SIZE - off_start);
			if (ret)
				return ret;
		}
		if (off_end) {
			ret = fill_zero(inode, pg_end, 0, off_end);
			if (ret)
				return ret;
		}

		if (pg_start < pg_end) {
			loff_t blk_start, blk_end;
			struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

			f2fs_balance_fs(sbi, true);

			blk_start = (loff_t)pg_start << PAGE_SHIFT;
			blk_end = (loff_t)pg_end << PAGE_SHIFT;

			f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
			filemap_invalidate_lock(inode->i_mapping);

			truncate_pagecache_range(inode, blk_start, blk_end - 1);

			f2fs_lock_op(sbi);
			ret = f2fs_truncate_hole(inode, pg_start, pg_end);
			f2fs_unlock_op(sbi);

			filemap_invalidate_unlock(inode->i_mapping);
			f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		}
	}

	return ret;
}
static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
				int *do_replace, pgoff_t off, pgoff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	int ret, done, i;

next_dnode:
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
	if (ret && ret != -ENOENT) {
		return ret;
	} else if (ret == -ENOENT) {
		if (dn.max_level == 0)
			return -ENOENT;
		done = min((pgoff_t)ADDRS_PER_BLOCK(inode) -
						dn.ofs_in_node, len);
		blkaddr += done;
		do_replace += done;
		goto next;
	}

	done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) -
							dn.ofs_in_node, len);
	for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
		*blkaddr = f2fs_data_blkaddr(&dn);

		if (__is_valid_data_blkaddr(*blkaddr) &&
			!f2fs_is_valid_blkaddr(sbi, *blkaddr,
					DATA_GENERIC_ENHANCE)) {
			f2fs_put_dnode(&dn);
			return -EFSCORRUPTED;
		}

		if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {

			if (f2fs_lfs_mode(sbi)) {
				f2fs_put_dnode(&dn);
				return -EOPNOTSUPP;
			}

			/* do not invalidate this block address */
			f2fs_update_data_blkaddr(&dn, NULL_ADDR);
			*do_replace = 1;
		}
	}
	f2fs_put_dnode(&dn);
next:
	len -= done;
	off += done;
	if (len)
		goto next_dnode;
	return 0;
}
static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
				int *do_replace, pgoff_t off, int len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	int ret, i;

	for (i = 0; i < len; i++, do_replace++, blkaddr++) {
		if (*do_replace == 0)
			continue;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
		if (ret) {
			dec_valid_block_count(sbi, inode, 1);
			f2fs_invalidate_blocks(sbi, *blkaddr);
		} else {
			f2fs_update_data_blkaddr(&dn, *blkaddr);
		}
		f2fs_put_dnode(&dn);
	}
	return 0;
}
static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
			block_t *blkaddr, int *do_replace,
			pgoff_t src, pgoff_t dst, pgoff_t len, bool full)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);
	pgoff_t i = 0;
	int ret;

	while (i < len) {
		if (blkaddr[i] == NULL_ADDR && !full) {
			i++;
			continue;
		}

		if (do_replace[i] || blkaddr[i] == NULL_ADDR) {
			struct dnode_of_data dn;
			struct node_info ni;
			loff_t new_size;
			pgoff_t ilen;

			set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
			ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
			if (ret)
				return ret;

			ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
			if (ret) {
				f2fs_put_dnode(&dn);
				return ret;
			}

			ilen = min((pgoff_t)
				ADDRS_PER_PAGE(dn.node_page, dst_inode) -
						dn.ofs_in_node, len - i);
			do {
				dn.data_blkaddr = f2fs_data_blkaddr(&dn);
				f2fs_truncate_data_blocks_range(&dn, 1);

				if (do_replace[i]) {
					f2fs_i_blocks_write(src_inode,
							1, false, false);
					f2fs_i_blocks_write(dst_inode,
							1, true, false);
					f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
					blkaddr[i], ni.version, true, false);

					do_replace[i] = 0;
				}
				dn.ofs_in_node++;
				i++;
				new_size = (loff_t)(dst + i) << PAGE_SHIFT;
				if (dst_inode->i_size < new_size)
					f2fs_i_size_write(dst_inode, new_size);
			} while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));

			f2fs_put_dnode(&dn);
		} else {
			struct page *psrc, *pdst;

			psrc = f2fs_get_lock_data_page(src_inode,
							src + i, true);
			if (IS_ERR(psrc))
				return PTR_ERR(psrc);
			pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i,
								true);
			if (IS_ERR(pdst)) {
				f2fs_put_page(psrc, 1);
				return PTR_ERR(pdst);
			}

			f2fs_wait_on_page_writeback(pdst, DATA, true, true);

			memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE);
			set_page_dirty(pdst);
			set_page_private_gcing(pdst);
			f2fs_put_page(pdst, 1);
			f2fs_put_page(psrc, 1);

			ret = f2fs_truncate_hole(src_inode,
						src + i, src + i + 1);
			if (ret)
				return ret;
			i++;
		}
	}
	return 0;
}
static int __exchange_data_block(struct inode *src_inode,
			struct inode *dst_inode, pgoff_t src, pgoff_t dst,
			pgoff_t len, bool full)
{
	block_t *src_blkaddr;
	int *do_replace;
	pgoff_t olen;
	int ret;

	while (len) {
		olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);

		src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
					array_size(olen, sizeof(block_t)),
					GFP_NOFS);
		if (!src_blkaddr)
			return -ENOMEM;

		do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
					array_size(olen, sizeof(int)),
					GFP_NOFS);
		if (!do_replace) {
			kvfree(src_blkaddr);
			return -ENOMEM;
		}

		ret = __read_out_blkaddrs(src_inode, src_blkaddr,
					do_replace, src, olen);
		if (ret)
			goto roll_back;

		ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr,
					do_replace, src, dst, olen, full);
		if (ret)
			goto roll_back;

		src += olen;
		dst += olen;
		len -= olen;

		kvfree(src_blkaddr);
		kvfree(do_replace);
	}
	return 0;

roll_back:
	__roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen);
	kvfree(src_blkaddr);
	kvfree(do_replace);
	return ret;
}
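/*
 * Both f2fs_collapse_range() and f2fs_insert_range() are built on
 * __exchange_data_block(): collapse shifts blocks toward the start of the
 * file, while insert walks backwards from EOF and shifts them toward the
 * end to open a hole at the insertion point.
 */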
static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	pgoff_t start = offset >> PAGE_SHIFT;
	pgoff_t end = (offset + len) >> PAGE_SHIFT;
	int ret;

	f2fs_balance_fs(sbi, true);

	/* avoid gc operation during block exchange */
	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(inode->i_mapping);

	f2fs_lock_op(sbi);
	f2fs_drop_extent_tree(inode);
	truncate_pagecache(inode, offset);
	ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
	f2fs_unlock_op(sbi);

	filemap_invalidate_unlock(inode->i_mapping);
	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	return ret;
}
static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
	loff_t new_size;
	int ret;

	if (offset + len >= i_size_read(inode))
		return -EINVAL;

	/* collapse range should be aligned to block size of f2fs. */
	if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
		return -EINVAL;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	if (ret)
		return ret;

	ret = f2fs_do_collapse(inode, offset, len);
	if (ret)
		return ret;

	/* write out all moved pages, if possible */
	filemap_invalidate_lock(inode->i_mapping);
	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	truncate_pagecache(inode, offset);

	new_size = i_size_read(inode) - len;
	ret = f2fs_truncate_blocks(inode, new_size, true);
	filemap_invalidate_unlock(inode->i_mapping);
	if (!ret)
		f2fs_i_size_write(inode, new_size);
	return ret;
}
static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
								pgoff_t end)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	pgoff_t index = start;
	unsigned int ofs_in_node = dn->ofs_in_node;
	blkcnt_t count = 0;
	int ret;

	for (; index < end; index++, dn->ofs_in_node++) {
		if (f2fs_data_blkaddr(dn) == NULL_ADDR)
			count++;
	}

	dn->ofs_in_node = ofs_in_node;
	ret = f2fs_reserve_new_blocks(dn, count);
	if (ret)
		return ret;

	dn->ofs_in_node = ofs_in_node;
	for (index = start; index < end; index++, dn->ofs_in_node++) {
		dn->data_blkaddr = f2fs_data_blkaddr(dn);
		/*
		 * f2fs_reserve_new_blocks will not guarantee entire block
		 * allocation.
		 */
		if (dn->data_blkaddr == NULL_ADDR) {
			ret = -ENOSPC;
			break;
		}

		if (dn->data_blkaddr == NEW_ADDR)
			continue;

		if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
					DATA_GENERIC_ENHANCE)) {
			ret = -EFSCORRUPTED;
			break;
		}

		f2fs_invalidate_blocks(sbi, dn->data_blkaddr);
		f2fs_set_data_blkaddr(dn, NEW_ADDR);
	}

	f2fs_update_read_extent_cache_range(dn, start, 0, index - start);
	f2fs_update_age_extent_cache_range(dn, start, index - start);

	return ret;
}
static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
								int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct address_space *mapping = inode->i_mapping;
	pgoff_t index, pg_start, pg_end;
	loff_t new_size = i_size_read(inode);
	loff_t off_start, off_end;
	int ret = 0;

	ret = inode_newsize_ok(inode, (len + offset));
	if (ret)
		return ret;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
	if (ret)
		return ret;

	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;

	off_start = offset & (PAGE_SIZE - 1);
	off_end = (offset + len) & (PAGE_SIZE - 1);

	if (pg_start == pg_end) {
		ret = fill_zero(inode, pg_start, off_start,
						off_end - off_start);
		if (ret)
			return ret;

		new_size = max_t(loff_t, new_size, offset + len);
	} else {
		if (off_start) {
			ret = fill_zero(inode, pg_start++, off_start,
						PAGE_SIZE - off_start);
			if (ret)
				return ret;

			new_size = max_t(loff_t, new_size,
					(loff_t)pg_start << PAGE_SHIFT);
		}

		for (index = pg_start; index < pg_end;) {
			struct dnode_of_data dn;
			unsigned int end_offset;
			pgoff_t end;

			f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
			filemap_invalidate_lock(mapping);

			truncate_pagecache_range(inode,
				(loff_t)index << PAGE_SHIFT,
				((loff_t)pg_end << PAGE_SHIFT) - 1);

			f2fs_lock_op(sbi);

			set_new_dnode(&dn, inode, NULL, NULL, 0);
			ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
			if (ret) {
				f2fs_unlock_op(sbi);
				filemap_invalidate_unlock(mapping);
				f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
				goto out;
			}

			end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
			end = min(pg_end, end_offset - dn.ofs_in_node + index);

			ret = f2fs_do_zero_range(&dn, index, end);
			f2fs_put_dnode(&dn);

			f2fs_unlock_op(sbi);
			filemap_invalidate_unlock(mapping);
			f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

			f2fs_balance_fs(sbi, dn.node_changed);

			if (ret)
				goto out;

			index = end;
			new_size = max_t(loff_t, new_size,
					(loff_t)index << PAGE_SHIFT);
		}

		if (off_end) {
			ret = fill_zero(inode, pg_end, 0, off_end);
			if (ret)
				goto out;

			new_size = max_t(loff_t, new_size, offset + len);
		}
	}

out:
	if (new_size > i_size_read(inode)) {
		if (mode & FALLOC_FL_KEEP_SIZE)
			file_set_keep_isize(inode);
		else
			f2fs_i_size_write(inode, new_size);
	}
	return ret;
}
static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct address_space *mapping = inode->i_mapping;
	pgoff_t nr, pg_start, pg_end, delta, idx;
	loff_t new_size;
	int ret = 0;

	new_size = i_size_read(inode) + len;
	ret = inode_newsize_ok(inode, new_size);
	if (ret)
		return ret;

	if (offset >= i_size_read(inode))
		return -EINVAL;

	/* insert range should be aligned to block size of f2fs. */
	if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
		return -EINVAL;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	f2fs_balance_fs(sbi, true);

	filemap_invalidate_lock(mapping);
	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
	filemap_invalidate_unlock(mapping);
	if (ret)
		return ret;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
	if (ret)
		return ret;

	pg_start = offset >> PAGE_SHIFT;
	pg_end = (offset + len) >> PAGE_SHIFT;
	delta = pg_end - pg_start;
	idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	/* avoid gc operation during block exchange */
	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(mapping);
	truncate_pagecache(inode, offset);

	while (!ret && idx > pg_start) {
		nr = idx - pg_start;
		if (nr > delta)
			nr = delta;
		idx -= nr;

		f2fs_lock_op(sbi);
		f2fs_drop_extent_tree(inode);

		ret = __exchange_data_block(inode, inode, idx,
					idx + delta, nr, false);
		f2fs_unlock_op(sbi);
	}
	filemap_invalidate_unlock(mapping);
	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	if (ret)
		return ret;

	/* write out all moved pages, if possible */
	filemap_invalidate_lock(mapping);
	ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
	truncate_pagecache(inode, offset);
	filemap_invalidate_unlock(mapping);

	if (!ret)
		f2fs_i_size_write(inode, new_size);
	return ret;
}
static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
					loff_t len, int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
			.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
			.m_may_create = true };
	struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
			.init_gc_type = FG_GC,
			.should_migrate_blocks = false,
			.err_gc_skipped = true,
			.nr_free_secs = 0 };
	pgoff_t pg_start, pg_end;
	loff_t new_size;
	loff_t off_end;
	block_t expanded = 0;
	int err;

	err = inode_newsize_ok(inode, (len + offset));
	if (err)
		return err;

	err = f2fs_convert_inline_inode(inode);
	if (err)
		return err;

	f2fs_balance_fs(sbi, true);

	pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
	pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
	off_end = (offset + len) & (PAGE_SIZE - 1);

	map.m_lblk = pg_start;
	map.m_len = pg_end - pg_start;
	if (off_end)
		map.m_len++;

	if (!map.m_len)
		return 0;

	if (f2fs_is_pinned_file(inode)) {
		block_t sec_blks = CAP_BLKS_PER_SEC(sbi);
		block_t sec_len = roundup(map.m_len, sec_blks);

		map.m_len = sec_blks;
next_alloc:
		if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ?
			ZONED_PIN_SEC_REQUIRED_COUNT :
			GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
			f2fs_down_write(&sbi->gc_lock);
			stat_inc_gc_call_count(sbi, FOREGROUND);
			err = f2fs_gc(sbi, &gc_control);
			if (err && err != -ENODATA)
				goto out_err;
		}

		f2fs_down_write(&sbi->pin_sem);

		err = f2fs_allocate_pinning_section(sbi);
		if (err) {
			f2fs_up_write(&sbi->pin_sem);
			goto out_err;
		}

		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
		file_dont_truncate(inode);

		f2fs_up_write(&sbi->pin_sem);

		expanded += map.m_len;
		sec_len -= map.m_len;
		map.m_lblk += map.m_len;
		if (!err && sec_len)
			goto next_alloc;

		map.m_len = expanded;
	} else {
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO);
		expanded = map.m_len;
	}
out_err:
	if (err) {
		pgoff_t last_off;

		if (!expanded)
			return err;

		last_off = pg_start + expanded - 1;

		/* update new size to the failed position */
		new_size = (last_off == pg_end) ? offset + len :
					(loff_t)(last_off + 1) << PAGE_SHIFT;
	} else {
		new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
	}

	if (new_size > i_size_read(inode)) {
		if (mode & FALLOC_FL_KEEP_SIZE)
			file_set_keep_isize(inode);
		else
			f2fs_i_size_write(inode, new_size);
	}

	return err;
}
static long f2fs_fallocate(struct file *file, int mode,
				loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	long ret = 0;

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
		return -EIO;
	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
		return -ENOSPC;
	if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode))
		return -EOPNOTSUPP;

	/* f2fs only support ->fallocate for regular file */
	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (IS_ENCRYPTED(inode) &&
		(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
		return -EOPNOTSUPP;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
			FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
			FALLOC_FL_INSERT_RANGE))
		return -EOPNOTSUPP;

	inode_lock(inode);

	/*
	 * Pinned file should not support partial truncation since the block
	 * can be used by applications.
	 */
	if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
		(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
			FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	ret = file_modified(file);
	if (ret)
		goto out;

	/*
	 * wait for inflight dio, blocks should be removed after IO
	 * completion.
	 */
	inode_dio_wait(inode);

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		if (offset >= inode->i_size)
			goto out;

		ret = f2fs_punch_hole(inode, offset, len);
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		ret = f2fs_collapse_range(inode, offset, len);
	} else if (mode & FALLOC_FL_ZERO_RANGE) {
		ret = f2fs_zero_range(inode, offset, len, mode);
	} else if (mode & FALLOC_FL_INSERT_RANGE) {
		ret = f2fs_insert_range(inode, offset, len);
	} else {
		ret = f2fs_expand_inode_data(inode, offset, len, mode);
	}

	if (!ret) {
		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
		f2fs_mark_inode_dirty_sync(inode, false);
		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	}

out:
	inode_unlock(inode);

	trace_f2fs_fallocate(inode, mode, offset, len, ret);
	return ret;
}
static int f2fs_release_file(struct inode *inode, struct file *filp)
{
	/*
	 * f2fs_release_file is called on every close. So we should not drop
	 * any in-memory pages on a close issued by another process.
	 */
	if (!(filp->f_mode & FMODE_WRITE) ||
			atomic_read(&inode->i_writecount) != 1)
		return 0;

	inode_lock(inode);
	f2fs_abort_atomic_write(inode, true);
	inode_unlock(inode);

	return 0;
}
static int f2fs_file_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);

	/*
	 * If the process doing a transaction crashed, we should roll back.
	 * Otherwise, other readers/writers can see a corrupted database
	 * until all the writers close the file. Since this should be done
	 * before dropping the file lock, it needs to be done in ->flush.
	 */
	if (F2FS_I(inode)->atomic_write_task == current &&
				(current->flags & PF_EXITING)) {
		inode_lock(inode);
		f2fs_abort_atomic_write(inode, true);
		inode_unlock(inode);
	}

	return 0;
}
static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	u32 masked_flags = fi->i_flags & mask;

	/* mask can be shrunk by flags_valid selector */
	iflags &= mask;

	/* Is it quota file? Do not allow user to mess with it */
	if (IS_NOQUOTA(inode))
		return -EPERM;

	if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) {
		if (!f2fs_sb_has_casefold(F2FS_I_SB(inode)))
			return -EOPNOTSUPP;
		if (!f2fs_empty_dir(inode))
			return -ENOTEMPTY;
	}

	if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) {
		if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
			return -EOPNOTSUPP;
		if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL))
			return -EINVAL;
	}

	if ((iflags ^ masked_flags) & F2FS_COMPR_FL) {
		if (masked_flags & F2FS_COMPR_FL) {
			if (!f2fs_disable_compressed_file(inode))
				return -EINVAL;
		} else {
			/* try to convert inline_data to support compression */
			int err = f2fs_convert_inline_inode(inode);

			if (err)
				return err;

			f2fs_down_write(&fi->i_sem);
			if (!f2fs_may_compress(inode) ||
					(S_ISREG(inode->i_mode) &&
					F2FS_HAS_BLOCKS(inode))) {
				f2fs_up_write(&fi->i_sem);
				return -EINVAL;
			}
			err = set_compress_context(inode);
			f2fs_up_write(&fi->i_sem);

			if (err)
				return err;
		}
	}

	fi->i_flags = iflags | (fi->i_flags & ~mask);
	f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) &&
					(fi->i_flags & F2FS_NOCOMP_FL));

	if (fi->i_flags & F2FS_PROJINHERIT_FL)
		set_inode_flag(inode, FI_PROJ_INHERIT);
	else
		clear_inode_flag(inode, FI_PROJ_INHERIT);

	inode_set_ctime_current(inode);
	f2fs_set_inode_flags(inode);
	f2fs_mark_inode_dirty_sync(inode, true);
	return 0;
}
/* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */

/*
 * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry
 * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to
 * F2FS_GETTABLE_FS_FL.  To also make it settable via FS_IOC_SETFLAGS, also add
 * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL.
 *
 * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and
 * FS_IOC_FSSETXATTR is done by the VFS.
 */

static const struct {
	u32 iflag;
	u32 fsflag;
} f2fs_fsflags_map[] = {
	{ F2FS_COMPR_FL,	FS_COMPR_FL },
	{ F2FS_SYNC_FL,		FS_SYNC_FL },
	{ F2FS_IMMUTABLE_FL,	FS_IMMUTABLE_FL },
	{ F2FS_APPEND_FL,	FS_APPEND_FL },
	{ F2FS_NODUMP_FL,	FS_NODUMP_FL },
	{ F2FS_NOATIME_FL,	FS_NOATIME_FL },
	{ F2FS_NOCOMP_FL,	FS_NOCOMP_FL },
	{ F2FS_INDEX_FL,	FS_INDEX_FL },
	{ F2FS_DIRSYNC_FL,	FS_DIRSYNC_FL },
	{ F2FS_PROJINHERIT_FL,	FS_PROJINHERIT_FL },
	{ F2FS_CASEFOLD_FL,	FS_CASEFOLD_FL },
};
#define F2FS_GETTABLE_FS_FL (		\
		/* ... */		\
		FS_PROJINHERIT_FL |	\
		/* ... */		\
		FS_INLINE_DATA_FL |	\
		/* ... */)

#define F2FS_SETTABLE_FS_FL (		\
		/* ... */		\
		FS_PROJINHERIT_FL |	\
		/* ... */)

/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
{
	u32 fsflags = 0;
	int i;

	for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
		if (iflags & f2fs_fsflags_map[i].iflag)
			fsflags |= f2fs_fsflags_map[i].fsflag;

	return fsflags;
}

/* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */
static inline u32 f2fs_fsflags_to_iflags(u32 fsflags)
{
	u32 iflags = 0;
	int i;

	for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
		if (fsflags & f2fs_fsflags_map[i].fsflag)
			iflags |= f2fs_fsflags_map[i].iflag;

	return iflags;
}
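/*
 * Example: an on-disk i_flags value containing F2FS_COMPR_FL and
 * F2FS_NODUMP_FL is reported to FS_IOC_GETFLAGS as
 * FS_COMPR_FL | FS_NODUMP_FL, and the reverse table lookup converts
 * FS_*_FL flags back to on-disk F2FS_*_FL flags on FS_IOC_SETFLAGS.
 */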
static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);

	return put_user(inode->i_generation, (int __user *)arg);
}
static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
{
	struct inode *inode = file_inode(filp);
	struct mnt_idmap *idmap = file_mnt_idmap(filp);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t isize;
	int ret;

	if (!(filp->f_mode & FMODE_WRITE))
		return -EBADF;

	if (!inode_owner_or_capable(idmap, inode))
		return -EACCES;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (filp->f_flags & O_DIRECT)
		return -EINVAL;

	ret = mnt_want_write_file(filp);
	if (ret)
		return ret;

	inode_lock(inode);

	if (!f2fs_disable_compressed_file(inode) ||
			f2fs_is_pinned_file(inode)) {
		ret = -EINVAL;
		goto out;
	}

	if (f2fs_is_atomic_file(inode))
		goto out;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		goto out;

	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	f2fs_down_write(&fi->i_gc_rwsem[READ]);

	/*
	 * Should wait end_io to count F2FS_WB_CP_DATA correctly by
	 * f2fs_is_atomic_file.
	 */
	if (get_dirty_pages(inode))
		f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u",
			  inode->i_ino, get_dirty_pages(inode));
	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	if (ret)
		goto out_unlock;

	/* Check if the inode already has a COW inode */
	if (fi->cow_inode == NULL) {
		/* Create a COW inode for atomic write */
		struct dentry *dentry = file_dentry(filp);
		struct inode *dir = d_inode(dentry->d_parent);

		ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode);
		if (ret)
			goto out_unlock;

		set_inode_flag(fi->cow_inode, FI_COW_FILE);
		clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);

		/* Set the COW inode's atomic_inode to the atomic inode */
		F2FS_I(fi->cow_inode)->atomic_inode = inode;
	} else {
		/* Reuse the already created COW inode */
		f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode));

		invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1);

		ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
		if (ret)
			goto out_unlock;
	}

	f2fs_write_inode(inode, NULL);

	stat_inc_atomic_inode(inode);

	set_inode_flag(inode, FI_ATOMIC_FILE);

	isize = i_size_read(inode);
	fi->original_i_size = isize;
	if (truncate) {
		set_inode_flag(inode, FI_ATOMIC_REPLACE);
		truncate_inode_pages_final(inode->i_mapping);
		f2fs_i_size_write(inode, 0);
		isize = 0;
	}
	f2fs_i_size_write(fi->cow_inode, isize);

out_unlock:
	f2fs_up_write(&fi->i_gc_rwsem[READ]);
	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
	if (ret)
		goto out;

	f2fs_update_time(sbi, REQ_TIME);
	fi->atomic_write_task = current;
	stat_update_max_atomic_write(inode);
	fi->atomic_write_cnt = 0;
out:
	inode_unlock(inode);
	mnt_drop_write_file(filp);
	return ret;
}
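/*
 * Atomic write protocol (as implemented above): writes are staged in a
 * per-inode COW tmpfile (fi->cow_inode) while FI_ATOMIC_FILE is set; the
 * commit ioctl below publishes the staged blocks via
 * f2fs_commit_atomic_write() and fsyncs with atomic semantics, while the
 * abort ioctl simply drops the staged data.
 */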
static int f2fs_ioc_commit_atomic_write(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	struct mnt_idmap *idmap = file_mnt_idmap(filp);
	int ret;

	if (!(filp->f_mode & FMODE_WRITE))
		return -EBADF;

	if (!inode_owner_or_capable(idmap, inode))
		return -EACCES;

	ret = mnt_want_write_file(filp);
	if (ret)
		return ret;

	f2fs_balance_fs(F2FS_I_SB(inode), true);

	inode_lock(inode);

	if (f2fs_is_atomic_file(inode)) {
		ret = f2fs_commit_atomic_write(inode);
		if (!ret)
			ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);

		f2fs_abort_atomic_write(inode, ret);
	} else {
		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
	}

	inode_unlock(inode);
	mnt_drop_write_file(filp);
	return ret;
}
static int f2fs_ioc_abort_atomic_write(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	struct mnt_idmap *idmap = file_mnt_idmap(filp);
	int ret;

	if (!(filp->f_mode & FMODE_WRITE))
		return -EBADF;

	if (!inode_owner_or_capable(idmap, inode))
		return -EACCES;

	ret = mnt_want_write_file(filp);
	if (ret)
		return ret;

	inode_lock(inode);

	f2fs_abort_atomic_write(inode, true);

	inode_unlock(inode);

	mnt_drop_write_file(filp);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return ret;
}
int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag,
						bool readonly, bool need_lock)
{
	struct super_block *sb = sbi->sb;
	int ret = 0;

	switch (flag) {
	case F2FS_GOING_DOWN_FULLSYNC:
		ret = bdev_freeze(sb->s_bdev);
		if (ret)
			goto out;
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
		bdev_thaw(sb->s_bdev);
		break;
	case F2FS_GOING_DOWN_METASYNC:
		/* do checkpoint only */
		ret = f2fs_sync_fs(sb, 1);
		if (ret) {
			if (ret == -EIO)
				ret = 0;
			goto out;
		}
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
		break;
	case F2FS_GOING_DOWN_NOSYNC:
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
		break;
	case F2FS_GOING_DOWN_METAFLUSH:
		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
		break;
	case F2FS_GOING_DOWN_NEED_FSCK:
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
		set_sbi_flag(sbi, SBI_IS_DIRTY);
		/* do checkpoint only */
		ret = f2fs_sync_fs(sb, 1);
		if (ret == -EIO)
			ret = 0;
		goto out;
	default:
		ret = -EINVAL;
		goto out;
	}

	if (readonly)
		goto out;

	/*
	 * grab sb->s_umount to avoid racing w/ remount() and other shutdown
	 * paths.
	 */
	if (need_lock)
		down_write(&sbi->sb->s_umount);

	f2fs_stop_gc_thread(sbi);
	f2fs_stop_discard_thread(sbi);

	f2fs_drop_discard_cmd(sbi);
	clear_opt(sbi, DISCARD);

	if (need_lock)
		up_write(&sbi->sb->s_umount);

	f2fs_update_time(sbi, REQ_TIME);
out:

	trace_f2fs_shutdown(sbi, flag, ret);

	return ret;
}
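/*
 * Shutdown levels handled above, from most to least graceful: FULLSYNC
 * freezes the block device around stopping the checkpoint, METASYNC issues
 * a checkpoint first, NOSYNC stops checkpointing immediately, METAFLUSH
 * only flushes meta pages, and NEED_FSCK just marks the fs for fsck.
 */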
static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	__u32 in;
	int ret;
	bool need_drop = false, readonly = false;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (get_user(in, (__u32 __user *)arg))
		return -EFAULT;

	if (in != F2FS_GOING_DOWN_FULLSYNC) {
		ret = mnt_want_write_file(filp);
		if (ret) {
			if (ret != -EROFS)
				return ret;

			/* fallback to nosync shutdown for readonly fs */
			in = F2FS_GOING_DOWN_NOSYNC;
			readonly = true;
		} else {
			need_drop = true;
		}
	}

	ret = f2fs_do_shutdown(sbi, in, readonly, true);

	if (need_drop)
		mnt_drop_write_file(filp);

	return ret;
}
static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct super_block *sb = inode->i_sb;
	struct fstrim_range range;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!f2fs_hw_support_discard(F2FS_SB(sb)))
		return -EOPNOTSUPP;

	if (copy_from_user(&range, (struct fstrim_range __user *)arg,
				sizeof(range)))
		return -EFAULT;

	ret = mnt_want_write_file(filp);
	if (ret)
		return ret;

	range.minlen = max((unsigned int)range.minlen,
			   bdev_discard_granularity(sb->s_bdev));
	ret = f2fs_trim_fs(F2FS_SB(sb), &range);
	mnt_drop_write_file(filp);
	if (ret < 0)
		return ret;

	if (copy_to_user((struct fstrim_range __user *)arg, &range,
				sizeof(range)))
		return -EFAULT;

	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return 0;
}
static bool uuid_is_nonzero(__u8 u[16])
{
	int i;

	for (i = 0; i < 16; i++)
		if (u[i])
			return true;
	return false;
}
static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	int ret;

	if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
		return -EOPNOTSUPP;

	ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return ret;
}
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
{
	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
		return -EOPNOTSUPP;

	return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
}
static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	u8 encrypt_pw_salt[16];
	int err;

	if (!f2fs_sb_has_encrypt(sbi))
		return -EOPNOTSUPP;

	err = mnt_want_write_file(filp);
	if (err)
		return err;

	f2fs_down_write(&sbi->sb_lock);

	if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
		goto got_it;

	/* update superblock with uuid */
	generate_random_uuid(sbi->raw_super->encrypt_pw_salt);

	err = f2fs_commit_super(sbi, false);
	if (err) {
		/* undo new data */
		memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
		goto out_err;
	}
got_it:
	memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16);
out_err:
	f2fs_up_write(&sbi->sb_lock);
	mnt_drop_write_file(filp);

	if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16))
		err = -EFAULT;

	return err;
}
static int f2fs_ioc_get_encryption_policy_ex(struct file *filp,
					     unsigned long arg)
{
	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
		return -EOPNOTSUPP;

	return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg);
}

static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg)
{
	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
		return -EOPNOTSUPP;

	return fscrypt_ioctl_add_key(filp, (void __user *)arg);
}

static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg)
{
	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
		return -EOPNOTSUPP;

	return fscrypt_ioctl_remove_key(filp, (void __user *)arg);
}

static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp,
						    unsigned long arg)
{
	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
		return -EOPNOTSUPP;

	return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg);
}

static int f2fs_ioc_get_encryption_key_status(struct file *filp,
					      unsigned long arg)
{
	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
		return -EOPNOTSUPP;

	return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);
}

static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg)
{
	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
		return -EOPNOTSUPP;

	return fscrypt_ioctl_get_nonce(filp, (void __user *)arg);
}
2588 static int f2fs_ioc_gc(struct file
*filp
, unsigned long arg
)
2590 struct inode
*inode
= file_inode(filp
);
2591 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
2592 struct f2fs_gc_control gc_control
= { .victim_segno
= NULL_SEGNO
,
2594 .should_migrate_blocks
= false,
2595 .nr_free_secs
= 0 };
2599 if (!capable(CAP_SYS_ADMIN
))
2602 if (get_user(sync
, (__u32 __user
*)arg
))
2605 if (f2fs_readonly(sbi
->sb
))
2608 ret
= mnt_want_write_file(filp
);
2613 if (!f2fs_down_write_trylock(&sbi
->gc_lock
)) {
2618 f2fs_down_write(&sbi
->gc_lock
);
2621 gc_control
.init_gc_type
= sync
? FG_GC
: BG_GC
;
2622 gc_control
.err_gc_skipped
= sync
;
2623 stat_inc_gc_call_count(sbi
, FOREGROUND
);
2624 ret
= f2fs_gc(sbi
, &gc_control
);
2626 mnt_drop_write_file(filp
);
2630 static int __f2fs_ioc_gc_range(struct file
*filp
, struct f2fs_gc_range
*range
)
2632 struct f2fs_sb_info
*sbi
= F2FS_I_SB(file_inode(filp
));
2633 struct f2fs_gc_control gc_control
= {
2634 .init_gc_type
= range
->sync
? FG_GC
: BG_GC
,
2636 .should_migrate_blocks
= false,
2637 .err_gc_skipped
= range
->sync
,
2638 .nr_free_secs
= 0 };
2642 if (!capable(CAP_SYS_ADMIN
))
2644 if (f2fs_readonly(sbi
->sb
))
2647 end
= range
->start
+ range
->len
;
2648 if (end
< range
->start
|| range
->start
< MAIN_BLKADDR(sbi
) ||
2649 end
>= MAX_BLKADDR(sbi
))
2652 ret
= mnt_want_write_file(filp
);
2658 if (!f2fs_down_write_trylock(&sbi
->gc_lock
)) {
2663 f2fs_down_write(&sbi
->gc_lock
);
2666 gc_control
.victim_segno
= GET_SEGNO(sbi
, range
->start
);
2667 stat_inc_gc_call_count(sbi
, FOREGROUND
);
2668 ret
= f2fs_gc(sbi
, &gc_control
);
2674 range
->start
+= CAP_BLKS_PER_SEC(sbi
);
2675 if (range
->start
<= end
)
2678 mnt_drop_write_file(filp
);
2682 static int f2fs_ioc_gc_range(struct file
*filp
, unsigned long arg
)
2684 struct f2fs_gc_range range
;
2686 if (copy_from_user(&range
, (struct f2fs_gc_range __user
*)arg
,
2689 return __f2fs_ioc_gc_range(filp
, &range
);
2692 static int f2fs_ioc_write_checkpoint(struct file
*filp
)
2694 struct inode
*inode
= file_inode(filp
);
2695 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
2698 if (!capable(CAP_SYS_ADMIN
))
2701 if (f2fs_readonly(sbi
->sb
))
2704 if (unlikely(is_sbi_flag_set(sbi
, SBI_CP_DISABLED
))) {
2705 f2fs_info(sbi
, "Skipping Checkpoint. Checkpoints currently disabled.");
2709 ret
= mnt_want_write_file(filp
);
2713 ret
= f2fs_sync_fs(sbi
->sb
, 1);
2715 mnt_drop_write_file(filp
);
2719 static int f2fs_defragment_range(struct f2fs_sb_info
*sbi
,
2721 struct f2fs_defragment
*range
)
2723 struct inode
*inode
= file_inode(filp
);
2724 struct f2fs_map_blocks map
= { .m_next_extent
= NULL
,
2725 .m_seg_type
= NO_CHECK_TYPE
,
2726 .m_may_create
= false };
2727 struct extent_info ei
= {};
2728 pgoff_t pg_start
, pg_end
, next_pgofs
;
2729 unsigned int total
= 0, sec_num
;
2730 block_t blk_end
= 0;
2731 bool fragmented
= false;
2734 f2fs_balance_fs(sbi
, true);
2737 pg_start
= range
->start
>> PAGE_SHIFT
;
2738 pg_end
= min_t(pgoff_t
,
2739 (range
->start
+ range
->len
) >> PAGE_SHIFT
,
2740 DIV_ROUND_UP(i_size_read(inode
), PAGE_SIZE
));
2742 if (is_inode_flag_set(inode
, FI_COMPRESS_RELEASED
) ||
2743 f2fs_is_atomic_file(inode
)) {
2748 /* if in-place-update policy is enabled, don't waste time here */
2749 set_inode_flag(inode
, FI_OPU_WRITE
);
2750 if (f2fs_should_update_inplace(inode
, NULL
)) {
2755 /* writeback all dirty pages in the range */
2756 err
= filemap_write_and_wait_range(inode
->i_mapping
,
2757 pg_start
<< PAGE_SHIFT
,
2758 (pg_end
<< PAGE_SHIFT
) - 1);
2763 * lookup mapping info in extent cache, skip defragmenting if physical
2764 * block addresses are continuous.
2766 if (f2fs_lookup_read_extent_cache(inode
, pg_start
, &ei
)) {
2767 if ((pgoff_t
)ei
.fofs
+ ei
.len
>= pg_end
)
2771 map
.m_lblk
= pg_start
;
2772 map
.m_next_pgofs
= &next_pgofs
;
2775 * lookup mapping info in dnode page cache, skip defragmenting if all
2776 * physical block addresses are continuous even if there are hole(s)
2777 * in logical blocks.
2779 while (map
.m_lblk
< pg_end
) {
2780 map
.m_len
= pg_end
- map
.m_lblk
;
2781 err
= f2fs_map_blocks(inode
, &map
, F2FS_GET_BLOCK_DEFAULT
);
2785 if (!(map
.m_flags
& F2FS_MAP_FLAGS
)) {
2786 map
.m_lblk
= next_pgofs
;
2790 if (blk_end
&& blk_end
!= map
.m_pblk
)
2793 /* record total count of block that we're going to move */
2796 blk_end
= map
.m_pblk
+ map
.m_len
;
2798 map
.m_lblk
+= map
.m_len
;
2806 sec_num
= DIV_ROUND_UP(total
, CAP_BLKS_PER_SEC(sbi
));
2809 * make sure there are enough free section for LFS allocation, this can
2810 * avoid defragment running in SSR mode when free section are allocated
2813 if (has_not_enough_free_secs(sbi
, 0, sec_num
)) {
2818 map
.m_lblk
= pg_start
;
2819 map
.m_len
= pg_end
- pg_start
;
2822 while (map
.m_lblk
< pg_end
) {
2827 map
.m_len
= pg_end
- map
.m_lblk
;
2828 err
= f2fs_map_blocks(inode
, &map
, F2FS_GET_BLOCK_DEFAULT
);
2832 if (!(map
.m_flags
& F2FS_MAP_FLAGS
)) {
2833 map
.m_lblk
= next_pgofs
;
2837 set_inode_flag(inode
, FI_SKIP_WRITES
);
2840 while (idx
< map
.m_lblk
+ map
.m_len
&&
2841 cnt
< BLKS_PER_SEG(sbi
)) {
2844 page
= f2fs_get_lock_data_page(inode
, idx
, true);
2846 err
= PTR_ERR(page
);
2850 f2fs_wait_on_page_writeback(page
, DATA
, true, true);
2852 set_page_dirty(page
);
2853 set_page_private_gcing(page
);
2854 f2fs_put_page(page
, 1);
2863 if (map
.m_lblk
< pg_end
&& cnt
< BLKS_PER_SEG(sbi
))
2866 clear_inode_flag(inode
, FI_SKIP_WRITES
);
2868 err
= filemap_fdatawrite(inode
->i_mapping
);
2873 clear_inode_flag(inode
, FI_SKIP_WRITES
);
2875 clear_inode_flag(inode
, FI_OPU_WRITE
);
2877 inode_unlock(inode
);
2879 range
->len
= (u64
)total
<< PAGE_SHIFT
;
2883 static int f2fs_ioc_defragment(struct file
*filp
, unsigned long arg
)
2885 struct inode
*inode
= file_inode(filp
);
2886 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
2887 struct f2fs_defragment range
;
2890 if (!capable(CAP_SYS_ADMIN
))
2893 if (!S_ISREG(inode
->i_mode
))
2896 if (f2fs_readonly(sbi
->sb
))
2899 if (copy_from_user(&range
, (struct f2fs_defragment __user
*)arg
,
2903 /* verify alignment of offset & size */
2904 if (range
.start
& (F2FS_BLKSIZE
- 1) || range
.len
& (F2FS_BLKSIZE
- 1))
2907 if (unlikely((range
.start
+ range
.len
) >> PAGE_SHIFT
>
2908 max_file_blocks(inode
)))
2911 err
= mnt_want_write_file(filp
);
2915 err
= f2fs_defragment_range(sbi
, filp
, &range
);
2916 mnt_drop_write_file(filp
);
2919 f2fs_update_time(sbi
, REQ_TIME
);
2923 if (copy_to_user((struct f2fs_defragment __user
*)arg
, &range
,
2930 static int f2fs_move_file_range(struct file
*file_in
, loff_t pos_in
,
2931 struct file
*file_out
, loff_t pos_out
, size_t len
)
2933 struct inode
*src
= file_inode(file_in
);
2934 struct inode
*dst
= file_inode(file_out
);
2935 struct f2fs_sb_info
*sbi
= F2FS_I_SB(src
);
2936 size_t olen
= len
, dst_max_i_size
= 0;
2940 if (file_in
->f_path
.mnt
!= file_out
->f_path
.mnt
||
2941 src
->i_sb
!= dst
->i_sb
)
2944 if (unlikely(f2fs_readonly(src
->i_sb
)))
2947 if (!S_ISREG(src
->i_mode
) || !S_ISREG(dst
->i_mode
))
2950 if (IS_ENCRYPTED(src
) || IS_ENCRYPTED(dst
))
2953 if (pos_out
< 0 || pos_in
< 0)
2957 if (pos_in
== pos_out
)
2959 if (pos_out
> pos_in
&& pos_out
< pos_in
+ len
)
2966 if (!inode_trylock(dst
))
2970 if (f2fs_compressed_file(src
) || f2fs_compressed_file(dst
) ||
2971 f2fs_is_pinned_file(src
) || f2fs_is_pinned_file(dst
)) {
2976 if (f2fs_is_atomic_file(src
) || f2fs_is_atomic_file(dst
)) {
2982 if (pos_in
+ len
> src
->i_size
|| pos_in
+ len
< pos_in
)
2985 olen
= len
= src
->i_size
- pos_in
;
2986 if (pos_in
+ len
== src
->i_size
)
2987 len
= ALIGN(src
->i_size
, F2FS_BLKSIZE
) - pos_in
;
2993 dst_osize
= dst
->i_size
;
2994 if (pos_out
+ olen
> dst
->i_size
)
2995 dst_max_i_size
= pos_out
+ olen
;
2997 /* verify the end result is block aligned */
2998 if (!IS_ALIGNED(pos_in
, F2FS_BLKSIZE
) ||
2999 !IS_ALIGNED(pos_in
+ len
, F2FS_BLKSIZE
) ||
3000 !IS_ALIGNED(pos_out
, F2FS_BLKSIZE
))
3003 ret
= f2fs_convert_inline_inode(src
);
3007 ret
= f2fs_convert_inline_inode(dst
);
3011 /* write out all dirty pages from offset */
3012 ret
= filemap_write_and_wait_range(src
->i_mapping
,
3013 pos_in
, pos_in
+ len
);
3017 ret
= filemap_write_and_wait_range(dst
->i_mapping
,
3018 pos_out
, pos_out
+ len
);
3022 f2fs_balance_fs(sbi
, true);
3024 f2fs_down_write(&F2FS_I(src
)->i_gc_rwsem
[WRITE
]);
3027 if (!f2fs_down_write_trylock(&F2FS_I(dst
)->i_gc_rwsem
[WRITE
]))
3032 ret
= __exchange_data_block(src
, dst
, F2FS_BYTES_TO_BLK(pos_in
),
3033 F2FS_BYTES_TO_BLK(pos_out
),
3034 F2FS_BYTES_TO_BLK(len
), false);
3038 f2fs_i_size_write(dst
, dst_max_i_size
);
3039 else if (dst_osize
!= dst
->i_size
)
3040 f2fs_i_size_write(dst
, dst_osize
);
3042 f2fs_unlock_op(sbi
);
3045 f2fs_up_write(&F2FS_I(dst
)->i_gc_rwsem
[WRITE
]);
3047 f2fs_up_write(&F2FS_I(src
)->i_gc_rwsem
[WRITE
]);
3051 inode_set_mtime_to_ts(src
, inode_set_ctime_current(src
));
3052 f2fs_mark_inode_dirty_sync(src
, false);
3054 inode_set_mtime_to_ts(dst
, inode_set_ctime_current(dst
));
3055 f2fs_mark_inode_dirty_sync(dst
, false);
3057 f2fs_update_time(sbi
, REQ_TIME
);
3067 static int __f2fs_ioc_move_range(struct file
*filp
,
3068 struct f2fs_move_range
*range
)
3072 if (!(filp
->f_mode
& FMODE_READ
) ||
3073 !(filp
->f_mode
& FMODE_WRITE
))
3076 CLASS(fd
, dst
)(range
->dst_fd
);
3080 if (!(fd_file(dst
)->f_mode
& FMODE_WRITE
))
3083 err
= mnt_want_write_file(filp
);
3087 err
= f2fs_move_file_range(filp
, range
->pos_in
, fd_file(dst
),
3088 range
->pos_out
, range
->len
);
3090 mnt_drop_write_file(filp
);
3094 static int f2fs_ioc_move_range(struct file
*filp
, unsigned long arg
)
3096 struct f2fs_move_range range
;
3098 if (copy_from_user(&range
, (struct f2fs_move_range __user
*)arg
,
3101 return __f2fs_ioc_move_range(filp
, &range
);
3104 static int f2fs_ioc_flush_device(struct file
*filp
, unsigned long arg
)
3106 struct inode
*inode
= file_inode(filp
);
3107 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
3108 struct sit_info
*sm
= SIT_I(sbi
);
3109 unsigned int start_segno
= 0, end_segno
= 0;
3110 unsigned int dev_start_segno
= 0, dev_end_segno
= 0;
3111 struct f2fs_flush_device range
;
3112 struct f2fs_gc_control gc_control
= {
3113 .init_gc_type
= FG_GC
,
3114 .should_migrate_blocks
= true,
3115 .err_gc_skipped
= true,
3116 .nr_free_secs
= 0 };
3119 if (!capable(CAP_SYS_ADMIN
))
3122 if (f2fs_readonly(sbi
->sb
))
3125 if (unlikely(is_sbi_flag_set(sbi
, SBI_CP_DISABLED
)))
3128 if (copy_from_user(&range
, (struct f2fs_flush_device __user
*)arg
,
3132 if (!f2fs_is_multi_device(sbi
) || sbi
->s_ndevs
- 1 <= range
.dev_num
||
3133 __is_large_section(sbi
)) {
3134 f2fs_warn(sbi
, "Can't flush %u in %d for SEGS_PER_SEC %u != 1",
3135 range
.dev_num
, sbi
->s_ndevs
, SEGS_PER_SEC(sbi
));
3139 ret
= mnt_want_write_file(filp
);
3143 if (range
.dev_num
!= 0)
3144 dev_start_segno
= GET_SEGNO(sbi
, FDEV(range
.dev_num
).start_blk
);
3145 dev_end_segno
= GET_SEGNO(sbi
, FDEV(range
.dev_num
).end_blk
);
3147 start_segno
= sm
->last_victim
[FLUSH_DEVICE
];
3148 if (start_segno
< dev_start_segno
|| start_segno
>= dev_end_segno
)
3149 start_segno
= dev_start_segno
;
3150 end_segno
= min(start_segno
+ range
.segments
, dev_end_segno
);
3152 while (start_segno
< end_segno
) {
3153 if (!f2fs_down_write_trylock(&sbi
->gc_lock
)) {
3157 sm
->last_victim
[GC_CB
] = end_segno
+ 1;
3158 sm
->last_victim
[GC_GREEDY
] = end_segno
+ 1;
3159 sm
->last_victim
[ALLOC_NEXT
] = end_segno
+ 1;
3161 gc_control
.victim_segno
= start_segno
;
3162 stat_inc_gc_call_count(sbi
, FOREGROUND
);
3163 ret
= f2fs_gc(sbi
, &gc_control
);
3171 mnt_drop_write_file(filp
);
3175 static int f2fs_ioc_get_features(struct file
*filp
, unsigned long arg
)
3177 struct inode
*inode
= file_inode(filp
);
3178 u32 sb_feature
= le32_to_cpu(F2FS_I_SB(inode
)->raw_super
->feature
);
3180 /* Must validate to set it with SQLite behavior in Android. */
3181 sb_feature
|= F2FS_FEATURE_ATOMIC_WRITE
;
3183 return put_user(sb_feature
, (u32 __user
*)arg
);
3187 int f2fs_transfer_project_quota(struct inode
*inode
, kprojid_t kprojid
)
3189 struct dquot
*transfer_to
[MAXQUOTAS
] = {};
3190 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
3191 struct super_block
*sb
= sbi
->sb
;
3194 transfer_to
[PRJQUOTA
] = dqget(sb
, make_kqid_projid(kprojid
));
3195 if (IS_ERR(transfer_to
[PRJQUOTA
]))
3196 return PTR_ERR(transfer_to
[PRJQUOTA
]);
3198 err
= __dquot_transfer(inode
, transfer_to
);
3200 set_sbi_flag(sbi
, SBI_QUOTA_NEED_REPAIR
);
3201 dqput(transfer_to
[PRJQUOTA
]);
3205 static int f2fs_ioc_setproject(struct inode
*inode
, __u32 projid
)
3207 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
3208 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
3209 struct f2fs_inode
*ri
= NULL
;
3213 if (!f2fs_sb_has_project_quota(sbi
)) {
3214 if (projid
!= F2FS_DEF_PROJID
)
3220 if (!f2fs_has_extra_attr(inode
))
3223 kprojid
= make_kprojid(&init_user_ns
, (projid_t
)projid
);
3225 if (projid_eq(kprojid
, fi
->i_projid
))
3229 /* Is it quota file? Do not allow user to mess with it */
3230 if (IS_NOQUOTA(inode
))
3233 if (!F2FS_FITS_IN_INODE(ri
, fi
->i_extra_isize
, i_projid
))
3236 err
= f2fs_dquot_initialize(inode
);
3241 err
= f2fs_transfer_project_quota(inode
, kprojid
);
3245 fi
->i_projid
= kprojid
;
3246 inode_set_ctime_current(inode
);
3247 f2fs_mark_inode_dirty_sync(inode
, true);
3249 f2fs_unlock_op(sbi
);
3253 int f2fs_transfer_project_quota(struct inode
*inode
, kprojid_t kprojid
)
3258 static int f2fs_ioc_setproject(struct inode
*inode
, __u32 projid
)
3260 if (projid
!= F2FS_DEF_PROJID
)
3266 int f2fs_fileattr_get(struct dentry
*dentry
, struct fileattr
*fa
)
3268 struct inode
*inode
= d_inode(dentry
);
3269 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
3270 u32 fsflags
= f2fs_iflags_to_fsflags(fi
->i_flags
);
3272 if (IS_ENCRYPTED(inode
))
3273 fsflags
|= FS_ENCRYPT_FL
;
3274 if (IS_VERITY(inode
))
3275 fsflags
|= FS_VERITY_FL
;
3276 if (f2fs_has_inline_data(inode
) || f2fs_has_inline_dentry(inode
))
3277 fsflags
|= FS_INLINE_DATA_FL
;
3278 if (is_inode_flag_set(inode
, FI_PIN_FILE
))
3279 fsflags
|= FS_NOCOW_FL
;
3281 fileattr_fill_flags(fa
, fsflags
& F2FS_GETTABLE_FS_FL
);
3283 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode
)))
3284 fa
->fsx_projid
= from_kprojid(&init_user_ns
, fi
->i_projid
);
3289 int f2fs_fileattr_set(struct mnt_idmap
*idmap
,
3290 struct dentry
*dentry
, struct fileattr
*fa
)
3292 struct inode
*inode
= d_inode(dentry
);
3293 u32 fsflags
= fa
->flags
, mask
= F2FS_SETTABLE_FS_FL
;
3297 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode
))))
3299 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode
)))
3301 if (fsflags
& ~F2FS_GETTABLE_FS_FL
)
3303 fsflags
&= F2FS_SETTABLE_FS_FL
;
3304 if (!fa
->flags_valid
)
3305 mask
&= FS_COMMON_FL
;
3307 iflags
= f2fs_fsflags_to_iflags(fsflags
);
3308 if (f2fs_mask_flags(inode
->i_mode
, iflags
) != iflags
)
3311 err
= f2fs_setflags_common(inode
, iflags
, f2fs_fsflags_to_iflags(mask
));
3313 err
= f2fs_ioc_setproject(inode
, fa
->fsx_projid
);
3318 int f2fs_pin_file_control(struct inode
*inode
, bool inc
)
3320 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
3321 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
3323 if (IS_DEVICE_ALIASING(inode
))
3326 if (fi
->i_gc_failures
>= sbi
->gc_pin_file_threshold
) {
3327 f2fs_warn(sbi
, "%s: Enable GC = ino %lx after %x GC trials",
3328 __func__
, inode
->i_ino
, fi
->i_gc_failures
);
3329 clear_inode_flag(inode
, FI_PIN_FILE
);
3333 /* Use i_gc_failures for normal file as a risk signal. */
3335 f2fs_i_gc_failures_write(inode
, fi
->i_gc_failures
+ 1);
3340 static int f2fs_ioc_set_pin_file(struct file
*filp
, unsigned long arg
)
3342 struct inode
*inode
= file_inode(filp
);
3343 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
3347 if (get_user(pin
, (__u32 __user
*)arg
))
3350 if (!S_ISREG(inode
->i_mode
))
3353 if (f2fs_readonly(sbi
->sb
))
3356 if (!pin
&& IS_DEVICE_ALIASING(inode
))
3359 ret
= mnt_want_write_file(filp
);
3365 if (f2fs_is_atomic_file(inode
)) {
3371 clear_inode_flag(inode
, FI_PIN_FILE
);
3372 f2fs_i_gc_failures_write(inode
, 0);
3374 } else if (f2fs_is_pinned_file(inode
)) {
3378 if (F2FS_HAS_BLOCKS(inode
)) {
3383 /* Let's allow file pinning on zoned device. */
3384 if (!f2fs_sb_has_blkzoned(sbi
) &&
3385 f2fs_should_update_outplace(inode
, NULL
)) {
3390 if (f2fs_pin_file_control(inode
, false)) {
3395 ret
= f2fs_convert_inline_inode(inode
);
3399 if (!f2fs_disable_compressed_file(inode
)) {
3404 set_inode_flag(inode
, FI_PIN_FILE
);
3405 ret
= F2FS_I(inode
)->i_gc_failures
;
3407 f2fs_update_time(sbi
, REQ_TIME
);
3409 inode_unlock(inode
);
3410 mnt_drop_write_file(filp
);
3414 static int f2fs_ioc_get_pin_file(struct file
*filp
, unsigned long arg
)
3416 struct inode
*inode
= file_inode(filp
);
3419 if (is_inode_flag_set(inode
, FI_PIN_FILE
))
3420 pin
= F2FS_I(inode
)->i_gc_failures
;
3421 return put_user(pin
, (u32 __user
*)arg
);
3424 static int f2fs_ioc_get_dev_alias_file(struct file
*filp
, unsigned long arg
)
3426 return put_user(IS_DEVICE_ALIASING(file_inode(filp
)) ? 1 : 0,
3430 int f2fs_precache_extents(struct inode
*inode
)
3432 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
3433 struct f2fs_map_blocks map
;
3434 pgoff_t m_next_extent
;
3438 if (is_inode_flag_set(inode
, FI_NO_EXTENT
))
3443 map
.m_next_pgofs
= NULL
;
3444 map
.m_next_extent
= &m_next_extent
;
3445 map
.m_seg_type
= NO_CHECK_TYPE
;
3446 map
.m_may_create
= false;
3447 end
= F2FS_BLK_ALIGN(i_size_read(inode
));
3449 while (map
.m_lblk
< end
) {
3450 map
.m_len
= end
- map
.m_lblk
;
3452 f2fs_down_write(&fi
->i_gc_rwsem
[WRITE
]);
3453 err
= f2fs_map_blocks(inode
, &map
, F2FS_GET_BLOCK_PRECACHE
);
3454 f2fs_up_write(&fi
->i_gc_rwsem
[WRITE
]);
3455 if (err
|| !map
.m_len
)
3458 map
.m_lblk
= m_next_extent
;
3464 static int f2fs_ioc_precache_extents(struct file
*filp
)
3466 return f2fs_precache_extents(file_inode(filp
));
3469 static int f2fs_ioc_resize_fs(struct file
*filp
, unsigned long arg
)
3471 struct f2fs_sb_info
*sbi
= F2FS_I_SB(file_inode(filp
));
3474 if (!capable(CAP_SYS_ADMIN
))
3477 if (f2fs_readonly(sbi
->sb
))
3480 if (copy_from_user(&block_count
, (void __user
*)arg
,
3481 sizeof(block_count
)))
3484 return f2fs_resize_fs(filp
, block_count
);
3487 static int f2fs_ioc_enable_verity(struct file
*filp
, unsigned long arg
)
3489 struct inode
*inode
= file_inode(filp
);
3491 f2fs_update_time(F2FS_I_SB(inode
), REQ_TIME
);
3493 if (!f2fs_sb_has_verity(F2FS_I_SB(inode
))) {
3494 f2fs_warn(F2FS_I_SB(inode
),
3495 "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem",
3500 return fsverity_ioctl_enable(filp
, (const void __user
*)arg
);
3503 static int f2fs_ioc_measure_verity(struct file
*filp
, unsigned long arg
)
3505 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp
))))
3508 return fsverity_ioctl_measure(filp
, (void __user
*)arg
);
3511 static int f2fs_ioc_read_verity_metadata(struct file
*filp
, unsigned long arg
)
3513 if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp
))))
3516 return fsverity_ioctl_read_metadata(filp
, (const void __user
*)arg
);
3519 static int f2fs_ioc_getfslabel(struct file
*filp
, unsigned long arg
)
3521 struct inode
*inode
= file_inode(filp
);
3522 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
3527 vbuf
= f2fs_kzalloc(sbi
, MAX_VOLUME_NAME
, GFP_KERNEL
);
3531 f2fs_down_read(&sbi
->sb_lock
);
3532 count
= utf16s_to_utf8s(sbi
->raw_super
->volume_name
,
3533 ARRAY_SIZE(sbi
->raw_super
->volume_name
),
3534 UTF16_LITTLE_ENDIAN
, vbuf
, MAX_VOLUME_NAME
);
3535 f2fs_up_read(&sbi
->sb_lock
);
3537 if (copy_to_user((char __user
*)arg
, vbuf
,
3538 min(FSLABEL_MAX
, count
)))
3545 static int f2fs_ioc_setfslabel(struct file
*filp
, unsigned long arg
)
3547 struct inode
*inode
= file_inode(filp
);
3548 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
3552 if (!capable(CAP_SYS_ADMIN
))
3555 vbuf
= strndup_user((const char __user
*)arg
, FSLABEL_MAX
);
3557 return PTR_ERR(vbuf
);
3559 err
= mnt_want_write_file(filp
);
3563 f2fs_down_write(&sbi
->sb_lock
);
3565 memset(sbi
->raw_super
->volume_name
, 0,
3566 sizeof(sbi
->raw_super
->volume_name
));
3567 utf8s_to_utf16s(vbuf
, strlen(vbuf
), UTF16_LITTLE_ENDIAN
,
3568 sbi
->raw_super
->volume_name
,
3569 ARRAY_SIZE(sbi
->raw_super
->volume_name
));
3571 err
= f2fs_commit_super(sbi
, false);
3573 f2fs_up_write(&sbi
->sb_lock
);
3575 mnt_drop_write_file(filp
);
3581 static int f2fs_get_compress_blocks(struct inode
*inode
, __u64
*blocks
)
3583 if (!f2fs_sb_has_compression(F2FS_I_SB(inode
)))
3586 if (!f2fs_compressed_file(inode
))
3589 *blocks
= atomic_read(&F2FS_I(inode
)->i_compr_blocks
);
3594 static int f2fs_ioc_get_compress_blocks(struct file
*filp
, unsigned long arg
)
3596 struct inode
*inode
= file_inode(filp
);
3600 ret
= f2fs_get_compress_blocks(inode
, &blocks
);
3604 return put_user(blocks
, (u64 __user
*)arg
);
3607 static int release_compress_blocks(struct dnode_of_data
*dn
, pgoff_t count
)
3609 struct f2fs_sb_info
*sbi
= F2FS_I_SB(dn
->inode
);
3610 unsigned int released_blocks
= 0;
3611 int cluster_size
= F2FS_I(dn
->inode
)->i_cluster_size
;
3615 for (i
= 0; i
< count
; i
++) {
3616 blkaddr
= data_blkaddr(dn
->inode
, dn
->node_page
,
3617 dn
->ofs_in_node
+ i
);
3619 if (!__is_valid_data_blkaddr(blkaddr
))
3621 if (unlikely(!f2fs_is_valid_blkaddr(sbi
, blkaddr
,
3622 DATA_GENERIC_ENHANCE
)))
3623 return -EFSCORRUPTED
;
3627 int compr_blocks
= 0;
3629 for (i
= 0; i
< cluster_size
; i
++, dn
->ofs_in_node
++) {
3630 blkaddr
= f2fs_data_blkaddr(dn
);
3633 if (blkaddr
== COMPRESS_ADDR
)
3635 dn
->ofs_in_node
+= cluster_size
;
3639 if (__is_valid_data_blkaddr(blkaddr
))
3642 if (blkaddr
!= NEW_ADDR
)
3645 f2fs_set_data_blkaddr(dn
, NULL_ADDR
);
3648 f2fs_i_compr_blocks_update(dn
->inode
, compr_blocks
, false);
3649 dec_valid_block_count(sbi
, dn
->inode
,
3650 cluster_size
- compr_blocks
);
3652 released_blocks
+= cluster_size
- compr_blocks
;
3654 count
-= cluster_size
;
3657 return released_blocks
;
3660 static int f2fs_release_compress_blocks(struct file
*filp
, unsigned long arg
)
3662 struct inode
*inode
= file_inode(filp
);
3663 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
3664 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
3665 pgoff_t page_idx
= 0, last_idx
;
3666 unsigned int released_blocks
= 0;
3670 if (!f2fs_sb_has_compression(sbi
))
3673 if (f2fs_readonly(sbi
->sb
))
3676 ret
= mnt_want_write_file(filp
);
3680 f2fs_balance_fs(sbi
, true);
3684 writecount
= atomic_read(&inode
->i_writecount
);
3685 if ((filp
->f_mode
& FMODE_WRITE
&& writecount
!= 1) ||
3686 (!(filp
->f_mode
& FMODE_WRITE
) && writecount
)) {
3691 if (!f2fs_compressed_file(inode
) ||
3692 is_inode_flag_set(inode
, FI_COMPRESS_RELEASED
)) {
3697 ret
= filemap_write_and_wait_range(inode
->i_mapping
, 0, LLONG_MAX
);
3701 if (!atomic_read(&fi
->i_compr_blocks
)) {
3706 set_inode_flag(inode
, FI_COMPRESS_RELEASED
);
3707 inode_set_ctime_current(inode
);
3708 f2fs_mark_inode_dirty_sync(inode
, true);
3710 f2fs_down_write(&fi
->i_gc_rwsem
[WRITE
]);
3711 filemap_invalidate_lock(inode
->i_mapping
);
3713 last_idx
= DIV_ROUND_UP(i_size_read(inode
), PAGE_SIZE
);
3715 while (page_idx
< last_idx
) {
3716 struct dnode_of_data dn
;
3717 pgoff_t end_offset
, count
;
3721 set_new_dnode(&dn
, inode
, NULL
, NULL
, 0);
3722 ret
= f2fs_get_dnode_of_data(&dn
, page_idx
, LOOKUP_NODE
);
3724 f2fs_unlock_op(sbi
);
3725 if (ret
== -ENOENT
) {
3726 page_idx
= f2fs_get_next_page_offset(&dn
,
3734 end_offset
= ADDRS_PER_PAGE(dn
.node_page
, inode
);
3735 count
= min(end_offset
- dn
.ofs_in_node
, last_idx
- page_idx
);
3736 count
= round_up(count
, fi
->i_cluster_size
);
3738 ret
= release_compress_blocks(&dn
, count
);
3740 f2fs_put_dnode(&dn
);
3742 f2fs_unlock_op(sbi
);
3748 released_blocks
+= ret
;
3751 filemap_invalidate_unlock(inode
->i_mapping
);
3752 f2fs_up_write(&fi
->i_gc_rwsem
[WRITE
]);
3754 if (released_blocks
)
3755 f2fs_update_time(sbi
, REQ_TIME
);
3756 inode_unlock(inode
);
3758 mnt_drop_write_file(filp
);
3761 ret
= put_user(released_blocks
, (u64 __user
*)arg
);
3762 } else if (released_blocks
&&
3763 atomic_read(&fi
->i_compr_blocks
)) {
3764 set_sbi_flag(sbi
, SBI_NEED_FSCK
);
3765 f2fs_warn(sbi
, "%s: partial blocks were released i_ino=%lx "
3766 "iblocks=%llu, released=%u, compr_blocks=%u, "
3768 __func__
, inode
->i_ino
, inode
->i_blocks
,
3770 atomic_read(&fi
->i_compr_blocks
));
3776 static int reserve_compress_blocks(struct dnode_of_data
*dn
, pgoff_t count
,
3777 unsigned int *reserved_blocks
)
3779 struct f2fs_sb_info
*sbi
= F2FS_I_SB(dn
->inode
);
3780 int cluster_size
= F2FS_I(dn
->inode
)->i_cluster_size
;
3784 for (i
= 0; i
< count
; i
++) {
3785 blkaddr
= data_blkaddr(dn
->inode
, dn
->node_page
,
3786 dn
->ofs_in_node
+ i
);
3788 if (!__is_valid_data_blkaddr(blkaddr
))
3790 if (unlikely(!f2fs_is_valid_blkaddr(sbi
, blkaddr
,
3791 DATA_GENERIC_ENHANCE
)))
3792 return -EFSCORRUPTED
;
3796 int compr_blocks
= 0;
3797 blkcnt_t reserved
= 0;
3798 blkcnt_t to_reserved
;
3801 for (i
= 0; i
< cluster_size
; i
++) {
3802 blkaddr
= data_blkaddr(dn
->inode
, dn
->node_page
,
3803 dn
->ofs_in_node
+ i
);
3806 if (blkaddr
!= COMPRESS_ADDR
) {
3807 dn
->ofs_in_node
+= cluster_size
;
3814 * compressed cluster was not released due to it
3815 * fails in release_compress_blocks(), so NEW_ADDR
3816 * is a possible case.
3818 if (blkaddr
== NEW_ADDR
) {
3822 if (__is_valid_data_blkaddr(blkaddr
)) {
3828 to_reserved
= cluster_size
- compr_blocks
- reserved
;
3830 /* for the case all blocks in cluster were reserved */
3831 if (reserved
&& to_reserved
== 1) {
3832 dn
->ofs_in_node
+= cluster_size
;
3836 ret
= inc_valid_block_count(sbi
, dn
->inode
,
3837 &to_reserved
, false);
3841 for (i
= 0; i
< cluster_size
; i
++, dn
->ofs_in_node
++) {
3842 if (f2fs_data_blkaddr(dn
) == NULL_ADDR
)
3843 f2fs_set_data_blkaddr(dn
, NEW_ADDR
);
3846 f2fs_i_compr_blocks_update(dn
->inode
, compr_blocks
, true);
3848 *reserved_blocks
+= to_reserved
;
3850 count
-= cluster_size
;
3856 static int f2fs_reserve_compress_blocks(struct file
*filp
, unsigned long arg
)
3858 struct inode
*inode
= file_inode(filp
);
3859 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
3860 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
3861 pgoff_t page_idx
= 0, last_idx
;
3862 unsigned int reserved_blocks
= 0;
3865 if (!f2fs_sb_has_compression(sbi
))
3868 if (f2fs_readonly(sbi
->sb
))
3871 ret
= mnt_want_write_file(filp
);
3875 f2fs_balance_fs(sbi
, true);
3879 if (!f2fs_compressed_file(inode
) ||
3880 !is_inode_flag_set(inode
, FI_COMPRESS_RELEASED
)) {
3885 if (atomic_read(&fi
->i_compr_blocks
))
3888 f2fs_down_write(&fi
->i_gc_rwsem
[WRITE
]);
3889 filemap_invalidate_lock(inode
->i_mapping
);
3891 last_idx
= DIV_ROUND_UP(i_size_read(inode
), PAGE_SIZE
);
3893 while (page_idx
< last_idx
) {
3894 struct dnode_of_data dn
;
3895 pgoff_t end_offset
, count
;
3899 set_new_dnode(&dn
, inode
, NULL
, NULL
, 0);
3900 ret
= f2fs_get_dnode_of_data(&dn
, page_idx
, LOOKUP_NODE
);
3902 f2fs_unlock_op(sbi
);
3903 if (ret
== -ENOENT
) {
3904 page_idx
= f2fs_get_next_page_offset(&dn
,
3912 end_offset
= ADDRS_PER_PAGE(dn
.node_page
, inode
);
3913 count
= min(end_offset
- dn
.ofs_in_node
, last_idx
- page_idx
);
3914 count
= round_up(count
, fi
->i_cluster_size
);
3916 ret
= reserve_compress_blocks(&dn
, count
, &reserved_blocks
);
3918 f2fs_put_dnode(&dn
);
3920 f2fs_unlock_op(sbi
);
3928 filemap_invalidate_unlock(inode
->i_mapping
);
3929 f2fs_up_write(&fi
->i_gc_rwsem
[WRITE
]);
3932 clear_inode_flag(inode
, FI_COMPRESS_RELEASED
);
3933 inode_set_ctime_current(inode
);
3934 f2fs_mark_inode_dirty_sync(inode
, true);
3937 if (reserved_blocks
)
3938 f2fs_update_time(sbi
, REQ_TIME
);
3939 inode_unlock(inode
);
3940 mnt_drop_write_file(filp
);
3943 ret
= put_user(reserved_blocks
, (u64 __user
*)arg
);
3944 } else if (reserved_blocks
&&
3945 atomic_read(&fi
->i_compr_blocks
)) {
3946 set_sbi_flag(sbi
, SBI_NEED_FSCK
);
3947 f2fs_warn(sbi
, "%s: partial blocks were reserved i_ino=%lx "
3948 "iblocks=%llu, reserved=%u, compr_blocks=%u, "
3950 __func__
, inode
->i_ino
, inode
->i_blocks
,
3952 atomic_read(&fi
->i_compr_blocks
));
3958 static int f2fs_secure_erase(struct block_device
*bdev
, struct inode
*inode
,
3959 pgoff_t off
, block_t block
, block_t len
, u32 flags
)
3961 sector_t sector
= SECTOR_FROM_BLOCK(block
);
3962 sector_t nr_sects
= SECTOR_FROM_BLOCK(len
);
3965 if (flags
& F2FS_TRIM_FILE_DISCARD
) {
3966 if (bdev_max_secure_erase_sectors(bdev
))
3967 ret
= blkdev_issue_secure_erase(bdev
, sector
, nr_sects
,
3970 ret
= blkdev_issue_discard(bdev
, sector
, nr_sects
,
3974 if (!ret
&& (flags
& F2FS_TRIM_FILE_ZEROOUT
)) {
3975 if (IS_ENCRYPTED(inode
))
3976 ret
= fscrypt_zeroout_range(inode
, off
, block
, len
);
3978 ret
= blkdev_issue_zeroout(bdev
, sector
, nr_sects
,
3985 static int f2fs_sec_trim_file(struct file
*filp
, unsigned long arg
)
3987 struct inode
*inode
= file_inode(filp
);
3988 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
3989 struct address_space
*mapping
= inode
->i_mapping
;
3990 struct block_device
*prev_bdev
= NULL
;
3991 struct f2fs_sectrim_range range
;
3992 pgoff_t index
, pg_end
, prev_index
= 0;
3993 block_t prev_block
= 0, len
= 0;
3995 bool to_end
= false;
3998 if (!(filp
->f_mode
& FMODE_WRITE
))
4001 if (copy_from_user(&range
, (struct f2fs_sectrim_range __user
*)arg
,
4005 if (range
.flags
== 0 || (range
.flags
& ~F2FS_TRIM_FILE_MASK
) ||
4006 !S_ISREG(inode
->i_mode
))
4009 if (((range
.flags
& F2FS_TRIM_FILE_DISCARD
) &&
4010 !f2fs_hw_support_discard(sbi
)) ||
4011 ((range
.flags
& F2FS_TRIM_FILE_ZEROOUT
) &&
4012 IS_ENCRYPTED(inode
) && f2fs_is_multi_device(sbi
)))
4015 ret
= mnt_want_write_file(filp
);
4020 if (f2fs_is_atomic_file(inode
) || f2fs_compressed_file(inode
) ||
4021 range
.start
>= inode
->i_size
) {
4029 if (inode
->i_size
- range
.start
> range
.len
) {
4030 end_addr
= range
.start
+ range
.len
;
4032 end_addr
= range
.len
== (u64
)-1 ?
4033 sbi
->sb
->s_maxbytes
: inode
->i_size
;
4037 if (!IS_ALIGNED(range
.start
, F2FS_BLKSIZE
) ||
4038 (!to_end
&& !IS_ALIGNED(end_addr
, F2FS_BLKSIZE
))) {
4043 index
= F2FS_BYTES_TO_BLK(range
.start
);
4044 pg_end
= DIV_ROUND_UP(end_addr
, F2FS_BLKSIZE
);
4046 ret
= f2fs_convert_inline_inode(inode
);
4050 f2fs_down_write(&F2FS_I(inode
)->i_gc_rwsem
[WRITE
]);
4051 filemap_invalidate_lock(mapping
);
4053 ret
= filemap_write_and_wait_range(mapping
, range
.start
,
4054 to_end
? LLONG_MAX
: end_addr
- 1);
4058 truncate_inode_pages_range(mapping
, range
.start
,
4059 to_end
? -1 : end_addr
- 1);
4061 while (index
< pg_end
) {
4062 struct dnode_of_data dn
;
4063 pgoff_t end_offset
, count
;
4066 set_new_dnode(&dn
, inode
, NULL
, NULL
, 0);
4067 ret
= f2fs_get_dnode_of_data(&dn
, index
, LOOKUP_NODE
);
4069 if (ret
== -ENOENT
) {
4070 index
= f2fs_get_next_page_offset(&dn
, index
);
4076 end_offset
= ADDRS_PER_PAGE(dn
.node_page
, inode
);
4077 count
= min(end_offset
- dn
.ofs_in_node
, pg_end
- index
);
4078 for (i
= 0; i
< count
; i
++, index
++, dn
.ofs_in_node
++) {
4079 struct block_device
*cur_bdev
;
4080 block_t blkaddr
= f2fs_data_blkaddr(&dn
);
4082 if (!__is_valid_data_blkaddr(blkaddr
))
4085 if (!f2fs_is_valid_blkaddr(sbi
, blkaddr
,
4086 DATA_GENERIC_ENHANCE
)) {
4087 ret
= -EFSCORRUPTED
;
4088 f2fs_put_dnode(&dn
);
4092 cur_bdev
= f2fs_target_device(sbi
, blkaddr
, NULL
);
4093 if (f2fs_is_multi_device(sbi
)) {
4094 int di
= f2fs_target_device_index(sbi
, blkaddr
);
4096 blkaddr
-= FDEV(di
).start_blk
;
4100 if (prev_bdev
== cur_bdev
&&
4101 index
== prev_index
+ len
&&
4102 blkaddr
== prev_block
+ len
) {
4105 ret
= f2fs_secure_erase(prev_bdev
,
4106 inode
, prev_index
, prev_block
,
4109 f2fs_put_dnode(&dn
);
4118 prev_bdev
= cur_bdev
;
4120 prev_block
= blkaddr
;
4125 f2fs_put_dnode(&dn
);
4127 if (fatal_signal_pending(current
)) {
4135 ret
= f2fs_secure_erase(prev_bdev
, inode
, prev_index
,
4136 prev_block
, len
, range
.flags
);
4137 f2fs_update_time(sbi
, REQ_TIME
);
4139 filemap_invalidate_unlock(mapping
);
4140 f2fs_up_write(&F2FS_I(inode
)->i_gc_rwsem
[WRITE
]);
4142 inode_unlock(inode
);
4143 mnt_drop_write_file(filp
);
4148 static int f2fs_ioc_get_compress_option(struct file
*filp
, unsigned long arg
)
4150 struct inode
*inode
= file_inode(filp
);
4151 struct f2fs_comp_option option
;
4153 if (!f2fs_sb_has_compression(F2FS_I_SB(inode
)))
4156 inode_lock_shared(inode
);
4158 if (!f2fs_compressed_file(inode
)) {
4159 inode_unlock_shared(inode
);
4163 option
.algorithm
= F2FS_I(inode
)->i_compress_algorithm
;
4164 option
.log_cluster_size
= F2FS_I(inode
)->i_log_cluster_size
;
4166 inode_unlock_shared(inode
);
4168 if (copy_to_user((struct f2fs_comp_option __user
*)arg
, &option
,
4175 static int f2fs_ioc_set_compress_option(struct file
*filp
, unsigned long arg
)
4177 struct inode
*inode
= file_inode(filp
);
4178 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
4179 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
4180 struct f2fs_comp_option option
;
4183 if (!f2fs_sb_has_compression(sbi
))
4186 if (!(filp
->f_mode
& FMODE_WRITE
))
4189 if (copy_from_user(&option
, (struct f2fs_comp_option __user
*)arg
,
4193 if (option
.log_cluster_size
< MIN_COMPRESS_LOG_SIZE
||
4194 option
.log_cluster_size
> MAX_COMPRESS_LOG_SIZE
||
4195 option
.algorithm
>= COMPRESS_MAX
)
4198 ret
= mnt_want_write_file(filp
);
4203 f2fs_down_write(&F2FS_I(inode
)->i_sem
);
4204 if (!f2fs_compressed_file(inode
)) {
4209 if (f2fs_is_mmap_file(inode
) || get_dirty_pages(inode
)) {
4214 if (F2FS_HAS_BLOCKS(inode
)) {
4219 fi
->i_compress_algorithm
= option
.algorithm
;
4220 fi
->i_log_cluster_size
= option
.log_cluster_size
;
4221 fi
->i_cluster_size
= BIT(option
.log_cluster_size
);
4222 /* Set default level */
4223 if (fi
->i_compress_algorithm
== COMPRESS_ZSTD
)
4224 fi
->i_compress_level
= F2FS_ZSTD_DEFAULT_CLEVEL
;
4226 fi
->i_compress_level
= 0;
4227 /* Adjust mount option level */
4228 if (option
.algorithm
== F2FS_OPTION(sbi
).compress_algorithm
&&
4229 F2FS_OPTION(sbi
).compress_level
)
4230 fi
->i_compress_level
= F2FS_OPTION(sbi
).compress_level
;
4231 f2fs_mark_inode_dirty_sync(inode
, true);
4233 if (!f2fs_is_compress_backend_ready(inode
))
4234 f2fs_warn(sbi
, "compression algorithm is successfully set, "
4235 "but current kernel doesn't support this algorithm.");
4237 f2fs_up_write(&fi
->i_sem
);
4238 inode_unlock(inode
);
4239 mnt_drop_write_file(filp
);
4244 static int redirty_blocks(struct inode
*inode
, pgoff_t page_idx
, int len
)
4246 DEFINE_READAHEAD(ractl
, NULL
, NULL
, inode
->i_mapping
, page_idx
);
4247 struct address_space
*mapping
= inode
->i_mapping
;
4249 pgoff_t redirty_idx
= page_idx
;
4250 int i
, page_len
= 0, ret
= 0;
4252 page_cache_ra_unbounded(&ractl
, len
, 0);
4254 for (i
= 0; i
< len
; i
++, page_idx
++) {
4255 page
= read_cache_page(mapping
, page_idx
, NULL
, NULL
);
4257 ret
= PTR_ERR(page
);
4263 for (i
= 0; i
< page_len
; i
++, redirty_idx
++) {
4264 page
= find_lock_page(mapping
, redirty_idx
);
4266 /* It will never fail, when page has pinned above */
4267 f2fs_bug_on(F2FS_I_SB(inode
), !page
);
4269 f2fs_wait_on_page_writeback(page
, DATA
, true, true);
4271 set_page_dirty(page
);
4272 set_page_private_gcing(page
);
4273 f2fs_put_page(page
, 1);
4274 f2fs_put_page(page
, 0);
4280 static int f2fs_ioc_decompress_file(struct file
*filp
)
4282 struct inode
*inode
= file_inode(filp
);
4283 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
4284 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
4285 pgoff_t page_idx
= 0, last_idx
, cluster_idx
;
4288 if (!f2fs_sb_has_compression(sbi
) ||
4289 F2FS_OPTION(sbi
).compress_mode
!= COMPR_MODE_USER
)
4292 if (!(filp
->f_mode
& FMODE_WRITE
))
4295 f2fs_balance_fs(sbi
, true);
4297 ret
= mnt_want_write_file(filp
);
4302 if (!f2fs_is_compress_backend_ready(inode
)) {
4307 if (!f2fs_compressed_file(inode
) ||
4308 is_inode_flag_set(inode
, FI_COMPRESS_RELEASED
)) {
4313 ret
= filemap_write_and_wait_range(inode
->i_mapping
, 0, LLONG_MAX
);
4317 if (!atomic_read(&fi
->i_compr_blocks
))
4320 last_idx
= DIV_ROUND_UP(i_size_read(inode
), PAGE_SIZE
);
4321 last_idx
>>= fi
->i_log_cluster_size
;
4323 for (cluster_idx
= 0; cluster_idx
< last_idx
; cluster_idx
++) {
4324 page_idx
= cluster_idx
<< fi
->i_log_cluster_size
;
4326 if (!f2fs_is_compressed_cluster(inode
, page_idx
))
4329 ret
= redirty_blocks(inode
, page_idx
, fi
->i_cluster_size
);
4333 if (get_dirty_pages(inode
) >= BLKS_PER_SEG(sbi
)) {
4334 ret
= filemap_fdatawrite(inode
->i_mapping
);
4340 if (fatal_signal_pending(current
)) {
4347 ret
= filemap_write_and_wait_range(inode
->i_mapping
, 0,
4351 f2fs_warn(sbi
, "%s: The file might be partially decompressed (errno=%d). Please delete the file.",
4353 f2fs_update_time(sbi
, REQ_TIME
);
4355 inode_unlock(inode
);
4356 mnt_drop_write_file(filp
);
4361 static int f2fs_ioc_compress_file(struct file
*filp
)
4363 struct inode
*inode
= file_inode(filp
);
4364 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
4365 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
4366 pgoff_t page_idx
= 0, last_idx
, cluster_idx
;
4369 if (!f2fs_sb_has_compression(sbi
) ||
4370 F2FS_OPTION(sbi
).compress_mode
!= COMPR_MODE_USER
)
4373 if (!(filp
->f_mode
& FMODE_WRITE
))
4376 f2fs_balance_fs(sbi
, true);
4378 ret
= mnt_want_write_file(filp
);
4383 if (!f2fs_is_compress_backend_ready(inode
)) {
4388 if (!f2fs_compressed_file(inode
) ||
4389 is_inode_flag_set(inode
, FI_COMPRESS_RELEASED
)) {
4394 ret
= filemap_write_and_wait_range(inode
->i_mapping
, 0, LLONG_MAX
);
4398 set_inode_flag(inode
, FI_ENABLE_COMPRESS
);
4400 last_idx
= DIV_ROUND_UP(i_size_read(inode
), PAGE_SIZE
);
4401 last_idx
>>= fi
->i_log_cluster_size
;
4403 for (cluster_idx
= 0; cluster_idx
< last_idx
; cluster_idx
++) {
4404 page_idx
= cluster_idx
<< fi
->i_log_cluster_size
;
4406 if (f2fs_is_sparse_cluster(inode
, page_idx
))
4409 ret
= redirty_blocks(inode
, page_idx
, fi
->i_cluster_size
);
4413 if (get_dirty_pages(inode
) >= BLKS_PER_SEG(sbi
)) {
4414 ret
= filemap_fdatawrite(inode
->i_mapping
);
4420 if (fatal_signal_pending(current
)) {
4427 ret
= filemap_write_and_wait_range(inode
->i_mapping
, 0,
4430 clear_inode_flag(inode
, FI_ENABLE_COMPRESS
);
4433 f2fs_warn(sbi
, "%s: The file might be partially compressed (errno=%d). Please delete the file.",
4435 f2fs_update_time(sbi
, REQ_TIME
);
4437 inode_unlock(inode
);
4438 mnt_drop_write_file(filp
);
4443 static long __f2fs_ioctl(struct file
*filp
, unsigned int cmd
, unsigned long arg
)
4446 case FS_IOC_GETVERSION
:
4447 return f2fs_ioc_getversion(filp
, arg
);
4448 case F2FS_IOC_START_ATOMIC_WRITE
:
4449 return f2fs_ioc_start_atomic_write(filp
, false);
4450 case F2FS_IOC_START_ATOMIC_REPLACE
:
4451 return f2fs_ioc_start_atomic_write(filp
, true);
4452 case F2FS_IOC_COMMIT_ATOMIC_WRITE
:
4453 return f2fs_ioc_commit_atomic_write(filp
);
4454 case F2FS_IOC_ABORT_ATOMIC_WRITE
:
4455 return f2fs_ioc_abort_atomic_write(filp
);
4456 case F2FS_IOC_START_VOLATILE_WRITE
:
4457 case F2FS_IOC_RELEASE_VOLATILE_WRITE
:
4459 case F2FS_IOC_SHUTDOWN
:
4460 return f2fs_ioc_shutdown(filp
, arg
);
4462 return f2fs_ioc_fitrim(filp
, arg
);
4463 case FS_IOC_SET_ENCRYPTION_POLICY
:
4464 return f2fs_ioc_set_encryption_policy(filp
, arg
);
4465 case FS_IOC_GET_ENCRYPTION_POLICY
:
4466 return f2fs_ioc_get_encryption_policy(filp
, arg
);
4467 case FS_IOC_GET_ENCRYPTION_PWSALT
:
4468 return f2fs_ioc_get_encryption_pwsalt(filp
, arg
);
4469 case FS_IOC_GET_ENCRYPTION_POLICY_EX
:
4470 return f2fs_ioc_get_encryption_policy_ex(filp
, arg
);
4471 case FS_IOC_ADD_ENCRYPTION_KEY
:
4472 return f2fs_ioc_add_encryption_key(filp
, arg
);
4473 case FS_IOC_REMOVE_ENCRYPTION_KEY
:
4474 return f2fs_ioc_remove_encryption_key(filp
, arg
);
4475 case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS
:
4476 return f2fs_ioc_remove_encryption_key_all_users(filp
, arg
);
4477 case FS_IOC_GET_ENCRYPTION_KEY_STATUS
:
4478 return f2fs_ioc_get_encryption_key_status(filp
, arg
);
4479 case FS_IOC_GET_ENCRYPTION_NONCE
:
4480 return f2fs_ioc_get_encryption_nonce(filp
, arg
);
4481 case F2FS_IOC_GARBAGE_COLLECT
:
4482 return f2fs_ioc_gc(filp
, arg
);
4483 case F2FS_IOC_GARBAGE_COLLECT_RANGE
:
4484 return f2fs_ioc_gc_range(filp
, arg
);
4485 case F2FS_IOC_WRITE_CHECKPOINT
:
4486 return f2fs_ioc_write_checkpoint(filp
);
4487 case F2FS_IOC_DEFRAGMENT
:
4488 return f2fs_ioc_defragment(filp
, arg
);
4489 case F2FS_IOC_MOVE_RANGE
:
4490 return f2fs_ioc_move_range(filp
, arg
);
4491 case F2FS_IOC_FLUSH_DEVICE
:
4492 return f2fs_ioc_flush_device(filp
, arg
);
4493 case F2FS_IOC_GET_FEATURES
:
4494 return f2fs_ioc_get_features(filp
, arg
);
4495 case F2FS_IOC_GET_PIN_FILE
:
4496 return f2fs_ioc_get_pin_file(filp
, arg
);
4497 case F2FS_IOC_SET_PIN_FILE
:
4498 return f2fs_ioc_set_pin_file(filp
, arg
);
4499 case F2FS_IOC_PRECACHE_EXTENTS
:
4500 return f2fs_ioc_precache_extents(filp
);
4501 case F2FS_IOC_RESIZE_FS
:
4502 return f2fs_ioc_resize_fs(filp
, arg
);
4503 case FS_IOC_ENABLE_VERITY
:
4504 return f2fs_ioc_enable_verity(filp
, arg
);
4505 case FS_IOC_MEASURE_VERITY
:
4506 return f2fs_ioc_measure_verity(filp
, arg
);
4507 case FS_IOC_READ_VERITY_METADATA
:
4508 return f2fs_ioc_read_verity_metadata(filp
, arg
);
4509 case FS_IOC_GETFSLABEL
:
4510 return f2fs_ioc_getfslabel(filp
, arg
);
4511 case FS_IOC_SETFSLABEL
:
4512 return f2fs_ioc_setfslabel(filp
, arg
);
4513 case F2FS_IOC_GET_COMPRESS_BLOCKS
:
4514 return f2fs_ioc_get_compress_blocks(filp
, arg
);
4515 case F2FS_IOC_RELEASE_COMPRESS_BLOCKS
:
4516 return f2fs_release_compress_blocks(filp
, arg
);
4517 case F2FS_IOC_RESERVE_COMPRESS_BLOCKS
:
4518 return f2fs_reserve_compress_blocks(filp
, arg
);
4519 case F2FS_IOC_SEC_TRIM_FILE
:
4520 return f2fs_sec_trim_file(filp
, arg
);
4521 case F2FS_IOC_GET_COMPRESS_OPTION
:
4522 return f2fs_ioc_get_compress_option(filp
, arg
);
4523 case F2FS_IOC_SET_COMPRESS_OPTION
:
4524 return f2fs_ioc_set_compress_option(filp
, arg
);
4525 case F2FS_IOC_DECOMPRESS_FILE
:
4526 return f2fs_ioc_decompress_file(filp
);
4527 case F2FS_IOC_COMPRESS_FILE
:
4528 return f2fs_ioc_compress_file(filp
);
4529 case F2FS_IOC_GET_DEV_ALIAS_FILE
:
4530 return f2fs_ioc_get_dev_alias_file(filp
, arg
);
4536 long f2fs_ioctl(struct file
*filp
, unsigned int cmd
, unsigned long arg
)
4538 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp
)))))
4540 if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp
))))
4543 return __f2fs_ioctl(filp
, cmd
, arg
);
4547 * Return %true if the given read or write request should use direct I/O, or
4548 * %false if it should use buffered I/O.
4550 static bool f2fs_should_use_dio(struct inode
*inode
, struct kiocb
*iocb
,
4551 struct iov_iter
*iter
)
4555 if (!(iocb
->ki_flags
& IOCB_DIRECT
))
4558 if (f2fs_force_buffered_io(inode
, iov_iter_rw(iter
)))
4562 * Direct I/O not aligned to the disk's logical_block_size will be
4563 * attempted, but will fail with -EINVAL.
4565 * f2fs additionally requires that direct I/O be aligned to the
4566 * filesystem block size, which is often a stricter requirement.
4567 * However, f2fs traditionally falls back to buffered I/O on requests
4568 * that are logical_block_size-aligned but not fs-block aligned.
4570 * The below logic implements this behavior.
4572 align
= iocb
->ki_pos
| iov_iter_alignment(iter
);
4573 if (!IS_ALIGNED(align
, i_blocksize(inode
)) &&
4574 IS_ALIGNED(align
, bdev_logical_block_size(inode
->i_sb
->s_bdev
)))
4580 static int f2fs_dio_read_end_io(struct kiocb
*iocb
, ssize_t size
, int error
,
4583 struct f2fs_sb_info
*sbi
= F2FS_I_SB(file_inode(iocb
->ki_filp
));
4585 dec_page_count(sbi
, F2FS_DIO_READ
);
4588 f2fs_update_iostat(sbi
, NULL
, APP_DIRECT_READ_IO
, size
);
4592 static const struct iomap_dio_ops f2fs_iomap_dio_read_ops
= {
4593 .end_io
= f2fs_dio_read_end_io
,
4596 static ssize_t
f2fs_dio_read_iter(struct kiocb
*iocb
, struct iov_iter
*to
)
4598 struct file
*file
= iocb
->ki_filp
;
4599 struct inode
*inode
= file_inode(file
);
4600 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
4601 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
4602 const loff_t pos
= iocb
->ki_pos
;
4603 const size_t count
= iov_iter_count(to
);
4604 struct iomap_dio
*dio
;
4608 return 0; /* skip atime update */
4610 trace_f2fs_direct_IO_enter(inode
, iocb
, count
, READ
);
4612 if (iocb
->ki_flags
& IOCB_NOWAIT
) {
4613 if (!f2fs_down_read_trylock(&fi
->i_gc_rwsem
[READ
])) {
4618 f2fs_down_read(&fi
->i_gc_rwsem
[READ
]);
4621 /* dio is not compatible w/ atomic file */
4622 if (f2fs_is_atomic_file(inode
)) {
4623 f2fs_up_read(&fi
->i_gc_rwsem
[READ
]);
4629 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
4630 * the higher-level function iomap_dio_rw() in order to ensure that the
4631 * F2FS_DIO_READ counter will be decremented correctly in all cases.
4633 inc_page_count(sbi
, F2FS_DIO_READ
);
4634 dio
= __iomap_dio_rw(iocb
, to
, &f2fs_iomap_ops
,
4635 &f2fs_iomap_dio_read_ops
, 0, NULL
, 0);
4636 if (IS_ERR_OR_NULL(dio
)) {
4637 ret
= PTR_ERR_OR_ZERO(dio
);
4638 if (ret
!= -EIOCBQUEUED
)
4639 dec_page_count(sbi
, F2FS_DIO_READ
);
4641 ret
= iomap_dio_complete(dio
);
4644 f2fs_up_read(&fi
->i_gc_rwsem
[READ
]);
4646 file_accessed(file
);
4648 trace_f2fs_direct_IO_exit(inode
, pos
, count
, READ
, ret
);
4652 static void f2fs_trace_rw_file_path(struct file
*file
, loff_t pos
, size_t count
,
4655 struct inode
*inode
= file_inode(file
);
4658 buf
= f2fs_getname(F2FS_I_SB(inode
));
4661 path
= dentry_path_raw(file_dentry(file
), buf
, PATH_MAX
);
4665 trace_f2fs_datawrite_start(inode
, pos
, count
,
4666 current
->pid
, path
, current
->comm
);
4668 trace_f2fs_dataread_start(inode
, pos
, count
,
4669 current
->pid
, path
, current
->comm
);
4674 static ssize_t
f2fs_file_read_iter(struct kiocb
*iocb
, struct iov_iter
*to
)
4676 struct inode
*inode
= file_inode(iocb
->ki_filp
);
4677 const loff_t pos
= iocb
->ki_pos
;
4680 if (!f2fs_is_compress_backend_ready(inode
))
4683 if (trace_f2fs_dataread_start_enabled())
4684 f2fs_trace_rw_file_path(iocb
->ki_filp
, iocb
->ki_pos
,
4685 iov_iter_count(to
), READ
);
4687 /* In LFS mode, if there is inflight dio, wait for its completion */
4688 if (f2fs_lfs_mode(F2FS_I_SB(inode
)) &&
4689 get_pages(F2FS_I_SB(inode
), F2FS_DIO_WRITE
))
4690 inode_dio_wait(inode
);
4692 if (f2fs_should_use_dio(inode
, iocb
, to
)) {
4693 ret
= f2fs_dio_read_iter(iocb
, to
);
4695 ret
= filemap_read(iocb
, to
, 0);
4697 f2fs_update_iostat(F2FS_I_SB(inode
), inode
,
4698 APP_BUFFERED_READ_IO
, ret
);
4700 if (trace_f2fs_dataread_end_enabled())
4701 trace_f2fs_dataread_end(inode
, pos
, ret
);
4705 static ssize_t
f2fs_file_splice_read(struct file
*in
, loff_t
*ppos
,
4706 struct pipe_inode_info
*pipe
,
4707 size_t len
, unsigned int flags
)
4709 struct inode
*inode
= file_inode(in
);
4710 const loff_t pos
= *ppos
;
4713 if (!f2fs_is_compress_backend_ready(inode
))
4716 if (trace_f2fs_dataread_start_enabled())
4717 f2fs_trace_rw_file_path(in
, pos
, len
, READ
);
4719 ret
= filemap_splice_read(in
, ppos
, pipe
, len
, flags
);
4721 f2fs_update_iostat(F2FS_I_SB(inode
), inode
,
4722 APP_BUFFERED_READ_IO
, ret
);
4724 if (trace_f2fs_dataread_end_enabled())
4725 trace_f2fs_dataread_end(inode
, pos
, ret
);
4729 static ssize_t
f2fs_write_checks(struct kiocb
*iocb
, struct iov_iter
*from
)
4731 struct file
*file
= iocb
->ki_filp
;
4732 struct inode
*inode
= file_inode(file
);
4736 if (IS_IMMUTABLE(inode
))
4739 if (is_inode_flag_set(inode
, FI_COMPRESS_RELEASED
))
4742 count
= generic_write_checks(iocb
, from
);
4746 err
= file_modified(file
);
4753 * Preallocate blocks for a write request, if it is possible and helpful to do
4754 * so. Returns a positive number if blocks may have been preallocated, 0 if no
4755 * blocks were preallocated, or a negative errno value if something went
4756 * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the
4757 * requested blocks (not just some of them) have been allocated.
4759 static int f2fs_preallocate_blocks(struct kiocb
*iocb
, struct iov_iter
*iter
,
4762 struct inode
*inode
= file_inode(iocb
->ki_filp
);
4763 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
4764 const loff_t pos
= iocb
->ki_pos
;
4765 const size_t count
= iov_iter_count(iter
);
4766 struct f2fs_map_blocks map
= {};
4770 /* If it will be an out-of-place direct write, don't bother. */
4771 if (dio
&& f2fs_lfs_mode(sbi
))
4774 * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
4775 * buffered IO, if DIO meets any holes.
4777 if (dio
&& i_size_read(inode
) &&
4778 (F2FS_BYTES_TO_BLK(pos
) < F2FS_BLK_ALIGN(i_size_read(inode
))))
4781 /* No-wait I/O can't allocate blocks. */
4782 if (iocb
->ki_flags
& IOCB_NOWAIT
)
4785 /* If it will be a short write, don't bother. */
4786 if (fault_in_iov_iter_readable(iter
, count
))
4789 if (f2fs_has_inline_data(inode
)) {
4790 /* If the data will fit inline, don't bother. */
4791 if (pos
+ count
<= MAX_INLINE_DATA(inode
))
4793 ret
= f2fs_convert_inline_inode(inode
);
4798 /* Do not preallocate blocks that will be written partially in 4KB. */
4799 map
.m_lblk
= F2FS_BLK_ALIGN(pos
);
4800 map
.m_len
= F2FS_BYTES_TO_BLK(pos
+ count
);
4801 if (map
.m_len
> map
.m_lblk
)
4802 map
.m_len
-= map
.m_lblk
;
4806 if (!IS_DEVICE_ALIASING(inode
))
4807 map
.m_may_create
= true;
4809 map
.m_seg_type
= f2fs_rw_hint_to_seg_type(sbi
,
4810 inode
->i_write_hint
);
4811 flag
= F2FS_GET_BLOCK_PRE_DIO
;
4813 map
.m_seg_type
= NO_CHECK_TYPE
;
4814 flag
= F2FS_GET_BLOCK_PRE_AIO
;
4817 ret
= f2fs_map_blocks(inode
, &map
, flag
);
4818 /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */
4819 if (ret
< 0 && !((ret
== -ENOSPC
|| ret
== -EDQUOT
) && map
.m_len
> 0))
4822 set_inode_flag(inode
, FI_PREALLOCATED_ALL
);
4826 static ssize_t
f2fs_buffered_write_iter(struct kiocb
*iocb
,
4827 struct iov_iter
*from
)
4829 struct file
*file
= iocb
->ki_filp
;
4830 struct inode
*inode
= file_inode(file
);
4833 if (iocb
->ki_flags
& IOCB_NOWAIT
)
4836 ret
= generic_perform_write(iocb
, from
);
4839 f2fs_update_iostat(F2FS_I_SB(inode
), inode
,
4840 APP_BUFFERED_IO
, ret
);
4845 static int f2fs_dio_write_end_io(struct kiocb
*iocb
, ssize_t size
, int error
,
4848 struct f2fs_sb_info
*sbi
= F2FS_I_SB(file_inode(iocb
->ki_filp
));
4850 dec_page_count(sbi
, F2FS_DIO_WRITE
);
4853 f2fs_update_time(sbi
, REQ_TIME
);
4854 f2fs_update_iostat(sbi
, NULL
, APP_DIRECT_IO
, size
);
4858 static void f2fs_dio_write_submit_io(const struct iomap_iter
*iter
,
4859 struct bio
*bio
, loff_t file_offset
)
4861 struct inode
*inode
= iter
->inode
;
4862 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
4863 enum log_type type
= f2fs_rw_hint_to_seg_type(sbi
, inode
->i_write_hint
);
4864 enum temp_type temp
= f2fs_get_segment_temp(sbi
, type
);
4866 bio
->bi_write_hint
= f2fs_io_type_to_rw_hint(sbi
, DATA
, temp
);
4870 static const struct iomap_dio_ops f2fs_iomap_dio_write_ops
= {
4871 .end_io
= f2fs_dio_write_end_io
,
4872 .submit_io
= f2fs_dio_write_submit_io
,
4875 static void f2fs_flush_buffered_write(struct address_space
*mapping
,
4876 loff_t start_pos
, loff_t end_pos
)
4880 ret
= filemap_write_and_wait_range(mapping
, start_pos
, end_pos
);
4883 invalidate_mapping_pages(mapping
,
4884 start_pos
>> PAGE_SHIFT
,
4885 end_pos
>> PAGE_SHIFT
);
4888 static ssize_t
f2fs_dio_write_iter(struct kiocb
*iocb
, struct iov_iter
*from
,
4889 bool *may_need_sync
)
4891 struct file
*file
= iocb
->ki_filp
;
4892 struct inode
*inode
= file_inode(file
);
4893 struct f2fs_inode_info
*fi
= F2FS_I(inode
);
4894 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
4895 const bool do_opu
= f2fs_lfs_mode(sbi
);
4896 const loff_t pos
= iocb
->ki_pos
;
4897 const ssize_t count
= iov_iter_count(from
);
4898 unsigned int dio_flags
;
4899 struct iomap_dio
*dio
;
4902 trace_f2fs_direct_IO_enter(inode
, iocb
, count
, WRITE
);
4904 if (iocb
->ki_flags
& IOCB_NOWAIT
) {
4905 /* f2fs_convert_inline_inode() and block allocation can block */
4906 if (f2fs_has_inline_data(inode
) ||
4907 !f2fs_overwrite_io(inode
, pos
, count
)) {
4912 if (!f2fs_down_read_trylock(&fi
->i_gc_rwsem
[WRITE
])) {
4916 if (do_opu
&& !f2fs_down_read_trylock(&fi
->i_gc_rwsem
[READ
])) {
4917 f2fs_up_read(&fi
->i_gc_rwsem
[WRITE
]);
4922 ret
= f2fs_convert_inline_inode(inode
);
4926 f2fs_down_read(&fi
->i_gc_rwsem
[WRITE
]);
4928 f2fs_down_read(&fi
->i_gc_rwsem
[READ
]);
4932 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
4933 * the higher-level function iomap_dio_rw() in order to ensure that the
4934 * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
4936 inc_page_count(sbi
, F2FS_DIO_WRITE
);
4938 if (pos
+ count
> inode
->i_size
)
4939 dio_flags
|= IOMAP_DIO_FORCE_WAIT
;
4940 dio
= __iomap_dio_rw(iocb
, from
, &f2fs_iomap_ops
,
4941 &f2fs_iomap_dio_write_ops
, dio_flags
, NULL
, 0);
4942 if (IS_ERR_OR_NULL(dio
)) {
4943 ret
= PTR_ERR_OR_ZERO(dio
);
4944 if (ret
== -ENOTBLK
)
4946 if (ret
!= -EIOCBQUEUED
)
4947 dec_page_count(sbi
, F2FS_DIO_WRITE
);
4949 ret
= iomap_dio_complete(dio
);
4953 f2fs_up_read(&fi
->i_gc_rwsem
[READ
]);
4954 f2fs_up_read(&fi
->i_gc_rwsem
[WRITE
]);
4958 if (pos
+ ret
> inode
->i_size
)
4959 f2fs_i_size_write(inode
, pos
+ ret
);
4961 set_inode_flag(inode
, FI_UPDATE_WRITE
);
4963 if (iov_iter_count(from
)) {
4965 loff_t bufio_start_pos
= iocb
->ki_pos
;
4968 * The direct write was partial, so we need to fall back to a
4969 * buffered write for the remainder.
4972 ret2
= f2fs_buffered_write_iter(iocb
, from
);
4973 if (iov_iter_count(from
))
4974 f2fs_write_failed(inode
, iocb
->ki_pos
);
4979 * Ensure that the pagecache pages are written to disk and
4980 * invalidated to preserve the expected O_DIRECT semantics.
4983 loff_t bufio_end_pos
= bufio_start_pos
+ ret2
- 1;
4987 f2fs_flush_buffered_write(file
->f_mapping
,
4992 /* iomap_dio_rw() already handled the generic_write_sync(). */
4993 *may_need_sync
= false;
4996 trace_f2fs_direct_IO_exit(inode
, pos
, count
, WRITE
, ret
);
static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	const loff_t orig_pos = iocb->ki_pos;
	const size_t orig_count = iov_iter_count(from);
	loff_t target_size;
	bool dio;
	bool may_need_sync = true;
	int preallocated;
	const loff_t pos = iocb->ki_pos;
	const ssize_t count = iov_iter_count(from);
	ssize_t ret;

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
		ret = -EIO;
		goto out;
	}

	if (!f2fs_is_compress_backend_ready(inode)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock(inode)) {
			ret = -EAGAIN;
			goto out;
		}
	} else {
		inode_lock(inode);
	}

	if (f2fs_is_pinned_file(inode) &&
			!f2fs_overwrite_io(inode, pos, count)) {
		ret = -EIO;
		goto out_unlock;
	}

	ret = f2fs_write_checks(iocb, from);
	if (ret <= 0)
		goto out_unlock;

	/* Determine whether we will do a direct write or a buffered write. */
	dio = f2fs_should_use_dio(inode, iocb, from);

	/* dio is not compatible w/ atomic write */
	if (dio && f2fs_is_atomic_file(inode)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	/* Possibly preallocate the blocks for the write. */
	target_size = iocb->ki_pos + iov_iter_count(from);
	preallocated = f2fs_preallocate_blocks(iocb, from, dio);
	if (preallocated < 0) {
		ret = preallocated;
	} else {
		if (trace_f2fs_datawrite_start_enabled())
			f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
						orig_count, WRITE);

		/* Do the actual write. */
		ret = dio ?
			f2fs_dio_write_iter(iocb, from, &may_need_sync) :
			f2fs_buffered_write_iter(iocb, from);

		if (trace_f2fs_datawrite_end_enabled())
			trace_f2fs_datawrite_end(inode, orig_pos, ret);
	}

	/* Don't leave any preallocated blocks around past i_size. */
	if (preallocated && i_size_read(inode) < target_size) {
		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		filemap_invalidate_lock(inode->i_mapping);
		if (!f2fs_truncate(inode))
			file_dont_truncate(inode);
		filemap_invalidate_unlock(inode->i_mapping);
		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	} else {
		file_dont_truncate(inode);
	}

	clear_inode_flag(inode, FI_PREALLOCATED_ALL);
out_unlock:
	inode_unlock(inode);
out:
	trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);

	if (ret > 0 && may_need_sync)
		ret = generic_write_sync(iocb, ret);

	/* If buffered IO was forced, flush and drop the data from
	 * the page cache to preserve O_DIRECT semantics
	 */
	if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT))
		f2fs_flush_buffered_write(iocb->ki_filp->f_mapping,
					  orig_pos,
					  orig_pos + ret - 1);

	return ret;
}
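
/*
 * Illustrative sketch (userspace, not part of this file's build): the
 * IOCB_NOWAIT path above is what a caller reaches via pwritev2(..., RWF_NOWAIT).
 * When the inode lock cannot be taken without blocking, the write fails with
 * -EAGAIN and the caller (typically an io_uring or event-loop application)
 * retries from a context where blocking is acceptable:
 *
 *	#define _GNU_SOURCE
 *	#include <errno.h>
 *	#include <string.h>
 *	#include <sys/uio.h>
 *	#include <unistd.h>
 *
 *	static ssize_t try_write(int fd, const char *msg, off_t off)
 *	{
 *		struct iovec iov = { (void *)msg, strlen(msg) };
 *		ssize_t n = pwritev2(fd, &iov, 1, off, RWF_NOWAIT);
 *
 *		if (n < 0 && errno == EAGAIN)
 *			n = pwritev2(fd, &iov, 1, off, 0);	// blocking retry
 *		return n;
 *	}
 */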
static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
		int advice)
{
	struct address_space *mapping;
	struct backing_dev_info *bdi;
	struct inode *inode = file_inode(filp);
	int err;

	if (advice == POSIX_FADV_SEQUENTIAL) {
		if (S_ISFIFO(inode->i_mode))
			return -ESPIPE;

		mapping = filp->f_mapping;
		if (!mapping || len < 0)
			return -EINVAL;

		bdi = inode_to_bdi(mapping->host);
		filp->f_ra.ra_pages = bdi->ra_pages *
			F2FS_I_SB(inode)->seq_file_ra_mul;
		spin_lock(&filp->f_lock);
		filp->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&filp->f_lock);
		return 0;
	} else if (advice == POSIX_FADV_WILLNEED && offset == 0) {
		/* Load extent cache at the first readahead. */
		f2fs_precache_extents(inode);
	}

	err = generic_fadvise(filp, offset, len, advice);
	if (!err && advice == POSIX_FADV_DONTNEED &&
			test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
			f2fs_compressed_file(inode))
		f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);

	return err;
}
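
/*
 * Illustrative sketch (userspace, not part of this file's build): through the
 * hook above, POSIX_FADV_SEQUENTIAL scales this file's readahead window by the
 * seq_file_ra_mul multiplier, and POSIX_FADV_WILLNEED at offset 0 warms the
 * extent cache before the data is read.  The file path is an assumption:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int stream_file(const char *path)
 *	{
 *		int fd = open(path, O_RDONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		// Hint sequential access: f2fs boosts readahead for this file.
 *		posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
 *		// A readahead hint at offset 0 also precaches extents.
 *		posix_fadvise(fd, 0, 0, POSIX_FADV_WILLNEED);
 *		// ... read the file sequentially ...
 *		close(fd);
 *		return 0;
 *	}
 */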
#ifdef CONFIG_COMPAT
struct compat_f2fs_gc_range {
	u32 sync;
	compat_u64 start;
	compat_u64 len;
};
#define F2FS_IOC32_GARBAGE_COLLECT_RANGE	_IOW(F2FS_IOCTL_MAGIC, 11,\
						struct compat_f2fs_gc_range)

static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg)
{
	struct compat_f2fs_gc_range __user *urange;
	struct f2fs_gc_range range;
	int err;

	urange = compat_ptr(arg);
	err = get_user(range.sync, &urange->sync);
	err |= get_user(range.start, &urange->start);
	err |= get_user(range.len, &urange->len);
	if (err)
		return -EFAULT;

	return __f2fs_ioc_gc_range(file, &range);
}
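
/*
 * Illustrative sketch (userspace, not part of this file's build): the compat
 * handler above only repacks the 32-bit layout into the native
 * struct f2fs_gc_range consumed by __f2fs_ioc_gc_range().  A native 64-bit
 * caller issues the same request directly; the mount point and range below
 * are assumptions for the example:
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <unistd.h>
 *	#include <linux/f2fs.h>
 *
 *	int gc_range_example(void)
 *	{
 *		struct f2fs_gc_range range = {
 *			.sync = 1,		// wait for GC to finish
 *			.start = 0,		// start of the range to collect
 *			.len = 1ULL << 30,	// length of the range
 *		};
 *		int fd = open("/mnt/f2fs", O_RDONLY);
 *		int err = fd < 0 ? -1 : ioctl(fd, F2FS_IOC_GARBAGE_COLLECT_RANGE, &range);
 *
 *		if (fd >= 0)
 *			close(fd);
 *		return err;
 *	}
 */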
struct compat_f2fs_move_range {
	u32 dst_fd;
	compat_u64 pos_in;
	compat_u64 pos_out;
	compat_u64 len;
};
#define F2FS_IOC32_MOVE_RANGE		_IOWR(F2FS_IOCTL_MAGIC, 9,	\
					struct compat_f2fs_move_range)

static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg)
{
	struct compat_f2fs_move_range __user *urange;
	struct f2fs_move_range range;
	int err;

	urange = compat_ptr(arg);
	err = get_user(range.dst_fd, &urange->dst_fd);
	err |= get_user(range.pos_in, &urange->pos_in);
	err |= get_user(range.pos_out, &urange->pos_out);
	err |= get_user(range.len, &urange->len);
	if (err)
		return -EFAULT;

	return __f2fs_ioc_move_range(file, &range);
}
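
/*
 * Illustrative sketch (userspace, not part of this file's build): likewise, a
 * native caller of F2FS_IOC_MOVE_RANGE passes a destination file descriptor
 * and byte positions in struct f2fs_move_range; the compat handler above only
 * widens the 32-bit layout.  Paths and sizes are assumptions for the example:
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <unistd.h>
 *	#include <linux/f2fs.h>
 *
 *	int move_range_example(const char *src, const char *dst)
 *	{
 *		int in = open(src, O_RDWR);
 *		int out = open(dst, O_RDWR);
 *		struct f2fs_move_range mr = {
 *			.dst_fd = out,		// destination file descriptor
 *			.pos_in = 0,		// source offset
 *			.pos_out = 0,		// destination offset
 *			.len = 1 << 20,		// bytes to move
 *		};
 *		int err = (in < 0 || out < 0) ? -1 : ioctl(in, F2FS_IOC_MOVE_RANGE, &mr);
 *
 *		if (in >= 0)
 *			close(in);
 *		if (out >= 0)
 *			close(out);
 *		return err;
 *	}
 */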
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
		return -EIO;
	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file))))
		return -ENOSPC;

	switch (cmd) {
	case FS_IOC32_GETVERSION:
		cmd = FS_IOC_GETVERSION;
		break;
	case F2FS_IOC32_GARBAGE_COLLECT_RANGE:
		return f2fs_compat_ioc_gc_range(file, arg);
	case F2FS_IOC32_MOVE_RANGE:
		return f2fs_compat_ioc_move_range(file, arg);
	case F2FS_IOC_START_ATOMIC_WRITE:
	case F2FS_IOC_START_ATOMIC_REPLACE:
	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
	case F2FS_IOC_START_VOLATILE_WRITE:
	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
	case F2FS_IOC_ABORT_ATOMIC_WRITE:
	case F2FS_IOC_SHUTDOWN:
	case FITRIM:
	case FS_IOC_SET_ENCRYPTION_POLICY:
	case FS_IOC_GET_ENCRYPTION_PWSALT:
	case FS_IOC_GET_ENCRYPTION_POLICY:
	case FS_IOC_GET_ENCRYPTION_POLICY_EX:
	case FS_IOC_ADD_ENCRYPTION_KEY:
	case FS_IOC_REMOVE_ENCRYPTION_KEY:
	case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
	case FS_IOC_GET_ENCRYPTION_NONCE:
	case F2FS_IOC_GARBAGE_COLLECT:
	case F2FS_IOC_WRITE_CHECKPOINT:
	case F2FS_IOC_DEFRAGMENT:
	case F2FS_IOC_FLUSH_DEVICE:
	case F2FS_IOC_GET_FEATURES:
	case F2FS_IOC_GET_PIN_FILE:
	case F2FS_IOC_SET_PIN_FILE:
	case F2FS_IOC_PRECACHE_EXTENTS:
	case F2FS_IOC_RESIZE_FS:
	case FS_IOC_ENABLE_VERITY:
	case FS_IOC_MEASURE_VERITY:
	case FS_IOC_READ_VERITY_METADATA:
	case FS_IOC_GETFSLABEL:
	case FS_IOC_SETFSLABEL:
	case F2FS_IOC_GET_COMPRESS_BLOCKS:
	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
	case F2FS_IOC_SEC_TRIM_FILE:
	case F2FS_IOC_GET_COMPRESS_OPTION:
	case F2FS_IOC_SET_COMPRESS_OPTION:
	case F2FS_IOC_DECOMPRESS_FILE:
	case F2FS_IOC_COMPRESS_FILE:
	case F2FS_IOC_GET_DEV_ALIAS_FILE:
		break;
	default:
		return -ENOIOCTLCMD;
	}
	return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
const struct file_operations f2fs_file_operations = {
	.llseek		= f2fs_llseek,
	.read_iter	= f2fs_file_read_iter,
	.write_iter	= f2fs_file_write_iter,
	.iopoll		= iocb_bio_iopoll,
	.open		= f2fs_file_open,
	.release	= f2fs_release_file,
	.mmap		= f2fs_file_mmap,
	.flush		= f2fs_file_flush,
	.fsync		= f2fs_sync_file,
	.fallocate	= f2fs_fallocate,
	.unlocked_ioctl	= f2fs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= f2fs_compat_ioctl,
#endif
	.splice_read	= f2fs_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fadvise	= f2fs_file_fadvise,
	.fop_flags	= FOP_BUFFER_RASYNC,
};