// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_quota.h"
#include "xfs_bmap_util.h"
#include "xfs_reflink.h"
#include "xfs_trace.h"
#include "xfs_exchrange.h"
#include "xfs_exchmaps.h"
#include "xfs_sb.h"
#include "xfs_icache.h"
#include "xfs_health.h"
#include "xfs_rtbitmap.h"
#include <linux/fsnotify.h>
27 /* Lock (and optionally join) two inodes for a file range exchange. */
31 struct xfs_inode
*ip1
,
32 struct xfs_inode
*ip2
)
35 xfs_lock_two_inodes(ip1
, XFS_ILOCK_EXCL
,
38 xfs_ilock(ip1
, XFS_ILOCK_EXCL
);
40 xfs_trans_ijoin(tp
, ip1
, 0);
42 xfs_trans_ijoin(tp
, ip2
, 0);
47 /* Unlock two inodes after a file range exchange operation. */
49 xfs_exchrange_iunlock(
50 struct xfs_inode
*ip1
,
51 struct xfs_inode
*ip2
)
54 xfs_iunlock(ip2
, XFS_ILOCK_EXCL
);
55 xfs_iunlock(ip1
, XFS_ILOCK_EXCL
);
59 * Estimate the resource requirements to exchange file contents between the two
60 * files. The caller is required to hold the IOLOCK and the MMAPLOCK and to
61 * have flushed both inodes' pagecache and active direct-ios.
64 xfs_exchrange_estimate(
65 struct xfs_exchmaps_req
*req
)
69 xfs_exchrange_ilock(NULL
, req
->ip1
, req
->ip2
);
70 error
= xfs_exchmaps_estimate(req
);
71 xfs_exchrange_iunlock(req
->ip1
, req
->ip2
);
76 * Check that file2's metadata agree with the snapshot that we took for the
77 * range commit request.
79 * This should be called after the filesystem has locked /all/ inode metadata
80 * against modification.
83 xfs_exchrange_check_freshness(
84 const struct xfs_exchrange
*fxr
,
85 struct xfs_inode
*ip2
)
87 struct inode
*inode2
= VFS_I(ip2
);
88 struct timespec64 ctime
= inode_get_ctime(inode2
);
89 struct timespec64 mtime
= inode_get_mtime(inode2
);
91 trace_xfs_exchrange_freshness(fxr
, ip2
);
93 /* Check that file2 hasn't otherwise been modified. */
94 if (fxr
->file2_ino
!= ip2
->i_ino
||
95 fxr
->file2_gen
!= inode2
->i_generation
||
96 !timespec64_equal(&fxr
->file2_ctime
, &ctime
) ||
97 !timespec64_equal(&fxr
->file2_mtime
, &mtime
))
103 #define QRETRY_IP1 (0x1)
104 #define QRETRY_IP2 (0x2)
107 * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip
108 * this if quota enforcement is disabled or if both inodes' dquots are the
109 * same. The qretry structure must be initialized to zeroes before the first
110 * call to this function.
113 xfs_exchrange_reserve_quota(
114 struct xfs_trans
*tp
,
115 const struct xfs_exchmaps_req
*req
,
116 unsigned int *qretry
)
118 int64_t ddelta
, rdelta
;
123 * Don't bother with a quota reservation if we're not enforcing them
124 * or the two inodes have the same dquots.
126 if (!XFS_IS_QUOTA_ON(tp
->t_mountp
) || req
->ip1
== req
->ip2
||
127 (req
->ip1
->i_udquot
== req
->ip2
->i_udquot
&&
128 req
->ip1
->i_gdquot
== req
->ip2
->i_gdquot
&&
129 req
->ip1
->i_pdquot
== req
->ip2
->i_pdquot
))
135 * For each file, compute the net gain in the number of regular blocks
136 * that will be mapped into that file and reserve that much quota. The
137 * quota counts must be able to absorb at least that much space.
139 ddelta
= req
->ip2_bcount
- req
->ip1_bcount
;
140 rdelta
= req
->ip2_rtbcount
- req
->ip1_rtbcount
;
141 if (ddelta
> 0 || rdelta
> 0) {
142 error
= xfs_trans_reserve_quota_nblks(tp
, req
->ip1
,
143 ddelta
> 0 ? ddelta
: 0,
144 rdelta
> 0 ? rdelta
: 0,
146 if (error
== -EDQUOT
|| error
== -ENOSPC
) {
148 * Save this error and see what happens if we try to
149 * reserve quota for ip2. Then report both.
151 *qretry
|= QRETRY_IP1
;
158 if (ddelta
< 0 || rdelta
< 0) {
159 error
= xfs_trans_reserve_quota_nblks(tp
, req
->ip2
,
160 ddelta
< 0 ? -ddelta
: 0,
161 rdelta
< 0 ? -rdelta
: 0,
163 if (error
== -EDQUOT
|| error
== -ENOSPC
)
164 *qretry
|= QRETRY_IP2
;
172 * For each file, forcibly reserve the gross gain in mapped blocks so
173 * that we don't trip over any quota block reservation assertions.
174 * We must reserve the gross gain because the quota code subtracts from
175 * bcount the number of blocks that we unmap; it does not add that
176 * quantity back to the quota block reservation.
178 error
= xfs_trans_reserve_quota_nblks(tp
, req
->ip1
, req
->ip1_bcount
,
179 req
->ip1_rtbcount
, true);
183 return xfs_trans_reserve_quota_nblks(tp
, req
->ip2
, req
->ip2_bcount
,
184 req
->ip2_rtbcount
, true);
187 /* Exchange the mappings (and hence the contents) of two files' forks. */
189 xfs_exchrange_mappings(
190 const struct xfs_exchrange
*fxr
,
191 struct xfs_inode
*ip1
,
192 struct xfs_inode
*ip2
)
194 struct xfs_mount
*mp
= ip1
->i_mount
;
195 struct xfs_exchmaps_req req
= {
198 .startoff1
= XFS_B_TO_FSBT(mp
, fxr
->file1_offset
),
199 .startoff2
= XFS_B_TO_FSBT(mp
, fxr
->file2_offset
),
200 .blockcount
= XFS_B_TO_FSB(mp
, fxr
->length
),
202 struct xfs_trans
*tp
;
204 bool retried
= false;
207 trace_xfs_exchrange_mappings(fxr
, ip1
, ip2
);
209 if (fxr
->flags
& XFS_EXCHANGE_RANGE_TO_EOF
)
210 req
.flags
|= XFS_EXCHMAPS_SET_SIZES
;
211 if (fxr
->flags
& XFS_EXCHANGE_RANGE_FILE1_WRITTEN
)
212 req
.flags
|= XFS_EXCHMAPS_INO1_WRITTEN
;
215 * Round the request length up to the nearest file allocation unit.
216 * The prep function already checked that the request offsets and
217 * length in @fxr are safe to round up.
219 if (xfs_inode_has_bigrtalloc(ip2
))
220 req
.blockcount
= xfs_blen_roundup_rtx(mp
, req
.blockcount
);
222 error
= xfs_exchrange_estimate(&req
);
227 /* Allocate the transaction, lock the inodes, and join them. */
228 error
= xfs_trans_alloc(mp
, &M_RES(mp
)->tr_write
, req
.resblks
, 0,
229 XFS_TRANS_RES_FDBLKS
, &tp
);
233 xfs_exchrange_ilock(tp
, ip1
, ip2
);
235 trace_xfs_exchrange_before(ip2
, 2);
236 trace_xfs_exchrange_before(ip1
, 1);
238 error
= xfs_exchmaps_check_forks(mp
, &req
);
240 goto out_trans_cancel
;
243 * Reserve ourselves some quota if any of them are in enforcing mode.
244 * In theory we only need enough to satisfy the change in the number
245 * of blocks between the two ranges being remapped.
247 error
= xfs_exchrange_reserve_quota(tp
, &req
, &qretry
);
248 if ((error
== -EDQUOT
|| error
== -ENOSPC
) && !retried
) {
249 xfs_trans_cancel(tp
);
250 xfs_exchrange_iunlock(ip1
, ip2
);
251 if (qretry
& QRETRY_IP1
)
252 xfs_blockgc_free_quota(ip1
, 0);
253 if (qretry
& QRETRY_IP2
)
254 xfs_blockgc_free_quota(ip2
, 0);
259 goto out_trans_cancel
;
261 /* If we got this far on a dry run, all parameters are ok. */
262 if (fxr
->flags
& XFS_EXCHANGE_RANGE_DRY_RUN
)
263 goto out_trans_cancel
;
265 /* Update the mtime and ctime of both files. */
266 if (fxr
->flags
& __XFS_EXCHANGE_RANGE_UPD_CMTIME1
)
267 xfs_trans_ichgtime(tp
, ip1
, XFS_ICHGTIME_MOD
| XFS_ICHGTIME_CHG
);
268 if (fxr
->flags
& __XFS_EXCHANGE_RANGE_UPD_CMTIME2
)
269 xfs_trans_ichgtime(tp
, ip2
, XFS_ICHGTIME_MOD
| XFS_ICHGTIME_CHG
);
271 xfs_exchange_mappings(tp
, &req
);
274 * Force the log to persist metadata updates if the caller or the
275 * administrator requires this. The generic prep function already
276 * flushed the relevant parts of the page cache.
278 if (xfs_has_wsync(mp
) || (fxr
->flags
& XFS_EXCHANGE_RANGE_DSYNC
))
279 xfs_trans_set_sync(tp
);
281 error
= xfs_trans_commit(tp
);
283 trace_xfs_exchrange_after(ip2
, 2);
284 trace_xfs_exchrange_after(ip1
, 1);
290 * If the caller wanted us to exchange the contents of two complete
291 * files of unequal length, exchange the incore sizes now. This should
292 * be safe because we flushed both files' page caches, exchanged all
293 * the mappings, and updated the ondisk sizes.
295 if (fxr
->flags
& XFS_EXCHANGE_RANGE_TO_EOF
) {
298 temp
= i_size_read(VFS_I(ip2
));
299 i_size_write(VFS_I(ip2
), i_size_read(VFS_I(ip1
)));
300 i_size_write(VFS_I(ip1
), temp
);
304 xfs_exchrange_iunlock(ip1
, ip2
);
308 xfs_trans_cancel(tp
);
/*
 * Generic code for exchanging ranges of two files via XFS_IOC_EXCHANGE_RANGE.
 * This part deals with struct file objects and byte ranges and does not deal
 * with XFS-specific data structures such as xfs_inodes and block ranges.  This
 * separation may some day facilitate porting to another filesystem.
 *
 * The goal is to exchange fxr.length bytes starting at fxr.file1_offset in
 * file1 with the same number of bytes starting at fxr.file2_offset in file2.
 * Implementations must call xfs_exchange_range_prep to prepare the two
 * files prior to taking locks; and they must update the inode change and mod
 * times of both files as part of the metadata update.  The timestamp update
 * and freshness checks must be done atomically as part of the data exchange
 * operation to ensure correctness of the freshness check.
 * xfs_exchange_range_finish must be called after the operation completes
 * successfully but before locks are dropped.
 */
329 /* Verify that we have security clearance to perform this operation. */
331 xfs_exchange_range_verify_area(
332 struct xfs_exchrange
*fxr
)
336 ret
= remap_verify_area(fxr
->file1
, fxr
->file1_offset
, fxr
->length
,
341 return remap_verify_area(fxr
->file2
, fxr
->file2_offset
, fxr
->length
,
346 * Performs necessary checks before doing a range exchange, having stabilized
347 * mutable inode attributes via i_rwsem.
350 xfs_exchange_range_checks(
351 struct xfs_exchrange
*fxr
,
352 unsigned int alloc_unit
)
354 struct inode
*inode1
= file_inode(fxr
->file1
);
355 struct inode
*inode2
= file_inode(fxr
->file2
);
356 uint64_t allocmask
= alloc_unit
- 1;
359 loff_t size1
, size2
, tmp
;
362 /* Don't touch certain kinds of inodes */
363 if (IS_IMMUTABLE(inode1
) || IS_IMMUTABLE(inode2
))
365 if (IS_SWAPFILE(inode1
) || IS_SWAPFILE(inode2
))
368 size1
= i_size_read(inode1
);
369 size2
= i_size_read(inode2
);
371 /* Ranges cannot start after EOF. */
372 if (fxr
->file1_offset
> size1
|| fxr
->file2_offset
> size2
)
376 * If the caller said to exchange to EOF, we set the length of the
377 * request large enough to cover everything to the end of both files.
379 if (fxr
->flags
& XFS_EXCHANGE_RANGE_TO_EOF
) {
380 fxr
->length
= max_t(int64_t, size1
- fxr
->file1_offset
,
381 size2
- fxr
->file2_offset
);
383 error
= xfs_exchange_range_verify_area(fxr
);
389 * The start of both ranges must be aligned to the file allocation
392 if (!IS_ALIGNED(fxr
->file1_offset
, alloc_unit
) ||
393 !IS_ALIGNED(fxr
->file2_offset
, alloc_unit
))
396 /* Ensure offsets don't wrap. */
397 if (check_add_overflow(fxr
->file1_offset
, fxr
->length
, &tmp
) ||
398 check_add_overflow(fxr
->file2_offset
, fxr
->length
, &tmp
))
402 * We require both ranges to end within EOF, unless we're exchanging
405 if (!(fxr
->flags
& XFS_EXCHANGE_RANGE_TO_EOF
) &&
406 (fxr
->file1_offset
+ fxr
->length
> size1
||
407 fxr
->file2_offset
+ fxr
->length
> size2
))
411 * Make sure we don't hit any file size limits. If we hit any size
412 * limits such that test_length was adjusted, we abort the whole
415 test_len
= fxr
->length
;
416 error
= generic_write_check_limits(fxr
->file2
, fxr
->file2_offset
,
420 error
= generic_write_check_limits(fxr
->file1
, fxr
->file1_offset
,
424 if (test_len
!= fxr
->length
)
428 * If the user wanted us to exchange up to the infile's EOF, round up
429 * to the next allocation unit boundary for this check. Do the same
432 * Otherwise, reject the range length if it's not aligned to an
435 if (fxr
->file1_offset
+ fxr
->length
== size1
)
436 blen
= ALIGN(size1
, alloc_unit
) - fxr
->file1_offset
;
437 else if (fxr
->file2_offset
+ fxr
->length
== size2
)
438 blen
= ALIGN(size2
, alloc_unit
) - fxr
->file2_offset
;
439 else if (!IS_ALIGNED(fxr
->length
, alloc_unit
))
444 /* Don't allow overlapped exchanges within the same file. */
445 if (inode1
== inode2
&&
446 fxr
->file2_offset
+ blen
> fxr
->file1_offset
&&
447 fxr
->file1_offset
+ blen
> fxr
->file2_offset
)
451 * Ensure that we don't exchange a partial EOF block into the middle of
454 if ((fxr
->length
& allocmask
) == 0)
458 if (fxr
->file2_offset
+ blen
< size2
)
461 if (fxr
->file1_offset
+ blen
< size1
)
464 return blen
== fxr
->length
? 0 : -EINVAL
;
468 * Check that the two inodes are eligible for range exchanges, the ranges make
469 * sense, and then flush all dirty data. Caller must ensure that the inodes
470 * have been locked against any other modifications.
473 xfs_exchange_range_prep(
474 struct xfs_exchrange
*fxr
,
475 unsigned int alloc_unit
)
477 struct inode
*inode1
= file_inode(fxr
->file1
);
478 struct inode
*inode2
= file_inode(fxr
->file2
);
479 bool same_inode
= (inode1
== inode2
);
482 /* Check that we don't violate system file offset limits. */
483 error
= xfs_exchange_range_checks(fxr
, alloc_unit
);
484 if (error
|| fxr
->length
== 0)
487 /* Wait for the completion of any pending IOs on both files */
488 inode_dio_wait(inode1
);
490 inode_dio_wait(inode2
);
492 error
= filemap_write_and_wait_range(inode1
->i_mapping
,
494 fxr
->file1_offset
+ fxr
->length
- 1);
498 error
= filemap_write_and_wait_range(inode2
->i_mapping
,
500 fxr
->file2_offset
+ fxr
->length
- 1);
505 * If the files or inodes involved require synchronous writes, amend
506 * the request to force the filesystem to flush all data and metadata
507 * to disk after the operation completes.
509 if (((fxr
->file1
->f_flags
| fxr
->file2
->f_flags
) & O_SYNC
) ||
510 IS_SYNC(inode1
) || IS_SYNC(inode2
))
511 fxr
->flags
|= XFS_EXCHANGE_RANGE_DSYNC
;
517 * Finish a range exchange operation, if it was successful. Caller must ensure
518 * that the inodes are still locked against any other modifications.
521 xfs_exchange_range_finish(
522 struct xfs_exchrange
*fxr
)
526 error
= file_remove_privs(fxr
->file1
);
529 if (file_inode(fxr
->file1
) == file_inode(fxr
->file2
))
532 return file_remove_privs(fxr
->file2
);
536 * Check the alignment of an exchange request when the allocation unit size
537 * isn't a power of two. The generic file-level helpers use (fast)
538 * bitmask-based alignment checks, but here we have to use slow long division.
541 xfs_exchrange_check_rtalign(
542 const struct xfs_exchrange
*fxr
,
543 struct xfs_inode
*ip1
,
544 struct xfs_inode
*ip2
,
545 unsigned int alloc_unit
)
547 uint64_t length
= fxr
->length
;
551 size1
= i_size_read(VFS_I(ip1
));
552 size2
= i_size_read(VFS_I(ip2
));
554 /* The start of both ranges must be aligned to a rt extent. */
555 if (!isaligned_64(fxr
->file1_offset
, alloc_unit
) ||
556 !isaligned_64(fxr
->file2_offset
, alloc_unit
))
559 if (fxr
->flags
& XFS_EXCHANGE_RANGE_TO_EOF
)
560 length
= max_t(int64_t, size1
- fxr
->file1_offset
,
561 size2
- fxr
->file2_offset
);
564 * If the user wanted us to exchange up to the infile's EOF, round up
565 * to the next rt extent boundary for this check. Do the same for the
568 * Otherwise, reject the range length if it's not rt extent aligned.
569 * We already confirmed the starting offsets' rt extent block
572 if (fxr
->file1_offset
+ length
== size1
)
573 blen
= roundup_64(size1
, alloc_unit
) - fxr
->file1_offset
;
574 else if (fxr
->file2_offset
+ length
== size2
)
575 blen
= roundup_64(size2
, alloc_unit
) - fxr
->file2_offset
;
576 else if (!isaligned_64(length
, alloc_unit
))
581 /* Don't allow overlapped exchanges within the same file. */
583 fxr
->file2_offset
+ blen
> fxr
->file1_offset
&&
584 fxr
->file1_offset
+ blen
> fxr
->file2_offset
)
588 * Ensure that we don't exchange a partial EOF rt extent into the
589 * middle of another file.
591 if (isaligned_64(length
, alloc_unit
))
595 if (fxr
->file2_offset
+ length
< size2
)
596 blen
= rounddown_64(blen
, alloc_unit
);
598 if (fxr
->file1_offset
+ blen
< size1
)
599 blen
= rounddown_64(blen
, alloc_unit
);
601 return blen
== length
? 0 : -EINVAL
;
604 /* Prepare two files to have their data exchanged. */
607 struct xfs_exchrange
*fxr
,
608 struct xfs_inode
*ip1
,
609 struct xfs_inode
*ip2
)
611 struct xfs_mount
*mp
= ip2
->i_mount
;
612 unsigned int alloc_unit
= xfs_inode_alloc_unitsize(ip2
);
615 trace_xfs_exchrange_prep(fxr
, ip1
, ip2
);
617 /* Verify both files are either real-time or non-realtime */
618 if (XFS_IS_REALTIME_INODE(ip1
) != XFS_IS_REALTIME_INODE(ip2
))
621 /* Check non-power of two alignment issues, if necessary. */
622 if (!is_power_of_2(alloc_unit
)) {
623 error
= xfs_exchrange_check_rtalign(fxr
, ip1
, ip2
, alloc_unit
);
628 * Do the generic file-level checks with the regular block
631 alloc_unit
= mp
->m_sb
.sb_blocksize
;
634 error
= xfs_exchange_range_prep(fxr
, alloc_unit
);
635 if (error
|| fxr
->length
== 0)
638 if (fxr
->flags
& __XFS_EXCHANGE_RANGE_CHECK_FRESH2
) {
639 error
= xfs_exchrange_check_freshness(fxr
, ip2
);
644 /* Attach dquots to both inodes before changing block maps. */
645 error
= xfs_qm_dqattach(ip2
);
648 error
= xfs_qm_dqattach(ip1
);
652 trace_xfs_exchrange_flush(fxr
, ip1
, ip2
);
654 /* Flush the relevant ranges of both files. */
655 error
= xfs_flush_unmap_range(ip2
, fxr
->file2_offset
, fxr
->length
);
658 error
= xfs_flush_unmap_range(ip1
, fxr
->file1_offset
, fxr
->length
);
663 * Cancel CoW fork preallocations for the ranges of both files. The
664 * prep function should have flushed all the dirty data, so the only
665 * CoW mappings remaining should be speculative.
667 if (xfs_inode_has_cow_data(ip1
)) {
668 error
= xfs_reflink_cancel_cow_range(ip1
, fxr
->file1_offset
,
674 if (xfs_inode_has_cow_data(ip2
)) {
675 error
= xfs_reflink_cancel_cow_range(ip2
, fxr
->file2_offset
,
685 * Exchange contents of files. This is the binding between the generic
686 * file-level concepts and the XFS inode-specific implementation.
689 xfs_exchrange_contents(
690 struct xfs_exchrange
*fxr
)
692 struct inode
*inode1
= file_inode(fxr
->file1
);
693 struct inode
*inode2
= file_inode(fxr
->file2
);
694 struct xfs_inode
*ip1
= XFS_I(inode1
);
695 struct xfs_inode
*ip2
= XFS_I(inode2
);
696 struct xfs_mount
*mp
= ip1
->i_mount
;
699 if (!xfs_has_exchange_range(mp
))
702 if (fxr
->flags
& ~(XFS_EXCHANGE_RANGE_ALL_FLAGS
|
703 XFS_EXCHANGE_RANGE_PRIV_FLAGS
))
706 if (xfs_is_shutdown(mp
))
709 /* Lock both files against IO */
710 error
= xfs_ilock2_io_mmap(ip1
, ip2
);
714 /* Prepare and then exchange file contents. */
715 error
= xfs_exchrange_prep(fxr
, ip1
, ip2
);
719 error
= xfs_exchrange_mappings(fxr
, ip1
, ip2
);
724 * Finish the exchange by removing special file privileges like any
725 * other file write would do. This may involve turning on support for
726 * logged xattrs if either file has security capabilities.
728 error
= xfs_exchange_range_finish(fxr
);
733 xfs_iunlock2_io_mmap(ip1
, ip2
);
736 trace_xfs_exchrange_error(ip2
, error
, _RET_IP_
);
740 /* Exchange parts of two files. */
743 struct xfs_exchrange
*fxr
)
745 struct inode
*inode1
= file_inode(fxr
->file1
);
746 struct inode
*inode2
= file_inode(fxr
->file2
);
749 BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS
&
750 XFS_EXCHANGE_RANGE_PRIV_FLAGS
);
752 /* Both files must be on the same mount/filesystem. */
753 if (fxr
->file1
->f_path
.mnt
!= fxr
->file2
->f_path
.mnt
)
756 if (fxr
->flags
& ~(XFS_EXCHANGE_RANGE_ALL_FLAGS
|
757 __XFS_EXCHANGE_RANGE_CHECK_FRESH2
))
760 /* Userspace requests only honored for regular files. */
761 if (S_ISDIR(inode1
->i_mode
) || S_ISDIR(inode2
->i_mode
))
763 if (!S_ISREG(inode1
->i_mode
) || !S_ISREG(inode2
->i_mode
))
766 /* Both files must be opened for read and write. */
767 if (!(fxr
->file1
->f_mode
& FMODE_READ
) ||
768 !(fxr
->file1
->f_mode
& FMODE_WRITE
) ||
769 !(fxr
->file2
->f_mode
& FMODE_READ
) ||
770 !(fxr
->file2
->f_mode
& FMODE_WRITE
))
773 /* Neither file can be opened append-only. */
774 if ((fxr
->file1
->f_flags
& O_APPEND
) ||
775 (fxr
->file2
->f_flags
& O_APPEND
))
779 * If we're not exchanging to EOF, we can check the areas before
780 * stabilizing both files' i_size.
782 if (!(fxr
->flags
& XFS_EXCHANGE_RANGE_TO_EOF
)) {
783 ret
= xfs_exchange_range_verify_area(fxr
);
788 /* Update cmtime if the fd/inode don't forbid it. */
789 if (!(fxr
->file1
->f_mode
& FMODE_NOCMTIME
) && !IS_NOCMTIME(inode1
))
790 fxr
->flags
|= __XFS_EXCHANGE_RANGE_UPD_CMTIME1
;
791 if (!(fxr
->file2
->f_mode
& FMODE_NOCMTIME
) && !IS_NOCMTIME(inode2
))
792 fxr
->flags
|= __XFS_EXCHANGE_RANGE_UPD_CMTIME2
;
794 file_start_write(fxr
->file2
);
795 ret
= xfs_exchrange_contents(fxr
);
796 file_end_write(fxr
->file2
);
800 fsnotify_modify(fxr
->file1
);
801 if (fxr
->file2
!= fxr
->file1
)
802 fsnotify_modify(fxr
->file2
);
806 /* Collect exchange-range arguments from userspace. */
808 xfs_ioc_exchange_range(
810 struct xfs_exchange_range __user
*argp
)
812 struct xfs_exchrange fxr
= {
815 struct xfs_exchange_range args
;
817 if (copy_from_user(&args
, argp
, sizeof(args
)))
819 if (memchr_inv(&args
.pad
, 0, sizeof(args
.pad
)))
821 if (args
.flags
& ~XFS_EXCHANGE_RANGE_ALL_FLAGS
)
824 fxr
.file1_offset
= args
.file1_offset
;
825 fxr
.file2_offset
= args
.file2_offset
;
826 fxr
.length
= args
.length
;
827 fxr
.flags
= args
.flags
;
829 CLASS(fd
, file1
)(args
.file1_fd
);
832 fxr
.file1
= fd_file(file1
);
834 return xfs_exchange_range(&fxr
);
837 /* Opaque freshness blob for XFS_IOC_COMMIT_RANGE */
838 struct xfs_commit_range_fresh
{
839 xfs_fsid_t fsid
; /* m_fixedfsid */
840 __u64 file2_ino
; /* inode number */
841 __s64 file2_mtime
; /* modification time */
842 __s64 file2_ctime
; /* change time */
843 __s32 file2_mtime_nsec
; /* mod time, nsec */
844 __s32 file2_ctime_nsec
; /* change time, nsec */
845 __u32 file2_gen
; /* inode generation */
846 __u32 magic
; /* zero */
848 #define XCR_FRESH_MAGIC 0x444F524B /* DORK */
850 /* Set up a commitrange operation by sampling file2's write-related attrs */
852 xfs_ioc_start_commit(
854 struct xfs_commit_range __user
*argp
)
856 struct xfs_commit_range args
= { };
857 struct timespec64 ts
;
858 struct xfs_commit_range_fresh
*kern_f
;
859 struct xfs_commit_range_fresh __user
*user_f
;
860 struct inode
*inode2
= file_inode(file
);
861 struct xfs_inode
*ip2
= XFS_I(inode2
);
862 const unsigned int lockflags
= XFS_IOLOCK_SHARED
|
863 XFS_MMAPLOCK_SHARED
|
866 BUILD_BUG_ON(sizeof(struct xfs_commit_range_fresh
) !=
867 sizeof(args
.file2_freshness
));
869 kern_f
= (struct xfs_commit_range_fresh
*)&args
.file2_freshness
;
871 memcpy(&kern_f
->fsid
, ip2
->i_mount
->m_fixedfsid
, sizeof(xfs_fsid_t
));
873 xfs_ilock(ip2
, lockflags
);
874 ts
= inode_get_ctime(inode2
);
875 kern_f
->file2_ctime
= ts
.tv_sec
;
876 kern_f
->file2_ctime_nsec
= ts
.tv_nsec
;
877 ts
= inode_get_mtime(inode2
);
878 kern_f
->file2_mtime
= ts
.tv_sec
;
879 kern_f
->file2_mtime_nsec
= ts
.tv_nsec
;
880 kern_f
->file2_ino
= ip2
->i_ino
;
881 kern_f
->file2_gen
= inode2
->i_generation
;
882 kern_f
->magic
= XCR_FRESH_MAGIC
;
883 xfs_iunlock(ip2
, lockflags
);
885 user_f
= (struct xfs_commit_range_fresh __user
*)&argp
->file2_freshness
;
886 if (copy_to_user(user_f
, kern_f
, sizeof(*kern_f
)))
893 * Exchange file1 and file2 contents if file2 has not been written since the
894 * start commit operation.
897 xfs_ioc_commit_range(
899 struct xfs_commit_range __user
*argp
)
901 struct xfs_exchrange fxr
= {
904 struct xfs_commit_range args
;
905 struct xfs_commit_range_fresh
*kern_f
;
906 struct xfs_inode
*ip2
= XFS_I(file_inode(file
));
907 struct xfs_mount
*mp
= ip2
->i_mount
;
909 kern_f
= (struct xfs_commit_range_fresh
*)&args
.file2_freshness
;
911 if (copy_from_user(&args
, argp
, sizeof(args
)))
913 if (args
.flags
& ~XFS_EXCHANGE_RANGE_ALL_FLAGS
)
915 if (kern_f
->magic
!= XCR_FRESH_MAGIC
)
917 if (memcmp(&kern_f
->fsid
, mp
->m_fixedfsid
, sizeof(xfs_fsid_t
)))
920 fxr
.file1_offset
= args
.file1_offset
;
921 fxr
.file2_offset
= args
.file2_offset
;
922 fxr
.length
= args
.length
;
923 fxr
.flags
= args
.flags
| __XFS_EXCHANGE_RANGE_CHECK_FRESH2
;
924 fxr
.file2_ino
= kern_f
->file2_ino
;
925 fxr
.file2_gen
= kern_f
->file2_gen
;
926 fxr
.file2_mtime
.tv_sec
= kern_f
->file2_mtime
;
927 fxr
.file2_mtime
.tv_nsec
= kern_f
->file2_mtime_nsec
;
928 fxr
.file2_ctime
.tv_sec
= kern_f
->file2_ctime
;
929 fxr
.file2_ctime
.tv_nsec
= kern_f
->file2_ctime_nsec
;
931 CLASS(fd
, file1
)(args
.file1_fd
);
934 fxr
.file1
= fd_file(file1
);
936 return xfs_exchange_range(&fxr
);