1 // SPDX-License-Identifier: GPL-2.0-only
4 * Copyright (C) 2011 Novell Inc.
7 #include <linux/module.h>
9 #include <linux/slab.h>
10 #include <linux/file.h>
11 #include <linux/fileattr.h>
12 #include <linux/splice.h>
13 #include <linux/xattr.h>
14 #include <linux/security.h>
15 #include <linux/uaccess.h>
16 #include <linux/sched/signal.h>
17 #include <linux/cred.h>
18 #include <linux/namei.h>
19 #include <linux/ratelimit.h>
20 #include <linux/exportfs.h>
21 #include "overlayfs.h"
23 #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
25 static int ovl_ccup_set(const char *buf
, const struct kernel_param
*param
)
27 pr_warn("\"check_copy_up\" module option is obsolete\n");
31 static int ovl_ccup_get(char *buf
, const struct kernel_param
*param
)
33 return sprintf(buf
, "N\n");
36 module_param_call(check_copy_up
, ovl_ccup_set
, ovl_ccup_get
, NULL
, 0644);
37 MODULE_PARM_DESC(check_copy_up
, "Obsolete; does nothing");
39 static bool ovl_must_copy_xattr(const char *name
)
41 return !strcmp(name
, XATTR_POSIX_ACL_ACCESS
) ||
42 !strcmp(name
, XATTR_POSIX_ACL_DEFAULT
) ||
43 !strncmp(name
, XATTR_SECURITY_PREFIX
, XATTR_SECURITY_PREFIX_LEN
);
46 static int ovl_copy_acl(struct ovl_fs
*ofs
, const struct path
*path
,
47 struct dentry
*dentry
, const char *acl_name
)
50 struct posix_acl
*clone
, *real_acl
= NULL
;
52 real_acl
= ovl_get_acl_path(path
, acl_name
, false);
56 if (IS_ERR(real_acl
)) {
57 err
= PTR_ERR(real_acl
);
58 if (err
== -ENODATA
|| err
== -EOPNOTSUPP
)
63 clone
= posix_acl_clone(real_acl
, GFP_KERNEL
);
64 posix_acl_release(real_acl
); /* release original acl */
68 err
= ovl_do_set_acl(ofs
, dentry
, acl_name
, clone
);
70 /* release cloned acl */
71 posix_acl_release(clone
);
75 int ovl_copy_xattr(struct super_block
*sb
, const struct path
*oldpath
, struct dentry
*new)
77 struct dentry
*old
= oldpath
->dentry
;
78 ssize_t list_size
, size
, value_size
= 0;
79 char *buf
, *name
, *value
= NULL
;
83 if (!old
->d_inode
->i_op
->listxattr
|| !new->d_inode
->i_op
->listxattr
)
86 list_size
= vfs_listxattr(old
, NULL
, 0);
88 if (list_size
== -EOPNOTSUPP
)
93 buf
= kvzalloc(list_size
, GFP_KERNEL
);
97 list_size
= vfs_listxattr(old
, buf
, list_size
);
103 for (name
= buf
; list_size
; name
+= slen
) {
104 slen
= strnlen(name
, list_size
) + 1;
106 /* underlying fs providing us with a broken xattr list? */
107 if (WARN_ON(slen
> list_size
)) {
113 if (ovl_is_private_xattr(sb
, name
))
116 error
= security_inode_copy_up_xattr(old
, name
);
117 if (error
== -ECANCELED
) {
119 continue; /* Discard */
121 if (error
< 0 && error
!= -EOPNOTSUPP
)
124 if (is_posix_acl_xattr(name
)) {
125 error
= ovl_copy_acl(OVL_FS(sb
), oldpath
, new, name
);
128 /* POSIX ACLs must be copied. */
133 size
= ovl_do_getxattr(oldpath
, name
, value
, value_size
);
135 size
= ovl_do_getxattr(oldpath
, name
, NULL
, 0);
142 if (size
> value_size
) {
145 new = kvmalloc(size
, GFP_KERNEL
);
156 error
= ovl_do_setxattr(OVL_FS(sb
), new, name
, value
, size
, 0);
158 if (error
!= -EOPNOTSUPP
|| ovl_must_copy_xattr(name
))
161 /* Ignore failure to copy unknown xattrs */
171 static int ovl_copy_fileattr(struct inode
*inode
, const struct path
*old
,
172 const struct path
*new)
174 struct fileattr oldfa
= { .flags_valid
= true };
175 struct fileattr newfa
= { .flags_valid
= true };
178 err
= ovl_real_fileattr_get(old
, &oldfa
);
180 /* Ntfs-3g returns -EINVAL for "no fileattr support" */
181 if (err
== -ENOTTY
|| err
== -EINVAL
)
183 pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n",
189 * We cannot set immutable and append-only flags on upper inode,
190 * because we would not be able to link upper inode to upper dir
191 * nor set overlay private xattr on upper inode.
192 * Store these flags in overlay.protattr xattr instead.
194 if (oldfa
.flags
& OVL_PROT_FS_FLAGS_MASK
) {
195 err
= ovl_set_protattr(inode
, new->dentry
, &oldfa
);
197 pr_warn_once("copying fileattr: no xattr on upper\n");
202 /* Don't bother copying flags if none are set */
203 if (!(oldfa
.flags
& OVL_COPY_FS_FLAGS_MASK
))
206 err
= ovl_real_fileattr_get(new, &newfa
);
209 * Returning an error if upper doesn't support fileattr will
210 * result in a regression, so revert to the old behavior.
212 if (err
== -ENOTTY
|| err
== -EINVAL
) {
213 pr_warn_once("copying fileattr: no support on upper\n");
216 pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n",
221 BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK
& ~FS_COMMON_FL
);
222 newfa
.flags
&= ~OVL_COPY_FS_FLAGS_MASK
;
223 newfa
.flags
|= (oldfa
.flags
& OVL_COPY_FS_FLAGS_MASK
);
225 BUILD_BUG_ON(OVL_COPY_FSX_FLAGS_MASK
& ~FS_XFLAG_COMMON
);
226 newfa
.fsx_xflags
&= ~OVL_COPY_FSX_FLAGS_MASK
;
227 newfa
.fsx_xflags
|= (oldfa
.fsx_xflags
& OVL_COPY_FSX_FLAGS_MASK
);
229 return ovl_real_fileattr_set(new, &newfa
);
232 static int ovl_verify_area(loff_t pos
, loff_t pos2
, loff_t len
, loff_t totlen
)
238 if (pos
< 0 || len
< 0 || totlen
< 0)
240 if (check_add_overflow(pos
, len
, &tmp
))
245 static int ovl_sync_file(struct path
*path
)
247 struct file
*new_file
;
250 new_file
= ovl_path_open(path
, O_LARGEFILE
| O_RDONLY
);
251 if (IS_ERR(new_file
))
252 return PTR_ERR(new_file
);
254 err
= vfs_fsync(new_file
, 0);
260 static int ovl_copy_up_file(struct ovl_fs
*ofs
, struct dentry
*dentry
,
261 struct file
*new_file
, loff_t len
,
264 struct path datapath
;
265 struct file
*old_file
;
269 loff_t data_pos
= -1;
271 bool skip_hole
= false;
274 ovl_path_lowerdata(dentry
, &datapath
);
275 if (WARN_ON_ONCE(datapath
.dentry
== NULL
) ||
276 WARN_ON_ONCE(len
< 0))
279 old_file
= ovl_path_open(&datapath
, O_LARGEFILE
| O_RDONLY
);
280 if (IS_ERR(old_file
))
281 return PTR_ERR(old_file
);
283 /* Try to use clone_file_range to clone up within the same fs */
284 cloned
= vfs_clone_file_range(old_file
, 0, new_file
, 0, len
, 0);
288 /* Couldn't clone, so now we try to copy the data */
289 error
= rw_verify_area(READ
, old_file
, &old_pos
, len
);
291 error
= rw_verify_area(WRITE
, new_file
, &new_pos
, len
);
295 /* Check if lower fs supports seek operation */
296 if (old_file
->f_mode
& FMODE_LSEEK
)
300 size_t this_len
= OVL_COPY_UP_CHUNK_SIZE
;
306 if (signal_pending_state(TASK_KILLABLE
, current
)) {
312 * Filling holes with zeros would waste disk space and
313 * slow down copy-up, so we optimize for holes during
314 * copy-up. This relies on the SEEK_DATA implementation
315 * in the lower fs, so if the lower fs does not support
316 * it, copy-up will behave as before.
318 * The detailed logic of hole detection is as follows:
319 * When we detect next data position is larger than current
320 * position we will skip that hole, otherwise we copy
321 * data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually,
322 * it may not recognize all kind of holes and sometimes
323 * only skips partial of hole area. However, it will be
324 * enough for most of the use cases.
326 * We do not hold upper sb_writers throughout the loop to avert
327 * lockdep warning with llseek of lower file in nested overlay:
329 * -- lower ovl_inode_lock (ovl_llseek)
331 if (skip_hole
&& data_pos
< old_pos
) {
332 data_pos
= vfs_llseek(old_file
, old_pos
, SEEK_DATA
);
333 if (data_pos
> old_pos
) {
334 hole_len
= data_pos
- old_pos
;
336 old_pos
= new_pos
= data_pos
;
338 } else if (data_pos
== -ENXIO
) {
340 } else if (data_pos
< 0) {
345 error
= ovl_verify_area(old_pos
, new_pos
, this_len
, len
);
349 bytes
= do_splice_direct(old_file
, &old_pos
,
351 this_len
, SPLICE_F_MOVE
);
356 WARN_ON(old_pos
!= new_pos
);
360 /* call fsync once, either now or later along with metadata */
361 if (!error
&& ovl_should_sync(ofs
) && datasync
)
362 error
= vfs_fsync(new_file
, 0);
368 static int ovl_set_size(struct ovl_fs
*ofs
,
369 struct dentry
*upperdentry
, struct kstat
*stat
)
371 struct iattr attr
= {
372 .ia_valid
= ATTR_SIZE
,
373 .ia_size
= stat
->size
,
376 return ovl_do_notify_change(ofs
, upperdentry
, &attr
);
379 static int ovl_set_timestamps(struct ovl_fs
*ofs
, struct dentry
*upperdentry
,
382 struct iattr attr
= {
384 ATTR_ATIME
| ATTR_MTIME
| ATTR_ATIME_SET
| ATTR_MTIME_SET
| ATTR_CTIME
,
385 .ia_atime
= stat
->atime
,
386 .ia_mtime
= stat
->mtime
,
389 return ovl_do_notify_change(ofs
, upperdentry
, &attr
);
392 int ovl_set_attr(struct ovl_fs
*ofs
, struct dentry
*upperdentry
,
397 if (!S_ISLNK(stat
->mode
)) {
398 struct iattr attr
= {
399 .ia_valid
= ATTR_MODE
,
400 .ia_mode
= stat
->mode
,
402 err
= ovl_do_notify_change(ofs
, upperdentry
, &attr
);
405 struct iattr attr
= {
406 .ia_valid
= ATTR_UID
| ATTR_GID
,
407 .ia_vfsuid
= VFSUIDT_INIT(stat
->uid
),
408 .ia_vfsgid
= VFSGIDT_INIT(stat
->gid
),
410 err
= ovl_do_notify_change(ofs
, upperdentry
, &attr
);
413 ovl_set_timestamps(ofs
, upperdentry
, stat
);
418 struct ovl_fh
*ovl_encode_real_fh(struct ovl_fs
*ofs
, struct dentry
*real
,
423 int buflen
= MAX_HANDLE_SZ
;
424 uuid_t
*uuid
= &real
->d_sb
->s_uuid
;
427 /* Make sure the real fid stays 32bit aligned */
428 BUILD_BUG_ON(OVL_FH_FID_OFFSET
% 4);
429 BUILD_BUG_ON(MAX_HANDLE_SZ
+ OVL_FH_FID_OFFSET
> 255);
431 fh
= kzalloc(buflen
+ OVL_FH_FID_OFFSET
, GFP_KERNEL
);
433 return ERR_PTR(-ENOMEM
);
436 * We encode a non-connectable file handle for non-dir, because we
437 * only need to find the lower inode number and we don't want to pay
438 * the price of reconnecting the dentry.
440 dwords
= buflen
>> 2;
441 fh_type
= exportfs_encode_fh(real
, (void *)fh
->fb
.fid
, &dwords
, 0);
442 buflen
= (dwords
<< 2);
445 if (WARN_ON(fh_type
< 0) ||
446 WARN_ON(buflen
> MAX_HANDLE_SZ
) ||
447 WARN_ON(fh_type
== FILEID_INVALID
))
450 fh
->fb
.version
= OVL_FH_VERSION
;
451 fh
->fb
.magic
= OVL_FH_MAGIC
;
452 fh
->fb
.type
= fh_type
;
453 fh
->fb
.flags
= OVL_FH_FLAG_CPU_ENDIAN
;
455 * When we will want to decode an overlay dentry from this handle
456 * and all layers are on the same fs, if we get a disconnected real
457 * dentry when we decode fid, the only way to tell if we should assign
458 * it to upperdentry or to lowerstack is by checking this flag.
461 fh
->fb
.flags
|= OVL_FH_FLAG_PATH_UPPER
;
462 fh
->fb
.len
= sizeof(fh
->fb
) + buflen
;
463 if (ovl_origin_uuid(ofs
))
473 struct ovl_fh
*ovl_get_origin_fh(struct ovl_fs
*ofs
, struct dentry
*origin
)
476 * When lower layer doesn't support export operations store a 'null' fh,
477 * so we can use the overlay.origin xattr to distinguish between a copy
478 * up and a pure upper inode.
480 if (!ovl_can_decode_fh(origin
->d_sb
))
483 return ovl_encode_real_fh(ofs
, origin
, false);
486 int ovl_set_origin_fh(struct ovl_fs
*ofs
, const struct ovl_fh
*fh
,
487 struct dentry
*upper
)
492 * Do not fail when upper doesn't support xattrs.
494 err
= ovl_check_setxattr(ofs
, upper
, OVL_XATTR_ORIGIN
, fh
->buf
,
495 fh
? fh
->fb
.len
: 0, 0);
497 /* Ignore -EPERM from setting "user.*" on symlink/special */
498 return err
== -EPERM
? 0 : err
;
501 /* Store file handle of @upper dir in @index dir entry */
502 static int ovl_set_upper_fh(struct ovl_fs
*ofs
, struct dentry
*upper
,
503 struct dentry
*index
)
505 const struct ovl_fh
*fh
;
508 fh
= ovl_encode_real_fh(ofs
, upper
, true);
512 err
= ovl_setxattr(ofs
, index
, OVL_XATTR_UPPER
, fh
->buf
, fh
->fb
.len
);
519 * Create and install index entry.
521 * Caller must hold i_mutex on indexdir.
523 static int ovl_create_index(struct dentry
*dentry
, const struct ovl_fh
*fh
,
524 struct dentry
*upper
)
526 struct ovl_fs
*ofs
= OVL_FS(dentry
->d_sb
);
527 struct dentry
*indexdir
= ovl_indexdir(dentry
->d_sb
);
528 struct inode
*dir
= d_inode(indexdir
);
529 struct dentry
*index
= NULL
;
530 struct dentry
*temp
= NULL
;
531 struct qstr name
= { };
535 * For now this is only used for creating index entry for directories,
536 * because non-dir are copied up directly to index and then hardlinked
539 * TODO: implement create index for non-dir, so we can call it when
540 * encoding file handle for non-dir in case index does not exist.
542 if (WARN_ON(!d_is_dir(dentry
)))
545 /* Directory not expected to be indexed before copy up */
546 if (WARN_ON(ovl_test_flag(OVL_INDEX
, d_inode(dentry
))))
549 err
= ovl_get_index_name_fh(fh
, &name
);
553 temp
= ovl_create_temp(ofs
, indexdir
, OVL_CATTR(S_IFDIR
| 0));
558 err
= ovl_set_upper_fh(ofs
, upper
, temp
);
562 index
= ovl_lookup_upper(ofs
, name
.name
, indexdir
, name
.len
);
564 err
= PTR_ERR(index
);
566 err
= ovl_do_rename(ofs
, dir
, temp
, dir
, index
, 0);
571 ovl_cleanup(ofs
, dir
, temp
);
578 struct ovl_copy_up_ctx
{
579 struct dentry
*parent
;
580 struct dentry
*dentry
;
581 struct path lowerpath
;
585 struct dentry
*destdir
;
586 struct qstr destname
;
587 struct dentry
*workdir
;
588 const struct ovl_fh
*origin_fh
;
592 bool metacopy_digest
;
596 static int ovl_link_up(struct ovl_copy_up_ctx
*c
)
599 struct dentry
*upper
;
600 struct dentry
*upperdir
= ovl_dentry_upper(c
->parent
);
601 struct ovl_fs
*ofs
= OVL_FS(c
->dentry
->d_sb
);
602 struct inode
*udir
= d_inode(upperdir
);
604 ovl_start_write(c
->dentry
);
606 /* Mark parent "impure" because it may now contain non-pure upper */
607 err
= ovl_set_impure(c
->parent
, upperdir
);
611 err
= ovl_set_nlink_lower(c
->dentry
);
615 inode_lock_nested(udir
, I_MUTEX_PARENT
);
616 upper
= ovl_lookup_upper(ofs
, c
->dentry
->d_name
.name
, upperdir
,
617 c
->dentry
->d_name
.len
);
618 err
= PTR_ERR(upper
);
619 if (!IS_ERR(upper
)) {
620 err
= ovl_do_link(ofs
, ovl_dentry_upper(c
->dentry
), udir
, upper
);
624 /* Restore timestamps on parent (best effort) */
625 ovl_set_timestamps(ofs
, upperdir
, &c
->pstat
);
626 ovl_dentry_set_upper_alias(c
->dentry
);
627 ovl_dentry_update_reval(c
->dentry
, upper
);
634 err
= ovl_set_nlink_upper(c
->dentry
);
637 ovl_end_write(c
->dentry
);
641 static int ovl_copy_up_data(struct ovl_copy_up_ctx
*c
, const struct path
*temp
)
643 struct ovl_fs
*ofs
= OVL_FS(c
->dentry
->d_sb
);
644 struct file
*new_file
;
647 if (!S_ISREG(c
->stat
.mode
) || c
->metacopy
|| !c
->stat
.size
)
650 new_file
= ovl_path_open(temp
, O_LARGEFILE
| O_WRONLY
);
651 if (IS_ERR(new_file
))
652 return PTR_ERR(new_file
);
654 err
= ovl_copy_up_file(ofs
, c
->dentry
, new_file
, c
->stat
.size
,
661 static int ovl_copy_up_metadata(struct ovl_copy_up_ctx
*c
, struct dentry
*temp
)
663 struct ovl_fs
*ofs
= OVL_FS(c
->dentry
->d_sb
);
664 struct inode
*inode
= d_inode(c
->dentry
);
665 struct path upperpath
= { .mnt
= ovl_upper_mnt(ofs
), .dentry
= temp
};
668 err
= ovl_copy_xattr(c
->dentry
->d_sb
, &c
->lowerpath
, temp
);
672 if (inode
->i_flags
& OVL_COPY_I_FLAGS_MASK
&&
673 (S_ISREG(c
->stat
.mode
) || S_ISDIR(c
->stat
.mode
))) {
675 * Copy the fileattr inode flags that are the source of already
678 err
= ovl_copy_fileattr(inode
, &c
->lowerpath
, &upperpath
);
684 * Store identifier of lower inode in upper inode xattr to
685 * allow lookup of the copy up origin inode.
687 * Don't set origin when we are breaking the association with a lower
691 err
= ovl_set_origin_fh(ofs
, c
->origin_fh
, temp
);
697 struct path lowerdatapath
;
698 struct ovl_metacopy metacopy_data
= OVL_METACOPY_INIT
;
700 ovl_path_lowerdata(c
->dentry
, &lowerdatapath
);
701 if (WARN_ON_ONCE(lowerdatapath
.dentry
== NULL
))
703 err
= ovl_get_verity_digest(ofs
, &lowerdatapath
, &metacopy_data
);
707 if (metacopy_data
.digest_algo
)
708 c
->metacopy_digest
= true;
710 err
= ovl_set_metacopy_xattr(ofs
, temp
, &metacopy_data
);
715 inode_lock(temp
->d_inode
);
716 if (S_ISREG(c
->stat
.mode
))
717 err
= ovl_set_size(ofs
, temp
, &c
->stat
);
719 err
= ovl_set_attr(ofs
, temp
, &c
->stat
);
720 inode_unlock(temp
->d_inode
);
722 /* fsync metadata before moving it into upper dir */
723 if (!err
&& ovl_should_sync(ofs
) && c
->metadata_fsync
)
724 err
= ovl_sync_file(&upperpath
);
729 struct ovl_cu_creds
{
730 const struct cred
*old
;
734 static int ovl_prep_cu_creds(struct dentry
*dentry
, struct ovl_cu_creds
*cc
)
738 cc
->old
= cc
->new = NULL
;
739 err
= security_inode_copy_up(dentry
, &cc
->new);
744 cc
->old
= override_creds(cc
->new);
749 static void ovl_revert_cu_creds(struct ovl_cu_creds
*cc
)
752 revert_creds(cc
->old
);
758 * Copyup using workdir to prepare temp file. Used when copying up directories,
759 * special files or when upper fs doesn't support O_TMPFILE.
761 static int ovl_copy_up_workdir(struct ovl_copy_up_ctx
*c
)
763 struct ovl_fs
*ofs
= OVL_FS(c
->dentry
->d_sb
);
765 struct inode
*udir
= d_inode(c
->destdir
), *wdir
= d_inode(c
->workdir
);
766 struct path path
= { .mnt
= ovl_upper_mnt(ofs
) };
767 struct dentry
*temp
, *upper
, *trap
;
768 struct ovl_cu_creds cc
;
770 struct ovl_cattr cattr
= {
771 /* Can't properly set mode on creation because of the umask */
772 .mode
= c
->stat
.mode
& S_IFMT
,
773 .rdev
= c
->stat
.rdev
,
777 err
= ovl_prep_cu_creds(c
->dentry
, &cc
);
781 ovl_start_write(c
->dentry
);
783 temp
= ovl_create_temp(ofs
, c
->workdir
, &cattr
);
785 ovl_end_write(c
->dentry
);
786 ovl_revert_cu_creds(&cc
);
789 return PTR_ERR(temp
);
792 * Copy up data first and then xattrs. Writing data after
793 * xattrs will remove security.capability xattr automatically.
796 err
= ovl_copy_up_data(c
, &path
);
798 * We cannot hold lock_rename() throughout this helper, because of
799 * lock ordering with sb_writers, which shouldn't be held when calling
800 * ovl_copy_up_data(), so lock workdir and destdir and make sure that
801 * temp wasn't moved before copy up completion or cleanup.
803 ovl_start_write(c
->dentry
);
804 trap
= lock_rename(c
->workdir
, c
->destdir
);
805 if (trap
|| temp
->d_parent
!= c
->workdir
) {
806 /* temp or workdir moved underneath us? abort without cleanup */
816 err
= ovl_copy_up_metadata(c
, temp
);
820 if (S_ISDIR(c
->stat
.mode
) && c
->indexed
) {
821 err
= ovl_create_index(c
->dentry
, c
->origin_fh
, temp
);
826 upper
= ovl_lookup_upper(ofs
, c
->destname
.name
, c
->destdir
,
828 err
= PTR_ERR(upper
);
832 err
= ovl_do_rename(ofs
, wdir
, temp
, udir
, upper
, 0);
837 inode
= d_inode(c
->dentry
);
838 if (c
->metacopy_digest
)
839 ovl_set_flag(OVL_HAS_DIGEST
, inode
);
841 ovl_clear_flag(OVL_HAS_DIGEST
, inode
);
842 ovl_clear_flag(OVL_VERIFIED_DIGEST
, inode
);
845 ovl_set_upperdata(inode
);
846 ovl_inode_update(inode
, temp
);
847 if (S_ISDIR(inode
->i_mode
))
848 ovl_set_flag(OVL_WHITEOUTS
, inode
);
850 unlock_rename(c
->workdir
, c
->destdir
);
852 ovl_end_write(c
->dentry
);
857 ovl_cleanup(ofs
, wdir
, temp
);
862 /* Copyup using O_TMPFILE which does not require cross dir locking */
863 static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx
*c
)
865 struct ovl_fs
*ofs
= OVL_FS(c
->dentry
->d_sb
);
866 struct inode
*udir
= d_inode(c
->destdir
);
867 struct dentry
*temp
, *upper
;
868 struct file
*tmpfile
;
869 struct ovl_cu_creds cc
;
872 err
= ovl_prep_cu_creds(c
->dentry
, &cc
);
876 ovl_start_write(c
->dentry
);
877 tmpfile
= ovl_do_tmpfile(ofs
, c
->workdir
, c
->stat
.mode
);
878 ovl_end_write(c
->dentry
);
879 ovl_revert_cu_creds(&cc
);
881 return PTR_ERR(tmpfile
);
883 temp
= tmpfile
->f_path
.dentry
;
884 if (!c
->metacopy
&& c
->stat
.size
) {
885 err
= ovl_copy_up_file(ofs
, c
->dentry
, tmpfile
, c
->stat
.size
,
891 ovl_start_write(c
->dentry
);
893 err
= ovl_copy_up_metadata(c
, temp
);
897 inode_lock_nested(udir
, I_MUTEX_PARENT
);
899 upper
= ovl_lookup_upper(ofs
, c
->destname
.name
, c
->destdir
,
901 err
= PTR_ERR(upper
);
902 if (!IS_ERR(upper
)) {
903 err
= ovl_do_link(ofs
, temp
, udir
, upper
);
911 if (c
->metacopy_digest
)
912 ovl_set_flag(OVL_HAS_DIGEST
, d_inode(c
->dentry
));
914 ovl_clear_flag(OVL_HAS_DIGEST
, d_inode(c
->dentry
));
915 ovl_clear_flag(OVL_VERIFIED_DIGEST
, d_inode(c
->dentry
));
918 ovl_set_upperdata(d_inode(c
->dentry
));
919 ovl_inode_update(d_inode(c
->dentry
), dget(temp
));
922 ovl_end_write(c
->dentry
);
929 * Copy up a single dentry
931 * All renames start with copy up of source if necessary. The actual
932 * rename will only proceed once the copy up was successful. Copy up uses
933 * upper parent i_mutex for exclusion. Since rename can change d_parent it
934 * is possible that the copy up will lock the old parent. At that point
935 * the file will have already been copied up anyway.
937 static int ovl_do_copy_up(struct ovl_copy_up_ctx
*c
)
940 struct ovl_fs
*ofs
= OVL_FS(c
->dentry
->d_sb
);
941 struct dentry
*origin
= c
->lowerpath
.dentry
;
942 struct ovl_fh
*fh
= NULL
;
943 bool to_index
= false;
946 * Indexed non-dir is copied up directly to the index entry and then
947 * hardlinked to upper dir. Indexed dir is copied up to indexdir,
948 * then index entry is created and then copied up dir installed.
949 * Copying dir up to indexdir instead of workdir simplifies locking.
951 if (ovl_need_index(c
->dentry
)) {
953 if (S_ISDIR(c
->stat
.mode
))
954 c
->workdir
= ovl_indexdir(c
->dentry
->d_sb
);
959 if (S_ISDIR(c
->stat
.mode
) || c
->stat
.nlink
== 1 || to_index
) {
960 fh
= ovl_get_origin_fh(ofs
, origin
);
964 /* origin_fh may be NULL */
970 c
->destdir
= ovl_indexdir(c
->dentry
->d_sb
);
971 err
= ovl_get_index_name(ofs
, origin
, &c
->destname
);
974 } else if (WARN_ON(!c
->parent
)) {
975 /* Disconnected dentry must be copied up to index dir */
980 * c->dentry->d_name is stabilized by ovl_copy_up_start(),
981 * because if we got here, it means that c->dentry has no upper
982 * alias and changing ->d_name means going through ovl_rename()
983 * that will call ovl_copy_up() on source and target dentry.
985 c
->destname
= c
->dentry
->d_name
;
987 * Mark parent "impure" because it may now contain non-pure
990 ovl_start_write(c
->dentry
);
991 err
= ovl_set_impure(c
->parent
, c
->destdir
);
992 ovl_end_write(c
->dentry
);
997 /* Should we copyup with O_TMPFILE or with workdir? */
998 if (S_ISREG(c
->stat
.mode
) && ofs
->tmpfile
)
999 err
= ovl_copy_up_tmpfile(c
);
1001 err
= ovl_copy_up_workdir(c
);
1006 ovl_set_flag(OVL_INDEX
, d_inode(c
->dentry
));
1008 ovl_start_write(c
->dentry
);
1010 /* Initialize nlink for copy up of disconnected dentry */
1011 err
= ovl_set_nlink_upper(c
->dentry
);
1013 struct inode
*udir
= d_inode(c
->destdir
);
1015 /* Restore timestamps on parent (best effort) */
1017 ovl_set_timestamps(ofs
, c
->destdir
, &c
->pstat
);
1020 ovl_dentry_set_upper_alias(c
->dentry
);
1021 ovl_dentry_update_reval(c
->dentry
, ovl_dentry_upper(c
->dentry
));
1023 ovl_end_write(c
->dentry
);
1027 kfree(c
->destname
.name
);
1033 static bool ovl_need_meta_copy_up(struct dentry
*dentry
, umode_t mode
,
1036 struct ovl_fs
*ofs
= OVL_FS(dentry
->d_sb
);
1038 if (!ofs
->config
.metacopy
)
1044 if (flags
&& ((OPEN_FMODE(flags
) & FMODE_WRITE
) || (flags
& O_TRUNC
)))
1047 /* Fall back to full copy if no fsverity on source data and we require verity */
1048 if (ofs
->config
.verity_mode
== OVL_VERITY_REQUIRE
) {
1049 struct path lowerdata
;
1051 ovl_path_lowerdata(dentry
, &lowerdata
);
1053 if (WARN_ON_ONCE(lowerdata
.dentry
== NULL
) ||
1054 ovl_ensure_verity_loaded(&lowerdata
) ||
1055 !fsverity_active(d_inode(lowerdata
.dentry
))) {
1063 static ssize_t
ovl_getxattr_value(const struct path
*path
, char *name
, char **value
)
1068 res
= ovl_do_getxattr(path
, name
, NULL
, 0);
1069 if (res
== -ENODATA
|| res
== -EOPNOTSUPP
)
1073 buf
= kzalloc(res
, GFP_KERNEL
);
1077 res
= ovl_do_getxattr(path
, name
, buf
, res
);
1086 /* Copy up data of an inode which was copied up metadata only in the past. */
1087 static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx
*c
)
1089 struct ovl_fs
*ofs
= OVL_FS(c
->dentry
->d_sb
);
1090 struct path upperpath
;
1092 char *capability
= NULL
;
1095 ovl_path_upper(c
->dentry
, &upperpath
);
1096 if (WARN_ON(upperpath
.dentry
== NULL
))
1100 err
= cap_size
= ovl_getxattr_value(&upperpath
, XATTR_NAME_CAPS
,
1106 err
= ovl_copy_up_data(c
, &upperpath
);
1111 * Writing to upper file will clear security.capability xattr. We
1112 * don't want that to happen for normal copy-up operation.
1114 ovl_start_write(c
->dentry
);
1116 err
= ovl_do_setxattr(ofs
, upperpath
.dentry
, XATTR_NAME_CAPS
,
1117 capability
, cap_size
, 0);
1120 err
= ovl_removexattr(ofs
, upperpath
.dentry
,
1121 OVL_XATTR_METACOPY
);
1123 ovl_end_write(c
->dentry
);
1127 ovl_clear_flag(OVL_HAS_DIGEST
, d_inode(c
->dentry
));
1128 ovl_clear_flag(OVL_VERIFIED_DIGEST
, d_inode(c
->dentry
));
1129 ovl_set_upperdata(d_inode(c
->dentry
));
1136 static int ovl_copy_up_one(struct dentry
*parent
, struct dentry
*dentry
,
1140 DEFINE_DELAYED_CALL(done
);
1141 struct path parentpath
;
1142 struct ovl_copy_up_ctx ctx
= {
1145 .workdir
= ovl_workdir(dentry
),
1148 if (WARN_ON(!ctx
.workdir
))
1151 ovl_path_lower(dentry
, &ctx
.lowerpath
);
1152 err
= vfs_getattr(&ctx
.lowerpath
, &ctx
.stat
,
1153 STATX_BASIC_STATS
, AT_STATX_SYNC_AS_STAT
);
1157 if (!kuid_has_mapping(current_user_ns(), ctx
.stat
.uid
) ||
1158 !kgid_has_mapping(current_user_ns(), ctx
.stat
.gid
))
1162 * With metacopy disabled, we fsync after final metadata copyup, for
1163 * both regular files and directories to get atomic copyup semantics
1164 * on filesystems that do not use strict metadata ordering (e.g. ubifs).
1166 * With metacopy enabled we want to avoid fsync on all meta copyup
1167 * that will hurt performance of workloads such as chown -R, so we
1168 * only fsync on data copyup as legacy behavior.
1170 ctx
.metadata_fsync
= !OVL_FS(dentry
->d_sb
)->config
.metacopy
&&
1171 (S_ISREG(ctx
.stat
.mode
) || S_ISDIR(ctx
.stat
.mode
));
1172 ctx
.metacopy
= ovl_need_meta_copy_up(dentry
, ctx
.stat
.mode
, flags
);
1175 ovl_path_upper(parent
, &parentpath
);
1176 ctx
.destdir
= parentpath
.dentry
;
1178 err
= vfs_getattr(&parentpath
, &ctx
.pstat
,
1179 STATX_ATIME
| STATX_MTIME
,
1180 AT_STATX_SYNC_AS_STAT
);
1185 /* maybe truncate regular file. this has no effect on dirs */
1186 if (flags
& O_TRUNC
)
1189 if (S_ISLNK(ctx
.stat
.mode
)) {
1190 ctx
.link
= vfs_get_link(ctx
.lowerpath
.dentry
, &done
);
1191 if (IS_ERR(ctx
.link
))
1192 return PTR_ERR(ctx
.link
);
1195 err
= ovl_copy_up_start(dentry
, flags
);
1196 /* err < 0: interrupted, err > 0: raced with another copy-up */
1197 if (unlikely(err
)) {
1201 if (!ovl_dentry_upper(dentry
))
1202 err
= ovl_do_copy_up(&ctx
);
1203 if (!err
&& parent
&& !ovl_dentry_has_upper_alias(dentry
))
1204 err
= ovl_link_up(&ctx
);
1205 if (!err
&& ovl_dentry_needs_data_copy_up_locked(dentry
, flags
))
1206 err
= ovl_copy_up_meta_inode_data(&ctx
);
1207 ovl_copy_up_end(dentry
);
1209 do_delayed_call(&done
);
1214 static int ovl_copy_up_flags(struct dentry
*dentry
, int flags
)
1217 const struct cred
*old_cred
;
1218 bool disconnected
= (dentry
->d_flags
& DCACHE_DISCONNECTED
);
1221 * With NFS export, copy up can get called for a disconnected non-dir.
1222 * In this case, we will copy up lower inode to index dir without
1223 * linking it to upper dir.
1225 if (WARN_ON(disconnected
&& d_is_dir(dentry
)))
1229 * We may not need lowerdata if we are only doing metacopy up, but it is
1230 * not very important to optimize this case, so do lazy lowerdata lookup
1231 * before any copy up, so we can do it before taking ovl_inode_lock().
1233 err
= ovl_verify_lowerdata(dentry
);
1237 old_cred
= ovl_override_creds(dentry
->d_sb
);
1239 struct dentry
*next
;
1240 struct dentry
*parent
= NULL
;
1242 if (ovl_already_copied_up(dentry
, flags
))
1245 next
= dget(dentry
);
1246 /* find the topmost dentry not yet copied up */
1247 for (; !disconnected
;) {
1248 parent
= dget_parent(next
);
1250 if (ovl_dentry_upper(parent
))
1257 err
= ovl_copy_up_one(parent
, next
, flags
);
1262 ovl_revert_creds(old_cred
);
1267 static bool ovl_open_need_copy_up(struct dentry
*dentry
, int flags
)
1269 /* Copy up of disconnected dentry does not set upper alias */
1270 if (ovl_already_copied_up(dentry
, flags
))
1273 if (special_file(d_inode(dentry
)->i_mode
))
1276 if (!ovl_open_flags_need_copy_up(flags
))
1282 int ovl_maybe_copy_up(struct dentry
*dentry
, int flags
)
1284 if (!ovl_open_need_copy_up(dentry
, flags
))
1287 return ovl_copy_up_flags(dentry
, flags
);
1290 int ovl_copy_up_with_data(struct dentry
*dentry
)
1292 return ovl_copy_up_flags(dentry
, O_WRONLY
);
1295 int ovl_copy_up(struct dentry
*dentry
)
1297 return ovl_copy_up_flags(dentry
, 0);