1 // SPDX-License-Identifier: GPL-2.0-only
4 * Copyright (C) 2011 Novell Inc.
7 #include <linux/module.h>
9 #include <linux/slab.h>
10 #include <linux/file.h>
11 #include <linux/splice.h>
12 #include <linux/xattr.h>
13 #include <linux/security.h>
14 #include <linux/uaccess.h>
15 #include <linux/sched/signal.h>
16 #include <linux/cred.h>
17 #include <linux/namei.h>
18 #include <linux/fdtable.h>
19 #include <linux/ratelimit.h>
20 #include <linux/exportfs.h>
21 #include "overlayfs.h"
/*
 * Chunk length for data copy-up: 1 << 20 bytes (1 MiB).
 * ovl_copy_up_data() uses it as the initial length of a
 * do_splice_direct() request.
 */
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
/*
 * Setter for the obsolete "check_copy_up" module parameter.
 *
 * @buf:   value written by the user; not examined
 * @param: parameter descriptor; not examined
 *
 * Only logs a warning that the option is obsolete.  The written value is
 * discarded and the write is reported as successful.
 */
static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
{
	pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n");
	return 0;
}
/*
 * Getter for the obsolete "check_copy_up" module parameter.
 *
 * @buf:   output buffer that receives the textual value
 * @param: parameter descriptor; not examined
 *
 * Always reports the constant string "N\n".
 *
 * Return: number of characters written to @buf, as returned by sprintf().
 */
static int ovl_ccup_get(char *buf, const struct kernel_param *param)
{
	return sprintf(buf, "N\n");
}
36 module_param_call(check_copy_up
, ovl_ccup_set
, ovl_ccup_get
, NULL
, 0644);
37 MODULE_PARM_DESC(check_copy_up
, "Obsolete; does nothing");
39 int ovl_copy_xattr(struct dentry
*old
, struct dentry
*new)
41 ssize_t list_size
, size
, value_size
= 0;
42 char *buf
, *name
, *value
= NULL
;
43 int uninitialized_var(error
);
46 if (!(old
->d_inode
->i_opflags
& IOP_XATTR
) ||
47 !(new->d_inode
->i_opflags
& IOP_XATTR
))
50 list_size
= vfs_listxattr(old
, NULL
, 0);
52 if (list_size
== -EOPNOTSUPP
)
57 buf
= kzalloc(list_size
, GFP_KERNEL
);
61 list_size
= vfs_listxattr(old
, buf
, list_size
);
67 for (name
= buf
; list_size
; name
+= slen
) {
68 slen
= strnlen(name
, list_size
) + 1;
70 /* underlying fs providing us with a broken xattr list? */
71 if (WARN_ON(slen
> list_size
)) {
77 if (ovl_is_private_xattr(name
))
80 size
= vfs_getxattr(old
, name
, value
, value_size
);
82 size
= vfs_getxattr(old
, name
, NULL
, 0);
89 if (size
> value_size
) {
92 new = krealloc(value
, size
, GFP_KERNEL
);
102 error
= security_inode_copy_up_xattr(name
);
103 if (error
< 0 && error
!= -EOPNOTSUPP
)
107 continue; /* Discard */
109 error
= vfs_setxattr(new, name
, value
, size
, 0);
119 static int ovl_copy_up_data(struct path
*old
, struct path
*new, loff_t len
)
121 struct file
*old_file
;
122 struct file
*new_file
;
131 old_file
= ovl_path_open(old
, O_LARGEFILE
| O_RDONLY
);
132 if (IS_ERR(old_file
))
133 return PTR_ERR(old_file
);
135 new_file
= ovl_path_open(new, O_LARGEFILE
| O_WRONLY
);
136 if (IS_ERR(new_file
)) {
137 error
= PTR_ERR(new_file
);
141 /* Try to use clone_file_range to clone up within the same fs */
142 cloned
= do_clone_file_range(old_file
, 0, new_file
, 0, len
, 0);
145 /* Couldn't clone, so now we try to copy the data */
147 /* FIXME: copy up sparse files efficiently */
149 size_t this_len
= OVL_COPY_UP_CHUNK_SIZE
;
155 if (signal_pending_state(TASK_KILLABLE
, current
)) {
160 bytes
= do_splice_direct(old_file
, &old_pos
,
162 this_len
, SPLICE_F_MOVE
);
167 WARN_ON(old_pos
!= new_pos
);
173 error
= vfs_fsync(new_file
, 0);
180 static int ovl_set_size(struct dentry
*upperdentry
, struct kstat
*stat
)
182 struct iattr attr
= {
183 .ia_valid
= ATTR_SIZE
,
184 .ia_size
= stat
->size
,
187 return notify_change(upperdentry
, &attr
, NULL
);
190 static int ovl_set_timestamps(struct dentry
*upperdentry
, struct kstat
*stat
)
192 struct iattr attr
= {
194 ATTR_ATIME
| ATTR_MTIME
| ATTR_ATIME_SET
| ATTR_MTIME_SET
,
195 .ia_atime
= stat
->atime
,
196 .ia_mtime
= stat
->mtime
,
199 return notify_change(upperdentry
, &attr
, NULL
);
202 int ovl_set_attr(struct dentry
*upperdentry
, struct kstat
*stat
)
206 if (!S_ISLNK(stat
->mode
)) {
207 struct iattr attr
= {
208 .ia_valid
= ATTR_MODE
,
209 .ia_mode
= stat
->mode
,
211 err
= notify_change(upperdentry
, &attr
, NULL
);
214 struct iattr attr
= {
215 .ia_valid
= ATTR_UID
| ATTR_GID
,
219 err
= notify_change(upperdentry
, &attr
, NULL
);
222 ovl_set_timestamps(upperdentry
, stat
);
227 struct ovl_fh
*ovl_encode_real_fh(struct dentry
*real
, bool is_upper
)
231 int buflen
= MAX_HANDLE_SZ
;
232 uuid_t
*uuid
= &real
->d_sb
->s_uuid
;
235 /* Make sure the real fid stays 32bit aligned */
236 BUILD_BUG_ON(OVL_FH_FID_OFFSET
% 4);
237 BUILD_BUG_ON(MAX_HANDLE_SZ
+ OVL_FH_FID_OFFSET
> 255);
239 fh
= kzalloc(buflen
+ OVL_FH_FID_OFFSET
, GFP_KERNEL
);
241 return ERR_PTR(-ENOMEM
);
244 * We encode a non-connectable file handle for non-dir, because we
245 * only need to find the lower inode number and we don't want to pay
246 * the price of reconnecting the dentry.
248 dwords
= buflen
>> 2;
249 fh_type
= exportfs_encode_fh(real
, (void *)fh
->fb
.fid
, &dwords
, 0);
250 buflen
= (dwords
<< 2);
253 if (WARN_ON(fh_type
< 0) ||
254 WARN_ON(buflen
> MAX_HANDLE_SZ
) ||
255 WARN_ON(fh_type
== FILEID_INVALID
))
258 fh
->fb
.version
= OVL_FH_VERSION
;
259 fh
->fb
.magic
= OVL_FH_MAGIC
;
260 fh
->fb
.type
= fh_type
;
261 fh
->fb
.flags
= OVL_FH_FLAG_CPU_ENDIAN
;
263 * When we will want to decode an overlay dentry from this handle
264 * and all layers are on the same fs, if we get a disconnected real
265 * dentry when we decode fid, the only way to tell if we should assign
266 * it to upperdentry or to lowerstack is by checking this flag.
269 fh
->fb
.flags
|= OVL_FH_FLAG_PATH_UPPER
;
270 fh
->fb
.len
= sizeof(fh
->fb
) + buflen
;
280 int ovl_set_origin(struct dentry
*dentry
, struct dentry
*lower
,
281 struct dentry
*upper
)
283 const struct ovl_fh
*fh
= NULL
;
287 * When lower layer doesn't support export operations store a 'null' fh,
288 * so we can use the overlay.origin xattr to distinguish between a copy
289 * up and a pure upper inode.
291 if (ovl_can_decode_fh(lower
->d_sb
)) {
292 fh
= ovl_encode_real_fh(lower
, false);
298 * Do not fail when upper doesn't support xattrs.
300 err
= ovl_check_setxattr(dentry
, upper
, OVL_XATTR_ORIGIN
, fh
->buf
,
301 fh
? fh
->fb
.len
: 0, 0);
307 /* Store file handle of @upper dir in @index dir entry */
308 static int ovl_set_upper_fh(struct dentry
*upper
, struct dentry
*index
)
310 const struct ovl_fh
*fh
;
313 fh
= ovl_encode_real_fh(upper
, true);
317 err
= ovl_do_setxattr(index
, OVL_XATTR_UPPER
, fh
->buf
, fh
->fb
.len
, 0);
324 * Create and install index entry.
326 * Caller must hold i_mutex on indexdir.
328 static int ovl_create_index(struct dentry
*dentry
, struct dentry
*origin
,
329 struct dentry
*upper
)
331 struct dentry
*indexdir
= ovl_indexdir(dentry
->d_sb
);
332 struct inode
*dir
= d_inode(indexdir
);
333 struct dentry
*index
= NULL
;
334 struct dentry
*temp
= NULL
;
335 struct qstr name
= { };
339 * For now this is only used for creating index entry for directories,
340 * because non-dir are copied up directly to index and then hardlinked
343 * TODO: implement create index for non-dir, so we can call it when
344 * encoding file handle for non-dir in case index does not exist.
346 if (WARN_ON(!d_is_dir(dentry
)))
349 /* Directory not expected to be indexed before copy up */
350 if (WARN_ON(ovl_test_flag(OVL_INDEX
, d_inode(dentry
))))
353 err
= ovl_get_index_name(origin
, &name
);
357 temp
= ovl_create_temp(indexdir
, OVL_CATTR(S_IFDIR
| 0));
362 err
= ovl_set_upper_fh(upper
, temp
);
366 index
= lookup_one_len(name
.name
, indexdir
, name
.len
);
368 err
= PTR_ERR(index
);
370 err
= ovl_do_rename(dir
, temp
, dir
, index
, 0);
375 ovl_cleanup(dir
, temp
);
382 struct ovl_copy_up_ctx
{
383 struct dentry
*parent
;
384 struct dentry
*dentry
;
385 struct path lowerpath
;
389 struct dentry
*destdir
;
390 struct qstr destname
;
391 struct dentry
*workdir
;
397 static int ovl_link_up(struct ovl_copy_up_ctx
*c
)
400 struct dentry
*upper
;
401 struct dentry
*upperdir
= ovl_dentry_upper(c
->parent
);
402 struct inode
*udir
= d_inode(upperdir
);
404 /* Mark parent "impure" because it may now contain non-pure upper */
405 err
= ovl_set_impure(c
->parent
, upperdir
);
409 err
= ovl_set_nlink_lower(c
->dentry
);
413 inode_lock_nested(udir
, I_MUTEX_PARENT
);
414 upper
= lookup_one_len(c
->dentry
->d_name
.name
, upperdir
,
415 c
->dentry
->d_name
.len
);
416 err
= PTR_ERR(upper
);
417 if (!IS_ERR(upper
)) {
418 err
= ovl_do_link(ovl_dentry_upper(c
->dentry
), udir
, upper
);
422 /* Restore timestamps on parent (best effort) */
423 ovl_set_timestamps(upperdir
, &c
->pstat
);
424 ovl_dentry_set_upper_alias(c
->dentry
);
431 err
= ovl_set_nlink_upper(c
->dentry
);
436 static int ovl_copy_up_inode(struct ovl_copy_up_ctx
*c
, struct dentry
*temp
)
441 * Copy up data first and then xattrs. Writing data after
442 * xattrs will remove security.capability xattr automatically.
444 if (S_ISREG(c
->stat
.mode
) && !c
->metacopy
) {
445 struct path upperpath
, datapath
;
447 ovl_path_upper(c
->dentry
, &upperpath
);
448 if (WARN_ON(upperpath
.dentry
!= NULL
))
450 upperpath
.dentry
= temp
;
452 ovl_path_lowerdata(c
->dentry
, &datapath
);
453 err
= ovl_copy_up_data(&datapath
, &upperpath
, c
->stat
.size
);
458 err
= ovl_copy_xattr(c
->lowerpath
.dentry
, temp
);
463 * Store identifier of lower inode in upper inode xattr to
464 * allow lookup of the copy up origin inode.
466 * Don't set origin when we are breaking the association with a lower
470 err
= ovl_set_origin(c
->dentry
, c
->lowerpath
.dentry
, temp
);
476 err
= ovl_check_setxattr(c
->dentry
, temp
, OVL_XATTR_METACOPY
,
477 NULL
, 0, -EOPNOTSUPP
);
482 inode_lock(temp
->d_inode
);
484 err
= ovl_set_size(temp
, &c
->stat
);
486 err
= ovl_set_attr(temp
, &c
->stat
);
487 inode_unlock(temp
->d_inode
);
492 struct ovl_cu_creds
{
493 const struct cred
*old
;
497 static int ovl_prep_cu_creds(struct dentry
*dentry
, struct ovl_cu_creds
*cc
)
501 cc
->old
= cc
->new = NULL
;
502 err
= security_inode_copy_up(dentry
, &cc
->new);
507 cc
->old
= override_creds(cc
->new);
512 static void ovl_revert_cu_creds(struct ovl_cu_creds
*cc
)
515 revert_creds(cc
->old
);
521 * Copyup using workdir to prepare temp file. Used when copying up directories,
522 * special files or when upper fs doesn't support O_TMPFILE.
524 static int ovl_copy_up_workdir(struct ovl_copy_up_ctx
*c
)
527 struct inode
*udir
= d_inode(c
->destdir
), *wdir
= d_inode(c
->workdir
);
528 struct dentry
*temp
, *upper
;
529 struct ovl_cu_creds cc
;
531 struct ovl_cattr cattr
= {
532 /* Can't properly set mode on creation because of the umask */
533 .mode
= c
->stat
.mode
& S_IFMT
,
534 .rdev
= c
->stat
.rdev
,
538 err
= ovl_lock_rename_workdir(c
->workdir
, c
->destdir
);
542 err
= ovl_prep_cu_creds(c
->dentry
, &cc
);
546 temp
= ovl_create_temp(c
->workdir
, &cattr
);
547 ovl_revert_cu_creds(&cc
);
553 err
= ovl_copy_up_inode(c
, temp
);
557 if (S_ISDIR(c
->stat
.mode
) && c
->indexed
) {
558 err
= ovl_create_index(c
->dentry
, c
->lowerpath
.dentry
, temp
);
563 upper
= lookup_one_len(c
->destname
.name
, c
->destdir
, c
->destname
.len
);
564 err
= PTR_ERR(upper
);
568 err
= ovl_do_rename(wdir
, temp
, udir
, upper
, 0);
574 ovl_set_upperdata(d_inode(c
->dentry
));
575 inode
= d_inode(c
->dentry
);
576 ovl_inode_update(inode
, temp
);
577 if (S_ISDIR(inode
->i_mode
))
578 ovl_set_flag(OVL_WHITEOUTS
, inode
);
580 unlock_rename(c
->workdir
, c
->destdir
);
585 ovl_cleanup(wdir
, temp
);
590 /* Copyup using O_TMPFILE which does not require cross dir locking */
591 static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx
*c
)
593 struct inode
*udir
= d_inode(c
->destdir
);
594 struct dentry
*temp
, *upper
;
595 struct ovl_cu_creds cc
;
598 err
= ovl_prep_cu_creds(c
->dentry
, &cc
);
602 temp
= ovl_do_tmpfile(c
->workdir
, c
->stat
.mode
);
603 ovl_revert_cu_creds(&cc
);
606 return PTR_ERR(temp
);
608 err
= ovl_copy_up_inode(c
, temp
);
612 inode_lock_nested(udir
, I_MUTEX_PARENT
);
614 upper
= lookup_one_len(c
->destname
.name
, c
->destdir
, c
->destname
.len
);
615 err
= PTR_ERR(upper
);
616 if (!IS_ERR(upper
)) {
617 err
= ovl_do_link(temp
, udir
, upper
);
626 ovl_set_upperdata(d_inode(c
->dentry
));
627 ovl_inode_update(d_inode(c
->dentry
), temp
);
637 * Copy up a single dentry
639 * All renames start with copy up of source if necessary. The actual
640 * rename will only proceed once the copy up was successful. Copy up uses
641 * upper parent i_mutex for exclusion. Since rename can change d_parent it
642 * is possible that the copy up will lock the old parent. At that point
643 * the file will have already been copied up anyway.
645 static int ovl_do_copy_up(struct ovl_copy_up_ctx
*c
)
648 struct ovl_fs
*ofs
= c
->dentry
->d_sb
->s_fs_info
;
649 bool to_index
= false;
652 * Indexed non-dir is copied up directly to the index entry and then
653 * hardlinked to upper dir. Indexed dir is copied up to indexdir,
654 * then index entry is created and then copied up dir installed.
655 * Copying dir up to indexdir instead of workdir simplifies locking.
657 if (ovl_need_index(c
->dentry
)) {
659 if (S_ISDIR(c
->stat
.mode
))
660 c
->workdir
= ovl_indexdir(c
->dentry
->d_sb
);
665 if (S_ISDIR(c
->stat
.mode
) || c
->stat
.nlink
== 1 || to_index
)
669 c
->destdir
= ovl_indexdir(c
->dentry
->d_sb
);
670 err
= ovl_get_index_name(c
->lowerpath
.dentry
, &c
->destname
);
673 } else if (WARN_ON(!c
->parent
)) {
674 /* Disconnected dentry must be copied up to index dir */
678 * Mark parent "impure" because it may now contain non-pure
681 err
= ovl_set_impure(c
->parent
, c
->destdir
);
686 /* Should we copyup with O_TMPFILE or with workdir? */
687 if (S_ISREG(c
->stat
.mode
) && ofs
->tmpfile
)
688 err
= ovl_copy_up_tmpfile(c
);
690 err
= ovl_copy_up_workdir(c
);
695 ovl_set_flag(OVL_INDEX
, d_inode(c
->dentry
));
698 /* Initialize nlink for copy up of disconnected dentry */
699 err
= ovl_set_nlink_upper(c
->dentry
);
701 struct inode
*udir
= d_inode(c
->destdir
);
703 /* Restore timestamps on parent (best effort) */
705 ovl_set_timestamps(c
->destdir
, &c
->pstat
);
708 ovl_dentry_set_upper_alias(c
->dentry
);
713 kfree(c
->destname
.name
);
717 static bool ovl_need_meta_copy_up(struct dentry
*dentry
, umode_t mode
,
720 struct ovl_fs
*ofs
= dentry
->d_sb
->s_fs_info
;
722 if (!ofs
->config
.metacopy
)
728 if (flags
&& ((OPEN_FMODE(flags
) & FMODE_WRITE
) || (flags
& O_TRUNC
)))
734 /* Copy up data of an inode which was copied up metadata only in the past. */
735 static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx
*c
)
737 struct path upperpath
, datapath
;
739 char *capability
= NULL
;
740 ssize_t
uninitialized_var(cap_size
);
742 ovl_path_upper(c
->dentry
, &upperpath
);
743 if (WARN_ON(upperpath
.dentry
== NULL
))
746 ovl_path_lowerdata(c
->dentry
, &datapath
);
747 if (WARN_ON(datapath
.dentry
== NULL
))
751 err
= cap_size
= ovl_getxattr(upperpath
.dentry
, XATTR_NAME_CAPS
,
753 if (err
< 0 && err
!= -ENODATA
)
757 err
= ovl_copy_up_data(&datapath
, &upperpath
, c
->stat
.size
);
762 * Writing to upper file will clear security.capability xattr. We
763 * don't want that to happen for normal copy-up operation.
766 err
= ovl_do_setxattr(upperpath
.dentry
, XATTR_NAME_CAPS
,
767 capability
, cap_size
, 0);
773 err
= vfs_removexattr(upperpath
.dentry
, OVL_XATTR_METACOPY
);
777 ovl_set_upperdata(d_inode(c
->dentry
));
784 static int ovl_copy_up_one(struct dentry
*parent
, struct dentry
*dentry
,
788 DEFINE_DELAYED_CALL(done
);
789 struct path parentpath
;
790 struct ovl_copy_up_ctx ctx
= {
793 .workdir
= ovl_workdir(dentry
),
796 if (WARN_ON(!ctx
.workdir
))
799 ovl_path_lower(dentry
, &ctx
.lowerpath
);
800 err
= vfs_getattr(&ctx
.lowerpath
, &ctx
.stat
,
801 STATX_BASIC_STATS
, AT_STATX_SYNC_AS_STAT
);
805 ctx
.metacopy
= ovl_need_meta_copy_up(dentry
, ctx
.stat
.mode
, flags
);
808 ovl_path_upper(parent
, &parentpath
);
809 ctx
.destdir
= parentpath
.dentry
;
810 ctx
.destname
= dentry
->d_name
;
812 err
= vfs_getattr(&parentpath
, &ctx
.pstat
,
813 STATX_ATIME
| STATX_MTIME
,
814 AT_STATX_SYNC_AS_STAT
);
819 /* maybe truncate regular file. this has no effect on dirs */
823 if (S_ISLNK(ctx
.stat
.mode
)) {
824 ctx
.link
= vfs_get_link(ctx
.lowerpath
.dentry
, &done
);
825 if (IS_ERR(ctx
.link
))
826 return PTR_ERR(ctx
.link
);
829 err
= ovl_copy_up_start(dentry
, flags
);
830 /* err < 0: interrupted, err > 0: raced with another copy-up */
835 if (!ovl_dentry_upper(dentry
))
836 err
= ovl_do_copy_up(&ctx
);
837 if (!err
&& parent
&& !ovl_dentry_has_upper_alias(dentry
))
838 err
= ovl_link_up(&ctx
);
839 if (!err
&& ovl_dentry_needs_data_copy_up_locked(dentry
, flags
))
840 err
= ovl_copy_up_meta_inode_data(&ctx
);
841 ovl_copy_up_end(dentry
);
843 do_delayed_call(&done
);
848 int ovl_copy_up_flags(struct dentry
*dentry
, int flags
)
851 const struct cred
*old_cred
= ovl_override_creds(dentry
->d_sb
);
852 bool disconnected
= (dentry
->d_flags
& DCACHE_DISCONNECTED
);
855 * With NFS export, copy up can get called for a disconnected non-dir.
856 * In this case, we will copy up lower inode to index dir without
857 * linking it to upper dir.
859 if (WARN_ON(disconnected
&& d_is_dir(dentry
)))
864 struct dentry
*parent
= NULL
;
866 if (ovl_already_copied_up(dentry
, flags
))
870 /* find the topmost dentry not yet copied up */
871 for (; !disconnected
;) {
872 parent
= dget_parent(next
);
874 if (ovl_dentry_upper(parent
))
881 err
= ovl_copy_up_one(parent
, next
, flags
);
886 revert_creds(old_cred
);
891 static bool ovl_open_need_copy_up(struct dentry
*dentry
, int flags
)
893 /* Copy up of disconnected dentry does not set upper alias */
894 if (ovl_already_copied_up(dentry
, flags
))
897 if (special_file(d_inode(dentry
)->i_mode
))
900 if (!ovl_open_flags_need_copy_up(flags
))
906 int ovl_maybe_copy_up(struct dentry
*dentry
, int flags
)
910 if (ovl_open_need_copy_up(dentry
, flags
)) {
911 err
= ovl_want_write(dentry
);
913 err
= ovl_copy_up_flags(dentry
, flags
);
914 ovl_drop_write(dentry
);
921 int ovl_copy_up_with_data(struct dentry
*dentry
)
923 return ovl_copy_up_flags(dentry
, O_WRONLY
);
/*
 * Copy up @dentry by calling ovl_copy_up_flags() with no open flags (0).
 *
 * Return: the value returned by ovl_copy_up_flags().
 */
int ovl_copy_up(struct dentry *dentry)
{
	return ovl_copy_up_flags(dentry, 0);
}