3 * Copyright (C) 2011 Novell Inc.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
10 #include <linux/module.h>
12 #include <linux/slab.h>
13 #include <linux/file.h>
14 #include <linux/splice.h>
15 #include <linux/xattr.h>
16 #include <linux/security.h>
17 #include <linux/uaccess.h>
18 #include <linux/sched/signal.h>
19 #include <linux/cred.h>
20 #include <linux/namei.h>
21 #include <linux/fdtable.h>
22 #include <linux/ratelimit.h>
23 #include <linux/exportfs.h>
24 #include "overlayfs.h"
26 #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
28 static int ovl_ccup_set(const char *buf
, const struct kernel_param
*param
)
30 pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n");
34 static int ovl_ccup_get(char *buf
, const struct kernel_param
*param
)
36 return sprintf(buf
, "N\n");
39 module_param_call(check_copy_up
, ovl_ccup_set
, ovl_ccup_get
, NULL
, 0644);
40 MODULE_PARM_DESC(ovl_check_copy_up
, "Obsolete; does nothing");
42 int ovl_copy_xattr(struct dentry
*old
, struct dentry
*new)
44 ssize_t list_size
, size
, value_size
= 0;
45 char *buf
, *name
, *value
= NULL
;
46 int uninitialized_var(error
);
49 if (!(old
->d_inode
->i_opflags
& IOP_XATTR
) ||
50 !(new->d_inode
->i_opflags
& IOP_XATTR
))
53 list_size
= vfs_listxattr(old
, NULL
, 0);
55 if (list_size
== -EOPNOTSUPP
)
60 buf
= kzalloc(list_size
, GFP_KERNEL
);
64 list_size
= vfs_listxattr(old
, buf
, list_size
);
70 for (name
= buf
; list_size
; name
+= slen
) {
71 slen
= strnlen(name
, list_size
) + 1;
73 /* underlying fs providing us with a broken xattr list? */
74 if (WARN_ON(slen
> list_size
)) {
80 if (ovl_is_private_xattr(name
))
83 size
= vfs_getxattr(old
, name
, value
, value_size
);
85 size
= vfs_getxattr(old
, name
, NULL
, 0);
92 if (size
> value_size
) {
95 new = krealloc(value
, size
, GFP_KERNEL
);
105 error
= security_inode_copy_up_xattr(name
);
106 if (error
< 0 && error
!= -EOPNOTSUPP
)
110 continue; /* Discard */
112 error
= vfs_setxattr(new, name
, value
, size
, 0);
122 static int ovl_copy_up_data(struct path
*old
, struct path
*new, loff_t len
)
124 struct file
*old_file
;
125 struct file
*new_file
;
134 old_file
= ovl_path_open(old
, O_LARGEFILE
| O_RDONLY
);
135 if (IS_ERR(old_file
))
136 return PTR_ERR(old_file
);
138 new_file
= ovl_path_open(new, O_LARGEFILE
| O_WRONLY
);
139 if (IS_ERR(new_file
)) {
140 error
= PTR_ERR(new_file
);
144 /* Try to use clone_file_range to clone up within the same fs */
145 cloned
= do_clone_file_range(old_file
, 0, new_file
, 0, len
, 0);
148 /* Couldn't clone, so now we try to copy the data */
150 /* FIXME: copy up sparse files efficiently */
152 size_t this_len
= OVL_COPY_UP_CHUNK_SIZE
;
158 if (signal_pending_state(TASK_KILLABLE
, current
)) {
163 bytes
= do_splice_direct(old_file
, &old_pos
,
165 this_len
, SPLICE_F_MOVE
);
170 WARN_ON(old_pos
!= new_pos
);
176 error
= vfs_fsync(new_file
, 0);
183 static int ovl_set_size(struct dentry
*upperdentry
, struct kstat
*stat
)
185 struct iattr attr
= {
186 .ia_valid
= ATTR_SIZE
,
187 .ia_size
= stat
->size
,
190 return notify_change(upperdentry
, &attr
, NULL
);
193 static int ovl_set_timestamps(struct dentry
*upperdentry
, struct kstat
*stat
)
195 struct iattr attr
= {
197 ATTR_ATIME
| ATTR_MTIME
| ATTR_ATIME_SET
| ATTR_MTIME_SET
,
198 .ia_atime
= stat
->atime
,
199 .ia_mtime
= stat
->mtime
,
202 return notify_change(upperdentry
, &attr
, NULL
);
205 int ovl_set_attr(struct dentry
*upperdentry
, struct kstat
*stat
)
209 if (!S_ISLNK(stat
->mode
)) {
210 struct iattr attr
= {
211 .ia_valid
= ATTR_MODE
,
212 .ia_mode
= stat
->mode
,
214 err
= notify_change(upperdentry
, &attr
, NULL
);
217 struct iattr attr
= {
218 .ia_valid
= ATTR_UID
| ATTR_GID
,
222 err
= notify_change(upperdentry
, &attr
, NULL
);
225 ovl_set_timestamps(upperdentry
, stat
);
230 struct ovl_fh
*ovl_encode_real_fh(struct dentry
*real
, bool is_upper
)
233 int fh_type
, fh_len
, dwords
;
235 int buflen
= MAX_HANDLE_SZ
;
236 uuid_t
*uuid
= &real
->d_sb
->s_uuid
;
238 buf
= kmalloc(buflen
, GFP_KERNEL
);
240 return ERR_PTR(-ENOMEM
);
243 * We encode a non-connectable file handle for non-dir, because we
244 * only need to find the lower inode number and we don't want to pay
245 * the price of reconnecting the dentry.
247 dwords
= buflen
>> 2;
248 fh_type
= exportfs_encode_fh(real
, buf
, &dwords
, 0);
249 buflen
= (dwords
<< 2);
252 if (WARN_ON(fh_type
< 0) ||
253 WARN_ON(buflen
> MAX_HANDLE_SZ
) ||
254 WARN_ON(fh_type
== FILEID_INVALID
))
257 BUILD_BUG_ON(MAX_HANDLE_SZ
+ offsetof(struct ovl_fh
, fid
) > 255);
258 fh_len
= offsetof(struct ovl_fh
, fid
) + buflen
;
259 fh
= kmalloc(fh_len
, GFP_KERNEL
);
261 fh
= ERR_PTR(-ENOMEM
);
265 fh
->version
= OVL_FH_VERSION
;
266 fh
->magic
= OVL_FH_MAGIC
;
268 fh
->flags
= OVL_FH_FLAG_CPU_ENDIAN
;
270 * When we will want to decode an overlay dentry from this handle
271 * and all layers are on the same fs, if we get a disconnected real
272 * dentry when we decode fid, the only way to tell if we should assign
273 * it to upperdentry or to lowerstack is by checking this flag.
276 fh
->flags
|= OVL_FH_FLAG_PATH_UPPER
;
279 memcpy(fh
->fid
, buf
, buflen
);
286 int ovl_set_origin(struct dentry
*dentry
, struct dentry
*lower
,
287 struct dentry
*upper
)
289 const struct ovl_fh
*fh
= NULL
;
293 * When lower layer doesn't support export operations store a 'null' fh,
294 * so we can use the overlay.origin xattr to distinguish between a copy
295 * up and a pure upper inode.
297 if (ovl_can_decode_fh(lower
->d_sb
)) {
298 fh
= ovl_encode_real_fh(lower
, false);
304 * Do not fail when upper doesn't support xattrs.
306 err
= ovl_check_setxattr(dentry
, upper
, OVL_XATTR_ORIGIN
, fh
,
307 fh
? fh
->len
: 0, 0);
313 /* Store file handle of @upper dir in @index dir entry */
314 static int ovl_set_upper_fh(struct dentry
*upper
, struct dentry
*index
)
316 const struct ovl_fh
*fh
;
319 fh
= ovl_encode_real_fh(upper
, true);
323 err
= ovl_do_setxattr(index
, OVL_XATTR_UPPER
, fh
, fh
->len
, 0);
330 * Create and install index entry.
332 * Caller must hold i_mutex on indexdir.
334 static int ovl_create_index(struct dentry
*dentry
, struct dentry
*origin
,
335 struct dentry
*upper
)
337 struct dentry
*indexdir
= ovl_indexdir(dentry
->d_sb
);
338 struct inode
*dir
= d_inode(indexdir
);
339 struct dentry
*index
= NULL
;
340 struct dentry
*temp
= NULL
;
341 struct qstr name
= { };
345 * For now this is only used for creating index entry for directories,
346 * because non-dir are copied up directly to index and then hardlinked
349 * TODO: implement create index for non-dir, so we can call it when
350 * encoding file handle for non-dir in case index does not exist.
352 if (WARN_ON(!d_is_dir(dentry
)))
355 /* Directory not expected to be indexed before copy up */
356 if (WARN_ON(ovl_test_flag(OVL_INDEX
, d_inode(dentry
))))
359 err
= ovl_get_index_name(origin
, &name
);
363 temp
= ovl_create_temp(indexdir
, OVL_CATTR(S_IFDIR
| 0));
368 err
= ovl_set_upper_fh(upper
, temp
);
372 index
= lookup_one_len(name
.name
, indexdir
, name
.len
);
374 err
= PTR_ERR(index
);
376 err
= ovl_do_rename(dir
, temp
, dir
, index
, 0);
381 ovl_cleanup(dir
, temp
);
388 struct ovl_copy_up_ctx
{
389 struct dentry
*parent
;
390 struct dentry
*dentry
;
391 struct path lowerpath
;
395 struct dentry
*destdir
;
396 struct qstr destname
;
397 struct dentry
*workdir
;
403 static int ovl_link_up(struct ovl_copy_up_ctx
*c
)
406 struct dentry
*upper
;
407 struct dentry
*upperdir
= ovl_dentry_upper(c
->parent
);
408 struct inode
*udir
= d_inode(upperdir
);
410 /* Mark parent "impure" because it may now contain non-pure upper */
411 err
= ovl_set_impure(c
->parent
, upperdir
);
415 err
= ovl_set_nlink_lower(c
->dentry
);
419 inode_lock_nested(udir
, I_MUTEX_PARENT
);
420 upper
= lookup_one_len(c
->dentry
->d_name
.name
, upperdir
,
421 c
->dentry
->d_name
.len
);
422 err
= PTR_ERR(upper
);
423 if (!IS_ERR(upper
)) {
424 err
= ovl_do_link(ovl_dentry_upper(c
->dentry
), udir
, upper
);
428 /* Restore timestamps on parent (best effort) */
429 ovl_set_timestamps(upperdir
, &c
->pstat
);
430 ovl_dentry_set_upper_alias(c
->dentry
);
437 err
= ovl_set_nlink_upper(c
->dentry
);
442 static int ovl_copy_up_inode(struct ovl_copy_up_ctx
*c
, struct dentry
*temp
)
447 * Copy up data first and then xattrs. Writing data after
448 * xattrs will remove security.capability xattr automatically.
450 if (S_ISREG(c
->stat
.mode
) && !c
->metacopy
) {
451 struct path upperpath
, datapath
;
453 ovl_path_upper(c
->dentry
, &upperpath
);
454 if (WARN_ON(upperpath
.dentry
!= NULL
))
456 upperpath
.dentry
= temp
;
458 ovl_path_lowerdata(c
->dentry
, &datapath
);
459 err
= ovl_copy_up_data(&datapath
, &upperpath
, c
->stat
.size
);
464 err
= ovl_copy_xattr(c
->lowerpath
.dentry
, temp
);
469 * Store identifier of lower inode in upper inode xattr to
470 * allow lookup of the copy up origin inode.
472 * Don't set origin when we are breaking the association with a lower
476 err
= ovl_set_origin(c
->dentry
, c
->lowerpath
.dentry
, temp
);
482 err
= ovl_check_setxattr(c
->dentry
, temp
, OVL_XATTR_METACOPY
,
483 NULL
, 0, -EOPNOTSUPP
);
488 inode_lock(temp
->d_inode
);
490 err
= ovl_set_size(temp
, &c
->stat
);
492 err
= ovl_set_attr(temp
, &c
->stat
);
493 inode_unlock(temp
->d_inode
);
498 struct ovl_cu_creds
{
499 const struct cred
*old
;
503 static int ovl_prep_cu_creds(struct dentry
*dentry
, struct ovl_cu_creds
*cc
)
507 cc
->old
= cc
->new = NULL
;
508 err
= security_inode_copy_up(dentry
, &cc
->new);
513 cc
->old
= override_creds(cc
->new);
518 static void ovl_revert_cu_creds(struct ovl_cu_creds
*cc
)
521 revert_creds(cc
->old
);
527 * Copyup using workdir to prepare temp file. Used when copying up directories,
528 * special files or when upper fs doesn't support O_TMPFILE.
530 static int ovl_copy_up_workdir(struct ovl_copy_up_ctx
*c
)
533 struct inode
*udir
= d_inode(c
->destdir
), *wdir
= d_inode(c
->workdir
);
534 struct dentry
*temp
, *upper
;
535 struct ovl_cu_creds cc
;
537 struct ovl_cattr cattr
= {
538 /* Can't properly set mode on creation because of the umask */
539 .mode
= c
->stat
.mode
& S_IFMT
,
540 .rdev
= c
->stat
.rdev
,
544 err
= ovl_lock_rename_workdir(c
->workdir
, c
->destdir
);
548 err
= ovl_prep_cu_creds(c
->dentry
, &cc
);
552 temp
= ovl_create_temp(c
->workdir
, &cattr
);
553 ovl_revert_cu_creds(&cc
);
559 err
= ovl_copy_up_inode(c
, temp
);
563 if (S_ISDIR(c
->stat
.mode
) && c
->indexed
) {
564 err
= ovl_create_index(c
->dentry
, c
->lowerpath
.dentry
, temp
);
569 upper
= lookup_one_len(c
->destname
.name
, c
->destdir
, c
->destname
.len
);
570 err
= PTR_ERR(upper
);
574 err
= ovl_do_rename(wdir
, temp
, udir
, upper
, 0);
580 ovl_set_upperdata(d_inode(c
->dentry
));
581 inode
= d_inode(c
->dentry
);
582 ovl_inode_update(inode
, temp
);
583 if (S_ISDIR(inode
->i_mode
))
584 ovl_set_flag(OVL_WHITEOUTS
, inode
);
586 unlock_rename(c
->workdir
, c
->destdir
);
591 ovl_cleanup(wdir
, temp
);
596 /* Copyup using O_TMPFILE which does not require cross dir locking */
597 static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx
*c
)
599 struct inode
*udir
= d_inode(c
->destdir
);
600 struct dentry
*temp
, *upper
;
601 struct ovl_cu_creds cc
;
604 err
= ovl_prep_cu_creds(c
->dentry
, &cc
);
608 temp
= ovl_do_tmpfile(c
->workdir
, c
->stat
.mode
);
609 ovl_revert_cu_creds(&cc
);
612 return PTR_ERR(temp
);
614 err
= ovl_copy_up_inode(c
, temp
);
618 inode_lock_nested(udir
, I_MUTEX_PARENT
);
620 upper
= lookup_one_len(c
->destname
.name
, c
->destdir
, c
->destname
.len
);
621 err
= PTR_ERR(upper
);
622 if (!IS_ERR(upper
)) {
623 err
= ovl_do_link(temp
, udir
, upper
);
632 ovl_set_upperdata(d_inode(c
->dentry
));
633 ovl_inode_update(d_inode(c
->dentry
), temp
);
643 * Copy up a single dentry
645 * All renames start with copy up of source if necessary. The actual
646 * rename will only proceed once the copy up was successful. Copy up uses
647 * upper parent i_mutex for exclusion. Since rename can change d_parent it
648 * is possible that the copy up will lock the old parent. At that point
649 * the file will have already been copied up anyway.
651 static int ovl_do_copy_up(struct ovl_copy_up_ctx
*c
)
654 struct ovl_fs
*ofs
= c
->dentry
->d_sb
->s_fs_info
;
655 bool to_index
= false;
658 * Indexed non-dir is copied up directly to the index entry and then
659 * hardlinked to upper dir. Indexed dir is copied up to indexdir,
660 * then index entry is created and then copied up dir installed.
661 * Copying dir up to indexdir instead of workdir simplifies locking.
663 if (ovl_need_index(c
->dentry
)) {
665 if (S_ISDIR(c
->stat
.mode
))
666 c
->workdir
= ovl_indexdir(c
->dentry
->d_sb
);
671 if (S_ISDIR(c
->stat
.mode
) || c
->stat
.nlink
== 1 || to_index
)
675 c
->destdir
= ovl_indexdir(c
->dentry
->d_sb
);
676 err
= ovl_get_index_name(c
->lowerpath
.dentry
, &c
->destname
);
679 } else if (WARN_ON(!c
->parent
)) {
680 /* Disconnected dentry must be copied up to index dir */
684 * Mark parent "impure" because it may now contain non-pure
687 err
= ovl_set_impure(c
->parent
, c
->destdir
);
692 /* Should we copyup with O_TMPFILE or with workdir? */
693 if (S_ISREG(c
->stat
.mode
) && ofs
->tmpfile
)
694 err
= ovl_copy_up_tmpfile(c
);
696 err
= ovl_copy_up_workdir(c
);
701 ovl_set_flag(OVL_INDEX
, d_inode(c
->dentry
));
704 /* Initialize nlink for copy up of disconnected dentry */
705 err
= ovl_set_nlink_upper(c
->dentry
);
707 struct inode
*udir
= d_inode(c
->destdir
);
709 /* Restore timestamps on parent (best effort) */
711 ovl_set_timestamps(c
->destdir
, &c
->pstat
);
714 ovl_dentry_set_upper_alias(c
->dentry
);
719 kfree(c
->destname
.name
);
723 static bool ovl_need_meta_copy_up(struct dentry
*dentry
, umode_t mode
,
726 struct ovl_fs
*ofs
= dentry
->d_sb
->s_fs_info
;
728 if (!ofs
->config
.metacopy
)
734 if (flags
&& ((OPEN_FMODE(flags
) & FMODE_WRITE
) || (flags
& O_TRUNC
)))
740 /* Copy up data of an inode which was copied up metadata only in the past. */
741 static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx
*c
)
743 struct path upperpath
, datapath
;
745 char *capability
= NULL
;
746 ssize_t
uninitialized_var(cap_size
);
748 ovl_path_upper(c
->dentry
, &upperpath
);
749 if (WARN_ON(upperpath
.dentry
== NULL
))
752 ovl_path_lowerdata(c
->dentry
, &datapath
);
753 if (WARN_ON(datapath
.dentry
== NULL
))
757 err
= cap_size
= ovl_getxattr(upperpath
.dentry
, XATTR_NAME_CAPS
,
759 if (err
< 0 && err
!= -ENODATA
)
763 err
= ovl_copy_up_data(&datapath
, &upperpath
, c
->stat
.size
);
768 * Writing to upper file will clear security.capability xattr. We
769 * don't want that to happen for normal copy-up operation.
772 err
= ovl_do_setxattr(upperpath
.dentry
, XATTR_NAME_CAPS
,
773 capability
, cap_size
, 0);
779 err
= vfs_removexattr(upperpath
.dentry
, OVL_XATTR_METACOPY
);
783 ovl_set_upperdata(d_inode(c
->dentry
));
790 static int ovl_copy_up_one(struct dentry
*parent
, struct dentry
*dentry
,
794 DEFINE_DELAYED_CALL(done
);
795 struct path parentpath
;
796 struct ovl_copy_up_ctx ctx
= {
799 .workdir
= ovl_workdir(dentry
),
802 if (WARN_ON(!ctx
.workdir
))
805 ovl_path_lower(dentry
, &ctx
.lowerpath
);
806 err
= vfs_getattr(&ctx
.lowerpath
, &ctx
.stat
,
807 STATX_BASIC_STATS
, AT_STATX_SYNC_AS_STAT
);
811 ctx
.metacopy
= ovl_need_meta_copy_up(dentry
, ctx
.stat
.mode
, flags
);
814 ovl_path_upper(parent
, &parentpath
);
815 ctx
.destdir
= parentpath
.dentry
;
816 ctx
.destname
= dentry
->d_name
;
818 err
= vfs_getattr(&parentpath
, &ctx
.pstat
,
819 STATX_ATIME
| STATX_MTIME
,
820 AT_STATX_SYNC_AS_STAT
);
825 /* maybe truncate regular file. this has no effect on dirs */
829 if (S_ISLNK(ctx
.stat
.mode
)) {
830 ctx
.link
= vfs_get_link(ctx
.lowerpath
.dentry
, &done
);
831 if (IS_ERR(ctx
.link
))
832 return PTR_ERR(ctx
.link
);
835 err
= ovl_copy_up_start(dentry
, flags
);
836 /* err < 0: interrupted, err > 0: raced with another copy-up */
841 if (!ovl_dentry_upper(dentry
))
842 err
= ovl_do_copy_up(&ctx
);
843 if (!err
&& parent
&& !ovl_dentry_has_upper_alias(dentry
))
844 err
= ovl_link_up(&ctx
);
845 if (!err
&& ovl_dentry_needs_data_copy_up_locked(dentry
, flags
))
846 err
= ovl_copy_up_meta_inode_data(&ctx
);
847 ovl_copy_up_end(dentry
);
849 do_delayed_call(&done
);
854 int ovl_copy_up_flags(struct dentry
*dentry
, int flags
)
857 const struct cred
*old_cred
= ovl_override_creds(dentry
->d_sb
);
858 bool disconnected
= (dentry
->d_flags
& DCACHE_DISCONNECTED
);
861 * With NFS export, copy up can get called for a disconnected non-dir.
862 * In this case, we will copy up lower inode to index dir without
863 * linking it to upper dir.
865 if (WARN_ON(disconnected
&& d_is_dir(dentry
)))
870 struct dentry
*parent
= NULL
;
872 if (ovl_already_copied_up(dentry
, flags
))
876 /* find the topmost dentry not yet copied up */
877 for (; !disconnected
;) {
878 parent
= dget_parent(next
);
880 if (ovl_dentry_upper(parent
))
887 err
= ovl_copy_up_one(parent
, next
, flags
);
892 revert_creds(old_cred
);
897 static bool ovl_open_need_copy_up(struct dentry
*dentry
, int flags
)
899 /* Copy up of disconnected dentry does not set upper alias */
900 if (ovl_already_copied_up(dentry
, flags
))
903 if (special_file(d_inode(dentry
)->i_mode
))
906 if (!ovl_open_flags_need_copy_up(flags
))
912 int ovl_maybe_copy_up(struct dentry
*dentry
, int flags
)
916 if (ovl_open_need_copy_up(dentry
, flags
)) {
917 err
= ovl_want_write(dentry
);
919 err
= ovl_copy_up_flags(dentry
, flags
);
920 ovl_drop_write(dentry
);
927 int ovl_copy_up_with_data(struct dentry
*dentry
)
929 return ovl_copy_up_flags(dentry
, O_WRONLY
);
932 int ovl_copy_up(struct dentry
*dentry
)
934 return ovl_copy_up_flags(dentry
, 0);