2 * Copyright (C) 2011 Novell Inc.
3 * Copyright (C) 2016 Red Hat, Inc.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
11 #include <linux/cred.h>
12 #include <linux/namei.h>
13 #include <linux/xattr.h>
14 #include <linux/ratelimit.h>
15 #include <linux/mount.h>
16 #include <linux/exportfs.h>
17 #include "overlayfs.h"
18 #include "ovl_entry.h"
20 struct ovl_lookup_data
{
29 static int ovl_check_redirect(struct dentry
*dentry
, struct ovl_lookup_data
*d
,
30 size_t prelen
, const char *post
)
33 char *s
, *next
, *buf
= NULL
;
35 res
= vfs_getxattr(dentry
, OVL_XATTR_REDIRECT
, NULL
, 0);
37 if (res
== -ENODATA
|| res
== -EOPNOTSUPP
)
41 buf
= kzalloc(prelen
+ res
+ strlen(post
) + 1, GFP_KERNEL
);
48 res
= vfs_getxattr(dentry
, OVL_XATTR_REDIRECT
, buf
, res
);
54 for (s
= buf
; *s
++ == '/'; s
= next
) {
55 next
= strchrnul(s
, '/');
60 * One of the ancestor path elements in an absolute path
61 * lookup in ovl_lookup_layer() could have been opaque and
62 * that will stop further lookup in lower layers (d->stop=true).
63 * But we have found an absolute redirect in a descendant path
64 * element and that should force lookup to continue in lower
65 * layers (reset d->stop).
69 if (strchr(buf
, '/') != NULL
)
72 memmove(buf
+ prelen
, buf
, res
);
73 memcpy(buf
, d
->name
.name
, prelen
);
79 d
->name
.name
= d
->redirect
;
80 d
->name
.len
= strlen(d
->redirect
);
88 pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res
);
91 pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf
);
95 static int ovl_acceptable(void *ctx
, struct dentry
*dentry
)
100 static struct ovl_fh
*ovl_get_origin_fh(struct dentry
*dentry
)
103 struct ovl_fh
*fh
= NULL
;
105 res
= vfs_getxattr(dentry
, OVL_XATTR_ORIGIN
, NULL
, 0);
107 if (res
== -ENODATA
|| res
== -EOPNOTSUPP
)
111 /* Zero size value means "copied up but origin unknown" */
115 fh
= kzalloc(res
, GFP_KERNEL
);
117 return ERR_PTR(-ENOMEM
);
119 res
= vfs_getxattr(dentry
, OVL_XATTR_ORIGIN
, fh
, res
);
123 if (res
< sizeof(struct ovl_fh
) || res
< fh
->len
)
126 if (fh
->magic
!= OVL_FH_MAGIC
)
129 /* Treat larger version and unknown flags as "origin unknown" */
130 if (fh
->version
> OVL_FH_VERSION
|| fh
->flags
& ~OVL_FH_FLAG_ALL
)
133 /* Treat endianness mismatch as "origin unknown" */
134 if (!(fh
->flags
& OVL_FH_FLAG_ANY_ENDIAN
) &&
135 (fh
->flags
& OVL_FH_FLAG_BIG_ENDIAN
) != OVL_FH_FLAG_CPU_ENDIAN
)
145 pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res
);
148 pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res
, fh
);
152 static struct dentry
*ovl_get_origin(struct dentry
*dentry
,
153 struct vfsmount
*mnt
)
155 struct dentry
*origin
= NULL
;
156 struct ovl_fh
*fh
= ovl_get_origin_fh(dentry
);
159 if (IS_ERR_OR_NULL(fh
))
160 return (struct dentry
*)fh
;
163 * Make sure that the stored uuid matches the uuid of the lower
164 * layer where file handle will be decoded.
166 if (!uuid_equal(&fh
->uuid
, &mnt
->mnt_sb
->s_uuid
))
169 bytes
= (fh
->len
- offsetof(struct ovl_fh
, fid
));
170 origin
= exportfs_decode_fh(mnt
, (struct fid
*)fh
->fid
,
171 bytes
>> 2, (int)fh
->type
,
172 ovl_acceptable
, NULL
);
173 if (IS_ERR(origin
)) {
174 /* Treat stale file handle as "origin unknown" */
175 if (origin
== ERR_PTR(-ESTALE
))
180 if (ovl_dentry_weird(origin
) ||
181 ((d_inode(origin
)->i_mode
^ d_inode(dentry
)->i_mode
) & S_IFMT
))
189 pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin
);
195 static bool ovl_is_opaquedir(struct dentry
*dentry
)
197 return ovl_check_dir_xattr(dentry
, OVL_XATTR_OPAQUE
);
200 static int ovl_lookup_single(struct dentry
*base
, struct ovl_lookup_data
*d
,
201 const char *name
, unsigned int namelen
,
202 size_t prelen
, const char *post
,
208 this = lookup_one_len_unlocked(name
, base
, namelen
);
212 if (err
== -ENOENT
|| err
== -ENAMETOOLONG
)
219 if (ovl_dentry_weird(this)) {
220 /* Don't support traversing automounts and other weirdness */
224 if (ovl_is_whiteout(this)) {
225 d
->stop
= d
->opaque
= true;
228 if (!d_can_lookup(this)) {
235 if (!d
->last
&& ovl_is_opaquedir(this)) {
236 d
->stop
= d
->opaque
= true;
239 err
= ovl_check_redirect(this, d
, prelen
, post
);
256 static int ovl_lookup_layer(struct dentry
*base
, struct ovl_lookup_data
*d
,
259 /* Counting down from the end, since the prefix can change */
260 size_t rem
= d
->name
.len
- 1;
261 struct dentry
*dentry
= NULL
;
264 if (d
->name
.name
[0] != '/')
265 return ovl_lookup_single(base
, d
, d
->name
.name
, d
->name
.len
,
268 while (!IS_ERR_OR_NULL(base
) && d_can_lookup(base
)) {
269 const char *s
= d
->name
.name
+ d
->name
.len
- rem
;
270 const char *next
= strchrnul(s
, '/');
271 size_t thislen
= next
- s
;
274 /* Verify we did not go off the rails */
275 if (WARN_ON(s
[-1] != '/'))
278 err
= ovl_lookup_single(base
, d
, s
, thislen
,
279 d
->name
.len
- rem
, next
, &base
);
289 if (WARN_ON(rem
>= d
->name
.len
))
297 static int ovl_check_origin(struct dentry
*upperdentry
,
298 struct path
*lowerstack
, unsigned int numlower
,
299 struct path
**stackp
, unsigned int *ctrp
)
301 struct vfsmount
*mnt
;
302 struct dentry
*origin
= NULL
;
306 for (i
= 0; i
< numlower
; i
++) {
307 mnt
= lowerstack
[i
].mnt
;
308 origin
= ovl_get_origin(upperdentry
, mnt
);
310 return PTR_ERR(origin
);
321 *stackp
= kmalloc(sizeof(struct path
), GFP_KERNEL
);
326 **stackp
= (struct path
) { .dentry
= origin
, .mnt
= mnt
};
333 * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN.
334 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
336 static int ovl_verify_origin_fh(struct dentry
*dentry
, const struct ovl_fh
*fh
)
338 struct ovl_fh
*ofh
= ovl_get_origin_fh(dentry
);
347 if (fh
->len
!= ofh
->len
|| memcmp(fh
, ofh
, fh
->len
))
355 * Verify that an inode matches the origin file handle stored in upper inode.
357 * If @set is true and there is no stored file handle, encode and store origin
358 * file handle in OVL_XATTR_ORIGIN.
360 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
362 int ovl_verify_origin(struct dentry
*dentry
, struct vfsmount
*mnt
,
363 struct dentry
*origin
, bool is_upper
, bool set
)
369 fh
= ovl_encode_fh(origin
, is_upper
);
376 err
= ovl_verify_origin_fh(dentry
, fh
);
377 if (set
&& err
== -ENODATA
)
378 err
= ovl_do_setxattr(dentry
, OVL_XATTR_ORIGIN
, fh
, fh
->len
, 0);
387 inode
= d_inode(origin
);
388 pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n",
389 origin
, inode
? inode
->i_ino
: 0, err
);
394 * Verify that an index entry name matches the origin file handle stored in
395 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
396 * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
398 int ovl_verify_index(struct dentry
*index
, struct path
*lowerstack
,
399 unsigned int numlower
)
401 struct ovl_fh
*fh
= NULL
;
403 struct path origin
= { };
404 struct path
*stack
= &origin
;
405 unsigned int ctr
= 0;
412 * Directory index entries are going to be used for looking up
413 * redirected upper dirs by lower dir fh when decoding an overlay
414 * file handle of a merge dir. Whiteout index entries are going to be
415 * used as an indication that an exported overlay file handle should
416 * be treated as stale (i.e. after unlink of the overlay inode).
417 * We don't know the verification rules for directory and whiteout
418 * index entries, because they have not been implemented yet, so return
419 * EINVAL if those entries are found to abort the mount to avoid
420 * corrupting an index that was created by a newer kernel.
423 if (d_is_dir(index
) || ovl_is_whiteout(index
))
426 if (index
->d_name
.len
< sizeof(struct ovl_fh
)*2)
430 len
= index
->d_name
.len
/ 2;
431 fh
= kzalloc(len
, GFP_KERNEL
);
436 if (hex2bin((u8
*)fh
, index
->d_name
.name
, len
) || len
!= fh
->len
)
439 err
= ovl_verify_origin_fh(index
, fh
);
443 err
= ovl_check_origin(index
, lowerstack
, numlower
, &stack
, &ctr
);
449 /* Check if index is orphan and don't warn before cleaning it */
450 if (d_inode(index
)->i_nlink
== 1 &&
451 ovl_get_nlink(origin
.dentry
, index
, 0) == 0)
460 pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
461 index
, d_inode(index
)->i_mode
& S_IFMT
, err
);
466 * Lookup in indexdir for the index entry of a lower real inode or a copy up
467 * origin inode. The index entry name is the hex representation of the lower
470 * If the index dentry is negative, then either no lower aliases have been
471 * copied up yet, or aliases have been copied up in older kernels and are
474 * If the index dentry for a copy up origin inode is positive, but points
475 * to an inode different than the upper inode, then either the upper inode
476 * has been copied up and not indexed or it was indexed, but since then
477 * index dir was cleared. Either way, that index cannot be used to identify
480 int ovl_get_index_name(struct dentry
*origin
, struct qstr
*name
)
486 fh
= ovl_encode_fh(origin
, false);
491 n
= kzalloc(fh
->len
* 2, GFP_KERNEL
);
493 s
= bin2hex(n
, fh
, fh
->len
);
494 *name
= (struct qstr
) QSTR_INIT(n
, s
- n
);
503 static struct dentry
*ovl_lookup_index(struct dentry
*dentry
,
504 struct dentry
*upper
,
505 struct dentry
*origin
)
507 struct ovl_fs
*ofs
= dentry
->d_sb
->s_fs_info
;
508 struct dentry
*index
;
513 err
= ovl_get_index_name(origin
, &name
);
517 index
= lookup_one_len_unlocked(name
.name
, ofs
->indexdir
, name
.len
);
519 err
= PTR_ERR(index
);
520 if (err
== -ENOENT
) {
524 pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
525 "overlayfs: mount with '-o index=off' to disable inodes index.\n",
526 d_inode(origin
)->i_ino
, name
.len
, name
.name
,
531 inode
= d_inode(index
);
532 if (d_is_negative(index
)) {
534 } else if (upper
&& d_inode(upper
) != inode
) {
536 } else if (ovl_dentry_weird(index
) || ovl_is_whiteout(index
) ||
537 ((inode
->i_mode
^ d_inode(origin
)->i_mode
) & S_IFMT
)) {
539 * Index should always be of the same file type as origin
540 * except for the case of a whiteout index. A whiteout
541 * index should only exist if all lower aliases have been
542 * unlinked, which means that finding a lower origin on lookup
543 * whose index is a whiteout should be treated as an error.
545 pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
546 index
, d_inode(index
)->i_mode
& S_IFMT
,
547 d_inode(origin
)->i_mode
& S_IFMT
);
562 index
= ERR_PTR(-EIO
);
567 * Returns next layer in stack starting from top.
568 * Returns -1 if this is the last layer.
570 int ovl_path_next(int idx
, struct dentry
*dentry
, struct path
*path
)
572 struct ovl_entry
*oe
= dentry
->d_fsdata
;
576 ovl_path_upper(dentry
, path
);
578 return oe
->numlower
? 1 : -1;
581 BUG_ON(idx
> oe
->numlower
);
582 *path
= oe
->lowerstack
[idx
- 1];
584 return (idx
< oe
->numlower
) ? idx
+ 1 : -1;
587 struct dentry
*ovl_lookup(struct inode
*dir
, struct dentry
*dentry
,
590 struct ovl_entry
*oe
;
591 const struct cred
*old_cred
;
592 struct ovl_fs
*ofs
= dentry
->d_sb
->s_fs_info
;
593 struct ovl_entry
*poe
= dentry
->d_parent
->d_fsdata
;
594 struct ovl_entry
*roe
= dentry
->d_sb
->s_root
->d_fsdata
;
595 struct path
*stack
= NULL
;
596 struct dentry
*upperdir
, *upperdentry
= NULL
;
597 struct dentry
*index
= NULL
;
598 unsigned int ctr
= 0;
599 struct inode
*inode
= NULL
;
600 bool upperopaque
= false;
601 char *upperredirect
= NULL
;
605 struct ovl_lookup_data d
= {
606 .name
= dentry
->d_name
,
610 .last
= !poe
->numlower
,
614 if (dentry
->d_name
.len
> ofs
->namelen
)
615 return ERR_PTR(-ENAMETOOLONG
);
617 old_cred
= ovl_override_creds(dentry
->d_sb
);
618 upperdir
= ovl_dentry_upper(dentry
->d_parent
);
620 err
= ovl_lookup_layer(upperdir
, &d
, &upperdentry
);
624 if (upperdentry
&& unlikely(ovl_dentry_remote(upperdentry
))) {
629 if (upperdentry
&& !d
.is_dir
) {
630 BUG_ON(!d
.stop
|| d
.redirect
);
632 * Lookup copy up origin by decoding origin file handle.
633 * We may get a disconnected dentry, which is fine,
634 * because we only need to hold the origin inode in
635 * cache and use its inode number. We may even get a
636 * connected dentry, that is not under any of the lower
637 * layers root. That is also fine for using its inode
638 * number - it's the same as if we held a reference
639 * to a dentry in lower layer that was moved under us.
641 err
= ovl_check_origin(upperdentry
, roe
->lowerstack
,
642 roe
->numlower
, &stack
, &ctr
);
649 upperredirect
= kstrdup(d
.redirect
, GFP_KERNEL
);
652 if (d
.redirect
[0] == '/')
655 upperopaque
= d
.opaque
;
658 if (!d
.stop
&& poe
->numlower
) {
660 stack
= kcalloc(ofs
->numlower
, sizeof(struct path
),
666 for (i
= 0; !d
.stop
&& i
< poe
->numlower
; i
++) {
667 struct path lowerpath
= poe
->lowerstack
[i
];
669 d
.last
= i
== poe
->numlower
- 1;
670 err
= ovl_lookup_layer(lowerpath
.dentry
, &d
, &this);
677 stack
[ctr
].dentry
= this;
678 stack
[ctr
].mnt
= lowerpath
.mnt
;
684 if (d
.redirect
&& d
.redirect
[0] == '/' && poe
!= roe
) {
687 /* Find the current layer on the root dentry */
688 for (i
= 0; i
< poe
->numlower
; i
++)
689 if (poe
->lowerstack
[i
].mnt
== lowerpath
.mnt
)
691 if (WARN_ON(i
== poe
->numlower
))
696 /* Lookup index by lower inode and verify it matches upper inode */
697 if (ctr
&& !d
.is_dir
&& ovl_indexdir(dentry
->d_sb
)) {
698 struct dentry
*origin
= stack
[0].dentry
;
700 index
= ovl_lookup_index(dentry
, upperdentry
, origin
);
702 err
= PTR_ERR(index
);
708 oe
= ovl_alloc_entry(ctr
);
713 oe
->opaque
= upperopaque
;
714 memcpy(oe
->lowerstack
, stack
, sizeof(struct path
) * ctr
);
715 dentry
->d_fsdata
= oe
;
718 ovl_dentry_set_upper_alias(dentry
);
720 upperdentry
= dget(index
);
722 if (upperdentry
|| ctr
) {
723 inode
= ovl_get_inode(dentry
, upperdentry
, index
);
724 err
= PTR_ERR(inode
);
728 OVL_I(inode
)->redirect
= upperredirect
;
730 ovl_set_flag(OVL_INDEX
, inode
);
733 revert_creds(old_cred
);
737 d_add(dentry
, inode
);
742 dentry
->d_fsdata
= NULL
;
746 for (i
= 0; i
< ctr
; i
++)
747 dput(stack
[i
].dentry
);
751 kfree(upperredirect
);
754 revert_creds(old_cred
);
758 bool ovl_lower_positive(struct dentry
*dentry
)
760 struct ovl_entry
*oe
= dentry
->d_fsdata
;
761 struct ovl_entry
*poe
= dentry
->d_parent
->d_fsdata
;
762 const struct qstr
*name
= &dentry
->d_name
;
764 bool positive
= false;
768 * If dentry is negative, then lower is positive iff this is a
771 if (!dentry
->d_inode
)
774 /* Negative upper -> positive lower */
775 if (!ovl_dentry_upper(dentry
))
778 /* Positive upper -> have to look up lower to see whether it exists */
779 for (i
= 0; !done
&& !positive
&& i
< poe
->numlower
; i
++) {
781 struct dentry
*lowerdir
= poe
->lowerstack
[i
].dentry
;
783 this = lookup_one_len_unlocked(name
->name
, lowerdir
,
786 switch (PTR_ERR(this)) {
793 * Assume something is there, we just couldn't
801 positive
= !ovl_is_whiteout(this);