2 * Copyright (C) 2011 Novell Inc.
3 * Copyright (C) 2016 Red Hat, Inc.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
11 #include <linux/cred.h>
12 #include <linux/namei.h>
13 #include <linux/xattr.h>
14 #include <linux/ratelimit.h>
15 #include <linux/mount.h>
16 #include <linux/exportfs.h>
17 #include "overlayfs.h"
19 struct ovl_lookup_data
{
28 static int ovl_check_redirect(struct dentry
*dentry
, struct ovl_lookup_data
*d
,
29 size_t prelen
, const char *post
)
32 char *s
, *next
, *buf
= NULL
;
34 res
= vfs_getxattr(dentry
, OVL_XATTR_REDIRECT
, NULL
, 0);
36 if (res
== -ENODATA
|| res
== -EOPNOTSUPP
)
40 buf
= kzalloc(prelen
+ res
+ strlen(post
) + 1, GFP_KERNEL
);
47 res
= vfs_getxattr(dentry
, OVL_XATTR_REDIRECT
, buf
, res
);
53 for (s
= buf
; *s
++ == '/'; s
= next
) {
54 next
= strchrnul(s
, '/');
59 if (strchr(buf
, '/') != NULL
)
62 memmove(buf
+ prelen
, buf
, res
);
63 memcpy(buf
, d
->name
.name
, prelen
);
69 d
->name
.name
= d
->redirect
;
70 d
->name
.len
= strlen(d
->redirect
);
78 pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res
);
81 pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf
);
85 static int ovl_acceptable(void *ctx
, struct dentry
*dentry
)
90 static struct ovl_fh
*ovl_get_origin_fh(struct dentry
*dentry
)
93 struct ovl_fh
*fh
= NULL
;
95 res
= vfs_getxattr(dentry
, OVL_XATTR_ORIGIN
, NULL
, 0);
97 if (res
== -ENODATA
|| res
== -EOPNOTSUPP
)
101 /* Zero size value means "copied up but origin unknown" */
105 fh
= kzalloc(res
, GFP_KERNEL
);
107 return ERR_PTR(-ENOMEM
);
109 res
= vfs_getxattr(dentry
, OVL_XATTR_ORIGIN
, fh
, res
);
113 if (res
< sizeof(struct ovl_fh
) || res
< fh
->len
)
116 if (fh
->magic
!= OVL_FH_MAGIC
)
119 /* Treat larger version and unknown flags as "origin unknown" */
120 if (fh
->version
> OVL_FH_VERSION
|| fh
->flags
& ~OVL_FH_FLAG_ALL
)
123 /* Treat endianness mismatch as "origin unknown" */
124 if (!(fh
->flags
& OVL_FH_FLAG_ANY_ENDIAN
) &&
125 (fh
->flags
& OVL_FH_FLAG_BIG_ENDIAN
) != OVL_FH_FLAG_CPU_ENDIAN
)
135 pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res
);
138 pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res
, fh
);
142 static struct dentry
*ovl_get_origin(struct dentry
*dentry
,
143 struct vfsmount
*mnt
)
145 struct dentry
*origin
= NULL
;
146 struct ovl_fh
*fh
= ovl_get_origin_fh(dentry
);
149 if (IS_ERR_OR_NULL(fh
))
150 return (struct dentry
*)fh
;
153 * Make sure that the stored uuid matches the uuid of the lower
154 * layer where file handle will be decoded.
156 if (!uuid_equal(&fh
->uuid
, &mnt
->mnt_sb
->s_uuid
))
159 bytes
= (fh
->len
- offsetof(struct ovl_fh
, fid
));
160 origin
= exportfs_decode_fh(mnt
, (struct fid
*)fh
->fid
,
161 bytes
>> 2, (int)fh
->type
,
162 ovl_acceptable
, NULL
);
163 if (IS_ERR(origin
)) {
164 /* Treat stale file handle as "origin unknown" */
165 if (origin
== ERR_PTR(-ESTALE
))
170 if (ovl_dentry_weird(origin
) ||
171 ((d_inode(origin
)->i_mode
^ d_inode(dentry
)->i_mode
) & S_IFMT
))
179 pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin
);
185 static bool ovl_is_opaquedir(struct dentry
*dentry
)
187 return ovl_check_dir_xattr(dentry
, OVL_XATTR_OPAQUE
);
190 static int ovl_lookup_single(struct dentry
*base
, struct ovl_lookup_data
*d
,
191 const char *name
, unsigned int namelen
,
192 size_t prelen
, const char *post
,
198 this = lookup_one_len_unlocked(name
, base
, namelen
);
202 if (err
== -ENOENT
|| err
== -ENAMETOOLONG
)
209 if (ovl_dentry_weird(this)) {
210 /* Don't support traversing automounts and other weirdness */
214 if (ovl_is_whiteout(this)) {
215 d
->stop
= d
->opaque
= true;
218 if (!d_can_lookup(this)) {
225 if (!d
->last
&& ovl_is_opaquedir(this)) {
226 d
->stop
= d
->opaque
= true;
229 err
= ovl_check_redirect(this, d
, prelen
, post
);
246 static int ovl_lookup_layer(struct dentry
*base
, struct ovl_lookup_data
*d
,
249 /* Counting down from the end, since the prefix can change */
250 size_t rem
= d
->name
.len
- 1;
251 struct dentry
*dentry
= NULL
;
254 if (d
->name
.name
[0] != '/')
255 return ovl_lookup_single(base
, d
, d
->name
.name
, d
->name
.len
,
258 while (!IS_ERR_OR_NULL(base
) && d_can_lookup(base
)) {
259 const char *s
= d
->name
.name
+ d
->name
.len
- rem
;
260 const char *next
= strchrnul(s
, '/');
261 size_t thislen
= next
- s
;
264 /* Verify we did not go off the rails */
265 if (WARN_ON(s
[-1] != '/'))
268 err
= ovl_lookup_single(base
, d
, s
, thislen
,
269 d
->name
.len
- rem
, next
, &base
);
279 if (WARN_ON(rem
>= d
->name
.len
))
287 static int ovl_check_origin(struct dentry
*upperdentry
,
288 struct ovl_path
*lower
, unsigned int numlower
,
289 struct ovl_path
**stackp
, unsigned int *ctrp
)
291 struct vfsmount
*mnt
;
292 struct dentry
*origin
= NULL
;
295 for (i
= 0; i
< numlower
; i
++) {
296 mnt
= lower
[i
].layer
->mnt
;
297 origin
= ovl_get_origin(upperdentry
, mnt
);
299 return PTR_ERR(origin
);
310 *stackp
= kmalloc(sizeof(struct ovl_path
), GFP_KERNEL
);
315 **stackp
= (struct ovl_path
){.dentry
= origin
, .layer
= lower
[i
].layer
};
322 * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN.
323 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
325 static int ovl_verify_origin_fh(struct dentry
*dentry
, const struct ovl_fh
*fh
)
327 struct ovl_fh
*ofh
= ovl_get_origin_fh(dentry
);
336 if (fh
->len
!= ofh
->len
|| memcmp(fh
, ofh
, fh
->len
))
344 * Verify that an inode matches the origin file handle stored in upper inode.
346 * If @set is true and there is no stored file handle, encode and store origin
347 * file handle in OVL_XATTR_ORIGIN.
349 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
351 int ovl_verify_origin(struct dentry
*dentry
, struct dentry
*origin
,
352 bool is_upper
, bool set
)
358 fh
= ovl_encode_fh(origin
, is_upper
);
363 err
= ovl_verify_origin_fh(dentry
, fh
);
364 if (set
&& err
== -ENODATA
)
365 err
= ovl_do_setxattr(dentry
, OVL_XATTR_ORIGIN
, fh
, fh
->len
, 0);
374 inode
= d_inode(origin
);
375 pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n",
376 origin
, inode
? inode
->i_ino
: 0, err
);
381 * Verify that an index entry name matches the origin file handle stored in
382 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
383 * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
385 int ovl_verify_index(struct dentry
*index
, struct ovl_path
*lower
,
386 unsigned int numlower
)
388 struct ovl_fh
*fh
= NULL
;
390 struct ovl_path origin
= { };
391 struct ovl_path
*stack
= &origin
;
392 unsigned int ctr
= 0;
399 * Directory index entries are going to be used for looking up
400 * redirected upper dirs by lower dir fh when decoding an overlay
401 * file handle of a merge dir. Whiteout index entries are going to be
402 * used as an indication that an exported overlay file handle should
403 * be treated as stale (i.e. after unlink of the overlay inode).
404 * We don't know the verification rules for directory and whiteout
405 * index entries, because they have not been implemented yet, so return
406 * EINVAL if those entries are found to abort the mount to avoid
407 * corrupting an index that was created by a newer kernel.
410 if (d_is_dir(index
) || ovl_is_whiteout(index
))
413 if (index
->d_name
.len
< sizeof(struct ovl_fh
)*2)
417 len
= index
->d_name
.len
/ 2;
418 fh
= kzalloc(len
, GFP_KERNEL
);
423 if (hex2bin((u8
*)fh
, index
->d_name
.name
, len
) || len
!= fh
->len
)
426 err
= ovl_verify_origin_fh(index
, fh
);
430 err
= ovl_check_origin(index
, lower
, numlower
, &stack
, &ctr
);
436 /* Check if index is orphan and don't warn before cleaning it */
437 if (d_inode(index
)->i_nlink
== 1 &&
438 ovl_get_nlink(origin
.dentry
, index
, 0) == 0)
447 pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
448 index
, d_inode(index
)->i_mode
& S_IFMT
, err
);
453 * Lookup in indexdir for the index entry of a lower real inode or a copy up
454 * origin inode. The index entry name is the hex representation of the lower
457 * If the index dentry is negative, then either no lower aliases have been
458 * copied up yet, or aliases have been copied up in older kernels and are
461 * If the index dentry for a copy up origin inode is positive, but points
462 * to an inode different than the upper inode, then either the upper inode
463 * has been copied up and not indexed or it was indexed, but since then
464 * index dir was cleared. Either way, that index cannot be used to identify
467 int ovl_get_index_name(struct dentry
*origin
, struct qstr
*name
)
473 fh
= ovl_encode_fh(origin
, false);
478 n
= kzalloc(fh
->len
* 2, GFP_KERNEL
);
480 s
= bin2hex(n
, fh
, fh
->len
);
481 *name
= (struct qstr
) QSTR_INIT(n
, s
- n
);
490 static struct dentry
*ovl_lookup_index(struct dentry
*dentry
,
491 struct dentry
*upper
,
492 struct dentry
*origin
)
494 struct ovl_fs
*ofs
= dentry
->d_sb
->s_fs_info
;
495 struct dentry
*index
;
500 err
= ovl_get_index_name(origin
, &name
);
504 index
= lookup_one_len_unlocked(name
.name
, ofs
->indexdir
, name
.len
);
506 err
= PTR_ERR(index
);
507 if (err
== -ENOENT
) {
511 pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n"
512 "overlayfs: mount with '-o index=off' to disable inodes index.\n",
513 d_inode(origin
)->i_ino
, name
.len
, name
.name
,
518 inode
= d_inode(index
);
519 if (d_is_negative(index
)) {
521 } else if (upper
&& d_inode(upper
) != inode
) {
523 } else if (ovl_dentry_weird(index
) || ovl_is_whiteout(index
) ||
524 ((inode
->i_mode
^ d_inode(origin
)->i_mode
) & S_IFMT
)) {
526 * Index should always be of the same file type as origin
527 * except for the case of a whiteout index. A whiteout
528 * index should only exist if all lower aliases have been
529 * unlinked, which means that finding a lower origin on lookup
530 * whose index is a whiteout should be treated as an error.
532 pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
533 index
, d_inode(index
)->i_mode
& S_IFMT
,
534 d_inode(origin
)->i_mode
& S_IFMT
);
549 index
= ERR_PTR(-EIO
);
554 * Returns next layer in stack starting from top.
555 * Returns -1 if this is the last layer.
557 int ovl_path_next(int idx
, struct dentry
*dentry
, struct path
*path
)
559 struct ovl_entry
*oe
= dentry
->d_fsdata
;
563 ovl_path_upper(dentry
, path
);
565 return oe
->numlower
? 1 : -1;
568 BUG_ON(idx
> oe
->numlower
);
569 path
->dentry
= oe
->lowerstack
[idx
- 1].dentry
;
570 path
->mnt
= oe
->lowerstack
[idx
- 1].layer
->mnt
;
572 return (idx
< oe
->numlower
) ? idx
+ 1 : -1;
575 static int ovl_find_layer(struct ovl_fs
*ofs
, struct ovl_path
*path
)
579 for (i
= 0; i
< ofs
->numlower
; i
++) {
580 if (ofs
->lower_layers
[i
].mnt
== path
->layer
->mnt
)
587 struct dentry
*ovl_lookup(struct inode
*dir
, struct dentry
*dentry
,
590 struct ovl_entry
*oe
;
591 const struct cred
*old_cred
;
592 struct ovl_fs
*ofs
= dentry
->d_sb
->s_fs_info
;
593 struct ovl_entry
*poe
= dentry
->d_parent
->d_fsdata
;
594 struct ovl_entry
*roe
= dentry
->d_sb
->s_root
->d_fsdata
;
595 struct ovl_path
*stack
= NULL
;
596 struct dentry
*upperdir
, *upperdentry
= NULL
;
597 struct dentry
*index
= NULL
;
598 unsigned int ctr
= 0;
599 struct inode
*inode
= NULL
;
600 bool upperopaque
= false;
601 char *upperredirect
= NULL
;
605 struct ovl_lookup_data d
= {
606 .name
= dentry
->d_name
,
610 .last
= !poe
->numlower
,
614 if (dentry
->d_name
.len
> ofs
->namelen
)
615 return ERR_PTR(-ENAMETOOLONG
);
617 old_cred
= ovl_override_creds(dentry
->d_sb
);
618 upperdir
= ovl_dentry_upper(dentry
->d_parent
);
620 err
= ovl_lookup_layer(upperdir
, &d
, &upperdentry
);
624 if (upperdentry
&& unlikely(ovl_dentry_remote(upperdentry
))) {
629 if (upperdentry
&& !d
.is_dir
) {
630 BUG_ON(!d
.stop
|| d
.redirect
);
632 * Lookup copy up origin by decoding origin file handle.
633 * We may get a disconnected dentry, which is fine,
634 * because we only need to hold the origin inode in
635 * cache and use its inode number. We may even get a
636 * connected dentry, that is not under any of the lower
637 * layers root. That is also fine for using its inode
638 * number - it's the same as if we held a reference
639 * to a dentry in lower layer that was moved under us.
641 err
= ovl_check_origin(upperdentry
, roe
->lowerstack
,
642 roe
->numlower
, &stack
, &ctr
);
649 upperredirect
= kstrdup(d
.redirect
, GFP_KERNEL
);
652 if (d
.redirect
[0] == '/')
655 upperopaque
= d
.opaque
;
658 if (!d
.stop
&& poe
->numlower
) {
660 stack
= kcalloc(ofs
->numlower
, sizeof(struct ovl_path
),
666 for (i
= 0; !d
.stop
&& i
< poe
->numlower
; i
++) {
667 struct ovl_path lower
= poe
->lowerstack
[i
];
669 d
.last
= i
== poe
->numlower
- 1;
670 err
= ovl_lookup_layer(lower
.dentry
, &d
, &this);
677 stack
[ctr
].dentry
= this;
678 stack
[ctr
].layer
= lower
.layer
;
685 * Following redirects can have security consequences: it's like
686 * a symlink into the lower layer without the permission checks.
687 * This is only a problem if the upper layer is untrusted (e.g.
688 * comes from a USB drive). This can allow a non-readable file
689 * or directory to become readable.
691 * Only following redirects when redirects are enabled disables
692 * this attack vector when not necessary.
695 if (d
.redirect
&& !ofs
->config
.redirect_follow
) {
696 pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry
);
700 if (d
.redirect
&& d
.redirect
[0] == '/' && poe
!= roe
) {
703 /* Find the current layer on the root dentry */
704 i
= ovl_find_layer(ofs
, &lower
);
705 if (WARN_ON(i
== ofs
->numlower
))
710 /* Lookup index by lower inode and verify it matches upper inode */
711 if (ctr
&& !d
.is_dir
&& ovl_indexdir(dentry
->d_sb
)) {
712 struct dentry
*origin
= stack
[0].dentry
;
714 index
= ovl_lookup_index(dentry
, upperdentry
, origin
);
716 err
= PTR_ERR(index
);
722 oe
= ovl_alloc_entry(ctr
);
727 oe
->opaque
= upperopaque
;
728 memcpy(oe
->lowerstack
, stack
, sizeof(struct ovl_path
) * ctr
);
729 dentry
->d_fsdata
= oe
;
732 ovl_dentry_set_upper_alias(dentry
);
734 upperdentry
= dget(index
);
736 if (upperdentry
|| ctr
) {
737 inode
= ovl_get_inode(dentry
, upperdentry
, index
);
738 err
= PTR_ERR(inode
);
742 OVL_I(inode
)->redirect
= upperredirect
;
744 ovl_set_flag(OVL_INDEX
, inode
);
747 revert_creds(old_cred
);
751 d_add(dentry
, inode
);
756 dentry
->d_fsdata
= NULL
;
760 for (i
= 0; i
< ctr
; i
++)
761 dput(stack
[i
].dentry
);
765 kfree(upperredirect
);
768 revert_creds(old_cred
);
772 bool ovl_lower_positive(struct dentry
*dentry
)
774 struct ovl_entry
*oe
= dentry
->d_fsdata
;
775 struct ovl_entry
*poe
= dentry
->d_parent
->d_fsdata
;
776 const struct qstr
*name
= &dentry
->d_name
;
778 bool positive
= false;
782 * If dentry is negative, then lower is positive iff this is a
785 if (!dentry
->d_inode
)
788 /* Negative upper -> positive lower */
789 if (!ovl_dentry_upper(dentry
))
792 /* Positive upper -> have to look up lower to see whether it exists */
793 for (i
= 0; !done
&& !positive
&& i
< poe
->numlower
; i
++) {
795 struct dentry
*lowerdir
= poe
->lowerstack
[i
].dentry
;
797 this = lookup_one_len_unlocked(name
->name
, lowerdir
,
800 switch (PTR_ERR(this)) {
807 * Assume something is there, we just couldn't
815 positive
= !ovl_is_whiteout(this);