Linux 6.14-rc1
[linux.git] / fs / overlayfs / namei.c
blobbe5c65d6f8484b1fba6b3fee379ba1d034c0df8a
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2011 Novell Inc.
4 * Copyright (C) 2016 Red Hat, Inc.
5 */
7 #include <linux/fs.h>
8 #include <linux/cred.h>
9 #include <linux/ctype.h>
10 #include <linux/namei.h>
11 #include <linux/xattr.h>
12 #include <linux/ratelimit.h>
13 #include <linux/mount.h>
14 #include <linux/exportfs.h>
15 #include "overlayfs.h"
17 struct ovl_lookup_data {
18 struct super_block *sb;
19 const struct ovl_layer *layer;
20 struct qstr name;
21 bool is_dir;
22 bool opaque;
23 bool xwhiteouts;
24 bool stop;
25 bool last;
26 char *redirect;
27 int metacopy;
28 /* Referring to last redirect xattr */
29 bool absolute_redirect;
32 static int ovl_check_redirect(const struct path *path, struct ovl_lookup_data *d,
33 size_t prelen, const char *post)
35 int res;
36 char *buf;
37 struct ovl_fs *ofs = OVL_FS(d->sb);
39 d->absolute_redirect = false;
40 buf = ovl_get_redirect_xattr(ofs, path, prelen + strlen(post));
41 if (IS_ERR_OR_NULL(buf))
42 return PTR_ERR(buf);
44 if (buf[0] == '/') {
45 d->absolute_redirect = true;
47 * One of the ancestor path elements in an absolute path
48 * lookup in ovl_lookup_layer() could have been opaque and
49 * that will stop further lookup in lower layers (d->stop=true)
50 * But we have found an absolute redirect in descendant path
51 * element and that should force continue lookup in lower
52 * layers (reset d->stop).
54 d->stop = false;
55 } else {
56 res = strlen(buf) + 1;
57 memmove(buf + prelen, buf, res);
58 memcpy(buf, d->name.name, prelen);
61 strcat(buf, post);
62 kfree(d->redirect);
63 d->redirect = buf;
64 d->name.name = d->redirect;
65 d->name.len = strlen(d->redirect);
67 return 0;
70 static int ovl_acceptable(void *ctx, struct dentry *dentry)
73 * A non-dir origin may be disconnected, which is fine, because
74 * we only need it for its unique inode number.
76 if (!d_is_dir(dentry))
77 return 1;
79 /* Don't decode a deleted empty directory */
80 if (d_unhashed(dentry))
81 return 0;
83 /* Check if directory belongs to the layer we are decoding from */
84 return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
88 * Check validity of an overlay file handle buffer.
90 * Return 0 for a valid file handle.
91 * Return -ENODATA for "origin unknown".
92 * Return <0 for an invalid file handle.
94 int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
96 if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
97 return -EINVAL;
99 if (fb->magic != OVL_FH_MAGIC)
100 return -EINVAL;
102 /* Treat larger version and unknown flags as "origin unknown" */
103 if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
104 return -ENODATA;
106 /* Treat endianness mismatch as "origin unknown" */
107 if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
108 (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
109 return -ENODATA;
111 return 0;
114 static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *upperdentry,
115 enum ovl_xattr ox)
117 int res, err;
118 struct ovl_fh *fh = NULL;
120 res = ovl_getxattr_upper(ofs, upperdentry, ox, NULL, 0);
121 if (res < 0) {
122 if (res == -ENODATA || res == -EOPNOTSUPP)
123 return NULL;
124 goto fail;
126 /* Zero size value means "copied up but origin unknown" */
127 if (res == 0)
128 return NULL;
130 fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
131 if (!fh)
132 return ERR_PTR(-ENOMEM);
134 res = ovl_getxattr_upper(ofs, upperdentry, ox, fh->buf, res);
135 if (res < 0)
136 goto fail;
138 err = ovl_check_fb_len(&fh->fb, res);
139 if (err < 0) {
140 if (err == -ENODATA)
141 goto out;
142 goto invalid;
145 return fh;
147 out:
148 kfree(fh);
149 return NULL;
151 fail:
152 pr_warn_ratelimited("failed to get origin (%i)\n", res);
153 goto out;
154 invalid:
155 pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
156 goto out;
159 struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
160 struct vfsmount *mnt, bool connected)
162 struct dentry *real;
163 int bytes;
165 if (!capable(CAP_DAC_READ_SEARCH))
166 return NULL;
169 * Make sure that the stored uuid matches the uuid of the lower
170 * layer where file handle will be decoded.
171 * In case of uuid=off option just make sure that stored uuid is null.
173 if (ovl_origin_uuid(ofs) ?
174 !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) :
175 !uuid_is_null(&fh->fb.uuid))
176 return NULL;
178 bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
179 real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
180 bytes >> 2, (int)fh->fb.type,
181 connected ? ovl_acceptable : NULL, mnt);
182 if (IS_ERR(real)) {
184 * Treat stale file handle to lower file as "origin unknown".
185 * upper file handle could become stale when upper file is
186 * unlinked and this information is needed to handle stale
187 * index entries correctly.
189 if (real == ERR_PTR(-ESTALE) &&
190 !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
191 real = NULL;
192 return real;
195 if (ovl_dentry_weird(real)) {
196 dput(real);
197 return NULL;
200 return real;
203 static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
204 const char *name,
205 struct dentry *base, int len,
206 bool drop_negative)
208 struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt), name,
209 base, len);
211 if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
212 if (drop_negative && ret->d_lockref.count == 1) {
213 spin_lock(&ret->d_lock);
214 /* Recheck condition under lock */
215 if (d_is_negative(ret) && ret->d_lockref.count == 1)
216 __d_drop(ret);
217 spin_unlock(&ret->d_lock);
219 dput(ret);
220 ret = ERR_PTR(-ENOENT);
222 return ret;
225 static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
226 const char *name, unsigned int namelen,
227 size_t prelen, const char *post,
228 struct dentry **ret, bool drop_negative)
230 struct ovl_fs *ofs = OVL_FS(d->sb);
231 struct dentry *this;
232 struct path path;
233 int err;
234 bool last_element = !post[0];
235 bool is_upper = d->layer->idx == 0;
236 char val;
238 this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
239 if (IS_ERR(this)) {
240 err = PTR_ERR(this);
241 this = NULL;
242 if (err == -ENOENT || err == -ENAMETOOLONG)
243 goto out;
244 goto out_err;
247 if (ovl_dentry_weird(this)) {
248 /* Don't support traversing automounts and other weirdness */
249 err = -EREMOTE;
250 goto out_err;
253 path.dentry = this;
254 path.mnt = d->layer->mnt;
255 if (ovl_path_is_whiteout(ofs, &path)) {
256 d->stop = d->opaque = true;
257 goto put_and_out;
260 * This dentry should be a regular file if previous layer lookup
261 * found a metacopy dentry.
263 if (last_element && d->metacopy && !d_is_reg(this)) {
264 d->stop = true;
265 goto put_and_out;
268 if (!d_can_lookup(this)) {
269 if (d->is_dir || !last_element) {
270 d->stop = true;
271 goto put_and_out;
273 err = ovl_check_metacopy_xattr(ofs, &path, NULL);
274 if (err < 0)
275 goto out_err;
277 d->metacopy = err;
278 d->stop = !d->metacopy;
279 if (!d->metacopy || d->last)
280 goto out;
281 } else {
282 if (ovl_lookup_trap_inode(d->sb, this)) {
283 /* Caught in a trap of overlapping layers */
284 err = -ELOOP;
285 goto out_err;
288 if (last_element)
289 d->is_dir = true;
290 if (d->last)
291 goto out;
293 /* overlay.opaque=x means xwhiteouts directory */
294 val = ovl_get_opaquedir_val(ofs, &path);
295 if (last_element && !is_upper && val == 'x') {
296 d->xwhiteouts = true;
297 ovl_layer_set_xwhiteouts(ofs, d->layer);
298 } else if (val == 'y') {
299 d->stop = true;
300 if (last_element)
301 d->opaque = true;
302 goto out;
305 err = ovl_check_redirect(&path, d, prelen, post);
306 if (err)
307 goto out_err;
308 out:
309 *ret = this;
310 return 0;
312 put_and_out:
313 dput(this);
314 this = NULL;
315 goto out;
317 out_err:
318 dput(this);
319 return err;
322 static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
323 struct dentry **ret, bool drop_negative)
325 /* Counting down from the end, since the prefix can change */
326 size_t rem = d->name.len - 1;
327 struct dentry *dentry = NULL;
328 int err;
330 if (d->name.name[0] != '/')
331 return ovl_lookup_single(base, d, d->name.name, d->name.len,
332 0, "", ret, drop_negative);
334 while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
335 const char *s = d->name.name + d->name.len - rem;
336 const char *next = strchrnul(s, '/');
337 size_t thislen = next - s;
338 bool end = !next[0];
340 /* Verify we did not go off the rails */
341 if (WARN_ON(s[-1] != '/'))
342 return -EIO;
344 err = ovl_lookup_single(base, d, s, thislen,
345 d->name.len - rem, next, &base,
346 drop_negative);
347 dput(dentry);
348 if (err)
349 return err;
350 dentry = base;
351 if (end)
352 break;
354 rem -= thislen + 1;
356 if (WARN_ON(rem >= d->name.len))
357 return -EIO;
359 *ret = dentry;
360 return 0;
363 static int ovl_lookup_data_layer(struct dentry *dentry, const char *redirect,
364 const struct ovl_layer *layer,
365 struct path *datapath)
367 int err;
369 err = vfs_path_lookup(layer->mnt->mnt_root, layer->mnt, redirect,
370 LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS | LOOKUP_NO_XDEV,
371 datapath);
372 pr_debug("lookup lowerdata (%pd2, redirect=\"%s\", layer=%d, err=%i)\n",
373 dentry, redirect, layer->idx, err);
375 if (err)
376 return err;
378 err = -EREMOTE;
379 if (ovl_dentry_weird(datapath->dentry))
380 goto out_path_put;
382 err = -ENOENT;
383 /* Only regular file is acceptable as lower data */
384 if (!d_is_reg(datapath->dentry))
385 goto out_path_put;
387 return 0;
389 out_path_put:
390 path_put(datapath);
392 return err;
395 /* Lookup in data-only layers by absolute redirect to layer root */
396 static int ovl_lookup_data_layers(struct dentry *dentry, const char *redirect,
397 struct ovl_path *lowerdata)
399 struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
400 const struct ovl_layer *layer;
401 struct path datapath;
402 int err = -ENOENT;
403 int i;
405 layer = &ofs->layers[ofs->numlayer - ofs->numdatalayer];
406 for (i = 0; i < ofs->numdatalayer; i++, layer++) {
407 err = ovl_lookup_data_layer(dentry, redirect, layer, &datapath);
408 if (!err) {
409 mntput(datapath.mnt);
410 lowerdata->dentry = datapath.dentry;
411 lowerdata->layer = layer;
412 return 0;
416 return err;
419 int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
420 struct dentry *upperdentry, struct ovl_path **stackp)
422 struct dentry *origin = NULL;
423 int i;
425 for (i = 1; i <= ovl_numlowerlayer(ofs); i++) {
427 * If lower fs uuid is not unique among lower fs we cannot match
428 * fh->uuid to layer.
430 if (ofs->layers[i].fsid &&
431 ofs->layers[i].fs->bad_uuid)
432 continue;
434 origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt,
435 connected);
436 if (origin)
437 break;
440 if (!origin)
441 return -ESTALE;
442 else if (IS_ERR(origin))
443 return PTR_ERR(origin);
445 if (upperdentry && !ovl_upper_is_whiteout(ofs, upperdentry) &&
446 inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
447 goto invalid;
449 if (!*stackp)
450 *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
451 if (!*stackp) {
452 dput(origin);
453 return -ENOMEM;
455 **stackp = (struct ovl_path){
456 .dentry = origin,
457 .layer = &ofs->layers[i]
460 return 0;
462 invalid:
463 pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
464 upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
465 d_inode(origin)->i_mode & S_IFMT);
466 dput(origin);
467 return -ESTALE;
470 static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
471 struct ovl_path **stackp)
473 struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN);
474 int err;
476 if (IS_ERR_OR_NULL(fh))
477 return PTR_ERR(fh);
479 err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
480 kfree(fh);
482 if (err) {
483 if (err == -ESTALE)
484 return 0;
485 return err;
488 return 0;
492 * Verify that @fh matches the file handle stored in xattr @name.
493 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
495 static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
496 enum ovl_xattr ox, const struct ovl_fh *fh)
498 struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox);
499 int err = 0;
501 if (!ofh)
502 return -ENODATA;
504 if (IS_ERR(ofh))
505 return PTR_ERR(ofh);
507 if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
508 err = -ESTALE;
510 kfree(ofh);
511 return err;
514 int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
515 enum ovl_xattr ox, const struct ovl_fh *fh,
516 bool is_upper, bool set)
518 int err;
520 err = ovl_verify_fh(ofs, dentry, ox, fh);
521 if (set && err == -ENODATA)
522 err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
524 return err;
528 * Verify that @real dentry matches the file handle stored in xattr @name.
530 * If @set is true and there is no stored file handle, encode @real and store
531 * file handle in xattr @name.
533 * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
535 int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
536 enum ovl_xattr ox, struct dentry *real,
537 bool is_upper, bool set)
539 struct inode *inode;
540 struct ovl_fh *fh;
541 int err;
543 fh = ovl_encode_real_fh(ofs, d_inode(real), is_upper);
544 err = PTR_ERR(fh);
545 if (IS_ERR(fh)) {
546 fh = NULL;
547 goto fail;
550 err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set);
551 if (err)
552 goto fail;
554 out:
555 kfree(fh);
556 return err;
558 fail:
559 inode = d_inode(real);
560 pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
561 is_upper ? "upper" : "origin", real,
562 inode ? inode->i_ino : 0, err);
563 goto out;
567 /* Get upper dentry from index */
568 struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index,
569 bool connected)
571 struct ovl_fh *fh;
572 struct dentry *upper;
574 if (!d_is_dir(index))
575 return dget(index);
577 fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER);
578 if (IS_ERR_OR_NULL(fh))
579 return ERR_CAST(fh);
581 upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), connected);
582 kfree(fh);
584 if (IS_ERR_OR_NULL(upper))
585 return upper ?: ERR_PTR(-ESTALE);
587 if (!d_is_dir(upper)) {
588 pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
589 index, upper);
590 dput(upper);
591 return ERR_PTR(-EIO);
594 return upper;
598 * Verify that an index entry name matches the origin file handle stored in
599 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
600 * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
602 int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
604 struct ovl_fh *fh = NULL;
605 size_t len;
606 struct ovl_path origin = { };
607 struct ovl_path *stack = &origin;
608 struct dentry *upper = NULL;
609 int err;
611 if (!d_inode(index))
612 return 0;
614 err = -EINVAL;
615 if (index->d_name.len < sizeof(struct ovl_fb)*2)
616 goto fail;
618 err = -ENOMEM;
619 len = index->d_name.len / 2;
620 fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
621 if (!fh)
622 goto fail;
624 err = -EINVAL;
625 if (hex2bin(fh->buf, index->d_name.name, len))
626 goto fail;
628 err = ovl_check_fb_len(&fh->fb, len);
629 if (err)
630 goto fail;
633 * Whiteout index entries are used as an indication that an exported
634 * overlay file handle should be treated as stale (i.e. after unlink
635 * of the overlay inode). These entries contain no origin xattr.
637 if (ovl_is_whiteout(index))
638 goto out;
641 * Verifying directory index entries are not stale is expensive, so
642 * only verify stale dir index if NFS export is enabled.
644 if (d_is_dir(index) && !ofs->config.nfs_export)
645 goto out;
648 * Directory index entries should have 'upper' xattr pointing to the
649 * real upper dir. Non-dir index entries are hardlinks to the upper
650 * real inode. For non-dir index, we can read the copy up origin xattr
651 * directly from the index dentry, but for dir index we first need to
652 * decode the upper directory.
654 upper = ovl_index_upper(ofs, index, false);
655 if (IS_ERR_OR_NULL(upper)) {
656 err = PTR_ERR(upper);
658 * Directory index entries with no 'upper' xattr need to be
659 * removed. When dir index entry has a stale 'upper' xattr,
660 * we assume that upper dir was removed and we treat the dir
661 * index as orphan entry that needs to be whited out.
663 if (err == -ESTALE)
664 goto orphan;
665 else if (!err)
666 err = -ESTALE;
667 goto fail;
670 err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh);
671 dput(upper);
672 if (err)
673 goto fail;
675 /* Check if non-dir index is orphan and don't warn before cleaning it */
676 if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
677 err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
678 if (err)
679 goto fail;
681 if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0)
682 goto orphan;
685 out:
686 dput(origin.dentry);
687 kfree(fh);
688 return err;
690 fail:
691 pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
692 index, d_inode(index)->i_mode & S_IFMT, err);
693 goto out;
695 orphan:
696 pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
697 index, d_inode(index)->i_mode & S_IFMT,
698 d_inode(index)->i_nlink);
699 err = -ENOENT;
700 goto out;
703 int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name)
705 char *n, *s;
707 n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
708 if (!n)
709 return -ENOMEM;
711 s = bin2hex(n, fh->buf, fh->fb.len);
712 *name = (struct qstr) QSTR_INIT(n, s - n);
714 return 0;
719 * Lookup in indexdir for the index entry of a lower real inode or a copy up
720 * origin inode. The index entry name is the hex representation of the lower
721 * inode file handle.
723 * If the index dentry in negative, then either no lower aliases have been
724 * copied up yet, or aliases have been copied up in older kernels and are
725 * not indexed.
727 * If the index dentry for a copy up origin inode is positive, but points
728 * to an inode different than the upper inode, then either the upper inode
729 * has been copied up and not indexed or it was indexed, but since then
730 * index dir was cleared. Either way, that index cannot be used to identify
731 * the overlay inode.
733 int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
734 struct qstr *name)
736 struct ovl_fh *fh;
737 int err;
739 fh = ovl_encode_real_fh(ofs, d_inode(origin), false);
740 if (IS_ERR(fh))
741 return PTR_ERR(fh);
743 err = ovl_get_index_name_fh(fh, name);
745 kfree(fh);
746 return err;
749 /* Lookup index by file handle for NFS export */
750 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
752 struct dentry *index;
753 struct qstr name;
754 int err;
756 err = ovl_get_index_name_fh(fh, &name);
757 if (err)
758 return ERR_PTR(err);
760 index = lookup_positive_unlocked(name.name, ofs->workdir, name.len);
761 kfree(name.name);
762 if (IS_ERR(index)) {
763 if (PTR_ERR(index) == -ENOENT)
764 index = NULL;
765 return index;
768 if (ovl_is_whiteout(index))
769 err = -ESTALE;
770 else if (ovl_dentry_weird(index))
771 err = -EIO;
772 else
773 return index;
775 dput(index);
776 return ERR_PTR(err);
779 struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
780 struct dentry *origin, bool verify)
782 struct dentry *index;
783 struct inode *inode;
784 struct qstr name;
785 bool is_dir = d_is_dir(origin);
786 int err;
788 err = ovl_get_index_name(ofs, origin, &name);
789 if (err)
790 return ERR_PTR(err);
792 index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), name.name,
793 ofs->workdir, name.len);
794 if (IS_ERR(index)) {
795 err = PTR_ERR(index);
796 if (err == -ENOENT) {
797 index = NULL;
798 goto out;
800 pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
801 "overlayfs: mount with '-o index=off' to disable inodes index.\n",
802 d_inode(origin)->i_ino, name.len, name.name,
803 err);
804 goto out;
807 inode = d_inode(index);
808 if (ovl_is_whiteout(index) && !verify) {
810 * When index lookup is called with !verify for decoding an
811 * overlay file handle, a whiteout index implies that decode
812 * should treat file handle as stale and no need to print a
813 * warning about it.
815 dput(index);
816 index = ERR_PTR(-ESTALE);
817 goto out;
818 } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
819 inode_wrong_type(inode, d_inode(origin)->i_mode)) {
821 * Index should always be of the same file type as origin
822 * except for the case of a whiteout index. A whiteout
823 * index should only exist if all lower aliases have been
824 * unlinked, which means that finding a lower origin on lookup
825 * whose index is a whiteout should be treated as an error.
827 pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
828 index, d_inode(index)->i_mode & S_IFMT,
829 d_inode(origin)->i_mode & S_IFMT);
830 goto fail;
831 } else if (is_dir && verify) {
832 if (!upper) {
833 pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
834 origin, index);
835 goto fail;
838 /* Verify that dir index 'upper' xattr points to upper dir */
839 err = ovl_verify_upper(ofs, index, upper, false);
840 if (err) {
841 if (err == -ESTALE) {
842 pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
843 upper, origin, index);
845 goto fail;
847 } else if (upper && d_inode(upper) != inode) {
848 goto out_dput;
850 out:
851 kfree(name.name);
852 return index;
854 out_dput:
855 dput(index);
856 index = NULL;
857 goto out;
859 fail:
860 dput(index);
861 index = ERR_PTR(-EIO);
862 goto out;
866 * Returns next layer in stack starting from top.
867 * Returns -1 if this is the last layer.
869 int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
870 const struct ovl_layer **layer)
872 struct ovl_entry *oe = OVL_E(dentry);
873 struct ovl_path *lowerstack = ovl_lowerstack(oe);
875 BUG_ON(idx < 0);
876 if (idx == 0) {
877 ovl_path_upper(dentry, path);
878 if (path->dentry) {
879 *layer = &OVL_FS(dentry->d_sb)->layers[0];
880 return ovl_numlower(oe) ? 1 : -1;
882 idx++;
884 BUG_ON(idx > ovl_numlower(oe));
885 path->dentry = lowerstack[idx - 1].dentry;
886 *layer = lowerstack[idx - 1].layer;
887 path->mnt = (*layer)->mnt;
889 return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
892 /* Fix missing 'origin' xattr */
893 static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
894 struct dentry *lower, struct dentry *upper)
896 const struct ovl_fh *fh;
897 int err;
899 if (ovl_check_origin_xattr(ofs, upper))
900 return 0;
902 fh = ovl_get_origin_fh(ofs, lower);
903 if (IS_ERR(fh))
904 return PTR_ERR(fh);
906 err = ovl_want_write(dentry);
907 if (err)
908 goto out;
910 err = ovl_set_origin_fh(ofs, fh, upper);
911 if (!err)
912 err = ovl_set_impure(dentry->d_parent, upper->d_parent);
914 ovl_drop_write(dentry);
915 out:
916 kfree(fh);
917 return err;
920 static int ovl_maybe_validate_verity(struct dentry *dentry)
922 struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
923 struct inode *inode = d_inode(dentry);
924 struct path datapath, metapath;
925 int err;
927 if (!ofs->config.verity_mode ||
928 !ovl_is_metacopy_dentry(dentry) ||
929 ovl_test_flag(OVL_VERIFIED_DIGEST, inode))
930 return 0;
932 if (!ovl_test_flag(OVL_HAS_DIGEST, inode)) {
933 if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
934 pr_warn_ratelimited("metacopy file '%pd' has no digest specified\n",
935 dentry);
936 return -EIO;
938 return 0;
941 ovl_path_lowerdata(dentry, &datapath);
942 if (!datapath.dentry)
943 return -EIO;
945 ovl_path_real(dentry, &metapath);
946 if (!metapath.dentry)
947 return -EIO;
949 err = ovl_inode_lock_interruptible(inode);
950 if (err)
951 return err;
953 if (!ovl_test_flag(OVL_VERIFIED_DIGEST, inode)) {
954 const struct cred *old_cred;
956 old_cred = ovl_override_creds(dentry->d_sb);
958 err = ovl_validate_verity(ofs, &metapath, &datapath);
959 if (err == 0)
960 ovl_set_flag(OVL_VERIFIED_DIGEST, inode);
962 ovl_revert_creds(old_cred);
965 ovl_inode_unlock(inode);
967 return err;
970 /* Lazy lookup of lowerdata */
971 static int ovl_maybe_lookup_lowerdata(struct dentry *dentry)
973 struct inode *inode = d_inode(dentry);
974 const char *redirect = ovl_lowerdata_redirect(inode);
975 struct ovl_path datapath = {};
976 const struct cred *old_cred;
977 int err;
979 if (!redirect || ovl_dentry_lowerdata(dentry))
980 return 0;
982 if (redirect[0] != '/')
983 return -EIO;
985 err = ovl_inode_lock_interruptible(inode);
986 if (err)
987 return err;
989 err = 0;
990 /* Someone got here before us? */
991 if (ovl_dentry_lowerdata(dentry))
992 goto out;
994 old_cred = ovl_override_creds(dentry->d_sb);
995 err = ovl_lookup_data_layers(dentry, redirect, &datapath);
996 ovl_revert_creds(old_cred);
997 if (err)
998 goto out_err;
1000 err = ovl_dentry_set_lowerdata(dentry, &datapath);
1001 if (err)
1002 goto out_err;
1004 out:
1005 ovl_inode_unlock(inode);
1006 dput(datapath.dentry);
1008 return err;
1010 out_err:
1011 pr_warn_ratelimited("lazy lowerdata lookup failed (%pd2, err=%i)\n",
1012 dentry, err);
1013 goto out;
/*
 * Make sure that the lowerdata of @dentry has been looked up and then
 * validate its verity digest where applicable.
 *
 * Returns 0 on success, < 0 on error.
 */
int ovl_verify_lowerdata(struct dentry *dentry)
{
	int err = ovl_maybe_lookup_lowerdata(dentry);

	if (!err)
		err = ovl_maybe_validate_verity(dentry);

	return err;
}
1027 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
1028 unsigned int flags)
1030 struct ovl_entry *oe = NULL;
1031 const struct cred *old_cred;
1032 struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
1033 struct ovl_entry *poe = OVL_E(dentry->d_parent);
1034 struct ovl_entry *roe = OVL_E(dentry->d_sb->s_root);
1035 struct ovl_path *stack = NULL, *origin_path = NULL;
1036 struct dentry *upperdir, *upperdentry = NULL;
1037 struct dentry *origin = NULL;
1038 struct dentry *index = NULL;
1039 unsigned int ctr = 0;
1040 struct inode *inode = NULL;
1041 bool upperopaque = false;
1042 char *upperredirect = NULL;
1043 struct dentry *this;
1044 unsigned int i;
1045 int err;
1046 bool uppermetacopy = false;
1047 int metacopy_size = 0;
1048 struct ovl_lookup_data d = {
1049 .sb = dentry->d_sb,
1050 .name = dentry->d_name,
1051 .is_dir = false,
1052 .opaque = false,
1053 .stop = false,
1054 .last = ovl_redirect_follow(ofs) ? false : !ovl_numlower(poe),
1055 .redirect = NULL,
1056 .metacopy = 0,
1059 if (dentry->d_name.len > ofs->namelen)
1060 return ERR_PTR(-ENAMETOOLONG);
1062 old_cred = ovl_override_creds(dentry->d_sb);
1063 upperdir = ovl_dentry_upper(dentry->d_parent);
1064 if (upperdir) {
1065 d.layer = &ofs->layers[0];
1066 err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
1067 if (err)
1068 goto out;
1070 if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) {
1071 dput(upperdentry);
1072 err = -EREMOTE;
1073 goto out;
1075 if (upperdentry && !d.is_dir) {
1077 * Lookup copy up origin by decoding origin file handle.
1078 * We may get a disconnected dentry, which is fine,
1079 * because we only need to hold the origin inode in
1080 * cache and use its inode number. We may even get a
1081 * connected dentry, that is not under any of the lower
1082 * layers root. That is also fine for using it's inode
1083 * number - it's the same as if we held a reference
1084 * to a dentry in lower layer that was moved under us.
1086 err = ovl_check_origin(ofs, upperdentry, &origin_path);
1087 if (err)
1088 goto out_put_upper;
1090 if (d.metacopy)
1091 uppermetacopy = true;
1092 metacopy_size = d.metacopy;
1095 if (d.redirect) {
1096 err = -ENOMEM;
1097 upperredirect = kstrdup(d.redirect, GFP_KERNEL);
1098 if (!upperredirect)
1099 goto out_put_upper;
1100 if (d.redirect[0] == '/')
1101 poe = roe;
1103 upperopaque = d.opaque;
1106 if (!d.stop && ovl_numlower(poe)) {
1107 err = -ENOMEM;
1108 stack = ovl_stack_alloc(ofs->numlayer - 1);
1109 if (!stack)
1110 goto out_put_upper;
1113 for (i = 0; !d.stop && i < ovl_numlower(poe); i++) {
1114 struct ovl_path lower = ovl_lowerstack(poe)[i];
1116 if (!ovl_redirect_follow(ofs))
1117 d.last = i == ovl_numlower(poe) - 1;
1118 else if (d.is_dir || !ofs->numdatalayer)
1119 d.last = lower.layer->idx == ovl_numlower(roe);
1121 d.layer = lower.layer;
1122 err = ovl_lookup_layer(lower.dentry, &d, &this, false);
1123 if (err)
1124 goto out_put;
1126 if (!this)
1127 continue;
1129 if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
1130 dput(this);
1131 err = -EPERM;
1132 pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
1133 goto out_put;
1137 * If no origin fh is stored in upper of a merge dir, store fh
1138 * of lower dir and set upper parent "impure".
1140 if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
1141 err = ovl_fix_origin(ofs, dentry, this, upperdentry);
1142 if (err) {
1143 dput(this);
1144 goto out_put;
1149 * When "verify_lower" feature is enabled, do not merge with a
1150 * lower dir that does not match a stored origin xattr. In any
1151 * case, only verified origin is used for index lookup.
1153 * For non-dir dentry, if index=on, then ensure origin
1154 * matches the dentry found using path based lookup,
1155 * otherwise error out.
1157 if (upperdentry && !ctr &&
1158 ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
1159 (!d.is_dir && ofs->config.index && origin_path))) {
1160 err = ovl_verify_origin(ofs, upperdentry, this, false);
1161 if (err) {
1162 dput(this);
1163 if (d.is_dir)
1164 break;
1165 goto out_put;
1167 origin = this;
1170 if (!upperdentry && !d.is_dir && !ctr && d.metacopy)
1171 metacopy_size = d.metacopy;
1173 if (d.metacopy && ctr) {
1175 * Do not store intermediate metacopy dentries in
1176 * lower chain, except top most lower metacopy dentry.
1177 * Continue the loop so that if there is an absolute
1178 * redirect on this dentry, poe can be reset to roe.
1180 dput(this);
1181 this = NULL;
1182 } else {
1183 stack[ctr].dentry = this;
1184 stack[ctr].layer = lower.layer;
1185 ctr++;
1189 * Following redirects can have security consequences: it's like
1190 * a symlink into the lower layer without the permission checks.
1191 * This is only a problem if the upper layer is untrusted (e.g
1192 * comes from an USB drive). This can allow a non-readable file
1193 * or directory to become readable.
1195 * Only following redirects when redirects are enabled disables
1196 * this attack vector when not necessary.
1198 err = -EPERM;
1199 if (d.redirect && !ovl_redirect_follow(ofs)) {
1200 pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
1201 dentry);
1202 goto out_put;
1205 if (d.stop)
1206 break;
1208 if (d.redirect && d.redirect[0] == '/' && poe != roe) {
1209 poe = roe;
1210 /* Find the current layer on the root dentry */
1211 i = lower.layer->idx - 1;
1215 /* Defer lookup of lowerdata in data-only layers to first access */
1216 if (d.metacopy && ctr && ofs->numdatalayer && d.absolute_redirect) {
1217 d.metacopy = 0;
1218 ctr++;
1222 * For regular non-metacopy upper dentries, there is no lower
1223 * path based lookup, hence ctr will be zero. If a dentry is found
1224 * using ORIGIN xattr on upper, install it in stack.
1226 * For metacopy dentry, path based lookup will find lower dentries.
1227 * Just make sure a corresponding data dentry has been found.
1229 if (d.metacopy || (uppermetacopy && !ctr)) {
1230 pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
1231 dentry);
1232 err = -EIO;
1233 goto out_put;
1234 } else if (!d.is_dir && upperdentry && !ctr && origin_path) {
1235 if (WARN_ON(stack != NULL)) {
1236 err = -EIO;
1237 goto out_put;
1239 stack = origin_path;
1240 ctr = 1;
1241 origin = origin_path->dentry;
1242 origin_path = NULL;
1246 * Always lookup index if there is no-upperdentry.
1248 * For the case of upperdentry, we have set origin by now if it
1249 * needed to be set. There are basically three cases.
1251 * For directories, lookup index by lower inode and verify it matches
1252 * upper inode. We only trust dir index if we verified that lower dir
1253 * matches origin, otherwise dir index entries may be inconsistent
1254 * and we ignore them.
1256 * For regular upper, we already set origin if upper had ORIGIN
1257 * xattr. There is no verification though as there is no path
1258 * based dentry lookup in lower in this case.
1260 * For metacopy upper, we set a verified origin already if index
1261 * is enabled and if upper had an ORIGIN xattr.
1264 if (!upperdentry && ctr)
1265 origin = stack[0].dentry;
1267 if (origin && ovl_indexdir(dentry->d_sb) &&
1268 (!d.is_dir || ovl_index_all(dentry->d_sb))) {
1269 index = ovl_lookup_index(ofs, upperdentry, origin, true);
1270 if (IS_ERR(index)) {
1271 err = PTR_ERR(index);
1272 index = NULL;
1273 goto out_put;
1277 if (ctr) {
1278 oe = ovl_alloc_entry(ctr);
1279 err = -ENOMEM;
1280 if (!oe)
1281 goto out_put;
1283 ovl_stack_cpy(ovl_lowerstack(oe), stack, ctr);
1286 if (upperopaque)
1287 ovl_dentry_set_opaque(dentry);
1288 if (d.xwhiteouts)
1289 ovl_dentry_set_xwhiteouts(dentry);
1291 if (upperdentry)
1292 ovl_dentry_set_upper_alias(dentry);
1293 else if (index) {
1294 struct path upperpath = {
1295 .dentry = upperdentry = dget(index),
1296 .mnt = ovl_upper_mnt(ofs),
1300 * It's safe to assign upperredirect here: the previous
1301 * assignment happens only if upperdentry is non-NULL, and
1302 * this one only if upperdentry is NULL.
1304 upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0);
1305 if (IS_ERR(upperredirect)) {
1306 err = PTR_ERR(upperredirect);
1307 upperredirect = NULL;
1308 goto out_free_oe;
1310 err = ovl_check_metacopy_xattr(ofs, &upperpath, NULL);
1311 if (err < 0)
1312 goto out_free_oe;
1313 uppermetacopy = err;
1314 metacopy_size = err;
1317 if (upperdentry || ctr) {
1318 struct ovl_inode_params oip = {
1319 .upperdentry = upperdentry,
1320 .oe = oe,
1321 .index = index,
1322 .redirect = upperredirect,
1325 /* Store lowerdata redirect for lazy lookup */
1326 if (ctr > 1 && !d.is_dir && !stack[ctr - 1].dentry) {
1327 oip.lowerdata_redirect = d.redirect;
1328 d.redirect = NULL;
1330 inode = ovl_get_inode(dentry->d_sb, &oip);
1331 err = PTR_ERR(inode);
1332 if (IS_ERR(inode))
1333 goto out_free_oe;
1334 if (upperdentry && !uppermetacopy)
1335 ovl_set_flag(OVL_UPPERDATA, inode);
1337 if (metacopy_size > OVL_METACOPY_MIN_SIZE)
1338 ovl_set_flag(OVL_HAS_DIGEST, inode);
1341 ovl_dentry_init_reval(dentry, upperdentry, OVL_I_E(inode));
1343 ovl_revert_creds(old_cred);
1344 if (origin_path) {
1345 dput(origin_path->dentry);
1346 kfree(origin_path);
1348 dput(index);
1349 ovl_stack_free(stack, ctr);
1350 kfree(d.redirect);
1351 return d_splice_alias(inode, dentry);
1353 out_free_oe:
1354 ovl_free_entry(oe);
1355 out_put:
1356 dput(index);
1357 ovl_stack_free(stack, ctr);
1358 out_put_upper:
1359 if (origin_path) {
1360 dput(origin_path->dentry);
1361 kfree(origin_path);
1363 dput(upperdentry);
1364 kfree(upperredirect);
1365 out:
1366 kfree(d.redirect);
1367 ovl_revert_creds(old_cred);
1368 return ERR_PTR(err);
1371 bool ovl_lower_positive(struct dentry *dentry)
1373 struct ovl_entry *poe = OVL_E(dentry->d_parent);
1374 const struct qstr *name = &dentry->d_name;
1375 const struct cred *old_cred;
1376 unsigned int i;
1377 bool positive = false;
1378 bool done = false;
1381 * If dentry is negative, then lower is positive iff this is a
1382 * whiteout.
1384 if (!dentry->d_inode)
1385 return ovl_dentry_is_opaque(dentry);
1387 /* Negative upper -> positive lower */
1388 if (!ovl_dentry_upper(dentry))
1389 return true;
1391 old_cred = ovl_override_creds(dentry->d_sb);
1392 /* Positive upper -> have to look up lower to see whether it exists */
1393 for (i = 0; !done && !positive && i < ovl_numlower(poe); i++) {
1394 struct dentry *this;
1395 struct ovl_path *parentpath = &ovl_lowerstack(poe)[i];
1397 this = lookup_one_positive_unlocked(
1398 mnt_idmap(parentpath->layer->mnt),
1399 name->name, parentpath->dentry, name->len);
1400 if (IS_ERR(this)) {
1401 switch (PTR_ERR(this)) {
1402 case -ENOENT:
1403 case -ENAMETOOLONG:
1404 break;
1406 default:
1408 * Assume something is there, we just couldn't
1409 * access it.
1411 positive = true;
1412 break;
1414 } else {
1415 struct path path = {
1416 .dentry = this,
1417 .mnt = parentpath->layer->mnt,
1419 positive = !ovl_path_is_whiteout(OVL_FS(dentry->d_sb), &path);
1420 done = true;
1421 dput(this);
1424 ovl_revert_creds(old_cred);
1426 return positive;