1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (C) 2011 Novell Inc.
4 * Copyright (C) 2016 Red Hat, Inc.
8 #include <linux/mount.h>
9 #include <linux/slab.h>
10 #include <linux/cred.h>
11 #include <linux/xattr.h>
12 #include <linux/exportfs.h>
13 #include <linux/file.h>
14 #include <linux/fileattr.h>
15 #include <linux/uuid.h>
16 #include <linux/namei.h>
17 #include <linux/ratelimit.h>
18 #include "overlayfs.h"
20 /* Get write access to upper mnt - may fail if upper sb was remounted ro */
21 int ovl_get_write_access(struct dentry
*dentry
)
23 struct ovl_fs
*ofs
= OVL_FS(dentry
->d_sb
);
24 return mnt_get_write_access(ovl_upper_mnt(ofs
));
27 /* Get write access to upper sb - may block if upper sb is frozen */
28 void ovl_start_write(struct dentry
*dentry
)
30 struct ovl_fs
*ofs
= OVL_FS(dentry
->d_sb
);
31 sb_start_write(ovl_upper_mnt(ofs
)->mnt_sb
);
34 int ovl_want_write(struct dentry
*dentry
)
36 struct ovl_fs
*ofs
= OVL_FS(dentry
->d_sb
);
37 return mnt_want_write(ovl_upper_mnt(ofs
));
40 void ovl_put_write_access(struct dentry
*dentry
)
42 struct ovl_fs
*ofs
= OVL_FS(dentry
->d_sb
);
43 mnt_put_write_access(ovl_upper_mnt(ofs
));
46 void ovl_end_write(struct dentry
*dentry
)
48 struct ovl_fs
*ofs
= OVL_FS(dentry
->d_sb
);
49 sb_end_write(ovl_upper_mnt(ofs
)->mnt_sb
);
52 void ovl_drop_write(struct dentry
*dentry
)
54 struct ovl_fs
*ofs
= OVL_FS(dentry
->d_sb
);
55 mnt_drop_write(ovl_upper_mnt(ofs
));
58 struct dentry
*ovl_workdir(struct dentry
*dentry
)
60 struct ovl_fs
*ofs
= OVL_FS(dentry
->d_sb
);
64 const struct cred
*ovl_override_creds(struct super_block
*sb
)
66 struct ovl_fs
*ofs
= OVL_FS(sb
);
68 return override_creds_light(ofs
->creator_cred
);
/*
 * Hand @old_cred to revert_creds_light().  Counterpart of
 * ovl_override_creds().
 */
void ovl_revert_creds(const struct cred *old_cred)
{
	revert_creds_light(old_cred);
}
77 * Check if underlying fs supports file handles and try to determine encoding
78 * type, in order to deduce maximum inode number used by fs.
80 * Return 0 if file handles are not supported.
81 * Return 1 (FILEID_INO32_GEN) if fs uses the default 32bit inode encoding.
82 * Return -1 if fs uses a non default encoding with unknown inode size.
84 int ovl_can_decode_fh(struct super_block
*sb
)
86 if (!capable(CAP_DAC_READ_SEARCH
))
89 if (!exportfs_can_decode_fh(sb
->s_export_op
))
92 return sb
->s_export_op
->encode_fh
? -1 : FILEID_INO32_GEN
;
95 struct dentry
*ovl_indexdir(struct super_block
*sb
)
97 struct ovl_fs
*ofs
= OVL_FS(sb
);
99 return ofs
->config
.index
? ofs
->workdir
: NULL
;
102 /* Index all files on copy up. For now only enabled for NFS export */
103 bool ovl_index_all(struct super_block
*sb
)
105 struct ovl_fs
*ofs
= OVL_FS(sb
);
107 return ofs
->config
.nfs_export
&& ofs
->config
.index
;
110 /* Verify lower origin on lookup. For now only enabled for NFS export */
111 bool ovl_verify_lower(struct super_block
*sb
)
113 struct ovl_fs
*ofs
= OVL_FS(sb
);
115 return ofs
->config
.nfs_export
&& ofs
->config
.index
;
118 struct ovl_path
*ovl_stack_alloc(unsigned int n
)
120 return kcalloc(n
, sizeof(struct ovl_path
), GFP_KERNEL
);
123 void ovl_stack_cpy(struct ovl_path
*dst
, struct ovl_path
*src
, unsigned int n
)
127 memcpy(dst
, src
, sizeof(struct ovl_path
) * n
);
128 for (i
= 0; i
< n
; i
++)
132 void ovl_stack_put(struct ovl_path
*stack
, unsigned int n
)
136 for (i
= 0; stack
&& i
< n
; i
++)
137 dput(stack
[i
].dentry
);
140 void ovl_stack_free(struct ovl_path
*stack
, unsigned int n
)
142 ovl_stack_put(stack
, n
);
146 struct ovl_entry
*ovl_alloc_entry(unsigned int numlower
)
148 size_t size
= offsetof(struct ovl_entry
, __lowerstack
[numlower
]);
149 struct ovl_entry
*oe
= kzalloc(size
, GFP_KERNEL
);
152 oe
->__numlower
= numlower
;
157 void ovl_free_entry(struct ovl_entry
*oe
)
159 ovl_stack_put(ovl_lowerstack(oe
), ovl_numlower(oe
));
163 #define OVL_D_REVALIDATE (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE)
165 bool ovl_dentry_remote(struct dentry
*dentry
)
167 return dentry
->d_flags
& OVL_D_REVALIDATE
;
170 void ovl_dentry_update_reval(struct dentry
*dentry
, struct dentry
*realdentry
)
172 if (!ovl_dentry_remote(realdentry
))
175 spin_lock(&dentry
->d_lock
);
176 dentry
->d_flags
|= realdentry
->d_flags
& OVL_D_REVALIDATE
;
177 spin_unlock(&dentry
->d_lock
);
180 void ovl_dentry_init_reval(struct dentry
*dentry
, struct dentry
*upperdentry
,
181 struct ovl_entry
*oe
)
183 return ovl_dentry_init_flags(dentry
, upperdentry
, oe
, OVL_D_REVALIDATE
);
186 void ovl_dentry_init_flags(struct dentry
*dentry
, struct dentry
*upperdentry
,
187 struct ovl_entry
*oe
, unsigned int mask
)
189 struct ovl_path
*lowerstack
= ovl_lowerstack(oe
);
190 unsigned int i
, flags
= 0;
193 flags
|= upperdentry
->d_flags
;
194 for (i
= 0; i
< ovl_numlower(oe
) && lowerstack
[i
].dentry
; i
++)
195 flags
|= lowerstack
[i
].dentry
->d_flags
;
197 spin_lock(&dentry
->d_lock
);
198 dentry
->d_flags
&= ~mask
;
199 dentry
->d_flags
|= flags
& mask
;
200 spin_unlock(&dentry
->d_lock
);
203 bool ovl_dentry_weird(struct dentry
*dentry
)
205 if (!d_can_lookup(dentry
) && !d_is_file(dentry
) && !d_is_symlink(dentry
))
208 return dentry
->d_flags
& (DCACHE_NEED_AUTOMOUNT
|
209 DCACHE_MANAGE_TRANSIT
|
214 enum ovl_path_type
ovl_path_type(struct dentry
*dentry
)
216 struct ovl_entry
*oe
= OVL_E(dentry
);
217 enum ovl_path_type type
= 0;
219 if (ovl_dentry_upper(dentry
)) {
220 type
= __OVL_PATH_UPPER
;
223 * Non-dir dentry can hold lower dentry of its copy up origin.
225 if (ovl_numlower(oe
)) {
226 if (ovl_test_flag(OVL_CONST_INO
, d_inode(dentry
)))
227 type
|= __OVL_PATH_ORIGIN
;
228 if (d_is_dir(dentry
) ||
229 !ovl_has_upperdata(d_inode(dentry
)))
230 type
|= __OVL_PATH_MERGE
;
233 if (ovl_numlower(oe
) > 1)
234 type
|= __OVL_PATH_MERGE
;
239 void ovl_path_upper(struct dentry
*dentry
, struct path
*path
)
241 struct ovl_fs
*ofs
= OVL_FS(dentry
->d_sb
);
243 path
->mnt
= ovl_upper_mnt(ofs
);
244 path
->dentry
= ovl_dentry_upper(dentry
);
247 void ovl_path_lower(struct dentry
*dentry
, struct path
*path
)
249 struct ovl_entry
*oe
= OVL_E(dentry
);
250 struct ovl_path
*lowerpath
= ovl_lowerstack(oe
);
252 if (ovl_numlower(oe
)) {
253 path
->mnt
= lowerpath
->layer
->mnt
;
254 path
->dentry
= lowerpath
->dentry
;
256 *path
= (struct path
) { };
260 void ovl_path_lowerdata(struct dentry
*dentry
, struct path
*path
)
262 struct ovl_entry
*oe
= OVL_E(dentry
);
263 struct ovl_path
*lowerdata
= ovl_lowerdata(oe
);
264 struct dentry
*lowerdata_dentry
= ovl_lowerdata_dentry(oe
);
266 if (lowerdata_dentry
) {
267 path
->dentry
= lowerdata_dentry
;
269 * Pairs with smp_wmb() in ovl_dentry_set_lowerdata().
270 * Make sure that if lowerdata->dentry is visible, then
271 * datapath->layer is visible as well.
274 path
->mnt
= READ_ONCE(lowerdata
->layer
)->mnt
;
276 *path
= (struct path
) { };
280 enum ovl_path_type
ovl_path_real(struct dentry
*dentry
, struct path
*path
)
282 enum ovl_path_type type
= ovl_path_type(dentry
);
284 if (!OVL_TYPE_UPPER(type
))
285 ovl_path_lower(dentry
, path
);
287 ovl_path_upper(dentry
, path
);
292 enum ovl_path_type
ovl_path_realdata(struct dentry
*dentry
, struct path
*path
)
294 enum ovl_path_type type
= ovl_path_type(dentry
);
296 WARN_ON_ONCE(d_is_dir(dentry
));
298 if (!OVL_TYPE_UPPER(type
) || OVL_TYPE_MERGE(type
))
299 ovl_path_lowerdata(dentry
, path
);
301 ovl_path_upper(dentry
, path
);
/*
 * Return the upper dentry recorded in the overlay inode of @dentry, as
 * obtained through ovl_upperdentry_dereference().
 */
struct dentry *ovl_dentry_upper(struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	return ovl_upperdentry_dereference(OVL_I(inode));
}
311 struct dentry
*ovl_dentry_lower(struct dentry
*dentry
)
313 struct ovl_entry
*oe
= OVL_E(dentry
);
315 return ovl_numlower(oe
) ? ovl_lowerstack(oe
)->dentry
: NULL
;
318 const struct ovl_layer
*ovl_layer_lower(struct dentry
*dentry
)
320 struct ovl_entry
*oe
= OVL_E(dentry
);
322 return ovl_numlower(oe
) ? ovl_lowerstack(oe
)->layer
: NULL
;
/*
 * ovl_dentry_lower() may return either a data dentry or a metacopy dentry,
 * depending on what is stored in lowerstack[0].  When the lower dentry that
 * actually holds data (and not the metacopy dentry) is needed, use this
 * helper: it returns the lower data dentry.
 */
struct dentry *ovl_dentry_lowerdata(struct dentry *dentry)
{
	struct ovl_entry *entry = OVL_E(dentry);

	return ovl_lowerdata_dentry(entry);
}
336 int ovl_dentry_set_lowerdata(struct dentry
*dentry
, struct ovl_path
*datapath
)
338 struct ovl_entry
*oe
= OVL_E(dentry
);
339 struct ovl_path
*lowerdata
= ovl_lowerdata(oe
);
340 struct dentry
*datadentry
= datapath
->dentry
;
342 if (WARN_ON_ONCE(ovl_numlower(oe
) <= 1))
345 WRITE_ONCE(lowerdata
->layer
, datapath
->layer
);
347 * Pairs with smp_rmb() in ovl_path_lowerdata().
348 * Make sure that if lowerdata->dentry is visible, then
349 * lowerdata->layer is visible as well.
352 WRITE_ONCE(lowerdata
->dentry
, dget(datadentry
));
354 ovl_dentry_update_reval(dentry
, datadentry
);
/*
 * Return the upper dentry of @dentry when there is one, otherwise fall back
 * to ovl_dentry_lower().
 */
struct dentry *ovl_dentry_real(struct dentry *dentry)
{
	struct dentry *upper = ovl_dentry_upper(dentry);

	if (upper)
		return upper;

	return ovl_dentry_lower(dentry);
}
/*
 * Inode flavour of ovl_dentry_upper(): return the upper dentry recorded in
 * the overlay inode @inode via ovl_upperdentry_dereference().
 */
struct dentry *ovl_i_dentry_upper(struct inode *inode)
{
	struct ovl_inode *oi = OVL_I(inode);

	return ovl_upperdentry_dereference(oi);
}
369 struct inode
*ovl_i_path_real(struct inode
*inode
, struct path
*path
)
371 struct ovl_path
*lowerpath
= ovl_lowerpath(OVL_I_E(inode
));
373 path
->dentry
= ovl_i_dentry_upper(inode
);
375 path
->dentry
= lowerpath
->dentry
;
376 path
->mnt
= lowerpath
->layer
->mnt
;
378 path
->mnt
= ovl_upper_mnt(OVL_FS(inode
->i_sb
));
381 return path
->dentry
? d_inode_rcu(path
->dentry
) : NULL
;
384 struct inode
*ovl_inode_upper(struct inode
*inode
)
386 struct dentry
*upperdentry
= ovl_i_dentry_upper(inode
);
388 return upperdentry
? d_inode(upperdentry
) : NULL
;
391 struct inode
*ovl_inode_lower(struct inode
*inode
)
393 struct ovl_path
*lowerpath
= ovl_lowerpath(OVL_I_E(inode
));
395 return lowerpath
? d_inode(lowerpath
->dentry
) : NULL
;
/*
 * Return the upper inode of @inode when there is one, otherwise fall back to
 * ovl_inode_lower().
 */
struct inode *ovl_inode_real(struct inode *inode)
{
	struct inode *upper = ovl_inode_upper(inode);

	if (upper)
		return upper;

	return ovl_inode_lower(inode);
}
403 /* Return inode which contains lower data. Do not return metacopy */
404 struct inode
*ovl_inode_lowerdata(struct inode
*inode
)
406 struct dentry
*lowerdata
= ovl_lowerdata_dentry(OVL_I_E(inode
));
408 if (WARN_ON(!S_ISREG(inode
->i_mode
)))
411 return lowerdata
? d_inode(lowerdata
) : NULL
;
414 /* Return real inode which contains data. Does not return metacopy inode */
415 struct inode
*ovl_inode_realdata(struct inode
*inode
)
417 struct inode
*upperinode
;
419 upperinode
= ovl_inode_upper(inode
);
420 if (upperinode
&& ovl_has_upperdata(inode
))
423 return ovl_inode_lowerdata(inode
);
426 const char *ovl_lowerdata_redirect(struct inode
*inode
)
428 return inode
&& S_ISREG(inode
->i_mode
) ?
429 OVL_I(inode
)->lowerdata_redirect
: NULL
;
432 struct ovl_dir_cache
*ovl_dir_cache(struct inode
*inode
)
434 return inode
&& S_ISDIR(inode
->i_mode
) ? OVL_I(inode
)->cache
: NULL
;
437 void ovl_set_dir_cache(struct inode
*inode
, struct ovl_dir_cache
*cache
)
439 OVL_I(inode
)->cache
= cache
;
/*
 * Set bit @flag in the overlay entry flags of @dentry using set_bit().
 */
void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry)
{
	set_bit(flag, OVL_E_FLAGS(dentry));
}
/*
 * Clear bit @flag in the overlay entry flags of @dentry using clear_bit().
 */
void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry)
{
	clear_bit(flag, OVL_E_FLAGS(dentry));
}
452 bool ovl_dentry_test_flag(unsigned long flag
, struct dentry
*dentry
)
454 return test_bit(flag
, OVL_E_FLAGS(dentry
));
457 bool ovl_dentry_is_opaque(struct dentry
*dentry
)
459 return ovl_dentry_test_flag(OVL_E_OPAQUE
, dentry
);
462 bool ovl_dentry_is_whiteout(struct dentry
*dentry
)
464 return !dentry
->d_inode
&& ovl_dentry_is_opaque(dentry
);
467 void ovl_dentry_set_opaque(struct dentry
*dentry
)
469 ovl_dentry_set_flag(OVL_E_OPAQUE
, dentry
);
472 bool ovl_dentry_has_xwhiteouts(struct dentry
*dentry
)
474 return ovl_dentry_test_flag(OVL_E_XWHITEOUTS
, dentry
);
477 void ovl_dentry_set_xwhiteouts(struct dentry
*dentry
)
479 ovl_dentry_set_flag(OVL_E_XWHITEOUTS
, dentry
);
483 * ovl_layer_set_xwhiteouts() is called before adding the overlay dir
484 * dentry to dcache, while readdir of that same directory happens after
485 * the overlay dir dentry is in dcache, so if some cpu observes that
486 * ovl_dentry_has_xwhiteouts(), it will also observe layer->has_xwhiteouts
487 * for the layers where xwhiteouts marker was found in that merge dir.
489 void ovl_layer_set_xwhiteouts(struct ovl_fs
*ofs
,
490 const struct ovl_layer
*layer
)
492 if (layer
->has_xwhiteouts
)
495 /* Write once to read-mostly layer properties */
496 ofs
->layers
[layer
->idx
].has_xwhiteouts
= true;
500 * For hard links and decoded file handles, it's possible for ovl_dentry_upper()
501 * to return positive, while there's no actual upper alias for the inode.
502 * Copy up code needs to know about the existence of the upper alias, so it
503 * can't use ovl_dentry_upper().
505 bool ovl_dentry_has_upper_alias(struct dentry
*dentry
)
507 return ovl_dentry_test_flag(OVL_E_UPPER_ALIAS
, dentry
);
510 void ovl_dentry_set_upper_alias(struct dentry
*dentry
)
512 ovl_dentry_set_flag(OVL_E_UPPER_ALIAS
, dentry
);
515 static bool ovl_should_check_upperdata(struct inode
*inode
)
517 if (!S_ISREG(inode
->i_mode
))
520 if (!ovl_inode_lower(inode
))
526 bool ovl_has_upperdata(struct inode
*inode
)
528 if (!ovl_should_check_upperdata(inode
))
531 if (!ovl_test_flag(OVL_UPPERDATA
, inode
))
534 * Pairs with smp_wmb() in ovl_set_upperdata(). Main user of
535 * ovl_has_upperdata() is ovl_copy_up_meta_inode_data(). Make sure
536 * if setting of OVL_UPPERDATA is visible, then effects of writes
537 * before that are visible too.
543 void ovl_set_upperdata(struct inode
*inode
)
546 * Pairs with smp_rmb() in ovl_has_upperdata(). Make sure
547 * if OVL_UPPERDATA flag is visible, then effects of write operations
548 * before it are visible as well.
551 ovl_set_flag(OVL_UPPERDATA
, inode
);
554 /* Caller should hold ovl_inode->lock */
555 bool ovl_dentry_needs_data_copy_up_locked(struct dentry
*dentry
, int flags
)
557 if (!ovl_open_flags_need_copy_up(flags
))
560 return !ovl_test_flag(OVL_UPPERDATA
, d_inode(dentry
));
563 bool ovl_dentry_needs_data_copy_up(struct dentry
*dentry
, int flags
)
565 if (!ovl_open_flags_need_copy_up(flags
))
568 return !ovl_has_upperdata(d_inode(dentry
));
571 const char *ovl_dentry_get_redirect(struct dentry
*dentry
)
573 return OVL_I(d_inode(dentry
))->redirect
;
576 void ovl_dentry_set_redirect(struct dentry
*dentry
, const char *redirect
)
578 struct ovl_inode
*oi
= OVL_I(d_inode(dentry
));
581 oi
->redirect
= redirect
;
584 void ovl_inode_update(struct inode
*inode
, struct dentry
*upperdentry
)
586 struct inode
*upperinode
= d_inode(upperdentry
);
588 WARN_ON(OVL_I(inode
)->__upperdentry
);
591 * Make sure upperdentry is consistent before making it visible
594 OVL_I(inode
)->__upperdentry
= upperdentry
;
595 if (inode_unhashed(inode
)) {
596 inode
->i_private
= upperinode
;
597 __insert_inode_hash(inode
, (unsigned long) upperinode
);
601 static void ovl_dir_version_inc(struct dentry
*dentry
, bool impurity
)
603 struct inode
*inode
= d_inode(dentry
);
605 WARN_ON(!inode_is_locked(inode
));
606 WARN_ON(!d_is_dir(dentry
));
608 * Version is used by readdir code to keep cache consistent.
609 * For merge dirs (or dirs with origin) all changes need to be noted.
610 * For non-merge dirs, cache contains only impure entries (i.e. ones
611 * which have been copied up and have origins), so only need to note
612 * changes to impure entries.
614 if (!ovl_dir_is_real(inode
) || impurity
)
615 OVL_I(inode
)->version
++;
618 void ovl_dir_modified(struct dentry
*dentry
, bool impurity
)
620 /* Copy mtime/ctime */
621 ovl_copyattr(d_inode(dentry
));
623 ovl_dir_version_inc(dentry
, impurity
);
626 u64
ovl_inode_version_get(struct inode
*inode
)
628 WARN_ON(!inode_is_locked(inode
));
629 return OVL_I(inode
)->version
;
632 bool ovl_is_whiteout(struct dentry
*dentry
)
634 struct inode
*inode
= dentry
->d_inode
;
636 return inode
&& IS_WHITEOUT(inode
);
640 * Use this over ovl_is_whiteout for upper and lower files, as it also
641 * handles overlay.whiteout xattr whiteout files.
643 bool ovl_path_is_whiteout(struct ovl_fs
*ofs
, const struct path
*path
)
645 return ovl_is_whiteout(path
->dentry
) ||
646 ovl_path_check_xwhiteout_xattr(ofs
, path
);
649 struct file
*ovl_path_open(const struct path
*path
, int flags
)
651 struct inode
*inode
= d_inode(path
->dentry
);
652 struct mnt_idmap
*real_idmap
= mnt_idmap(path
->mnt
);
655 if (flags
& ~(O_ACCMODE
| O_LARGEFILE
))
658 switch (flags
& O_ACCMODE
) {
663 acc_mode
= MAY_WRITE
;
669 err
= inode_permission(real_idmap
, inode
, acc_mode
| MAY_OPEN
);
673 /* O_NOATIME is an optimization, don't fail if not permitted */
674 if (inode_owner_or_capable(real_idmap
, inode
))
677 return dentry_open(path
, flags
, current_cred());
680 /* Caller should hold ovl_inode->lock */
681 static bool ovl_already_copied_up_locked(struct dentry
*dentry
, int flags
)
683 bool disconnected
= dentry
->d_flags
& DCACHE_DISCONNECTED
;
685 if (ovl_dentry_upper(dentry
) &&
686 (ovl_dentry_has_upper_alias(dentry
) || disconnected
) &&
687 !ovl_dentry_needs_data_copy_up_locked(dentry
, flags
))
693 bool ovl_already_copied_up(struct dentry
*dentry
, int flags
)
695 bool disconnected
= dentry
->d_flags
& DCACHE_DISCONNECTED
;
698 * Check if copy-up has happened as well as for upper alias (in
699 * case of hard links) is there.
701 * Both checks are lockless:
702 * - false negatives: will recheck under oi->lock
704 * + ovl_dentry_upper() uses memory barriers to ensure the
705 * upper dentry is up-to-date
706 * + ovl_dentry_has_upper_alias() relies on locking of
707 * upper parent i_rwsem to prevent reordering copy-up
710 if (ovl_dentry_upper(dentry
) &&
711 (ovl_dentry_has_upper_alias(dentry
) || disconnected
) &&
712 !ovl_dentry_needs_data_copy_up(dentry
, flags
))
719 * The copy up "transaction" keeps an elevated mnt write count on upper mnt,
720 * but leaves taking freeze protection on upper sb to lower level helpers.
722 int ovl_copy_up_start(struct dentry
*dentry
, int flags
)
724 struct inode
*inode
= d_inode(dentry
);
727 err
= ovl_inode_lock_interruptible(inode
);
731 if (ovl_already_copied_up_locked(dentry
, flags
))
732 err
= 1; /* Already copied up */
734 err
= ovl_get_write_access(dentry
);
741 ovl_inode_unlock(inode
);
/*
 * Finish a copy up "transaction" on @dentry: drop the write access with
 * ovl_put_write_access(), then release the overlay inode lock.
 */
void ovl_copy_up_end(struct dentry *dentry)
{
	struct inode *inode;

	ovl_put_write_access(dentry);

	inode = d_inode(dentry);
	ovl_inode_unlock(inode);
}
751 bool ovl_path_check_origin_xattr(struct ovl_fs
*ofs
, const struct path
*path
)
755 res
= ovl_path_getxattr(ofs
, path
, OVL_XATTR_ORIGIN
, NULL
, 0);
757 /* Zero size value means "copied up but origin unknown" */
764 bool ovl_path_check_xwhiteout_xattr(struct ovl_fs
*ofs
, const struct path
*path
)
766 struct dentry
*dentry
= path
->dentry
;
769 /* xattr.whiteout must be a zero size regular file */
770 if (!d_is_reg(dentry
) || i_size_read(d_inode(dentry
)) != 0)
773 res
= ovl_path_getxattr(ofs
, path
, OVL_XATTR_XWHITEOUT
, NULL
, 0);
778 * Load persistent uuid from xattr into s_uuid if found, or store a new
779 * random generated value in s_uuid and in xattr.
781 bool ovl_init_uuid_xattr(struct super_block
*sb
, struct ovl_fs
*ofs
,
782 const struct path
*upperpath
)
788 /* Try to load existing persistent uuid */
789 res
= ovl_path_getxattr(ofs
, upperpath
, OVL_XATTR_UUID
, uuid
.b
,
791 if (res
== UUID_SIZE
)
798 * With uuid=auto, if uuid xattr is found, it will be used.
799 * If uuid xattr is not found, generate a persistent uuid only on mount
800 * of new overlays where upper root dir is not yet marked as impure.
801 * An upper dir is marked as impure on copy up or lookup of its subdirs.
803 if (ofs
->config
.uuid
== OVL_UUID_AUTO
) {
804 res
= ovl_path_getxattr(ofs
, upperpath
, OVL_XATTR_IMPURE
, NULL
,
807 /* Any mount of old overlay - downgrade to uuid=null */
808 ofs
->config
.uuid
= OVL_UUID_NULL
;
810 } else if (res
== -ENODATA
) {
811 /* First mount of new overlay - upgrade to uuid=on */
812 ofs
->config
.uuid
= OVL_UUID_ON
;
813 } else if (res
< 0) {
819 /* Generate overlay instance uuid */
822 /* Try to store persistent uuid */
824 res
= ovl_setxattr(ofs
, upperpath
->dentry
, OVL_XATTR_UUID
, uuid
.b
,
830 super_set_uuid(sb
, uuid
.b
, sizeof(uuid
));
834 ofs
->config
.uuid
= OVL_UUID_NULL
;
835 pr_warn("failed to %s uuid (%pd2, err=%i); falling back to uuid=null.\n",
836 set
? "set" : "get", upperpath
->dentry
, res
);
840 char ovl_get_dir_xattr_val(struct ovl_fs
*ofs
, const struct path
*path
,
846 if (!d_is_dir(path
->dentry
))
849 res
= ovl_path_getxattr(ofs
, path
, ox
, &val
, 1);
850 return res
== 1 ? val
: 0;
853 #define OVL_XATTR_OPAQUE_POSTFIX "opaque"
854 #define OVL_XATTR_REDIRECT_POSTFIX "redirect"
855 #define OVL_XATTR_ORIGIN_POSTFIX "origin"
856 #define OVL_XATTR_IMPURE_POSTFIX "impure"
857 #define OVL_XATTR_NLINK_POSTFIX "nlink"
858 #define OVL_XATTR_UPPER_POSTFIX "upper"
859 #define OVL_XATTR_UUID_POSTFIX "uuid"
860 #define OVL_XATTR_METACOPY_POSTFIX "metacopy"
861 #define OVL_XATTR_PROTATTR_POSTFIX "protattr"
862 #define OVL_XATTR_XWHITEOUT_POSTFIX "whiteout"
864 #define OVL_XATTR_TAB_ENTRY(x) \
865 [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
866 [true] = OVL_XATTR_USER_PREFIX x ## _POSTFIX }
868 const char *const ovl_xattr_table
[][2] = {
869 OVL_XATTR_TAB_ENTRY(OVL_XATTR_OPAQUE
),
870 OVL_XATTR_TAB_ENTRY(OVL_XATTR_REDIRECT
),
871 OVL_XATTR_TAB_ENTRY(OVL_XATTR_ORIGIN
),
872 OVL_XATTR_TAB_ENTRY(OVL_XATTR_IMPURE
),
873 OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK
),
874 OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER
),
875 OVL_XATTR_TAB_ENTRY(OVL_XATTR_UUID
),
876 OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY
),
877 OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR
),
878 OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT
),
881 int ovl_check_setxattr(struct ovl_fs
*ofs
, struct dentry
*upperdentry
,
882 enum ovl_xattr ox
, const void *value
, size_t size
,
890 err
= ovl_setxattr(ofs
, upperdentry
, ox
, value
, size
);
892 if (err
== -EOPNOTSUPP
) {
893 pr_warn("cannot set %s xattr on upper\n", ovl_xattr(ofs
, ox
));
901 int ovl_set_impure(struct dentry
*dentry
, struct dentry
*upperdentry
)
903 struct ovl_fs
*ofs
= OVL_FS(dentry
->d_sb
);
906 if (ovl_test_flag(OVL_IMPURE
, d_inode(dentry
)))
910 * Do not fail when upper doesn't support xattrs.
911 * Upper inodes won't have origin nor redirect xattr anyway.
913 err
= ovl_check_setxattr(ofs
, upperdentry
, OVL_XATTR_IMPURE
, "y", 1, 0);
915 ovl_set_flag(OVL_IMPURE
, d_inode(dentry
));
921 #define OVL_PROTATTR_MAX 32 /* Reserved for future flags */
923 void ovl_check_protattr(struct inode
*inode
, struct dentry
*upper
)
925 struct ovl_fs
*ofs
= OVL_FS(inode
->i_sb
);
926 u32 iflags
= inode
->i_flags
& OVL_PROT_I_FLAGS_MASK
;
927 char buf
[OVL_PROTATTR_MAX
+1];
930 res
= ovl_getxattr_upper(ofs
, upper
, OVL_XATTR_PROTATTR
, buf
,
936 * Initialize inode flags from overlay.protattr xattr and upper inode
937 * flags. If upper inode has those fileattr flags set (i.e. from old
938 * kernel), we do not clear them on ovl_get_inode(), but we will clear
939 * them on next fileattr_set().
941 for (n
= 0; n
< res
; n
++) {
944 else if (buf
[n
] == 'i')
945 iflags
|= S_IMMUTABLE
;
950 if (!res
|| n
< res
) {
951 pr_warn_ratelimited("incompatible overlay.protattr format (%pd2, len=%d)\n",
954 inode_set_flags(inode
, iflags
, OVL_PROT_I_FLAGS_MASK
);
958 int ovl_set_protattr(struct inode
*inode
, struct dentry
*upper
,
961 struct ovl_fs
*ofs
= OVL_FS(inode
->i_sb
);
962 char buf
[OVL_PROTATTR_MAX
];
963 int len
= 0, err
= 0;
966 BUILD_BUG_ON(HWEIGHT32(OVL_PROT_FS_FLAGS_MASK
) > OVL_PROTATTR_MAX
);
968 if (fa
->flags
& FS_APPEND_FL
) {
972 if (fa
->flags
& FS_IMMUTABLE_FL
) {
974 iflags
|= S_IMMUTABLE
;
978 * Do not allow to set protection flags when upper doesn't support
979 * xattrs, because we do not set those fileattr flags on upper inode.
980 * Remove xattr if it exists and all protection flags are cleared.
983 err
= ovl_check_setxattr(ofs
, upper
, OVL_XATTR_PROTATTR
,
985 } else if (inode
->i_flags
& OVL_PROT_I_FLAGS_MASK
) {
986 err
= ovl_removexattr(ofs
, upper
, OVL_XATTR_PROTATTR
);
987 if (err
== -EOPNOTSUPP
|| err
== -ENODATA
)
993 inode_set_flags(inode
, iflags
, OVL_PROT_I_FLAGS_MASK
);
995 /* Mask out the fileattr flags that should not be set in upper inode */
996 fa
->flags
&= ~OVL_PROT_FS_FLAGS_MASK
;
997 fa
->fsx_xflags
&= ~OVL_PROT_FSX_FLAGS_MASK
;
1003 * Caller must hold a reference to inode to prevent it from being freed while
1004 * it is marked inuse.
1006 bool ovl_inuse_trylock(struct dentry
*dentry
)
1008 struct inode
*inode
= d_inode(dentry
);
1009 bool locked
= false;
1011 spin_lock(&inode
->i_lock
);
1012 if (!(inode
->i_state
& I_OVL_INUSE
)) {
1013 inode
->i_state
|= I_OVL_INUSE
;
1016 spin_unlock(&inode
->i_lock
);
1021 void ovl_inuse_unlock(struct dentry
*dentry
)
1024 struct inode
*inode
= d_inode(dentry
);
1026 spin_lock(&inode
->i_lock
);
1027 WARN_ON(!(inode
->i_state
& I_OVL_INUSE
));
1028 inode
->i_state
&= ~I_OVL_INUSE
;
1029 spin_unlock(&inode
->i_lock
);
1033 bool ovl_is_inuse(struct dentry
*dentry
)
1035 struct inode
*inode
= d_inode(dentry
);
1038 spin_lock(&inode
->i_lock
);
1039 inuse
= (inode
->i_state
& I_OVL_INUSE
);
1040 spin_unlock(&inode
->i_lock
);
1046 * Does this overlay dentry need to be indexed on copy up?
1048 bool ovl_need_index(struct dentry
*dentry
)
1050 struct dentry
*lower
= ovl_dentry_lower(dentry
);
1052 if (!lower
|| !ovl_indexdir(dentry
->d_sb
))
1055 /* Index all files for NFS export and consistency verification */
1056 if (ovl_index_all(dentry
->d_sb
))
1059 /* Index only lower hardlinks on copy up */
1060 if (!d_is_dir(lower
) && d_inode(lower
)->i_nlink
> 1)
1066 /* Caller must hold OVL_I(inode)->lock */
1067 static void ovl_cleanup_index(struct dentry
*dentry
)
1069 struct ovl_fs
*ofs
= OVL_FS(dentry
->d_sb
);
1070 struct dentry
*indexdir
= ovl_indexdir(dentry
->d_sb
);
1071 struct inode
*dir
= indexdir
->d_inode
;
1072 struct dentry
*lowerdentry
= ovl_dentry_lower(dentry
);
1073 struct dentry
*upperdentry
= ovl_dentry_upper(dentry
);
1074 struct dentry
*index
= NULL
;
1075 struct inode
*inode
;
1076 struct qstr name
= { };
1077 bool got_write
= false;
1080 err
= ovl_get_index_name(ofs
, lowerdentry
, &name
);
1084 err
= ovl_want_write(dentry
);
1089 inode
= d_inode(upperdentry
);
1090 if (!S_ISDIR(inode
->i_mode
) && inode
->i_nlink
!= 1) {
1091 pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
1092 upperdentry
, inode
->i_ino
, inode
->i_nlink
);
1094 * We either have a bug with persistent union nlink or a lower
1095 * hardlink was added while overlay is mounted. Adding a lower
1096 * hardlink and then unlinking all overlay hardlinks would drop
1097 * overlay nlink to zero before all upper inodes are unlinked.
1098 * As a safety measure, when that situation is detected, set
1099 * the overlay nlink to the index inode nlink minus one for the
1100 * index entry itself.
1102 set_nlink(d_inode(dentry
), inode
->i_nlink
- 1);
1103 ovl_set_nlink_upper(dentry
);
1107 inode_lock_nested(dir
, I_MUTEX_PARENT
);
1108 index
= ovl_lookup_upper(ofs
, name
.name
, indexdir
, name
.len
);
1109 err
= PTR_ERR(index
);
1110 if (IS_ERR(index
)) {
1112 } else if (ovl_index_all(dentry
->d_sb
)) {
1113 /* Whiteout orphan index to block future open by handle */
1114 err
= ovl_cleanup_and_whiteout(OVL_FS(dentry
->d_sb
),
1117 /* Cleanup orphan index entries */
1118 err
= ovl_cleanup(ofs
, dir
, index
);
1127 ovl_drop_write(dentry
);
1133 pr_err("cleanup index of '%pd2' failed (%i)\n", dentry
, err
);
1138 * Operations that change overlay inode and upper inode nlink need to be
1139 * synchronized with copy up for persistent nlink accounting.
1141 int ovl_nlink_start(struct dentry
*dentry
)
1143 struct inode
*inode
= d_inode(dentry
);
1144 const struct cred
*old_cred
;
1147 if (WARN_ON(!inode
))
1151 * With inodes index enabled, we store the union overlay nlink
1152 * in an xattr on the index inode. When whiting out an indexed lower,
1153 * we need to decrement the overlay persistent nlink, but before the
1154 * first copy up, we have no upper index inode to store the xattr.
1156 * As a workaround, before whiteout/rename over an indexed lower,
1157 * copy up to create the upper index. Creating the upper index will
1158 * initialize the overlay nlink, so it could be dropped if unlink
1159 * or rename succeeds.
1161 * TODO: implement metadata only index copy up when called with
1162 * ovl_copy_up_flags(dentry, O_PATH).
1164 if (ovl_need_index(dentry
) && !ovl_dentry_has_upper_alias(dentry
)) {
1165 err
= ovl_copy_up(dentry
);
1170 err
= ovl_inode_lock_interruptible(inode
);
1174 err
= ovl_want_write(dentry
);
1178 if (d_is_dir(dentry
) || !ovl_test_flag(OVL_INDEX
, inode
))
1181 old_cred
= ovl_override_creds(dentry
->d_sb
);
1183 * The overlay inode nlink should be incremented/decremented IFF the
1184 * upper operation succeeds, along with nlink change of upper inode.
1185 * Therefore, before link/unlink/rename, we store the union nlink
1186 * value relative to the upper inode nlink in an upper inode xattr.
1188 err
= ovl_set_nlink_upper(dentry
);
1189 ovl_revert_creds(old_cred
);
1191 goto out_drop_write
;
1196 ovl_drop_write(dentry
);
1198 ovl_inode_unlock(inode
);
1203 void ovl_nlink_end(struct dentry
*dentry
)
1205 struct inode
*inode
= d_inode(dentry
);
1207 ovl_drop_write(dentry
);
1209 if (ovl_test_flag(OVL_INDEX
, inode
) && inode
->i_nlink
== 0) {
1210 const struct cred
*old_cred
;
1212 old_cred
= ovl_override_creds(dentry
->d_sb
);
1213 ovl_cleanup_index(dentry
);
1214 ovl_revert_creds(old_cred
);
1217 ovl_inode_unlock(inode
);
1220 int ovl_lock_rename_workdir(struct dentry
*workdir
, struct dentry
*upperdir
)
1222 struct dentry
*trap
;
1224 /* Workdir should not be the same as upperdir */
1225 if (workdir
== upperdir
)
1228 /* Workdir should not be subdir of upperdir and vice versa */
1229 trap
= lock_rename(workdir
, upperdir
);
1238 unlock_rename(workdir
, upperdir
);
1240 pr_err("failed to lock workdir+upperdir\n");
1245 * err < 0, 0 if no metacopy xattr, metacopy data size if xattr found.
1246 * an empty xattr returns OVL_METACOPY_MIN_SIZE to distinguish from no xattr value.
1248 int ovl_check_metacopy_xattr(struct ovl_fs
*ofs
, const struct path
*path
,
1249 struct ovl_metacopy
*data
)
1253 /* Only regular files can have metacopy xattr */
1254 if (!S_ISREG(d_inode(path
->dentry
)->i_mode
))
1257 res
= ovl_path_getxattr(ofs
, path
, OVL_XATTR_METACOPY
,
1258 data
, data
? OVL_METACOPY_MAX_SIZE
: 0);
1260 if (res
== -ENODATA
|| res
== -EOPNOTSUPP
)
1263 * getxattr on user.* may fail with EACCES in case there's no
1264 * read permission on the inode. Not much we can do, other than
1265 * tell the caller that this is not a metacopy inode.
1267 if (ofs
->config
.userxattr
&& res
== -EACCES
)
1273 /* Emulate empty data for zero size metacopy xattr */
1274 res
= OVL_METACOPY_MIN_SIZE
;
1276 memset(data
, 0, res
);
1279 } else if (res
< OVL_METACOPY_MIN_SIZE
) {
1280 pr_warn_ratelimited("metacopy file '%pd' has too small xattr\n",
1284 if (data
->version
!= 0) {
1285 pr_warn_ratelimited("metacopy file '%pd' has unsupported version\n",
1289 if (res
!= data
->len
) {
1290 pr_warn_ratelimited("metacopy file '%pd' has invalid xattr size\n",
1298 pr_warn_ratelimited("failed to get metacopy (%i)\n", res
);
1302 int ovl_set_metacopy_xattr(struct ovl_fs
*ofs
, struct dentry
*d
, struct ovl_metacopy
*metacopy
)
1304 size_t len
= metacopy
->len
;
1306 /* If no flags or digest fall back to empty metacopy file */
1307 if (metacopy
->version
== 0 && metacopy
->flags
== 0 && metacopy
->digest_algo
== 0)
1310 return ovl_check_setxattr(ofs
, d
, OVL_XATTR_METACOPY
,
1311 metacopy
, len
, -EOPNOTSUPP
);
1314 bool ovl_is_metacopy_dentry(struct dentry
*dentry
)
1316 struct ovl_entry
*oe
= OVL_E(dentry
);
1318 if (!d_is_reg(dentry
))
1321 if (ovl_dentry_upper(dentry
)) {
1322 if (!ovl_has_upperdata(d_inode(dentry
)))
1327 return (ovl_numlower(oe
) > 1);
/*
 * ovl_get_redirect_xattr() - read the redirect xattr of @path into a new
 * buffer.
 * @ofs:     overlay fs instance, forwarded to ovl_path_getxattr()
 * @path:    path whose OVL_XATTR_REDIRECT xattr is read
 * @padding: extra bytes added to the allocation on top of the xattr size
 *
 * The xattr is queried twice: first with a NULL buffer to obtain a size, then
 * into a kzalloc()ed buffer of that size plus @padding plus one byte. The
 * value is then inspected: a leading '/' triggers a component-by-component
 * walk, otherwise the string is searched for any '/'.
 *
 * Visible returns are ERR_PTR(-ENOMEM) and ERR_PTR(res); "invalid redirect"
 * and "failed to get redirect" warnings are logged on the corresponding paths.
 *
 * NOTE(review): the success return, the handling of -ENODATA/-EOPNOTSUPP and
 * the buffer release on error are not present in this copy.
 */
1330 char *ovl_get_redirect_xattr(struct ovl_fs
*ofs
, const struct path
*path
, int padding
)
1333 char *s
, *next
, *buf
= NULL
;
1335 res
= ovl_path_getxattr(ofs
, path
, OVL_XATTR_REDIRECT
, NULL
, 0);
1336 if (res
== -ENODATA
|| res
== -EOPNOTSUPP
)
/* kzalloc() zero-fills, so bytes past the xattr value read below stay NUL */
1343 buf
= kzalloc(res
+ padding
+ 1, GFP_KERNEL
);
1345 return ERR_PTR(-ENOMEM
);
1347 res
= ovl_path_getxattr(ofs
, path
, OVL_XATTR_REDIRECT
, buf
, res
);
1353 if (buf
[0] == '/') {
/* Each pass needs a '/' at s, steps over it, then finds the next '/' or the end of the string */
1354 for (s
= buf
; *s
++ == '/'; s
= next
) {
1355 next
= strchrnul(s
, '/');
1360 if (strchr(buf
, '/') != NULL
)
1366 pr_warn_ratelimited("invalid redirect (%s)\n", buf
);
1370 pr_warn_ratelimited("failed to get redirect (%i)\n", res
);
1373 return ERR_PTR(res
);
1376 /* Call with mounter creds as it may open the file */
/*
 * ovl_ensure_verity_loaded() - make sure fs-verity info for @datapath is
 * loaded.
 * @datapath: path of the data file; its dentry's inode is the one checked
 *
 * Only when the inode is flagged IS_VERITY() but fsverity_active() is still
 * false is the file opened read-only with kernel_file_open(), using
 * current_cred(). The visible error return is PTR_ERR() of that open.
 *
 * NOTE(review): the declaration of filp, the error test on it, the release
 * of the opened file and the final return are not present in this copy.
 */
1377 int ovl_ensure_verity_loaded(struct path
*datapath
)
1379 struct inode
*inode
= d_inode(datapath
->dentry
);
1382 if (!fsverity_active(inode
) && IS_VERITY(inode
)) {
1384 * If this inode was not yet opened, the verity info hasn't been
1385 * loaded yet, so we need to do that here to force it into memory.
1387 filp
= kernel_file_open(datapath
, O_RDONLY
, current_cred());
1389 return PTR_ERR(filp
);
/*
 * ovl_validate_verity() - compare the digest stored in a metacopy xattr with
 * the fs-verity digest of the data file.
 * @ofs:      overlay fs instance; config.verity_mode selects the policy
 * @metapath: path of the metacopy file that carries the xattr
 * @datapath: path of the file whose fs-verity digest is computed
 *
 * Visible flow: the check only applies when verity_mode is non-zero and
 * @metapath is a regular file. The metacopy xattr is read with
 * ovl_check_metacopy_xattr(); a missing xattr or a zero digest_algo is warned
 * about when verity_mode is OVL_VERITY_REQUIRE. Otherwise the fs-verity info
 * of @datapath is loaded, its digest fetched with fsverity_get_digest(), and
 * a warning is logged if the digest size, algorithm or bytes differ from the
 * xattr.
 *
 * NOTE(review): the declaration of verity_algo and every return statement
 * are not present in this copy, so the error codes cannot be confirmed here.
 */
1396 int ovl_validate_verity(struct ovl_fs
*ofs
,
1397 struct path
*metapath
,
1398 struct path
*datapath
)
1400 struct ovl_metacopy metacopy_data
;
1401 u8 actual_digest
[FS_VERITY_MAX_DIGEST_SIZE
];
1402 int xattr_digest_size
, digest_size
;
1403 int xattr_size
, err
;
1406 if (!ofs
->config
.verity_mode
||
1407 /* Verity only works on regular files */
1408 !S_ISREG(d_inode(metapath
->dentry
)->i_mode
))
1411 xattr_size
= ovl_check_metacopy_xattr(ofs
, metapath
, &metacopy_data
);
1415 if (!xattr_size
|| !metacopy_data
.digest_algo
) {
1416 if (ofs
->config
.verity_mode
== OVL_VERITY_REQUIRE
) {
1417 pr_warn_ratelimited("metacopy file '%pd' has no digest specified\n",
1424 xattr_digest_size
= ovl_metadata_digest_size(&metacopy_data
);
1426 err
= ovl_ensure_verity_loaded(datapath
);
1428 pr_warn_ratelimited("lower file '%pd' failed to load fs-verity info\n",
1433 digest_size
= fsverity_get_digest(d_inode(datapath
->dentry
), actual_digest
,
1434 &verity_algo
, NULL
);
1435 if (digest_size
== 0) {
1436 pr_warn_ratelimited("lower file '%pd' has no fs-verity digest\n", datapath
->dentry
);
1440 if (xattr_digest_size
!= digest_size
||
1441 metacopy_data
.digest_algo
!= verity_algo
||
1442 memcmp(metacopy_data
.digest
, actual_digest
, xattr_digest_size
) != 0) {
1443 pr_warn_ratelimited("lower file '%pd' has the wrong fs-verity digest\n",
/*
 * ovl_get_verity_digest() - store the fs-verity digest of @src in @metacopy.
 * @ofs:      overlay fs instance; config.verity_mode selects the policy
 * @src:      path of the file whose digest is read
 * @metacopy: destination; digest and digest_algo are filled in by
 *            fsverity_get_digest() and len is increased by the digest size
 *
 * Visible flow: the work only applies when verity_mode is non-zero and @src
 * is a regular file. After ovl_ensure_verity_loaded(), a digest size of zero
 * (or one above FS_VERITY_MAX_DIGEST_SIZE, which also trips WARN_ON_ONCE) is
 * warned about when verity_mode is OVL_VERITY_REQUIRE.
 *
 * NOTE(review): the return statements are not present in this copy, so the
 * values returned on each path cannot be confirmed from here.
 */
1451 int ovl_get_verity_digest(struct ovl_fs
*ofs
, struct path
*src
,
1452 struct ovl_metacopy
*metacopy
)
1454 int err
, digest_size
;
1456 if (!ofs
->config
.verity_mode
|| !S_ISREG(d_inode(src
->dentry
)->i_mode
))
1459 err
= ovl_ensure_verity_loaded(src
);
1461 pr_warn_ratelimited("lower file '%pd' failed to load fs-verity info\n",
1466 digest_size
= fsverity_get_digest(d_inode(src
->dentry
),
1467 metacopy
->digest
, &metacopy
->digest_algo
, NULL
);
1468 if (digest_size
== 0 ||
1469 WARN_ON_ONCE(digest_size
> FS_VERITY_MAX_DIGEST_SIZE
)) {
1470 if (ofs
->config
.verity_mode
== OVL_VERITY_REQUIRE
) {
1471 pr_warn_ratelimited("lower file '%pd' has no fs-verity digest\n",
1478 metacopy
->len
+= digest_size
;
1483 * ovl_sync_status() - Check fs sync status for volatile mounts
1485 * Returns 1 if this is not a volatile mount and a real sync is required.
1487 * Returns 0 if syncing can be skipped because mount is volatile, and no errors
1488 * have occurred on the upperdir since the mount.
1490 * Returns -errno if it is a volatile mount, and the error that occurred since
1491 * the last mount. If the error code changes, it'll return the latest error
/*
 * Visible flow: ovl_should_sync(ofs) is tested first; the upper mount is then
 * looked up with ovl_upper_mnt(), and the final visible return is
 * errseq_check() of the upper superblock's s_wb_err against the value saved
 * in ofs->errseq.
 *
 * NOTE(review): the statements between these steps (including whatever is
 * returned when ovl_should_sync() is true) are not present in this copy.
 */
1495 int ovl_sync_status(struct ovl_fs
*ofs
)
1497 struct vfsmount
*mnt
;
1499 if (ovl_should_sync(ofs
))
1502 mnt
= ovl_upper_mnt(ofs
);
1506 return errseq_check(&mnt
->mnt_sb
->s_wb_err
, ofs
->errseq
);
1510 * ovl_copyattr() - copy inode attributes from layer to ovl inode
1512 * When overlay copies inode information from an upper or lower layer to the
1513 * relevant overlay inode it will apply the idmapping of the upper or lower
1514 * layer when doing so ensuring that the ovl inode ownership will correctly
1515 * reflect the ownership of the idmapped upper or lower layer. For example, an
1516 * idmapped upper or lower layer mapping id 1001 to id 1000 will take care to
1517 * map any lower or upper inode owned by id 1001 to id 1000. These mapping
1518 * helpers are nops when the relevant layer isn't idmapped.
/*
 * Copies uid, gid, mode, atime, mtime, ctime and size from the real inode
 * (as returned by ovl_i_path_real()) to the overlay @inode.
 *
 * The owner and group are first translated through the idmap of the real
 * path's mount (i_uid_into_vfsuid()/i_gid_into_vfsgid()) and then converted
 * back to kuid/kgid before being stored. All stores happen with
 * inode->i_lock held.
 *
 * NOTE(review): the declarations of vfsuid and vfsgid and the function's
 * braces are not present in this copy.
 */
1520 void ovl_copyattr(struct inode
*inode
)
1522 struct path realpath
;
1523 struct inode
*realinode
;
1524 struct mnt_idmap
*real_idmap
;
1528 realinode
= ovl_i_path_real(inode
, &realpath
);
1529 real_idmap
= mnt_idmap(realpath
.mnt
);
1531 spin_lock(&inode
->i_lock
);
1532 vfsuid
= i_uid_into_vfsuid(real_idmap
, realinode
);
1533 vfsgid
= i_gid_into_vfsgid(real_idmap
, realinode
);
1535 inode
->i_uid
= vfsuid_into_kuid(vfsuid
);
1536 inode
->i_gid
= vfsgid_into_kgid(vfsgid
);
1537 inode
->i_mode
= realinode
->i_mode
;
1538 inode_set_atime_to_ts(inode
, inode_get_atime(realinode
));
1539 inode_set_mtime_to_ts(inode
, inode_get_mtime(realinode
));
1540 inode_set_ctime_to_ts(inode
, inode_get_ctime(realinode
));
1541 i_size_write(inode
, i_size_read(realinode
));
1542 spin_unlock(&inode
->i_lock
);