1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (C) 2017 Red Hat, Inc.
6 #include <linux/cred.h>
7 #include <linux/file.h>
8 #include <linux/mount.h>
9 #include <linux/xattr.h>
10 #include <linux/uio.h>
11 #include <linux/uaccess.h>
12 #include <linux/splice.h>
15 #include "overlayfs.h"
19 struct kiocb
*orig_iocb
;
23 static struct kmem_cache
*ovl_aio_request_cachep
;
/*
 * Classify @realinode relative to the overlay @inode for debug output.
 *
 * Returns one character, printed by ovl_open_realfile() through "%c".
 *
 * NOTE(review): the character literals were lost from the listing.  The
 * values below ('l' = not the upper inode, 'u' = upper inode with data,
 * 'm' = upper inode without data) are reconstructed - confirm.
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
35 static struct file
*ovl_open_realfile(const struct file
*file
,
36 struct inode
*realinode
)
38 struct inode
*inode
= file_inode(file
);
39 struct file
*realfile
;
40 const struct cred
*old_cred
;
41 int flags
= file
->f_flags
| O_NOATIME
| FMODE_NONOTIFY
;
43 old_cred
= ovl_override_creds(inode
->i_sb
);
44 realfile
= open_with_fake_path(&file
->f_path
, flags
, realinode
,
46 revert_creds(old_cred
);
48 pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
49 file
, file
, ovl_whatisit(inode
, realinode
), file
->f_flags
,
50 realfile
, IS_ERR(realfile
) ? 0 : realfile
->f_flags
);
55 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
57 static int ovl_change_flags(struct file
*file
, unsigned int flags
)
59 struct inode
*inode
= file_inode(file
);
62 /* No atime modificaton on underlying */
63 flags
|= O_NOATIME
| FMODE_NONOTIFY
;
65 /* If some flag changed that cannot be changed then something's amiss */
66 if (WARN_ON((file
->f_flags
^ flags
) & ~OVL_SETFL_MASK
))
69 flags
&= OVL_SETFL_MASK
;
71 if (((flags
^ file
->f_flags
) & O_APPEND
) && IS_APPEND(inode
))
74 if (flags
& O_DIRECT
) {
75 if (!file
->f_mapping
->a_ops
||
76 !file
->f_mapping
->a_ops
->direct_IO
)
80 if (file
->f_op
->check_flags
) {
81 err
= file
->f_op
->check_flags(flags
);
86 spin_lock(&file
->f_lock
);
87 file
->f_flags
= (file
->f_flags
& ~OVL_SETFL_MASK
) | flags
;
88 spin_unlock(&file
->f_lock
);
93 static int ovl_real_fdget_meta(const struct file
*file
, struct fd
*real
,
96 struct inode
*inode
= file_inode(file
);
97 struct inode
*realinode
;
100 real
->file
= file
->private_data
;
103 realinode
= ovl_inode_real(inode
);
105 realinode
= ovl_inode_realdata(inode
);
107 /* Has it been copied up since we'd opened it? */
108 if (unlikely(file_inode(real
->file
) != realinode
)) {
109 real
->flags
= FDPUT_FPUT
;
110 real
->file
= ovl_open_realfile(file
, realinode
);
112 return PTR_ERR_OR_ZERO(real
->file
);
115 /* Did the flags change since open? */
116 if (unlikely((file
->f_flags
^ real
->file
->f_flags
) & ~O_NOATIME
))
117 return ovl_change_flags(real
->file
, file
->f_flags
);
122 static int ovl_real_fdget(const struct file
*file
, struct fd
*real
)
124 return ovl_real_fdget_meta(file
, real
, false);
127 static int ovl_open(struct inode
*inode
, struct file
*file
)
129 struct file
*realfile
;
132 err
= ovl_maybe_copy_up(file_dentry(file
), file
->f_flags
);
136 /* No longer need these flags, so don't pass them on to underlying fs */
137 file
->f_flags
&= ~(O_CREAT
| O_EXCL
| O_NOCTTY
| O_TRUNC
);
139 realfile
= ovl_open_realfile(file
, ovl_inode_realdata(inode
));
140 if (IS_ERR(realfile
))
141 return PTR_ERR(realfile
);
143 file
->private_data
= realfile
;
148 static int ovl_release(struct inode
*inode
, struct file
*file
)
150 fput(file
->private_data
);
155 static loff_t
ovl_llseek(struct file
*file
, loff_t offset
, int whence
)
157 struct inode
*inode
= file_inode(file
);
159 const struct cred
*old_cred
;
163 * The two special cases below do not need to involve real fs,
164 * so we can optimizing concurrent callers.
167 if (whence
== SEEK_CUR
)
170 if (whence
== SEEK_SET
)
171 return vfs_setpos(file
, 0, 0);
174 ret
= ovl_real_fdget(file
, &real
);
179 * Overlay file f_pos is the master copy that is preserved
180 * through copy up and modified on read/write, but only real
181 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
182 * limitations that are more strict than ->s_maxbytes for specific
183 * files, so we use the real file to perform seeks.
185 ovl_inode_lock(inode
);
186 real
.file
->f_pos
= file
->f_pos
;
188 old_cred
= ovl_override_creds(inode
->i_sb
);
189 ret
= vfs_llseek(real
.file
, offset
, whence
);
190 revert_creds(old_cred
);
192 file
->f_pos
= real
.file
->f_pos
;
193 ovl_inode_unlock(inode
);
200 static void ovl_file_accessed(struct file
*file
)
202 struct inode
*inode
, *upperinode
;
204 if (file
->f_flags
& O_NOATIME
)
207 inode
= file_inode(file
);
208 upperinode
= ovl_inode_upper(inode
);
213 if ((!timespec64_equal(&inode
->i_mtime
, &upperinode
->i_mtime
) ||
214 !timespec64_equal(&inode
->i_ctime
, &upperinode
->i_ctime
))) {
215 inode
->i_mtime
= upperinode
->i_mtime
;
216 inode
->i_ctime
= upperinode
->i_ctime
;
219 touch_atime(&file
->f_path
);
222 static rwf_t
ovl_iocb_to_rwf(struct kiocb
*iocb
)
224 int ifl
= iocb
->ki_flags
;
227 if (ifl
& IOCB_NOWAIT
)
229 if (ifl
& IOCB_HIPRI
)
231 if (ifl
& IOCB_DSYNC
)
239 static void ovl_aio_cleanup_handler(struct ovl_aio_req
*aio_req
)
241 struct kiocb
*iocb
= &aio_req
->iocb
;
242 struct kiocb
*orig_iocb
= aio_req
->orig_iocb
;
244 if (iocb
->ki_flags
& IOCB_WRITE
) {
245 struct inode
*inode
= file_inode(orig_iocb
->ki_filp
);
247 /* Actually acquired in ovl_write_iter() */
248 __sb_writers_acquired(file_inode(iocb
->ki_filp
)->i_sb
,
250 file_end_write(iocb
->ki_filp
);
251 ovl_copyattr(ovl_inode_real(inode
), inode
);
254 orig_iocb
->ki_pos
= iocb
->ki_pos
;
256 kmem_cache_free(ovl_aio_request_cachep
, aio_req
);
259 static void ovl_aio_rw_complete(struct kiocb
*iocb
, long res
, long res2
)
261 struct ovl_aio_req
*aio_req
= container_of(iocb
,
262 struct ovl_aio_req
, iocb
);
263 struct kiocb
*orig_iocb
= aio_req
->orig_iocb
;
265 ovl_aio_cleanup_handler(aio_req
);
266 orig_iocb
->ki_complete(orig_iocb
, res
, res2
);
269 static ssize_t
ovl_read_iter(struct kiocb
*iocb
, struct iov_iter
*iter
)
271 struct file
*file
= iocb
->ki_filp
;
273 const struct cred
*old_cred
;
276 if (!iov_iter_count(iter
))
279 ret
= ovl_real_fdget(file
, &real
);
283 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
284 if (is_sync_kiocb(iocb
)) {
285 ret
= vfs_iter_read(real
.file
, iter
, &iocb
->ki_pos
,
286 ovl_iocb_to_rwf(iocb
));
288 struct ovl_aio_req
*aio_req
;
291 aio_req
= kmem_cache_zalloc(ovl_aio_request_cachep
, GFP_KERNEL
);
297 aio_req
->orig_iocb
= iocb
;
298 kiocb_clone(&aio_req
->iocb
, iocb
, real
.file
);
299 aio_req
->iocb
.ki_complete
= ovl_aio_rw_complete
;
300 ret
= vfs_iocb_iter_read(real
.file
, &aio_req
->iocb
, iter
);
301 if (ret
!= -EIOCBQUEUED
)
302 ovl_aio_cleanup_handler(aio_req
);
305 revert_creds(old_cred
);
306 ovl_file_accessed(file
);
313 static ssize_t
ovl_write_iter(struct kiocb
*iocb
, struct iov_iter
*iter
)
315 struct file
*file
= iocb
->ki_filp
;
316 struct inode
*inode
= file_inode(file
);
318 const struct cred
*old_cred
;
321 if (!iov_iter_count(iter
))
326 ovl_copyattr(ovl_inode_real(inode
), inode
);
327 ret
= file_remove_privs(file
);
331 ret
= ovl_real_fdget(file
, &real
);
335 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
336 if (is_sync_kiocb(iocb
)) {
337 file_start_write(real
.file
);
338 ret
= vfs_iter_write(real
.file
, iter
, &iocb
->ki_pos
,
339 ovl_iocb_to_rwf(iocb
));
340 file_end_write(real
.file
);
342 ovl_copyattr(ovl_inode_real(inode
), inode
);
344 struct ovl_aio_req
*aio_req
;
347 aio_req
= kmem_cache_zalloc(ovl_aio_request_cachep
, GFP_KERNEL
);
351 file_start_write(real
.file
);
352 /* Pacify lockdep, same trick as done in aio_write() */
353 __sb_writers_release(file_inode(real
.file
)->i_sb
,
357 aio_req
->orig_iocb
= iocb
;
358 kiocb_clone(&aio_req
->iocb
, iocb
, real
.file
);
359 aio_req
->iocb
.ki_complete
= ovl_aio_rw_complete
;
360 ret
= vfs_iocb_iter_write(real
.file
, &aio_req
->iocb
, iter
);
361 if (ret
!= -EIOCBQUEUED
)
362 ovl_aio_cleanup_handler(aio_req
);
365 revert_creds(old_cred
);
374 static ssize_t
ovl_splice_read(struct file
*in
, loff_t
*ppos
,
375 struct pipe_inode_info
*pipe
, size_t len
,
380 const struct cred
*old_cred
;
382 ret
= ovl_real_fdget(in
, &real
);
386 old_cred
= ovl_override_creds(file_inode(in
)->i_sb
);
387 ret
= generic_file_splice_read(real
.file
, ppos
, pipe
, len
, flags
);
388 revert_creds(old_cred
);
390 ovl_file_accessed(in
);
396 ovl_splice_write(struct pipe_inode_info
*pipe
, struct file
*out
,
397 loff_t
*ppos
, size_t len
, unsigned int flags
)
400 const struct cred
*old_cred
;
403 ret
= ovl_real_fdget(out
, &real
);
407 old_cred
= ovl_override_creds(file_inode(out
)->i_sb
);
408 ret
= iter_file_splice_write(pipe
, real
.file
, ppos
, len
, flags
);
409 revert_creds(old_cred
);
411 ovl_file_accessed(out
);
416 static int ovl_fsync(struct file
*file
, loff_t start
, loff_t end
, int datasync
)
419 const struct cred
*old_cred
;
422 ret
= ovl_real_fdget_meta(file
, &real
, !datasync
);
426 /* Don't sync lower file for fear of receiving EROFS error */
427 if (file_inode(real
.file
) == ovl_inode_upper(file_inode(file
))) {
428 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
429 ret
= vfs_fsync_range(real
.file
, start
, end
, datasync
);
430 revert_creds(old_cred
);
438 static int ovl_mmap(struct file
*file
, struct vm_area_struct
*vma
)
440 struct file
*realfile
= file
->private_data
;
441 const struct cred
*old_cred
;
444 if (!realfile
->f_op
->mmap
)
447 if (WARN_ON(file
!= vma
->vm_file
))
450 vma
->vm_file
= get_file(realfile
);
452 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
453 ret
= call_mmap(vma
->vm_file
, vma
);
454 revert_creds(old_cred
);
457 /* Drop reference count from new vm_file value */
460 /* Drop reference count from previous vm_file value */
464 ovl_file_accessed(file
);
469 static long ovl_fallocate(struct file
*file
, int mode
, loff_t offset
, loff_t len
)
471 struct inode
*inode
= file_inode(file
);
473 const struct cred
*old_cred
;
476 ret
= ovl_real_fdget(file
, &real
);
480 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
481 ret
= vfs_fallocate(real
.file
, mode
, offset
, len
);
482 revert_creds(old_cred
);
485 ovl_copyattr(ovl_inode_real(inode
), inode
);
492 static int ovl_fadvise(struct file
*file
, loff_t offset
, loff_t len
, int advice
)
495 const struct cred
*old_cred
;
498 ret
= ovl_real_fdget(file
, &real
);
502 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
503 ret
= vfs_fadvise(real
.file
, offset
, len
, advice
);
504 revert_creds(old_cred
);
511 static long ovl_real_ioctl(struct file
*file
, unsigned int cmd
,
515 const struct cred
*old_cred
;
518 ret
= ovl_real_fdget(file
, &real
);
522 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
523 ret
= vfs_ioctl(real
.file
, cmd
, arg
);
524 revert_creds(old_cred
);
531 static long ovl_ioctl_set_flags(struct file
*file
, unsigned int cmd
,
532 unsigned long arg
, unsigned int iflags
)
535 struct inode
*inode
= file_inode(file
);
536 unsigned int old_iflags
;
538 if (!inode_owner_or_capable(inode
))
541 ret
= mnt_want_write_file(file
);
547 /* Check the capability before cred override */
549 old_iflags
= READ_ONCE(inode
->i_flags
);
550 if (((iflags
^ old_iflags
) & (S_APPEND
| S_IMMUTABLE
)) &&
551 !capable(CAP_LINUX_IMMUTABLE
))
554 ret
= ovl_maybe_copy_up(file_dentry(file
), O_WRONLY
);
558 ret
= ovl_real_ioctl(file
, cmd
, arg
);
560 ovl_copyflags(ovl_inode_real(inode
), inode
);
564 mnt_drop_write_file(file
);
570 static unsigned int ovl_fsflags_to_iflags(unsigned int flags
)
572 unsigned int iflags
= 0;
574 if (flags
& FS_SYNC_FL
)
576 if (flags
& FS_APPEND_FL
)
578 if (flags
& FS_IMMUTABLE_FL
)
579 iflags
|= S_IMMUTABLE
;
580 if (flags
& FS_NOATIME_FL
)
586 static long ovl_ioctl_set_fsflags(struct file
*file
, unsigned int cmd
,
591 if (get_user(flags
, (int __user
*) arg
))
594 return ovl_ioctl_set_flags(file
, cmd
, arg
,
595 ovl_fsflags_to_iflags(flags
));
598 static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags
)
600 unsigned int iflags
= 0;
602 if (xflags
& FS_XFLAG_SYNC
)
604 if (xflags
& FS_XFLAG_APPEND
)
606 if (xflags
& FS_XFLAG_IMMUTABLE
)
607 iflags
|= S_IMMUTABLE
;
608 if (xflags
& FS_XFLAG_NOATIME
)
614 static long ovl_ioctl_set_fsxflags(struct file
*file
, unsigned int cmd
,
619 memset(&fa
, 0, sizeof(fa
));
620 if (copy_from_user(&fa
, (void __user
*) arg
, sizeof(fa
)))
623 return ovl_ioctl_set_flags(file
, cmd
, arg
,
624 ovl_fsxflags_to_iflags(fa
.fsx_xflags
));
627 static long ovl_ioctl(struct file
*file
, unsigned int cmd
, unsigned long arg
)
632 case FS_IOC_GETFLAGS
:
633 case FS_IOC_FSGETXATTR
:
634 ret
= ovl_real_ioctl(file
, cmd
, arg
);
637 case FS_IOC_SETFLAGS
:
638 ret
= ovl_ioctl_set_fsflags(file
, cmd
, arg
);
641 case FS_IOC_FSSETXATTR
:
642 ret
= ovl_ioctl_set_fsxflags(file
, cmd
, arg
);
652 static long ovl_compat_ioctl(struct file
*file
, unsigned int cmd
,
656 case FS_IOC32_GETFLAGS
:
657 cmd
= FS_IOC_GETFLAGS
;
660 case FS_IOC32_SETFLAGS
:
661 cmd
= FS_IOC_SETFLAGS
;
668 return ovl_ioctl(file
, cmd
, arg
);
677 static loff_t
ovl_copyfile(struct file
*file_in
, loff_t pos_in
,
678 struct file
*file_out
, loff_t pos_out
,
679 loff_t len
, unsigned int flags
, enum ovl_copyop op
)
681 struct inode
*inode_out
= file_inode(file_out
);
682 struct fd real_in
, real_out
;
683 const struct cred
*old_cred
;
686 ret
= ovl_real_fdget(file_out
, &real_out
);
690 ret
= ovl_real_fdget(file_in
, &real_in
);
696 old_cred
= ovl_override_creds(file_inode(file_out
)->i_sb
);
699 ret
= vfs_copy_file_range(real_in
.file
, pos_in
,
700 real_out
.file
, pos_out
, len
, flags
);
704 ret
= vfs_clone_file_range(real_in
.file
, pos_in
,
705 real_out
.file
, pos_out
, len
, flags
);
709 ret
= vfs_dedupe_file_range_one(real_in
.file
, pos_in
,
710 real_out
.file
, pos_out
, len
,
714 revert_creds(old_cred
);
717 ovl_copyattr(ovl_inode_real(inode_out
), inode_out
);
725 static ssize_t
ovl_copy_file_range(struct file
*file_in
, loff_t pos_in
,
726 struct file
*file_out
, loff_t pos_out
,
727 size_t len
, unsigned int flags
)
729 return ovl_copyfile(file_in
, pos_in
, file_out
, pos_out
, len
, flags
,
733 static loff_t
ovl_remap_file_range(struct file
*file_in
, loff_t pos_in
,
734 struct file
*file_out
, loff_t pos_out
,
735 loff_t len
, unsigned int remap_flags
)
739 if (remap_flags
& ~(REMAP_FILE_DEDUP
| REMAP_FILE_ADVISORY
))
742 if (remap_flags
& REMAP_FILE_DEDUP
)
748 * Don't copy up because of a dedupe request, this wouldn't make sense
749 * most of the time (data would be duplicated instead of deduplicated).
751 if (op
== OVL_DEDUPE
&&
752 (!ovl_inode_upper(file_inode(file_in
)) ||
753 !ovl_inode_upper(file_inode(file_out
))))
756 return ovl_copyfile(file_in
, pos_in
, file_out
, pos_out
, len
,
760 const struct file_operations ovl_file_operations
= {
762 .release
= ovl_release
,
763 .llseek
= ovl_llseek
,
764 .read_iter
= ovl_read_iter
,
765 .write_iter
= ovl_write_iter
,
768 .fallocate
= ovl_fallocate
,
769 .fadvise
= ovl_fadvise
,
770 .unlocked_ioctl
= ovl_ioctl
,
771 .compat_ioctl
= ovl_compat_ioctl
,
772 .splice_read
= ovl_splice_read
,
773 .splice_write
= ovl_splice_write
,
775 .copy_file_range
= ovl_copy_file_range
,
776 .remap_file_range
= ovl_remap_file_range
,
779 int __init
ovl_aio_request_cache_init(void)
781 ovl_aio_request_cachep
= kmem_cache_create("ovl_aio_req",
782 sizeof(struct ovl_aio_req
),
783 0, SLAB_HWCACHE_ALIGN
, NULL
);
784 if (!ovl_aio_request_cachep
)
790 void ovl_aio_request_cache_destroy(void)
792 kmem_cache_destroy(ovl_aio_request_cachep
);