// SPDX-License-Identifier: GPL-2.0
#include <linux/fanotify.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <linux/sched/signal.h>
#include <linux/memcontrol.h>
#include <linux/statfs.h>
#include <linux/exportfs.h>

#include <asm/ioctls.h>

#include "../../mount.h"
#include "../fdinfo.h"
#include "fanotify.h"

#define FANOTIFY_DEFAULT_MAX_EVENTS	16384
#define FANOTIFY_DEFAULT_MAX_MARKS	8192
#define FANOTIFY_DEFAULT_MAX_LISTENERS	128

/*
 * All flags that may be specified in parameter event_f_flags of fanotify_init.
 *
 * Internal and external open flags are stored together in field f_flags of
 * struct file. Only external open flags shall be allowed in event_f_flags.
 * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be
 * excluded.
 */
#define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \
		O_ACCMODE | O_APPEND | O_NONBLOCK | \
		__O_SYNC | O_DSYNC | O_CLOEXEC | \
		O_LARGEFILE | O_NOATIME )

extern const struct fsnotify_ops fanotify_fsnotify_ops;

struct kmem_cache *fanotify_mark_cache __read_mostly;
struct kmem_cache *fanotify_event_cachep __read_mostly;
struct kmem_cache *fanotify_perm_event_cachep __read_mostly;

#define FANOTIFY_EVENT_ALIGN 4

static int fanotify_event_info_len(struct fanotify_event *event)
{
	if (!fanotify_event_has_fid(event))
		return 0;

	return roundup(sizeof(struct fanotify_event_info_fid) +
		       sizeof(struct file_handle) + event->fh_len,
		       FANOTIFY_EVENT_ALIGN);
}

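/*
 * Illustrative layout of a FAN_REPORT_FID event record as copied to the
 * userspace buffer (a sketch inferred from copy_event_to_user() and
 * copy_fid_to_user() below, not a definition from this file):
 *
 *	struct fanotify_event_metadata	(event_len covers the whole record)
 *	struct fanotify_event_info_fid	(hdr.info_type = FAN_EVENT_INFO_TYPE_FID)
 *	struct file_handle		(handle_type, handle_bytes = fh_len)
 *	unsigned char f_handle[fh_len]	(opaque handle bytes)
 *	zero padding up to the next FANOTIFY_EVENT_ALIGN boundary
 */
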
/*
 * Get an fsnotify notification event if one exists and is small
 * enough to fit in "count". Return an error pointer if the count
 * is not large enough. When a permission event is dequeued, its state is
 * updated accordingly.
 */
static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
					    size_t count)
{
	size_t event_size = FAN_EVENT_METADATA_LEN;
	struct fsnotify_event *fsn_event = NULL;

	pr_debug("%s: group=%p count=%zd\n", __func__, group, count);

	spin_lock(&group->notification_lock);
	if (fsnotify_notify_queue_is_empty(group))
		goto out;

	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
		event_size += fanotify_event_info_len(
			FANOTIFY_E(fsnotify_peek_first_event(group)));
	}

	if (event_size > count) {
		fsn_event = ERR_PTR(-EINVAL);
		goto out;
	}
	fsn_event = fsnotify_remove_first_event(group);
	if (fanotify_is_perm_event(FANOTIFY_E(fsn_event)->mask))
		FANOTIFY_PE(fsn_event)->state = FAN_EVENT_REPORTED;
out:
	spin_unlock(&group->notification_lock);
	return fsn_event;
}

static int create_fd(struct fsnotify_group *group,
		     struct fanotify_event *event,
		     struct file **file)
{
	int client_fd;
	struct file *new_file;

	pr_debug("%s: group=%p event=%p\n", __func__, group, event);

	client_fd = get_unused_fd_flags(group->fanotify_data.f_flags);
	if (client_fd < 0)
		return client_fd;

	/*
	 * We need a new file handle for the userspace program so it can read
	 * even if the file was originally opened O_WRONLY.
	 */
	/*
	 * It's possible this event was an overflow event. In that case dentry
	 * and mnt are NULL; that's fine, just don't call dentry_open().
	 */
	if (event->path.dentry && event->path.mnt)
		new_file = dentry_open(&event->path,
				       group->fanotify_data.f_flags | FMODE_NONOTIFY,
				       current_cred());
	else
		new_file = ERR_PTR(-EOVERFLOW);
	if (IS_ERR(new_file)) {
		/*
		 * We still send an event even if we can't open the file. This
		 * can happen when, say, tasks are gone and we try to open
		 * their /proc files, or we try to open a WRONLY file like in
		 * sysfs. We just send the errno to userspace since there isn't
		 * much else we can do.
		 */
		put_unused_fd(client_fd);
		client_fd = PTR_ERR(new_file);
	} else {
		*file = new_file;
	}

	return client_fd;
}

/*
 * Finish processing of a permission event by setting it to ANSWERED state and
 * dropping group->notification_lock.
 */
static void finish_permission_event(struct fsnotify_group *group,
				    struct fanotify_perm_event *event,
				    unsigned int response)
				    __releases(&group->notification_lock)
{
	bool destroy = false;

	assert_spin_locked(&group->notification_lock);
	event->response = response;
	if (event->state == FAN_EVENT_CANCELED)
		destroy = true;
	else
		event->state = FAN_EVENT_ANSWERED;
	spin_unlock(&group->notification_lock);
	if (destroy)
		fsnotify_destroy_event(group, &event->fae.fse);
}

static int process_access_response(struct fsnotify_group *group,
				   struct fanotify_response *response_struct)
{
	struct fanotify_perm_event *event;
	int fd = response_struct->fd;
	int response = response_struct->response;

	pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
		 fd, response);
	/*
	 * Make sure the response is valid. If it is invalid, we do nothing;
	 * either userspace can send a valid response or we will clean it up
	 * after the timeout.
	 */
	switch (response & ~FAN_AUDIT) {
	case FAN_ALLOW:
	case FAN_DENY:
		break;
	default:
		return -EINVAL;
	}

	if (fd < 0)
		return -EINVAL;

	if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
		return -EINVAL;

	spin_lock(&group->notification_lock);
	list_for_each_entry(event, &group->fanotify_data.access_list,
			    fae.fse.list) {
		if (fd != event->fd)
			continue;

		list_del_init(&event->fae.fse.list);
		finish_permission_event(group, event, response);
		wake_up(&group->fanotify_data.access_waitq);
		return 0;
	}
	spin_unlock(&group->notification_lock);

	return -ENOENT;
}

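/*
 * Illustrative userspace counterpart (a sketch, not part of this file):
 * a permission event is answered by writing a struct fanotify_response,
 * echoing back the fd from the event metadata:
 *
 *	struct fanotify_response resp = {
 *		.fd = metadata->fd,
 *		.response = FAN_ALLOW,	// or FAN_DENY, optionally | FAN_AUDIT
 *	};
 *	write(fanotify_fd, &resp, sizeof(resp));
 */
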
static int copy_fid_to_user(struct fanotify_event *event, char __user *buf)
{
	struct fanotify_event_info_fid info = { };
	struct file_handle handle = { };
	unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh;
	size_t fh_len = event->fh_len;
	size_t len = fanotify_event_info_len(event);

	if (!len)
		return 0;

	if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len))
		return -EFAULT;

	/* Copy event info fid header followed by variable sized file handle */
	info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID;
	info.hdr.len = len;
	info.fsid = event->fid.fsid;
	if (copy_to_user(buf, &info, sizeof(info)))
		return -EFAULT;

	buf += sizeof(info);
	len -= sizeof(info);
	handle.handle_type = event->fh_type;
	handle.handle_bytes = fh_len;
	if (copy_to_user(buf, &handle, sizeof(handle)))
		return -EFAULT;

	buf += sizeof(handle);
	len -= sizeof(handle);
	/*
	 * For an inline fh, copy through stack to exclude the copy from
	 * usercopy hardening protections.
	 */
	fh = fanotify_event_fh(event);
	if (fh_len <= FANOTIFY_INLINE_FH_LEN) {
		memcpy(bounce, fh, fh_len);
		fh = bounce;
	}
	if (copy_to_user(buf, fh, fh_len))
		return -EFAULT;

	/* Pad with zeroes */
	buf += fh_len;
	len -= fh_len;
	WARN_ON_ONCE(len >= FANOTIFY_EVENT_ALIGN);
	if (len > 0 && clear_user(buf, len))
		return -EFAULT;

	return 0;
}

static ssize_t copy_event_to_user(struct fsnotify_group *group,
				  struct fsnotify_event *fsn_event,
				  char __user *buf, size_t count)
{
	struct fanotify_event_metadata metadata;
	struct fanotify_event *event;
	struct file *f = NULL;
	int ret, fd = FAN_NOFD;

	pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);

	event = container_of(fsn_event, struct fanotify_event, fse);
	metadata.event_len = FAN_EVENT_METADATA_LEN;
	metadata.metadata_len = FAN_EVENT_METADATA_LEN;
	metadata.vers = FANOTIFY_METADATA_VERSION;
	metadata.reserved = 0;
	metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
	metadata.pid = pid_vnr(event->pid);

	if (fanotify_event_has_path(event)) {
		fd = create_fd(group, event, &f);
		if (fd < 0)
			return fd;
	} else if (fanotify_event_has_fid(event)) {
		metadata.event_len += fanotify_event_info_len(event);
	}
	metadata.fd = fd;

	ret = -EFAULT;
	/*
	 * Sanity check copy size in case get_one_event() and
	 * fill_event_metadata() event_len sizes ever get out of sync.
	 */
	if (WARN_ON_ONCE(metadata.event_len > count))
		goto out_close_fd;

	if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN))
		goto out_close_fd;

	if (fanotify_is_perm_event(event->mask))
		FANOTIFY_PE(fsn_event)->fd = fd;

	if (fanotify_event_has_path(event)) {
		fd_install(fd, f);
	} else if (fanotify_event_has_fid(event)) {
		ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN);
		if (ret < 0)
			return ret;
	}

	return metadata.event_len;

out_close_fd:
	if (fd != FAN_NOFD) {
		put_unused_fd(fd);
		fput(f);
	}
	return ret;
}

/* fanotify userspace file descriptor functions */
static __poll_t fanotify_poll(struct file *file, poll_table *wait)
{
	struct fsnotify_group *group = file->private_data;
	__poll_t ret = 0;

	poll_wait(file, &group->notification_waitq, wait);
	spin_lock(&group->notification_lock);
	if (!fsnotify_notify_queue_is_empty(group))
		ret = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&group->notification_lock);

	return ret;
}

static ssize_t fanotify_read(struct file *file, char __user *buf,
			     size_t count, loff_t *pos)
{
	struct fsnotify_group *group;
	struct fsnotify_event *kevent;
	char __user *start;
	int ret;
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	start = buf;
	group = file->private_data;

	pr_debug("%s: group=%p\n", __func__, group);

	add_wait_queue(&group->notification_waitq, &wait);
	while (1) {
		kevent = get_one_event(group, count);
		if (IS_ERR(kevent)) {
			ret = PTR_ERR(kevent);
			break;
		}

		if (!kevent) {
			ret = -EAGAIN;
			if (file->f_flags & O_NONBLOCK)
				break;

			ret = -ERESTARTSYS;
			if (signal_pending(current))
				break;

			if (start != buf)
				break;

			wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
			continue;
		}

		ret = copy_event_to_user(group, kevent, buf, count);
		if (unlikely(ret == -EOPENSTALE)) {
			/*
			 * We cannot report events with stale fd so drop it.
			 * Setting ret to 0 will continue the event loop and
			 * do the right thing if there are no more events to
			 * read (i.e. return bytes read, -EAGAIN or wait).
			 */
			ret = 0;
		}

		/*
		 * Permission events get queued to wait for response. Other
		 * events can be destroyed now.
		 */
		if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) {
			fsnotify_destroy_event(group, kevent);
		} else {
			if (ret <= 0) {
				spin_lock(&group->notification_lock);
				finish_permission_event(group,
					FANOTIFY_PE(kevent), FAN_DENY);
				wake_up(&group->fanotify_data.access_waitq);
			} else {
				spin_lock(&group->notification_lock);
				list_add_tail(&kevent->list,
					&group->fanotify_data.access_list);
				spin_unlock(&group->notification_lock);
			}
		}
		if (ret < 0)
			break;
		buf += ret;
		count -= ret;
	}
	remove_wait_queue(&group->notification_waitq, &wait);

	if (start != buf && ret != -EFAULT)
		ret = buf - start;
	return ret;
}

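/*
 * Illustrative userspace read loop (a sketch, not part of this file),
 * using the record-walking helpers from the fanotify uapi header:
 *
 *	char buf[4096];
 *	ssize_t len = read(fanotify_fd, buf, sizeof(buf));
 *	struct fanotify_event_metadata *md = (void *)buf;
 *
 *	for (; FAN_EVENT_OK(md, len); md = FAN_EVENT_NEXT(md, len)) {
 *		// handle md->mask, md->pid, md->fd here
 *		if (md->fd >= 0)
 *			close(md->fd);
 *	}
 */
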
static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
	struct fanotify_response response = { .fd = -1, .response = -1 };
	struct fsnotify_group *group;
	int ret;

	if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
		return -EINVAL;

	group = file->private_data;

	if (count > sizeof(response))
		count = sizeof(response);

	pr_debug("%s: group=%p count=%zu\n", __func__, group, count);

	if (copy_from_user(&response, buf, count))
		return -EFAULT;

	ret = process_access_response(group, &response);
	if (ret < 0)
		count = ret;

	return count;
}

static int fanotify_release(struct inode *ignored, struct file *file)
{
	struct fsnotify_group *group = file->private_data;
	struct fanotify_perm_event *event;
	struct fsnotify_event *fsn_event;

	/*
	 * Stop new events from arriving in the notification queue. Since
	 * userspace cannot use the fanotify fd anymore, no event can enter or
	 * leave access_list by now either.
	 */
	fsnotify_group_stop_queueing(group);

	/*
	 * Process all permission events on access_list and notification queue
	 * and simulate reply from userspace.
	 */
	spin_lock(&group->notification_lock);
	while (!list_empty(&group->fanotify_data.access_list)) {
		event = list_first_entry(&group->fanotify_data.access_list,
				struct fanotify_perm_event, fae.fse.list);
		list_del_init(&event->fae.fse.list);
		finish_permission_event(group, event, FAN_ALLOW);
		spin_lock(&group->notification_lock);
	}

	/*
	 * Destroy all non-permission events. For permission events just
	 * dequeue them and set the response. They will be freed once the
	 * response is consumed and fanotify_get_response() returns.
	 */
	while (!fsnotify_notify_queue_is_empty(group)) {
		fsn_event = fsnotify_remove_first_event(group);
		if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) {
			spin_unlock(&group->notification_lock);
			fsnotify_destroy_event(group, fsn_event);
		} else {
			finish_permission_event(group, FANOTIFY_PE(fsn_event),
						FAN_ALLOW);
		}
		spin_lock(&group->notification_lock);
	}
	spin_unlock(&group->notification_lock);

	/* Response for all permission events is set, wake up waiters */
	wake_up(&group->fanotify_data.access_waitq);

	/* matches the fanotify_init->fsnotify_alloc_group */
	fsnotify_destroy_group(group);

	return 0;
}

static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct fsnotify_group *group;
	struct fsnotify_event *fsn_event;
	void __user *p;
	int ret = -ENOTTY;
	size_t send_len = 0;

	group = file->private_data;

	p = (void __user *) arg;

	switch (cmd) {
	case FIONREAD:
		spin_lock(&group->notification_lock);
		list_for_each_entry(fsn_event, &group->notification_list, list)
			send_len += FAN_EVENT_METADATA_LEN;
		spin_unlock(&group->notification_lock);
		ret = put_user(send_len, (int __user *) p);
		break;
	}

	return ret;
}

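/*
 * Illustrative use from userspace (a sketch, not part of this file):
 * FIONREAD reports how many bytes of queued event metadata a read()
 * would have to consume:
 *
 *	int bytes;
 *	ioctl(fanotify_fd, FIONREAD, &bytes);
 */
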
static const struct file_operations fanotify_fops = {
	.show_fdinfo	= fanotify_show_fdinfo,
	.poll		= fanotify_poll,
	.read		= fanotify_read,
	.write		= fanotify_write,
	.fasync		= NULL,
	.release	= fanotify_release,
	.unlocked_ioctl	= fanotify_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};

static int fanotify_find_path(int dfd, const char __user *filename,
			      struct path *path, unsigned int flags, __u64 mask,
			      unsigned int obj_type)
{
	int ret;

	pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
		 dfd, filename, flags);

	if (filename == NULL) {
		struct fd f = fdget(dfd);

		ret = -EBADF;
		if (!f.file)
			goto out;

		ret = -ENOTDIR;
		if ((flags & FAN_MARK_ONLYDIR) &&
		    !(S_ISDIR(file_inode(f.file)->i_mode))) {
			fdput(f);
			goto out;
		}

		*path = f.file->f_path;
		path_get(path);
		fdput(f);
	} else {
		unsigned int lookup_flags = 0;

		if (!(flags & FAN_MARK_DONT_FOLLOW))
			lookup_flags |= LOOKUP_FOLLOW;
		if (flags & FAN_MARK_ONLYDIR)
			lookup_flags |= LOOKUP_DIRECTORY;

		ret = user_path_at(dfd, filename, lookup_flags, path);
		if (ret)
			goto out;
	}

	/* you can only watch an inode if you have read permissions on it */
	ret = inode_permission(path->dentry->d_inode, MAY_READ);
	if (ret) {
		path_put(path);
		goto out;
	}

	ret = security_path_notify(path, mask, obj_type);
	if (ret)
		path_put(path);

out:
	return ret;
}

static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
					    __u32 mask, unsigned int flags,
					    int *destroy)
{
	__u32 oldmask = 0;

	spin_lock(&fsn_mark->lock);
	if (!(flags & FAN_MARK_IGNORED_MASK)) {
		oldmask = fsn_mark->mask;
		fsn_mark->mask &= ~mask;
	} else {
		fsn_mark->ignored_mask &= ~mask;
	}
	*destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
	spin_unlock(&fsn_mark->lock);

	return mask & oldmask;
}

static int fanotify_remove_mark(struct fsnotify_group *group,
				fsnotify_connp_t *connp, __u32 mask,
				unsigned int flags)
{
	struct fsnotify_mark *fsn_mark = NULL;
	__u32 removed;
	int destroy_mark;

	mutex_lock(&group->mark_mutex);
	fsn_mark = fsnotify_find_mark(connp, group);
	if (!fsn_mark) {
		mutex_unlock(&group->mark_mutex);
		return -ENOENT;
	}

	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
						 &destroy_mark);
	if (removed & fsnotify_conn_mask(fsn_mark->connector))
		fsnotify_recalc_mask(fsn_mark->connector);
	if (destroy_mark)
		fsnotify_detach_mark(fsn_mark);
	mutex_unlock(&group->mark_mutex);
	if (destroy_mark)
		fsnotify_free_mark(fsn_mark);

	/* matches the fsnotify_find_mark() */
	fsnotify_put_mark(fsn_mark);
	return 0;
}

static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
					 struct vfsmount *mnt, __u32 mask,
					 unsigned int flags)
{
	return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
				    mask, flags);
}

static int fanotify_remove_sb_mark(struct fsnotify_group *group,
				   struct super_block *sb, __u32 mask,
				   unsigned int flags)
{
	return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags);
}

static int fanotify_remove_inode_mark(struct fsnotify_group *group,
				      struct inode *inode, __u32 mask,
				      unsigned int flags)
{
	return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask,
				    flags);
}

static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
				       __u32 mask,
				       unsigned int flags)
{
	__u32 oldmask = -1;

	spin_lock(&fsn_mark->lock);
	if (!(flags & FAN_MARK_IGNORED_MASK)) {
		oldmask = fsn_mark->mask;
		fsn_mark->mask |= mask;
	} else {
		fsn_mark->ignored_mask |= mask;
		if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
			fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
	}
	spin_unlock(&fsn_mark->lock);

	return mask & ~oldmask;
}

static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
						   fsnotify_connp_t *connp,
						   unsigned int type,
						   __kernel_fsid_t *fsid)
{
	struct fsnotify_mark *mark;
	int ret;

	if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
		return ERR_PTR(-ENOSPC);

	mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
	if (!mark)
		return ERR_PTR(-ENOMEM);

	fsnotify_init_mark(mark, group);
	ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid);
	if (ret) {
		fsnotify_put_mark(mark);
		return ERR_PTR(ret);
	}

	return mark;
}

static int fanotify_add_mark(struct fsnotify_group *group,
			     fsnotify_connp_t *connp, unsigned int type,
			     __u32 mask, unsigned int flags,
			     __kernel_fsid_t *fsid)
{
	struct fsnotify_mark *fsn_mark;
	__u32 added;

	mutex_lock(&group->mark_mutex);
	fsn_mark = fsnotify_find_mark(connp, group);
	if (!fsn_mark) {
		fsn_mark = fanotify_add_new_mark(group, connp, type, fsid);
		if (IS_ERR(fsn_mark)) {
			mutex_unlock(&group->mark_mutex);
			return PTR_ERR(fsn_mark);
		}
	}
	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
	if (added & ~fsnotify_conn_mask(fsn_mark->connector))
		fsnotify_recalc_mask(fsn_mark->connector);
	mutex_unlock(&group->mark_mutex);

	fsnotify_put_mark(fsn_mark);
	return 0;
}

static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
				      struct vfsmount *mnt, __u32 mask,
				      unsigned int flags, __kernel_fsid_t *fsid)
{
	return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
				 FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
}

static int fanotify_add_sb_mark(struct fsnotify_group *group,
				struct super_block *sb, __u32 mask,
				unsigned int flags, __kernel_fsid_t *fsid)
{
	return fanotify_add_mark(group, &sb->s_fsnotify_marks,
				 FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
}

static int fanotify_add_inode_mark(struct fsnotify_group *group,
				   struct inode *inode, __u32 mask,
				   unsigned int flags, __kernel_fsid_t *fsid)
{
	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);

	/*
	 * If some other task has this inode open for write we should not add
	 * an ignored mark, unless that ignored mark is supposed to survive
	 * modification changes anyway.
	 */
	if ((flags & FAN_MARK_IGNORED_MASK) &&
	    !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
	    inode_is_open_for_write(inode))
		return 0;

	return fanotify_add_mark(group, &inode->i_fsnotify_marks,
				 FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid);
}

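/*
 * Illustrative use from userspace (a sketch, not part of this file):
 * an ignore mark that should survive modification of the inode is
 * added with FAN_MARK_IGNORED_SURV_MODIFY, e.g.
 *
 *	fanotify_mark(fanotify_fd, FAN_MARK_ADD | FAN_MARK_IGNORED_MASK |
 *		      FAN_MARK_IGNORED_SURV_MODIFY,
 *		      FAN_OPEN | FAN_MODIFY, AT_FDCWD, "/some/path");
 */
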
/* fanotify syscalls */
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{
	struct fsnotify_group *group;
	int f_flags, fd;
	struct user_struct *user;
	struct fanotify_event *oevent;

	pr_debug("%s: flags=%x event_f_flags=%x\n",
		 __func__, flags, event_f_flags);

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

#ifdef CONFIG_AUDITSYSCALL
	if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
#else
	if (flags & ~FANOTIFY_INIT_FLAGS)
#endif
		return -EINVAL;

	if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
		return -EINVAL;

	switch (event_f_flags & O_ACCMODE) {
	case O_RDONLY:
	case O_RDWR:
	case O_WRONLY:
		break;
	default:
		return -EINVAL;
	}

	if ((flags & FAN_REPORT_FID) &&
	    (flags & FANOTIFY_CLASS_BITS) != FAN_CLASS_NOTIF)
		return -EINVAL;

	user = get_current_user();
	if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
		free_uid(user);
		return -EMFILE;
	}

	f_flags = O_RDWR | FMODE_NONOTIFY;
	if (flags & FAN_CLOEXEC)
		f_flags |= O_CLOEXEC;
	if (flags & FAN_NONBLOCK)
		f_flags |= O_NONBLOCK;

	/* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
	group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
	if (IS_ERR(group)) {
		free_uid(user);
		return PTR_ERR(group);
	}

	group->fanotify_data.user = user;
	group->fanotify_data.flags = flags;
	atomic_inc(&user->fanotify_listeners);
	group->memcg = get_mem_cgroup_from_mm(current->mm);

	oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL,
				      FSNOTIFY_EVENT_NONE, NULL);
	if (unlikely(!oevent)) {
		fd = -ENOMEM;
		goto out_destroy_group;
	}
	group->overflow_event = &oevent->fse;

	if (force_o_largefile())
		event_f_flags |= O_LARGEFILE;
	group->fanotify_data.f_flags = event_f_flags;
	init_waitqueue_head(&group->fanotify_data.access_waitq);
	INIT_LIST_HEAD(&group->fanotify_data.access_list);
	switch (flags & FANOTIFY_CLASS_BITS) {
	case FAN_CLASS_NOTIF:
		group->priority = FS_PRIO_0;
		break;
	case FAN_CLASS_CONTENT:
		group->priority = FS_PRIO_1;
		break;
	case FAN_CLASS_PRE_CONTENT:
		group->priority = FS_PRIO_2;
		break;
	default:
		fd = -EINVAL;
		goto out_destroy_group;
	}

	if (flags & FAN_UNLIMITED_QUEUE) {
		fd = -EPERM;
		if (!capable(CAP_SYS_ADMIN))
			goto out_destroy_group;
		group->max_events = UINT_MAX;
	} else {
		group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
	}

	if (flags & FAN_UNLIMITED_MARKS) {
		fd = -EPERM;
		if (!capable(CAP_SYS_ADMIN))
			goto out_destroy_group;
		group->fanotify_data.max_marks = UINT_MAX;
	} else {
		group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
	}

	if (flags & FAN_ENABLE_AUDIT) {
		fd = -EPERM;
		if (!capable(CAP_AUDIT_WRITE))
			goto out_destroy_group;
	}

	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
	if (fd < 0)
		goto out_destroy_group;

	return fd;

out_destroy_group:
	fsnotify_destroy_group(group);
	return fd;
}

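/*
 * Illustrative userspace call (a sketch, not part of this file):
 *
 *	int fanotify_fd = fanotify_init(FAN_CLOEXEC | FAN_CLASS_CONTENT,
 *					O_RDONLY | O_LARGEFILE);
 */
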
/* Check if filesystem can encode a unique fid */
static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
{
	__kernel_fsid_t root_fsid;
	int err;

	/*
	 * Make sure path is not in filesystem with zero fsid (e.g. tmpfs).
	 */
	err = vfs_get_fsid(path->dentry, fsid);
	if (err)
		return err;

	if (!fsid->val[0] && !fsid->val[1])
		return -ENODEV;

	/*
	 * Make sure path is not inside a filesystem subvolume (e.g. btrfs)
	 * which uses a different fsid than sb root.
	 */
	err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid);
	if (err)
		return err;

	if (root_fsid.val[0] != fsid->val[0] ||
	    root_fsid.val[1] != fsid->val[1])
		return -EXDEV;

	/*
	 * We need to make sure that the file system supports at least
	 * encoding a file handle so user can use name_to_handle_at() to
	 * compare fid returned with event to the file handle of watched
	 * objects. However, name_to_handle_at() requires that the
	 * filesystem also supports decoding file handles.
	 */
	if (!path->dentry->d_sb->s_export_op ||
	    !path->dentry->d_sb->s_export_op->fh_to_dentry)
		return -EOPNOTSUPP;

	return 0;
}

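/*
 * Illustrative userspace counterpart (a sketch, not part of this file):
 * with FAN_REPORT_FID, the fid reported in an event can be compared to
 * the handle of a watched object obtained via name_to_handle_at():
 *
 *	struct file_handle *fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
 *	int mount_id;
 *
 *	fh->handle_bytes = MAX_HANDLE_SZ;
 *	name_to_handle_at(AT_FDCWD, "/watched/object", fh, &mount_id, 0);
 */
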
static int fanotify_events_supported(struct path *path, __u64 mask)
{
	/*
	 * Some filesystems such as 'proc' acquire unusual locks when opening
	 * files. For such filesystems, fanotify permission events can easily
	 * deadlock the system: the open done when reporting an fanotify event
	 * blocks on this "unusual" lock while another process holding the lock
	 * waits for the fanotify permission event to be answered. Just
	 * disallow permission events for such filesystems.
	 */
	if (mask & FANOTIFY_PERM_EVENTS &&
	    path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM)
		return -EINVAL;
	return 0;
}

static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
			    int dfd, const char __user *pathname)
{
	struct inode *inode = NULL;
	struct vfsmount *mnt = NULL;
	struct fsnotify_group *group;
	struct fd f;
	struct path path;
	__kernel_fsid_t __fsid, *fsid = NULL;
	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
	unsigned int obj_type;
	int ret;

	pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
		 __func__, fanotify_fd, flags, dfd, pathname, mask);

	/* we only use the lower 32 bits as of right now. */
	if (mask & ((__u64)0xffffffff << 32))
		return -EINVAL;

	if (flags & ~FANOTIFY_MARK_FLAGS)
		return -EINVAL;

	switch (mark_type) {
	case FAN_MARK_INODE:
		obj_type = FSNOTIFY_OBJ_TYPE_INODE;
		break;
	case FAN_MARK_MOUNT:
		obj_type = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
		break;
	case FAN_MARK_FILESYSTEM:
		obj_type = FSNOTIFY_OBJ_TYPE_SB;
		break;
	default:
		return -EINVAL;
	}

	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
	case FAN_MARK_ADD:		/* fallthrough */
	case FAN_MARK_REMOVE:
		if (!mask)
			return -EINVAL;
		break;
	case FAN_MARK_FLUSH:
		if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH))
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
		valid_mask |= FANOTIFY_PERM_EVENTS;

	if (mask & ~valid_mask)
		return -EINVAL;

	f = fdget(fanotify_fd);
	if (unlikely(!f.file))
		return -EBADF;

	/* verify that this is indeed an fanotify instance */
	ret = -EINVAL;
	if (unlikely(f.file->f_op != &fanotify_fops))
		goto fput_and_out;
	group = f.file->private_data;

	/*
	 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
	 * allowed to set permission events.
	 */
	ret = -EINVAL;
	if (mask & FANOTIFY_PERM_EVENTS &&
	    group->priority == FS_PRIO_0)
		goto fput_and_out;

	/*
	 * Events with data type inode do not carry enough information to report
	 * event->fd, so we do not allow setting a mask for inode events unless
	 * group supports reporting fid.
	 * inode events are not supported on a mount mark, because they do not
	 * carry enough information (i.e. path) to be filtered by mount point.
	 */
	if (mask & FANOTIFY_INODE_EVENTS &&
	    (!FAN_GROUP_FLAG(group, FAN_REPORT_FID) ||
	     mark_type == FAN_MARK_MOUNT))
		goto fput_and_out;

	if (flags & FAN_MARK_FLUSH) {
		ret = 0;
		if (mark_type == FAN_MARK_MOUNT)
			fsnotify_clear_vfsmount_marks_by_group(group);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			fsnotify_clear_sb_marks_by_group(group);
		else
			fsnotify_clear_inode_marks_by_group(group);
		goto fput_and_out;
	}

	ret = fanotify_find_path(dfd, pathname, &path, flags,
			(mask & ALL_FSNOTIFY_EVENTS), obj_type);
	if (ret)
		goto fput_and_out;

	if (flags & FAN_MARK_ADD) {
		ret = fanotify_events_supported(&path, mask);
		if (ret)
			goto path_put_and_out;
	}

	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
		ret = fanotify_test_fid(&path, &__fsid);
		if (ret)
			goto path_put_and_out;

		fsid = &__fsid;
	}

	/* inode held in place by reference to path; group by fget on fd */
	if (mark_type == FAN_MARK_INODE)
		inode = path.dentry->d_inode;
	else
		mnt = path.mnt;

	/* create/update an inode mark */
	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
	case FAN_MARK_ADD:
		if (mark_type == FAN_MARK_MOUNT)
			ret = fanotify_add_vfsmount_mark(group, mnt, mask,
							 flags, fsid);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask,
						   flags, fsid);
		else
			ret = fanotify_add_inode_mark(group, inode, mask,
						      flags, fsid);
		break;
	case FAN_MARK_REMOVE:
		if (mark_type == FAN_MARK_MOUNT)
			ret = fanotify_remove_vfsmount_mark(group, mnt, mask,
							    flags);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask,
						      flags);
		else
			ret = fanotify_remove_inode_mark(group, inode, mask,
							 flags);
		break;
	default:
		ret = -EINVAL;
	}

path_put_and_out:
	path_put(&path);
fput_and_out:
	fdput(f);
	return ret;
}

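/*
 * Illustrative userspace call (a sketch, not part of this file; permission
 * events additionally require a FAN_CLASS_CONTENT or FAN_CLASS_PRE_CONTENT
 * group):
 *
 *	fanotify_mark(fanotify_fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
 *		      FAN_OPEN_PERM | FAN_CLOSE_WRITE, AT_FDCWD, "/mnt");
 */
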
SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
		__u64, mask, int, dfd,
		const char __user *, pathname)
{
	return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE6(fanotify_mark,
				int, fanotify_fd, unsigned int, flags,
				__u32, mask0, __u32, mask1, int, dfd,
				const char __user *, pathname)
{
	return do_fanotify_mark(fanotify_fd, flags,
#ifdef __BIG_ENDIAN
				((__u64)mask0 << 32) | mask1,
#else
				((__u64)mask1 << 32) | mask0,
#endif
				dfd, pathname);
}
#endif

/*
 * fanotify_user_setup - Our initialization function. Note that we cannot return
 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
 * must result in panic().
 */
static int __init fanotify_user_setup(void)
{
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 8);
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);

	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
					 SLAB_PANIC|SLAB_ACCOUNT);
	fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC);
	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
		fanotify_perm_event_cachep =
			KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
	}

	return 0;
}
device_initcall(fanotify_user_setup);