// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 */
#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/srcu.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
/*
 * Clear all of the marks on an inode when it is being evicted from core
 *
 * @inode: inode whose marks are handed to fsnotify_clear_marks_by_inode().
 */
void __fsnotify_inode_delete(struct inode *inode)
{
	fsnotify_clear_marks_by_inode(inode);
}
EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
/*
 * Thin wrapper: forwards @mnt to fsnotify_clear_marks_by_mount().
 */
void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
{
	fsnotify_clear_marks_by_mount(mnt);
}
32 * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
33 * @sb: superblock being unmounted.
35 * Called during unmount with no locks held, so needs to be safe against
36 * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
38 static void fsnotify_unmount_inodes(struct super_block
*sb
)
40 struct inode
*inode
, *iput_inode
= NULL
;
42 spin_lock(&sb
->s_inode_list_lock
);
43 list_for_each_entry(inode
, &sb
->s_inodes
, i_sb_list
) {
45 * We cannot __iget() an inode in state I_FREEING,
46 * I_WILL_FREE, or I_NEW which is fine because by that point
47 * the inode cannot have any associated watches.
49 spin_lock(&inode
->i_lock
);
50 if (inode
->i_state
& (I_FREEING
|I_WILL_FREE
|I_NEW
)) {
51 spin_unlock(&inode
->i_lock
);
56 * If i_count is zero, the inode cannot have any watches and
57 * doing an __iget/iput with SB_ACTIVE clear would actually
58 * evict all inodes with zero i_count from icache which is
59 * unnecessarily violent and may in fact be illegal to do.
60 * However, we should have been called /after/ evict_inodes
61 * removed all zero refcount inodes, in any case. Test to
64 if (!atomic_read(&inode
->i_count
)) {
65 spin_unlock(&inode
->i_lock
);
70 spin_unlock(&inode
->i_lock
);
71 spin_unlock(&sb
->s_inode_list_lock
);
75 /* for each watch, send FS_UNMOUNT and then remove it */
76 fsnotify_inode(inode
, FS_UNMOUNT
);
78 fsnotify_inode_delete(inode
);
83 spin_lock(&sb
->s_inode_list_lock
);
85 spin_unlock(&sb
->s_inode_list_lock
);
90 void fsnotify_sb_delete(struct super_block
*sb
)
92 struct fsnotify_sb_info
*sbinfo
= fsnotify_sb_info(sb
);
94 /* Were any marks ever added to any object on this sb? */
98 fsnotify_unmount_inodes(sb
);
99 fsnotify_clear_marks_by_sb(sb
);
100 /* Wait for outstanding object references from connectors */
101 wait_var_event(fsnotify_sb_watched_objects(sb
),
102 !atomic_long_read(fsnotify_sb_watched_objects(sb
)));
103 WARN_ON(fsnotify_sb_has_priority_watchers(sb
, FSNOTIFY_PRIO_CONTENT
));
104 WARN_ON(fsnotify_sb_has_priority_watchers(sb
,
105 FSNOTIFY_PRIO_PRE_CONTENT
));
108 void fsnotify_sb_free(struct super_block
*sb
)
110 kfree(sb
->s_fsnotify_info
);
114 * Given an inode, first check if we care what happens to our children. Inotify
115 * and dnotify both tell their parents about events. If we care about any event
116 * on a child we run all of our children and set a dentry flag saying that the
117 * parent cares. Thus when an event happens on a child it can quickly tell
118 * if there is a need to find a parent and send the event to the parent.
120 void fsnotify_set_children_dentry_flags(struct inode
*inode
)
122 struct dentry
*alias
;
124 if (!S_ISDIR(inode
->i_mode
))
127 spin_lock(&inode
->i_lock
);
128 /* run all of the dentries associated with this inode. Since this is a
129 * directory, there damn well better only be one item on this list */
130 hlist_for_each_entry(alias
, &inode
->i_dentry
, d_u
.d_alias
) {
131 struct dentry
*child
;
133 /* run all of the children of the original inode and fix their
134 * d_flags to indicate parental interest (their parent is the
136 spin_lock(&alias
->d_lock
);
137 hlist_for_each_entry(child
, &alias
->d_children
, d_sib
) {
141 spin_lock_nested(&child
->d_lock
, DENTRY_D_LOCK_NESTED
);
142 child
->d_flags
|= DCACHE_FSNOTIFY_PARENT_WATCHED
;
143 spin_unlock(&child
->d_lock
);
145 spin_unlock(&alias
->d_lock
);
147 spin_unlock(&inode
->i_lock
);
151 * Lazily clear false positive PARENT_WATCHED flag for child whose parent had
152 * stopped watching children.
154 static void fsnotify_clear_child_dentry_flag(struct inode
*pinode
,
155 struct dentry
*dentry
)
157 spin_lock(&dentry
->d_lock
);
159 * d_lock is a sufficient barrier to prevent observing a non-watched
160 * parent state from before the fsnotify_set_children_dentry_flags()
161 * or fsnotify_update_flags() call that had set PARENT_WATCHED.
163 if (!fsnotify_inode_watches_children(pinode
))
164 dentry
->d_flags
&= ~DCACHE_FSNOTIFY_PARENT_WATCHED
;
165 spin_unlock(&dentry
->d_lock
);
168 /* Are inode/sb/mount interested in parent and name info with this event? */
169 static bool fsnotify_event_needs_parent(struct inode
*inode
, __u32 mnt_mask
,
172 __u32 marks_mask
= 0;
174 /* We only send parent/name to inode/sb/mount for events on non-dir */
179 * All events that are possible on child can also may be reported with
180 * parent/name info to inode/sb/mount. Otherwise, a watching parent
181 * could result in events reported with unexpected name info to sb/mount.
183 BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD
& ~FS_EVENTS_POSS_TO_PARENT
);
185 /* Did either inode/sb/mount subscribe for events with parent/name? */
186 marks_mask
|= fsnotify_parent_needed_mask(inode
->i_fsnotify_mask
);
187 marks_mask
|= fsnotify_parent_needed_mask(inode
->i_sb
->s_fsnotify_mask
);
188 marks_mask
|= fsnotify_parent_needed_mask(mnt_mask
);
190 /* Did they subscribe for this event with parent/name info? */
191 return mask
& marks_mask
;
194 /* Are there any inode/mount/sb objects that are interested in this event? */
195 static inline bool fsnotify_object_watched(struct inode
*inode
, __u32 mnt_mask
,
198 __u32 marks_mask
= inode
->i_fsnotify_mask
| mnt_mask
|
199 inode
->i_sb
->s_fsnotify_mask
;
201 return mask
& marks_mask
& ALL_FSNOTIFY_EVENTS
;
205 * Notify this dentry's parent about a child's events with child name info
206 * if parent is watching or if inode/sb/mount are interested in events with
207 * parent and name info.
209 * Notify only the child without name info if parent is not watching and
210 * inode/sb/mount are not interested in events with parent and name info.
212 int __fsnotify_parent(struct dentry
*dentry
, __u32 mask
, const void *data
,
215 const struct path
*path
= fsnotify_data_path(data
, data_type
);
216 __u32 mnt_mask
= path
? real_mount(path
->mnt
)->mnt_fsnotify_mask
: 0;
217 struct inode
*inode
= d_inode(dentry
);
218 struct dentry
*parent
;
219 bool parent_watched
= dentry
->d_flags
& DCACHE_FSNOTIFY_PARENT_WATCHED
;
220 bool parent_needed
, parent_interested
;
222 struct inode
*p_inode
= NULL
;
223 struct name_snapshot name
;
224 struct qstr
*file_name
= NULL
;
227 /* Optimize the likely case of nobody watching this path */
228 if (likely(!parent_watched
&&
229 !fsnotify_object_watched(inode
, mnt_mask
, mask
)))
233 parent_needed
= fsnotify_event_needs_parent(inode
, mnt_mask
, mask
);
234 if (!parent_watched
&& !parent_needed
)
237 /* Does parent inode care about events on children? */
238 parent
= dget_parent(dentry
);
239 p_inode
= parent
->d_inode
;
240 p_mask
= fsnotify_inode_watches_children(p_inode
);
241 if (unlikely(parent_watched
&& !p_mask
))
242 fsnotify_clear_child_dentry_flag(p_inode
, dentry
);
245 * Include parent/name in notification either if some notification
246 * groups require parent info or the parent is interested in this event.
248 parent_interested
= mask
& p_mask
& ALL_FSNOTIFY_EVENTS
;
249 if (parent_needed
|| parent_interested
) {
250 /* When notifying parent, child should be passed as data */
251 WARN_ON_ONCE(inode
!= fsnotify_data_inode(data
, data_type
));
253 /* Notify both parent and child with child name info */
254 take_dentry_name_snapshot(&name
, dentry
);
255 file_name
= &name
.name
;
256 if (parent_interested
)
257 mask
|= FS_EVENT_ON_CHILD
;
261 ret
= fsnotify(mask
, data
, data_type
, p_inode
, file_name
, inode
, 0);
264 release_dentry_name_snapshot(&name
);
269 EXPORT_SYMBOL_GPL(__fsnotify_parent
);
271 static int fsnotify_handle_inode_event(struct fsnotify_group
*group
,
272 struct fsnotify_mark
*inode_mark
,
273 u32 mask
, const void *data
, int data_type
,
274 struct inode
*dir
, const struct qstr
*name
,
277 const struct path
*path
= fsnotify_data_path(data
, data_type
);
278 struct inode
*inode
= fsnotify_data_inode(data
, data_type
);
279 const struct fsnotify_ops
*ops
= group
->ops
;
281 if (WARN_ON_ONCE(!ops
->handle_inode_event
))
284 if (WARN_ON_ONCE(!inode
&& !dir
))
287 if ((inode_mark
->flags
& FSNOTIFY_MARK_FLAG_EXCL_UNLINK
) &&
288 path
&& d_unlinked(path
->dentry
))
291 /* Check interest of this mark in case event was sent with two marks */
292 if (!(mask
& inode_mark
->mask
& ALL_FSNOTIFY_EVENTS
))
295 return ops
->handle_inode_event(inode_mark
, mask
, inode
, dir
, name
, cookie
);
298 static int fsnotify_handle_event(struct fsnotify_group
*group
, __u32 mask
,
299 const void *data
, int data_type
,
300 struct inode
*dir
, const struct qstr
*name
,
301 u32 cookie
, struct fsnotify_iter_info
*iter_info
)
303 struct fsnotify_mark
*inode_mark
= fsnotify_iter_inode_mark(iter_info
);
304 struct fsnotify_mark
*parent_mark
= fsnotify_iter_parent_mark(iter_info
);
307 if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info
)) ||
308 WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info
)))
312 * For FS_RENAME, 'dir' is old dir and 'data' is new dentry.
313 * The only ->handle_inode_event() backend that supports FS_RENAME is
314 * dnotify, where it means file was renamed within same parent.
316 if (mask
& FS_RENAME
) {
317 struct dentry
*moved
= fsnotify_data_dentry(data
, data_type
);
319 if (dir
!= moved
->d_parent
->d_inode
)
324 ret
= fsnotify_handle_inode_event(group
, parent_mark
, mask
,
325 data
, data_type
, dir
, name
, 0);
333 if (mask
& FS_EVENT_ON_CHILD
) {
335 * Some events can be sent on both parent dir and child marks
336 * (e.g. FS_ATTRIB). If both parent dir and child are
337 * watching, report the event once to parent dir with name (if
338 * interested) and once to child without name (if interested).
339 * The child watcher is expecting an event without a file name
340 * and without the FS_EVENT_ON_CHILD flag.
342 mask
&= ~FS_EVENT_ON_CHILD
;
347 return fsnotify_handle_inode_event(group
, inode_mark
, mask
, data
, data_type
,
351 static int send_to_group(__u32 mask
, const void *data
, int data_type
,
352 struct inode
*dir
, const struct qstr
*file_name
,
353 u32 cookie
, struct fsnotify_iter_info
*iter_info
)
355 struct fsnotify_group
*group
= NULL
;
356 __u32 test_mask
= (mask
& ALL_FSNOTIFY_EVENTS
);
357 __u32 marks_mask
= 0;
358 __u32 marks_ignore_mask
= 0;
359 bool is_dir
= mask
& FS_ISDIR
;
360 struct fsnotify_mark
*mark
;
363 if (!iter_info
->report_mask
)
366 /* clear ignored on inode modification */
367 if (mask
& FS_MODIFY
) {
368 fsnotify_foreach_iter_mark_type(iter_info
, mark
, type
) {
370 FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY
))
371 mark
->ignore_mask
= 0;
375 /* Are any of the group marks interested in this event? */
376 fsnotify_foreach_iter_mark_type(iter_info
, mark
, type
) {
378 marks_mask
|= mark
->mask
;
380 fsnotify_effective_ignore_mask(mark
, is_dir
, type
);
383 pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
384 __func__
, group
, mask
, marks_mask
, marks_ignore_mask
,
385 data
, data_type
, dir
, cookie
);
387 if (!(test_mask
& marks_mask
& ~marks_ignore_mask
))
390 if (group
->ops
->handle_event
) {
391 return group
->ops
->handle_event(group
, mask
, data
, data_type
, dir
,
392 file_name
, cookie
, iter_info
);
395 return fsnotify_handle_event(group
, mask
, data
, data_type
, dir
,
396 file_name
, cookie
, iter_info
);
399 static struct fsnotify_mark
*fsnotify_first_mark(struct fsnotify_mark_connector
**connp
)
401 struct fsnotify_mark_connector
*conn
;
402 struct hlist_node
*node
= NULL
;
404 conn
= srcu_dereference(*connp
, &fsnotify_mark_srcu
);
406 node
= srcu_dereference(conn
->list
.first
, &fsnotify_mark_srcu
);
408 return hlist_entry_safe(node
, struct fsnotify_mark
, obj_list
);
411 static struct fsnotify_mark
*fsnotify_next_mark(struct fsnotify_mark
*mark
)
413 struct hlist_node
*node
= NULL
;
416 node
= srcu_dereference(mark
->obj_list
.next
,
417 &fsnotify_mark_srcu
);
419 return hlist_entry_safe(node
, struct fsnotify_mark
, obj_list
);
423 * iter_info is a multi head priority queue of marks.
424 * Pick a subset of marks from queue heads, all with the same group
425 * and set the report_mask to a subset of the selected marks.
426 * Returns false if there are no more groups to iterate.
428 static bool fsnotify_iter_select_report_types(
429 struct fsnotify_iter_info
*iter_info
)
431 struct fsnotify_group
*max_prio_group
= NULL
;
432 struct fsnotify_mark
*mark
;
435 /* Choose max prio group among groups of all queue heads */
436 fsnotify_foreach_iter_type(type
) {
437 mark
= iter_info
->marks
[type
];
439 fsnotify_compare_groups(max_prio_group
, mark
->group
) > 0)
440 max_prio_group
= mark
->group
;
446 /* Set the report mask for marks from same group as max prio group */
447 iter_info
->current_group
= max_prio_group
;
448 iter_info
->report_mask
= 0;
449 fsnotify_foreach_iter_type(type
) {
450 mark
= iter_info
->marks
[type
];
451 if (mark
&& mark
->group
== iter_info
->current_group
) {
453 * FSNOTIFY_ITER_TYPE_PARENT indicates that this inode
454 * is watching children and interested in this event,
455 * which is an event possible on child.
456 * But is *this mark* watching children?
458 if (type
== FSNOTIFY_ITER_TYPE_PARENT
&&
459 !(mark
->mask
& FS_EVENT_ON_CHILD
) &&
460 !(fsnotify_ignore_mask(mark
) & FS_EVENT_ON_CHILD
))
463 fsnotify_iter_set_report_type(iter_info
, type
);
471 * Pop from iter_info multi head queue, the marks that belong to the group of
472 * current iteration step.
474 static void fsnotify_iter_next(struct fsnotify_iter_info
*iter_info
)
476 struct fsnotify_mark
*mark
;
480 * We cannot use fsnotify_foreach_iter_mark_type() here because we
481 * may need to advance a mark of type X that belongs to current_group
482 * but was not selected for reporting.
484 fsnotify_foreach_iter_type(type
) {
485 mark
= iter_info
->marks
[type
];
486 if (mark
&& mark
->group
== iter_info
->current_group
)
487 iter_info
->marks
[type
] =
488 fsnotify_next_mark(iter_info
->marks
[type
]);
493 * fsnotify - This is the main call to fsnotify.
495 * The VFS calls into hook specific functions in linux/fsnotify.h.
496 * Those functions then in turn call here. Here will call out to all of the
497 * registered fsnotify_group. Those groups can then use the notification event
498 * in whatever means they feel necessary.
500 * @mask: event type and flags
501 * @data: object that event happened on
502 * @data_type: type of object for fanotify_data_XXX() accessors
503 * @dir: optional directory associated with event -
504 * if @file_name is not NULL, this is the directory that
505 * @file_name is relative to
506 * @file_name: optional file name associated with event
507 * @inode: optional inode associated with event -
508 * If @dir and @inode are both non-NULL, event may be
510 * @cookie: inotify rename cookie
512 int fsnotify(__u32 mask
, const void *data
, int data_type
, struct inode
*dir
,
513 const struct qstr
*file_name
, struct inode
*inode
, u32 cookie
)
515 const struct path
*path
= fsnotify_data_path(data
, data_type
);
516 struct super_block
*sb
= fsnotify_data_sb(data
, data_type
);
517 struct fsnotify_sb_info
*sbinfo
= fsnotify_sb_info(sb
);
518 struct fsnotify_iter_info iter_info
= {};
519 struct mount
*mnt
= NULL
;
520 struct inode
*inode2
= NULL
;
521 struct dentry
*moved
;
524 __u32 test_mask
, marks_mask
;
527 mnt
= real_mount(path
->mnt
);
530 /* Dirent event - report on TYPE_INODE to dir */
532 /* For FS_RENAME, inode is old_dir and inode2 is new_dir */
533 if (mask
& FS_RENAME
) {
534 moved
= fsnotify_data_dentry(data
, data_type
);
535 inode2
= moved
->d_parent
->d_inode
;
536 inode2_type
= FSNOTIFY_ITER_TYPE_INODE2
;
538 } else if (mask
& FS_EVENT_ON_CHILD
) {
540 * Event on child - report on TYPE_PARENT to dir if it is
541 * watching children and on TYPE_INODE to child.
544 inode2_type
= FSNOTIFY_ITER_TYPE_PARENT
;
548 * Optimization: srcu_read_lock() has a memory barrier which can
549 * be expensive. It protects walking the *_fsnotify_marks lists.
550 * However, if we do not walk the lists, we do not have to do
551 * SRCU because we have no references to any objects and do not
552 * need SRCU to keep them "alive".
554 if ((!sbinfo
|| !sbinfo
->sb_marks
) &&
555 (!mnt
|| !mnt
->mnt_fsnotify_marks
) &&
556 (!inode
|| !inode
->i_fsnotify_marks
) &&
557 (!inode2
|| !inode2
->i_fsnotify_marks
))
560 marks_mask
= sb
->s_fsnotify_mask
;
562 marks_mask
|= mnt
->mnt_fsnotify_mask
;
564 marks_mask
|= inode
->i_fsnotify_mask
;
566 marks_mask
|= inode2
->i_fsnotify_mask
;
570 * If this is a modify event we may need to clear some ignore masks.
571 * In that case, the object with ignore masks will have the FS_MODIFY
573 * Otherwise, return if none of the marks care about this type of event.
575 test_mask
= (mask
& ALL_FSNOTIFY_EVENTS
);
576 if (!(test_mask
& marks_mask
))
579 iter_info
.srcu_idx
= srcu_read_lock(&fsnotify_mark_srcu
);
582 iter_info
.marks
[FSNOTIFY_ITER_TYPE_SB
] =
583 fsnotify_first_mark(&sbinfo
->sb_marks
);
586 iter_info
.marks
[FSNOTIFY_ITER_TYPE_VFSMOUNT
] =
587 fsnotify_first_mark(&mnt
->mnt_fsnotify_marks
);
590 iter_info
.marks
[FSNOTIFY_ITER_TYPE_INODE
] =
591 fsnotify_first_mark(&inode
->i_fsnotify_marks
);
594 iter_info
.marks
[inode2_type
] =
595 fsnotify_first_mark(&inode2
->i_fsnotify_marks
);
599 * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
600 * ignore masks are properly reflected for mount/sb mark notifications.
601 * That's why this traversal is so complicated...
603 while (fsnotify_iter_select_report_types(&iter_info
)) {
604 ret
= send_to_group(mask
, data
, data_type
, dir
, file_name
,
607 if (ret
&& (mask
& ALL_FSNOTIFY_PERM_EVENTS
))
610 fsnotify_iter_next(&iter_info
);
614 srcu_read_unlock(&fsnotify_mark_srcu
, iter_info
.srcu_idx
);
618 EXPORT_SYMBOL_GPL(fsnotify
);
620 static __init
int fsnotify_init(void)
624 BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS
) != 23);
626 ret
= init_srcu_struct(&fsnotify_mark_srcu
);
628 panic("initializing fsnotify_mark_srcu");
630 fsnotify_mark_connector_cachep
= KMEM_CACHE(fsnotify_mark_connector
,
635 core_initcall(fsnotify_init
);