sched: tune multi-core idle balancing
[wrt350n-kernel.git] / fs / inotify.c
blob690e72595e6e6048addba645cf179cb418208f50
1 /*
2 * fs/inotify.c - inode-based file event notifications
4 * Authors:
5 * John McCutchan <ttb@tentacle.dhs.org>
6 * Robert Love <rml@novell.com>
8 * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
10 * Copyright (C) 2005 John McCutchan
11 * Copyright 2006 Hewlett-Packard Development Company, L.P.
13 * This program is free software; you can redistribute it and/or modify it
14 * under the terms of the GNU General Public License as published by the
15 * Free Software Foundation; either version 2, or (at your option) any
16 * later version.
18 * This program is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details.
24 #include <linux/module.h>
25 #include <linux/kernel.h>
26 #include <linux/spinlock.h>
27 #include <linux/idr.h>
28 #include <linux/slab.h>
29 #include <linux/fs.h>
30 #include <linux/sched.h>
31 #include <linux/init.h>
32 #include <linux/list.h>
33 #include <linux/writeback.h>
34 #include <linux/inotify.h>
/* Counter behind inotify_get_cookie(); reset to zero by inotify_setup(). */
36 static atomic_t inotify_cookie;
39 * Lock ordering:
41 * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
42 * iprune_mutex (synchronize shrink_icache_memory())
43 * inode_lock (protects the super_block->s_inodes list)
44 * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
45 * inotify_handle->mutex (protects inotify_handle and watches->h_list)
47 * The inode->inotify_mutex and inotify_handle->mutex are held during execution
48 * of a caller's event handler. Thus, the caller must not hold any locks
49 * taken in their event handler while calling any of the published inotify
50 * interfaces.
54 * Lifetimes of the three main data structures--inotify_handle, inode, and
55 * inotify_watch--are managed by reference count.
57 * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
58 * Additional references can bump the count via get_inotify_handle() and drop
59 * the count via put_inotify_handle().
61 * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
62 * to remove_watch_no_event(). Additional references can bump the count via
63 * get_inotify_watch() and drop the count via put_inotify_watch(). The caller
64 * is responsible for the final put after receiving IN_IGNORED, or when using
65 * IN_ONESHOT after receiving the first event. Inotify does the final put if
66 * inotify_destroy() is called.
68 * inode: Pinned so long as the inode is associated with a watch, from
69 * inotify_add_watch() to the final put_inotify_watch().
73 * struct inotify_handle - represents an inotify instance
75 * This structure is protected by the mutex 'mutex'.
77 struct inotify_handle {
78 struct idr idr; /* idr mapping wd -> watch */
79 struct mutex mutex; /* protects this bad boy */
80 struct list_head watches; /* list of watches */
81 atomic_t count; /* reference count */
82 u32 last_wd; /* the last wd allocated */
83 const struct inotify_operations *in_ops; /* inotify caller operations */
86 static inline void get_inotify_handle(struct inotify_handle *ih)
88 atomic_inc(&ih->count);
91 static inline void put_inotify_handle(struct inotify_handle *ih)
93 if (atomic_dec_and_test(&ih->count)) {
94 idr_destroy(&ih->idr);
95 kfree(ih);
99 /**
100 * get_inotify_watch - grab a reference to an inotify_watch
101 * @watch: watch to grab
103 void get_inotify_watch(struct inotify_watch *watch)
105 atomic_inc(&watch->count);
107 EXPORT_SYMBOL_GPL(get_inotify_watch);
110 * put_inotify_watch - decrements the ref count on a given watch. cleans up
111 * watch references if the count reaches zero. inotify_watch is freed by
112 * inotify callers via the destroy_watch() op.
113 * @watch: watch to release
115 void put_inotify_watch(struct inotify_watch *watch)
117 if (atomic_dec_and_test(&watch->count)) {
118 struct inotify_handle *ih = watch->ih;
120 iput(watch->inode);
121 ih->in_ops->destroy_watch(watch);
122 put_inotify_handle(ih);
125 EXPORT_SYMBOL_GPL(put_inotify_watch);
128 * inotify_handle_get_wd - returns the next WD for use by the given handle
130 * Callers must hold ih->mutex. This function can sleep.
132 static int inotify_handle_get_wd(struct inotify_handle *ih,
133 struct inotify_watch *watch)
135 int ret;
137 do {
138 if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL)))
139 return -ENOSPC;
140 ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
141 } while (ret == -EAGAIN);
143 if (likely(!ret))
144 ih->last_wd = watch->wd;
146 return ret;
150 * inotify_inode_watched - returns nonzero if there are watches on this inode
151 * and zero otherwise. We call this lockless, we do not care if we race.
153 static inline int inotify_inode_watched(struct inode *inode)
155 return !list_empty(&inode->inotify_watches);
159 * Get child dentry flag into synch with parent inode.
160 * Flag should always be clear for negative dentrys.
162 static void set_dentry_child_flags(struct inode *inode, int watched)
164 struct dentry *alias;
166 spin_lock(&dcache_lock);
167 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
168 struct dentry *child;
170 list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
171 if (!child->d_inode)
172 continue;
174 spin_lock(&child->d_lock);
175 if (watched)
176 child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
177 else
178 child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
179 spin_unlock(&child->d_lock);
182 spin_unlock(&dcache_lock);
186 * inotify_find_handle - find the watch associated with the given inode and
187 * handle
189 * Callers must hold inode->inotify_mutex.
191 static struct inotify_watch *inode_find_handle(struct inode *inode,
192 struct inotify_handle *ih)
194 struct inotify_watch *watch;
196 list_for_each_entry(watch, &inode->inotify_watches, i_list) {
197 if (watch->ih == ih)
198 return watch;
201 return NULL;
205 * remove_watch_no_event - remove watch without the IN_IGNORED event.
207 * Callers must hold both inode->inotify_mutex and ih->mutex.
209 static void remove_watch_no_event(struct inotify_watch *watch,
210 struct inotify_handle *ih)
212 list_del(&watch->i_list);
213 list_del(&watch->h_list);
215 if (!inotify_inode_watched(watch->inode))
216 set_dentry_child_flags(watch->inode, 0);
218 idr_remove(&ih->idr, watch->wd);
222 * inotify_remove_watch_locked - Remove a watch from both the handle and the
223 * inode. Sends the IN_IGNORED event signifying that the inode is no longer
224 * watched. May be invoked from a caller's event handler.
225 * @ih: inotify handle associated with watch
226 * @watch: watch to remove
228 * Callers must hold both inode->inotify_mutex and ih->mutex.
230 void inotify_remove_watch_locked(struct inotify_handle *ih,
231 struct inotify_watch *watch)
233 remove_watch_no_event(watch, ih);
234 ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
236 EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
238 /* Kernel API for producing events */
241 * inotify_d_instantiate - instantiate dcache entry for inode
243 void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
245 struct dentry *parent;
247 if (!inode)
248 return;
250 spin_lock(&entry->d_lock);
251 parent = entry->d_parent;
252 if (parent->d_inode && inotify_inode_watched(parent->d_inode))
253 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
254 spin_unlock(&entry->d_lock);
258 * inotify_d_move - dcache entry has been moved
260 void inotify_d_move(struct dentry *entry)
262 struct dentry *parent;
264 parent = entry->d_parent;
265 if (inotify_inode_watched(parent->d_inode))
266 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
267 else
268 entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
272 * inotify_inode_queue_event - queue an event to all watches on this inode
273 * @inode: inode event is originating from
274 * @mask: event mask describing this event
275 * @cookie: cookie for synchronization, or zero
276 * @name: filename, if any
277 * @n_inode: inode associated with name
279 void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
280 const char *name, struct inode *n_inode)
282 struct inotify_watch *watch, *next;
284 if (!inotify_inode_watched(inode))
285 return;
287 mutex_lock(&inode->inotify_mutex);
288 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
289 u32 watch_mask = watch->mask;
290 if (watch_mask & mask) {
291 struct inotify_handle *ih= watch->ih;
292 mutex_lock(&ih->mutex);
293 if (watch_mask & IN_ONESHOT)
294 remove_watch_no_event(watch, ih);
295 ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
296 name, n_inode);
297 mutex_unlock(&ih->mutex);
300 mutex_unlock(&inode->inotify_mutex);
302 EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
305 * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
306 * @dentry: the dentry in question, we queue against this dentry's parent
307 * @mask: event mask describing this event
308 * @cookie: cookie for synchronization, or zero
309 * @name: filename, if any
311 void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
312 u32 cookie, const char *name)
314 struct dentry *parent;
315 struct inode *inode;
317 if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
318 return;
320 spin_lock(&dentry->d_lock);
321 parent = dentry->d_parent;
322 inode = parent->d_inode;
324 if (inotify_inode_watched(inode)) {
325 dget(parent);
326 spin_unlock(&dentry->d_lock);
327 inotify_inode_queue_event(inode, mask, cookie, name,
328 dentry->d_inode);
329 dput(parent);
330 } else
331 spin_unlock(&dentry->d_lock);
333 EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
336 * inotify_get_cookie - return a unique cookie for use in synchronizing events.
338 u32 inotify_get_cookie(void)
340 return atomic_inc_return(&inotify_cookie);
342 EXPORT_SYMBOL_GPL(inotify_get_cookie);
345 * inotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
346 * @list: list of inodes being unmounted (sb->s_inodes)
348 * Called with inode_lock held, protecting the unmounting super block's list
349 * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
350 * We temporarily drop inode_lock, however, and CAN block.
352 void inotify_unmount_inodes(struct list_head *list)
354 struct inode *inode, *next_i, *need_iput = NULL;
356 list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
357 struct inotify_watch *watch, *next_w;
358 struct inode *need_iput_tmp;
359 struct list_head *watches;
362 * If i_count is zero, the inode cannot have any watches and
363 * doing an __iget/iput with MS_ACTIVE clear would actually
364 * evict all inodes with zero i_count from icache which is
365 * unnecessarily violent and may in fact be illegal to do.
367 if (!atomic_read(&inode->i_count))
368 continue;
371 * We cannot __iget() an inode in state I_CLEAR, I_FREEING, or
372 * I_WILL_FREE which is fine because by that point the inode
373 * cannot have any associated watches.
375 if (inode->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))
376 continue;
378 need_iput_tmp = need_iput;
379 need_iput = NULL;
380 /* In case inotify_remove_watch_locked() drops a reference. */
381 if (inode != need_iput_tmp)
382 __iget(inode);
383 else
384 need_iput_tmp = NULL;
385 /* In case the dropping of a reference would nuke next_i. */
386 if ((&next_i->i_sb_list != list) &&
387 atomic_read(&next_i->i_count) &&
388 !(next_i->i_state & (I_CLEAR | I_FREEING |
389 I_WILL_FREE))) {
390 __iget(next_i);
391 need_iput = next_i;
395 * We can safely drop inode_lock here because we hold
396 * references on both inode and next_i. Also no new inodes
397 * will be added since the umount has begun. Finally,
398 * iprune_mutex keeps shrink_icache_memory() away.
400 spin_unlock(&inode_lock);
402 if (need_iput_tmp)
403 iput(need_iput_tmp);
405 /* for each watch, send IN_UNMOUNT and then remove it */
406 mutex_lock(&inode->inotify_mutex);
407 watches = &inode->inotify_watches;
408 list_for_each_entry_safe(watch, next_w, watches, i_list) {
409 struct inotify_handle *ih= watch->ih;
410 mutex_lock(&ih->mutex);
411 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
412 NULL, NULL);
413 inotify_remove_watch_locked(ih, watch);
414 mutex_unlock(&ih->mutex);
416 mutex_unlock(&inode->inotify_mutex);
417 iput(inode);
419 spin_lock(&inode_lock);
422 EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
425 * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
426 * @inode: inode that is about to be removed
428 void inotify_inode_is_dead(struct inode *inode)
430 struct inotify_watch *watch, *next;
432 mutex_lock(&inode->inotify_mutex);
433 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
434 struct inotify_handle *ih = watch->ih;
435 mutex_lock(&ih->mutex);
436 inotify_remove_watch_locked(ih, watch);
437 mutex_unlock(&ih->mutex);
439 mutex_unlock(&inode->inotify_mutex);
441 EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
443 /* Kernel Consumer API */
446 * inotify_init - allocate and initialize an inotify instance
447 * @ops: caller's inotify operations
449 struct inotify_handle *inotify_init(const struct inotify_operations *ops)
451 struct inotify_handle *ih;
453 ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
454 if (unlikely(!ih))
455 return ERR_PTR(-ENOMEM);
457 idr_init(&ih->idr);
458 INIT_LIST_HEAD(&ih->watches);
459 mutex_init(&ih->mutex);
460 ih->last_wd = 0;
461 ih->in_ops = ops;
462 atomic_set(&ih->count, 0);
463 get_inotify_handle(ih);
465 return ih;
467 EXPORT_SYMBOL_GPL(inotify_init);
470 * inotify_init_watch - initialize an inotify watch
471 * @watch: watch to initialize
473 void inotify_init_watch(struct inotify_watch *watch)
475 INIT_LIST_HEAD(&watch->h_list);
476 INIT_LIST_HEAD(&watch->i_list);
477 atomic_set(&watch->count, 0);
478 get_inotify_watch(watch); /* initial get */
480 EXPORT_SYMBOL_GPL(inotify_init_watch);
483 * inotify_destroy - clean up and destroy an inotify instance
484 * @ih: inotify handle
486 void inotify_destroy(struct inotify_handle *ih)
489 * Destroy all of the watches for this handle. Unfortunately, not very
490 * pretty. We cannot do a simple iteration over the list, because we
491 * do not know the inode until we iterate to the watch. But we need to
492 * hold inode->inotify_mutex before ih->mutex. The following works.
494 while (1) {
495 struct inotify_watch *watch;
496 struct list_head *watches;
497 struct inode *inode;
499 mutex_lock(&ih->mutex);
500 watches = &ih->watches;
501 if (list_empty(watches)) {
502 mutex_unlock(&ih->mutex);
503 break;
505 watch = list_first_entry(watches, struct inotify_watch, h_list);
506 get_inotify_watch(watch);
507 mutex_unlock(&ih->mutex);
509 inode = watch->inode;
510 mutex_lock(&inode->inotify_mutex);
511 mutex_lock(&ih->mutex);
513 /* make sure we didn't race with another list removal */
514 if (likely(idr_find(&ih->idr, watch->wd))) {
515 remove_watch_no_event(watch, ih);
516 put_inotify_watch(watch);
519 mutex_unlock(&ih->mutex);
520 mutex_unlock(&inode->inotify_mutex);
521 put_inotify_watch(watch);
524 /* free this handle: the put matching the get in inotify_init() */
525 put_inotify_handle(ih);
527 EXPORT_SYMBOL_GPL(inotify_destroy);
530 * inotify_find_watch - find an existing watch for an (ih,inode) pair
531 * @ih: inotify handle
532 * @inode: inode to watch
533 * @watchp: pointer to existing inotify_watch
535 * Caller must pin given inode (via nameidata).
537 s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
538 struct inotify_watch **watchp)
540 struct inotify_watch *old;
541 int ret = -ENOENT;
543 mutex_lock(&inode->inotify_mutex);
544 mutex_lock(&ih->mutex);
546 old = inode_find_handle(inode, ih);
547 if (unlikely(old)) {
548 get_inotify_watch(old); /* caller must put watch */
549 *watchp = old;
550 ret = old->wd;
553 mutex_unlock(&ih->mutex);
554 mutex_unlock(&inode->inotify_mutex);
556 return ret;
558 EXPORT_SYMBOL_GPL(inotify_find_watch);
561 * inotify_find_update_watch - find and update the mask of an existing watch
562 * @ih: inotify handle
563 * @inode: inode's watch to update
564 * @mask: mask of events to watch
566 * Caller must pin given inode (via nameidata).
568 s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
569 u32 mask)
571 struct inotify_watch *old;
572 int mask_add = 0;
573 int ret;
575 if (mask & IN_MASK_ADD)
576 mask_add = 1;
578 /* don't allow invalid bits: we don't want flags set */
579 mask &= IN_ALL_EVENTS | IN_ONESHOT;
580 if (unlikely(!mask))
581 return -EINVAL;
583 mutex_lock(&inode->inotify_mutex);
584 mutex_lock(&ih->mutex);
587 * Handle the case of re-adding a watch on an (inode,ih) pair that we
588 * are already watching. We just update the mask and return its wd.
590 old = inode_find_handle(inode, ih);
591 if (unlikely(!old)) {
592 ret = -ENOENT;
593 goto out;
596 if (mask_add)
597 old->mask |= mask;
598 else
599 old->mask = mask;
600 ret = old->wd;
601 out:
602 mutex_unlock(&ih->mutex);
603 mutex_unlock(&inode->inotify_mutex);
604 return ret;
606 EXPORT_SYMBOL_GPL(inotify_find_update_watch);
609 * inotify_add_watch - add a watch to an inotify instance
610 * @ih: inotify handle
611 * @watch: caller allocated watch structure
612 * @inode: inode to watch
613 * @mask: mask of events to watch
615 * Caller must pin given inode (via nameidata).
616 * Caller must ensure it only calls inotify_add_watch() once per watch.
617 * Calls inotify_handle_get_wd() so may sleep.
619 s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
620 struct inode *inode, u32 mask)
622 int ret = 0;
623 int newly_watched;
625 /* don't allow invalid bits: we don't want flags set */
626 mask &= IN_ALL_EVENTS | IN_ONESHOT;
627 if (unlikely(!mask))
628 return -EINVAL;
629 watch->mask = mask;
631 mutex_lock(&inode->inotify_mutex);
632 mutex_lock(&ih->mutex);
634 /* Initialize a new watch */
635 ret = inotify_handle_get_wd(ih, watch);
636 if (unlikely(ret))
637 goto out;
638 ret = watch->wd;
640 /* save a reference to handle and bump the count to make it official */
641 get_inotify_handle(ih);
642 watch->ih = ih;
645 * Save a reference to the inode and bump the ref count to make it
646 * official. We hold a reference to nameidata, which makes this safe.
648 watch->inode = igrab(inode);
650 /* Add the watch to the handle's and the inode's list */
651 newly_watched = !inotify_inode_watched(inode);
652 list_add(&watch->h_list, &ih->watches);
653 list_add(&watch->i_list, &inode->inotify_watches);
655 * Set child flags _after_ adding the watch, so there is no race
656 * windows where newly instantiated children could miss their parent's
657 * watched flag.
659 if (newly_watched)
660 set_dentry_child_flags(inode, 1);
662 out:
663 mutex_unlock(&ih->mutex);
664 mutex_unlock(&inode->inotify_mutex);
665 return ret;
667 EXPORT_SYMBOL_GPL(inotify_add_watch);
670 * inotify_clone_watch - put the watch next to existing one
671 * @old: already installed watch
672 * @new: new watch
674 * Caller must hold the inotify_mutex of inode we are dealing with;
675 * it is expected to remove the old watch before unlocking the inode.
677 s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
679 struct inotify_handle *ih = old->ih;
680 int ret = 0;
682 new->mask = old->mask;
683 new->ih = ih;
685 mutex_lock(&ih->mutex);
687 /* Initialize a new watch */
688 ret = inotify_handle_get_wd(ih, new);
689 if (unlikely(ret))
690 goto out;
691 ret = new->wd;
693 get_inotify_handle(ih);
695 new->inode = igrab(old->inode);
697 list_add(&new->h_list, &ih->watches);
698 list_add(&new->i_list, &old->inode->inotify_watches);
699 out:
700 mutex_unlock(&ih->mutex);
701 return ret;
704 void inotify_evict_watch(struct inotify_watch *watch)
706 get_inotify_watch(watch);
707 mutex_lock(&watch->ih->mutex);
708 inotify_remove_watch_locked(watch->ih, watch);
709 mutex_unlock(&watch->ih->mutex);
713 * inotify_rm_wd - remove a watch from an inotify instance
714 * @ih: inotify handle
715 * @wd: watch descriptor to remove
717 * Can sleep.
719 int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
721 struct inotify_watch *watch;
722 struct inode *inode;
724 mutex_lock(&ih->mutex);
725 watch = idr_find(&ih->idr, wd);
726 if (unlikely(!watch)) {
727 mutex_unlock(&ih->mutex);
728 return -EINVAL;
730 get_inotify_watch(watch);
731 inode = watch->inode;
732 mutex_unlock(&ih->mutex);
734 mutex_lock(&inode->inotify_mutex);
735 mutex_lock(&ih->mutex);
737 /* make sure that we did not race */
738 if (likely(idr_find(&ih->idr, wd) == watch))
739 inotify_remove_watch_locked(ih, watch);
741 mutex_unlock(&ih->mutex);
742 mutex_unlock(&inode->inotify_mutex);
743 put_inotify_watch(watch);
745 return 0;
747 EXPORT_SYMBOL_GPL(inotify_rm_wd);
750 * inotify_rm_watch - remove a watch from an inotify instance
751 * @ih: inotify handle
752 * @watch: watch to remove
754 * Can sleep.
756 int inotify_rm_watch(struct inotify_handle *ih,
757 struct inotify_watch *watch)
759 return inotify_rm_wd(ih, watch->wd);
761 EXPORT_SYMBOL_GPL(inotify_rm_watch);
764 * inotify_setup - core initialization function
766 static int __init inotify_setup(void)
768 atomic_set(&inotify_cookie, 0);
770 return 0;
773 module_init(inotify_setup);