[NET_SCHED]: Remove old estimator implementation
[hh.org.git] / fs / inotify.c
blob723836a1f71896a5406b7ca6e066dc42ca5a6286
1 /*
2 * fs/inotify.c - inode-based file event notifications
4 * Authors:
5 * John McCutchan <ttb@tentacle.dhs.org>
6 * Robert Love <rml@novell.com>
8 * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
10 * Copyright (C) 2005 John McCutchan
11 * Copyright 2006 Hewlett-Packard Development Company, L.P.
13 * This program is free software; you can redistribute it and/or modify it
14 * under the terms of the GNU General Public License as published by the
15 * Free Software Foundation; either version 2, or (at your option) any
16 * later version.
18 * This program is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details.
24 #include <linux/module.h>
25 #include <linux/kernel.h>
26 #include <linux/spinlock.h>
27 #include <linux/idr.h>
28 #include <linux/slab.h>
29 #include <linux/fs.h>
30 #include <linux/init.h>
31 #include <linux/list.h>
32 #include <linux/writeback.h>
33 #include <linux/inotify.h>
35 static atomic_t inotify_cookie;
38 * Lock ordering:
40 * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
41 * iprune_mutex (synchronize shrink_icache_memory())
42 * inode_lock (protects the super_block->s_inodes list)
43 * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
44 * inotify_handle->mutex (protects inotify_handle and watches->h_list)
46 * The inode->inotify_mutex and inotify_handle->mutex are held during execution
47 * of a caller's event handler. Thus, the caller must not hold any locks
48 * taken in their event handler while calling any of the published inotify
49 * interfaces.
53 * Lifetimes of the three main data structures--inotify_handle, inode, and
54 * inotify_watch--are managed by reference count.
56 * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
57 * Additional references can bump the count via get_inotify_handle() and drop
58 * the count via put_inotify_handle().
60 * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
61 * to remove_watch_no_event(). Additional references can bump the count via
62 * get_inotify_watch() and drop the count via put_inotify_watch(). The caller
63 * is responsible for the final put after receiving IN_IGNORED, or when using
64 * IN_ONESHOT after receiving the first event. Inotify does the final put if
65 * inotify_destroy() is called.
67 * inode: Pinned so long as the inode is associated with a watch, from
68 * inotify_add_watch() to the final put_inotify_watch().
72 * struct inotify_handle - represents an inotify instance
74 * This structure is protected by the mutex 'mutex'.
76 struct inotify_handle {
77 struct idr idr; /* idr mapping wd -> watch */
78 struct mutex mutex; /* protects this bad boy */
79 struct list_head watches; /* list of watches */
80 atomic_t count; /* reference count */
81 u32 last_wd; /* the last wd allocated */
82 const struct inotify_operations *in_ops; /* inotify caller operations */
85 static inline void get_inotify_handle(struct inotify_handle *ih)
87 atomic_inc(&ih->count);
90 static inline void put_inotify_handle(struct inotify_handle *ih)
92 if (atomic_dec_and_test(&ih->count)) {
93 idr_destroy(&ih->idr);
94 kfree(ih);
98 /**
99 * get_inotify_watch - grab a reference to an inotify_watch
100 * @watch: watch to grab
102 void get_inotify_watch(struct inotify_watch *watch)
104 atomic_inc(&watch->count);
106 EXPORT_SYMBOL_GPL(get_inotify_watch);
109 * put_inotify_watch - decrements the ref count on a given watch. cleans up
110 * watch references if the count reaches zero. inotify_watch is freed by
111 * inotify callers via the destroy_watch() op.
112 * @watch: watch to release
114 void put_inotify_watch(struct inotify_watch *watch)
116 if (atomic_dec_and_test(&watch->count)) {
117 struct inotify_handle *ih = watch->ih;
119 iput(watch->inode);
120 ih->in_ops->destroy_watch(watch);
121 put_inotify_handle(ih);
124 EXPORT_SYMBOL_GPL(put_inotify_watch);
127 * inotify_handle_get_wd - returns the next WD for use by the given handle
129 * Callers must hold ih->mutex. This function can sleep.
131 static int inotify_handle_get_wd(struct inotify_handle *ih,
132 struct inotify_watch *watch)
134 int ret;
136 do {
137 if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL)))
138 return -ENOSPC;
139 ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
140 } while (ret == -EAGAIN);
142 if (likely(!ret))
143 ih->last_wd = watch->wd;
145 return ret;
149 * inotify_inode_watched - returns nonzero if there are watches on this inode
150 * and zero otherwise. We call this lockless, we do not care if we race.
152 static inline int inotify_inode_watched(struct inode *inode)
154 return !list_empty(&inode->inotify_watches);
158 * Get child dentry flag into synch with parent inode.
159 * Flag should always be clear for negative dentrys.
161 static void set_dentry_child_flags(struct inode *inode, int watched)
163 struct dentry *alias;
165 spin_lock(&dcache_lock);
166 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
167 struct dentry *child;
169 list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
170 if (!child->d_inode) {
171 WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
172 continue;
174 spin_lock(&child->d_lock);
175 if (watched) {
176 WARN_ON(child->d_flags &
177 DCACHE_INOTIFY_PARENT_WATCHED);
178 child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
179 } else {
180 WARN_ON(!(child->d_flags &
181 DCACHE_INOTIFY_PARENT_WATCHED));
182 child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED;
184 spin_unlock(&child->d_lock);
187 spin_unlock(&dcache_lock);
191 * inotify_find_handle - find the watch associated with the given inode and
192 * handle
194 * Callers must hold inode->inotify_mutex.
196 static struct inotify_watch *inode_find_handle(struct inode *inode,
197 struct inotify_handle *ih)
199 struct inotify_watch *watch;
201 list_for_each_entry(watch, &inode->inotify_watches, i_list) {
202 if (watch->ih == ih)
203 return watch;
206 return NULL;
210 * remove_watch_no_event - remove watch without the IN_IGNORED event.
212 * Callers must hold both inode->inotify_mutex and ih->mutex.
214 static void remove_watch_no_event(struct inotify_watch *watch,
215 struct inotify_handle *ih)
217 list_del(&watch->i_list);
218 list_del(&watch->h_list);
220 if (!inotify_inode_watched(watch->inode))
221 set_dentry_child_flags(watch->inode, 0);
223 idr_remove(&ih->idr, watch->wd);
227 * inotify_remove_watch_locked - Remove a watch from both the handle and the
228 * inode. Sends the IN_IGNORED event signifying that the inode is no longer
229 * watched. May be invoked from a caller's event handler.
230 * @ih: inotify handle associated with watch
231 * @watch: watch to remove
233 * Callers must hold both inode->inotify_mutex and ih->mutex.
235 void inotify_remove_watch_locked(struct inotify_handle *ih,
236 struct inotify_watch *watch)
238 remove_watch_no_event(watch, ih);
239 ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
241 EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
243 /* Kernel API for producing events */
246 * inotify_d_instantiate - instantiate dcache entry for inode
248 void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
250 struct dentry *parent;
252 if (!inode)
253 return;
255 WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
256 spin_lock(&entry->d_lock);
257 parent = entry->d_parent;
258 if (parent->d_inode && inotify_inode_watched(parent->d_inode))
259 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
260 spin_unlock(&entry->d_lock);
264 * inotify_d_move - dcache entry has been moved
266 void inotify_d_move(struct dentry *entry)
268 struct dentry *parent;
270 parent = entry->d_parent;
271 if (inotify_inode_watched(parent->d_inode))
272 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
273 else
274 entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
278 * inotify_inode_queue_event - queue an event to all watches on this inode
279 * @inode: inode event is originating from
280 * @mask: event mask describing this event
281 * @cookie: cookie for synchronization, or zero
282 * @name: filename, if any
283 * @n_inode: inode associated with name
285 void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
286 const char *name, struct inode *n_inode)
288 struct inotify_watch *watch, *next;
290 if (!inotify_inode_watched(inode))
291 return;
293 mutex_lock(&inode->inotify_mutex);
294 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
295 u32 watch_mask = watch->mask;
296 if (watch_mask & mask) {
297 struct inotify_handle *ih= watch->ih;
298 mutex_lock(&ih->mutex);
299 if (watch_mask & IN_ONESHOT)
300 remove_watch_no_event(watch, ih);
301 ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
302 name, n_inode);
303 mutex_unlock(&ih->mutex);
306 mutex_unlock(&inode->inotify_mutex);
308 EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
311 * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
312 * @dentry: the dentry in question, we queue against this dentry's parent
313 * @mask: event mask describing this event
314 * @cookie: cookie for synchronization, or zero
315 * @name: filename, if any
317 void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
318 u32 cookie, const char *name)
320 struct dentry *parent;
321 struct inode *inode;
323 if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
324 return;
326 spin_lock(&dentry->d_lock);
327 parent = dentry->d_parent;
328 inode = parent->d_inode;
330 if (inotify_inode_watched(inode)) {
331 dget(parent);
332 spin_unlock(&dentry->d_lock);
333 inotify_inode_queue_event(inode, mask, cookie, name,
334 dentry->d_inode);
335 dput(parent);
336 } else
337 spin_unlock(&dentry->d_lock);
339 EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
342 * inotify_get_cookie - return a unique cookie for use in synchronizing events.
344 u32 inotify_get_cookie(void)
346 return atomic_inc_return(&inotify_cookie);
348 EXPORT_SYMBOL_GPL(inotify_get_cookie);
351 * inotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
352 * @list: list of inodes being unmounted (sb->s_inodes)
354 * Called with inode_lock held, protecting the unmounting super block's list
355 * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
356 * We temporarily drop inode_lock, however, and CAN block.
358 void inotify_unmount_inodes(struct list_head *list)
360 struct inode *inode, *next_i, *need_iput = NULL;
362 list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
363 struct inotify_watch *watch, *next_w;
364 struct inode *need_iput_tmp;
365 struct list_head *watches;
368 * If i_count is zero, the inode cannot have any watches and
369 * doing an __iget/iput with MS_ACTIVE clear would actually
370 * evict all inodes with zero i_count from icache which is
371 * unnecessarily violent and may in fact be illegal to do.
373 if (!atomic_read(&inode->i_count))
374 continue;
377 * We cannot __iget() an inode in state I_CLEAR, I_FREEING, or
378 * I_WILL_FREE which is fine because by that point the inode
379 * cannot have any associated watches.
381 if (inode->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))
382 continue;
384 need_iput_tmp = need_iput;
385 need_iput = NULL;
386 /* In case inotify_remove_watch_locked() drops a reference. */
387 if (inode != need_iput_tmp)
388 __iget(inode);
389 else
390 need_iput_tmp = NULL;
391 /* In case the dropping of a reference would nuke next_i. */
392 if ((&next_i->i_sb_list != list) &&
393 atomic_read(&next_i->i_count) &&
394 !(next_i->i_state & (I_CLEAR | I_FREEING |
395 I_WILL_FREE))) {
396 __iget(next_i);
397 need_iput = next_i;
401 * We can safely drop inode_lock here because we hold
402 * references on both inode and next_i. Also no new inodes
403 * will be added since the umount has begun. Finally,
404 * iprune_mutex keeps shrink_icache_memory() away.
406 spin_unlock(&inode_lock);
408 if (need_iput_tmp)
409 iput(need_iput_tmp);
411 /* for each watch, send IN_UNMOUNT and then remove it */
412 mutex_lock(&inode->inotify_mutex);
413 watches = &inode->inotify_watches;
414 list_for_each_entry_safe(watch, next_w, watches, i_list) {
415 struct inotify_handle *ih= watch->ih;
416 mutex_lock(&ih->mutex);
417 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
418 NULL, NULL);
419 inotify_remove_watch_locked(ih, watch);
420 mutex_unlock(&ih->mutex);
422 mutex_unlock(&inode->inotify_mutex);
423 iput(inode);
425 spin_lock(&inode_lock);
428 EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
431 * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
432 * @inode: inode that is about to be removed
434 void inotify_inode_is_dead(struct inode *inode)
436 struct inotify_watch *watch, *next;
438 mutex_lock(&inode->inotify_mutex);
439 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
440 struct inotify_handle *ih = watch->ih;
441 mutex_lock(&ih->mutex);
442 inotify_remove_watch_locked(ih, watch);
443 mutex_unlock(&ih->mutex);
445 mutex_unlock(&inode->inotify_mutex);
447 EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
449 /* Kernel Consumer API */
452 * inotify_init - allocate and initialize an inotify instance
453 * @ops: caller's inotify operations
455 struct inotify_handle *inotify_init(const struct inotify_operations *ops)
457 struct inotify_handle *ih;
459 ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
460 if (unlikely(!ih))
461 return ERR_PTR(-ENOMEM);
463 idr_init(&ih->idr);
464 INIT_LIST_HEAD(&ih->watches);
465 mutex_init(&ih->mutex);
466 ih->last_wd = 0;
467 ih->in_ops = ops;
468 atomic_set(&ih->count, 0);
469 get_inotify_handle(ih);
471 return ih;
473 EXPORT_SYMBOL_GPL(inotify_init);
476 * inotify_init_watch - initialize an inotify watch
477 * @watch: watch to initialize
479 void inotify_init_watch(struct inotify_watch *watch)
481 INIT_LIST_HEAD(&watch->h_list);
482 INIT_LIST_HEAD(&watch->i_list);
483 atomic_set(&watch->count, 0);
484 get_inotify_watch(watch); /* initial get */
486 EXPORT_SYMBOL_GPL(inotify_init_watch);
489 * inotify_destroy - clean up and destroy an inotify instance
490 * @ih: inotify handle
492 void inotify_destroy(struct inotify_handle *ih)
495 * Destroy all of the watches for this handle. Unfortunately, not very
496 * pretty. We cannot do a simple iteration over the list, because we
497 * do not know the inode until we iterate to the watch. But we need to
498 * hold inode->inotify_mutex before ih->mutex. The following works.
500 while (1) {
501 struct inotify_watch *watch;
502 struct list_head *watches;
503 struct inode *inode;
505 mutex_lock(&ih->mutex);
506 watches = &ih->watches;
507 if (list_empty(watches)) {
508 mutex_unlock(&ih->mutex);
509 break;
511 watch = list_entry(watches->next, struct inotify_watch, h_list);
512 get_inotify_watch(watch);
513 mutex_unlock(&ih->mutex);
515 inode = watch->inode;
516 mutex_lock(&inode->inotify_mutex);
517 mutex_lock(&ih->mutex);
519 /* make sure we didn't race with another list removal */
520 if (likely(idr_find(&ih->idr, watch->wd))) {
521 remove_watch_no_event(watch, ih);
522 put_inotify_watch(watch);
525 mutex_unlock(&ih->mutex);
526 mutex_unlock(&inode->inotify_mutex);
527 put_inotify_watch(watch);
530 /* free this handle: the put matching the get in inotify_init() */
531 put_inotify_handle(ih);
533 EXPORT_SYMBOL_GPL(inotify_destroy);
536 * inotify_find_watch - find an existing watch for an (ih,inode) pair
537 * @ih: inotify handle
538 * @inode: inode to watch
539 * @watchp: pointer to existing inotify_watch
541 * Caller must pin given inode (via nameidata).
543 s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
544 struct inotify_watch **watchp)
546 struct inotify_watch *old;
547 int ret = -ENOENT;
549 mutex_lock(&inode->inotify_mutex);
550 mutex_lock(&ih->mutex);
552 old = inode_find_handle(inode, ih);
553 if (unlikely(old)) {
554 get_inotify_watch(old); /* caller must put watch */
555 *watchp = old;
556 ret = old->wd;
559 mutex_unlock(&ih->mutex);
560 mutex_unlock(&inode->inotify_mutex);
562 return ret;
564 EXPORT_SYMBOL_GPL(inotify_find_watch);
567 * inotify_find_update_watch - find and update the mask of an existing watch
568 * @ih: inotify handle
569 * @inode: inode's watch to update
570 * @mask: mask of events to watch
572 * Caller must pin given inode (via nameidata).
574 s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
575 u32 mask)
577 struct inotify_watch *old;
578 int mask_add = 0;
579 int ret;
581 if (mask & IN_MASK_ADD)
582 mask_add = 1;
584 /* don't allow invalid bits: we don't want flags set */
585 mask &= IN_ALL_EVENTS | IN_ONESHOT;
586 if (unlikely(!mask))
587 return -EINVAL;
589 mutex_lock(&inode->inotify_mutex);
590 mutex_lock(&ih->mutex);
593 * Handle the case of re-adding a watch on an (inode,ih) pair that we
594 * are already watching. We just update the mask and return its wd.
596 old = inode_find_handle(inode, ih);
597 if (unlikely(!old)) {
598 ret = -ENOENT;
599 goto out;
602 if (mask_add)
603 old->mask |= mask;
604 else
605 old->mask = mask;
606 ret = old->wd;
607 out:
608 mutex_unlock(&ih->mutex);
609 mutex_unlock(&inode->inotify_mutex);
610 return ret;
612 EXPORT_SYMBOL_GPL(inotify_find_update_watch);
615 * inotify_add_watch - add a watch to an inotify instance
616 * @ih: inotify handle
617 * @watch: caller allocated watch structure
618 * @inode: inode to watch
619 * @mask: mask of events to watch
621 * Caller must pin given inode (via nameidata).
622 * Caller must ensure it only calls inotify_add_watch() once per watch.
623 * Calls inotify_handle_get_wd() so may sleep.
625 s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
626 struct inode *inode, u32 mask)
628 int ret = 0;
630 /* don't allow invalid bits: we don't want flags set */
631 mask &= IN_ALL_EVENTS | IN_ONESHOT;
632 if (unlikely(!mask))
633 return -EINVAL;
634 watch->mask = mask;
636 mutex_lock(&inode->inotify_mutex);
637 mutex_lock(&ih->mutex);
639 /* Initialize a new watch */
640 ret = inotify_handle_get_wd(ih, watch);
641 if (unlikely(ret))
642 goto out;
643 ret = watch->wd;
645 /* save a reference to handle and bump the count to make it official */
646 get_inotify_handle(ih);
647 watch->ih = ih;
650 * Save a reference to the inode and bump the ref count to make it
651 * official. We hold a reference to nameidata, which makes this safe.
653 watch->inode = igrab(inode);
655 if (!inotify_inode_watched(inode))
656 set_dentry_child_flags(inode, 1);
658 /* Add the watch to the handle's and the inode's list */
659 list_add(&watch->h_list, &ih->watches);
660 list_add(&watch->i_list, &inode->inotify_watches);
661 out:
662 mutex_unlock(&ih->mutex);
663 mutex_unlock(&inode->inotify_mutex);
664 return ret;
666 EXPORT_SYMBOL_GPL(inotify_add_watch);
669 * inotify_rm_wd - remove a watch from an inotify instance
670 * @ih: inotify handle
671 * @wd: watch descriptor to remove
673 * Can sleep.
675 int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
677 struct inotify_watch *watch;
678 struct inode *inode;
680 mutex_lock(&ih->mutex);
681 watch = idr_find(&ih->idr, wd);
682 if (unlikely(!watch)) {
683 mutex_unlock(&ih->mutex);
684 return -EINVAL;
686 get_inotify_watch(watch);
687 inode = watch->inode;
688 mutex_unlock(&ih->mutex);
690 mutex_lock(&inode->inotify_mutex);
691 mutex_lock(&ih->mutex);
693 /* make sure that we did not race */
694 if (likely(idr_find(&ih->idr, wd) == watch))
695 inotify_remove_watch_locked(ih, watch);
697 mutex_unlock(&ih->mutex);
698 mutex_unlock(&inode->inotify_mutex);
699 put_inotify_watch(watch);
701 return 0;
703 EXPORT_SYMBOL_GPL(inotify_rm_wd);
706 * inotify_rm_watch - remove a watch from an inotify instance
707 * @ih: inotify handle
708 * @watch: watch to remove
710 * Can sleep.
712 int inotify_rm_watch(struct inotify_handle *ih,
713 struct inotify_watch *watch)
715 return inotify_rm_wd(ih, watch->wd);
717 EXPORT_SYMBOL_GPL(inotify_rm_watch);
720 * inotify_setup - core initialization function
722 static int __init inotify_setup(void)
724 atomic_set(&inotify_cookie, 0);
726 return 0;
729 module_init(inotify_setup);