treewide: remove redundant IS_ERR() before error code check
[linux/fpc-iii.git] / Documentation / RCU / rcuref.txt
blob5e6429d66c2425b8b94b6d78fbcdb8f527586657
1 Reference-count design for elements of lists/arrays protected by RCU.
4 Please note that the percpu-ref feature is likely your first
5 stop if you need to combine reference counts and RCU.  Please see
6 include/linux/percpu-refcount.h for more information.  However, in
7 those unusual cases where percpu-ref would consume too much memory,
8 please read on.
10 ------------------------------------------------------------------------
12 Reference counting on elements of lists which are protected by traditional
13 reader/writer spinlocks or semaphores is straightforward:
15 CODE LISTING A:
16 1.                              2.
17 add()                           search_and_reference()
18 {                               {
19     alloc_object                    read_lock(&list_lock);
20     ...                             search_for_element
21     atomic_set(&el->rc, 1);         atomic_inc(&el->rc);
22     write_lock(&list_lock);          ...
23     add_element                     read_unlock(&list_lock);
24     ...                             ...
25     write_unlock(&list_lock);   }
28 3.                                      4.
29 release_referenced()                    delete()
30 {                                       {
31     ...                                     write_lock(&list_lock);
32     if(atomic_dec_and_test(&el->rc))        ...
33         kfree(el);
34     ...                                     remove_element
35 }                                           write_unlock(&list_lock);
36                                             ...
37                                             if (atomic_dec_and_test(&el->rc))
38                                                 kfree(el);
39                                             ...
40                                         }
42 If this list/array is made lock free using RCU as in changing the
43 write_lock() in add() and delete() to spin_lock() and changing read_lock()
44 in search_and_reference() to rcu_read_lock(), the atomic_inc() in
45 search_and_reference() could potentially hold reference to an element which
46 has already been deleted from the list/array.  Use atomic_inc_not_zero()
47 in this scenario as follows:
49 CODE LISTING B:
50 1.                                      2.
51 add()                                   search_and_reference()
52 {                                       {
53     alloc_object                            rcu_read_lock();
54     ...                                     search_for_element
55     atomic_set(&el->rc, 1);                 if (!atomic_inc_not_zero(&el->rc)) {
56     spin_lock(&list_lock);                      rcu_read_unlock();
57                                                 return FAIL;
58     add_element                             }
59     ...                                     ...
60     spin_unlock(&list_lock);                rcu_read_unlock();
61 }                                       }
62 3.                                      4.
63 release_referenced()                    delete()
64 {                                       {
65     ...                                     spin_lock(&list_lock);
66     if (atomic_dec_and_test(&el->rc))       ...
67         call_rcu(&el->head, el_free);       remove_element
68     ...                                     spin_unlock(&list_lock);
69 }                                           ...
70                                             if (atomic_dec_and_test(&el->rc))
71                                                 call_rcu(&el->head, el_free);
72                                             ...
73                                         }
75 Sometimes, a reference to the element needs to be obtained in the
76 update (write) stream.  In such cases, atomic_inc_not_zero() might be
77 overkill, since we hold the update-side spinlock.  One might instead
78 use atomic_inc() in such cases.
80 It is not always convenient to deal with "FAIL" in the
81 search_and_reference() code path.  In such cases, the
82 atomic_dec_and_test() may be moved from delete() to el_free()
83 as follows:
85 CODE LISTING C:
86 1.                                      2.
87 add()                                   search_and_reference()
88 {                                       {
89     alloc_object                            rcu_read_lock();
90     ...                                     search_for_element
91     atomic_set(&el->rc, 1);                 atomic_inc(&el->rc);
92     spin_lock(&list_lock);                  ...
94     add_element                             rcu_read_unlock();
95     ...                                 }
96     spin_unlock(&list_lock);            4.
97 }                                       delete()
98 3.                                      {
99 release_referenced()                        spin_lock(&list_lock);
100 {                                           ...
101     ...                                     remove_element
102     if (atomic_dec_and_test(&el->rc))       spin_unlock(&list_lock);
103         kfree(el);                          ...
104     ...                                     call_rcu(&el->head, el_free);
105 }                                           ...
106 5.                                      }
107 void el_free(struct rcu_head *rhp)
108 {
109     release_referenced();
110 }
112 The key point is that the initial reference added by add() is not removed
113 until after a grace period has elapsed following removal.  This means that
114 search_and_reference() cannot find this element, which means that the value
115 of el->rc cannot increase.  Thus, once it reaches zero, there are no
116 readers that can or ever will be able to reference the element.  The
117 element can therefore safely be freed.  This in turn guarantees that if
118 any reader finds the element, that reader may safely acquire a reference
119 without checking the value of the reference counter.
121 A clear advantage of the RCU-based pattern in listing C over the one
122 in listing B is that any call to search_and_reference() that locates
123 a given object will succeed in obtaining a reference to that object,
124 even given a concurrent invocation of delete() for that same object.
125 Similarly, a clear advantage of both listings B and C over listing A is
126 that a call to delete() is not delayed even if there are an arbitrarily
127 large number of calls to search_and_reference() searching for the same
128 object that delete() was invoked on.  Instead, all that is delayed is
129 the eventual invocation of kfree(), which is usually not a problem on
130 modern computer systems, even the small ones.
132 In cases where delete() can sleep, synchronize_rcu() can be called from
133 delete(), so that el_free() can be subsumed into delete() as follows:
136 delete()
137 {
138     spin_lock(&list_lock);
139     ...
140     remove_element
141     spin_unlock(&list_lock);
142     ...
143     synchronize_rcu();
144     if (atomic_dec_and_test(&el->rc))
145         kfree(el);
146     ...
147 }
149 As additional examples in the kernel, the pattern in listing C is used by
150 reference counting of struct pid, while the pattern in listing B is used by
151 struct posix_acl.