4 * This file and its contents are supplied under the terms of the
5 * Common Development and Distribution License ("CDDL"), version 1.0.
6 * You may only use this file in accordance with the terms of version
9 * A full copy of the text of the CDDL should have accompanied this
10 * source. A copy of the CDDL is also available via the Internet at
11 * http://www.illumos.org/license/CDDL.
16 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
19 #include <sys/zfs_context.h>
20 #include <sys/multilist.h>
22 /* needed for spa_get_random() */
26 * This overrides the number of sublists in each multilist_t, which defaults
27 * to the number of CPUs in the system (see multilist_create()).
29 int zfs_multilist_num_sublists
= 0;
32 * Given the object contained on the list, return a pointer to the
33 * object's multilist_node_t structure it contains.
35 static multilist_node_t
*
36 multilist_d2l(multilist_t
*ml
, void *obj
)
38 return ((multilist_node_t
*)((char *)obj
+ ml
->ml_offset
));
42 * Initialize a new mutlilist using the parameters specified.
44 * - 'size' denotes the size of the structure containing the
46 * - 'offset' denotes the byte offset of the mutlilist_node_t within
47 * the structure that contains it.
48 * - 'num' specifies the number of internal sublists to create.
49 * - 'index_func' is used to determine which sublist to insert into
50 * when the multilist_insert() function is called; as well as which
51 * sublist to remove from when multilist_remove() is called. The
52 * requirements this function must meet, are the following:
54 * - It must always return the same value when called on the same
55 * object (to ensure the object is removed from the list it was
58 * - It must return a value in the range [0, number of sublists).
59 * The multilist_get_num_sublists() function may be used to
60 * determine the number of sublists in the multilist.
62 * Also, in order to reduce internal contention between the sublists
63 * during insertion and removal, this function should choose evenly
64 * between all available sublists when inserting. This isn't a hard
65 * requirement, but a general rule of thumb in order to garner the
66 * best multi-threaded performance out of the data structure.
69 multilist_create_impl(size_t size
, size_t offset
,
70 unsigned int num
, multilist_sublist_index_func_t
*index_func
)
73 ASSERT3U(size
, >=, offset
+ sizeof (multilist_node_t
));
75 ASSERT3P(index_func
, !=, NULL
);
77 multilist_t
*ml
= kmem_alloc(sizeof (*ml
), KM_SLEEP
);
78 ml
->ml_offset
= offset
;
79 ml
->ml_num_sublists
= num
;
80 ml
->ml_index_func
= index_func
;
82 ml
->ml_sublists
= kmem_zalloc(sizeof (multilist_sublist_t
) *
83 ml
->ml_num_sublists
, KM_SLEEP
);
85 ASSERT3P(ml
->ml_sublists
, !=, NULL
);
87 for (int i
= 0; i
< ml
->ml_num_sublists
; i
++) {
88 multilist_sublist_t
*mls
= &ml
->ml_sublists
[i
];
89 mutex_init(&mls
->mls_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
90 list_create(&mls
->mls_list
, size
, offset
);
96 * Allocate a new multilist, using the default number of sublists
97 * (the number of CPUs, or at least 4, or the tunable
98 * zfs_multilist_num_sublists).
101 multilist_create(size_t size
, size_t offset
,
102 multilist_sublist_index_func_t
*index_func
)
106 if (zfs_multilist_num_sublists
> 0) {
107 num_sublists
= zfs_multilist_num_sublists
;
109 num_sublists
= MAX(boot_ncpus
, 4);
112 return (multilist_create_impl(size
, offset
, num_sublists
, index_func
));
116 * Destroy the given multilist object, and free up any memory it holds.
119 multilist_destroy(multilist_t
*ml
)
121 ASSERT(multilist_is_empty(ml
));
123 for (int i
= 0; i
< ml
->ml_num_sublists
; i
++) {
124 multilist_sublist_t
*mls
= &ml
->ml_sublists
[i
];
126 ASSERT(list_is_empty(&mls
->mls_list
));
128 list_destroy(&mls
->mls_list
);
129 mutex_destroy(&mls
->mls_lock
);
132 ASSERT3P(ml
->ml_sublists
, !=, NULL
);
133 kmem_free(ml
->ml_sublists
,
134 sizeof (multilist_sublist_t
) * ml
->ml_num_sublists
);
136 ml
->ml_num_sublists
= 0;
138 kmem_free(ml
, sizeof (multilist_t
));
142 * Insert the given object into the multilist.
144 * This function will insert the object specified into the sublist
145 * determined using the function given at multilist creation time.
147 * The sublist locks are automatically acquired if not already held, to
148 * ensure consistency when inserting and removing from multiple threads.
151 multilist_insert(multilist_t
*ml
, void *obj
)
153 unsigned int sublist_idx
= ml
->ml_index_func(ml
, obj
);
154 multilist_sublist_t
*mls
;
157 DTRACE_PROBE3(multilist__insert
, multilist_t
*, ml
,
158 unsigned int, sublist_idx
, void *, obj
);
160 ASSERT3U(sublist_idx
, <, ml
->ml_num_sublists
);
162 mls
= &ml
->ml_sublists
[sublist_idx
];
165 * Note: Callers may already hold the sublist lock by calling
166 * multilist_sublist_lock(). Here we rely on MUTEX_HELD()
167 * returning TRUE if and only if the current thread holds the
168 * lock. While it's a little ugly to make the lock recursive in
169 * this way, it works and allows the calling code to be much
170 * simpler -- otherwise it would have to pass around a flag
171 * indicating that it already has the lock.
173 need_lock
= !MUTEX_HELD(&mls
->mls_lock
);
176 mutex_enter(&mls
->mls_lock
);
178 ASSERT(!multilist_link_active(multilist_d2l(ml
, obj
)));
180 multilist_sublist_insert_head(mls
, obj
);
183 mutex_exit(&mls
->mls_lock
);
187 * Remove the given object from the multilist.
189 * This function will remove the object specified from the sublist
190 * determined using the function given at multilist creation time.
192 * The necessary sublist locks are automatically acquired, to ensure
193 * consistency when inserting and removing from multiple threads.
196 multilist_remove(multilist_t
*ml
, void *obj
)
198 unsigned int sublist_idx
= ml
->ml_index_func(ml
, obj
);
199 multilist_sublist_t
*mls
;
202 DTRACE_PROBE3(multilist__remove
, multilist_t
*, ml
,
203 unsigned int, sublist_idx
, void *, obj
);
205 ASSERT3U(sublist_idx
, <, ml
->ml_num_sublists
);
207 mls
= &ml
->ml_sublists
[sublist_idx
];
208 /* See comment in multilist_insert(). */
209 need_lock
= !MUTEX_HELD(&mls
->mls_lock
);
212 mutex_enter(&mls
->mls_lock
);
214 ASSERT(multilist_link_active(multilist_d2l(ml
, obj
)));
216 multilist_sublist_remove(mls
, obj
);
219 mutex_exit(&mls
->mls_lock
);
223 * Check to see if this multilist object is empty.
225 * This will return TRUE if it finds all of the sublists of this
226 * multilist to be empty, and FALSE otherwise. Each sublist lock will be
227 * automatically acquired as necessary.
229 * If concurrent insertions and removals are occurring, the semantics
230 * of this function become a little fuzzy. Instead of locking all
231 * sublists for the entire call time of the function, each sublist is
232 * only locked as it is individually checked for emptiness. Thus, it's
233 * possible for this function to return TRUE with non-empty sublists at
234 * the time the function returns. This would be due to another thread
235 * inserting into a given sublist, after that specific sublist was check
236 * and deemed empty, but before all sublists have been checked.
239 multilist_is_empty(multilist_t
*ml
)
241 for (int i
= 0; i
< ml
->ml_num_sublists
; i
++) {
242 multilist_sublist_t
*mls
= &ml
->ml_sublists
[i
];
243 /* See comment in multilist_insert(). */
244 boolean_t need_lock
= !MUTEX_HELD(&mls
->mls_lock
);
247 mutex_enter(&mls
->mls_lock
);
249 if (!list_is_empty(&mls
->mls_list
)) {
251 mutex_exit(&mls
->mls_lock
);
257 mutex_exit(&mls
->mls_lock
);
263 /* Return the number of sublists composing this multilist */
265 multilist_get_num_sublists(multilist_t
*ml
)
267 return (ml
->ml_num_sublists
);
270 /* Return a randomly selected, valid sublist index for this multilist */
272 multilist_get_random_index(multilist_t
*ml
)
274 return (spa_get_random(ml
->ml_num_sublists
));
277 /* Lock and return the sublist specified at the given index */
278 multilist_sublist_t
*
279 multilist_sublist_lock(multilist_t
*ml
, unsigned int sublist_idx
)
281 multilist_sublist_t
*mls
;
283 ASSERT3U(sublist_idx
, <, ml
->ml_num_sublists
);
284 mls
= &ml
->ml_sublists
[sublist_idx
];
285 mutex_enter(&mls
->mls_lock
);
290 /* Lock and return the sublist that would be used to store the specified obj */
291 multilist_sublist_t
*
292 multilist_sublist_lock_obj(multilist_t
*ml
, void *obj
)
294 return (multilist_sublist_lock(ml
, ml
->ml_index_func(ml
, obj
)));
298 multilist_sublist_unlock(multilist_sublist_t
*mls
)
300 mutex_exit(&mls
->mls_lock
);
304 * We're allowing any object to be inserted into this specific sublist,
305 * but this can lead to trouble if multilist_remove() is called to
306 * remove this object. Specifically, if calling ml_index_func on this
307 * object returns an index for sublist different than what is passed as
308 * a parameter here, any call to multilist_remove() with this newly
309 * inserted object is undefined! (the call to multilist_remove() will
310 * remove the object from a list that it isn't contained in)
313 multilist_sublist_insert_head(multilist_sublist_t
*mls
, void *obj
)
315 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
316 list_insert_head(&mls
->mls_list
, obj
);
319 /* please see comment above multilist_sublist_insert_head */
321 multilist_sublist_insert_tail(multilist_sublist_t
*mls
, void *obj
)
323 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
324 list_insert_tail(&mls
->mls_list
, obj
);
328 * Move the object one element forward in the list.
330 * This function will move the given object forward in the list (towards
331 * the head) by one object. So, in essence, it will swap its position in
332 * the list with its "prev" pointer. If the given object is already at the
333 * head of the list, it cannot be moved forward any more than it already
334 * is, so no action is taken.
336 * NOTE: This function **must not** remove any object from the list other
337 * than the object given as the parameter. This is relied upon in
338 * arc_evict_state_impl().
341 multilist_sublist_move_forward(multilist_sublist_t
*mls
, void *obj
)
343 void *prev
= list_prev(&mls
->mls_list
, obj
);
345 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
346 ASSERT(!list_is_empty(&mls
->mls_list
));
348 /* 'obj' must be at the head of the list, nothing to do */
352 list_remove(&mls
->mls_list
, obj
);
353 list_insert_before(&mls
->mls_list
, prev
, obj
);
357 multilist_sublist_remove(multilist_sublist_t
*mls
, void *obj
)
359 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
360 list_remove(&mls
->mls_list
, obj
);
364 multilist_sublist_head(multilist_sublist_t
*mls
)
366 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
367 return (list_head(&mls
->mls_list
));
371 multilist_sublist_tail(multilist_sublist_t
*mls
)
373 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
374 return (list_tail(&mls
->mls_list
));
378 multilist_sublist_next(multilist_sublist_t
*mls
, void *obj
)
380 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
381 return (list_next(&mls
->mls_list
, obj
));
385 multilist_sublist_prev(multilist_sublist_t
*mls
, void *obj
)
387 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
388 return (list_prev(&mls
->mls_list
, obj
));
392 multilist_link_init(multilist_node_t
*link
)
394 list_link_init(link
);
398 multilist_link_active(multilist_node_t
*link
)
400 return (list_link_active(link
));