4 * This file and its contents are supplied under the terms of the
5 * Common Development and Distribution License ("CDDL"), version 1.0.
6 * You may only use this file in accordance with the terms of version
9 * A full copy of the text of the CDDL should have accompanied this
10 * source. A copy of the CDDL is also available via the Internet at
11 * http://www.illumos.org/license/CDDL.
16 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
19 #include <sys/zfs_context.h>
20 #include <sys/multilist.h>
21 #include <sys/trace_zfs.h>
24 * This overrides the number of sublists in each multilist_t, which defaults
25 * to the number of CPUs in the system (see multilist_create()).
27 uint_t zfs_multilist_num_sublists
= 0;
30 * Given the object contained on the list, return a pointer to the
31 * object's multilist_node_t structure it contains.
34 static multilist_node_t
*
35 multilist_d2l(multilist_t
*ml
, void *obj
)
37 return ((multilist_node_t
*)((char *)obj
+ ml
->ml_offset
));
40 #define multilist_d2l(ml, obj) ((void) sizeof (ml), (void) sizeof (obj), NULL)
44 * Initialize a new mutlilist using the parameters specified.
46 * - 'size' denotes the size of the structure containing the
48 * - 'offset' denotes the byte offset of the mutlilist_node_t within
49 * the structure that contains it.
50 * - 'num' specifies the number of internal sublists to create.
51 * - 'index_func' is used to determine which sublist to insert into
52 * when the multilist_insert() function is called; as well as which
53 * sublist to remove from when multilist_remove() is called. The
54 * requirements this function must meet, are the following:
56 * - It must always return the same value when called on the same
57 * object (to ensure the object is removed from the list it was
60 * - It must return a value in the range [0, number of sublists).
61 * The multilist_get_num_sublists() function may be used to
62 * determine the number of sublists in the multilist.
64 * Also, in order to reduce internal contention between the sublists
65 * during insertion and removal, this function should choose evenly
66 * between all available sublists when inserting. This isn't a hard
67 * requirement, but a general rule of thumb in order to garner the
68 * best multi-threaded performance out of the data structure.
71 multilist_create_impl(multilist_t
*ml
, size_t size
, size_t offset
,
72 uint_t num
, multilist_sublist_index_func_t
*index_func
)
75 ASSERT3U(size
, >=, offset
+ sizeof (multilist_node_t
));
77 ASSERT3P(index_func
, !=, NULL
);
79 ml
->ml_offset
= offset
;
80 ml
->ml_num_sublists
= num
;
81 ml
->ml_index_func
= index_func
;
83 ml
->ml_sublists
= kmem_zalloc(sizeof (multilist_sublist_t
) *
84 ml
->ml_num_sublists
, KM_SLEEP
);
86 ASSERT3P(ml
->ml_sublists
, !=, NULL
);
88 for (int i
= 0; i
< ml
->ml_num_sublists
; i
++) {
89 multilist_sublist_t
*mls
= &ml
->ml_sublists
[i
];
90 mutex_init(&mls
->mls_lock
, NULL
, MUTEX_NOLOCKDEP
, NULL
);
91 list_create(&mls
->mls_list
, size
, offset
);
96 * Allocate a new multilist, using the default number of sublists (the number
97 * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note
98 * that the multilists do not expand if more CPUs are hot-added. In that case,
99 * we will have less fanout than boot_ncpus, but we don't want to always
100 * reserve the RAM necessary to create the extra slots for additional CPUs up
101 * front, and dynamically adding them is a complex task.
104 multilist_create(multilist_t
*ml
, size_t size
, size_t offset
,
105 multilist_sublist_index_func_t
*index_func
)
109 if (zfs_multilist_num_sublists
> 0) {
110 num_sublists
= zfs_multilist_num_sublists
;
112 num_sublists
= MAX(boot_ncpus
, 4);
115 multilist_create_impl(ml
, size
, offset
, num_sublists
, index_func
);
119 * Destroy the given multilist object, and free up any memory it holds.
122 multilist_destroy(multilist_t
*ml
)
124 ASSERT(multilist_is_empty(ml
));
126 for (int i
= 0; i
< ml
->ml_num_sublists
; i
++) {
127 multilist_sublist_t
*mls
= &ml
->ml_sublists
[i
];
129 ASSERT(list_is_empty(&mls
->mls_list
));
131 list_destroy(&mls
->mls_list
);
132 mutex_destroy(&mls
->mls_lock
);
135 ASSERT3P(ml
->ml_sublists
, !=, NULL
);
136 kmem_free(ml
->ml_sublists
,
137 sizeof (multilist_sublist_t
) * ml
->ml_num_sublists
);
139 ml
->ml_num_sublists
= 0;
141 ml
->ml_sublists
= NULL
;
145 * Insert the given object into the multilist.
147 * This function will insert the object specified into the sublist
148 * determined using the function given at multilist creation time.
150 * The sublist locks are automatically acquired if not already held, to
151 * ensure consistency when inserting and removing from multiple threads.
154 multilist_insert(multilist_t
*ml
, void *obj
)
156 unsigned int sublist_idx
= ml
->ml_index_func(ml
, obj
);
157 multilist_sublist_t
*mls
;
160 DTRACE_PROBE3(multilist__insert
, multilist_t
*, ml
,
161 unsigned int, sublist_idx
, void *, obj
);
163 ASSERT3U(sublist_idx
, <, ml
->ml_num_sublists
);
165 mls
= &ml
->ml_sublists
[sublist_idx
];
168 * Note: Callers may already hold the sublist lock by calling
169 * multilist_sublist_lock(). Here we rely on MUTEX_HELD()
170 * returning TRUE if and only if the current thread holds the
171 * lock. While it's a little ugly to make the lock recursive in
172 * this way, it works and allows the calling code to be much
173 * simpler -- otherwise it would have to pass around a flag
174 * indicating that it already has the lock.
176 need_lock
= !MUTEX_HELD(&mls
->mls_lock
);
179 mutex_enter(&mls
->mls_lock
);
181 ASSERT(!multilist_link_active(multilist_d2l(ml
, obj
)));
183 multilist_sublist_insert_head(mls
, obj
);
186 mutex_exit(&mls
->mls_lock
);
190 * Remove the given object from the multilist.
192 * This function will remove the object specified from the sublist
193 * determined using the function given at multilist creation time.
195 * The necessary sublist locks are automatically acquired, to ensure
196 * consistency when inserting and removing from multiple threads.
199 multilist_remove(multilist_t
*ml
, void *obj
)
201 unsigned int sublist_idx
= ml
->ml_index_func(ml
, obj
);
202 multilist_sublist_t
*mls
;
205 DTRACE_PROBE3(multilist__remove
, multilist_t
*, ml
,
206 unsigned int, sublist_idx
, void *, obj
);
208 ASSERT3U(sublist_idx
, <, ml
->ml_num_sublists
);
210 mls
= &ml
->ml_sublists
[sublist_idx
];
211 /* See comment in multilist_insert(). */
212 need_lock
= !MUTEX_HELD(&mls
->mls_lock
);
215 mutex_enter(&mls
->mls_lock
);
217 ASSERT(multilist_link_active(multilist_d2l(ml
, obj
)));
219 multilist_sublist_remove(mls
, obj
);
222 mutex_exit(&mls
->mls_lock
);
226 * Check to see if this multilist object is empty.
228 * This will return TRUE if it finds all of the sublists of this
229 * multilist to be empty, and FALSE otherwise. Each sublist lock will be
230 * automatically acquired as necessary.
232 * If concurrent insertions and removals are occurring, the semantics
233 * of this function become a little fuzzy. Instead of locking all
234 * sublists for the entire call time of the function, each sublist is
235 * only locked as it is individually checked for emptiness. Thus, it's
236 * possible for this function to return TRUE with non-empty sublists at
237 * the time the function returns. This would be due to another thread
238 * inserting into a given sublist, after that specific sublist was check
239 * and deemed empty, but before all sublists have been checked.
242 multilist_is_empty(multilist_t
*ml
)
244 for (int i
= 0; i
< ml
->ml_num_sublists
; i
++) {
245 multilist_sublist_t
*mls
= &ml
->ml_sublists
[i
];
246 /* See comment in multilist_insert(). */
247 boolean_t need_lock
= !MUTEX_HELD(&mls
->mls_lock
);
250 mutex_enter(&mls
->mls_lock
);
252 if (!list_is_empty(&mls
->mls_list
)) {
254 mutex_exit(&mls
->mls_lock
);
260 mutex_exit(&mls
->mls_lock
);
266 /* Return the number of sublists composing this multilist */
268 multilist_get_num_sublists(multilist_t
*ml
)
270 return (ml
->ml_num_sublists
);
273 /* Return a randomly selected, valid sublist index for this multilist */
275 multilist_get_random_index(multilist_t
*ml
)
277 return (random_in_range(ml
->ml_num_sublists
));
280 /* Lock and return the sublist specified at the given index */
281 multilist_sublist_t
*
282 multilist_sublist_lock(multilist_t
*ml
, unsigned int sublist_idx
)
284 multilist_sublist_t
*mls
;
286 ASSERT3U(sublist_idx
, <, ml
->ml_num_sublists
);
287 mls
= &ml
->ml_sublists
[sublist_idx
];
288 mutex_enter(&mls
->mls_lock
);
293 /* Lock and return the sublist that would be used to store the specified obj */
294 multilist_sublist_t
*
295 multilist_sublist_lock_obj(multilist_t
*ml
, void *obj
)
297 return (multilist_sublist_lock(ml
, ml
->ml_index_func(ml
, obj
)));
301 multilist_sublist_unlock(multilist_sublist_t
*mls
)
303 mutex_exit(&mls
->mls_lock
);
307 * We're allowing any object to be inserted into this specific sublist,
308 * but this can lead to trouble if multilist_remove() is called to
309 * remove this object. Specifically, if calling ml_index_func on this
310 * object returns an index for sublist different than what is passed as
311 * a parameter here, any call to multilist_remove() with this newly
312 * inserted object is undefined! (the call to multilist_remove() will
313 * remove the object from a list that it isn't contained in)
316 multilist_sublist_insert_head(multilist_sublist_t
*mls
, void *obj
)
318 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
319 list_insert_head(&mls
->mls_list
, obj
);
322 /* please see comment above multilist_sublist_insert_head */
324 multilist_sublist_insert_tail(multilist_sublist_t
*mls
, void *obj
)
326 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
327 list_insert_tail(&mls
->mls_list
, obj
);
331 * Move the object one element forward in the list.
333 * This function will move the given object forward in the list (towards
334 * the head) by one object. So, in essence, it will swap its position in
335 * the list with its "prev" pointer. If the given object is already at the
336 * head of the list, it cannot be moved forward any more than it already
337 * is, so no action is taken.
339 * NOTE: This function **must not** remove any object from the list other
340 * than the object given as the parameter. This is relied upon in
341 * arc_evict_state_impl().
344 multilist_sublist_move_forward(multilist_sublist_t
*mls
, void *obj
)
346 void *prev
= list_prev(&mls
->mls_list
, obj
);
348 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
349 ASSERT(!list_is_empty(&mls
->mls_list
));
351 /* 'obj' must be at the head of the list, nothing to do */
355 list_remove(&mls
->mls_list
, obj
);
356 list_insert_before(&mls
->mls_list
, prev
, obj
);
360 multilist_sublist_remove(multilist_sublist_t
*mls
, void *obj
)
362 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
363 list_remove(&mls
->mls_list
, obj
);
367 multilist_sublist_is_empty(multilist_sublist_t
*mls
)
369 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
370 return (list_is_empty(&mls
->mls_list
));
374 multilist_sublist_is_empty_idx(multilist_t
*ml
, unsigned int sublist_idx
)
376 multilist_sublist_t
*mls
;
379 ASSERT3U(sublist_idx
, <, ml
->ml_num_sublists
);
380 mls
= &ml
->ml_sublists
[sublist_idx
];
381 ASSERT(!MUTEX_HELD(&mls
->mls_lock
));
382 mutex_enter(&mls
->mls_lock
);
383 empty
= list_is_empty(&mls
->mls_list
);
384 mutex_exit(&mls
->mls_lock
);
389 multilist_sublist_head(multilist_sublist_t
*mls
)
391 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
392 return (list_head(&mls
->mls_list
));
396 multilist_sublist_tail(multilist_sublist_t
*mls
)
398 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
399 return (list_tail(&mls
->mls_list
));
403 multilist_sublist_next(multilist_sublist_t
*mls
, void *obj
)
405 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
406 return (list_next(&mls
->mls_list
, obj
));
410 multilist_sublist_prev(multilist_sublist_t
*mls
, void *obj
)
412 ASSERT(MUTEX_HELD(&mls
->mls_lock
));
413 return (list_prev(&mls
->mls_list
, obj
));
417 multilist_link_init(multilist_node_t
*link
)
419 list_link_init(link
);
423 multilist_link_active(multilist_node_t
*link
)
425 return (list_link_active(link
));
428 ZFS_MODULE_PARAM(zfs
, zfs_
, multilist_num_sublists
, UINT
, ZMOD_RW
,
429 "Number of sublists used in each multilist");