// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-cache-background-tracker.h"
#include "dm-cache-policy-internal.h"
#include "dm-cache-policy.h"
#include "dm.h"

#include <linux/hash.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/math64.h>

#define DM_MSG_PREFIX "cache-policy-smq"
/*----------------------------------------------------------------*/

/*
 * Safe division functions that return zero on divide by zero.
 */
static unsigned int safe_div(unsigned int n, unsigned int d)
{
	return d ? n / d : 0u;
}

static unsigned int safe_mod(unsigned int n, unsigned int d)
{
	return d ? n % d : 0u;
}
/*----------------------------------------------------------------*/

struct entry {
	unsigned int hash_next:28;
	unsigned int prev:28;
	unsigned int next:28;
	unsigned int level:6;
	bool dirty:1;
	bool allocated:1;
	bool sentinel:1;
	bool pending_work:1;

	dm_oblock_t oblock;
};

/*----------------------------------------------------------------*/

#define INDEXER_NULL ((1u << 28u) - 1u)
/*
 * An entry_space manages a set of entries that we use for the queues.
 * The clean and dirty queues share entries, so this object is separate
 * from the queue itself.
 */
struct entry_space {
	struct entry *begin;
	struct entry *end;
};

static int space_init(struct entry_space *es, unsigned int nr_entries)
{
	if (!nr_entries) {
		es->begin = es->end = NULL;
		return 0;
	}

	es->begin = vzalloc(array_size(nr_entries, sizeof(struct entry)));
	if (!es->begin)
		return -ENOMEM;

	es->end = es->begin + nr_entries;
	return 0;
}
static void space_exit(struct entry_space *es)
{
	vfree(es->begin);
}

static struct entry *__get_entry(struct entry_space *es, unsigned int block)
{
	struct entry *e;

	e = es->begin + block;
	BUG_ON(e >= es->end);

	return e;
}

static unsigned int to_index(struct entry_space *es, struct entry *e)
{
	BUG_ON(e < es->begin || e >= es->end);
	return e - es->begin;
}

static struct entry *to_entry(struct entry_space *es, unsigned int block)
{
	if (block == INDEXER_NULL)
		return NULL;

	return __get_entry(es, block);
}
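/*
 * Illustrative sketch (not part of the original source): entries are
 * referred to by 28-bit indexes rather than pointers, with INDEXER_NULL
 * playing the role of NULL.  Given an entry_space es set up with
 * space_init(&es, 16):
 *
 *	struct entry *e = to_entry(&es, 3);	// es.begin + 3
 *	unsigned int i = to_index(&es, e);	// 3
 *	to_entry(&es, INDEXER_NULL);		// NULL
 */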
/*----------------------------------------------------------------*/
struct ilist {
	unsigned int nr_elts;	/* excluding sentinel entries */
	unsigned int head, tail;
};

static void l_init(struct ilist *l)
{
	l->nr_elts = 0;
	l->head = l->tail = INDEXER_NULL;
}
static struct entry *l_head(struct entry_space *es, struct ilist *l)
{
	return to_entry(es, l->head);
}

static struct entry *l_tail(struct entry_space *es, struct ilist *l)
{
	return to_entry(es, l->tail);
}

static struct entry *l_next(struct entry_space *es, struct entry *e)
{
	return to_entry(es, e->next);
}

static struct entry *l_prev(struct entry_space *es, struct entry *e)
{
	return to_entry(es, e->prev);
}

static bool l_empty(struct ilist *l)
{
	return l->head == INDEXER_NULL;
}
static void l_add_head(struct entry_space *es, struct ilist *l, struct entry *e)
{
	struct entry *head = l_head(es, l);

	e->next = l->head;
	e->prev = INDEXER_NULL;

	if (head)
		head->prev = l->head = to_index(es, e);
	else
		l->head = l->tail = to_index(es, e);

	if (!e->sentinel)
		l->nr_elts++;
}
static void l_add_tail(struct entry_space *es, struct ilist *l, struct entry *e)
{
	struct entry *tail = l_tail(es, l);

	e->next = INDEXER_NULL;
	e->prev = l->tail;

	if (tail)
		tail->next = l->tail = to_index(es, e);
	else
		l->head = l->tail = to_index(es, e);

	if (!e->sentinel)
		l->nr_elts++;
}
static void l_add_before(struct entry_space *es, struct ilist *l,
			 struct entry *old, struct entry *e)
{
	struct entry *prev = l_prev(es, old);

	if (!prev)
		l_add_head(es, l, e);

	else {
		e->prev = old->prev;
		e->next = to_index(es, old);
		prev->next = old->prev = to_index(es, e);

		if (!e->sentinel)
			l->nr_elts++;
	}
}
static void l_del(struct entry_space *es, struct ilist *l, struct entry *e)
{
	struct entry *prev = l_prev(es, e);
	struct entry *next = l_next(es, e);

	if (prev)
		prev->next = e->next;
	else
		l->head = e->next;

	if (next)
		next->prev = e->prev;
	else
		l->tail = e->prev;

	if (!e->sentinel)
		l->nr_elts--;
}
static struct entry *l_pop_head(struct entry_space *es, struct ilist *l)
{
	struct entry *e;

	for (e = l_head(es, l); e; e = l_next(es, e))
		if (!e->sentinel) {
			l_del(es, l, e);
			return e;
		}

	return NULL;
}

static struct entry *l_pop_tail(struct entry_space *es, struct ilist *l)
{
	struct entry *e;

	for (e = l_tail(es, l); e; e = l_prev(es, e))
		if (!e->sentinel) {
			l_del(es, l, e);
			return e;
		}

	return NULL;
}

/*----------------------------------------------------------------*/
/*
 * The stochastic-multi-queue is a set of lru lists stacked into levels.
 * Entries are moved up levels when they are used, which loosely orders the
 * most accessed entries in the top levels and least in the bottom.  This
 * structure is *much* better than a single lru list.
 */
#define MAX_LEVELS 64u

struct queue {
	struct entry_space *es;

	unsigned int nr_elts;
	unsigned int nr_levels;
	struct ilist qs[MAX_LEVELS];

	/*
	 * We maintain a count of the number of entries we would like in each
	 * level.
	 */
	unsigned int last_target_nr_elts;
	unsigned int nr_top_levels;
	unsigned int nr_in_top_levels;
	unsigned int target_count[MAX_LEVELS];
};
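/*
 * Rough picture (an illustration, not taken from the original comments):
 * with MAX_LEVELS = 64 the queue is a stack of lru lists,
 *
 *	qs[63]  most frequently hit entries
 *	  ...
 *	qs[0]   least frequently hit entries
 *
 * and within a level the head is the oldest entry.  q_requeue() below moves
 * a hit entry up a few levels, while q_redistribute() periodically
 * rebalances the per-level populations towards target_count[].
 */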
static void q_init(struct queue *q, struct entry_space *es, unsigned int nr_levels)
{
	unsigned int i;

	q->es = es;
	q->nr_elts = 0;
	q->nr_levels = nr_levels;

	for (i = 0; i < q->nr_levels; i++) {
		l_init(q->qs + i);
		q->target_count[i] = 0u;
	}

	q->last_target_nr_elts = 0u;
	q->nr_top_levels = 0u;
	q->nr_in_top_levels = 0u;
}
static unsigned int q_size(struct queue *q)
{
	return q->nr_elts;
}

/*
 * Insert an entry to the back of the given level.
 */
static void q_push(struct queue *q, struct entry *e)
{
	BUG_ON(e->pending_work);

	if (!e->sentinel)
		q->nr_elts++;

	l_add_tail(q->es, q->qs + e->level, e);
}

static void q_push_front(struct queue *q, struct entry *e)
{
	BUG_ON(e->pending_work);

	if (!e->sentinel)
		q->nr_elts++;

	l_add_head(q->es, q->qs + e->level, e);
}

static void q_push_before(struct queue *q, struct entry *old, struct entry *e)
{
	BUG_ON(e->pending_work);

	if (!e->sentinel)
		q->nr_elts++;

	l_add_before(q->es, q->qs + e->level, old, e);
}

static void q_del(struct queue *q, struct entry *e)
{
	l_del(q->es, q->qs + e->level, e);
	if (!e->sentinel)
		q->nr_elts--;
}
/*
 * Return the oldest entry of the lowest populated level.
 */
static struct entry *q_peek(struct queue *q, unsigned int max_level, bool can_cross_sentinel)
{
	unsigned int level;
	struct entry *e;

	max_level = min(max_level, q->nr_levels);

	for (level = 0; level < max_level; level++)
		for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) {
			if (e->sentinel) {
				if (can_cross_sentinel)
					continue;
				else
					break;
			}

			return e;
		}

	return NULL;
}

static struct entry *q_pop(struct queue *q)
{
	struct entry *e = q_peek(q, q->nr_levels, true);

	if (e)
		q_del(q, e);

	return e;
}
/*
 * This function assumes there is a non-sentinel entry to pop.  It's only
 * used by redistribute, so we know this is true.  It also doesn't adjust
 * the q->nr_elts count.
 */
static struct entry *__redist_pop_from(struct queue *q, unsigned int level)
{
	struct entry *e;

	for (; level < q->nr_levels; level++)
		for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e))
			if (!e->sentinel) {
				l_del(q->es, q->qs + e->level, e);
				return e;
			}

	return NULL;
}
static void q_set_targets_subrange_(struct queue *q, unsigned int nr_elts,
				    unsigned int lbegin, unsigned int lend)
{
	unsigned int level, nr_levels, entries_per_level, remainder;

	BUG_ON(lbegin > lend);
	BUG_ON(lend > q->nr_levels);
	nr_levels = lend - lbegin;
	entries_per_level = safe_div(nr_elts, nr_levels);
	remainder = safe_mod(nr_elts, nr_levels);

	for (level = lbegin; level < lend; level++)
		q->target_count[level] =
			(level < (lbegin + remainder)) ? entries_per_level + 1u : entries_per_level;
}
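/*
 * Worked example (illustrative only): q_set_targets_subrange_(q, 10, 4, 8)
 * spreads 10 entries over levels 4..7: entries_per_level = 10 / 4 = 2,
 * remainder = 10 % 4 = 2, so target_count[4..7] becomes 3, 3, 2, 2.
 */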
/*
 * Typically we have fewer elements in the top few levels which allows us
 * to adjust the promote threshold nicely.
 */
static void q_set_targets(struct queue *q)
{
	if (q->last_target_nr_elts == q->nr_elts)
		return;

	q->last_target_nr_elts = q->nr_elts;

	if (q->nr_top_levels > q->nr_levels)
		q_set_targets_subrange_(q, q->nr_elts, 0, q->nr_levels);

	else {
		q_set_targets_subrange_(q, q->nr_in_top_levels,
					q->nr_levels - q->nr_top_levels, q->nr_levels);

		if (q->nr_in_top_levels < q->nr_elts)
			q_set_targets_subrange_(q, q->nr_elts - q->nr_in_top_levels,
						0, q->nr_levels - q->nr_top_levels);
		else
			q_set_targets_subrange_(q, 0, 0, q->nr_levels - q->nr_top_levels);
	}
}
static void q_redistribute(struct queue *q)
{
	unsigned int target, level;
	struct ilist *l, *l_above;
	struct entry *e;

	q_set_targets(q);

	for (level = 0u; level < q->nr_levels - 1u; level++) {
		l = q->qs + level;
		target = q->target_count[level];

		/*
		 * Pull down some entries from the level above.
		 */
		while (l->nr_elts < target) {
			e = __redist_pop_from(q, level + 1u);
			if (!e) {
				/* bug in nr_elts */
				break;
			}

			e->level = level;
			l_add_tail(q->es, l, e);
		}

		/*
		 * Push some entries up.
		 */
		l_above = q->qs + level + 1u;
		while (l->nr_elts > target) {
			e = l_pop_tail(q->es, l);

			if (!e)
				/* bug in nr_elts */
				break;

			e->level = level + 1u;
			l_add_tail(q->es, l_above, e);
		}
	}
}
static void q_requeue(struct queue *q, struct entry *e, unsigned int extra_levels,
		      struct entry *s1, struct entry *s2)
{
	struct entry *de;
	unsigned int sentinels_passed = 0;
	unsigned int new_level = min(q->nr_levels - 1u, e->level + extra_levels);

	/* try and find an entry to swap with */
	if (extra_levels && (e->level < q->nr_levels - 1u)) {
		for (de = l_head(q->es, q->qs + new_level); de && de->sentinel; de = l_next(q->es, de))
			sentinels_passed++;

		if (de) {
			q_del(q, de);
			de->level = e->level;
			if (s1) {
				switch (sentinels_passed) {
				case 0:
					q_push_before(q, s1, de);
					break;

				case 1:
					q_push_before(q, s2, de);
					break;

				default:
					q_push(q, de);
				}
			} else
				q_push(q, de);
		}
	}

	q_del(q, e);
	e->level = new_level;
	q_push(q, e);
}
/*----------------------------------------------------------------*/

#define FP_SHIFT 8
#define SIXTEENTH (1u << (FP_SHIFT - 4u))
#define EIGHTH (1u << (FP_SHIFT - 3u))

struct stats {
	unsigned int hit_threshold;
	unsigned int hits;
	unsigned int misses;
};

enum performance {
	Q_POOR,
	Q_FAIR,
	Q_WELL
};
static void stats_init(struct stats *s, unsigned int nr_levels)
{
	s->hit_threshold = (nr_levels * 3u) / 4u;
	s->hits = 0u;
	s->misses = 0u;
}

static void stats_reset(struct stats *s)
{
	s->hits = s->misses = 0u;
}

static void stats_level_accessed(struct stats *s, unsigned int level)
{
	if (level >= s->hit_threshold)
		s->hits++;
	else
		s->misses++;
}

static void stats_miss(struct stats *s)
{
	s->misses++;
}
/*
 * There are times when we don't have any confidence in the hotspot queue.
 * Such as when a fresh cache is created and the blocks have been spread
 * out across the levels, or if an io load changes.  We detect this by
 * seeing how often a lookup is in the top levels of the hotspot queue.
 */
static enum performance stats_assess(struct stats *s)
{
	unsigned int confidence = safe_div(s->hits << FP_SHIFT, s->hits + s->misses);

	if (confidence < SIXTEENTH)
		return Q_POOR;

	else if (confidence < EIGHTH)
		return Q_FAIR;

	else
		return Q_WELL;
}
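/*
 * Worked example (illustrative, using FP_SHIFT = 8 so SIXTEENTH = 16 and
 * EIGHTH = 32): hits = 1, misses = 20 gives confidence (1 << 8) / 21 = 12,
 * which is < 16, so Q_POOR; hits = 2, misses = 20 gives 512 / 22 = 23, so
 * Q_FAIR; hits = 3, misses = 20 gives 768 / 23 = 33, so Q_WELL.
 */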
/*----------------------------------------------------------------*/
struct smq_hash_table {
	struct entry_space *es;
	unsigned long long hash_bits;
	unsigned int *buckets;
};
/*
 * All cache entries are stored in a chained hash table.  To save space we
 * use indexing again, and only store indexes to the next entry.
 */
static int h_init(struct smq_hash_table *ht, struct entry_space *es, unsigned int nr_entries)
{
	unsigned int i, nr_buckets;

	ht->es = es;
	nr_buckets = roundup_pow_of_two(max(nr_entries / 4u, 16u));
	ht->hash_bits = __ffs(nr_buckets);

	ht->buckets = vmalloc(array_size(nr_buckets, sizeof(*ht->buckets)));
	if (!ht->buckets)
		return -ENOMEM;

	for (i = 0; i < nr_buckets; i++)
		ht->buckets[i] = INDEXER_NULL;

	return 0;
}
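/*
 * Sizing example (illustrative only): for nr_entries = 1000,
 * nr_buckets = roundup_pow_of_two(max(1000 / 4, 16)) = 256 and
 * hash_bits = __ffs(256) = 8, i.e. hash_64() is asked for an 8-bit hash.
 */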
static void h_exit(struct smq_hash_table *ht)
{
	vfree(ht->buckets);
}

static struct entry *h_head(struct smq_hash_table *ht, unsigned int bucket)
{
	return to_entry(ht->es, ht->buckets[bucket]);
}

static struct entry *h_next(struct smq_hash_table *ht, struct entry *e)
{
	return to_entry(ht->es, e->hash_next);
}

static void __h_insert(struct smq_hash_table *ht, unsigned int bucket, struct entry *e)
{
	e->hash_next = ht->buckets[bucket];
	ht->buckets[bucket] = to_index(ht->es, e);
}

static void h_insert(struct smq_hash_table *ht, struct entry *e)
{
	unsigned int h = hash_64(from_oblock(e->oblock), ht->hash_bits);

	__h_insert(ht, h, e);
}
static struct entry *__h_lookup(struct smq_hash_table *ht, unsigned int h, dm_oblock_t oblock,
				struct entry **prev)
{
	struct entry *e;

	*prev = NULL;
	for (e = h_head(ht, h); e; e = h_next(ht, e)) {
		if (e->oblock == oblock)
			return e;

		*prev = e;
	}

	return NULL;
}

static void __h_unlink(struct smq_hash_table *ht, unsigned int h,
		       struct entry *e, struct entry *prev)
{
	if (prev)
		prev->hash_next = e->hash_next;
	else
		ht->buckets[h] = e->hash_next;
}
/*
 * Also moves each entry to the front of the bucket.
 */
static struct entry *h_lookup(struct smq_hash_table *ht, dm_oblock_t oblock)
{
	struct entry *e, *prev;
	unsigned int h = hash_64(from_oblock(oblock), ht->hash_bits);

	e = __h_lookup(ht, h, oblock, &prev);
	if (e && prev) {
		/*
		 * Move to the front because this entry is likely
		 * to be hit again.
		 */
		__h_unlink(ht, h, e, prev);
		__h_insert(ht, h, e);
	}

	return e;
}

static void h_remove(struct smq_hash_table *ht, struct entry *e)
{
	unsigned int h = hash_64(from_oblock(e->oblock), ht->hash_bits);
	struct entry *prev;

	/*
	 * The down side of using a singly linked list is we have to
	 * iterate the bucket to remove an item.
	 */
	e = __h_lookup(ht, h, e->oblock, &prev);
	if (e)
		__h_unlink(ht, h, e, prev);
}
/*----------------------------------------------------------------*/

struct entry_alloc {
	struct entry_space *es;
	unsigned int begin;

	unsigned int nr_allocated;
	struct ilist free;
};

static void init_allocator(struct entry_alloc *ea, struct entry_space *es,
			   unsigned int begin, unsigned int end)
{
	unsigned int i;

	ea->es = es;
	ea->nr_allocated = 0u;
	ea->begin = begin;

	l_init(&ea->free);
	for (i = begin; i != end; i++)
		l_add_tail(ea->es, &ea->free, __get_entry(ea->es, i));
}
static void init_entry(struct entry *e)
{
	/*
	 * We can't memset because that would clear the hotspot and
	 * sentinel bits which remain constant.
	 */
	e->hash_next = INDEXER_NULL;
	e->next = INDEXER_NULL;
	e->prev = INDEXER_NULL;
	e->level = 0u;
	e->dirty = true;	/* FIXME: audit */
	e->allocated = true;
	e->sentinel = false;
	e->pending_work = false;
}

static struct entry *alloc_entry(struct entry_alloc *ea)
{
	struct entry *e;

	if (l_empty(&ea->free))
		return NULL;

	e = l_pop_head(ea->es, &ea->free);
	init_entry(e);
	ea->nr_allocated++;

	return e;
}
/*
 * This assumes the cblock hasn't already been allocated.
 */
static struct entry *alloc_particular_entry(struct entry_alloc *ea, unsigned int i)
{
	struct entry *e = __get_entry(ea->es, ea->begin + i);

	BUG_ON(e->allocated);

	l_del(ea->es, &ea->free, e);
	init_entry(e);
	ea->nr_allocated++;

	return e;
}

static void free_entry(struct entry_alloc *ea, struct entry *e)
{
	BUG_ON(!ea->nr_allocated);
	BUG_ON(!e->allocated);

	ea->nr_allocated--;
	e->allocated = false;
	l_add_tail(ea->es, &ea->free, e);
}

static bool allocator_empty(struct entry_alloc *ea)
{
	return l_empty(&ea->free);
}

static unsigned int get_index(struct entry_alloc *ea, struct entry *e)
{
	return to_index(ea->es, e) - ea->begin;
}

static struct entry *get_entry(struct entry_alloc *ea, unsigned int index)
{
	return __get_entry(ea->es, ea->begin + index);
}

/*----------------------------------------------------------------*/
#define NR_HOTSPOT_LEVELS 64u
#define NR_CACHE_LEVELS 64u

#define WRITEBACK_PERIOD (10ul * HZ)
#define DEMOTE_PERIOD (60ul * HZ)

#define HOTSPOT_UPDATE_PERIOD (HZ)
#define CACHE_UPDATE_PERIOD (60ul * HZ)

struct smq_policy {
	struct dm_cache_policy policy;

	/* protects everything */
	spinlock_t lock;
	dm_cblock_t cache_size;
	sector_t cache_block_size;

	sector_t hotspot_block_size;
	unsigned int nr_hotspot_blocks;
	unsigned int cache_blocks_per_hotspot_block;
	unsigned int hotspot_level_jump;

	struct entry_space es;
	struct entry_alloc writeback_sentinel_alloc;
	struct entry_alloc demote_sentinel_alloc;
	struct entry_alloc hotspot_alloc;
	struct entry_alloc cache_alloc;

	unsigned long *hotspot_hit_bits;
	unsigned long *cache_hit_bits;

	/*
	 * We maintain three queues of entries.  The cache proper,
	 * consisting of a clean and dirty queue, containing the currently
	 * active mappings.  The hotspot queue uses a larger block size to
	 * track blocks that are being hit frequently and potential
	 * candidates for promotion to the cache.
	 */
	struct queue hotspot;
	struct queue clean;
	struct queue dirty;

	struct stats hotspot_stats;
	struct stats cache_stats;

	/*
	 * Keeps track of time, incremented by the core.  We use this to
	 * avoid attributing multiple hits within the same tick.
	 */
	unsigned int tick;

	/*
	 * The hash tables allow us to quickly find an entry by origin
	 * block.
	 */
	struct smq_hash_table table;
	struct smq_hash_table hotspot_table;

	bool current_writeback_sentinels;
	unsigned long next_writeback_period;

	bool current_demote_sentinels;
	unsigned long next_demote_period;

	unsigned int write_promote_level;
	unsigned int read_promote_level;

	unsigned long next_hotspot_period;
	unsigned long next_cache_period;

	struct background_tracker *bg_work;

	bool migrations_allowed:1;

	/*
	 * If this is set the policy will try and clean the whole cache
	 * even if the device is not idle.
	 */
	bool cleaner:1;
};
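/*
 * Sketch of how the shared entry space is carved up by __smq_create()
 * below (illustrative, indexes into smq_policy::es):
 *
 *	[0, 2 * NR_CACHE_LEVELS)                writeback sentinels
 *	[2 * NR_CACHE_LEVELS, total_sentinels)  demote sentinels
 *	[total_sentinels, +nr_hotspot_blocks)   hotspot entries
 *	[..., +from_cblock(cache_size))         cache entries
 */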
/*----------------------------------------------------------------*/
static struct entry *get_sentinel(struct entry_alloc *ea, unsigned int level, bool which)
{
	return get_entry(ea, which ? level : NR_CACHE_LEVELS + level);
}

static struct entry *writeback_sentinel(struct smq_policy *mq, unsigned int level)
{
	return get_sentinel(&mq->writeback_sentinel_alloc, level, mq->current_writeback_sentinels);
}

static struct entry *demote_sentinel(struct smq_policy *mq, unsigned int level)
{
	return get_sentinel(&mq->demote_sentinel_alloc, level, mq->current_demote_sentinels);
}
static void __update_writeback_sentinels(struct smq_policy *mq)
{
	unsigned int level;
	struct queue *q = &mq->dirty;
	struct entry *sentinel;

	for (level = 0; level < q->nr_levels; level++) {
		sentinel = writeback_sentinel(mq, level);
		q_del(q, sentinel);
		q_push(q, sentinel);
	}
}

static void __update_demote_sentinels(struct smq_policy *mq)
{
	unsigned int level;
	struct queue *q = &mq->clean;
	struct entry *sentinel;

	for (level = 0; level < q->nr_levels; level++) {
		sentinel = demote_sentinel(mq, level);
		q_del(q, sentinel);
		q_push(q, sentinel);
	}
}
static void update_sentinels(struct smq_policy *mq)
{
	if (time_after(jiffies, mq->next_writeback_period)) {
		mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
		mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
		__update_writeback_sentinels(mq);
	}

	if (time_after(jiffies, mq->next_demote_period)) {
		mq->next_demote_period = jiffies + DEMOTE_PERIOD;
		mq->current_demote_sentinels = !mq->current_demote_sentinels;
		__update_demote_sentinels(mq);
	}
}
static void __sentinels_init(struct smq_policy *mq)
{
	unsigned int level;
	struct entry *sentinel;

	for (level = 0; level < NR_CACHE_LEVELS; level++) {
		sentinel = writeback_sentinel(mq, level);
		sentinel->level = level;
		q_push(&mq->dirty, sentinel);

		sentinel = demote_sentinel(mq, level);
		sentinel->level = level;
		q_push(&mq->clean, sentinel);
	}
}
static void sentinels_init(struct smq_policy *mq)
{
	mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
	mq->next_demote_period = jiffies + DEMOTE_PERIOD;

	mq->current_writeback_sentinels = false;
	mq->current_demote_sentinels = false;
	__sentinels_init(mq);

	mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
	mq->current_demote_sentinels = !mq->current_demote_sentinels;
	__sentinels_init(mq);
}

/*----------------------------------------------------------------*/
static void del_queue(struct smq_policy *mq, struct entry *e)
{
	q_del(e->dirty ? &mq->dirty : &mq->clean, e);
}

static void push_queue(struct smq_policy *mq, struct entry *e)
{
	if (e->dirty)
		q_push(&mq->dirty, e);
	else
		q_push(&mq->clean, e);
}
// !h, !q, a -> h, q, a
static void push(struct smq_policy *mq, struct entry *e)
{
	h_insert(&mq->table, e);
	if (!e->pending_work)
		push_queue(mq, e);
}

static void push_queue_front(struct smq_policy *mq, struct entry *e)
{
	if (e->dirty)
		q_push_front(&mq->dirty, e);
	else
		q_push_front(&mq->clean, e);
}

static void push_front(struct smq_policy *mq, struct entry *e)
{
	h_insert(&mq->table, e);
	if (!e->pending_work)
		push_queue_front(mq, e);
}

static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
{
	return to_cblock(get_index(&mq->cache_alloc, e));
}
static void requeue(struct smq_policy *mq, struct entry *e)
{
	/*
	 * Pending work has temporarily been taken out of the queues.
	 */
	if (e->pending_work)
		return;

	if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) {
		if (!e->dirty) {
			q_requeue(&mq->clean, e, 1u, NULL, NULL);
			return;
		}

		q_requeue(&mq->dirty, e, 1u,
			  get_sentinel(&mq->writeback_sentinel_alloc, e->level, !mq->current_writeback_sentinels),
			  get_sentinel(&mq->writeback_sentinel_alloc, e->level, mq->current_writeback_sentinels));
	}
}
static unsigned int default_promote_level(struct smq_policy *mq)
{
	/*
	 * The promote level depends on the current performance of the
	 * cache.
	 *
	 * If the cache is performing badly, then we can't afford
	 * to promote much without causing performance to drop below that
	 * of the origin device.
	 *
	 * If the cache is performing well, then we don't need to promote
	 * much.  If it isn't broken, don't fix it.
	 *
	 * If the cache is middling then we promote more.
	 *
	 * This scheme reminds me of a graph of entropy vs probability of a
	 * binary variable.
	 */
	static const unsigned int table[] = {
		1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1
	};

	unsigned int hits = mq->cache_stats.hits;
	unsigned int misses = mq->cache_stats.misses;
	unsigned int index = safe_div(hits << 4u, hits + misses);

	return table[index];
}
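/*
 * Worked example (illustrative only): with hits = 40 and misses = 60 the
 * index is (40 << 4) / 100 = 6 and the promote level is table[6] = 7; with
 * no statistics yet (hits = misses = 0) safe_div() yields index 0 and the
 * level defaults to table[0] = 1.
 */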
static void update_promote_levels(struct smq_policy *mq)
{
	/*
	 * If there are unused cache entries then we want to be really
	 * eager to promote.
	 */
	unsigned int threshold_level = allocator_empty(&mq->cache_alloc) ?
		default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u);

	threshold_level = max(threshold_level, NR_HOTSPOT_LEVELS);

	/*
	 * If the hotspot queue is performing badly then we have little
	 * confidence that we know which blocks to promote.  So we cut down
	 * the amount of promotions.
	 */
	switch (stats_assess(&mq->hotspot_stats)) {
	case Q_POOR:
		threshold_level /= 4u;
		break;

	case Q_FAIR:
		threshold_level /= 2u;
		break;

	case Q_WELL:
		break;
	}

	mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level;
	mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level);
}
/*
 * If the hotspot queue is performing badly, then we try and move entries
 * around more quickly.
 */
static void update_level_jump(struct smq_policy *mq)
{
	switch (stats_assess(&mq->hotspot_stats)) {
	case Q_POOR:
		mq->hotspot_level_jump = 4u;
		break;

	case Q_FAIR:
		mq->hotspot_level_jump = 2u;
		break;

	case Q_WELL:
		mq->hotspot_level_jump = 1u;
		break;
	}
}
static void end_hotspot_period(struct smq_policy *mq)
{
	clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);
	update_promote_levels(mq);

	if (time_after(jiffies, mq->next_hotspot_period)) {
		update_level_jump(mq);
		q_redistribute(&mq->hotspot);
		stats_reset(&mq->hotspot_stats);
		mq->next_hotspot_period = jiffies + HOTSPOT_UPDATE_PERIOD;
	}
}

static void end_cache_period(struct smq_policy *mq)
{
	if (time_after(jiffies, mq->next_cache_period)) {
		clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));

		q_redistribute(&mq->dirty);
		q_redistribute(&mq->clean);
		stats_reset(&mq->cache_stats);

		mq->next_cache_period = jiffies + CACHE_UPDATE_PERIOD;
	}
}

/*----------------------------------------------------------------*/
/*
 * Targets are given as a percentage.
 */
#define CLEAN_TARGET 25u
#define FREE_TARGET 25u

static unsigned int percent_to_target(struct smq_policy *mq, unsigned int p)
{
	return from_cblock(mq->cache_size) * p / 100u;
}
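/*
 * Example (illustrative only): for a cache of 10000 blocks,
 * percent_to_target(mq, FREE_TARGET) = 10000 * 25 / 100 = 2500 blocks,
 * the amount free_target_met() below wants free or queued for demotion.
 */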
static bool clean_target_met(struct smq_policy *mq, bool idle)
{
	/*
	 * Cache entries may not be populated.  So we cannot rely on the
	 * size of the clean queue.
	 */
	if (idle || mq->cleaner) {
		/*
		 * We'd like to clean everything.
		 */
		return q_size(&mq->dirty) == 0u;
	}

	/*
	 * If we're busy we don't worry about cleaning at all.
	 */
	return true;
}
static bool free_target_met(struct smq_policy *mq)
{
	unsigned int nr_free;

	nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated;
	return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
		percent_to_target(mq, FREE_TARGET);
}

/*----------------------------------------------------------------*/
static void mark_pending(struct smq_policy *mq, struct entry *e)
{
	BUG_ON(e->sentinel);
	BUG_ON(!e->allocated);
	BUG_ON(e->pending_work);
	e->pending_work = true;
}

static void clear_pending(struct smq_policy *mq, struct entry *e)
{
	BUG_ON(!e->pending_work);
	e->pending_work = false;
}
static void queue_writeback(struct smq_policy *mq, bool idle)
{
	int r;
	struct policy_work work;
	struct entry *e;

	e = q_peek(&mq->dirty, mq->dirty.nr_levels, idle);
	if (e) {
		mark_pending(mq, e);
		q_del(&mq->dirty, e);

		work.op = POLICY_WRITEBACK;
		work.oblock = e->oblock;
		work.cblock = infer_cblock(mq, e);

		r = btracker_queue(mq->bg_work, &work, NULL);
		if (r) {
			clear_pending(mq, e);
			q_push_front(&mq->dirty, e);
		}
	}
}
static void queue_demotion(struct smq_policy *mq)
{
	int r;
	struct policy_work work;
	struct entry *e;

	if (WARN_ON_ONCE(!mq->migrations_allowed))
		return;

	e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
	if (!e) {
		if (!clean_target_met(mq, true))
			queue_writeback(mq, false);
		return;
	}

	mark_pending(mq, e);
	q_del(&mq->clean, e);

	work.op = POLICY_DEMOTE;
	work.oblock = e->oblock;
	work.cblock = infer_cblock(mq, e);
	r = btracker_queue(mq->bg_work, &work, NULL);
	if (r) {
		clear_pending(mq, e);
		q_push_front(&mq->clean, e);
	}
}
static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
			    struct policy_work **workp)
{
	int r;
	struct entry *e;
	struct policy_work work;

	if (!mq->migrations_allowed)
		return;

	if (allocator_empty(&mq->cache_alloc)) {
		/*
		 * We always claim to be 'idle' to ensure some demotions happen
		 * with continuous loads.
		 */
		if (!free_target_met(mq))
			queue_demotion(mq);
		return;
	}

	if (btracker_promotion_already_present(mq->bg_work, oblock))
		return;

	/*
	 * We allocate the entry now to reserve the cblock.  If the
	 * background work is aborted we must remember to free it.
	 */
	e = alloc_entry(&mq->cache_alloc);
	BUG_ON(!e);
	e->pending_work = true;
	work.op = POLICY_PROMOTE;
	work.oblock = oblock;
	work.cblock = infer_cblock(mq, e);
	r = btracker_queue(mq->bg_work, &work, workp);
	if (r)
		free_entry(&mq->cache_alloc, e);
}

/*----------------------------------------------------------------*/
enum promote_result {
	PROMOTE_NOT,
	PROMOTE_TEMPORARY,
	PROMOTE_PERMANENT
};

/*
 * Converts a boolean into a promote result.
 */
static enum promote_result maybe_promote(bool promote)
{
	return promote ? PROMOTE_PERMANENT : PROMOTE_NOT;
}

static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e,
					  int data_dir, bool fast_promote)
{
	if (data_dir == WRITE) {
		if (!allocator_empty(&mq->cache_alloc) && fast_promote)
			return PROMOTE_TEMPORARY;

		return maybe_promote(hs_e->level >= mq->write_promote_level);
	} else
		return maybe_promote(hs_e->level >= mq->read_promote_level);
}
static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
{
	sector_t r = from_oblock(b);

	(void) sector_div(r, mq->cache_blocks_per_hotspot_block);
	return to_oblock(r);
}
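/*
 * Example (illustrative only): with cache_blocks_per_hotspot_block = 16,
 * origin blocks 0..15 map to hotspot block 0, blocks 16..31 to hotspot
 * block 1, so to_hblock(mq, 35) returns 2.
 */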
static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b)
{
	unsigned int hi;
	dm_oblock_t hb = to_hblock(mq, b);
	struct entry *e = h_lookup(&mq->hotspot_table, hb);

	if (e) {
		stats_level_accessed(&mq->hotspot_stats, e->level);

		hi = get_index(&mq->hotspot_alloc, e);
		q_requeue(&mq->hotspot, e,
			  test_and_set_bit(hi, mq->hotspot_hit_bits) ?
			  0u : mq->hotspot_level_jump,
			  NULL, NULL);

	} else {
		stats_miss(&mq->hotspot_stats);

		e = alloc_entry(&mq->hotspot_alloc);
		if (!e) {
			e = q_pop(&mq->hotspot);
			if (e) {
				h_remove(&mq->hotspot_table, e);
				hi = get_index(&mq->hotspot_alloc, e);
				clear_bit(hi, mq->hotspot_hit_bits);
			}
		}

		if (e) {
			e->oblock = hb;
			q_push(&mq->hotspot, e);
			h_insert(&mq->hotspot_table, e);
		}
	}

	return e;
}

/*----------------------------------------------------------------*/
/*
 * Public interface, via the policy struct.  See dm-cache-policy.h for a
 * description of these.
 */

static struct smq_policy *to_smq_policy(struct dm_cache_policy *p)
{
	return container_of(p, struct smq_policy, policy);
}
static void smq_destroy(struct dm_cache_policy *p)
{
	struct smq_policy *mq = to_smq_policy(p);

	btracker_destroy(mq->bg_work);
	h_exit(&mq->hotspot_table);
	h_exit(&mq->table);
	free_bitset(mq->hotspot_hit_bits);
	free_bitset(mq->cache_hit_bits);
	space_exit(&mq->es);
	kfree(mq);
}

/*----------------------------------------------------------------*/
static int __lookup(struct smq_policy *mq, dm_oblock_t oblock, dm_cblock_t *cblock,
		    int data_dir, bool fast_copy,
		    struct policy_work **work, bool *background_work)
{
	struct entry *e, *hs_e;
	enum promote_result pr;

	*background_work = false;

	e = h_lookup(&mq->table, oblock);
	if (e) {
		stats_level_accessed(&mq->cache_stats, e->level);

		requeue(mq, e);
		*cblock = infer_cblock(mq, e);
		return 0;

	} else {
		stats_miss(&mq->cache_stats);

		/*
		 * The hotspot queue only gets updated with misses.
		 */
		hs_e = update_hotspot_queue(mq, oblock);

		pr = should_promote(mq, hs_e, data_dir, fast_copy);
		if (pr != PROMOTE_NOT) {
			queue_promotion(mq, oblock, work);
			*background_work = true;
		}

		return -ENOENT;
	}
}
static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock,
		      int data_dir, bool fast_copy,
		      bool *background_work)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = __lookup(mq, oblock, cblock,
		     data_dir, fast_copy,
		     NULL, background_work);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}
static int smq_lookup_with_work(struct dm_cache_policy *p,
				dm_oblock_t oblock, dm_cblock_t *cblock,
				int data_dir, bool fast_copy,
				struct policy_work **work)
{
	int r;
	bool background_queued;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = __lookup(mq, oblock, cblock, data_dir, fast_copy, work, &background_queued);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}
static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
				   struct policy_work **result)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = btracker_issue(mq->bg_work, result);
	if (r == -ENODATA) {
		if (!clean_target_met(mq, idle)) {
			queue_writeback(mq, idle);
			r = btracker_issue(mq->bg_work, result);
		}
	}
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}
/*
 * We need to clear any pending work flags that have been set, and in the
 * case of promotion free the entry for the destination cblock.
 */
static void __complete_background_work(struct smq_policy *mq,
				       struct policy_work *work,
				       bool success)
{
	struct entry *e = get_entry(&mq->cache_alloc,
				    from_cblock(work->cblock));

	switch (work->op) {
	case POLICY_PROMOTE:
		// !h, !q, a
		clear_pending(mq, e);
		if (success) {
			e->oblock = work->oblock;
			e->level = NR_CACHE_LEVELS - 1;
			push(mq, e);
			// h, q, a
		} else {
			free_entry(&mq->cache_alloc, e);
			// !h, !q, !a
		}
		break;

	case POLICY_DEMOTE:
		// h, !q, a
		if (success) {
			h_remove(&mq->table, e);
			free_entry(&mq->cache_alloc, e);
			// !h, !q, !a
		} else {
			clear_pending(mq, e);
			push_queue(mq, e);
			// h, q, a
		}
		break;

	case POLICY_WRITEBACK:
		// h, !q, a
		clear_pending(mq, e);
		push_queue(mq, e);
		// h, q, a
		break;
	}

	btracker_complete(mq->bg_work, work);
}
static void smq_complete_background_work(struct dm_cache_policy *p,
					 struct policy_work *work,
					 bool success)
{
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	__complete_background_work(mq, work, success);
	spin_unlock_irqrestore(&mq->lock, flags);
}
// in_hash(oblock) -> in_hash(oblock)
static void __smq_set_clear_dirty(struct smq_policy *mq, dm_cblock_t cblock, bool set)
{
	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));

	if (e->pending_work)
		e->dirty = set;
	else {
		del_queue(mq, e);
		e->dirty = set;
		push_queue(mq, e);
	}
}
static void smq_set_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	__smq_set_clear_dirty(mq, cblock, true);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static void smq_clear_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	unsigned long flags;

	spin_lock_irqsave(&mq->lock, flags);
	__smq_set_clear_dirty(mq, cblock, false);
	spin_unlock_irqrestore(&mq->lock, flags);
}
static unsigned int random_level(dm_cblock_t cblock)
{
	return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
}
static int smq_load_mapping(struct dm_cache_policy *p,
			    dm_oblock_t oblock, dm_cblock_t cblock,
			    bool dirty, uint32_t hint, bool hint_valid)
{
	struct smq_policy *mq = to_smq_policy(p);
	struct entry *e;

	e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
	e->oblock = oblock;
	e->dirty = dirty;
	e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
	e->pending_work = false;

	/*
	 * When we load mappings we push ahead of both sentinels in order to
	 * allow demotions and cleaning to occur immediately.
	 */
	push_front(mq, e);

	return 0;
}
static int smq_invalidate_mapping(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));

	if (!e->allocated)
		return -ENODATA;

	// FIXME: what if this block has pending background work?
	del_queue(mq, e);
	h_remove(&mq->table, e);
	free_entry(&mq->cache_alloc, e);

	return 0;
}
static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));

	if (!e->allocated)
		return 0;

	return e->level;
}
static dm_cblock_t smq_residency(struct dm_cache_policy *p)
{
	dm_cblock_t r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = to_cblock(mq->cache_alloc.nr_allocated);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}
static void smq_tick(struct dm_cache_policy *p, bool can_block)
{
	struct smq_policy *mq = to_smq_policy(p);
	unsigned long flags;

	spin_lock_irqsave(&mq->lock, flags);
	mq->tick++;
	update_sentinels(mq);
	end_hotspot_period(mq);
	end_cache_period(mq);
	spin_unlock_irqrestore(&mq->lock, flags);
}
static void smq_allow_migrations(struct dm_cache_policy *p, bool allow)
{
	struct smq_policy *mq = to_smq_policy(p);

	mq->migrations_allowed = allow;
}
/*
 * smq has no config values, but the old mq policy did.  To avoid breaking
 * software we continue to accept these configurables for the mq policy,
 * but they have no effect.
 */
static int mq_set_config_value(struct dm_cache_policy *p,
			       const char *key, const char *value)
{
	unsigned long tmp;

	if (kstrtoul(value, 10, &tmp))
		return -EINVAL;

	if (!strcasecmp(key, "random_threshold") ||
	    !strcasecmp(key, "sequential_threshold") ||
	    !strcasecmp(key, "discard_promote_adjustment") ||
	    !strcasecmp(key, "read_promote_adjustment") ||
	    !strcasecmp(key, "write_promote_adjustment")) {
		DMWARN("tunable '%s' no longer has any effect, mq policy is now an alias for smq", key);
		return 0;
	}

	return -EINVAL;
}
static int mq_emit_config_values(struct dm_cache_policy *p, char *result,
				 unsigned int maxlen, ssize_t *sz_ptr)
{
	ssize_t sz = *sz_ptr;

	DMEMIT("10 random_threshold 0 "
	       "sequential_threshold 0 "
	       "discard_promote_adjustment 0 "
	       "read_promote_adjustment 0 "
	       "write_promote_adjustment 0 ");

	*sz_ptr = sz;
	return 0;
}
/* Init the policy plugin interface function pointers. */
static void init_policy_functions(struct smq_policy *mq, bool mimic_mq)
{
	mq->policy.destroy = smq_destroy;
	mq->policy.lookup = smq_lookup;
	mq->policy.lookup_with_work = smq_lookup_with_work;
	mq->policy.get_background_work = smq_get_background_work;
	mq->policy.complete_background_work = smq_complete_background_work;
	mq->policy.set_dirty = smq_set_dirty;
	mq->policy.clear_dirty = smq_clear_dirty;
	mq->policy.load_mapping = smq_load_mapping;
	mq->policy.invalidate_mapping = smq_invalidate_mapping;
	mq->policy.get_hint = smq_get_hint;
	mq->policy.residency = smq_residency;
	mq->policy.tick = smq_tick;
	mq->policy.allow_migrations = smq_allow_migrations;

	if (mimic_mq) {
		mq->policy.set_config_value = mq_set_config_value;
		mq->policy.emit_config_values = mq_emit_config_values;
	}
}
static bool too_many_hotspot_blocks(sector_t origin_size,
				    sector_t hotspot_block_size,
				    unsigned int nr_hotspot_blocks)
{
	return (hotspot_block_size * nr_hotspot_blocks) > origin_size;
}
static void calc_hotspot_params(sector_t origin_size,
				sector_t cache_block_size,
				unsigned int nr_cache_blocks,
				sector_t *hotspot_block_size,
				unsigned int *nr_hotspot_blocks)
{
	*hotspot_block_size = cache_block_size * 16u;
	*nr_hotspot_blocks = max(nr_cache_blocks / 4u, 1024u);

	while ((*hotspot_block_size > cache_block_size) &&
	       too_many_hotspot_blocks(origin_size, *hotspot_block_size, *nr_hotspot_blocks))
		*hotspot_block_size /= 2u;
}
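/*
 * Worked example (illustrative only): for a 256 sector cache block and a
 * 2048 cache-block cache, the first guess is hotspot_block_size = 4096
 * sectors and nr_hotspot_blocks = max(2048 / 4, 1024) = 1024.  If the
 * origin is smaller than 4096 * 1024 sectors the block size is repeatedly
 * halved (but never below the cache block size) until the hotspot blocks
 * fit.
 */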
static struct dm_cache_policy *
__smq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size,
	     bool mimic_mq, bool migrations_allowed, bool cleaner)
{
	unsigned int i;
	unsigned int nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
	unsigned int total_sentinels = 2u * nr_sentinels_per_queue;
	struct smq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);

	if (!mq)
		return NULL;

	init_policy_functions(mq, mimic_mq);
	mq->cache_size = cache_size;
	mq->cache_block_size = cache_block_size;

	calc_hotspot_params(origin_size, cache_block_size, from_cblock(cache_size),
			    &mq->hotspot_block_size, &mq->nr_hotspot_blocks);

	mq->cache_blocks_per_hotspot_block = div64_u64(mq->hotspot_block_size, mq->cache_block_size);
	mq->hotspot_level_jump = 1u;
	if (space_init(&mq->es, total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size))) {
		DMERR("couldn't initialize entry space");
		goto bad_pool_init;
	}

	init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue);
	for (i = 0; i < nr_sentinels_per_queue; i++)
		get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true;

	init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels);
	for (i = 0; i < nr_sentinels_per_queue; i++)
		get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true;

	init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels,
		       total_sentinels + mq->nr_hotspot_blocks);

	init_allocator(&mq->cache_alloc, &mq->es,
		       total_sentinels + mq->nr_hotspot_blocks,
		       total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size));

	mq->hotspot_hit_bits = alloc_bitset(mq->nr_hotspot_blocks);
	if (!mq->hotspot_hit_bits) {
		DMERR("couldn't allocate hotspot hit bitset");
		goto bad_hotspot_hit_bits;
	}
	clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);

	if (from_cblock(cache_size)) {
		mq->cache_hit_bits = alloc_bitset(from_cblock(cache_size));
		if (!mq->cache_hit_bits) {
			DMERR("couldn't allocate cache hit bitset");
			goto bad_cache_hit_bits;
		}
		clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));
	} else
		mq->cache_hit_bits = NULL;

	mq->tick = 0;
	spin_lock_init(&mq->lock);

	q_init(&mq->hotspot, &mq->es, NR_HOTSPOT_LEVELS);
	mq->hotspot.nr_top_levels = 8;
	mq->hotspot.nr_in_top_levels = min(mq->nr_hotspot_blocks / NR_HOTSPOT_LEVELS,
					   from_cblock(mq->cache_size) / mq->cache_blocks_per_hotspot_block);

	q_init(&mq->clean, &mq->es, NR_CACHE_LEVELS);
	q_init(&mq->dirty, &mq->es, NR_CACHE_LEVELS);

	stats_init(&mq->hotspot_stats, NR_HOTSPOT_LEVELS);
	stats_init(&mq->cache_stats, NR_CACHE_LEVELS);

	if (h_init(&mq->table, &mq->es, from_cblock(cache_size)))
		goto bad_alloc_table;

	if (h_init(&mq->hotspot_table, &mq->es, mq->nr_hotspot_blocks))
		goto bad_alloc_hotspot_table;

	sentinels_init(mq);
	mq->write_promote_level = mq->read_promote_level = NR_HOTSPOT_LEVELS;

	mq->next_hotspot_period = jiffies;
	mq->next_cache_period = jiffies;

	mq->bg_work = btracker_create(4096); /* FIXME: hard coded value */
	if (!mq->bg_work)
		goto bad_btracker;

	mq->migrations_allowed = migrations_allowed;
	mq->cleaner = cleaner;

	return &mq->policy;

bad_btracker:
	h_exit(&mq->hotspot_table);
bad_alloc_hotspot_table:
	h_exit(&mq->table);
bad_alloc_table:
	free_bitset(mq->cache_hit_bits);
bad_cache_hit_bits:
	free_bitset(mq->hotspot_hit_bits);
bad_hotspot_hit_bits:
	space_exit(&mq->es);
bad_pool_init:
	kfree(mq);

	return NULL;
}
static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
					  sector_t origin_size,
					  sector_t cache_block_size)
{
	return __smq_create(cache_size, origin_size, cache_block_size,
			    false, true, false);
}

static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
					 sector_t origin_size,
					 sector_t cache_block_size)
{
	return __smq_create(cache_size, origin_size, cache_block_size,
			    true, true, false);
}

static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size,
					      sector_t origin_size,
					      sector_t cache_block_size)
{
	return __smq_create(cache_size, origin_size, cache_block_size,
			    false, false, true);
}

/*----------------------------------------------------------------*/
static struct dm_cache_policy_type smq_policy_type = {
	.name = "smq",
	.version = {2, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = smq_create
};

static struct dm_cache_policy_type mq_policy_type = {
	.name = "mq",
	.version = {2, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = mq_create,
};

static struct dm_cache_policy_type cleaner_policy_type = {
	.name = "cleaner",
	.version = {2, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = cleaner_create,
};

static struct dm_cache_policy_type default_policy_type = {
	.name = "default",
	.version = {2, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = smq_create,
	.real = &smq_policy_type
};
static int __init smq_init(void)
{
	int r;

	r = dm_cache_policy_register(&smq_policy_type);
	if (r) {
		DMERR("register failed %d", r);
		return -ENOMEM;
	}

	r = dm_cache_policy_register(&mq_policy_type);
	if (r) {
		DMERR("register failed (as mq) %d", r);
		goto out_mq;
	}

	r = dm_cache_policy_register(&cleaner_policy_type);
	if (r) {
		DMERR("register failed (as cleaner) %d", r);
		goto out_cleaner;
	}

	r = dm_cache_policy_register(&default_policy_type);
	if (r) {
		DMERR("register failed (as default) %d", r);
		goto out_default;
	}

	return 0;

out_default:
	dm_cache_policy_unregister(&cleaner_policy_type);
out_cleaner:
	dm_cache_policy_unregister(&mq_policy_type);
out_mq:
	dm_cache_policy_unregister(&smq_policy_type);

	return -ENOMEM;
}
static void __exit smq_exit(void)
{
	dm_cache_policy_unregister(&cleaner_policy_type);
	dm_cache_policy_unregister(&smq_policy_type);
	dm_cache_policy_unregister(&mq_policy_type);
	dm_cache_policy_unregister(&default_policy_type);
}

module_init(smq_init);
module_exit(smq_exit);
MODULE_AUTHOR("Joe Thornber <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("smq cache policy");

MODULE_ALIAS("dm-cache-default");
MODULE_ALIAS("dm-cache-mq");
MODULE_ALIAS("dm-cache-cleaner");