// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology(RDT)
 * - Cache Allocation code.
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Authors:
 *    Fenghua Yu <fenghua.yu@intel.com>
 *    Tony Luck <tony.luck@intel.com>
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual June 2016, volume 3, section 17.17.
 */

#define pr_fmt(fmt)	"resctrl: " fmt

#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cpuhotplug.h>

#include <asm/cpu_device_id.h>
#include <asm/resctrl.h>

#include "internal.h"
/*
 * rdt_domain structures are kfree()d when their last CPU goes offline,
 * and allocated when the first CPU in a new domain comes online.
 * The rdt_resource's domain list is updated when this happens. Readers of
 * the domain list must either take cpus_read_lock(), or rely on an RCU
 * read-side critical section, to avoid observing concurrent modification.
 * All writers take this mutex:
 */
static DEFINE_MUTEX(domain_list_lock);
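
/*
 * Editorial sketch, not part of the original file: a lockless reader
 * walking a resource's control domain list under RCU, following the
 * locking rules documented above. The helper name is hypothetical and
 * only illustrates the read side; writers must hold domain_list_lock.
 */
static int __maybe_unused example_count_ctrl_domains(struct rdt_resource *r)
{
        struct rdt_ctrl_domain *d;
        int n = 0;

        rcu_read_lock();
        list_for_each_entry_rcu(d, &r->ctrl_domains, hdr.list)
                n++;
        rcu_read_unlock();

        return n;
}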
/*
 * The cached resctrl_pqr_state is strictly per CPU and can never be
 * updated from a remote CPU. Functions which modify the state
 * are called with interrupts disabled and no preemption, which
 * is sufficient for the protection.
 */
DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);
/*
 * Used to store the max resource name width and max resource data width
 * to display the schemata in a tabular format.
 */
int max_name_width, max_data_width;
/*
 * Global boolean for rdt_alloc which is true if any
 * resource allocation is enabled.
 */
bool rdt_alloc_capable;
static void mba_wrmsr_intel(struct msr_param *m);
static void cat_wrmsr(struct msr_param *m);
static void mba_wrmsr_amd(struct msr_param *m);
#define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
#define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)
struct rdt_hw_resource rdt_resources_all[] = {
        [RDT_RESOURCE_L3] =
        {
                .r_resctrl = {
                        .rid            = RDT_RESOURCE_L3,
                        .name           = "L3",
                        .ctrl_scope     = RESCTRL_L3_CACHE,
                        .mon_scope      = RESCTRL_L3_CACHE,
                        .ctrl_domains   = ctrl_domain_init(RDT_RESOURCE_L3),
                        .mon_domains    = mon_domain_init(RDT_RESOURCE_L3),
                        .parse_ctrlval  = parse_cbm,
                        .format_str     = "%d=%0*x",
                        .fflags         = RFTYPE_RES_CACHE,
                },
                .msr_base       = MSR_IA32_L3_CBM_BASE,
                .msr_update     = cat_wrmsr,
        },
        [RDT_RESOURCE_L2] =
        {
                .r_resctrl = {
                        .rid            = RDT_RESOURCE_L2,
                        .name           = "L2",
                        .ctrl_scope     = RESCTRL_L2_CACHE,
                        .ctrl_domains   = ctrl_domain_init(RDT_RESOURCE_L2),
                        .parse_ctrlval  = parse_cbm,
                        .format_str     = "%d=%0*x",
                        .fflags         = RFTYPE_RES_CACHE,
                },
                .msr_base       = MSR_IA32_L2_CBM_BASE,
                .msr_update     = cat_wrmsr,
        },
        [RDT_RESOURCE_MBA] =
        {
                .r_resctrl = {
                        .rid            = RDT_RESOURCE_MBA,
                        .name           = "MB",
                        .ctrl_scope     = RESCTRL_L3_CACHE,
                        .ctrl_domains   = ctrl_domain_init(RDT_RESOURCE_MBA),
                        .parse_ctrlval  = parse_bw,
                        .format_str     = "%d=%*u",
                        .fflags         = RFTYPE_RES_MB,
                },
        },
        [RDT_RESOURCE_SMBA] =
        {
                .r_resctrl = {
                        .rid            = RDT_RESOURCE_SMBA,
                        .name           = "SMBA",
                        .ctrl_scope     = RESCTRL_L3_CACHE,
                        .ctrl_domains   = ctrl_domain_init(RDT_RESOURCE_SMBA),
                        .parse_ctrlval  = parse_bw,
                        .format_str     = "%d=%*u",
                        .fflags         = RFTYPE_RES_MB,
                },
        },
};
u32 resctrl_arch_system_num_rmid_idx(void)
{
        struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

        /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
        return r->num_rmid;
}
/*
 * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
 * as they do not have CPUID enumeration support for Cache allocation.
 * The check for Vendor/Family/Model is not enough to guarantee that
 * the MSRs won't #GP fault because only the following SKUs support
 * CAT:
 *	Intel(R) Xeon(R) CPU E5-2658 v3 @ 2.20GHz
 *	Intel(R) Xeon(R) CPU E5-2648L v3 @ 1.80GHz
 *	Intel(R) Xeon(R) CPU E5-2628L v3 @ 2.00GHz
 *	Intel(R) Xeon(R) CPU E5-2618L v3 @ 2.30GHz
 *	Intel(R) Xeon(R) CPU E5-2608L v3 @ 2.00GHz
 *	Intel(R) Xeon(R) CPU E5-2658A v3 @ 2.20GHz
 *
 * Probe by trying to write the first of the L3 cache mask registers
 * and checking that the bits stick. Max CLOSids is always 4 and max cbm length
 * is always 20 on hsw server parts. The minimum cache bitmask length
 * allowed for HSW server is always 2 bits. Hardcode all of them.
 */
static inline void cache_alloc_hsw_probe(void)
{
        struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
        struct rdt_resource *r = &hw_res->r_resctrl;
        u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0;

        if (wrmsrl_safe(MSR_IA32_L3_CBM_BASE, max_cbm))
                return;

        rdmsrl(MSR_IA32_L3_CBM_BASE, l3_cbm_0);

        /* If all the bits were set in MSR, return success */
        if (l3_cbm_0 != max_cbm)
                return;

        hw_res->num_closid = 4;
        r->default_ctrl = max_cbm;
        r->cache.cbm_len = 20;
        r->cache.shareable_bits = 0xc0000;
        r->cache.min_cbm_bits = 2;
        r->cache.arch_has_sparse_bitmasks = false;
        r->alloc_capable = true;

        rdt_alloc_capable = true;
}
bool is_mba_sc(struct rdt_resource *r)
{
        if (!r)
                return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc;

        /*
         * The software controller support is only applicable to MBA resource.
         * Make sure to check for resource type.
         */
        if (r->rid != RDT_RESOURCE_MBA)
                return false;

        return r->membw.mba_sc;
}
/*
 * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
 * exposed to user interface and the h/w understandable delay values.
 *
 * The non-linear delay values have the granularity of power of two
 * and also the h/w does not guarantee a curve for configured delay
 * values vs. actual b/w enforced.
 * Hence we need a mapping that is pre calibrated so the user can
 * express the memory b/w as a percentage value.
 */
static inline bool rdt_get_mb_table(struct rdt_resource *r)
{
        /*
         * There are no Intel SKUs as of now to support non-linear delay.
         */
        pr_info("MBA b/w map not implemented for cpu:%d, model:%d",
                boot_cpu_data.x86, boot_cpu_data.x86_model);

        return false;
}
static __init bool __get_mem_config_intel(struct rdt_resource *r)
{
        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
        union cpuid_0x10_3_eax eax;
        union cpuid_0x10_x_edx edx;
        u32 ebx, ecx, max_delay;

        cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
        hw_res->num_closid = edx.split.cos_max + 1;
        max_delay = eax.split.max_delay + 1;
        r->default_ctrl = MAX_MBA_BW;
        r->membw.arch_needs_linear = true;
        if (ecx & MBA_IS_LINEAR) {
                r->membw.delay_linear = true;
                r->membw.min_bw = MAX_MBA_BW - max_delay;
                r->membw.bw_gran = MAX_MBA_BW - max_delay;
        } else {
                if (!rdt_get_mb_table(r))
                        return false;
                r->membw.arch_needs_linear = false;
        }

        if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
                r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
        else
                r->membw.throttle_mode = THREAD_THROTTLE_MAX;
        thread_throttle_mode_init();

        r->alloc_capable = true;

        return true;
}
static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{
        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
        u32 eax, ebx, ecx, edx, subleaf;

        /*
         * Query CPUID_Fn80000020_EDX_x01 for MBA and
         * CPUID_Fn80000020_EDX_x02 for SMBA
         */
        subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1;

        cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
        hw_res->num_closid = edx + 1;
        r->default_ctrl = 1 << eax;

        /* AMD does not use delay */
        r->membw.delay_linear = false;
        r->membw.arch_needs_linear = false;

        /*
         * AMD does not use memory delay throttle model to control
         * the allocation like Intel does.
         */
        r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
        r->membw.bw_gran = 1;
        /* Max value is 2048, Data width should be 4 in decimal */
        r->data_width = 4;

        r->alloc_capable = true;

        return true;
}
static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
{
        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
        union cpuid_0x10_1_eax eax;
        union cpuid_0x10_x_ecx ecx;
        union cpuid_0x10_x_edx edx;
        u32 ebx;

        cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full);
        hw_res->num_closid = edx.split.cos_max + 1;
        r->cache.cbm_len = eax.split.cbm_len + 1;
        r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
        r->cache.shareable_bits = ebx & r->default_ctrl;
        r->data_width = (r->cache.cbm_len + 3) / 4;
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
                r->cache.arch_has_sparse_bitmasks = ecx.split.noncont;
        r->alloc_capable = true;
}
static void rdt_get_cdp_config(int level)
{
        /*
         * By default, CDP is disabled. CDP can be enabled by mount parameter
         * "cdp" during resctrl file system mount time.
         */
        rdt_resources_all[level].cdp_enabled = false;
        rdt_resources_all[level].r_resctrl.cdp_capable = true;
}

static void rdt_get_cdp_l3_config(void)
{
        rdt_get_cdp_config(RDT_RESOURCE_L3);
}

static void rdt_get_cdp_l2_config(void)
{
        rdt_get_cdp_config(RDT_RESOURCE_L2);
}
static void mba_wrmsr_amd(struct msr_param *m)
{
        struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
        unsigned int i;

        for (i = m->low; i < m->high; i++)
                wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}
/*
 * Map the memory b/w percentage value to delay values
 * that can be written to QOS_MSRs.
 * There are currently no SKUs which support non linear delay values.
 */
static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
{
        if (r->membw.delay_linear)
                return MAX_MBA_BW - bw;

        pr_warn_once("Non Linear delay-bw map not supported but queried\n");
        return r->default_ctrl;
}
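
/*
 * Worked example for the linear map above (editorial; assumes MAX_MBA_BW
 * is the 100% software value): a request for 70% bandwidth is written to
 * the MSR as a delay of MAX_MBA_BW - 70, and a 100% request maps to 0,
 * i.e. no throttling.
 */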
static void mba_wrmsr_intel(struct msr_param *m)
{
        struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
        unsigned int i;

        /* Write the delay values for mba. */
        for (i = m->low; i < m->high; i++)
                wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res));
}
static void cat_wrmsr(struct msr_param *m)
{
        struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
        unsigned int i;

        for (i = m->low; i < m->high; i++)
                wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}
struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r)
{
        struct rdt_ctrl_domain *d;

        lockdep_assert_cpus_held();

        list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
                /* Find the domain that contains this CPU */
                if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
                        return d;
        }

        return NULL;
}
struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r)
{
        struct rdt_mon_domain *d;

        lockdep_assert_cpus_held();

        list_for_each_entry(d, &r->mon_domains, hdr.list) {
                /* Find the domain that contains this CPU */
                if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
                        return d;
        }

        return NULL;
}
u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
{
        return resctrl_to_arch_res(r)->num_closid;
}
void rdt_ctrl_update(void *arg)
{
        struct rdt_hw_resource *hw_res;
        struct msr_param *m = arg;

        hw_res = resctrl_to_arch_res(m->res);
        hw_res->msr_update(m);
}
/*
 * rdt_find_domain - Search for a domain id in a resource domain list.
 *
 * Search the domain list to find the domain id. If the domain id is
 * found, return the domain. NULL otherwise. If the domain id is not
 * found (and NULL returned) then the first domain with id bigger than
 * the input id can be returned to the caller via @pos.
 */
struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id,
                                       struct list_head **pos)
{
        struct rdt_domain_hdr *d;
        struct list_head *l;

        list_for_each(l, h) {
                d = list_entry(l, struct rdt_domain_hdr, list);
                /* When id is found, return its domain. */
                if (id == d->id)
                        return d;
                /* Stop searching when finding id's position in sorted list. */
                if (id < d->id)
                        break;
        }

        if (pos)
                *pos = l;

        return NULL;
}
static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
{
        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
        int i;

        /*
         * Initialize the Control MSRs to having no control.
         * For Cache Allocation: Set all bits in cbm
         * For Memory Allocation: Set b/w requested to 100%
         */
        for (i = 0; i < hw_res->num_closid; i++, dc++)
                *dc = r->default_ctrl;
}
static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
{
        kfree(hw_dom->ctrl_val);
        kfree(hw_dom);
}

static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
{
        kfree(hw_dom->arch_mbm_total);
        kfree(hw_dom->arch_mbm_local);
        kfree(hw_dom);
}
static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
        struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
        struct msr_param m;
        u32 *dc;

        dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val),
                           GFP_KERNEL);
        if (!dc)
                return -ENOMEM;

        hw_dom->ctrl_val = dc;
        setup_default_ctrlval(r, dc);

        m.res = r;
        m.dom = d;
        m.low = 0;
        m.high = hw_res->num_closid;
        hw_res->msr_update(&m);
        return 0;
}
/**
 * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters
 * @num_rmid:	The size of the MBM counter array
 * @hw_dom:	The domain that owns the allocated arrays
 */
static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
{
        size_t tsize;

        if (is_mbm_total_enabled()) {
                tsize = sizeof(*hw_dom->arch_mbm_total);
                hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
                if (!hw_dom->arch_mbm_total)
                        return -ENOMEM;
        }
        if (is_mbm_local_enabled()) {
                tsize = sizeof(*hw_dom->arch_mbm_local);
                hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
                if (!hw_dom->arch_mbm_local) {
                        kfree(hw_dom->arch_mbm_total);
                        hw_dom->arch_mbm_total = NULL;
                        return -ENOMEM;
                }
        }

        return 0;
}
static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
{
        switch (scope) {
        case RESCTRL_L2_CACHE:
        case RESCTRL_L3_CACHE:
                return get_cpu_cacheinfo_id(cpu, scope);
        case RESCTRL_L3_NODE:
                return cpu_to_node(cpu);
        default:
                break;
        }

        return -EINVAL;
}
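
/*
 * Editorial note: for the cache scopes above the id is the shared cache
 * instance id reported by cacheinfo, while RESCTRL_L3_NODE maps the CPU
 * to its NUMA node id. A negative return means the scope could not be
 * resolved; the domain add/remove helpers below warn and bail out in
 * that case.
 */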
static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
{
        int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
        struct rdt_hw_ctrl_domain *hw_dom;
        struct list_head *add_pos = NULL;
        struct rdt_domain_hdr *hdr;
        struct rdt_ctrl_domain *d;
        int err;

        lockdep_assert_held(&domain_list_lock);

        if (id < 0) {
                pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
                             cpu, r->ctrl_scope, r->name);
                return;
        }

        hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos);
        if (hdr) {
                if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
                        return;
                d = container_of(hdr, struct rdt_ctrl_domain, hdr);

                cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
                if (r->cache.arch_has_per_cpu_cfg)
                        rdt_domain_reconfigure_cdp(r);
                return;
        }

        hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
        if (!hw_dom)
                return;

        d = &hw_dom->d_resctrl;
        d->hdr.id = id;
        d->hdr.type = RESCTRL_CTRL_DOMAIN;
        cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

        rdt_domain_reconfigure_cdp(r);

        if (domain_setup_ctrlval(r, d)) {
                ctrl_domain_free(hw_dom);
                return;
        }

        list_add_tail_rcu(&d->hdr.list, add_pos);

        err = resctrl_online_ctrl_domain(r, d);
        if (err) {
                list_del_rcu(&d->hdr.list);
                synchronize_rcu();
                ctrl_domain_free(hw_dom);
        }
}
static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
{
        int id = get_domain_id_from_scope(cpu, r->mon_scope);
        struct list_head *add_pos = NULL;
        struct rdt_hw_mon_domain *hw_dom;
        struct rdt_domain_hdr *hdr;
        struct rdt_mon_domain *d;
        int err;

        lockdep_assert_held(&domain_list_lock);

        if (id < 0) {
                pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
                             cpu, r->mon_scope, r->name);
                return;
        }

        hdr = rdt_find_domain(&r->mon_domains, id, &add_pos);
        if (hdr) {
                if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
                        return;
                d = container_of(hdr, struct rdt_mon_domain, hdr);

                cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
                return;
        }

        hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
        if (!hw_dom)
                return;

        d = &hw_dom->d_resctrl;
        d->hdr.id = id;
        d->hdr.type = RESCTRL_MON_DOMAIN;
        d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
        if (!d->ci) {
                pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
                mon_domain_free(hw_dom);
                return;
        }
        cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

        arch_mon_domain_online(r, d);

        if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
                mon_domain_free(hw_dom);
                return;
        }

        list_add_tail_rcu(&d->hdr.list, add_pos);

        err = resctrl_online_mon_domain(r, d);
        if (err) {
                list_del_rcu(&d->hdr.list);
                synchronize_rcu();
                mon_domain_free(hw_dom);
        }
}
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
        if (r->alloc_capable)
                domain_add_cpu_ctrl(cpu, r);
        if (r->mon_capable)
                domain_add_cpu_mon(cpu, r);
}
static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
{
        int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
        struct rdt_hw_ctrl_domain *hw_dom;
        struct rdt_domain_hdr *hdr;
        struct rdt_ctrl_domain *d;

        lockdep_assert_held(&domain_list_lock);

        if (id < 0) {
                pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
                             cpu, r->ctrl_scope, r->name);
                return;
        }

        hdr = rdt_find_domain(&r->ctrl_domains, id, NULL);
        if (!hdr) {
                pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
                        id, cpu, r->name);
                return;
        }

        if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
                return;

        d = container_of(hdr, struct rdt_ctrl_domain, hdr);
        hw_dom = resctrl_to_arch_ctrl_dom(d);

        cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
        if (cpumask_empty(&d->hdr.cpu_mask)) {
                resctrl_offline_ctrl_domain(r, d);
                list_del_rcu(&d->hdr.list);
                synchronize_rcu();

                /*
                 * rdt_ctrl_domain "d" is going to be freed below, so clear
                 * its pointer from pseudo_lock_region struct.
                 */
                if (d->plr)
                        d->plr->d = NULL;
                ctrl_domain_free(hw_dom);

                return;
        }
}
static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
{
        int id = get_domain_id_from_scope(cpu, r->mon_scope);
        struct rdt_hw_mon_domain *hw_dom;
        struct rdt_domain_hdr *hdr;
        struct rdt_mon_domain *d;

        lockdep_assert_held(&domain_list_lock);

        if (id < 0) {
                pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
                             cpu, r->mon_scope, r->name);
                return;
        }

        hdr = rdt_find_domain(&r->mon_domains, id, NULL);
        if (!hdr) {
                pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
                        id, cpu, r->name);
                return;
        }

        if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
                return;

        d = container_of(hdr, struct rdt_mon_domain, hdr);
        hw_dom = resctrl_to_arch_mon_dom(d);

        cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
        if (cpumask_empty(&d->hdr.cpu_mask)) {
                resctrl_offline_mon_domain(r, d);
                list_del_rcu(&d->hdr.list);
                synchronize_rcu();
                mon_domain_free(hw_dom);

                return;
        }
}
static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
        if (r->alloc_capable)
                domain_remove_cpu_ctrl(cpu, r);
        if (r->mon_capable)
                domain_remove_cpu_mon(cpu, r);
}
static void clear_closid_rmid(int cpu)
{
        struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);

        state->default_closid = RESCTRL_RESERVED_CLOSID;
        state->default_rmid = RESCTRL_RESERVED_RMID;
        state->cur_closid = RESCTRL_RESERVED_CLOSID;
        state->cur_rmid = RESCTRL_RESERVED_RMID;
        wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID,
              RESCTRL_RESERVED_CLOSID);
}
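
/*
 * Editorial note: wrmsr(msr, lo, hi) writes the two 32-bit halves
 * separately, so the call above loads PQR_ASSOC with the reserved RMID
 * in the low half and the reserved CLOSID in the high half.
 */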
static int resctrl_arch_online_cpu(unsigned int cpu)
{
        struct rdt_resource *r;

        mutex_lock(&domain_list_lock);
        for_each_capable_rdt_resource(r)
                domain_add_cpu(cpu, r);
        mutex_unlock(&domain_list_lock);

        clear_closid_rmid(cpu);
        resctrl_online_cpu(cpu);

        return 0;
}
static int resctrl_arch_offline_cpu(unsigned int cpu)
{
        struct rdt_resource *r;

        resctrl_offline_cpu(cpu);

        mutex_lock(&domain_list_lock);
        for_each_capable_rdt_resource(r)
                domain_remove_cpu(cpu, r);
        mutex_unlock(&domain_list_lock);

        clear_closid_rmid(cpu);

        return 0;
}
/*
 * Choose a width for the resource name and resource data based on the
 * resource that has widest name and cbm.
 */
static __init void rdt_init_padding(void)
{
        struct rdt_resource *r;

        for_each_alloc_capable_rdt_resource(r) {
                if (r->data_width > max_data_width)
                        max_data_width = r->data_width;
        }
}
enum {
        RDT_FLAG_CMT,
        RDT_FLAG_MBM_TOTAL,
        RDT_FLAG_MBM_LOCAL,
        RDT_FLAG_L3_CAT,
        RDT_FLAG_L3_CDP,
        RDT_FLAG_L2_CAT,
        RDT_FLAG_L2_CDP,
        RDT_FLAG_MBA,
        RDT_FLAG_SMBA,
        RDT_FLAG_BMEC,
};

#define RDT_OPT(idx, n, f)	\
[idx] = {			\
        .name = n,		\
        .flag = f		\
}

struct rdt_options {
        char    *name;
        int     flag;
        bool    force_off, force_on;
};

static struct rdt_options rdt_options[] __initdata = {
        RDT_OPT(RDT_FLAG_CMT,       "cmt",      X86_FEATURE_CQM_OCCUP_LLC),
        RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
        RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
        RDT_OPT(RDT_FLAG_L3_CAT,    "l3cat",    X86_FEATURE_CAT_L3),
        RDT_OPT(RDT_FLAG_L3_CDP,    "l3cdp",    X86_FEATURE_CDP_L3),
        RDT_OPT(RDT_FLAG_L2_CAT,    "l2cat",    X86_FEATURE_CAT_L2),
        RDT_OPT(RDT_FLAG_L2_CDP,    "l2cdp",    X86_FEATURE_CDP_L2),
        RDT_OPT(RDT_FLAG_MBA,       "mba",      X86_FEATURE_MBA),
        RDT_OPT(RDT_FLAG_SMBA,      "smba",     X86_FEATURE_SMBA),
        RDT_OPT(RDT_FLAG_BMEC,      "bmec",     X86_FEATURE_BMEC),
};

#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
static int __init set_rdt_options(char *str)
{
        struct rdt_options *o;
        bool force_off;
        char *tok;

        if (*str == '=')
                str++;
        while ((tok = strsep(&str, ",")) != NULL) {
                force_off = *tok == '!';
                if (force_off)
                        tok++;
                for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
                        if (strcmp(tok, o->name) == 0) {
                                if (force_off)
                                        o->force_off = true;
                                else
                                        o->force_on = true;
                                break;
                        }
                }
        }
        return 1;
}
__setup("rdt", set_rdt_options);
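
/*
 * Editorial example for the boot option parsed above: booting with
 * "rdt=!l3cat,!mba" sets force_off for those entries, so rdt_cpu_has()
 * below reports L3 CAT and MBA as unavailable even when the CPU
 * enumerates them.
 */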
bool __init rdt_cpu_has(int flag)
{
        bool ret = boot_cpu_has(flag);
        struct rdt_options *o;

        if (!ret)
                return ret;

        for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
                if (flag == o->flag) {
                        if (o->force_off)
                                ret = false;
                        if (o->force_on)
                                ret = true;
                        break;
                }
        }
        return ret;
}
static __init bool get_mem_config(void)
{
        struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];

        if (!rdt_cpu_has(X86_FEATURE_MBA))
                return false;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
                return __get_mem_config_intel(&hw_res->r_resctrl);
        else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

        return false;
}
static __init bool get_slow_mem_config(void)
{
        struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA];

        if (!rdt_cpu_has(X86_FEATURE_SMBA))
                return false;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return __rdt_get_mem_config_amd(&hw_res->r_resctrl);

        return false;
}
static __init bool get_rdt_alloc_resources(void)
{
        struct rdt_resource *r;
        bool ret = false;

        if (rdt_alloc_capable)
                return true;

        if (!boot_cpu_has(X86_FEATURE_RDT_A))
                return false;

        if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
                r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
                rdt_get_cache_alloc_cfg(1, r);
                if (rdt_cpu_has(X86_FEATURE_CDP_L3))
                        rdt_get_cdp_l3_config();
                ret = true;
        }
        if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
                /* CPUID 0x10.2 fields are same format as 0x10.1 */
                r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
                rdt_get_cache_alloc_cfg(2, r);
                if (rdt_cpu_has(X86_FEATURE_CDP_L2))
                        rdt_get_cdp_l2_config();
                ret = true;
        }

        if (get_mem_config())
                ret = true;

        if (get_slow_mem_config())
                ret = true;

        return ret;
}
static __init bool get_rdt_mon_resources(void)
{
        struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

        if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
                rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
        if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
                rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
        if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
                rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);

        if (!rdt_mon_features)
                return false;

        return !rdt_get_mon_l3_config(r);
}
static __init void __check_quirks_intel(void)
{
        switch (boot_cpu_data.x86_vfm) {
        case INTEL_HASWELL_X:
                if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
                        cache_alloc_hsw_probe();
                break;
        case INTEL_SKYLAKE_X:
                if (boot_cpu_data.x86_stepping <= 4)
                        set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
                else
                        set_rdt_options("!l3cat");
                fallthrough;
        case INTEL_BROADWELL_X:
                intel_rdt_mbm_apply_quirk();
                break;
        }
}
static __init void check_quirks(void)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
                __check_quirks_intel();
}
static __init bool get_rdt_resources(void)
{
        rdt_alloc_capable = get_rdt_alloc_resources();
        rdt_mon_capable = get_rdt_mon_resources();

        return (rdt_mon_capable || rdt_alloc_capable);
}
static __init void rdt_init_res_defs_intel(void)
{
        struct rdt_hw_resource *hw_res;
        struct rdt_resource *r;

        for_each_rdt_resource(r) {
                hw_res = resctrl_to_arch_res(r);

                if (r->rid == RDT_RESOURCE_L3 ||
                    r->rid == RDT_RESOURCE_L2) {
                        r->cache.arch_has_per_cpu_cfg = false;
                        r->cache.min_cbm_bits = 1;
                } else if (r->rid == RDT_RESOURCE_MBA) {
                        hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
                        hw_res->msr_update = mba_wrmsr_intel;
                }
        }
}
static __init void rdt_init_res_defs_amd(void)
{
        struct rdt_hw_resource *hw_res;
        struct rdt_resource *r;

        for_each_rdt_resource(r) {
                hw_res = resctrl_to_arch_res(r);

                if (r->rid == RDT_RESOURCE_L3 ||
                    r->rid == RDT_RESOURCE_L2) {
                        r->cache.arch_has_sparse_bitmasks = true;
                        r->cache.arch_has_per_cpu_cfg = true;
                        r->cache.min_cbm_bits = 0;
                } else if (r->rid == RDT_RESOURCE_MBA) {
                        hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
                        hw_res->msr_update = mba_wrmsr_amd;
                } else if (r->rid == RDT_RESOURCE_SMBA) {
                        hw_res->msr_base = MSR_IA32_SMBA_BW_BASE;
                        hw_res->msr_update = mba_wrmsr_amd;
                }
        }
}
static __init void rdt_init_res_defs(void)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
                rdt_init_res_defs_intel();
        else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                rdt_init_res_defs_amd();
}
static enum cpuhp_state rdt_online;
/* Runs once on the BSP during boot. */
void resctrl_cpu_detect(struct cpuinfo_x86 *c)
{
        if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
                c->x86_cache_max_rmid = -1;
                c->x86_cache_occ_scale = -1;
                c->x86_cache_mbm_width_offset = -1;
                return;
        }

        /* will be overridden if occupancy monitoring exists */
        c->x86_cache_max_rmid = cpuid_ebx(0xf);

        if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
            cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
            cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
                u32 eax, ebx, ecx, edx;

                /* QoS sub-leaf, EAX=0Fh, ECX=1 */
                cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);

                c->x86_cache_max_rmid = ecx;
                c->x86_cache_occ_scale = ebx;
                c->x86_cache_mbm_width_offset = eax & 0xff;

                if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset)
                        c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
        }
}
static int __init resctrl_late_init(void)
{
        struct rdt_resource *r;
        int state, ret;

        /*
         * Initialize functions(or definitions) that are different
         * between vendors here.
         */
        rdt_init_res_defs();

        check_quirks();

        if (!get_rdt_resources())
                return -ENODEV;

        rdt_init_padding();

        state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
                                  "x86/resctrl/cat:online:",
                                  resctrl_arch_online_cpu,
                                  resctrl_arch_offline_cpu);
        if (state < 0)
                return state;

        ret = rdtgroup_init();
        if (ret) {
                cpuhp_remove_state(state);
                return ret;
        }
        rdt_online = state;

        for_each_alloc_capable_rdt_resource(r)
                pr_info("%s allocation detected\n", r->name);

        for_each_mon_capable_rdt_resource(r)
                pr_info("%s monitoring detected\n", r->name);

        return 0;
}

late_initcall(resctrl_late_init);
static void __exit resctrl_exit(void)
{
        struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

        cpuhp_remove_state(rdt_online);

        rdtgroup_exit();

        if (r->mon_capable)
                rdt_put_mon_l3_config();
}

__exitcall(resctrl_exit);