/*
 * Routines to identify caches on Intel CPU.
 *
 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
 */
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/pci.h>

#include <asm/processor.h>
#include <linux/smp.h>
#include <asm/amd_nb.h>
struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};
#define MB(x)	((x) * 1024)
/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */
static const struct _cache_table __cpuinitconst cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};
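/*
 * Note: each entry maps a CPUID leaf 2 descriptor byte to a cache level
 * marker (LVL_*) and a size in KB; the table ends with an all-zero sentinel
 * and is scanned linearly by init_intel_cacheinfo() below.
 */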
enum _cache_type {
	CACHE_TYPE_NULL = 0,
	CACHE_TYPE_DATA = 1,
	CACHE_TYPE_INST = 2,
	CACHE_TYPE_UNIFIED = 3
};
union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};
union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};
union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};
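/*
 * Per the CPUID leaf 4 definition all of the fields above are stored
 * minus one, so the total cache size in bytes works out to
 * (ways + 1) * (partitions + 1) * (line_size + 1) * (sets + 1);
 * this is exactly what cpuid4_cache_lookup_regs() computes below.
 */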
struct amd_l3_cache {
	struct	 amd_northbridge *nb;
	unsigned indices;
	u8	 subcaches[4];
};
struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	struct amd_l3_cache *l3;
	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
};
/* subset of above _cpuid4_info w/o shared_cpu_map */
struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	struct amd_l3_cache *l3;
};
unsigned short			num_cache_leaves;
/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user. This makes some assumptions about the machine:
   L2 not shared, no SMT etc. that is currently true on AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy"). */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};
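/*
 * Note: these bitfields mirror the L1/L2/L3 descriptor words returned in
 * ECX/EDX by CPUID 0x80000005 and 0x80000006 on AMD parts; the L3 size is
 * encoded in 512 KB units, hence the "* 512" when it is consumed below.
 */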
static const unsigned short __cpuinitconst assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
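/*
 * The assocs[] entries other than [0xf] follow the associativity encoding
 * AMD documents for CPUID 0x80000006 (e.g. 0x6 -> 8-way, 0x8 -> 16-way);
 * levels[]/types[] translate the emulated leaf number (0 = L1D, 1 = L1I,
 * 2 = L2, 3 = L3) into the level/type fields of the CPUID(4) format.
 */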
static void __cpuinit
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
	switch (leaf) {
	case 1:
		l1 = &l1i;
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}
	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;

	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}
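/*
 * Worked example of the emulation above: a 512 KB, 16-way L2 with 64 byte
 * lines yields number_of_sets = 512*1024 / 64 / 16 - 1 = 511, i.e. 512 sets
 * reported in the same minus-one form that real CPUID(4) hardware uses.
 */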
struct _cache_attr {
	struct attribute attr;
	ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
			 unsigned int);
};
#ifdef CONFIG_AMD_NB

/*
 * L3 cache descriptors
 */
static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
{
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(l3->nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));
	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
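/*
 * The computation above treats each clear bit in the 0x1C4 config word as
 * an available subcache portion and derives the number of usable L3
 * indices (stored minus one) from the largest subcache, scaled by 1024.
 * That bound is what amd_set_l3_disable_slot() checks against below.
 */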
static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
					int index)
{
	static struct amd_l3_cache *__cpuinitdata l3_caches;
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3 || amd_nb_num() == 0)
		return;

	/*
	 * Strictly speaking, the amount in @size below is leaked since it is
	 * never freed but this is done only on shutdown so it doesn't matter.
	 */
	if (!l3_caches) {
		int size = amd_nb_num() * sizeof(struct amd_l3_cache);

		l3_caches = kzalloc(size, GFP_ATOMIC);
		if (!l3_caches)
			return;
	}

	node = amd_get_nb_id(smp_processor_id());

	if (!l3_caches[node].nb) {
		l3_caches[node].nb = node_to_amd_nb(node);
		amd_calc_l3_indices(&l3_caches[node]);
	}

	this_leaf->l3 = &l3_caches[node];
}
/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @l3: L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}
static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;

	if (!this_leaf->l3 ||
	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return -EINVAL;

	index = amd_get_l3_disable_slot(this_leaf->l3, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}
#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf,	\
			  unsigned int cpu)				\
{									\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)
static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 *  disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!l3->subcaches[i])
			continue;

		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache which indices we disable therefore a simple wbinvd()
		 * is not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
	}
}
/*
 * disable an L3 cache index by using a disable-slot
 *
 * @l3:    L3 cache descriptor
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
			    unsigned long index)
{
	int ret = 0;

	/* check if @slot is already used or the index is already disabled */
	ret = amd_get_l3_disable_slot(l3, slot);
	if (ret >= 0)
		return -EEXIST;

	if (index > l3->indices)
		return -EINVAL;

	/* check whether the other slot has disabled the same index already */
	if (index == amd_get_l3_disable_slot(l3, !slot))
		return -EEXIST;

	amd_l3_disable_index(l3, cpu, slot, index);

	return 0;
}
static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!this_leaf->l3 ||
	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return -EINVAL;

	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));

	if (strict_strtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			printk(KERN_WARNING "L3 disable slot %d in use!\n",
					    slot);
		return err;
	}
	return count;
}
#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
			   const char *buf, size_t count,		\
			   unsigned int cpu)				\
{									\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)
static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
		show_cache_disable_0, store_cache_disable_0);
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
		show_cache_disable_1, store_cache_disable_1);
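/*
 * The two attributes above surface as writable files in each L3 index
 * directory, e.g. (path shown for illustration)
 *   echo 42 > /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
 * disables L3 index 42 via slot 0, and reading the file reports either the
 * disabled index or "FREE".
 */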
static ssize_t
show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
{
	if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return -EINVAL;

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}
static ssize_t
store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
		unsigned int cpu)
{
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return -EINVAL;

	if (strict_strtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;

	return count;
}

static struct _cache_attr subcaches =
	__ATTR(subcaches, 0644, show_subcaches, store_subcaches);
#else	/* CONFIG_AMD_NB */
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB */
static int
__cpuinit cpuid4_cache_lookup_regs(int index,
				   struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax	eax;
	union _cpuid4_leaf_ebx	ebx;
	union _cpuid4_leaf_ecx	ecx;
	unsigned		edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CACHE_TYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets          + 1) *
			  (ebx.split.coherency_line_size     + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity   + 1);
	return 0;
}
static int __cpuinit find_num_cache_leaves(void)
{
	unsigned int		eax, ebx, ecx, edx;
	union _cpuid4_leaf_eax	cache_eax;
	int			i = -1;

	do {
		++i;
		/* Do cpuid(4) loop to find out num_cache_leaves */
		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CACHE_TYPE_NULL);
	return i;
}
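/*
 * On a typical Intel part with split L1 caches, a unified L2 and a unified
 * L3, the loop above terminates with i == 4 (subleaves 0..3 valid, subleaf
 * 4 reporting CACHE_TYPE_NULL); the count is cached in num_cache_leaves.
 */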
unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_X86_HT
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves();
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), deterministic cache
		 * parameters cpuid leaf to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf;
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval < 0)
				continue;

			switch (this_leaf.eax.split.level) {
			case 1:
				if (this_leaf.eax.split.type ==
						CACHE_TYPE_DATA)
					new_l1d = this_leaf.size/1024;
				else if (this_leaf.eax.split.type ==
						CACHE_TYPE_INST)
					new_l1i = this_leaf.size/1024;
				break;
			case 2:
				new_l2 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->apicid >> index_msb;
				break;
			case 3:
				new_l3 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(
						num_threads_sharing);
				l3_id = c->apicid >> index_msb;
				break;
			default:
				break;
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2  call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}
						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	return l2;
}
/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
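/*
 * shared_cpu_map is built below by comparing APIC IDs: CPUs whose APIC IDs
 * agree once the low get_count_order(num_threads_sharing) bits are shifted
 * out share the cache described by a given leaf. For AMD L3 (index 3) the
 * pre-computed cpu_llc_shared_mask() is used instead.
 */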
#ifdef CONFIG_SMP
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
	struct _cpuid4_info *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i, sibling;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			if (!per_cpu(ici_cpuid4_info, i))
				continue;
			this_leaf = CPUID4_INFO_IDX(i, index);
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				set_bit(sibling, this_leaf->shared_cpu_map);
			}
		}
		return;
	}
	this_leaf = CPUID4_INFO_IDX(cpu, index);
	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;

	if (num_threads_sharing == 1)
		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
	else {
		index_msb = get_count_order(num_threads_sharing);

		for_each_online_cpu(i) {
			if (cpu_data(i).apicid >> index_msb ==
			    c->apicid >> index_msb) {
				cpumask_set_cpu(i,
					to_cpumask(this_leaf->shared_cpu_map));
				if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
					sibling_leaf =
						CPUID4_INFO_IDX(i, index);
					cpumask_set_cpu(cpu, to_cpumask(
						sibling_leaf->shared_cpu_map));
				}
			}
		}
	}
}
static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
	struct _cpuid4_info	*this_leaf, *sibling_leaf;
	int sibling;

	this_leaf = CPUID4_INFO_IDX(cpu, index);
	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
		cpumask_clear_cpu(cpu,
				  to_cpumask(sibling_leaf->shared_cpu_map));
	}
}
#else
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
}

static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
}
#endif
static void __cpuinit free_cache_attributes(unsigned int cpu)
{
	int i;

	for (i = 0; i < num_cache_leaves; i++)
		cache_remove_shared_cpu_map(cpu, i);

	kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
	kfree(per_cpu(ici_cpuid4_info, cpu));
	per_cpu(ici_cpuid4_info, cpu) = NULL;
}
static int
__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
	struct _cpuid4_info_regs *leaf_regs =
		(struct _cpuid4_info_regs *)this_leaf;

	return cpuid4_cache_lookup_regs(index, leaf_regs);
}
static void __cpuinit get_cpu_leaves(void *_retval)
{
	int j, *retval = _retval, cpu = smp_processor_id();

	/* Do cpuid and store the results */
	for (j = 0; j < num_cache_leaves; j++) {
		struct _cpuid4_info *this_leaf;
		this_leaf = CPUID4_INFO_IDX(cpu, j);
		*retval = cpuid4_cache_lookup(j, this_leaf);
		if (unlikely(*retval < 0)) {
			int i;

			for (i = 0; i < j; i++)
				cache_remove_shared_cpu_map(cpu, i);
			break;
		}
		cache_shared_cpu_map_setup(cpu, j);
	}
}
static int __cpuinit detect_cache_attributes(unsigned int cpu)
{
	int			retval;

	if (num_cache_leaves == 0)
		return -ENOENT;

	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return -ENOMEM;

	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
	if (retval) {
		kfree(per_cpu(ici_cpuid4_info, cpu));
		per_cpu(ici_cpuid4_info, cpu) = NULL;
	}

	return retval;
}
#include <linux/kobject.h>
#include <linux/sysfs.h>

extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
/* pointer to kobject for cpuX/cache */
static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);

struct _index_kobject {
	struct kobject kobj;
	unsigned int cpu;
	unsigned short index;
};

/* pointer to array of kobjects for cpuX/cache/indexY */
static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(ici_index_kobject, x))[y]))
#define show_one_plus(file_name, object, val)				\
static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
				unsigned int cpu)			\
{									\
	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}
show_one_plus(level, eax.split.level, 0);
show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
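/*
 * The "+ 1" offsets above convert the minus-one encodings of CPUID leaf 4
 * into the human-readable counts exported through sysfs (e.g. a raw
 * ways_of_associativity of 7 is shown as 8).
 */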
static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
			 unsigned int cpu)
{
	return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}
static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
					int type, char *buf)
{
	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
	int n = 0;

	if (len > 1) {
		const struct cpumask *mask;

		mask = to_cpumask(this_leaf->shared_cpu_map);
		n = type ?
			cpulist_scnprintf(buf, len-2, mask) :
			cpumask_scnprintf(buf, len-2, mask);
		buf[n++] = '\n';
		buf[n] = '\0';
	}
	return n;
}

static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
					  unsigned int cpu)
{
	return show_shared_cpu_map_func(leaf, 0, buf);
}

static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
					   unsigned int cpu)
{
	return show_shared_cpu_map_func(leaf, 1, buf);
}
static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
			 unsigned int cpu)
{
	switch (this_leaf->eax.split.type) {
	case CACHE_TYPE_DATA:
		return sprintf(buf, "Data\n");
	case CACHE_TYPE_INST:
		return sprintf(buf, "Instruction\n");
	case CACHE_TYPE_UNIFIED:
		return sprintf(buf, "Unified\n");
	default:
		return sprintf(buf, "Unknown\n");
	}
}
#define to_object(k)	container_of(k, struct _index_kobject, kobj)
#define to_attr(a)	container_of(a, struct _cache_attr, attr)

#define define_one_ro(_name) \
static struct _cache_attr _name = \
	__ATTR(_name, 0444, show_##_name, NULL)
define_one_ro(level);
define_one_ro(type);
define_one_ro(coherency_line_size);
define_one_ro(physical_line_partition);
define_one_ro(ways_of_associativity);
define_one_ro(number_of_sets);
define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

static struct attribute *default_attrs[] = {
	&type.attr,
	&level.attr,
	&coherency_line_size.attr,
	&physical_line_partition.attr,
	&ways_of_associativity.attr,
	&number_of_sets.attr,
	&size.attr,
	&shared_cpu_map.attr,
	&shared_cpu_list.attr,
	NULL
};
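/*
 * Each cache leaf therefore ends up as a sysfs directory populated with
 * these read-only files, for example (illustrative path):
 *   /sys/devices/system/cpu/cpu0/cache/index0/{type,level,size,
 *   coherency_line_size,ways_of_associativity,number_of_sets,
 *   shared_cpu_map,shared_cpu_list,...}
 */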
#ifdef CONFIG_AMD_NB
static struct attribute ** __cpuinit amd_l3_attrs(void)
{
	static struct attribute **attrs;
	int n;

	if (attrs)
		return attrs;

	n = sizeof (default_attrs) / sizeof (struct attribute *);

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;

	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
	if (attrs == NULL)
		return attrs = default_attrs;

	for (n = 0; default_attrs[n]; n++)
		attrs[n] = default_attrs[n];

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		attrs[n++] = &cache_disable_0.attr;
		attrs[n++] = &cache_disable_1.attr;
	}

	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		attrs[n++] = &subcaches.attr;

	return attrs;
}
#endif
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->show ?
		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf, this_leaf->cpu) :
		0;
	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->store ?
		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf, count, this_leaf->cpu) :
		0;
	return ret;
}
static const struct sysfs_ops sysfs_ops = {
	.show   = show,
	.store  = store,
};

static struct kobj_type ktype_cache = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
};

static struct kobj_type ktype_percpu_entry = {
	.sysfs_ops	= &sysfs_ops,
};
static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
	kfree(per_cpu(ici_cache_kobject, cpu));
	kfree(per_cpu(ici_index_kobject, cpu));
	per_cpu(ici_cache_kobject, cpu) = NULL;
	per_cpu(ici_index_kobject, cpu) = NULL;
	free_cache_attributes(cpu);
}
static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
{
	int err;

	if (num_cache_leaves == 0)
		return -ENOENT;

	err = detect_cache_attributes(cpu);
	if (err)
		return err;

	/* Allocate all required memory */
	per_cpu(ici_cache_kobject, cpu) =
		kzalloc(sizeof(struct kobject), GFP_KERNEL);
	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
		goto err_out;

	per_cpu(ici_index_kobject, cpu) = kzalloc(
	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
		goto err_out;

	return 0;

err_out:
	cpuid4_cache_sysfs_exit(cpu);
	return -ENOMEM;
}
static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i, j;
	struct _index_kobject *this_object;
	struct _cpuid4_info   *this_leaf;
	int retval;

	retval = cpuid4_cache_sysfs_init(cpu);
	if (unlikely(retval < 0))
		return retval;

	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
				      &ktype_percpu_entry,
				      &sys_dev->kobj, "%s", "cache");
	if (retval < 0) {
		cpuid4_cache_sysfs_exit(cpu);
		return retval;
	}

	for (i = 0; i < num_cache_leaves; i++) {
		this_object = INDEX_KOBJECT_PTR(cpu, i);
		this_object->cpu = cpu;
		this_object->index = i;

		this_leaf = CPUID4_INFO_IDX(cpu, i);

		ktype_cache.default_attrs = default_attrs;
#ifdef CONFIG_AMD_NB
		if (this_leaf->l3)
			ktype_cache.default_attrs = amd_l3_attrs();
#endif
		retval = kobject_init_and_add(&(this_object->kobj),
					      &ktype_cache,
					      per_cpu(ici_cache_kobject, cpu),
					      "index%1lu", i);
		if (unlikely(retval)) {
			for (j = 0; j < i; j++)
				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
			kobject_put(per_cpu(ici_cache_kobject, cpu));
			cpuid4_cache_sysfs_exit(cpu);
			return retval;
		}
		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
	}
	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));

	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
	return 0;
}
static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i;

	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return;
	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
		return;
	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));

	for (i = 0; i < num_cache_leaves; i++)
		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
	kobject_put(per_cpu(ici_cache_kobject, cpu));
	cpuid4_cache_sysfs_exit(cpu);
}
static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
					    unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct sys_device *sys_dev;

	sys_dev = get_cpu_sysdev(cpu);
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		cache_add_dev(sys_dev);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		cache_remove_dev(sys_dev);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
	.notifier_call = cacheinfo_cpu_callback,
};
static int __cpuinit cache_sysfs_init(void)
{
	int i;

	if (num_cache_leaves == 0)
		return 0;

	for_each_online_cpu(i) {
		int err;
		struct sys_device *sys_dev = get_cpu_sysdev(i);

		err = cache_add_dev(sys_dev);
		if (err)
			return err;
	}
	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
	return 0;
}

device_initcall(cache_sysfs_init);
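/*
 * Initialization flow: cache_sysfs_init() runs as a device_initcall, creates
 * the cache/indexN hierarchy for every CPU already online and then registers
 * cacheinfo_cpu_notifier so that later CPU hotplug events add or remove the
 * corresponding sysfs entries.
 */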