/* arch/x86/kernel/cpu/intel_cacheinfo.c */
/*
 *	Routines to identify caches on Intel CPU.
 *
 *	Changes:
 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/pci.h>

#include <asm/processor.h>
#include <linux/smp.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>
#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5
struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)
/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

static const struct _cache_table __cpuinitconst cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x21, LVL_2, 256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3, 512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3, MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3, MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3, MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2, 128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2, 192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2, 128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2, 256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2, 384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2, 512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2, 256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2, 128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2, 256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2, 512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2, MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2, MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3, MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3, MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x49, LVL_3, MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3, MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3, MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3, MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3, MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2, MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE, 12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE, 16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE, 32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE, 64 },	/* 8-way set assoc */
	{ 0x78, LVL_2, MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2, 128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2, 256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2, 512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2, MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2, MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2, 512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x82, LVL_2, 256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2, 512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2, MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2, MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2, 512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2, MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3, 512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3, MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3, MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3, MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3, MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3, MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3, MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3, MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3, MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3, MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3, MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3, MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3, MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3, MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3, MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};
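/*
 * Note: sizes in cache_table[] are in KB (the MB() helper above also
 * expands to KB), and the all-zero descriptor terminates the table for
 * the lookup loop in init_intel_cacheinfo() below.
 */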
enum _cache_type {
	CACHE_TYPE_NULL	= 0,
	CACHE_TYPE_DATA = 1,
	CACHE_TYPE_INST = 2,
	CACHE_TYPE_UNIFIED = 3
};
union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};
union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};
union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};
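/*
 * The size-related fields in the leaf-4 layouts above are all encoded
 * as "value - 1"; cpuid4_cache_lookup_regs() below reconstructs the
 * total size as (sets + 1) * (line_size + 1) * (partitions + 1) *
 * (ways + 1).
 */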
struct amd_l3_cache {
	struct	 amd_northbridge *nb;
	unsigned indices;
	u8	 subcaches[4];
};
struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	struct amd_l3_cache *l3;
	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
};
/* subset of above _cpuid4_info w/o shared_cpu_map */
struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	struct amd_l3_cache *l3;
};
unsigned short			num_cache_leaves;
/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user. This makes some assumptions about the machine:
   L2 not shared, no SMT etc., which are currently true on AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy").
   Maybe later */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};
union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};
union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};
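/*
 * The three unions above mirror the register layouts consumed by
 * amd_cpuid4() below: CPUID 0x80000005 describes the L1 caches
 * (ECX = L1D, EDX = L1I) and CPUID 0x80000006 describes L2 (ECX)
 * and L3 (EDX).
 */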
static const unsigned short __cpuinitconst assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};
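/*
 * assocs[] maps the raw associativity encoding reported by the AMD
 * cache leaves (see amd_cpuid4() below) to an actual way count;
 * encodings not listed here end up as 0.
 */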
static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
static void __cpuinit
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
		/* fall through: L1I and L1D share the same layout */
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = current_cpu_data.x86_cache_size;
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;

	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}
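/*
 * Worked example for the number_of_sets computation above: a 512 KB,
 * 16-way L2 with 64-byte lines gives 512 * 1024 / 64 / 16 = 512 sets,
 * stored as 511 since the leaf-4 format encodes counts minus one.
 */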
struct _cache_attr {
	struct attribute attr;
	ssize_t (*show)(struct _cpuid4_info *, char *);
	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
};
#ifdef CONFIG_AMD_NB

/*
 * L3 cache descriptors
 */
static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
{
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(l3->nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));
	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
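/*
 * The counts above are derived from the L3 control register at offset
 * 0x1C4 of the northbridge's misc PCI device, where a set bit marks a
 * disabled subcache.  The largest enabled count, in units of 1024
 * indices, gives the usable index range, e.g. a maximum of 2 yields
 * (2 << 10) - 1 = 2047.
 */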
static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
					int index)
{
	static struct amd_l3_cache *__cpuinitdata l3_caches;
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3 || amd_nb_num() == 0)
		return;

	/*
	 * Strictly speaking, the amount in @size below is leaked since it is
	 * never freed but this is done only on shutdown so it doesn't matter.
	 */
	if (!l3_caches) {
		int size = amd_nb_num() * sizeof(struct amd_l3_cache);

		l3_caches = kzalloc(size, GFP_ATOMIC);
		if (!l3_caches)
			return;
	}

	node = amd_get_nb_id(smp_processor_id());

	if (!l3_caches[node].nb) {
		l3_caches[node].nb = node_to_amd_nb(node);
		amd_calc_l3_indices(&l3_caches[node]);
	}

	this_leaf->l3 = &l3_caches[node];
}
/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @l3: L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}
static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;

	if (!this_leaf->l3 ||
	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return -EINVAL;

	index = amd_get_l3_disable_slot(this_leaf->l3, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}
#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf)	\
{									\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)
static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 * disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!l3->subcaches[i])
			continue;

		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache whose indices we disable, therefore a simple wbinvd()
		 * is not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
	}
}
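/*
 * Register layout as used by this function (registers 0x1BC and 0x1C0,
 * one per disable slot): bits 0-11 select the index, bits 20-21 select
 * the subcache, bit 30 marks the slot as used and bit 31 is set after
 * the WBINVD to commit the disable.  See SUBCACHE_MASK/SUBCACHE_INDEX
 * below.
 */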
/*
 * disable an L3 cache index by using a disable-slot
 *
 * @l3:    L3 cache descriptor
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
			    unsigned long index)
{
	int ret = 0;

#define SUBCACHE_MASK	(3UL << 20)
#define SUBCACHE_INDEX	0xfff

	/*
	 * check whether this slot is already used or
	 * the index is already disabled
	 */
	ret = amd_get_l3_disable_slot(l3, slot);
	if (ret >= 0)
		return -EEXIST;

	/*
	 * check whether the other slot has disabled the
	 * same index already
	 */
	if (index == amd_get_l3_disable_slot(l3, !slot))
		return -EINVAL;

	/* do not allow writes outside of allowed bits */
	if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
	    ((index & SUBCACHE_INDEX) > l3->indices))
		return -EINVAL;

	amd_l3_disable_index(l3, cpu, slot, index);

	return 0;
}
static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!this_leaf->l3 ||
	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return -EINVAL;

	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));

	if (strict_strtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			printk(KERN_WARNING "L3 disable slot %d in use!\n",
					    slot);
		return err;
	}
	return count;
}
#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
			   const char *buf, size_t count)		\
{									\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
		show_cache_disable_0, store_cache_disable_0);
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
		show_cache_disable_1, store_cache_disable_1);
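/*
 * The two attributes above appear under sysfs, typically as
 * /sys/devices/system/cpu/cpuN/cache/index3/cache_disable_{0,1}:
 * reading shows the disabled index or "FREE", writing an index number
 * disables that index.
 */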
#else	/* CONFIG_AMD_NB */
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB */
static int
__cpuinit cpuid4_cache_lookup_regs(int index,
				   struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax	eax;
	union _cpuid4_leaf_ebx	ebx;
	union _cpuid4_leaf_ecx	ecx;
	unsigned		edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CACHE_TYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets          + 1) *
			  (ebx.split.coherency_line_size     + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity   + 1);
	return 0;
}
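/*
 * Size example for the computation above: 4096 sets * 64-byte lines *
 * 1 partition * 8 ways = 2 MB.  Each field is reported minus one by
 * CPUID leaf 4, hence the "+ 1" terms.
 */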
static int __cpuinit find_num_cache_leaves(void)
{
	unsigned int		eax, ebx, ecx, edx;
	union _cpuid4_leaf_eax	cache_eax;
	int			i = -1;

	do {
		++i;
		/* Do cpuid(4) loop to find out num_cache_leaves */
		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CACHE_TYPE_NULL);
	return i;
}
unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_X86_HT
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves();
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), deterministic cache
		 * parameters cpuid leaf to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf;
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval >= 0) {
				switch (this_leaf.eax.split.level) {
				case 1:
					if (this_leaf.eax.split.type ==
							CACHE_TYPE_DATA)
						new_l1d = this_leaf.size/1024;
					else if (this_leaf.eax.split.type ==
							CACHE_TYPE_INST)
						new_l1i = this_leaf.size/1024;
					break;
				case 2:
					new_l2 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(num_threads_sharing);
					l2_id = c->apicid >> index_msb;
					break;
				case 3:
					new_l3 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(
							num_threads_sharing);
					l3_id = c->apicid >> index_msb;
					break;
				default:
					break;
				}
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2  call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	return l2;
}
#ifdef CONFIG_SYSFS

/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
#ifdef CONFIG_SMP
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
	struct _cpuid4_info	*this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i, sibling;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
		for_each_cpu(i, c->llc_shared_map) {
			if (!per_cpu(ici_cpuid4_info, i))
				continue;
			this_leaf = CPUID4_INFO_IDX(i, index);
			for_each_cpu(sibling, c->llc_shared_map) {
				if (!cpu_online(sibling))
					continue;
				set_bit(sibling, this_leaf->shared_cpu_map);
			}
		}
		return;
	}
	this_leaf = CPUID4_INFO_IDX(cpu, index);
	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;

	if (num_threads_sharing == 1)
		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
	else {
		index_msb = get_count_order(num_threads_sharing);

		for_each_online_cpu(i) {
			if (cpu_data(i).apicid >> index_msb ==
			    c->apicid >> index_msb) {
				cpumask_set_cpu(i,
					to_cpumask(this_leaf->shared_cpu_map));
				if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
					sibling_leaf =
						CPUID4_INFO_IDX(i, index);
					cpumask_set_cpu(cpu, to_cpumask(
						sibling_leaf->shared_cpu_map));
				}
			}
		}
	}
}
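/*
 * Sharing above is derived from the APIC ID: CPUs whose IDs are equal
 * once the low index_msb bits (covering the threads that share this
 * cache) are shifted off belong to the same cache domain.  E.g. with
 * num_threads_sharing == 2, index_msb == 1 and apicid >> 1 identifies
 * the pair of siblings sharing the leaf.
 */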
static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
	struct _cpuid4_info	*this_leaf, *sibling_leaf;
	int sibling;

	this_leaf = CPUID4_INFO_IDX(cpu, index);
	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
		cpumask_clear_cpu(cpu,
				  to_cpumask(sibling_leaf->shared_cpu_map));
	}
}
#else
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
}

static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
}
#endif
static void __cpuinit free_cache_attributes(unsigned int cpu)
{
	int i;

	for (i = 0; i < num_cache_leaves; i++)
		cache_remove_shared_cpu_map(cpu, i);

	kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
	kfree(per_cpu(ici_cpuid4_info, cpu));
	per_cpu(ici_cpuid4_info, cpu) = NULL;
}
static int
__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
	struct _cpuid4_info_regs *leaf_regs =
		(struct _cpuid4_info_regs *)this_leaf;

	return cpuid4_cache_lookup_regs(index, leaf_regs);
}
static void __cpuinit get_cpu_leaves(void *_retval)
{
	int j, *retval = _retval, cpu = smp_processor_id();

	/* Do cpuid and store the results */
	for (j = 0; j < num_cache_leaves; j++) {
		struct _cpuid4_info *this_leaf;
		this_leaf = CPUID4_INFO_IDX(cpu, j);
		*retval = cpuid4_cache_lookup(j, this_leaf);
		if (unlikely(*retval < 0)) {
			int i;

			for (i = 0; i < j; i++)
				cache_remove_shared_cpu_map(cpu, i);
			break;
		}
		cache_shared_cpu_map_setup(cpu, j);
	}
}
static int __cpuinit detect_cache_attributes(unsigned int cpu)
{
	int			retval;

	if (num_cache_leaves == 0)
		return -ENOENT;

	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return -ENOMEM;

	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
	if (retval) {
		kfree(per_cpu(ici_cpuid4_info, cpu));
		per_cpu(ici_cpuid4_info, cpu) = NULL;
	}

	return retval;
}
#include <linux/kobject.h>
#include <linux/sysfs.h>

extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */

/* pointer to kobject for cpuX/cache */
static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);

struct _index_kobject {
	struct kobject kobj;
	unsigned int cpu;
	unsigned short index;
};

/* pointer to array of kobjects for cpuX/cache/indexY */
static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(ici_index_kobject, x))[y]))
#define show_one_plus(file_name, object, val)				\
static ssize_t show_##file_name						\
			(struct _cpuid4_info *this_leaf, char *buf)	\
{									\
	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}

show_one_plus(level, eax.split.level, 0);
show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
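/*
 * The "+ 1" passed to show_one_plus() above converts the minus-one
 * encoding of CPUID leaf 4 back into human-readable counts for sysfs;
 * the cache level is reported as-is.
 */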
static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
{
	return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}
static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
					int type, char *buf)
{
	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
	int n = 0;

	if (len > 1) {
		const struct cpumask *mask;

		mask = to_cpumask(this_leaf->shared_cpu_map);
		n = type ?
			cpulist_scnprintf(buf, len-2, mask) :
			cpumask_scnprintf(buf, len-2, mask);
		buf[n++] = '\n';
		buf[n] = '\0';
	}
	return n;
}
static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
{
	return show_shared_cpu_map_func(leaf, 0, buf);
}

static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
{
	return show_shared_cpu_map_func(leaf, 1, buf);
}
static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
{
	switch (this_leaf->eax.split.type) {
	case CACHE_TYPE_DATA:
		return sprintf(buf, "Data\n");
	case CACHE_TYPE_INST:
		return sprintf(buf, "Instruction\n");
	case CACHE_TYPE_UNIFIED:
		return sprintf(buf, "Unified\n");
	default:
		return sprintf(buf, "Unknown\n");
	}
}
#define to_object(k)	container_of(k, struct _index_kobject, kobj)
#define to_attr(a)	container_of(a, struct _cache_attr, attr)

#define define_one_ro(_name) \
static struct _cache_attr _name = \
	__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(level);
define_one_ro(type);
define_one_ro(coherency_line_size);
define_one_ro(physical_line_partition);
define_one_ro(ways_of_associativity);
define_one_ro(number_of_sets);
define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);
static struct attribute *default_attrs[] = {
	&type.attr,
	&level.attr,
	&coherency_line_size.attr,
	&physical_line_partition.attr,
	&ways_of_associativity.attr,
	&number_of_sets.attr,
	&size.attr,
	&shared_cpu_map.attr,
	&shared_cpu_list.attr,
	NULL
};
#ifdef CONFIG_AMD_NB
static struct attribute ** __cpuinit amd_l3_attrs(void)
{
	static struct attribute **attrs;
	int n;

	if (attrs)
		return attrs;

	n = sizeof (default_attrs) / sizeof (struct attribute *);

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;

	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
	if (attrs == NULL)
		return attrs = default_attrs;

	for (n = 0; default_attrs[n]; n++)
		attrs[n] = default_attrs[n];

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		attrs[n++] = &cache_disable_0.attr;
		attrs[n++] = &cache_disable_1.attr;
	}

	return attrs;
}
#endif
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->show ?
		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf) :
		0;
	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->store ?
		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf, count) :
		0;
	return ret;
}
static const struct sysfs_ops sysfs_ops = {
	.show   = show,
	.store  = store,
};

static struct kobj_type ktype_cache = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
};

static struct kobj_type ktype_percpu_entry = {
	.sysfs_ops	= &sysfs_ops,
};
static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
	kfree(per_cpu(ici_cache_kobject, cpu));
	kfree(per_cpu(ici_index_kobject, cpu));
	per_cpu(ici_cache_kobject, cpu) = NULL;
	per_cpu(ici_index_kobject, cpu) = NULL;
	free_cache_attributes(cpu);
}
static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
{
	int err;

	if (num_cache_leaves == 0)
		return -ENOENT;

	err = detect_cache_attributes(cpu);
	if (err)
		return err;

	/* Allocate all required memory */
	per_cpu(ici_cache_kobject, cpu) =
		kzalloc(sizeof(struct kobject), GFP_KERNEL);
	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
		goto err_out;

	per_cpu(ici_index_kobject, cpu) = kzalloc(
	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
		goto err_out;

	return 0;

err_out:
	cpuid4_cache_sysfs_exit(cpu);
	return -ENOMEM;
}
static DECLARE_BITMAP(cache_dev_map, NR_CPUS);

/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i, j;
	struct _index_kobject *this_object;
	struct _cpuid4_info   *this_leaf;
	int retval;

	retval = cpuid4_cache_sysfs_init(cpu);
	if (unlikely(retval < 0))
		return retval;

	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
				      &ktype_percpu_entry,
				      &sys_dev->kobj, "%s", "cache");
	if (retval < 0) {
		cpuid4_cache_sysfs_exit(cpu);
		return retval;
	}

	for (i = 0; i < num_cache_leaves; i++) {
		this_object = INDEX_KOBJECT_PTR(cpu, i);
		this_object->cpu = cpu;
		this_object->index = i;

		this_leaf = CPUID4_INFO_IDX(cpu, i);

		ktype_cache.default_attrs = default_attrs;
#ifdef CONFIG_AMD_NB
		if (this_leaf->l3)
			ktype_cache.default_attrs = amd_l3_attrs();
#endif
		retval = kobject_init_and_add(&(this_object->kobj),
					      &ktype_cache,
					      per_cpu(ici_cache_kobject, cpu),
					      "index%1lu", i);
		if (unlikely(retval)) {
			for (j = 0; j < i; j++)
				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
			kobject_put(per_cpu(ici_cache_kobject, cpu));
			cpuid4_cache_sysfs_exit(cpu);
			return retval;
		}
		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
	}
	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));

	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
	return 0;
}
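/*
 * The kobjects registered above form the usual sysfs tree, roughly
 * /sys/devices/system/cpu/cpuN/cache/indexM/, populated with the
 * attributes from default_attrs (plus the AMD L3 index-disable files
 * when available).
 */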
static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i;

	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return;
	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
		return;
	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));

	for (i = 0; i < num_cache_leaves; i++)
		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
	kobject_put(per_cpu(ici_cache_kobject, cpu));
	cpuid4_cache_sysfs_exit(cpu);
}
static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct sys_device *sys_dev;

	sys_dev = get_cpu_sysdev(cpu);
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		cache_add_dev(sys_dev);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		cache_remove_dev(sys_dev);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
	.notifier_call = cacheinfo_cpu_callback,
};
static int __cpuinit cache_sysfs_init(void)
{
	int i;

	if (num_cache_leaves == 0)
		return 0;

	for_each_online_cpu(i) {
		int err;
		struct sys_device *sys_dev = get_cpu_sysdev(i);

		err = cache_add_dev(sys_dev);
		if (err)
			return err;
	}
	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
	return 0;
}

device_initcall(cache_sysfs_init);
#endif