Updated to fedora-glibc-20090409T1422
[glibc/history.git] / sysdeps / x86_64 / cacheinfo.c
blob8769e9c966beefdb8abc568b2ec5a2c847146000
/* x86_64 cache info.
   Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.
*/
21 #include <assert.h>
22 #include <stdbool.h>
23 #include <stdlib.h>
24 #include <unistd.h>
26 #ifdef USE_MULTIARCH
27 # include "multiarch/init-arch.h"
28 #endif
/* Cache descriptor bytes as reported by CPUID leaf 2 on Intel processors.

   IDX is the descriptor byte.  NAME is one of the _SC_LEVEL*_CACHE_SIZE
   constants and only identifies which cache the descriptor describes.
   SIZE is the cache size in bytes, ASSOC the associativity and LINESIZE
   the line size in bytes.

   The table is searched with bsearch, so the entries must stay sorted by
   strictly increasing IDX; in particular every IDX may appear only once.  */
static const struct intel_02_cache_info
{
  unsigned int idx;
  int name;
  long int size;
  long int assoc;
  long int linesize;
} intel_02_known [] =
  {
    { 0x06, _SC_LEVEL1_ICACHE_SIZE, 8192, 4, 32 },
    { 0x08, _SC_LEVEL1_ICACHE_SIZE, 16384, 4, 32 },
    { 0x09, _SC_LEVEL1_ICACHE_SIZE, 32768, 4, 32 },
    { 0x0a, _SC_LEVEL1_DCACHE_SIZE, 8192, 2, 32 },
    { 0x0c, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 32 },
    { 0x0d, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 },
    { 0x22, _SC_LEVEL3_CACHE_SIZE, 524288, 4, 64 },
    { 0x23, _SC_LEVEL3_CACHE_SIZE, 1048576, 8, 64 },
    { 0x25, _SC_LEVEL3_CACHE_SIZE, 2097152, 8, 64 },
    { 0x29, _SC_LEVEL3_CACHE_SIZE, 4194304, 8, 64 },
    { 0x2c, _SC_LEVEL1_DCACHE_SIZE, 32768, 8, 64 },
    { 0x30, _SC_LEVEL1_ICACHE_SIZE, 32768, 8, 64 },
    { 0x39, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 64 },
    { 0x3a, _SC_LEVEL2_CACHE_SIZE, 196608, 6, 64 },
    { 0x3b, _SC_LEVEL2_CACHE_SIZE, 131072, 2, 64 },
    { 0x3c, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 64 },
    { 0x3d, _SC_LEVEL2_CACHE_SIZE, 393216, 6, 64 },
    { 0x3e, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 },
    { 0x3f, _SC_LEVEL2_CACHE_SIZE, 262144, 2, 64 },
    { 0x41, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 32 },
    { 0x42, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 32 },
    { 0x43, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 32 },
    { 0x44, _SC_LEVEL2_CACHE_SIZE, 1048576, 4, 32 },
    { 0x45, _SC_LEVEL2_CACHE_SIZE, 2097152, 4, 32 },
    { 0x46, _SC_LEVEL3_CACHE_SIZE, 4194304, 4, 64 },
    { 0x47, _SC_LEVEL3_CACHE_SIZE, 8388608, 8, 64 },
    { 0x48, _SC_LEVEL2_CACHE_SIZE, 3145728, 12, 64 },
    { 0x49, _SC_LEVEL2_CACHE_SIZE, 4194304, 16, 64 },
    { 0x4a, _SC_LEVEL3_CACHE_SIZE, 6291456, 12, 64 },
    { 0x4b, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 },
    { 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 },
    { 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 },
    { 0x4e, _SC_LEVEL2_CACHE_SIZE, 6291456, 24, 64 },
    { 0x60, _SC_LEVEL1_DCACHE_SIZE, 16384, 8, 64 },
    { 0x66, _SC_LEVEL1_DCACHE_SIZE, 8192, 4, 64 },
    { 0x67, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 },
    { 0x68, _SC_LEVEL1_DCACHE_SIZE, 32768, 4, 64 },
    { 0x78, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 },
    { 0x79, _SC_LEVEL2_CACHE_SIZE, 131072, 8, 64 },
    { 0x7a, _SC_LEVEL2_CACHE_SIZE, 262144, 8, 64 },
    { 0x7b, _SC_LEVEL2_CACHE_SIZE, 524288, 8, 64 },
    { 0x7c, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 },
    { 0x7d, _SC_LEVEL2_CACHE_SIZE, 2097152, 8, 64 },
    { 0x7f, _SC_LEVEL2_CACHE_SIZE, 524288, 2, 64 },
    { 0x82, _SC_LEVEL2_CACHE_SIZE, 262144, 8, 32 },
    { 0x83, _SC_LEVEL2_CACHE_SIZE, 524288, 8, 32 },
    { 0x84, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 32 },
    { 0x85, _SC_LEVEL2_CACHE_SIZE, 2097152, 8, 32 },
    { 0x86, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 },
    { 0x87, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 },
    { 0xd0, _SC_LEVEL3_CACHE_SIZE, 524288, 4, 64 },
    { 0xd1, _SC_LEVEL3_CACHE_SIZE, 1048576, 4, 64 },
    { 0xd2, _SC_LEVEL3_CACHE_SIZE, 2097152, 4, 64 },
    { 0xd6, _SC_LEVEL3_CACHE_SIZE, 1048576, 8, 64 },
    { 0xd7, _SC_LEVEL3_CACHE_SIZE, 2097152, 8, 64 },
    { 0xd8, _SC_LEVEL3_CACHE_SIZE, 4194304, 8, 64 },
    { 0xdc, _SC_LEVEL3_CACHE_SIZE, 2097152, 12, 64 },
    { 0xdd, _SC_LEVEL3_CACHE_SIZE, 4194304, 12, 64 },
    { 0xde, _SC_LEVEL3_CACHE_SIZE, 8388608, 12, 64 },
    /* 0xe2 used to be listed as a second 0xe3 entry, which made the
       2 MiB descriptor unrecognisable and broke the strict ordering
       bsearch depends on.  */
    { 0xe2, _SC_LEVEL3_CACHE_SIZE, 2097152, 16, 64 },
    { 0xe3, _SC_LEVEL3_CACHE_SIZE, 4194304, 16, 64 },
    { 0xe4, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 },
  };

/* Number of entries in intel_02_known.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
105 static int
106 intel_02_known_compare (const void *p1, const void *p2)
108 const struct intel_02_cache_info *i1;
109 const struct intel_02_cache_info *i2;
111 i1 = (const struct intel_02_cache_info *) p1;
112 i2 = (const struct intel_02_cache_info *) p2;
114 if (i1->idx == i2->idx)
115 return 0;
117 return i1->idx < i2->idx ? -1 : 1;
121 static long int
122 __attribute__ ((noinline))
123 intel_check_word (int name, unsigned int value, bool *has_level_2,
124 bool *no_level_2_or_3)
126 if ((value & 0x80000000) != 0)
127 /* The register value is reserved. */
128 return 0;
130 /* Fold the name. The _SC_ constants are always in the order SIZE,
131 ASSOC, LINESIZE. */
132 int folded_name = (_SC_LEVEL1_ICACHE_SIZE
133 + ((name - _SC_LEVEL1_ICACHE_SIZE) / 3) * 3);
135 while (value != 0)
137 unsigned int byte = value & 0xff;
139 if (byte == 0x40)
141 *no_level_2_or_3 = true;
143 if (folded_name == _SC_LEVEL3_CACHE_SIZE)
144 /* No need to look further. */
145 break;
147 else
149 if (byte == 0x49 && folded_name == _SC_LEVEL3_CACHE_SIZE)
151 /* Intel reused this value. For family 15, model 6 it
152 specifies the 3rd level cache. Otherwise the 2nd
153 level cache. */
154 unsigned int eax;
155 unsigned int ebx;
156 unsigned int ecx;
157 unsigned int edx;
158 asm volatile ("cpuid"
159 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
160 : "0" (1));
162 unsigned int family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
163 unsigned int model = ((((eax >>16) & 0xf) << 4)
164 + ((eax >> 4) & 0xf));
165 if (family == 15 && model == 6)
167 /* The level 3 cache is encoded for this model like
168 the level 2 cache is for other models. Pretend
169 the caller asked for the level 2 cache. */
170 name = (_SC_LEVEL2_CACHE_SIZE
171 + (name - _SC_LEVEL3_CACHE_SIZE));
172 folded_name = _SC_LEVEL3_CACHE_SIZE;
176 struct intel_02_cache_info *found;
177 struct intel_02_cache_info search;
179 search.idx = byte;
180 found = bsearch (&search, intel_02_known, nintel_02_known,
181 sizeof (intel_02_known[0]), intel_02_known_compare);
182 if (found != NULL)
184 if (found->name == folded_name)
186 unsigned int offset = name - folded_name;
188 if (offset == 0)
189 /* Cache size. */
190 return found->size;
191 if (offset == 1)
192 return found->assoc;
194 assert (offset == 2);
195 return found->linesize;
198 if (found->name == _SC_LEVEL2_CACHE_SIZE)
199 *has_level_2 = true;
203 /* Next byte for the next round. */
204 value >>= 8;
207 /* Nothing found. */
208 return 0;
/* Answer the cache query NAME on an Intel processor by walking the
   descriptor bytes returned by CPUID leaf 2.

   MAXIDX is the EAX value callers obtained from CPUID leaf 0; it must be
   at least 2 (checked with assert).

   Returns the first non-zero value intel_check_word produces.  If no
   register yields a value, returns -1 when NAME lies in the range
   _SC_LEVEL2_CACHE_SIZE .. _SC_LEVEL3_CACHE_LINESIZE and the
   "no level 2 or 3 cache" descriptor was seen, and 0 otherwise.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  assert (maxidx >= 2);

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  bool has_level_2 = false;
  bool no_level_2_or_3 = false;
  unsigned int rounds = 1;

  for (unsigned int round = 1; round <= rounds; ++round)
    {
      /* EAX, EBX, ECX and EDX, in that order.  */
      unsigned int regs[4];

      __asm__ volatile ("cpuid"
                        : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]),
                          "=d" (regs[3])
                        : "0" (2));

      if (round == 1)
        {
          /* The low byte of EAX in the first round is the number of
             rounds to make, not a descriptor; record it and strip it.  */
          rounds = regs[0] & 0xff;
          regs[0] &= 0xffffff00;
        }

      /* Process the individual registers' value.  */
      for (unsigned int r = 0; r < 4; ++r)
        {
          long int hit = intel_check_word (name, regs[r], &has_level_2,
                                           &no_level_2_or_3);
          if (hit != 0)
            return hit;
        }
    }

  bool level_2_or_3_query = (name >= _SC_LEVEL2_CACHE_SIZE
                             && name <= _SC_LEVEL3_CACHE_LINESIZE);
  if (level_2_or_3_query && no_level_2_or_3)
    return -1;

  return 0;
}
/* Decode the 4-bit associativity field CODE used for the level 2 and
   level 3 caches in CPUID leaf 0x80000006.  SIZE is the cache size and
   LINESIZE the line size, both as already decoded by the caller; they
   are only used for code 15, where the result is SIZE / LINESIZE.

   Returns 0 for codes without a mapping, and also for code 15 when
   LINESIZE is zero so that a zero line-size field cannot cause a
   division by zero.  */
static long int
amd_decode_assoc (unsigned int code, unsigned int size, unsigned int linesize)
{
  switch (code)
    {
    case 0:
    case 1:
    case 2:
    case 4:
      /* These codes are the associativity itself.  */
      return code;
    case 6:
      return 8;
    case 8:
      return 16;
    case 10:
      return 32;
    case 11:
      return 48;
    case 12:
      return 64;
    case 13:
      return 96;
    case 14:
      return 128;
    case 15:
      return linesize == 0 ? 0 : size / linesize;
    default:
      return 0;
    }
}

/* Answer the cache query NAME using the extended CPUID leaves
   0x80000005 (level 1 caches) and 0x80000006 (level 2 and 3 caches).

   NAME is one of the _SC_LEVEL*_CACHE_{SIZE,ASSOC,LINESIZE} constants.
   Returns 0 for anything above _SC_LEVEL3_CACHE_LINESIZE, when the
   required leaf is above the maximum reported by leaf 0x80000000, or
   when the level 2/3 associativity field is zero for a size or line
   size query.  Returns -1 only if NAME is not handled by the switch and
   the assertion there is compiled out.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __asm__ volatile ("cpuid"
                    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                    : "0" (0x80000000));

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  /* Level 1 data lives in leaf 0x80000005, everything else one higher.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __asm__ volatile ("cpuid"
                    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                    : "0" (fn));

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* Instruction cache query: EDX has the same layout as ECX has for
         the data cache, so reuse the data cache cases below.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
        /* Fully associative.  This yields the same value as the size
           query above rather than a way count.  */
        return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      return amd_decode_assoc ((ecx >> 12) & 0xf,
                               (ecx >> 6) & 0x3fffc00, ecx & 0xff);

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      return amd_decode_assoc ((edx >> 12) & 0xf,
                               (edx & 0x3ffc0000) << 1, edx & 0xff);

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
391 /* Get the value of the system variable NAME. */
392 long int
393 attribute_hidden
394 __cache_sysconf (int name)
396 /* Find out what brand of processor. */
397 unsigned int eax;
398 unsigned int ebx;
399 unsigned int ecx;
400 unsigned int edx;
401 asm volatile ("cpuid"
402 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
403 : "0" (0));
405 /* This spells out "GenuineIntel". */
406 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
407 return handle_intel (name, eax);
409 /* This spells out "AuthenticAMD". */
410 if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
411 return handle_amd (name);
413 // XXX Fill in more vendors.
415 /* CPU not known, we have no information. */
416 return 0;
420 /* Half the data cache size for use in memory and string routines, typically
421 L1 size. */
422 long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
423 /* Shared cache size for use in memory and string routines, typically
424 L2 or L3 size. */
425 long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
426 long int __x86_64_shared_cache_size attribute_hidden = 1024 * 1024;
427 /* PREFETCHW support flag for use in memory and string routines. */
428 int __x86_64_prefetchw attribute_hidden;
430 /* Instructions preferred for memory and string routines.
432 0: Regular instructions
433 1: MMX instructions
434 2: SSE2 instructions
435 3: SSSE3 instructions
438 int __x86_64_preferred_memory_instruction attribute_hidden;
441 static void
442 __attribute__((constructor))
443 init_cacheinfo (void)
445 /* Find out what brand of processor. */
446 unsigned int eax;
447 unsigned int ebx;
448 unsigned int ecx;
449 unsigned int edx;
450 int max_cpuid_ex;
451 long int data = -1;
452 long int shared = -1;
453 unsigned int level;
454 unsigned int threads = 0;
456 #ifdef USE_MULTIARCH
457 if (__cpu_features.kind == arch_kind_unknown)
458 __init_cpu_features ();
459 # define is_intel __cpu_features.kind == arch_kind_intel
460 # define is_amd __cpu_features.kind == arch_kind_amd
461 # define max_cpuid __cpu_features.max_cpuid
462 #else
463 int max_cpuid;
464 asm volatile ("cpuid"
465 : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx)
466 : "0" (0));
467 /* This spells out "GenuineIntel". */
468 # define is_intel \
469 ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
470 /* This spells out "AuthenticAMD". */
471 # define is_amd \
472 ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
473 #endif
475 if (is_intel)
477 data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
479 /* Try L3 first. */
480 level = 3;
481 shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
483 if (shared <= 0)
485 /* Try L2 otherwise. */
486 level = 2;
487 shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
490 #ifdef USE_MULTIARCH
491 eax = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].eax;
492 ebx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].ebx;
493 ecx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].ecx;
494 edx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].edx;
495 #else
496 asm volatile ("cpuid"
497 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
498 : "0" (1));
499 #endif
501 /* Intel prefers SSSE3 instructions for memory/string routines
502 if they are avaiable. */
503 if ((ecx & 0x200))
504 __x86_64_preferred_memory_instruction = 3;
505 else
506 __x86_64_preferred_memory_instruction = 2;
508 /* Figure out the number of logical threads that share the
509 highest cache level. */
510 if (max_cpuid >= 4)
512 int i = 0;
514 /* Query until desired cache level is enumerated. */
517 asm volatile ("cpuid"
518 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
519 : "0" (4), "2" (i++));
521 /* There seems to be a bug in at least some Pentium Ds
522 which sometimes fail to iterate all cache parameters.
523 Do not loop indefinitely here, stop in this case and
524 assume there is no such information. */
525 if ((eax & 0x1f) == 0)
526 goto intel_bug_no_cache_info;
528 while (((eax >> 5) & 0x7) != level);
530 threads = ((eax >> 14) & 0x3ff) + 1;
532 else
534 intel_bug_no_cache_info:
535 /* Assume that all logical threads share the highest cache level. */
537 threads = (ebx >> 16) & 0xff;
540 /* Cap usage of highest cache level to the number of supported
541 threads. */
542 if (shared > 0 && threads > 0)
543 shared /= threads;
545 /* This spells out "AuthenticAMD". */
546 else if (is_amd)
548 data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
549 long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
550 shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
552 /* Get maximum extended function. */
553 asm volatile ("cpuid"
554 : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), "=d" (edx)
555 : "0" (0x80000000));
557 if (shared <= 0)
558 /* No shared L3 cache. All we have is the L2 cache. */
559 shared = core;
560 else
562 /* Figure out the number of logical threads that share L3. */
563 if (max_cpuid_ex >= 0x80000008)
565 /* Get width of APIC ID. */
566 asm volatile ("cpuid"
567 : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx),
568 "=d" (edx)
569 : "0" (0x80000008));
570 threads = 1 << ((ecx >> 12) & 0x0f);
573 if (threads == 0)
575 /* If APIC ID width is not available, use logical
576 processor count. */
577 asm volatile ("cpuid"
578 : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx),
579 "=d" (edx)
580 : "0" (0x00000001));
582 if ((edx & (1 << 28)) != 0)
583 threads = (ebx >> 16) & 0xff;
586 /* Cap usage of highest cache level to the number of
587 supported threads. */
588 if (threads > 0)
589 shared /= threads;
591 /* Account for exclusive L2 and L3 caches. */
592 shared += core;
595 if (max_cpuid_ex >= 0x80000001)
597 asm volatile ("cpuid"
598 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
599 : "0" (0x80000001));
600 /* PREFETCHW || 3DNow! */
601 if ((ecx & 0x100) || (edx & 0x80000000))
602 __x86_64_prefetchw = -1;
606 if (data > 0)
607 __x86_64_data_cache_size_half = data / 2;
609 if (shared > 0)
611 __x86_64_shared_cache_size_half = shared / 2;
612 __x86_64_shared_cache_size = shared;