Updated to fedora-glibc-20090510T1842
[glibc/history.git] / sysdeps / x86_64 / cacheinfo.c
blob1ec4ca19c96c1b5d0bdf6d6a9d4ed9589eb42571
/* x86_64 cache info.
   Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

21 #include <assert.h>
22 #include <stdbool.h>
23 #include <stdlib.h>
24 #include <unistd.h>
26 #ifdef USE_MULTIARCH
27 # include "multiarch/init-arch.h"
28 #endif
/* Cache descriptor bytes reported by CPUID leaf 2 on Intel processors.

   IDX is the descriptor byte.  NAME is the _SC_*_SIZE constant of the
   cache the descriptor refers to; SIZE, ASSOC and LINESIZE are the values
   handed back for the matching _SC_*_SIZE, _SC_*_ASSOC and _SC_*_LINESIZE
   queries.

   The table is searched with bsearch, so the entries must stay sorted by
   IDX in strictly ascending order and every IDX must be unique.  */
static const struct intel_02_cache_info
{
  unsigned int idx;
  int name;
  long int size;
  long int assoc;
  long int linesize;
} intel_02_known [] =
  {
    { 0x06, _SC_LEVEL1_ICACHE_SIZE,     8192,  4, 32 },
    { 0x08, _SC_LEVEL1_ICACHE_SIZE,    16384,  4, 32 },
    { 0x09, _SC_LEVEL1_ICACHE_SIZE,    32768,  4, 32 },
    { 0x0a, _SC_LEVEL1_DCACHE_SIZE,     8192,  2, 32 },
    { 0x0c, _SC_LEVEL1_DCACHE_SIZE,    16384,  4, 32 },
    { 0x0d, _SC_LEVEL1_DCACHE_SIZE,    16384,  4, 64 },
    { 0x21, _SC_LEVEL2_CACHE_SIZE,    262144,  8, 64 },
    { 0x22, _SC_LEVEL3_CACHE_SIZE,    524288,  4, 64 },
    { 0x23, _SC_LEVEL3_CACHE_SIZE,   1048576,  8, 64 },
    { 0x25, _SC_LEVEL3_CACHE_SIZE,   2097152,  8, 64 },
    { 0x29, _SC_LEVEL3_CACHE_SIZE,   4194304,  8, 64 },
    { 0x2c, _SC_LEVEL1_DCACHE_SIZE,    32768,  8, 64 },
    { 0x30, _SC_LEVEL1_ICACHE_SIZE,    32768,  8, 64 },
    { 0x39, _SC_LEVEL2_CACHE_SIZE,    131072,  4, 64 },
    { 0x3a, _SC_LEVEL2_CACHE_SIZE,    196608,  6, 64 },
    { 0x3b, _SC_LEVEL2_CACHE_SIZE,    131072,  2, 64 },
    { 0x3c, _SC_LEVEL2_CACHE_SIZE,    262144,  4, 64 },
    { 0x3d, _SC_LEVEL2_CACHE_SIZE,    393216,  6, 64 },
    { 0x3e, _SC_LEVEL2_CACHE_SIZE,    524288,  4, 64 },
    { 0x3f, _SC_LEVEL2_CACHE_SIZE,    262144,  2, 64 },
    { 0x41, _SC_LEVEL2_CACHE_SIZE,    131072,  4, 32 },
    { 0x42, _SC_LEVEL2_CACHE_SIZE,    262144,  4, 32 },
    { 0x43, _SC_LEVEL2_CACHE_SIZE,    524288,  4, 32 },
    { 0x44, _SC_LEVEL2_CACHE_SIZE,   1048576,  4, 32 },
    { 0x45, _SC_LEVEL2_CACHE_SIZE,   2097152,  4, 32 },
    { 0x46, _SC_LEVEL3_CACHE_SIZE,   4194304,  4, 64 },
    { 0x47, _SC_LEVEL3_CACHE_SIZE,   8388608,  8, 64 },
    { 0x48, _SC_LEVEL2_CACHE_SIZE,   3145728, 12, 64 },
    { 0x49, _SC_LEVEL2_CACHE_SIZE,   4194304, 16, 64 },
    { 0x4a, _SC_LEVEL3_CACHE_SIZE,   6291456, 12, 64 },
    { 0x4b, _SC_LEVEL3_CACHE_SIZE,   8388608, 16, 64 },
    { 0x4c, _SC_LEVEL3_CACHE_SIZE,  12582912, 12, 64 },
    { 0x4d, _SC_LEVEL3_CACHE_SIZE,  16777216, 16, 64 },
    { 0x4e, _SC_LEVEL2_CACHE_SIZE,   6291456, 24, 64 },
    { 0x60, _SC_LEVEL1_DCACHE_SIZE,    16384,  8, 64 },
    { 0x66, _SC_LEVEL1_DCACHE_SIZE,     8192,  4, 64 },
    { 0x67, _SC_LEVEL1_DCACHE_SIZE,    16384,  4, 64 },
    { 0x68, _SC_LEVEL1_DCACHE_SIZE,    32768,  4, 64 },
    { 0x78, _SC_LEVEL2_CACHE_SIZE,   1048576,  8, 64 },
    { 0x79, _SC_LEVEL2_CACHE_SIZE,    131072,  8, 64 },
    { 0x7a, _SC_LEVEL2_CACHE_SIZE,    262144,  8, 64 },
    { 0x7b, _SC_LEVEL2_CACHE_SIZE,    524288,  8, 64 },
    { 0x7c, _SC_LEVEL2_CACHE_SIZE,   1048576,  8, 64 },
    { 0x7d, _SC_LEVEL2_CACHE_SIZE,   2097152,  8, 64 },
    { 0x7f, _SC_LEVEL2_CACHE_SIZE,    524288,  2, 64 },
    { 0x82, _SC_LEVEL2_CACHE_SIZE,    262144,  8, 32 },
    { 0x83, _SC_LEVEL2_CACHE_SIZE,    524288,  8, 32 },
    { 0x84, _SC_LEVEL2_CACHE_SIZE,   1048576,  8, 32 },
    { 0x85, _SC_LEVEL2_CACHE_SIZE,   2097152,  8, 32 },
    { 0x86, _SC_LEVEL2_CACHE_SIZE,    524288,  4, 64 },
    { 0x87, _SC_LEVEL2_CACHE_SIZE,   1048576,  8, 64 },
    { 0xd0, _SC_LEVEL3_CACHE_SIZE,    524288,  4, 64 },
    { 0xd1, _SC_LEVEL3_CACHE_SIZE,   1048576,  4, 64 },
    { 0xd2, _SC_LEVEL3_CACHE_SIZE,   2097152,  4, 64 },
    { 0xd6, _SC_LEVEL3_CACHE_SIZE,   1048576,  8, 64 },
    { 0xd7, _SC_LEVEL3_CACHE_SIZE,   2097152,  8, 64 },
    { 0xd8, _SC_LEVEL3_CACHE_SIZE,   4194304,  8, 64 },
    { 0xdc, _SC_LEVEL3_CACHE_SIZE,   2097152, 12, 64 },
    { 0xdd, _SC_LEVEL3_CACHE_SIZE,   4194304, 12, 64 },
    { 0xde, _SC_LEVEL3_CACHE_SIZE,   8388608, 12, 64 },
    /* 0xe2 was previously mistyped as a second 0xe3 entry.  */
    { 0xe2, _SC_LEVEL3_CACHE_SIZE,   2097152, 16, 64 },
    { 0xe3, _SC_LEVEL3_CACHE_SIZE,   4194304, 16, 64 },
    { 0xe4, _SC_LEVEL3_CACHE_SIZE,   8388608, 16, 64 },
  };

/* Number of entries in intel_02_known.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
106 static int
107 intel_02_known_compare (const void *p1, const void *p2)
109 const struct intel_02_cache_info *i1;
110 const struct intel_02_cache_info *i2;
112 i1 = (const struct intel_02_cache_info *) p1;
113 i2 = (const struct intel_02_cache_info *) p2;
115 if (i1->idx == i2->idx)
116 return 0;
118 return i1->idx < i2->idx ? -1 : 1;
122 static long int
123 __attribute__ ((noinline))
124 intel_check_word (int name, unsigned int value, bool *has_level_2,
125 bool *no_level_2_or_3)
127 if ((value & 0x80000000) != 0)
128 /* The register value is reserved. */
129 return 0;
131 /* Fold the name. The _SC_ constants are always in the order SIZE,
132 ASSOC, LINESIZE. */
133 int folded_name = (_SC_LEVEL1_ICACHE_SIZE
134 + ((name - _SC_LEVEL1_ICACHE_SIZE) / 3) * 3);
136 while (value != 0)
138 unsigned int byte = value & 0xff;
140 if (byte == 0x40)
142 *no_level_2_or_3 = true;
144 if (folded_name == _SC_LEVEL3_CACHE_SIZE)
145 /* No need to look further. */
146 break;
148 else
150 if (byte == 0x49 && folded_name == _SC_LEVEL3_CACHE_SIZE)
152 /* Intel reused this value. For family 15, model 6 it
153 specifies the 3rd level cache. Otherwise the 2nd
154 level cache. */
155 unsigned int eax;
156 unsigned int ebx;
157 unsigned int ecx;
158 unsigned int edx;
159 asm volatile ("cpuid"
160 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
161 : "0" (1));
163 unsigned int family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
164 unsigned int model = ((((eax >>16) & 0xf) << 4)
165 + ((eax >> 4) & 0xf));
166 if (family == 15 && model == 6)
168 /* The level 3 cache is encoded for this model like
169 the level 2 cache is for other models. Pretend
170 the caller asked for the level 2 cache. */
171 name = (_SC_LEVEL2_CACHE_SIZE
172 + (name - _SC_LEVEL3_CACHE_SIZE));
173 folded_name = _SC_LEVEL3_CACHE_SIZE;
177 struct intel_02_cache_info *found;
178 struct intel_02_cache_info search;
180 search.idx = byte;
181 found = bsearch (&search, intel_02_known, nintel_02_known,
182 sizeof (intel_02_known[0]), intel_02_known_compare);
183 if (found != NULL)
185 if (found->name == folded_name)
187 unsigned int offset = name - folded_name;
189 if (offset == 0)
190 /* Cache size. */
191 return found->size;
192 if (offset == 1)
193 return found->assoc;
195 assert (offset == 2);
196 return found->linesize;
199 if (found->name == _SC_LEVEL2_CACHE_SIZE)
200 *has_level_2 = true;
204 /* Next byte for the next round. */
205 value >>= 8;
208 /* Nothing found. */
209 return 0;
/* Determine the cache property NAME on an Intel processor using CPUID
   leaf 2.  MAXIDX is the highest basic CPUID leaf the processor supports.

   Returns the requested value when a matching descriptor is found; -1 when
   a level 2 or level 3 property was requested and the processor reported
   the 0x40 descriptor; 0 when no information is available, which includes
   processors that do not implement leaf 2 at all.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  /* Without leaf 2 there is nothing to query.  Report "no information"
     instead of aborting the process.  */
  if (maxidx < 2)
    return 0;

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  unsigned int cnt = 0;
  unsigned int max = 1;
  long int result = 0;
  bool no_level_2_or_3 = false;
  bool has_level_2 = false;

  while (cnt++ < max)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __asm__ __volatile__ ("cpuid"
                            : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                            : "0" (2));

      /* The low byte of EAX in the first round contains the number of
         rounds we have to make.  At least one, the one we are already
         doing.  */
      if (cnt == 1)
        {
          max = eax & 0xff;
          /* The count byte is not a descriptor; mask it out.  */
          eax &= 0xffffff00;
        }

      /* Process the individual registers' value.  */
      result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;

      result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;

      result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;

      result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;
    }

  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}

/* Determine the cache property NAME on an AMD processor using the extended
   CPUID leaves 0x80000005 (level 1) and 0x80000006 (level 2 and 3).

   Returns 0 when NAME is beyond the level 3 constants or when the required
   extended leaf is not supported; otherwise the value decoded from the
   register contents.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  /* Number of ways for each 4-bit associativity code of the level 2 and
     level 3 descriptors.  Codes without an entry map to 0; code 15 is
     handled separately by the callers below.  */
  static const unsigned char ways_for_code[16] =
    { 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0 };

  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;

  /* EAX receives the highest supported extended leaf.  */
  __asm__ __volatile__ ("cpuid"
                        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                        : "0" (0x80000000));

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  unsigned int leaf = (name >= _SC_LEVEL2_CACHE_SIZE
                       ? 0x80000006 : 0x80000005);
  if (eax < leaf)
    return 0;

  __asm__ __volatile__ ("cpuid"
                        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                        : "0" (leaf));

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* Instruction cache request: the data sits in EDX in the same
         layout the data cache uses in ECX, so reuse the decoding below.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  if (name == _SC_LEVEL1_DCACHE_SIZE)
    /* Bits 31:24 scaled by 1024.  */
    return (ecx >> 14) & 0x3fc00;

  if (name == _SC_LEVEL1_DCACHE_ASSOC)
    {
      unsigned int upper = ecx >> 16;

      if ((upper & 0xff) == 0xff)
        /* Fully associative.  */
        return (upper << 2) & 0x3fc00;
      return upper & 0xff;
    }

  if (name == _SC_LEVEL1_DCACHE_LINESIZE)
    return ecx & 0xff;

  if (name == _SC_LEVEL2_CACHE_SIZE)
    {
      if ((ecx & 0xf000) == 0)
        return 0;
      return (ecx >> 6) & 0x3fffc00;
    }

  if (name == _SC_LEVEL2_CACHE_ASSOC)
    {
      unsigned int code = (ecx >> 12) & 0xf;

      if (code == 15)
        /* Cache size divided by the line size.  */
        return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
      return ways_for_code[code];
    }

  if (name == _SC_LEVEL2_CACHE_LINESIZE)
    {
      if ((ecx & 0xf000) == 0)
        return 0;
      return ecx & 0xff;
    }

  if (name == _SC_LEVEL3_CACHE_SIZE)
    {
      if ((edx & 0xf000) == 0)
        return 0;
      return (edx & 0x3ffc0000) << 1;
    }

  if (name == _SC_LEVEL3_CACHE_ASSOC)
    {
      unsigned int code = (edx >> 12) & 0xf;

      if (code == 15)
        /* Cache size divided by the line size.  */
        return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
      return ways_for_code[code];
    }

  if (name == _SC_LEVEL3_CACHE_LINESIZE)
    {
      if ((edx & 0xf000) == 0)
        return 0;
      return edx & 0xff;
    }

  assert (! "cannot happen");
  return -1;
}

392 /* Get the value of the system variable NAME. */
393 long int
394 attribute_hidden
395 __cache_sysconf (int name)
397 /* Find out what brand of processor. */
398 unsigned int eax;
399 unsigned int ebx;
400 unsigned int ecx;
401 unsigned int edx;
402 asm volatile ("cpuid"
403 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
404 : "0" (0));
406 /* This spells out "GenuineIntel". */
407 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
408 return handle_intel (name, eax);
410 /* This spells out "AuthenticAMD". */
411 if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
412 return handle_amd (name);
414 // XXX Fill in more vendors.
416 /* CPU not known, we have no information. */
417 return 0;
/* Half the data cache size for use in memory and string routines, typically
   L1 size.  The initializer is the fallback used until init_cacheinfo
   detects a positive size.  */
long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
/* Shared cache size for use in memory and string routines, typically
   L2 or L3 size.  Both variables are updated together by init_cacheinfo
   when it detects a positive shared cache size.  */
long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
long int __x86_64_shared_cache_size attribute_hidden = 1024 * 1024;
/* PREFETCHW support flag for use in memory and string routines.
   init_cacheinfo sets it to -1 on AMD processors that report PREFETCHW
   or 3DNow! support; otherwise it stays zero.  */
int __x86_64_prefetchw attribute_hidden;
/* Instructions preferred for memory and string routines.

   0: Regular instructions
   1: MMX instructions
   2: SSE2 instructions
   3: SSSE3 instructions

   init_cacheinfo only assigns 2 or 3, and only on Intel processors.  */
int __x86_64_preferred_memory_instruction attribute_hidden;

442 static void
443 __attribute__((constructor))
444 init_cacheinfo (void)
446 /* Find out what brand of processor. */
447 unsigned int eax;
448 unsigned int ebx;
449 unsigned int ecx;
450 unsigned int edx;
451 int max_cpuid_ex;
452 long int data = -1;
453 long int shared = -1;
454 unsigned int level;
455 unsigned int threads = 0;
457 #ifdef USE_MULTIARCH
458 if (__cpu_features.kind == arch_kind_unknown)
459 __init_cpu_features ();
460 # define is_intel __cpu_features.kind == arch_kind_intel
461 # define is_amd __cpu_features.kind == arch_kind_amd
462 # define max_cpuid __cpu_features.max_cpuid
463 #else
464 int max_cpuid;
465 asm volatile ("cpuid"
466 : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx)
467 : "0" (0));
468 /* This spells out "GenuineIntel". */
469 # define is_intel \
470 ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
471 /* This spells out "AuthenticAMD". */
472 # define is_amd \
473 ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
474 #endif
476 if (is_intel)
478 data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
480 /* Try L3 first. */
481 level = 3;
482 shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
484 if (shared <= 0)
486 /* Try L2 otherwise. */
487 level = 2;
488 shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
491 #ifdef USE_MULTIARCH
492 eax = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].eax;
493 ebx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].ebx;
494 ecx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].ecx;
495 edx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].edx;
496 #else
497 asm volatile ("cpuid"
498 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
499 : "0" (1));
500 #endif
502 /* Intel prefers SSSE3 instructions for memory/string routines
503 if they are avaiable. */
504 if ((ecx & 0x200))
505 __x86_64_preferred_memory_instruction = 3;
506 else
507 __x86_64_preferred_memory_instruction = 2;
509 /* Figure out the number of logical threads that share the
510 highest cache level. */
511 if (max_cpuid >= 4)
513 int i = 0;
515 /* Query until desired cache level is enumerated. */
518 asm volatile ("cpuid"
519 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
520 : "0" (4), "2" (i++));
522 /* There seems to be a bug in at least some Pentium Ds
523 which sometimes fail to iterate all cache parameters.
524 Do not loop indefinitely here, stop in this case and
525 assume there is no such information. */
526 if ((eax & 0x1f) == 0)
527 goto intel_bug_no_cache_info;
529 while (((eax >> 5) & 0x7) != level);
531 threads = ((eax >> 14) & 0x3ff) + 1;
533 else
535 intel_bug_no_cache_info:
536 /* Assume that all logical threads share the highest cache level. */
538 threads = (ebx >> 16) & 0xff;
541 /* Cap usage of highest cache level to the number of supported
542 threads. */
543 if (shared > 0 && threads > 0)
544 shared /= threads;
546 /* This spells out "AuthenticAMD". */
547 else if (is_amd)
549 data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
550 long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
551 shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
553 /* Get maximum extended function. */
554 asm volatile ("cpuid"
555 : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), "=d" (edx)
556 : "0" (0x80000000));
558 if (shared <= 0)
559 /* No shared L3 cache. All we have is the L2 cache. */
560 shared = core;
561 else
563 /* Figure out the number of logical threads that share L3. */
564 if (max_cpuid_ex >= 0x80000008)
566 /* Get width of APIC ID. */
567 asm volatile ("cpuid"
568 : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx),
569 "=d" (edx)
570 : "0" (0x80000008));
571 threads = 1 << ((ecx >> 12) & 0x0f);
574 if (threads == 0)
576 /* If APIC ID width is not available, use logical
577 processor count. */
578 asm volatile ("cpuid"
579 : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx),
580 "=d" (edx)
581 : "0" (0x00000001));
583 if ((edx & (1 << 28)) != 0)
584 threads = (ebx >> 16) & 0xff;
587 /* Cap usage of highest cache level to the number of
588 supported threads. */
589 if (threads > 0)
590 shared /= threads;
592 /* Account for exclusive L2 and L3 caches. */
593 shared += core;
596 if (max_cpuid_ex >= 0x80000001)
598 asm volatile ("cpuid"
599 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
600 : "0" (0x80000001));
601 /* PREFETCHW || 3DNow! */
602 if ((ecx & 0x100) || (edx & 0x80000000))
603 __x86_64_prefetchw = -1;
607 if (data > 0)
608 __x86_64_data_cache_size_half = data / 2;
610 if (shared > 0)
612 __x86_64_shared_cache_size_half = shared / 2;
613 __x86_64_shared_cache_size = shared;