2 Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 # include "multiarch/init-arch.h"
/* Cache descriptors reported by CPUID leaf 2 on Intel processors.

   Each entry maps a one-byte descriptor (IDX) to the cache it describes:
   NAME is the _SC_LEVELn_*CACHE_SIZE constant identifying the cache,
   followed by the cache size in bytes, its associativity and its line
   size in bytes.

   The table is looked up with bsearch using intel_02_known_compare, so
   it must stay sorted by IDX in strictly ascending order and must not
   contain duplicate descriptors.  */
static const struct intel_02_cache_info
{
  unsigned int idx;
  int name;
  long int size;
  long int assoc;
  long int linesize;
} intel_02_known [] =
  {
    { 0x06, _SC_LEVEL1_ICACHE_SIZE,     8192,  4, 32 },
    { 0x08, _SC_LEVEL1_ICACHE_SIZE,    16384,  4, 32 },
    { 0x09, _SC_LEVEL1_ICACHE_SIZE,    32768,  4, 32 },
    { 0x0a, _SC_LEVEL1_DCACHE_SIZE,     8192,  2, 32 },
    { 0x0c, _SC_LEVEL1_DCACHE_SIZE,    16384,  4, 32 },
    { 0x0d, _SC_LEVEL1_DCACHE_SIZE,    16384,  4, 64 },
    { 0x21, _SC_LEVEL2_CACHE_SIZE,    262144,  8, 64 },
    { 0x22, _SC_LEVEL3_CACHE_SIZE,    524288,  4, 64 },
    { 0x23, _SC_LEVEL3_CACHE_SIZE,   1048576,  8, 64 },
    { 0x25, _SC_LEVEL3_CACHE_SIZE,   2097152,  8, 64 },
    { 0x29, _SC_LEVEL3_CACHE_SIZE,   4194304,  8, 64 },
    { 0x2c, _SC_LEVEL1_DCACHE_SIZE,    32768,  8, 64 },
    { 0x30, _SC_LEVEL1_ICACHE_SIZE,    32768,  8, 64 },
    { 0x39, _SC_LEVEL2_CACHE_SIZE,    131072,  4, 64 },
    { 0x3a, _SC_LEVEL2_CACHE_SIZE,    196608,  6, 64 },
    { 0x3b, _SC_LEVEL2_CACHE_SIZE,    131072,  2, 64 },
    { 0x3c, _SC_LEVEL2_CACHE_SIZE,    262144,  4, 64 },
    { 0x3d, _SC_LEVEL2_CACHE_SIZE,    393216,  6, 64 },
    { 0x3e, _SC_LEVEL2_CACHE_SIZE,    524288,  4, 64 },
    { 0x3f, _SC_LEVEL2_CACHE_SIZE,    262144,  2, 64 },
    { 0x41, _SC_LEVEL2_CACHE_SIZE,    131072,  4, 32 },
    { 0x42, _SC_LEVEL2_CACHE_SIZE,    262144,  4, 32 },
    { 0x43, _SC_LEVEL2_CACHE_SIZE,    524288,  4, 32 },
    { 0x44, _SC_LEVEL2_CACHE_SIZE,   1048576,  4, 32 },
    { 0x45, _SC_LEVEL2_CACHE_SIZE,   2097152,  4, 32 },
    { 0x46, _SC_LEVEL3_CACHE_SIZE,   4194304,  4, 64 },
    { 0x47, _SC_LEVEL3_CACHE_SIZE,   8388608,  8, 64 },
    { 0x48, _SC_LEVEL2_CACHE_SIZE,   3145728, 12, 64 },
    { 0x49, _SC_LEVEL2_CACHE_SIZE,   4194304, 16, 64 },
    { 0x4a, _SC_LEVEL3_CACHE_SIZE,   6291456, 12, 64 },
    { 0x4b, _SC_LEVEL3_CACHE_SIZE,   8388608, 16, 64 },
    { 0x4c, _SC_LEVEL3_CACHE_SIZE,  12582912, 12, 64 },
    { 0x4d, _SC_LEVEL3_CACHE_SIZE,  16777216, 16, 64 },
    { 0x4e, _SC_LEVEL2_CACHE_SIZE,   6291456, 24, 64 },
    { 0x60, _SC_LEVEL1_DCACHE_SIZE,    16384,  8, 64 },
    { 0x66, _SC_LEVEL1_DCACHE_SIZE,     8192,  4, 64 },
    { 0x67, _SC_LEVEL1_DCACHE_SIZE,    16384,  4, 64 },
    { 0x68, _SC_LEVEL1_DCACHE_SIZE,    32768,  4, 64 },
    { 0x78, _SC_LEVEL2_CACHE_SIZE,   1048576,  8, 64 },
    { 0x79, _SC_LEVEL2_CACHE_SIZE,    131072,  8, 64 },
    { 0x7a, _SC_LEVEL2_CACHE_SIZE,    262144,  8, 64 },
    { 0x7b, _SC_LEVEL2_CACHE_SIZE,    524288,  8, 64 },
    { 0x7c, _SC_LEVEL2_CACHE_SIZE,   1048576,  8, 64 },
    { 0x7d, _SC_LEVEL2_CACHE_SIZE,   2097152,  8, 64 },
    { 0x7f, _SC_LEVEL2_CACHE_SIZE,    524288,  2, 64 },
    { 0x82, _SC_LEVEL2_CACHE_SIZE,    262144,  8, 32 },
    { 0x83, _SC_LEVEL2_CACHE_SIZE,    524288,  8, 32 },
    { 0x84, _SC_LEVEL2_CACHE_SIZE,   1048576,  8, 32 },
    { 0x85, _SC_LEVEL2_CACHE_SIZE,   2097152,  8, 32 },
    { 0x86, _SC_LEVEL2_CACHE_SIZE,    524288,  4, 64 },
    { 0x87, _SC_LEVEL2_CACHE_SIZE,   1048576,  8, 64 },
    { 0xd0, _SC_LEVEL3_CACHE_SIZE,    524288,  4, 64 },
    { 0xd1, _SC_LEVEL3_CACHE_SIZE,   1048576,  4, 64 },
    { 0xd2, _SC_LEVEL3_CACHE_SIZE,   2097152,  4, 64 },
    { 0xd6, _SC_LEVEL3_CACHE_SIZE,   1048576,  8, 64 },
    { 0xd7, _SC_LEVEL3_CACHE_SIZE,   2097152,  8, 64 },
    { 0xd8, _SC_LEVEL3_CACHE_SIZE,   4194304,  8, 64 },
    { 0xdc, _SC_LEVEL3_CACHE_SIZE,   2097152, 12, 64 },
    { 0xdd, _SC_LEVEL3_CACHE_SIZE,   4194304, 12, 64 },
    { 0xde, _SC_LEVEL3_CACHE_SIZE,   8388608, 12, 64 },
    /* 0xe2 was previously listed as a second 0xe3 entry, which made the
       descriptor for the 2 MiB, 16-way L3 cache impossible to find.  */
    { 0xe2, _SC_LEVEL3_CACHE_SIZE,   2097152, 16, 64 },
    { 0xe3, _SC_LEVEL3_CACHE_SIZE,   4194304, 16, 64 },
    { 0xe4, _SC_LEVEL3_CACHE_SIZE,   8388608, 16, 64 },
  };

/* Number of entries in intel_02_known.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
107 intel_02_known_compare (const void *p1
, const void *p2
)
109 const struct intel_02_cache_info
*i1
;
110 const struct intel_02_cache_info
*i2
;
112 i1
= (const struct intel_02_cache_info
*) p1
;
113 i2
= (const struct intel_02_cache_info
*) p2
;
115 if (i1
->idx
== i2
->idx
)
118 return i1
->idx
< i2
->idx
? -1 : 1;
123 __attribute__ ((noinline
))
124 intel_check_word (int name
, unsigned int value
, bool *has_level_2
,
125 bool *no_level_2_or_3
)
127 if ((value
& 0x80000000) != 0)
128 /* The register value is reserved. */
131 /* Fold the name. The _SC_ constants are always in the order SIZE,
133 int folded_name
= (_SC_LEVEL1_ICACHE_SIZE
134 + ((name
- _SC_LEVEL1_ICACHE_SIZE
) / 3) * 3);
138 unsigned int byte
= value
& 0xff;
142 *no_level_2_or_3
= true;
144 if (folded_name
== _SC_LEVEL3_CACHE_SIZE
)
145 /* No need to look further. */
150 if (byte
== 0x49 && folded_name
== _SC_LEVEL3_CACHE_SIZE
)
152 /* Intel reused this value. For family 15, model 6 it
153 specifies the 3rd level cache. Otherwise the 2nd
159 asm volatile ("cpuid"
160 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
163 unsigned int family
= ((eax
>> 20) & 0xff) + ((eax
>> 8) & 0xf);
164 unsigned int model
= ((((eax
>>16) & 0xf) << 4)
165 + ((eax
>> 4) & 0xf));
166 if (family
== 15 && model
== 6)
168 /* The level 3 cache is encoded for this model like
169 the level 2 cache is for other models. Pretend
170 the caller asked for the level 2 cache. */
171 name
= (_SC_LEVEL2_CACHE_SIZE
172 + (name
- _SC_LEVEL3_CACHE_SIZE
));
173 folded_name
= _SC_LEVEL3_CACHE_SIZE
;
177 struct intel_02_cache_info
*found
;
178 struct intel_02_cache_info search
;
181 found
= bsearch (&search
, intel_02_known
, nintel_02_known
,
182 sizeof (intel_02_known
[0]), intel_02_known_compare
);
185 if (found
->name
== folded_name
)
187 unsigned int offset
= name
- folded_name
;
195 assert (offset
== 2);
196 return found
->linesize
;
199 if (found
->name
== _SC_LEVEL2_CACHE_SIZE
)
204 /* Next byte for the next round. */
213 static long int __attribute__ ((noinline
))
214 handle_intel (int name
, unsigned int maxidx
)
216 assert (maxidx
>= 2);
218 /* OK, we can use the CPUID instruction to get all info about the
220 unsigned int cnt
= 0;
221 unsigned int max
= 1;
223 bool no_level_2_or_3
= false;
224 bool has_level_2
= false;
232 asm volatile ("cpuid"
233 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
236 /* The low byte of EAX in the first round contains the number of
237 rounds we have to make. At least one, the one we are already
245 /* Process the individual registers' value. */
246 result
= intel_check_word (name
, eax
, &has_level_2
, &no_level_2_or_3
);
250 result
= intel_check_word (name
, ebx
, &has_level_2
, &no_level_2_or_3
);
254 result
= intel_check_word (name
, ecx
, &has_level_2
, &no_level_2_or_3
);
258 result
= intel_check_word (name
, edx
, &has_level_2
, &no_level_2_or_3
);
263 if (name
>= _SC_LEVEL2_CACHE_SIZE
&& name
<= _SC_LEVEL3_CACHE_LINESIZE
271 static long int __attribute__ ((noinline
))
272 handle_amd (int name
)
278 asm volatile ("cpuid"
279 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
282 /* No level 4 cache (yet). */
283 if (name
> _SC_LEVEL3_CACHE_LINESIZE
)
286 unsigned int fn
= 0x80000005 + (name
>= _SC_LEVEL2_CACHE_SIZE
);
290 asm volatile ("cpuid"
291 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
294 if (name
< _SC_LEVEL1_DCACHE_SIZE
)
296 name
+= _SC_LEVEL1_DCACHE_SIZE
- _SC_LEVEL1_ICACHE_SIZE
;
302 case _SC_LEVEL1_DCACHE_SIZE
:
303 return (ecx
>> 14) & 0x3fc00;
305 case _SC_LEVEL1_DCACHE_ASSOC
:
307 if ((ecx
& 0xff) == 0xff)
308 /* Fully associative. */
309 return (ecx
<< 2) & 0x3fc00;
312 case _SC_LEVEL1_DCACHE_LINESIZE
:
315 case _SC_LEVEL2_CACHE_SIZE
:
316 return (ecx
& 0xf000) == 0 ? 0 : (ecx
>> 6) & 0x3fffc00;
318 case _SC_LEVEL2_CACHE_ASSOC
:
319 switch ((ecx
>> 12) & 0xf)
325 return (ecx
>> 12) & 0xf;
341 return ((ecx
>> 6) & 0x3fffc00) / (ecx
& 0xff);
347 case _SC_LEVEL2_CACHE_LINESIZE
:
348 return (ecx
& 0xf000) == 0 ? 0 : ecx
& 0xff;
350 case _SC_LEVEL3_CACHE_SIZE
:
351 return (edx
& 0xf000) == 0 ? 0 : (edx
& 0x3ffc0000) << 1;
353 case _SC_LEVEL3_CACHE_ASSOC
:
354 switch ((edx
>> 12) & 0xf)
360 return (edx
>> 12) & 0xf;
376 return ((edx
& 0x3ffc0000) << 1) / (edx
& 0xff);
382 case _SC_LEVEL3_CACHE_LINESIZE
:
383 return (edx
& 0xf000) == 0 ? 0 : edx
& 0xff;
386 assert (! "cannot happen");
392 /* Get the value of the system variable NAME. */
395 __cache_sysconf (int name
)
397 /* Find out what brand of processor. */
402 asm volatile ("cpuid"
403 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
406 /* This spells out "GenuineIntel". */
407 if (ebx
== 0x756e6547 && ecx
== 0x6c65746e && edx
== 0x49656e69)
408 return handle_intel (name
, eax
);
410 /* This spells out "AuthenticAMD". */
411 if (ebx
== 0x68747541 && ecx
== 0x444d4163 && edx
== 0x69746e65)
412 return handle_amd (name
);
414 // XXX Fill in more vendors.
416 /* CPU not known, we have no information. */
421 /* Half the data cache size for use in memory and string routines, typically
423 long int __x86_64_data_cache_size_half attribute_hidden
= 32 * 1024 / 2;
424 /* Shared cache size for use in memory and string routines, typically
426 long int __x86_64_shared_cache_size_half attribute_hidden
= 1024 * 1024 / 2;
427 long int __x86_64_shared_cache_size attribute_hidden
= 1024 * 1024;
428 /* PREFETCHW support flag for use in memory and string routines. */
429 int __x86_64_prefetchw attribute_hidden
;
431 /* Instructions preferred for memory and string routines.
433 0: Regular instructions
436 3: SSSE3 instructions
439 int __x86_64_preferred_memory_instruction attribute_hidden
;
443 __attribute__((constructor
))
444 init_cacheinfo (void)
446 /* Find out what brand of processor. */
453 long int shared
= -1;
455 unsigned int threads
= 0;
458 if (__cpu_features
.kind
== arch_kind_unknown
)
459 __init_cpu_features ();
460 # define is_intel __cpu_features.kind == arch_kind_intel
461 # define is_amd __cpu_features.kind == arch_kind_amd
462 # define max_cpuid __cpu_features.max_cpuid
465 asm volatile ("cpuid"
466 : "=a" (max_cpuid
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
468 /* This spells out "GenuineIntel". */
470 ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
471 /* This spells out "AuthenticAMD". */
473 ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
478 data
= handle_intel (_SC_LEVEL1_DCACHE_SIZE
, max_cpuid
);
482 shared
= handle_intel (_SC_LEVEL3_CACHE_SIZE
, max_cpuid
);
486 /* Try L2 otherwise. */
488 shared
= handle_intel (_SC_LEVEL2_CACHE_SIZE
, max_cpuid
);
492 eax
= __cpu_features
.cpuid
[INTEL_CPUID_INDEX_1
].eax
;
493 ebx
= __cpu_features
.cpuid
[INTEL_CPUID_INDEX_1
].ebx
;
494 ecx
= __cpu_features
.cpuid
[INTEL_CPUID_INDEX_1
].ecx
;
495 edx
= __cpu_features
.cpuid
[INTEL_CPUID_INDEX_1
].edx
;
497 asm volatile ("cpuid"
498 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
502 /* Intel prefers SSSE3 instructions for memory/string routines
503 if they are available. */
505 __x86_64_preferred_memory_instruction
= 3;
507 __x86_64_preferred_memory_instruction
= 2;
509 /* Figure out the number of logical threads that share the
510 highest cache level. */
515 /* Query until desired cache level is enumerated. */
518 asm volatile ("cpuid"
519 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
520 : "0" (4), "2" (i
++));
522 /* There seems to be a bug in at least some Pentium Ds
523 which sometimes fail to iterate all cache parameters.
524 Do not loop indefinitely here, stop in this case and
525 assume there is no such information. */
526 if ((eax
& 0x1f) == 0)
527 goto intel_bug_no_cache_info
;
529 while (((eax
>> 5) & 0x7) != level
);
531 threads
= ((eax
>> 14) & 0x3ff) + 1;
535 intel_bug_no_cache_info
:
536 /* Assume that all logical threads share the highest cache level. */
538 threads
= (ebx
>> 16) & 0xff;
541 /* Cap usage of highest cache level to the number of supported
543 if (shared
> 0 && threads
> 0)
546 /* This spells out "AuthenticAMD". */
549 data
= handle_amd (_SC_LEVEL1_DCACHE_SIZE
);
550 long int core
= handle_amd (_SC_LEVEL2_CACHE_SIZE
);
551 shared
= handle_amd (_SC_LEVEL3_CACHE_SIZE
);
553 /* Get maximum extended function. */
554 asm volatile ("cpuid"
555 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
559 /* No shared L3 cache. All we have is the L2 cache. */
563 /* Figure out the number of logical threads that share L3. */
564 if (max_cpuid_ex
>= 0x80000008)
566 /* Get width of APIC ID. */
567 asm volatile ("cpuid"
568 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
),
571 threads
= 1 << ((ecx
>> 12) & 0x0f);
576 /* If APIC ID width is not available, use logical
578 asm volatile ("cpuid"
579 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
),
583 if ((edx
& (1 << 28)) != 0)
584 threads
= (ebx
>> 16) & 0xff;
587 /* Cap usage of highest cache level to the number of
588 supported threads. */
592 /* Account for exclusive L2 and L3 caches. */
596 if (max_cpuid_ex
>= 0x80000001)
598 asm volatile ("cpuid"
599 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
601 /* PREFETCHW || 3DNow! */
602 if ((ecx
& 0x100) || (edx
& 0x80000000))
603 __x86_64_prefetchw
= -1;
608 __x86_64_data_cache_size_half
= data
/ 2;
612 __x86_64_shared_cache_size_half
= shared
/ 2;
613 __x86_64_shared_cache_size
= shared
;