2 Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 # include "multiarch/init-arch.h"
30 static const struct intel_02_cache_info
39 { 0x06, _SC_LEVEL1_ICACHE_SIZE
, 8192, 4, 32 },
40 { 0x08, _SC_LEVEL1_ICACHE_SIZE
, 16384, 4, 32 },
41 { 0x09, _SC_LEVEL1_ICACHE_SIZE
, 32768, 4, 32 },
42 { 0x0a, _SC_LEVEL1_DCACHE_SIZE
, 8192, 2, 32 },
43 { 0x0c, _SC_LEVEL1_DCACHE_SIZE
, 16384, 4, 32 },
44 { 0x0d, _SC_LEVEL1_DCACHE_SIZE
, 16384, 4, 64 },
45 { 0x22, _SC_LEVEL3_CACHE_SIZE
, 524288, 4, 64 },
46 { 0x23, _SC_LEVEL3_CACHE_SIZE
, 1048576, 8, 64 },
47 { 0x25, _SC_LEVEL3_CACHE_SIZE
, 2097152, 8, 64 },
48 { 0x29, _SC_LEVEL3_CACHE_SIZE
, 4194304, 8, 64 },
49 { 0x2c, _SC_LEVEL1_DCACHE_SIZE
, 32768, 8, 64 },
50 { 0x30, _SC_LEVEL1_ICACHE_SIZE
, 32768, 8, 64 },
51 { 0x39, _SC_LEVEL2_CACHE_SIZE
, 131072, 4, 64 },
52 { 0x3a, _SC_LEVEL2_CACHE_SIZE
, 196608, 6, 64 },
53 { 0x3b, _SC_LEVEL2_CACHE_SIZE
, 131072, 2, 64 },
54 { 0x3c, _SC_LEVEL2_CACHE_SIZE
, 262144, 4, 64 },
55 { 0x3d, _SC_LEVEL2_CACHE_SIZE
, 393216, 6, 64 },
56 { 0x3e, _SC_LEVEL2_CACHE_SIZE
, 524288, 4, 64 },
57 { 0x3f, _SC_LEVEL2_CACHE_SIZE
, 262144, 2, 64 },
58 { 0x41, _SC_LEVEL2_CACHE_SIZE
, 131072, 4, 32 },
59 { 0x42, _SC_LEVEL2_CACHE_SIZE
, 262144, 4, 32 },
60 { 0x43, _SC_LEVEL2_CACHE_SIZE
, 524288, 4, 32 },
61 { 0x44, _SC_LEVEL2_CACHE_SIZE
, 1048576, 4, 32 },
62 { 0x45, _SC_LEVEL2_CACHE_SIZE
, 2097152, 4, 32 },
63 { 0x46, _SC_LEVEL3_CACHE_SIZE
, 4194304, 4, 64 },
64 { 0x47, _SC_LEVEL3_CACHE_SIZE
, 8388608, 8, 64 },
65 { 0x48, _SC_LEVEL2_CACHE_SIZE
, 3145728, 12, 64 },
66 { 0x49, _SC_LEVEL2_CACHE_SIZE
, 4194304, 16, 64 },
67 { 0x4a, _SC_LEVEL3_CACHE_SIZE
, 6291456, 12, 64 },
68 { 0x4b, _SC_LEVEL3_CACHE_SIZE
, 8388608, 16, 64 },
69 { 0x4c, _SC_LEVEL3_CACHE_SIZE
, 12582912, 12, 64 },
70 { 0x4d, _SC_LEVEL3_CACHE_SIZE
, 16777216, 16, 64 },
71 { 0x4e, _SC_LEVEL2_CACHE_SIZE
, 6291456, 24, 64 },
72 { 0x60, _SC_LEVEL1_DCACHE_SIZE
, 16384, 8, 64 },
73 { 0x66, _SC_LEVEL1_DCACHE_SIZE
, 8192, 4, 64 },
74 { 0x67, _SC_LEVEL1_DCACHE_SIZE
, 16384, 4, 64 },
75 { 0x68, _SC_LEVEL1_DCACHE_SIZE
, 32768, 4, 64 },
76 { 0x78, _SC_LEVEL2_CACHE_SIZE
, 1048576, 8, 64 },
77 { 0x79, _SC_LEVEL2_CACHE_SIZE
, 131072, 8, 64 },
78 { 0x7a, _SC_LEVEL2_CACHE_SIZE
, 262144, 8, 64 },
79 { 0x7b, _SC_LEVEL2_CACHE_SIZE
, 524288, 8, 64 },
80 { 0x7c, _SC_LEVEL2_CACHE_SIZE
, 1048576, 8, 64 },
81 { 0x7d, _SC_LEVEL2_CACHE_SIZE
, 2097152, 8, 64 },
82 { 0x7f, _SC_LEVEL2_CACHE_SIZE
, 524288, 2, 64 },
83 { 0x82, _SC_LEVEL2_CACHE_SIZE
, 262144, 8, 32 },
84 { 0x83, _SC_LEVEL2_CACHE_SIZE
, 524288, 8, 32 },
85 { 0x84, _SC_LEVEL2_CACHE_SIZE
, 1048576, 8, 32 },
86 { 0x85, _SC_LEVEL2_CACHE_SIZE
, 2097152, 8, 32 },
87 { 0x86, _SC_LEVEL2_CACHE_SIZE
, 524288, 4, 64 },
88 { 0x87, _SC_LEVEL2_CACHE_SIZE
, 1048576, 8, 64 },
89 { 0xd0, _SC_LEVEL3_CACHE_SIZE
, 524288, 4, 64 },
90 { 0xd1, _SC_LEVEL3_CACHE_SIZE
, 1048576, 4, 64 },
91 { 0xd2, _SC_LEVEL3_CACHE_SIZE
, 2097152, 4, 64 },
92 { 0xd6, _SC_LEVEL3_CACHE_SIZE
, 1048576, 8, 64 },
93 { 0xd7, _SC_LEVEL3_CACHE_SIZE
, 2097152, 8, 64 },
94 { 0xd8, _SC_LEVEL3_CACHE_SIZE
, 4194304, 8, 64 },
95 { 0xdc, _SC_LEVEL3_CACHE_SIZE
, 2097152, 12, 64 },
96 { 0xdd, _SC_LEVEL3_CACHE_SIZE
, 4194304, 12, 64 },
97 { 0xde, _SC_LEVEL3_CACHE_SIZE
, 8388608, 12, 64 },
/* Descriptor 0xe2: 2 MiB, 16-way, 64-byte-line L3.  Keys in this table
   must be unique and ascending because it is searched with bsearch. */
98 { 0xe2, _SC_LEVEL3_CACHE_SIZE
, 2097152, 16, 64 },
99 { 0xe3, _SC_LEVEL3_CACHE_SIZE
, 4194304, 16, 64 },
100 { 0xe4, _SC_LEVEL3_CACHE_SIZE
, 8388608, 16, 64 },
103 #define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
106 intel_02_known_compare (const void *p1
, const void *p2
)
108 const struct intel_02_cache_info
*i1
;
109 const struct intel_02_cache_info
*i2
;
111 i1
= (const struct intel_02_cache_info
*) p1
;
112 i2
= (const struct intel_02_cache_info
*) p2
;
114 if (i1
->idx
== i2
->idx
)
117 return i1
->idx
< i2
->idx
? -1 : 1;
122 __attribute__ ((noinline
))
123 intel_check_word (int name
, unsigned int value
, bool *has_level_2
,
124 bool *no_level_2_or_3
)
126 if ((value
& 0x80000000) != 0)
127 /* The register value is reserved. */
130 /* Fold the name. The _SC_ constants are always in the order SIZE,
132 int folded_name
= (_SC_LEVEL1_ICACHE_SIZE
133 + ((name
- _SC_LEVEL1_ICACHE_SIZE
) / 3) * 3);
137 unsigned int byte
= value
& 0xff;
141 *no_level_2_or_3
= true;
143 if (folded_name
== _SC_LEVEL3_CACHE_SIZE
)
144 /* No need to look further. */
149 if (byte
== 0x49 && folded_name
== _SC_LEVEL3_CACHE_SIZE
)
151 /* Intel reused this value. For family 15, model 6 it
152 specifies the 3rd level cache. Otherwise the 2nd
158 asm volatile ("cpuid"
159 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
162 unsigned int family
= ((eax
>> 20) & 0xff) + ((eax
>> 8) & 0xf);
163 unsigned int model
= ((((eax
>>16) & 0xf) << 4)
164 + ((eax
>> 4) & 0xf));
165 if (family
== 15 && model
== 6)
167 /* The level 3 cache is encoded for this model like
168 the level 2 cache is for other models. Pretend
169 the caller asked for the level 2 cache. */
170 name
= (_SC_LEVEL2_CACHE_SIZE
171 + (name
- _SC_LEVEL3_CACHE_SIZE
));
172 folded_name
= _SC_LEVEL3_CACHE_SIZE
;
176 struct intel_02_cache_info
*found
;
177 struct intel_02_cache_info search
;
180 found
= bsearch (&search
, intel_02_known
, nintel_02_known
,
181 sizeof (intel_02_known
[0]), intel_02_known_compare
);
184 if (found
->name
== folded_name
)
186 unsigned int offset
= name
- folded_name
;
194 assert (offset
== 2);
195 return found
->linesize
;
198 if (found
->name
== _SC_LEVEL2_CACHE_SIZE
)
203 /* Next byte for the next round. */
212 static long int __attribute__ ((noinline
))
213 handle_intel (int name
, unsigned int maxidx
)
215 assert (maxidx
>= 2);
217 /* OK, we can use the CPUID instruction to get all info about the
219 unsigned int cnt
= 0;
220 unsigned int max
= 1;
222 bool no_level_2_or_3
= false;
223 bool has_level_2
= false;
231 asm volatile ("cpuid"
232 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
235 /* The low byte of EAX in the first round contains the number of
236 rounds we have to make. At least one, the one we are already
244 /* Process the individual registers' value. */
245 result
= intel_check_word (name
, eax
, &has_level_2
, &no_level_2_or_3
);
249 result
= intel_check_word (name
, ebx
, &has_level_2
, &no_level_2_or_3
);
253 result
= intel_check_word (name
, ecx
, &has_level_2
, &no_level_2_or_3
);
257 result
= intel_check_word (name
, edx
, &has_level_2
, &no_level_2_or_3
);
262 if (name
>= _SC_LEVEL2_CACHE_SIZE
&& name
<= _SC_LEVEL3_CACHE_LINESIZE
270 static long int __attribute__ ((noinline
))
271 handle_amd (int name
)
277 asm volatile ("cpuid"
278 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
281 /* No level 4 cache (yet). */
282 if (name
> _SC_LEVEL3_CACHE_LINESIZE
)
285 unsigned int fn
= 0x80000005 + (name
>= _SC_LEVEL2_CACHE_SIZE
);
289 asm volatile ("cpuid"
290 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
293 if (name
< _SC_LEVEL1_DCACHE_SIZE
)
295 name
+= _SC_LEVEL1_DCACHE_SIZE
- _SC_LEVEL1_ICACHE_SIZE
;
301 case _SC_LEVEL1_DCACHE_SIZE
:
302 return (ecx
>> 14) & 0x3fc00;
304 case _SC_LEVEL1_DCACHE_ASSOC
:
306 if ((ecx
& 0xff) == 0xff)
307 /* Fully associative. */
308 return (ecx
<< 2) & 0x3fc00;
311 case _SC_LEVEL1_DCACHE_LINESIZE
:
314 case _SC_LEVEL2_CACHE_SIZE
:
315 return (ecx
& 0xf000) == 0 ? 0 : (ecx
>> 6) & 0x3fffc00;
317 case _SC_LEVEL2_CACHE_ASSOC
:
318 switch ((ecx
>> 12) & 0xf)
324 return (ecx
>> 12) & 0xf;
340 return ((ecx
>> 6) & 0x3fffc00) / (ecx
& 0xff);
346 case _SC_LEVEL2_CACHE_LINESIZE
:
347 return (ecx
& 0xf000) == 0 ? 0 : ecx
& 0xff;
349 case _SC_LEVEL3_CACHE_SIZE
:
350 return (edx
& 0xf000) == 0 ? 0 : (edx
& 0x3ffc0000) << 1;
352 case _SC_LEVEL3_CACHE_ASSOC
:
353 switch ((edx
>> 12) & 0xf)
359 return (edx
>> 12) & 0xf;
375 return ((edx
& 0x3ffc0000) << 1) / (edx
& 0xff);
381 case _SC_LEVEL3_CACHE_LINESIZE
:
382 return (edx
& 0xf000) == 0 ? 0 : edx
& 0xff;
385 assert (! "cannot happen");
391 /* Get the value of the system variable NAME. */
394 __cache_sysconf (int name
)
396 /* Find out what brand of processor. */
401 asm volatile ("cpuid"
402 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
405 /* This spells out "GenuineIntel". */
406 if (ebx
== 0x756e6547 && ecx
== 0x6c65746e && edx
== 0x49656e69)
407 return handle_intel (name
, eax
);
409 /* This spells out "AuthenticAMD". */
410 if (ebx
== 0x68747541 && ecx
== 0x444d4163 && edx
== 0x69746e65)
411 return handle_amd (name
);
413 // XXX Fill in more vendors.
415 /* CPU not known, we have no information. */
420 /* Half the data cache size for use in memory and string routines, typically
422 long int __x86_64_data_cache_size_half attribute_hidden
= 32 * 1024 / 2;
423 /* Shared cache size for use in memory and string routines, typically
425 long int __x86_64_shared_cache_size_half attribute_hidden
= 1024 * 1024 / 2;
426 long int __x86_64_shared_cache_size attribute_hidden
= 1024 * 1024;
427 /* PREFETCHW support flag for use in memory and string routines. */
428 int __x86_64_prefetchw attribute_hidden
;
430 /* Instructions preferred for memory and string routines.
432 0: Regular instructions
435 3: SSSE3 instructions
438 int __x86_64_preferred_memory_instruction attribute_hidden
;
442 __attribute__((constructor
))
443 init_cacheinfo (void)
445 /* Find out what brand of processor. */
452 long int shared
= -1;
454 unsigned int threads
= 0;
457 if (__cpu_features
.kind
== arch_kind_unknown
)
458 __init_cpu_features ();
459 # define is_intel __cpu_features.kind == arch_kind_intel
460 # define is_amd __cpu_features.kind == arch_kind_amd
461 # define max_cpuid __cpu_features.max_cpuid
464 asm volatile ("cpuid"
465 : "=a" (max_cpuid
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
467 /* This spells out "GenuineIntel". */
469 ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
470 /* This spells out "AuthenticAMD". */
472 ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
477 data
= handle_intel (_SC_LEVEL1_DCACHE_SIZE
, max_cpuid
);
481 shared
= handle_intel (_SC_LEVEL3_CACHE_SIZE
, max_cpuid
);
485 /* Try L2 otherwise. */
487 shared
= handle_intel (_SC_LEVEL2_CACHE_SIZE
, max_cpuid
);
491 eax
= __cpu_features
.cpuid
[INTEL_CPUID_INDEX_1
].eax
;
492 ebx
= __cpu_features
.cpuid
[INTEL_CPUID_INDEX_1
].ebx
;
493 ecx
= __cpu_features
.cpuid
[INTEL_CPUID_INDEX_1
].ecx
;
494 edx
= __cpu_features
.cpuid
[INTEL_CPUID_INDEX_1
].edx
;
496 asm volatile ("cpuid"
497 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
501 /* Intel prefers SSSE3 instructions for memory/string routines
502 if they are available. */
504 __x86_64_preferred_memory_instruction
= 3;
506 __x86_64_preferred_memory_instruction
= 2;
508 /* Figure out the number of logical threads that share the
509 highest cache level. */
514 /* Query until desired cache level is enumerated. */
517 asm volatile ("cpuid"
518 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
519 : "0" (4), "2" (i
++));
521 /* There seems to be a bug in at least some Pentium Ds
522 which sometimes fail to iterate all cache parameters.
523 Do not loop indefinitely here, stop in this case and
524 assume there is no such information. */
525 if ((eax
& 0x1f) == 0)
526 goto intel_bug_no_cache_info
;
528 while (((eax
>> 5) & 0x7) != level
);
530 threads
= ((eax
>> 14) & 0x3ff) + 1;
534 intel_bug_no_cache_info
:
535 /* Assume that all logical threads share the highest cache level. */
537 threads
= (ebx
>> 16) & 0xff;
540 /* Cap usage of highest cache level to the number of supported
542 if (shared
> 0 && threads
> 0)
545 /* This spells out "AuthenticAMD". */
548 data
= handle_amd (_SC_LEVEL1_DCACHE_SIZE
);
549 long int core
= handle_amd (_SC_LEVEL2_CACHE_SIZE
);
550 shared
= handle_amd (_SC_LEVEL3_CACHE_SIZE
);
552 /* Get maximum extended function. */
553 asm volatile ("cpuid"
554 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
558 /* No shared L3 cache. All we have is the L2 cache. */
562 /* Figure out the number of logical threads that share L3. */
563 if (max_cpuid_ex
>= 0x80000008)
565 /* Get width of APIC ID.  EAX is read into the scratch variable so
that max_cpuid_ex keeps the maximum extended function number for
the later 0x80000001 (PREFETCHW / 3DNow!) check. */
566 asm volatile ("cpuid"
567 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
),
570 threads
= 1 << ((ecx
>> 12) & 0x0f);
575 /* If APIC ID width is not available, use logical
577 asm volatile ("cpuid"
578 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
),
582 if ((edx
& (1 << 28)) != 0)
583 threads
= (ebx
>> 16) & 0xff;
586 /* Cap usage of highest cache level to the number of
587 supported threads. */
591 /* Account for exclusive L2 and L3 caches. */
595 if (max_cpuid_ex
>= 0x80000001)
597 asm volatile ("cpuid"
598 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
600 /* PREFETCHW || 3DNow! */
601 if ((ecx
& 0x100) || (edx
& 0x80000000))
602 __x86_64_prefetchw
= -1;
607 __x86_64_data_cache_size_half
= data
/ 2;
611 __x86_64_shared_cache_size_half
= shared
/ 2;
612 __x86_64_shared_cache_size
= shared
;