2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "gromacs/legacyheaders/gmx_cpuid.h"
49 #ifdef GMX_NATIVE_WINDOWS
50 /* MSVC definition for __cpuid() */
54 /* sysinfo functions */
61 /* sysconf() definition */
66 /* For convenience, and to enable configure-time invocation, we keep all architectures
67 * in a single file, but to avoid repeated ifdefs we set the overall architecture here.
70 /* OK, it is x86, but can we execute cpuid? */
71 #if defined(GMX_X86_GCC_INLINE_ASM) || ( defined(_MSC_VER) && ( (_MSC_VER > 1500) || (_MSC_VER == 1500 & _MSC_FULL_VER >= 150030729)))
72 # define GMX_CPUID_X86
76 /* Global constant character strings corresponding to our enumerated types */
78 gmx_cpuid_vendor_string
[GMX_CPUID_NVENDORS
] =
85 "IBM", /* Used on Power and BlueGene/Q */
90 gmx_cpuid_vendor_string_alternative
[GMX_CPUID_NVENDORS
] =
97 "ibm", /* Used on Power and BlueGene/Q */
102 gmx_cpuid_feature_string
[GMX_CPUID_NFEATURES
] =
151 gmx_cpuid_simd_string
[GMX_CPUID_NSIMD
] =
171 /* Max length of brand string */
172 #define GMX_CPUID_STRLEN 256
175 /* Contents of the abstract datatype */
178 enum gmx_cpuid_vendor vendor
;
179 char brand
[GMX_CPUID_STRLEN
];
183 /* Not using gmx_bool here, since this file must be possible to compile without simple.h */
184 char feature
[GMX_CPUID_NFEATURES
];
186 /* Basic CPU topology information. For x86 this is a bit complicated since the topology differs between
187 * operating systems and sometimes even settings. For most other architectures you can likely just check
188 * the documentation and then write static information to these arrays rather than detecting on-the-fly.
190 int have_cpu_topology
;
191 int nproc
; /* total number of logical processors from OS */
193 int ncores_per_package
;
194 int nhwthreads_per_core
;
196 int * core_id
; /* Local core id in each package */
197 int * hwthread_id
; /* Local hwthread id in each core */
198 int * locality_order
; /* Processor indices sorted in locality order */
202 /* Simple routines to access the data structure. The initialization routine is
203 * further down since that needs to call other static routines in this file.
205 enum gmx_cpuid_vendor
206 gmx_cpuid_vendor (gmx_cpuid_t cpuid
)
208 return cpuid
->vendor
;
213 gmx_cpuid_brand (gmx_cpuid_t cpuid
)
219 gmx_cpuid_family (gmx_cpuid_t cpuid
)
221 return cpuid
->family
;
225 gmx_cpuid_model (gmx_cpuid_t cpuid
)
231 gmx_cpuid_stepping (gmx_cpuid_t cpuid
)
233 return cpuid
->stepping
;
237 gmx_cpuid_feature (gmx_cpuid_t cpuid
,
238 enum gmx_cpuid_feature feature
)
240 return (cpuid
->feature
[feature
] != 0);
245 gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid
)
247 return (cpuid
->vendor
== GMX_CPUID_VENDOR_INTEL
&&
248 cpuid
->family
== 6 &&
249 (cpuid
->model
== 0x2E ||
250 cpuid
->model
== 0x1A ||
251 cpuid
->model
== 0x1E ||
252 cpuid
->model
== 0x2F ||
253 cpuid
->model
== 0x2C ||
254 cpuid
->model
== 0x25));
258 /* What type of SIMD was compiled in, if any? */
259 #ifdef GMX_SIMD_X86_AVX_512ER
260 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_X86_AVX_512ER
;
261 #elif defined GMX_SIMD_X86_AVX_512F
262 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_X86_AVX_512F
;
263 #elif defined GMX_SIMD_X86_AVX2_256
264 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_X86_AVX2_256
;
265 #elif defined GMX_SIMD_X86_AVX_256
266 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_X86_AVX_256
;
267 #elif defined GMX_SIMD_X86_AVX_128_FMA
268 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_X86_AVX_128_FMA
;
269 #elif defined GMX_SIMD_X86_SSE4_1
270 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_X86_SSE4_1
;
271 #elif defined GMX_SIMD_X86_SSE2
272 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_X86_SSE2
;
273 #elif defined GMX_SIMD_ARM_NEON
274 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_ARM_NEON
;
275 #elif defined GMX_SIMD_ARM_NEON_ASIMD
276 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_ARM_NEON_ASIMD
;
277 #elif defined GMX_SIMD_SPARC64_HPC_ACE
278 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_SPARC64_HPC_ACE
;
279 #elif defined GMX_SIMD_IBM_QPX
280 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_IBM_QPX
;
281 #elif defined GMX_SIMD_IBM_VMX
282 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_IBM_VMX
;
283 #elif defined GMX_SIMD_IBM_VSX
284 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_IBM_VSX
;
285 #elif defined GMX_SIMD_REFERENCE
286 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_REFERENCE
;
288 static const enum gmx_cpuid_simd compiled_simd
= GMX_CPUID_SIMD_NONE
;
294 /* Execute CPUID on x86 class CPUs. level sets function to exec, and the
295 * contents of register output is returned. See Intel/AMD docs for details.
297 * This version supports extended information where we can also have an input
298 * value in the ecx register. This is ignored for most levels, but some of them
299 * (e.g. level 0xB on Intel) use it.
302 execute_x86cpuid(unsigned int level
,
311 /* Currently CPUID is only supported (1) if we can use an instruction on MSVC, or (2)
312 * if the compiler handles GNU-style inline assembly.
315 #if (defined _MSC_VER)
318 #if (_MSC_VER > 1500) || (_MSC_VER == 1500 & _MSC_FULL_VER >= 150030729)
319 /* MSVC 9.0 SP1 or later */
320 __cpuidex(CPUInfo
, level
, ecxval
);
323 __cpuid(CPUInfo
, level
);
324 /* Set an error code if the user wanted a non-zero ecxval, since we did not have cpuidex */
325 rc
= (ecxval
> 0) ? -1 : 0;
332 #elif (defined GMX_X86_GCC_INLINE_ASM)
333 /* for now this means GMX_X86_GCC_INLINE_ASM should be defined,
334 * but there might be more options added in the future.
340 #if defined(__i386__) && defined(__PIC__)
341 /* Avoid clobbering the global offset table in 32-bit pic code (ebx register) */
342 __asm__
__volatile__ ("xchgl %%ebx, %1 \n\t"
344 "xchgl %%ebx, %1 \n\t"
345 : "+a" (*eax
), "+r" (*ebx
), "+c" (*ecx
), "+d" (*edx
));
347 /* i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want :-) */
348 __asm__
__volatile__ ("cpuid \n\t"
349 : "+a" (*eax
), "+b" (*ebx
), "+c" (*ecx
), "+d" (*edx
));
354 * Apparently this is an x86 platform where we don't know how to call cpuid.
356 * This is REALLY bad, since we will lose all Gromacs SIMD support.
369 /* Identify CPU features common to Intel & AMD - mainly brand string,
370 * version and some features. Vendor has already been detected outside this.
373 cpuid_check_common_x86(gmx_cpuid_t cpuid
)
375 int fn
, max_stdfn
, max_extfn
;
376 unsigned int eax
, ebx
, ecx
, edx
;
377 char str
[GMX_CPUID_STRLEN
];
380 /* Find largest standard/extended function input value */
381 execute_x86cpuid(0x0, 0, &eax
, &ebx
, &ecx
, &edx
);
383 execute_x86cpuid(0x80000000, 0, &eax
, &ebx
, &ecx
, &edx
);
387 if (max_extfn
>= 0x80000005)
389 /* Get CPU brand string */
390 for (fn
= 0x80000002; fn
< 0x80000005; fn
++)
392 execute_x86cpuid(fn
, 0, &eax
, &ebx
, &ecx
, &edx
);
394 memcpy(p
+4, &ebx
, 4);
395 memcpy(p
+8, &ecx
, 4);
396 memcpy(p
+12, &edx
, 4);
401 /* Remove empty initial space */
403 while (isspace(*(p
)))
407 strncpy(cpuid
->brand
, p
, GMX_CPUID_STRLEN
);
411 strncpy(cpuid
->brand
, "Unknown CPU brand", GMX_CPUID_STRLEN
);
414 /* Find basic CPU properties */
417 execute_x86cpuid(0x1, 0, &eax
, &ebx
, &ecx
, &edx
);
419 cpuid
->family
= ((eax
& 0x0FF00000) >> 20) + ((eax
& 0x00000F00) >> 8);
420 /* Note that extended model should be shifted left 4, so only shift right 12 iso 16. */
421 cpuid
->model
= ((eax
& 0x000F0000) >> 12) + ((eax
& 0x000000F0) >> 4);
422 cpuid
->stepping
= (eax
& 0x0000000F);
424 /* Feature flags common to AMD and intel */
425 cpuid
->feature
[GMX_CPUID_FEATURE_X86_SSE3
] = (ecx
& (1 << 0)) != 0;
426 cpuid
->feature
[GMX_CPUID_FEATURE_X86_PCLMULDQ
] = (ecx
& (1 << 1)) != 0;
427 cpuid
->feature
[GMX_CPUID_FEATURE_X86_SSSE3
] = (ecx
& (1 << 9)) != 0;
428 cpuid
->feature
[GMX_CPUID_FEATURE_X86_FMA
] = (ecx
& (1 << 12)) != 0;
429 cpuid
->feature
[GMX_CPUID_FEATURE_X86_CX16
] = (ecx
& (1 << 13)) != 0;
430 cpuid
->feature
[GMX_CPUID_FEATURE_X86_SSE4_1
] = (ecx
& (1 << 19)) != 0;
431 cpuid
->feature
[GMX_CPUID_FEATURE_X86_SSE4_2
] = (ecx
& (1 << 20)) != 0;
432 cpuid
->feature
[GMX_CPUID_FEATURE_X86_POPCNT
] = (ecx
& (1 << 23)) != 0;
433 cpuid
->feature
[GMX_CPUID_FEATURE_X86_AES
] = (ecx
& (1 << 25)) != 0;
434 cpuid
->feature
[GMX_CPUID_FEATURE_X86_AVX
] = (ecx
& (1 << 28)) != 0;
435 cpuid
->feature
[GMX_CPUID_FEATURE_X86_F16C
] = (ecx
& (1 << 29)) != 0;
436 cpuid
->feature
[GMX_CPUID_FEATURE_X86_RDRND
] = (ecx
& (1 << 30)) != 0;
438 cpuid
->feature
[GMX_CPUID_FEATURE_X86_PSE
] = (edx
& (1 << 3)) != 0;
439 cpuid
->feature
[GMX_CPUID_FEATURE_X86_MSR
] = (edx
& (1 << 5)) != 0;
440 cpuid
->feature
[GMX_CPUID_FEATURE_X86_CX8
] = (edx
& (1 << 8)) != 0;
441 cpuid
->feature
[GMX_CPUID_FEATURE_X86_APIC
] = (edx
& (1 << 9)) != 0;
442 cpuid
->feature
[GMX_CPUID_FEATURE_X86_CMOV
] = (edx
& (1 << 15)) != 0;
443 cpuid
->feature
[GMX_CPUID_FEATURE_X86_CLFSH
] = (edx
& (1 << 19)) != 0;
444 cpuid
->feature
[GMX_CPUID_FEATURE_X86_MMX
] = (edx
& (1 << 23)) != 0;
445 cpuid
->feature
[GMX_CPUID_FEATURE_X86_SSE2
] = (edx
& (1 << 26)) != 0;
446 cpuid
->feature
[GMX_CPUID_FEATURE_X86_HTT
] = (edx
& (1 << 28)) != 0;
452 cpuid
->stepping
= -1;
455 if (max_extfn
>= 0x80000001)
457 execute_x86cpuid(0x80000001, 0, &eax
, &ebx
, &ecx
, &edx
);
458 cpuid
->feature
[GMX_CPUID_FEATURE_X86_LAHF_LM
] = (ecx
& (1 << 0)) != 0;
459 cpuid
->feature
[GMX_CPUID_FEATURE_X86_PDPE1GB
] = (edx
& (1 << 26)) != 0;
460 cpuid
->feature
[GMX_CPUID_FEATURE_X86_RDTSCP
] = (edx
& (1 << 27)) != 0;
463 if (max_extfn
>= 0x80000007)
465 execute_x86cpuid(0x80000007, 0, &eax
, &ebx
, &ecx
, &edx
);
466 cpuid
->feature
[GMX_CPUID_FEATURE_X86_NONSTOP_TSC
] = (edx
& (1 << 8)) != 0;
/* This routine returns the number of unique different elements found in the array,
 * and renumbers these starting from 0. For example, the array {0,1,2,8,9,10,8,9,10,0,1,2}
 * will be rewritten to {0,1,2,3,4,5,3,4,5,0,1,2}, and it returns 6 for the
 * number of unique elements.
 *
 * data  Array of n values; rewritten in place so that each value is replaced
 *       by its rank among the distinct values (smallest value becomes 0).
 * n     Number of entries in data.
 *
 * Returns the number of distinct values. Returns 0, leaving data untouched,
 * when data is NULL, n is not positive, or the scratch buffer cannot be
 * allocated.
 */
static int
cpuid_renumber_elements(int *data, int n)
{
    int *unique;
    int  i, j, nunique, found;

    if (data == NULL || n <= 0)
    {
        return 0;
    }

    unique = malloc(sizeof(*unique) * (size_t) n);
    if (unique == NULL)
    {
        return 0;
    }

    /* Pass 1: collect the distinct values, kept in ascending order. */
    nunique = 0;
    for (i = 0; i < n; i++)
    {
        for (j = 0, found = 0; j < nunique && !found; j++)
        {
            found = (data[i] == unique[j]);
        }
        if (!found)
        {
            /* Insert in sorted order! */
            for (j = nunique++; j > 0 && unique[j-1] > data[i]; j--)
            {
                unique[j] = unique[j-1];
            }
            unique[j] = data[i];
        }
    }

    /* Pass 2: replace every value by its index in the sorted unique list. */
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < nunique; j++)
        {
            if (data[i] == unique[j])
            {
                data[i] = j;
                /* Stop here: the freshly written index must not be compared
                 * against later unique[] entries, or it could be renumbered
                 * a second time (possible when the input has negative values).
                 */
                break;
            }
        }
    }

    free(unique);

    return nunique;
}
516 /* APIC IDs, or everything you wanted to know about your x86 cores but were afraid to ask...
518 * Raw APIC IDs are unfortunately somewhat dirty. For technical reasons they are assigned
519 * in power-of-2 chunks, and even then there are no guarantees about specific numbers - all
520 * we know is that the part for each thread/core/package is unique, and how many bits are
521 * reserved for that part.
522 * This routine does internal renumbering so we get continuous indices, and also
523 * decodes the actual number of packages,cores-per-package and hwthreads-per-core.
524 * Returns: 0 on success, non-zero on failure.
527 cpuid_x86_decode_apic_id(gmx_cpuid_t cpuid
, int *apic_id
, int core_bits
, int hwthread_bits
)
530 int hwthread_mask
, core_mask_after_shift
;
532 cpuid
->hwthread_id
= malloc(sizeof(int)*cpuid
->nproc
);
533 cpuid
->core_id
= malloc(sizeof(int)*cpuid
->nproc
);
534 cpuid
->package_id
= malloc(sizeof(int)*cpuid
->nproc
);
535 cpuid
->locality_order
= malloc(sizeof(int)*cpuid
->nproc
);
537 hwthread_mask
= (1 << hwthread_bits
) - 1;
538 core_mask_after_shift
= (1 << core_bits
) - 1;
540 for (i
= 0; i
< cpuid
->nproc
; i
++)
542 cpuid
->hwthread_id
[i
] = apic_id
[i
] & hwthread_mask
;
543 cpuid
->core_id
[i
] = (apic_id
[i
] >> hwthread_bits
) & core_mask_after_shift
;
544 cpuid
->package_id
[i
] = apic_id
[i
] >> (core_bits
+ hwthread_bits
);
547 cpuid
->npackages
= cpuid_renumber_elements(cpuid
->package_id
, cpuid
->nproc
);
548 cpuid
->ncores_per_package
= cpuid_renumber_elements(cpuid
->core_id
, cpuid
->nproc
);
549 cpuid
->nhwthreads_per_core
= cpuid_renumber_elements(cpuid
->hwthread_id
, cpuid
->nproc
);
551 /* now check for consistency */
552 if ( (cpuid
->npackages
* cpuid
->ncores_per_package
*
553 cpuid
->nhwthreads_per_core
) != cpuid
->nproc
)
555 /* the packages/cores-per-package/hwthreads-per-core counts are
560 /* Create a locality order array, i.e. first all resources in package0, which in turn
561 * are sorted so we first have all resources in core0, where threads are sorted in order, etc.
564 for (i
= 0; i
< cpuid
->nproc
; i
++)
566 idx
= (cpuid
->package_id
[i
]*cpuid
->ncores_per_package
+ cpuid
->core_id
[i
])*cpuid
->nhwthreads_per_core
+ cpuid
->hwthread_id
[i
];
567 cpuid
->locality_order
[idx
] = i
;
573 /* Detection of AMD-specific CPU features */
575 cpuid_check_amd_x86(gmx_cpuid_t cpuid
)
577 int max_stdfn
, max_extfn
, ret
;
578 unsigned int eax
, ebx
, ecx
, edx
;
579 int hwthread_bits
, core_bits
;
582 cpuid_check_common_x86(cpuid
);
584 execute_x86cpuid(0x0, 0, &eax
, &ebx
, &ecx
, &edx
);
587 execute_x86cpuid(0x80000000, 0, &eax
, &ebx
, &ecx
, &edx
);
590 if (max_extfn
>= 0x80000001)
592 execute_x86cpuid(0x80000001, 0, &eax
, &ebx
, &ecx
, &edx
);
594 cpuid
->feature
[GMX_CPUID_FEATURE_X86_SSE4A
] = (ecx
& (1 << 6)) != 0;
595 cpuid
->feature
[GMX_CPUID_FEATURE_X86_MISALIGNSSE
] = (ecx
& (1 << 7)) != 0;
596 cpuid
->feature
[GMX_CPUID_FEATURE_X86_XOP
] = (ecx
& (1 << 11)) != 0;
597 cpuid
->feature
[GMX_CPUID_FEATURE_X86_FMA4
] = (ecx
& (1 << 16)) != 0;
600 /* Query APIC information on AMD */
601 if (max_extfn
>= 0x80000008)
603 #if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
606 cpu_set_t cpuset
, save_cpuset
;
607 cpuid
->nproc
= sysconf(_SC_NPROCESSORS_ONLN
);
608 apic_id
= malloc(sizeof(int)*cpuid
->nproc
);
609 sched_getaffinity(0, sizeof(cpu_set_t
), &save_cpuset
);
610 /* Get APIC id from each core */
612 for (i
= 0; i
< cpuid
->nproc
; i
++)
615 sched_setaffinity(0, sizeof(cpu_set_t
), &cpuset
);
616 execute_x86cpuid(0x1, 0, &eax
, &ebx
, &ecx
, &edx
);
617 apic_id
[i
] = ebx
>> 24;
620 /* Reset affinity to the value it had when calling this routine */
621 sched_setaffinity(0, sizeof(cpu_set_t
), &save_cpuset
);
622 #define CPUID_HAVE_APIC
623 #elif defined GMX_NATIVE_WINDOWS
627 unsigned int save_affinity
, affinity
;
628 GetSystemInfo( &sysinfo
);
629 cpuid
->nproc
= sysinfo
.dwNumberOfProcessors
;
630 apic_id
= malloc(sizeof(int)*cpuid
->nproc
);
631 /* Get previous affinity mask */
632 save_affinity
= SetThreadAffinityMask(GetCurrentThread(), 1);
633 for (i
= 0; i
< cpuid
->nproc
; i
++)
635 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR
)1)<<i
));
637 execute_x86cpuid(0x1, 0, &eax
, &ebx
, &ecx
, &edx
);
638 apic_id
[i
] = ebx
>> 24;
640 SetThreadAffinityMask(GetCurrentThread(), save_affinity
);
641 #define CPUID_HAVE_APIC
643 #ifdef CPUID_HAVE_APIC
644 /* AMD does not support SMT yet - there are no hwthread bits in apic ID */
646 /* Get number of core bits in apic ID - try modern extended method first */
647 execute_x86cpuid(0x80000008, 0, &eax
, &ebx
, &ecx
, &edx
);
648 core_bits
= (ecx
>> 12) & 0xf;
651 /* Legacy method for old single/dual core AMD CPUs */
653 for (core_bits
= 0; (i
>>core_bits
) > 0; core_bits
++)
658 ret
= cpuid_x86_decode_apic_id(cpuid
, apic_id
, core_bits
,
660 cpuid
->have_cpu_topology
= (ret
== 0);
666 /* Detection of Intel-specific CPU features */
668 cpuid_check_intel_x86(gmx_cpuid_t cpuid
)
670 unsigned int max_stdfn
, max_extfn
, ret
;
671 unsigned int eax
, ebx
, ecx
, edx
;
672 unsigned int max_logical_cores
, max_physical_cores
;
673 int hwthread_bits
, core_bits
;
676 cpuid_check_common_x86(cpuid
);
678 execute_x86cpuid(0x0, 0, &eax
, &ebx
, &ecx
, &edx
);
681 execute_x86cpuid(0x80000000, 0, &eax
, &ebx
, &ecx
, &edx
);
686 execute_x86cpuid(0x1, 0, &eax
, &ebx
, &ecx
, &edx
);
687 cpuid
->feature
[GMX_CPUID_FEATURE_X86_PDCM
] = (ecx
& (1 << 15)) != 0;
688 cpuid
->feature
[GMX_CPUID_FEATURE_X86_PCID
] = (ecx
& (1 << 17)) != 0;
689 cpuid
->feature
[GMX_CPUID_FEATURE_X86_X2APIC
] = (ecx
& (1 << 21)) != 0;
690 cpuid
->feature
[GMX_CPUID_FEATURE_X86_TDT
] = (ecx
& (1 << 24)) != 0;
695 execute_x86cpuid(0x7, 0, &eax
, &ebx
, &ecx
, &edx
);
696 cpuid
->feature
[GMX_CPUID_FEATURE_X86_AVX2
] = (ebx
& (1 << 5)) != 0;
697 cpuid
->feature
[GMX_CPUID_FEATURE_X86_AVX_512F
] = (ebx
& (1 << 16)) != 0;
698 cpuid
->feature
[GMX_CPUID_FEATURE_X86_AVX_512PF
] = (ebx
& (1 << 26)) != 0;
699 cpuid
->feature
[GMX_CPUID_FEATURE_X86_AVX_512ER
] = (ebx
& (1 << 27)) != 0;
700 cpuid
->feature
[GMX_CPUID_FEATURE_X86_AVX_512CD
] = (ebx
& (1 << 28)) != 0;
703 /* Check whether Hyper-Threading is enabled, not only supported */
704 if (cpuid
->feature
[GMX_CPUID_FEATURE_X86_HTT
] && max_stdfn
>= 4)
706 execute_x86cpuid(0x1, 0, &eax
, &ebx
, &ecx
, &edx
);
707 max_logical_cores
= (ebx
>> 16) & 0x0FF;
708 execute_x86cpuid(0x4, 0, &eax
, &ebx
, &ecx
, &edx
);
709 max_physical_cores
= ((eax
>> 26) & 0x3F) + 1;
711 /* Clear HTT flag if we only have 1 logical core per physical */
712 if (max_logical_cores
/max_physical_cores
< 2)
714 cpuid
->feature
[GMX_CPUID_FEATURE_X86_HTT
] = 0;
718 if (max_stdfn
>= 0xB)
720 /* Query x2 APIC information from cores */
721 #if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
724 cpu_set_t cpuset
, save_cpuset
;
725 cpuid
->nproc
= sysconf(_SC_NPROCESSORS_ONLN
);
726 apic_id
= malloc(sizeof(int)*cpuid
->nproc
);
727 sched_getaffinity(0, sizeof(cpu_set_t
), &save_cpuset
);
728 /* Get x2APIC ID from each hardware thread */
730 for (i
= 0; i
< cpuid
->nproc
; i
++)
733 sched_setaffinity(0, sizeof(cpu_set_t
), &cpuset
);
734 execute_x86cpuid(0xB, 0, &eax
, &ebx
, &ecx
, &edx
);
738 /* Reset affinity to the value it had when calling this routine */
739 sched_setaffinity(0, sizeof(cpu_set_t
), &save_cpuset
);
740 #define CPUID_HAVE_APIC
741 #elif defined GMX_NATIVE_WINDOWS
745 unsigned int save_affinity
, affinity
;
746 GetSystemInfo( &sysinfo
);
747 cpuid
->nproc
= sysinfo
.dwNumberOfProcessors
;
748 apic_id
= malloc(sizeof(int)*cpuid
->nproc
);
749 /* Get previous affinity mask */
750 save_affinity
= SetThreadAffinityMask(GetCurrentThread(), 1);
751 for (i
= 0; i
< cpuid
->nproc
; i
++)
753 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR
)1)<<i
));
755 execute_x86cpuid(0xB, 0, &eax
, &ebx
, &ecx
, &edx
);
758 SetThreadAffinityMask(GetCurrentThread(), save_affinity
);
759 #define CPUID_HAVE_APIC
761 #ifdef CPUID_HAVE_APIC
762 execute_x86cpuid(0xB, 0, &eax
, &ebx
, &ecx
, &edx
);
763 hwthread_bits
= eax
& 0x1F;
764 execute_x86cpuid(0xB, 1, &eax
, &ebx
, &ecx
, &edx
);
765 core_bits
= (eax
& 0x1F) - hwthread_bits
;
766 ret
= cpuid_x86_decode_apic_id(cpuid
, apic_id
, core_bits
,
768 cpuid
->have_cpu_topology
= (ret
== 0);
773 #endif /* GMX_CPUID_X86 */
/* Extract the text that precedes the first ':' in a line.
 *
 * in         NUL-terminated input line.
 * s          Output buffer with room for at least maxlength characters.
 * maxlength  Size of s in characters; nothing is written when it is <= 0.
 *
 * At most maxlength-1 characters of 'in' are copied and the result is always
 * NUL-terminated. The copy is cut at the first ':' and trailing whitespace
 * in front of the colon is removed. If the copied text contains no ':' the
 * output is the empty string.
 */
static void
chomp_substring_before_colon(const char *in, char *s, int maxlength)
{
    char *p;

    if (maxlength <= 0)
    {
        return;
    }

    /* strncpy() does not terminate the destination on truncation. */
    strncpy(s, in, (size_t) maxlength - 1);
    s[maxlength - 1] = '\0';

    p = strchr(s, ':');
    if (p == NULL)
    {
        *s = '\0';
        return;
    }

    *p = '\0';
    /* Check the bound before looking at the previous character, so we never
     * read in front of the buffer when only whitespace precedes the colon.
     */
    while (p > s && isspace((unsigned char) p[-1]))
    {
        *(--p) = '\0';
    }
}
/* Extract the text that follows the first ':' in a line.
 *
 * in         NUL-terminated input line.
 * s          Output buffer with room for at least maxlength characters.
 * maxlength  Size of s in characters; nothing is written when it is <= 0.
 *
 * Whitespace directly after the colon is skipped, at most maxlength-1
 * characters are copied, the result is always NUL-terminated, and trailing
 * whitespace (such as the newline kept by fgets) is removed. If 'in'
 * contains no ':' the output is the empty string.
 */
static void
chomp_substring_after_colon(const char *in, char *s, int maxlength)
{
    const char *start;
    size_t      len;

    if (maxlength <= 0)
    {
        return;
    }

    start = strchr(in, ':');
    if (start == NULL)
    {
        *s = '\0';
        return;
    }

    /* Step over the colon and any whitespace that follows it. */
    start++;
    while (isspace((unsigned char) *start))
    {
        start++;
    }

    /* strncpy() does not terminate the destination on truncation. */
    strncpy(s, start, (size_t) maxlength - 1);
    s[maxlength - 1] = '\0';

    /* Trim from the end by length, so an empty or all-whitespace value
     * never makes us look at the byte in front of the buffer.
     */
    len = strlen(s);
    while (len > 0 && isspace((unsigned char) s[len - 1]))
    {
        s[--len] = '\0';
    }
}
822 cpuid_check_arm(gmx_cpuid_t cpuid
)
824 #if defined(__linux__) || defined(__linux)
826 char buffer
[GMX_CPUID_STRLEN
], buffer2
[GMX_CPUID_STRLEN
], buffer3
[GMX_CPUID_STRLEN
];
828 if ( (fp
= fopen("/proc/cpuinfo", "r")) != NULL
)
830 while ( (fgets(buffer
, sizeof(buffer
), fp
) != NULL
))
832 chomp_substring_before_colon(buffer
, buffer2
, GMX_CPUID_STRLEN
);
833 chomp_substring_after_colon(buffer
, buffer3
, GMX_CPUID_STRLEN
);
835 if (!strcmp(buffer2
, "Processor"))
837 strncpy(cpuid
->brand
, buffer3
, GMX_CPUID_STRLEN
);
839 else if (!strcmp(buffer2
, "CPU architecture"))
841 cpuid
->family
= strtol(buffer3
, NULL
, 10);
842 if (!strcmp(buffer3
, "AArch64"))
847 else if (!strcmp(buffer2
, "CPU part"))
849 cpuid
->model
= strtol(buffer3
, NULL
, 16);
851 else if (!strcmp(buffer2
, "CPU revision"))
853 cpuid
->stepping
= strtol(buffer3
, NULL
, 10);
855 else if (!strcmp(buffer2
, "Features") && strstr(buffer3
, "neon"))
857 cpuid
->feature
[GMX_CPUID_FEATURE_ARM_NEON
] = 1;
859 else if (!strcmp(buffer2
, "Features") && strstr(buffer3
, "asimd"))
861 cpuid
->feature
[GMX_CPUID_FEATURE_ARM_NEON_ASIMD
] = 1;
868 /* Strange 64-bit non-linux platform. However, since NEON ASIMD is present on all
869 * implementations of AArch64 this far, we assume it is present for now.
871 cpuid
->feature
[GMX_CPUID_FEATURE_ARM_NEON_ASIMD
] = 1;
873 /* Strange 32-bit non-linux platform. We cannot assume that neon is present. */
874 cpuid
->feature
[GMX_CPUID_FEATURE_ARM_NEON
] = 0;
882 cpuid_check_ibm(gmx_cpuid_t cpuid
)
884 #if defined(__linux__) || defined(__linux)
886 char buffer
[GMX_CPUID_STRLEN
], before_colon
[GMX_CPUID_STRLEN
], after_colon
[GMX_CPUID_STRLEN
];
888 if ( (fp
= fopen("/proc/cpuinfo", "r")) != NULL
)
890 while ( (fgets(buffer
, sizeof(buffer
), fp
) != NULL
))
892 chomp_substring_before_colon(buffer
, before_colon
, GMX_CPUID_STRLEN
);
893 chomp_substring_after_colon(buffer
, after_colon
, GMX_CPUID_STRLEN
);
895 if (!strcmp(before_colon
, "cpu") || !strcmp(before_colon
, "Processor"))
897 strncpy(cpuid
->brand
, after_colon
, GMX_CPUID_STRLEN
);
899 if (!strcmp(before_colon
, "model name") ||
900 !strcmp(before_colon
, "model") ||
901 !strcmp(before_colon
, "Processor") ||
902 !strcmp(before_colon
, "cpu"))
904 if (strstr(after_colon
, "altivec"))
906 cpuid
->feature
[GMX_CPUID_FEATURE_IBM_VMX
] = 1;
908 if (!strstr(after_colon
, "POWER6") && !strstr(after_colon
, "Power6") &&
909 !strstr(after_colon
, "power6"))
911 cpuid
->feature
[GMX_CPUID_FEATURE_IBM_VSX
] = 1;
919 if (strstr(cpuid
->brand
, "A2"))
922 cpuid
->feature
[GMX_CPUID_FEATURE_IBM_QPX
] = 1;
925 strncpy(cpuid
->brand
, "Unknown CPU brand", GMX_CPUID_STRLEN
);
926 cpuid
->feature
[GMX_CPUID_FEATURE_IBM_QPX
] = 0;
927 cpuid
->feature
[GMX_CPUID_FEATURE_IBM_VMX
] = 0;
928 cpuid
->feature
[GMX_CPUID_FEATURE_IBM_VSX
] = 0;
934 /* Try to find the vendor of the current CPU, so we know what specific
935 * detection routine to call.
937 static enum gmx_cpuid_vendor
938 cpuid_check_vendor(void)
940 enum gmx_cpuid_vendor i
, vendor
;
941 /* Register data used on x86 */
942 unsigned int eax
, ebx
, ecx
, edx
;
943 char vendorstring
[13];
945 char buffer
[GMX_CPUID_STRLEN
];
946 char before_colon
[GMX_CPUID_STRLEN
];
947 char after_colon
[GMX_CPUID_STRLEN
];
949 /* Set default first */
950 vendor
= GMX_CPUID_VENDOR_UNKNOWN
;
953 execute_x86cpuid(0x0, 0, &eax
, &ebx
, &ecx
, &edx
);
955 memcpy(vendorstring
, &ebx
, 4);
956 memcpy(vendorstring
+4, &edx
, 4);
957 memcpy(vendorstring
+8, &ecx
, 4);
959 vendorstring
[12] = '\0';
961 for (i
= GMX_CPUID_VENDOR_UNKNOWN
; i
< GMX_CPUID_NVENDORS
; i
++)
963 if (!strncmp(vendorstring
, gmx_cpuid_vendor_string
[i
], 12))
968 #elif defined(__linux__) || defined(__linux)
969 /* General Linux. Try to get CPU vendor from /proc/cpuinfo */
970 if ( (fp
= fopen("/proc/cpuinfo", "r")) != NULL
)
972 while ( (vendor
== GMX_CPUID_VENDOR_UNKNOWN
) && (fgets(buffer
, sizeof(buffer
), fp
) != NULL
))
974 chomp_substring_before_colon(buffer
, before_colon
, sizeof(before_colon
));
975 /* Intel/AMD use "vendor_id", IBM "vendor", "model", or "cpu". Fujitsu "manufacture".
976 * On ARM there does not seem to be a vendor, but ARM or AArch64 is listed in the Processor string.
977 * Add others if you have them!
979 if (!strcmp(before_colon
, "vendor_id")
980 || !strcmp(before_colon
, "vendor")
981 || !strcmp(before_colon
, "manufacture")
982 || !strcmp(before_colon
, "model")
983 || !strcmp(before_colon
, "Processor")
984 || !strcmp(before_colon
, "cpu"))
986 chomp_substring_after_colon(buffer
, after_colon
, sizeof(after_colon
));
987 for (i
= GMX_CPUID_VENDOR_UNKNOWN
; i
< GMX_CPUID_NVENDORS
; i
++)
989 /* Be liberal and accept if we find the vendor
990 * string (or alternative string) anywhere. Using
991 * strcasestr() would be non-portable. */
992 if (strstr(after_colon
, gmx_cpuid_vendor_string
[i
])
993 || strstr(after_colon
, gmx_cpuid_vendor_string_alternative
[i
]))
998 /* If we did not find vendor yet, check if it is IBM:
999 * On some Power/PowerPC systems it only says power, not IBM.
1001 if (vendor
== GMX_CPUID_VENDOR_UNKNOWN
&&
1002 ((strstr(after_colon
, "POWER") || strstr(after_colon
, "Power") ||
1003 strstr(after_colon
, "power"))))
1005 vendor
= GMX_CPUID_VENDOR_IBM
;
1011 #elif defined(__arm__) || defined (__arm) || defined(__aarch64__)
1012 /* If we are using ARM on something that is not linux we have to trust the compiler,
1013 * and we cannot get the extra info that might be present in /proc/cpuinfo.
1015 vendor
= GMX_CPUID_VENDOR_ARM
;
1023 gmx_cpuid_topology(gmx_cpuid_t cpuid
,
1026 int * ncores_per_package
,
1027 int * nhwthreads_per_core
,
1028 const int ** package_id
,
1029 const int ** core_id
,
1030 const int ** hwthread_id
,
1031 const int ** locality_order
)
1035 if (cpuid
->have_cpu_topology
)
1037 *nprocessors
= cpuid
->nproc
;
1038 *npackages
= cpuid
->npackages
;
1039 *ncores_per_package
= cpuid
->ncores_per_package
;
1040 *nhwthreads_per_core
= cpuid
->nhwthreads_per_core
;
1041 *package_id
= cpuid
->package_id
;
1042 *core_id
= cpuid
->core_id
;
1043 *hwthread_id
= cpuid
->hwthread_id
;
1044 *locality_order
= cpuid
->locality_order
;
1055 enum gmx_cpuid_x86_smt
1056 gmx_cpuid_x86_smt(gmx_cpuid_t cpuid
)
1058 enum gmx_cpuid_x86_smt rc
;
1060 if (cpuid
->have_cpu_topology
)
1062 rc
= (cpuid
->nhwthreads_per_core
> 1) ? GMX_CPUID_X86_SMT_ENABLED
: GMX_CPUID_X86_SMT_DISABLED
;
1064 else if (cpuid
->vendor
== GMX_CPUID_VENDOR_AMD
|| gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_X86_HTT
) == 0)
1066 rc
= GMX_CPUID_X86_SMT_DISABLED
;
1070 rc
= GMX_CPUID_X86_SMT_CANNOTDETECT
;
1077 gmx_cpuid_init (gmx_cpuid_t
* pcpuid
)
1082 char buffer
[GMX_CPUID_STRLEN
], buffer2
[GMX_CPUID_STRLEN
];
1085 cpuid
= malloc(sizeof(*cpuid
));
1089 for (i
= 0; i
< GMX_CPUID_NFEATURES
; i
++)
1091 cpuid
->feature
[i
] = 0;
1094 cpuid
->have_cpu_topology
= 0;
1096 cpuid
->npackages
= 0;
1097 cpuid
->ncores_per_package
= 0;
1098 cpuid
->nhwthreads_per_core
= 0;
1099 cpuid
->package_id
= NULL
;
1100 cpuid
->core_id
= NULL
;
1101 cpuid
->hwthread_id
= NULL
;
1102 cpuid
->locality_order
= NULL
;
1104 cpuid
->vendor
= cpuid_check_vendor();
1106 switch (cpuid
->vendor
)
1108 #ifdef GMX_CPUID_X86
1109 case GMX_CPUID_VENDOR_INTEL
:
1110 cpuid_check_intel_x86(cpuid
);
1112 case GMX_CPUID_VENDOR_AMD
:
1113 cpuid_check_amd_x86(cpuid
);
1116 case GMX_CPUID_VENDOR_ARM
:
1117 cpuid_check_arm(cpuid
);
1119 case GMX_CPUID_VENDOR_IBM
:
1120 cpuid_check_ibm(cpuid
);
1124 strncpy(cpuid
->brand
, "Unknown CPU brand", GMX_CPUID_STRLEN
);
1125 #if defined(__linux__) || defined(__linux)
1126 /* General Linux. Try to get CPU type from /proc/cpuinfo */
1127 if ( (fp
= fopen("/proc/cpuinfo", "r")) != NULL
)
1130 while ( (found_brand
== 0) && (fgets(buffer
, sizeof(buffer
), fp
) != NULL
))
1132 chomp_substring_before_colon(buffer
, buffer2
, sizeof(buffer2
));
1133 /* Intel uses "model name", Fujitsu and IBM "cpu". */
1134 if (!strcmp(buffer2
, "model name") || !strcmp(buffer2
, "cpu"))
1136 chomp_substring_after_colon(buffer
, cpuid
->brand
, GMX_CPUID_STRLEN
);
1145 cpuid
->stepping
= 0;
1147 for (i
= 0; i
< GMX_CPUID_NFEATURES
; i
++)
1149 cpuid
->feature
[i
] = 0;
1151 cpuid
->feature
[GMX_CPUID_FEATURE_CANNOTDETECT
] = 1;
1160 gmx_cpuid_done (gmx_cpuid_t cpuid
)
1167 gmx_cpuid_formatstring (gmx_cpuid_t cpuid
,
1173 enum gmx_cpuid_feature feature
;
1179 "Family: %2d Model: %2d Stepping: %2d\n"
1181 gmx_cpuid_vendor_string
[gmx_cpuid_vendor(cpuid
)],
1182 gmx_cpuid_brand(cpuid
),
1183 gmx_cpuid_family(cpuid
), gmx_cpuid_model(cpuid
), gmx_cpuid_stepping(cpuid
));
1188 "Family: %2d Model: %2d Stepping: %2d\n"
1190 gmx_cpuid_vendor_string
[gmx_cpuid_vendor(cpuid
)],
1191 gmx_cpuid_brand(cpuid
),
1192 gmx_cpuid_family(cpuid
), gmx_cpuid_model(cpuid
), gmx_cpuid_stepping(cpuid
));
1200 for (feature
= GMX_CPUID_FEATURE_CANNOTDETECT
; feature
< GMX_CPUID_NFEATURES
; feature
++)
1202 if (gmx_cpuid_feature(cpuid
, feature
) == 1)
1205 _snprintf(str
, n
, " %s", gmx_cpuid_feature_string
[feature
]);
1207 snprintf(str
, n
, " %s", gmx_cpuid_feature_string
[feature
]);
1216 _snprintf(str
, n
, "\n");
1218 snprintf(str
, n
, "\n");
1228 gmx_cpuid_simd_suggest (gmx_cpuid_t cpuid
)
1230 enum gmx_cpuid_simd tmpsimd
;
1232 tmpsimd
= GMX_CPUID_SIMD_NONE
;
1234 if (gmx_cpuid_vendor(cpuid
) == GMX_CPUID_VENDOR_INTEL
)
1236 /* TODO: Add check for AVX-512F & AVX-512ER here as soon as we
1237 * have implemented verlet kernels for them. Until then,
1238 * we should pick AVX2 instead for the automatic detection.
1240 if (gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_X86_AVX2
))
1242 tmpsimd
= GMX_CPUID_SIMD_X86_AVX2_256
;
1244 else if (gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_X86_AVX
))
1246 tmpsimd
= GMX_CPUID_SIMD_X86_AVX_256
;
1248 else if (gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_X86_SSE4_1
))
1250 tmpsimd
= GMX_CPUID_SIMD_X86_SSE4_1
;
1252 else if (gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_X86_SSE2
))
1254 tmpsimd
= GMX_CPUID_SIMD_X86_SSE2
;
1257 else if (gmx_cpuid_vendor(cpuid
) == GMX_CPUID_VENDOR_AMD
)
1259 if (gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_X86_AVX
))
1261 tmpsimd
= GMX_CPUID_SIMD_X86_AVX_128_FMA
;
1263 else if (gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_X86_SSE4_1
))
1265 tmpsimd
= GMX_CPUID_SIMD_X86_SSE4_1
;
1267 else if (gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_X86_SSE2
))
1269 tmpsimd
= GMX_CPUID_SIMD_X86_SSE2
;
1272 else if (gmx_cpuid_vendor(cpuid
) == GMX_CPUID_VENDOR_FUJITSU
)
1274 if (strstr(gmx_cpuid_brand(cpuid
), "SPARC64"))
1276 tmpsimd
= GMX_CPUID_SIMD_SPARC64_HPC_ACE
;
1279 else if (gmx_cpuid_vendor(cpuid
) == GMX_CPUID_VENDOR_IBM
)
1281 if (gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_IBM_QPX
))
1283 tmpsimd
= GMX_CPUID_SIMD_IBM_QPX
;
1285 else if (gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_IBM_VSX
))
1287 /* VSX is better than VMX, so we check it first */
1288 tmpsimd
= GMX_CPUID_SIMD_IBM_VSX
;
1290 else if (gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_IBM_VMX
))
1292 tmpsimd
= GMX_CPUID_SIMD_IBM_VMX
;
1295 else if (gmx_cpuid_vendor(cpuid
) == GMX_CPUID_VENDOR_ARM
)
1297 if (gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_ARM_NEON_ASIMD
))
1299 tmpsimd
= GMX_CPUID_SIMD_ARM_NEON_ASIMD
;
1301 else if (gmx_cpuid_feature(cpuid
, GMX_CPUID_FEATURE_ARM_NEON
))
1303 tmpsimd
= GMX_CPUID_SIMD_ARM_NEON
;
1312 gmx_cpuid_simd_check(gmx_cpuid_t cpuid
,
1314 int print_to_stderr
)
1318 enum gmx_cpuid_simd simd
;
1320 simd
= gmx_cpuid_simd_suggest(cpuid
);
1322 rc
= (simd
!= compiled_simd
);
1324 gmx_cpuid_formatstring(cpuid
, str
, 1023);
1330 "\nDetecting CPU SIMD instructions.\nPresent hardware specification:\n"
1332 "SIMD instructions most likely to fit this hardware: %s\n"
1333 "SIMD instructions selected at GROMACS compile time: %s\n\n",
1335 gmx_cpuid_simd_string
[simd
],
1336 gmx_cpuid_simd_string
[compiled_simd
]);
1343 fprintf(log
, "\nBinary not matching hardware - you might be losing performance.\n"
1344 "SIMD instructions most likely to fit this hardware: %s\n"
1345 "SIMD instructions selected at GROMACS compile time: %s\n\n",
1346 gmx_cpuid_simd_string
[simd
],
1347 gmx_cpuid_simd_string
[compiled_simd
]);
1349 if (print_to_stderr
)
1351 fprintf(stderr
, "Compiled SIMD instructions: %s (Gromacs could use %s on this machine, which is better)\n",
1352 gmx_cpuid_simd_string
[compiled_simd
],
1353 gmx_cpuid_simd_string
[simd
]);
1360 #ifdef GMX_CPUID_STANDALONE
1361 /* Stand-alone program to enable queries of CPU features from Cmake.
1362 * Note that you need to check inline ASM capabilities before compiling and set
1363 * -DGMX_X86_GCC_INLINE_ASM for the cpuid instruction to work...
1366 main(int argc
, char **argv
)
1369 enum gmx_cpuid_simd simd
;
1375 "Usage:\n\n%s [flags]\n\n"
1376 "Available flags:\n"
1377 "-vendor Print CPU vendor.\n"
1378 "-brand Print CPU brand string.\n"
1379 "-family Print CPU family version.\n"
1380 "-model Print CPU model version.\n"
1381 "-stepping Print CPU stepping version.\n"
1382 "-features Print CPU feature flags.\n"
1383 "-simd Print suggested GROMACS SIMD instructions.\n",
1388 gmx_cpuid_init(&cpuid
);
1390 if (!strncmp(argv
[1], "-vendor", 3))
1392 printf("%s\n", gmx_cpuid_vendor_string
[cpuid
->vendor
]);
1394 else if (!strncmp(argv
[1], "-brand", 3))
1396 printf("%s\n", cpuid
->brand
);
1398 else if (!strncmp(argv
[1], "-family", 3))
1400 printf("%d\n", cpuid
->family
);
1402 else if (!strncmp(argv
[1], "-model", 3))
1404 printf("%d\n", cpuid
->model
);
1406 else if (!strncmp(argv
[1], "-stepping", 3))
1408 printf("%d\n", cpuid
->stepping
);
1410 else if (!strncmp(argv
[1], "-features", 3))
1413 for (i
= 0; i
< GMX_CPUID_NFEATURES
; i
++)
1415 if (cpuid
->feature
[i
] == 1)
1421 printf("%s", gmx_cpuid_feature_string
[i
]);
1426 else if (!strncmp(argv
[1], "-simd", 3))
1428 simd
= gmx_cpuid_simd_suggest(cpuid
);
1429 fprintf(stdout
, "%s\n", gmx_cpuid_simd_string
[simd
]);
1432 gmx_cpuid_done(cpuid
);