Merge branch 'release-5-0'
[gromacs.git] / src / gromacs / gmxlib / gmx_cpuid.c
blob44091a79e292e98818f25de455f0cfb5dd20f22c
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #include "gmxpre.h"
37 /*! \cond */
38 #include "gromacs/legacyheaders/gmx_cpuid.h"
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
44 #include <ctype.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
49 #ifdef GMX_NATIVE_WINDOWS
50 /* MSVC definition for __cpuid() */
51 #ifdef _MSC_VER
52 #include <intrin.h>
53 #endif
54 /* sysinfo functions */
55 #include <windows.h>
56 #endif
57 #ifdef HAVE_SCHED_H
58 #include <sched.h>
59 #endif
60 #ifdef HAVE_UNISTD_H
61 /* sysconf() definition */
62 #include <unistd.h>
63 #endif
/* All architectures live in this one file so that it can also be compiled and
 * run at configure time. To avoid sprinkling architecture tests everywhere we
 * decide once, here, whether the CPUID instruction can actually be executed:
 * the target must be x86 AND we need either GNU-style inline assembly or a
 * Microsoft compiler that is at least MSVC 9.0 SP1.
 */
#if defined(GMX_TARGET_X86) && \
    ( defined(GMX_X86_GCC_INLINE_ASM) || \
      ( defined(_MSC_VER) && ( (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729) ) ) )
#    define GMX_CPUID_X86
#endif
76 /* Global constant character strings corresponding to our enumerated types */
77 const char *
78 gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS] =
80 "CannotDetect",
81 "Unknown",
82 "GenuineIntel",
83 "AuthenticAMD",
84 "Fujitsu",
85 "IBM", /* Used on Power and BlueGene/Q */
86 "ARM"
89 const char *
90 gmx_cpuid_vendor_string_alternative[GMX_CPUID_NVENDORS] =
92 "CannotDetect",
93 "Unknown",
94 "GenuineIntel",
95 "AuthenticAMD",
96 "Fujitsu",
97 "ibm", /* Used on Power and BlueGene/Q */
98 "AArch64"
101 const char *
102 gmx_cpuid_feature_string[GMX_CPUID_NFEATURES] =
104 "CannotDetect",
105 "aes",
106 "apic",
107 "avx",
108 "avx2",
109 "avx512f",
110 "avx512pf",
111 "avx512er",
112 "avx512cd",
113 "clfsh",
114 "cmov",
115 "cx8",
116 "cx16",
117 "f16c",
118 "fma",
119 "fma4",
120 "htt",
121 "lahf_lm",
122 "misalignsse",
123 "mmx",
124 "msr",
125 "nonstop_tsc",
126 "pcid",
127 "pclmuldq",
128 "pdcm",
129 "pdpe1gb",
130 "popcnt",
131 "pse",
132 "rdrnd",
133 "rdtscp",
134 "sse2",
135 "sse3",
136 "sse4a",
137 "sse4.1",
138 "sse4.2",
139 "ssse3",
140 "tdt",
141 "x2apic",
142 "xop",
143 "arm_neon",
144 "arm_neon_asimd",
145 "QPX",
146 "VMX",
147 "VSX"
150 const char *
151 gmx_cpuid_simd_string[GMX_CPUID_NSIMD] =
153 "CannotDetect",
154 "None",
155 "Reference",
156 "SSE2",
157 "SSE4.1",
158 "AVX_128_FMA",
159 "AVX_256",
160 "AVX2_256",
161 "AVX_512F",
162 "AVX_512ER",
163 "Sparc64 HPC-ACE",
164 "IBM_QPX",
165 "IBM_VMX",
166 "IBM_VSX",
167 "ARM_NEON",
168 "ARM_NEON_ASIMD"
171 /* Max length of brand string */
172 #define GMX_CPUID_STRLEN 256
175 /* Contents of the abstract datatype */
176 struct gmx_cpuid
178 enum gmx_cpuid_vendor vendor;
179 char brand[GMX_CPUID_STRLEN];
180 int family;
181 int model;
182 int stepping;
183 /* Not using gmx_bool here, since this file must be possible to compile without simple.h */
184 char feature[GMX_CPUID_NFEATURES];
186 /* Basic CPU topology information. For x86 this is a bit complicated since the topology differs between
187 * operating systems and sometimes even settings. For most other architectures you can likely just check
188 * the documentation and then write static information to these arrays rather than detecting on-the-fly.
190 int have_cpu_topology;
191 int nproc; /* total number of logical processors from OS */
192 int npackages;
193 int ncores_per_package;
194 int nhwthreads_per_core;
195 int * package_id;
196 int * core_id; /* Local core id in each package */
197 int * hwthread_id; /* Local hwthread id in each core */
198 int * locality_order; /* Processor indices sorted in locality order */
202 /* Simple routines to access the data structure. The initialization routine is
203 * further down since that needs to call other static routines in this file.
205 enum gmx_cpuid_vendor
206 gmx_cpuid_vendor (gmx_cpuid_t cpuid)
208 return cpuid->vendor;
212 const char *
213 gmx_cpuid_brand (gmx_cpuid_t cpuid)
215 return cpuid->brand;
219 gmx_cpuid_family (gmx_cpuid_t cpuid)
221 return cpuid->family;
225 gmx_cpuid_model (gmx_cpuid_t cpuid)
227 return cpuid->model;
231 gmx_cpuid_stepping (gmx_cpuid_t cpuid)
233 return cpuid->stepping;
237 gmx_cpuid_feature (gmx_cpuid_t cpuid,
238 enum gmx_cpuid_feature feature)
240 return (cpuid->feature[feature] != 0);
245 gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid)
247 return (cpuid->vendor == GMX_CPUID_VENDOR_INTEL &&
248 cpuid->family == 6 &&
249 (cpuid->model == 0x2E ||
250 cpuid->model == 0x1A ||
251 cpuid->model == 0x1E ||
252 cpuid->model == 0x2F ||
253 cpuid->model == 0x2C ||
254 cpuid->model == 0x25));
258 /* What type of SIMD was compiled in, if any? */
259 #ifdef GMX_SIMD_X86_AVX_512ER
260 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512ER;
261 #elif defined GMX_SIMD_X86_AVX_512F
262 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512F;
263 #elif defined GMX_SIMD_X86_AVX2_256
264 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX2_256;
265 #elif defined GMX_SIMD_X86_AVX_256
266 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_256;
267 #elif defined GMX_SIMD_X86_AVX_128_FMA
268 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_128_FMA;
269 #elif defined GMX_SIMD_X86_SSE4_1
270 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE4_1;
271 #elif defined GMX_SIMD_X86_SSE2
272 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE2;
273 #elif defined GMX_SIMD_ARM_NEON
274 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON;
275 #elif defined GMX_SIMD_ARM_NEON_ASIMD
276 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
277 #elif defined GMX_SIMD_SPARC64_HPC_ACE
278 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
279 #elif defined GMX_SIMD_IBM_QPX
280 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_QPX;
281 #elif defined GMX_SIMD_IBM_VMX
282 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VMX;
283 #elif defined GMX_SIMD_IBM_VSX
284 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VSX;
285 #elif defined GMX_SIMD_REFERENCE
286 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_REFERENCE;
287 #else
288 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_NONE;
289 #endif
292 #ifdef GMX_CPUID_X86
/* Run the CPUID instruction on an x86 CPU.
 *
 * level selects the CPUID function and ecxval is loaded into ecx first; most
 * levels ignore it, but some (e.g. level 0xB on Intel) use it as a sub-leaf.
 * The four output registers are stored through eax, ebx, ecx and edx. See the
 * Intel/AMD documentation for the meaning of each level.
 *
 * Returns 0 on success. Returns -1 (with all outputs set to 0) when this build
 * has no way of issuing CPUID, and -1 on old MSVC versions without __cpuidex
 * when a non-zero ecxval was requested.
 */
static int
execute_x86cpuid(unsigned int   level,
                 unsigned int   ecxval,
                 unsigned int * eax,
                 unsigned int * ebx,
                 unsigned int * ecx,
                 unsigned int * edx)
{
    /* CPUID is only supported (1) through the MSVC intrinsics, or (2) if the
     * compiler handles GNU-style inline assembly.
     */
#if (defined _MSC_VER)
    int status;
    int regs[4];

#if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
    /* MSVC 9.0 SP1 or later */
    __cpuidex(regs, level, ecxval);
    status = 0;
#else
    __cpuid(regs, level);
    /* Without __cpuidex we cannot honor a non-zero ecxval, so flag an error */
    status = (ecxval > 0) ? -1 : 0;
#endif
    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];

    return status;

#elif (defined GMX_X86_GCC_INLINE_ASM)
    /* For now this means GMX_X86_GCC_INLINE_ASM should be defined,
     * but there might be more options added in the future.
     */
    *eax = level;
    *ecx = ecxval;
    *ebx = 0;
    *edx = 0;
#if defined(__i386__) && defined(__PIC__)
    /* Avoid clobbering the global offset table in 32-bit pic code (ebx register) */
    __asm__ __volatile__ ("xchgl %%ebx, %1  \n\t"
                          "cpuid            \n\t"
                          "xchgl %%ebx, %1  \n\t"
                          : "+a" (*eax), "+r" (*ebx), "+c" (*ecx), "+d" (*edx));
#else
    /* i386 without PIC, or x86-64. Any register may be clobbered here. */
    __asm__ __volatile__ ("cpuid            \n\t"
                          : "+a" (*eax), "+b" (*ebx), "+c" (*ecx), "+d" (*edx));
#endif
    return 0;

#else
    /* An x86 platform where we do not know how to call cpuid.
     * This is REALLY bad, since we will lose all Gromacs SIMD support.
     */
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;

    return -1;
#endif
}
369 /* Identify CPU features common to Intel & AMD - mainly brand string,
370 * version and some features. Vendor has already been detected outside this.
372 static int
373 cpuid_check_common_x86(gmx_cpuid_t cpuid)
375 int fn, max_stdfn, max_extfn;
376 unsigned int eax, ebx, ecx, edx;
377 char str[GMX_CPUID_STRLEN];
378 char * p;
380 /* Find largest standard/extended function input value */
381 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
382 max_stdfn = eax;
383 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
384 max_extfn = eax;
386 p = str;
387 if (max_extfn >= 0x80000005)
389 /* Get CPU brand string */
390 for (fn = 0x80000002; fn < 0x80000005; fn++)
392 execute_x86cpuid(fn, 0, &eax, &ebx, &ecx, &edx);
393 memcpy(p, &eax, 4);
394 memcpy(p+4, &ebx, 4);
395 memcpy(p+8, &ecx, 4);
396 memcpy(p+12, &edx, 4);
397 p += 16;
399 *p = '\0';
401 /* Remove empty initial space */
402 p = str;
403 while (isspace(*(p)))
405 p++;
407 strncpy(cpuid->brand, p, GMX_CPUID_STRLEN);
409 else
411 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
414 /* Find basic CPU properties */
415 if (max_stdfn >= 1)
417 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
419 cpuid->family = ((eax & 0x0FF00000) >> 20) + ((eax & 0x00000F00) >> 8);
420 /* Note that extended model should be shifted left 4, so only shift right 12 iso 16. */
421 cpuid->model = ((eax & 0x000F0000) >> 12) + ((eax & 0x000000F0) >> 4);
422 cpuid->stepping = (eax & 0x0000000F);
424 /* Feature flags common to AMD and intel */
425 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE3] = (ecx & (1 << 0)) != 0;
426 cpuid->feature[GMX_CPUID_FEATURE_X86_PCLMULDQ] = (ecx & (1 << 1)) != 0;
427 cpuid->feature[GMX_CPUID_FEATURE_X86_SSSE3] = (ecx & (1 << 9)) != 0;
428 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA] = (ecx & (1 << 12)) != 0;
429 cpuid->feature[GMX_CPUID_FEATURE_X86_CX16] = (ecx & (1 << 13)) != 0;
430 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_1] = (ecx & (1 << 19)) != 0;
431 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_2] = (ecx & (1 << 20)) != 0;
432 cpuid->feature[GMX_CPUID_FEATURE_X86_POPCNT] = (ecx & (1 << 23)) != 0;
433 cpuid->feature[GMX_CPUID_FEATURE_X86_AES] = (ecx & (1 << 25)) != 0;
434 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX] = (ecx & (1 << 28)) != 0;
435 cpuid->feature[GMX_CPUID_FEATURE_X86_F16C] = (ecx & (1 << 29)) != 0;
436 cpuid->feature[GMX_CPUID_FEATURE_X86_RDRND] = (ecx & (1 << 30)) != 0;
438 cpuid->feature[GMX_CPUID_FEATURE_X86_PSE] = (edx & (1 << 3)) != 0;
439 cpuid->feature[GMX_CPUID_FEATURE_X86_MSR] = (edx & (1 << 5)) != 0;
440 cpuid->feature[GMX_CPUID_FEATURE_X86_CX8] = (edx & (1 << 8)) != 0;
441 cpuid->feature[GMX_CPUID_FEATURE_X86_APIC] = (edx & (1 << 9)) != 0;
442 cpuid->feature[GMX_CPUID_FEATURE_X86_CMOV] = (edx & (1 << 15)) != 0;
443 cpuid->feature[GMX_CPUID_FEATURE_X86_CLFSH] = (edx & (1 << 19)) != 0;
444 cpuid->feature[GMX_CPUID_FEATURE_X86_MMX] = (edx & (1 << 23)) != 0;
445 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE2] = (edx & (1 << 26)) != 0;
446 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = (edx & (1 << 28)) != 0;
448 else
450 cpuid->family = -1;
451 cpuid->model = -1;
452 cpuid->stepping = -1;
455 if (max_extfn >= 0x80000001)
457 execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
458 cpuid->feature[GMX_CPUID_FEATURE_X86_LAHF_LM] = (ecx & (1 << 0)) != 0;
459 cpuid->feature[GMX_CPUID_FEATURE_X86_PDPE1GB] = (edx & (1 << 26)) != 0;
460 cpuid->feature[GMX_CPUID_FEATURE_X86_RDTSCP] = (edx & (1 << 27)) != 0;
463 if (max_extfn >= 0x80000007)
465 execute_x86cpuid(0x80000007, 0, &eax, &ebx, &ecx, &edx);
466 cpuid->feature[GMX_CPUID_FEATURE_X86_NONSTOP_TSC] = (edx & (1 << 8)) != 0;
468 return 0;
/* Renumber the values in data[0..n-1] so that they become consecutive indices
 * starting from 0, preserving the relative order of the original values, and
 * return the number of distinct values found.
 *
 * For example, the array {0,1,2,8,9,10,8,9,10,0,1,2} is rewritten to
 * {0,1,2,3,4,5,3,4,5,0,1,2} and 6 is returned.
 *
 * Returns 0 without touching data when n is not positive or when the
 * temporary work array cannot be allocated.
 */
static int
cpuid_renumber_elements(int *data, int n)
{
    int *unique;
    int  i, j, nunique, found;

    if (n <= 0)
    {
        return 0;
    }

    unique = malloc(sizeof(int)*n);
    if (unique == NULL)
    {
        return 0;
    }

    /* Collect the distinct values, kept in ascending order */
    nunique = 0;
    for (i = 0; i < n; i++)
    {
        for (j = 0, found = 0; j < nunique && !found; j++)
        {
            found = (data[i] == unique[j]);
        }
        if (!found)
        {
            /* Insert in sorted order! */
            for (j = nunique++; j > 0 && unique[j-1] > data[i]; j--)
            {
                unique[j] = unique[j-1];
            }
            unique[j] = data[i];
        }
    }

    /* Replace each value by its rank among the distinct values */
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < nunique; j++)
        {
            if (data[i] == unique[j])
            {
                data[i] = j;
                /* Stop here: the freshly written rank must not be matched
                 * against the remaining original values.
                 */
                break;
            }
        }
    }

    free(unique);
    return nunique;
}
516 /* APIC IDs, or everything you wanted to know about your x86 cores but were afraid to ask...
518 * Raw APIC IDs are unfortunately somewhat dirty. For technical reasons they are assigned
519 * in power-of-2 chunks, and even then there are no guarantees about specific numbers - all
520 * we know is that the part for each thread/core/package is unique, and how many bits are
521 * reserved for that part.
522 * This routine does internal renumbering so we get continuous indices, and also
523 * decodes the actual number of packages,cores-per-package and hwthreads-per-core.
524 * Returns: 0 on success, non-zero on failure.
526 static int
527 cpuid_x86_decode_apic_id(gmx_cpuid_t cpuid, int *apic_id, int core_bits, int hwthread_bits)
529 int i, idx;
530 int hwthread_mask, core_mask_after_shift;
532 cpuid->hwthread_id = malloc(sizeof(int)*cpuid->nproc);
533 cpuid->core_id = malloc(sizeof(int)*cpuid->nproc);
534 cpuid->package_id = malloc(sizeof(int)*cpuid->nproc);
535 cpuid->locality_order = malloc(sizeof(int)*cpuid->nproc);
537 hwthread_mask = (1 << hwthread_bits) - 1;
538 core_mask_after_shift = (1 << core_bits) - 1;
540 for (i = 0; i < cpuid->nproc; i++)
542 cpuid->hwthread_id[i] = apic_id[i] & hwthread_mask;
543 cpuid->core_id[i] = (apic_id[i] >> hwthread_bits) & core_mask_after_shift;
544 cpuid->package_id[i] = apic_id[i] >> (core_bits + hwthread_bits);
547 cpuid->npackages = cpuid_renumber_elements(cpuid->package_id, cpuid->nproc);
548 cpuid->ncores_per_package = cpuid_renumber_elements(cpuid->core_id, cpuid->nproc);
549 cpuid->nhwthreads_per_core = cpuid_renumber_elements(cpuid->hwthread_id, cpuid->nproc);
551 /* now check for consistency */
552 if ( (cpuid->npackages * cpuid->ncores_per_package *
553 cpuid->nhwthreads_per_core) != cpuid->nproc)
555 /* the packages/cores-per-package/hwthreads-per-core counts are
556 inconsistent. */
557 return -1;
560 /* Create a locality order array, i.e. first all resources in package0, which in turn
561 * are sorted so we first have all resources in core0, where threads are sorted in order, etc.
564 for (i = 0; i < cpuid->nproc; i++)
566 idx = (cpuid->package_id[i]*cpuid->ncores_per_package + cpuid->core_id[i])*cpuid->nhwthreads_per_core + cpuid->hwthread_id[i];
567 cpuid->locality_order[idx] = i;
569 return 0;
573 /* Detection of AMD-specific CPU features */
574 static int
575 cpuid_check_amd_x86(gmx_cpuid_t cpuid)
577 int max_stdfn, max_extfn, ret;
578 unsigned int eax, ebx, ecx, edx;
579 int hwthread_bits, core_bits;
580 int * apic_id;
582 cpuid_check_common_x86(cpuid);
584 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
585 max_stdfn = eax;
587 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
588 max_extfn = eax;
590 if (max_extfn >= 0x80000001)
592 execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
594 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4A] = (ecx & (1 << 6)) != 0;
595 cpuid->feature[GMX_CPUID_FEATURE_X86_MISALIGNSSE] = (ecx & (1 << 7)) != 0;
596 cpuid->feature[GMX_CPUID_FEATURE_X86_XOP] = (ecx & (1 << 11)) != 0;
597 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA4] = (ecx & (1 << 16)) != 0;
600 /* Query APIC information on AMD */
601 if (max_extfn >= 0x80000008)
603 #if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
604 /* Linux */
605 unsigned int i;
606 cpu_set_t cpuset, save_cpuset;
607 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
608 apic_id = malloc(sizeof(int)*cpuid->nproc);
609 sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
610 /* Get APIC id from each core */
611 CPU_ZERO(&cpuset);
612 for (i = 0; i < cpuid->nproc; i++)
614 CPU_SET(i, &cpuset);
615 sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
616 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
617 apic_id[i] = ebx >> 24;
618 CPU_CLR(i, &cpuset);
620 /* Reset affinity to the value it had when calling this routine */
621 sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
622 #define CPUID_HAVE_APIC
623 #elif defined GMX_NATIVE_WINDOWS
624 /* Windows */
625 DWORD_PTR i;
626 SYSTEM_INFO sysinfo;
627 unsigned int save_affinity, affinity;
628 GetSystemInfo( &sysinfo );
629 cpuid->nproc = sysinfo.dwNumberOfProcessors;
630 apic_id = malloc(sizeof(int)*cpuid->nproc);
631 /* Get previous affinity mask */
632 save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
633 for (i = 0; i < cpuid->nproc; i++)
635 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
636 Sleep(0);
637 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
638 apic_id[i] = ebx >> 24;
640 SetThreadAffinityMask(GetCurrentThread(), save_affinity);
641 #define CPUID_HAVE_APIC
642 #endif
643 #ifdef CPUID_HAVE_APIC
644 /* AMD does not support SMT yet - there are no hwthread bits in apic ID */
645 hwthread_bits = 0;
646 /* Get number of core bits in apic ID - try modern extended method first */
647 execute_x86cpuid(0x80000008, 0, &eax, &ebx, &ecx, &edx);
648 core_bits = (ecx >> 12) & 0xf;
649 if (core_bits == 0)
651 /* Legacy method for old single/dual core AMD CPUs */
652 int i = ecx & 0xF;
653 for (core_bits = 0; (i>>core_bits) > 0; core_bits++)
658 ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
659 hwthread_bits);
660 cpuid->have_cpu_topology = (ret == 0);
661 #endif
663 return 0;
666 /* Detection of Intel-specific CPU features */
667 static int
668 cpuid_check_intel_x86(gmx_cpuid_t cpuid)
670 unsigned int max_stdfn, max_extfn, ret;
671 unsigned int eax, ebx, ecx, edx;
672 unsigned int max_logical_cores, max_physical_cores;
673 int hwthread_bits, core_bits;
674 int * apic_id;
676 cpuid_check_common_x86(cpuid);
678 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
679 max_stdfn = eax;
681 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
682 max_extfn = eax;
684 if (max_stdfn >= 1)
686 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
687 cpuid->feature[GMX_CPUID_FEATURE_X86_PDCM] = (ecx & (1 << 15)) != 0;
688 cpuid->feature[GMX_CPUID_FEATURE_X86_PCID] = (ecx & (1 << 17)) != 0;
689 cpuid->feature[GMX_CPUID_FEATURE_X86_X2APIC] = (ecx & (1 << 21)) != 0;
690 cpuid->feature[GMX_CPUID_FEATURE_X86_TDT] = (ecx & (1 << 24)) != 0;
693 if (max_stdfn >= 7)
695 execute_x86cpuid(0x7, 0, &eax, &ebx, &ecx, &edx);
696 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2] = (ebx & (1 << 5)) != 0;
697 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512F] = (ebx & (1 << 16)) != 0;
698 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512PF] = (ebx & (1 << 26)) != 0;
699 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512ER] = (ebx & (1 << 27)) != 0;
700 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512CD] = (ebx & (1 << 28)) != 0;
703 /* Check whether Hyper-Threading is enabled, not only supported */
704 if (cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] && max_stdfn >= 4)
706 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
707 max_logical_cores = (ebx >> 16) & 0x0FF;
708 execute_x86cpuid(0x4, 0, &eax, &ebx, &ecx, &edx);
709 max_physical_cores = ((eax >> 26) & 0x3F) + 1;
711 /* Clear HTT flag if we only have 1 logical core per physical */
712 if (max_logical_cores/max_physical_cores < 2)
714 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = 0;
718 if (max_stdfn >= 0xB)
720 /* Query x2 APIC information from cores */
721 #if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
722 /* Linux */
723 unsigned int i;
724 cpu_set_t cpuset, save_cpuset;
725 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
726 apic_id = malloc(sizeof(int)*cpuid->nproc);
727 sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
728 /* Get x2APIC ID from each hardware thread */
729 CPU_ZERO(&cpuset);
730 for (i = 0; i < cpuid->nproc; i++)
732 CPU_SET(i, &cpuset);
733 sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
734 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
735 apic_id[i] = edx;
736 CPU_CLR(i, &cpuset);
738 /* Reset affinity to the value it had when calling this routine */
739 sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
740 #define CPUID_HAVE_APIC
741 #elif defined GMX_NATIVE_WINDOWS
742 /* Windows */
743 DWORD_PTR i;
744 SYSTEM_INFO sysinfo;
745 unsigned int save_affinity, affinity;
746 GetSystemInfo( &sysinfo );
747 cpuid->nproc = sysinfo.dwNumberOfProcessors;
748 apic_id = malloc(sizeof(int)*cpuid->nproc);
749 /* Get previous affinity mask */
750 save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
751 for (i = 0; i < cpuid->nproc; i++)
753 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
754 Sleep(0);
755 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
756 apic_id[i] = edx;
758 SetThreadAffinityMask(GetCurrentThread(), save_affinity);
759 #define CPUID_HAVE_APIC
760 #endif
761 #ifdef CPUID_HAVE_APIC
762 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
763 hwthread_bits = eax & 0x1F;
764 execute_x86cpuid(0xB, 1, &eax, &ebx, &ecx, &edx);
765 core_bits = (eax & 0x1F) - hwthread_bits;
766 ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
767 hwthread_bits);
768 cpuid->have_cpu_topology = (ret == 0);
769 #endif
771 return 0;
773 #endif /* GMX_CPUID_X86 */
/* Copy the part of 'in' that precedes the first ':' into 's', with trailing
 * white space removed. 's' must have room for 'maxlength' characters and is
 * always NUL-terminated on return (input is truncated to fit if necessary).
 * If no ':' is found within the copied text, 's' becomes the empty string.
 * Nothing is written when maxlength is not positive.
 */
static void
chomp_substring_before_colon(const char *in, char *s, int maxlength)
{
    char *p;

    if (maxlength <= 0)
    {
        return;
    }

    /* strncpy() alone does not guarantee termination, so reserve the last byte */
    strncpy(s, in, maxlength - 1);
    s[maxlength - 1] = '\0';

    p = strchr(s, ':');
    if (p != NULL)
    {
        *p = '\0';
        /* Check the bound BEFORE looking at the previous character, so we
         * never read in front of the buffer when the key is empty.
         */
        while (p > s && isspace((unsigned char) p[-1]))
        {
            *(--p) = '\0';
        }
    }
    else
    {
        *s = '\0';
    }
}
/* Copy the part of 'in' that follows the first ':' into 's', with leading and
 * trailing white space removed. 's' must have room for 'maxlength' characters
 * and is always NUL-terminated on return (input is truncated to fit if
 * necessary). If 'in' contains no ':', 's' becomes the empty string.
 * Nothing is written when maxlength is not positive.
 */
static void
chomp_substring_after_colon(const char *in, char *s, int maxlength)
{
    const char *value;
    size_t      len;

    if (maxlength <= 0)
    {
        return;
    }

    value = strchr(in, ':');
    if (value == NULL)
    {
        *s = '\0';
        return;
    }

    /* Skip the colon and any white space that follows it */
    value++;
    while (isspace((unsigned char) *value))
    {
        value++;
    }

    /* strncpy() alone does not guarantee termination, so reserve the last byte */
    strncpy(s, value, maxlength - 1);
    s[maxlength - 1] = '\0';

    /* Trim from the end without ever stepping in front of the buffer */
    len = strlen(s);
    while (len > 0 && isspace((unsigned char) s[len - 1]))
    {
        s[--len] = '\0';
    }
}
821 static int
822 cpuid_check_arm(gmx_cpuid_t cpuid)
824 #if defined(__linux__) || defined(__linux)
825 FILE *fp;
826 char buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN], buffer3[GMX_CPUID_STRLEN];
828 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
830 while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
832 chomp_substring_before_colon(buffer, buffer2, GMX_CPUID_STRLEN);
833 chomp_substring_after_colon(buffer, buffer3, GMX_CPUID_STRLEN);
835 if (!strcmp(buffer2, "Processor"))
837 strncpy(cpuid->brand, buffer3, GMX_CPUID_STRLEN);
839 else if (!strcmp(buffer2, "CPU architecture"))
841 cpuid->family = strtol(buffer3, NULL, 10);
842 if (!strcmp(buffer3, "AArch64"))
844 cpuid->family = 8;
847 else if (!strcmp(buffer2, "CPU part"))
849 cpuid->model = strtol(buffer3, NULL, 16);
851 else if (!strcmp(buffer2, "CPU revision"))
853 cpuid->stepping = strtol(buffer3, NULL, 10);
855 else if (!strcmp(buffer2, "Features") && strstr(buffer3, "neon"))
857 cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 1;
859 else if (!strcmp(buffer2, "Features") && strstr(buffer3, "asimd"))
861 cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
865 fclose(fp);
866 #else
867 # ifdef __aarch64__
868 /* Strange 64-bit non-linux platform. However, since NEON ASIMD is present on all
869 * implementations of AArch64 this far, we assume it is present for now.
871 cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
872 # else
873 /* Strange 32-bit non-linux platform. We cannot assume that neon is present. */
874 cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 0;
875 # endif
876 #endif
877 return 0;
881 static int
882 cpuid_check_ibm(gmx_cpuid_t cpuid)
884 #if defined(__linux__) || defined(__linux)
885 FILE *fp;
886 char buffer[GMX_CPUID_STRLEN], before_colon[GMX_CPUID_STRLEN], after_colon[GMX_CPUID_STRLEN];
888 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
890 while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
892 chomp_substring_before_colon(buffer, before_colon, GMX_CPUID_STRLEN);
893 chomp_substring_after_colon(buffer, after_colon, GMX_CPUID_STRLEN);
895 if (!strcmp(before_colon, "cpu") || !strcmp(before_colon, "Processor"))
897 strncpy(cpuid->brand, after_colon, GMX_CPUID_STRLEN);
899 if (!strcmp(before_colon, "model name") ||
900 !strcmp(before_colon, "model") ||
901 !strcmp(before_colon, "Processor") ||
902 !strcmp(before_colon, "cpu"))
904 if (strstr(after_colon, "altivec"))
906 cpuid->feature[GMX_CPUID_FEATURE_IBM_VMX] = 1;
908 if (!strstr(after_colon, "POWER6") && !strstr(after_colon, "Power6") &&
909 !strstr(after_colon, "power6"))
911 cpuid->feature[GMX_CPUID_FEATURE_IBM_VSX] = 1;
917 fclose(fp);
919 if (strstr(cpuid->brand, "A2"))
921 /* BlueGene/Q */
922 cpuid->feature[GMX_CPUID_FEATURE_IBM_QPX] = 1;
924 #else
925 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
926 cpuid->feature[GMX_CPUID_FEATURE_IBM_QPX] = 0;
927 cpuid->feature[GMX_CPUID_FEATURE_IBM_VMX] = 0;
928 cpuid->feature[GMX_CPUID_FEATURE_IBM_VSX] = 0;
929 #endif
930 return 0;
934 /* Try to find the vendor of the current CPU, so we know what specific
935 * detection routine to call.
937 static enum gmx_cpuid_vendor
938 cpuid_check_vendor(void)
940 enum gmx_cpuid_vendor i, vendor;
941 /* Register data used on x86 */
942 unsigned int eax, ebx, ecx, edx;
943 char vendorstring[13];
944 FILE * fp;
945 char buffer[GMX_CPUID_STRLEN];
946 char before_colon[GMX_CPUID_STRLEN];
947 char after_colon[GMX_CPUID_STRLEN];
949 /* Set default first */
950 vendor = GMX_CPUID_VENDOR_UNKNOWN;
952 #ifdef GMX_CPUID_X86
953 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
955 memcpy(vendorstring, &ebx, 4);
956 memcpy(vendorstring+4, &edx, 4);
957 memcpy(vendorstring+8, &ecx, 4);
959 vendorstring[12] = '\0';
961 for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
963 if (!strncmp(vendorstring, gmx_cpuid_vendor_string[i], 12))
965 vendor = i;
968 #elif defined(__linux__) || defined(__linux)
969 /* General Linux. Try to get CPU vendor from /proc/cpuinfo */
970 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
972 while ( (vendor == GMX_CPUID_VENDOR_UNKNOWN) && (fgets(buffer, sizeof(buffer), fp) != NULL))
974 chomp_substring_before_colon(buffer, before_colon, sizeof(before_colon));
975 /* Intel/AMD use "vendor_id", IBM "vendor", "model", or "cpu". Fujitsu "manufacture".
976 * On ARM there does not seem to be a vendor, but ARM or AArch64 is listed in the Processor string.
977 * Add others if you have them!
979 if (!strcmp(before_colon, "vendor_id")
980 || !strcmp(before_colon, "vendor")
981 || !strcmp(before_colon, "manufacture")
982 || !strcmp(before_colon, "model")
983 || !strcmp(before_colon, "Processor")
984 || !strcmp(before_colon, "cpu"))
986 chomp_substring_after_colon(buffer, after_colon, sizeof(after_colon));
987 for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
989 /* Be liberal and accept if we find the vendor
990 * string (or alternative string) anywhere. Using
991 * strcasestr() would be non-portable. */
992 if (strstr(after_colon, gmx_cpuid_vendor_string[i])
993 || strstr(after_colon, gmx_cpuid_vendor_string_alternative[i]))
995 vendor = i;
998 /* If we did not find vendor yet, check if it is IBM:
999 * On some Power/PowerPC systems it only says power, not IBM.
1001 if (vendor == GMX_CPUID_VENDOR_UNKNOWN &&
1002 ((strstr(after_colon, "POWER") || strstr(after_colon, "Power") ||
1003 strstr(after_colon, "power"))))
1005 vendor = GMX_CPUID_VENDOR_IBM;
1010 fclose(fp);
1011 #elif defined(__arm__) || defined (__arm) || defined(__aarch64__)
1012 /* If we are using ARM on something that is not linux we have to trust the compiler,
1013 * and we cannot get the extra info that might be present in /proc/cpuinfo.
1015 vendor = GMX_CPUID_VENDOR_ARM;
1016 #endif
1017 return vendor;
1023 gmx_cpuid_topology(gmx_cpuid_t cpuid,
1024 int * nprocessors,
1025 int * npackages,
1026 int * ncores_per_package,
1027 int * nhwthreads_per_core,
1028 const int ** package_id,
1029 const int ** core_id,
1030 const int ** hwthread_id,
1031 const int ** locality_order)
1033 int rc;
1035 if (cpuid->have_cpu_topology)
1037 *nprocessors = cpuid->nproc;
1038 *npackages = cpuid->npackages;
1039 *ncores_per_package = cpuid->ncores_per_package;
1040 *nhwthreads_per_core = cpuid->nhwthreads_per_core;
1041 *package_id = cpuid->package_id;
1042 *core_id = cpuid->core_id;
1043 *hwthread_id = cpuid->hwthread_id;
1044 *locality_order = cpuid->locality_order;
1045 rc = 0;
1047 else
1049 rc = -1;
1051 return rc;
1055 enum gmx_cpuid_x86_smt
1056 gmx_cpuid_x86_smt(gmx_cpuid_t cpuid)
1058 enum gmx_cpuid_x86_smt rc;
1060 if (cpuid->have_cpu_topology)
1062 rc = (cpuid->nhwthreads_per_core > 1) ? GMX_CPUID_X86_SMT_ENABLED : GMX_CPUID_X86_SMT_DISABLED;
1064 else if (cpuid->vendor == GMX_CPUID_VENDOR_AMD || gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_HTT) == 0)
1066 rc = GMX_CPUID_X86_SMT_DISABLED;
1068 else
1070 rc = GMX_CPUID_X86_SMT_CANNOTDETECT;
1072 return rc;
1077 gmx_cpuid_init (gmx_cpuid_t * pcpuid)
1079 gmx_cpuid_t cpuid;
1080 int i;
1081 FILE * fp;
1082 char buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN];
1083 int found_brand;
1085 cpuid = malloc(sizeof(*cpuid));
1087 *pcpuid = cpuid;
1089 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
1091 cpuid->feature[i] = 0;
1094 cpuid->have_cpu_topology = 0;
1095 cpuid->nproc = 0;
1096 cpuid->npackages = 0;
1097 cpuid->ncores_per_package = 0;
1098 cpuid->nhwthreads_per_core = 0;
1099 cpuid->package_id = NULL;
1100 cpuid->core_id = NULL;
1101 cpuid->hwthread_id = NULL;
1102 cpuid->locality_order = NULL;
1104 cpuid->vendor = cpuid_check_vendor();
1106 switch (cpuid->vendor)
1108 #ifdef GMX_CPUID_X86
1109 case GMX_CPUID_VENDOR_INTEL:
1110 cpuid_check_intel_x86(cpuid);
1111 break;
1112 case GMX_CPUID_VENDOR_AMD:
1113 cpuid_check_amd_x86(cpuid);
1114 break;
1115 #endif
1116 case GMX_CPUID_VENDOR_ARM:
1117 cpuid_check_arm(cpuid);
1118 break;
1119 case GMX_CPUID_VENDOR_IBM:
1120 cpuid_check_ibm(cpuid);
1121 break;
1122 default:
1123 /* Default value */
1124 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
1125 #if defined(__linux__) || defined(__linux)
1126 /* General Linux. Try to get CPU type from /proc/cpuinfo */
1127 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
1129 found_brand = 0;
1130 while ( (found_brand == 0) && (fgets(buffer, sizeof(buffer), fp) != NULL))
1132 chomp_substring_before_colon(buffer, buffer2, sizeof(buffer2));
1133 /* Intel uses "model name", Fujitsu and IBM "cpu". */
1134 if (!strcmp(buffer2, "model name") || !strcmp(buffer2, "cpu"))
1136 chomp_substring_after_colon(buffer, cpuid->brand, GMX_CPUID_STRLEN);
1137 found_brand = 1;
1141 fclose(fp);
1142 #endif
1143 cpuid->family = 0;
1144 cpuid->model = 0;
1145 cpuid->stepping = 0;
1147 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
1149 cpuid->feature[i] = 0;
1151 cpuid->feature[GMX_CPUID_FEATURE_CANNOTDETECT] = 1;
1152 break;
1154 return 0;
1159 void
1160 gmx_cpuid_done (gmx_cpuid_t cpuid)
1162 free(cpuid);
1167 gmx_cpuid_formatstring (gmx_cpuid_t cpuid,
1168 char * str,
1169 int n)
1171 int c;
1172 int i;
1173 enum gmx_cpuid_feature feature;
1175 #ifdef _MSC_VER
1176 _snprintf(str, n,
1177 "Vendor: %s\n"
1178 "Brand: %s\n"
1179 "Family: %2d Model: %2d Stepping: %2d\n"
1180 "Features:",
1181 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
1182 gmx_cpuid_brand(cpuid),
1183 gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
1184 #else
1185 snprintf(str, n,
1186 "Vendor: %s\n"
1187 "Brand: %s\n"
1188 "Family: %2d Model: %2d Stepping: %2d\n"
1189 "Features:",
1190 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
1191 gmx_cpuid_brand(cpuid),
1192 gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
1193 #endif
1195 str[n-1] = '\0';
1196 c = strlen(str);
1197 n -= c;
1198 str += c;
1200 for (feature = GMX_CPUID_FEATURE_CANNOTDETECT; feature < GMX_CPUID_NFEATURES; feature++)
1202 if (gmx_cpuid_feature(cpuid, feature) == 1)
1204 #ifdef _MSC_VER
1205 _snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
1206 #else
1207 snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
1208 #endif
1209 str[n-1] = '\0';
1210 c = strlen(str);
1211 n -= c;
1212 str += c;
1215 #ifdef _MSC_VER
1216 _snprintf(str, n, "\n");
1217 #else
1218 snprintf(str, n, "\n");
1219 #endif
1220 str[n-1] = '\0';
1222 return 0;
1227 enum gmx_cpuid_simd
1228 gmx_cpuid_simd_suggest (gmx_cpuid_t cpuid)
1230 enum gmx_cpuid_simd tmpsimd;
1232 tmpsimd = GMX_CPUID_SIMD_NONE;
1234 if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_INTEL)
1236 /* TODO: Add check for AVX-512F & AVX-512ER here as soon as we
1237 * have implemented verlet kernels for them. Until then,
1238 * we should pick AVX2 instead for the automatic detection.
1240 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX2))
1242 tmpsimd = GMX_CPUID_SIMD_X86_AVX2_256;
1244 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
1246 tmpsimd = GMX_CPUID_SIMD_X86_AVX_256;
1248 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
1250 tmpsimd = GMX_CPUID_SIMD_X86_SSE4_1;
1252 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
1254 tmpsimd = GMX_CPUID_SIMD_X86_SSE2;
1257 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_AMD)
1259 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
1261 tmpsimd = GMX_CPUID_SIMD_X86_AVX_128_FMA;
1263 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
1265 tmpsimd = GMX_CPUID_SIMD_X86_SSE4_1;
1267 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
1269 tmpsimd = GMX_CPUID_SIMD_X86_SSE2;
1272 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_FUJITSU)
1274 if (strstr(gmx_cpuid_brand(cpuid), "SPARC64"))
1276 tmpsimd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
1279 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_IBM)
1281 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_QPX))
1283 tmpsimd = GMX_CPUID_SIMD_IBM_QPX;
1285 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_VSX))
1287 /* VSX is better than VMX, so we check it first */
1288 tmpsimd = GMX_CPUID_SIMD_IBM_VSX;
1290 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_VMX))
1292 tmpsimd = GMX_CPUID_SIMD_IBM_VMX;
1295 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_ARM)
1297 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON_ASIMD))
1299 tmpsimd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
1301 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON))
1303 tmpsimd = GMX_CPUID_SIMD_ARM_NEON;
1306 return tmpsimd;
1312 gmx_cpuid_simd_check(gmx_cpuid_t cpuid,
1313 FILE * log,
1314 int print_to_stderr)
1316 int rc;
1317 char str[1024];
1318 enum gmx_cpuid_simd simd;
1320 simd = gmx_cpuid_simd_suggest(cpuid);
1322 rc = (simd != compiled_simd);
1324 gmx_cpuid_formatstring(cpuid, str, 1023);
1325 str[1023] = '\0';
1327 if (log != NULL)
1329 fprintf(log,
1330 "\nDetecting CPU SIMD instructions.\nPresent hardware specification:\n"
1331 "%s"
1332 "SIMD instructions most likely to fit this hardware: %s\n"
1333 "SIMD instructions selected at GROMACS compile time: %s\n\n",
1334 str,
1335 gmx_cpuid_simd_string[simd],
1336 gmx_cpuid_simd_string[compiled_simd]);
1339 if (rc != 0)
1341 if (log != NULL)
1343 fprintf(log, "\nBinary not matching hardware - you might be losing performance.\n"
1344 "SIMD instructions most likely to fit this hardware: %s\n"
1345 "SIMD instructions selected at GROMACS compile time: %s\n\n",
1346 gmx_cpuid_simd_string[simd],
1347 gmx_cpuid_simd_string[compiled_simd]);
1349 if (print_to_stderr)
1351 fprintf(stderr, "Compiled SIMD instructions: %s (Gromacs could use %s on this machine, which is better)\n",
1352 gmx_cpuid_simd_string[compiled_simd],
1353 gmx_cpuid_simd_string[simd]);
1356 return rc;
#ifdef GMX_CPUID_STANDALONE
/* Stand-alone program to enable queries of CPU features from Cmake.
 * Note that you need to check inline ASM capabilities before compiling and set
 * -DGMX_X86_GCC_INLINE_ASM for the cpuid instruction to work...
 *
 * The first command-line argument selects what to print; only its first
 * three characters are compared against the known flags. Without an
 * argument a usage message is printed instead.
 */
int
main(int argc, char **argv)
{
    gmx_cpuid_t          cpuid;
    enum gmx_cpuid_simd  suggested;
    const char *         flag;
    int                  idx;
    int                  nprinted;

    if (argc < 2)
    {
        fprintf(stdout,
                "Usage:\n\n%s [flags]\n\n"
                "Available flags:\n"
                "-vendor Print CPU vendor.\n"
                "-brand Print CPU brand string.\n"
                "-family Print CPU family version.\n"
                "-model Print CPU model version.\n"
                "-stepping Print CPU stepping version.\n"
                "-features Print CPU feature flags.\n"
                "-simd Print suggested GROMACS SIMD instructions.\n",
                argv[0]);
        exit(0);
    }

    flag = argv[1];

    gmx_cpuid_init(&cpuid);

    if (strncmp(flag, "-vendor", 3) == 0)
    {
        printf("%s\n", gmx_cpuid_vendor_string[cpuid->vendor]);
    }
    else if (strncmp(flag, "-brand", 3) == 0)
    {
        printf("%s\n", cpuid->brand);
    }
    else if (strncmp(flag, "-family", 3) == 0)
    {
        printf("%d\n", cpuid->family);
    }
    else if (strncmp(flag, "-model", 3) == 0)
    {
        printf("%d\n", cpuid->model);
    }
    else if (strncmp(flag, "-stepping", 3) == 0)
    {
        printf("%d\n", cpuid->stepping);
    }
    else if (strncmp(flag, "-features", 3) == 0)
    {
        /* Space-separated list of the names of all features that are set. */
        nprinted = 0;
        for (idx = 0; idx < GMX_CPUID_NFEATURES; idx++)
        {
            if (cpuid->feature[idx] != 1)
            {
                continue;
            }
            if (nprinted > 0)
            {
                printf(" ");
            }
            nprinted++;
            printf("%s", gmx_cpuid_feature_string[idx]);
        }
        printf("\n");
    }
    else if (strncmp(flag, "-simd", 3) == 0)
    {
        suggested = gmx_cpuid_simd_suggest(cpuid);
        fprintf(stdout, "%s\n", gmx_cpuid_simd_string[suggested]);
    }

    gmx_cpuid_done(cpuid);

    return 0;
}

#endif
1440 /*! \endcond */