Fix SIMD detection on new AMD AVX CPUs w/o fma
[gromacs/AngularHB.git] / src / gromacs / gmxlib / gmx_cpuid.c
blob48e5ddd2531240508b058ecca66e75349a0caec1
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #include "gmxpre.h"
37 /*! \cond */
38 #include "gromacs/legacyheaders/gmx_cpuid.h"
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
44 #include <ctype.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
49 #ifdef GMX_NATIVE_WINDOWS
50 /* MSVC definition for __cpuid() */
51 #ifdef _MSC_VER
52 #include <intrin.h>
53 #endif
54 /* sysinfo functions */
55 #include <windows.h>
56 #endif
57 #ifdef HAVE_SCHED_H
58 #include <sched.h>
59 #endif
60 #ifdef HAVE_UNISTD_H
61 /* sysconf() definition */
62 #include <unistd.h>
63 #endif
/* For convenience, and to enable configure-time invocation, we keep all architectures
 * in a single file, but to avoid repeated ifdefs we set the overall architecture here.
 */
#ifdef GMX_TARGET_X86
/* OK, it is x86, but can we execute cpuid? That requires either GNU-style
 * inline assembly or an MSVC new enough to provide __cpuidex(), i.e. anything
 * after version 1500, or version 1500 with full version >= 150030729.
 * Note the logical && between the two version tests: both must hold.
 */
#if defined(GMX_X86_GCC_INLINE_ASM) || ( defined(_MSC_VER) && ( (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)))
#    define GMX_CPUID_X86
#endif
#endif
76 /* Global constant character strings corresponding to our enumerated types */
77 const char *
78 gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS] =
80 "CannotDetect",
81 "Unknown",
82 "GenuineIntel",
83 "AuthenticAMD",
84 "Fujitsu",
85 "IBM", /* Used on Power and BlueGene/Q */
86 "ARM"
89 const char *
90 gmx_cpuid_vendor_string_alternative[GMX_CPUID_NVENDORS] =
92 "CannotDetect",
93 "Unknown",
94 "GenuineIntel",
95 "AuthenticAMD",
96 "Fujitsu",
97 "ibm", /* Used on Power and BlueGene/Q */
98 "AArch64"
101 const char *
102 gmx_cpuid_feature_string[GMX_CPUID_NFEATURES] =
104 "CannotDetect",
105 "aes",
106 "apic",
107 "avx",
108 "avx2",
109 "avx512f",
110 "avx512pf",
111 "avx512er",
112 "avx512cd",
113 "clfsh",
114 "cmov",
115 "cx8",
116 "cx16",
117 "f16c",
118 "fma",
119 "fma4",
120 "htt",
121 "lahf_lm",
122 "misalignsse",
123 "mmx",
124 "msr",
125 "nonstop_tsc",
126 "pcid",
127 "pclmuldq",
128 "pdcm",
129 "pdpe1gb",
130 "popcnt",
131 "pse",
132 "rdrnd",
133 "rdtscp",
134 "sse2",
135 "sse3",
136 "sse4a",
137 "sse4.1",
138 "sse4.2",
139 "ssse3",
140 "tdt",
141 "x2apic",
142 "xop",
143 "arm_neon",
144 "arm_neon_asimd",
145 "QPX",
146 "VMX",
147 "VSX"
150 const char *
151 gmx_cpuid_simd_string[GMX_CPUID_NSIMD] =
153 "CannotDetect",
154 "None",
155 "Reference",
156 "SSE2",
157 "SSE4.1",
158 "AVX_128_FMA",
159 "AVX_256",
160 "AVX2_256",
161 "AVX_512F",
162 "AVX_512ER",
163 "Sparc64 HPC-ACE",
164 "IBM_QPX",
165 "IBM_VMX",
166 "IBM_VSX",
167 "ARM_NEON",
168 "ARM_NEON_ASIMD"
171 /* Max length of brand string */
172 #define GMX_CPUID_STRLEN 256
175 /* Contents of the abstract datatype */
176 struct gmx_cpuid
178 enum gmx_cpuid_vendor vendor;
179 char brand[GMX_CPUID_STRLEN];
180 int family;
181 int model;
182 int stepping;
183 /* Not using gmx_bool here, since this file must be possible to compile without simple.h */
184 char feature[GMX_CPUID_NFEATURES];
186 /* Basic CPU topology information. For x86 this is a bit complicated since the topology differs between
187 * operating systems and sometimes even settings. For most other architectures you can likely just check
188 * the documentation and then write static information to these arrays rather than detecting on-the-fly.
190 int have_cpu_topology;
191 int nproc; /* total number of logical processors from OS */
192 int npackages;
193 int ncores_per_package;
194 int nhwthreads_per_core;
195 int * package_id;
196 int * core_id; /* Local core id in each package */
197 int * hwthread_id; /* Local hwthread id in each core */
198 int * locality_order; /* Processor indices sorted in locality order */
202 /* Simple routines to access the data structure. The initialization routine is
203 * further down since that needs to call other static routines in this file.
205 enum gmx_cpuid_vendor
206 gmx_cpuid_vendor (gmx_cpuid_t cpuid)
208 return cpuid->vendor;
212 const char *
213 gmx_cpuid_brand (gmx_cpuid_t cpuid)
215 return cpuid->brand;
219 gmx_cpuid_family (gmx_cpuid_t cpuid)
221 return cpuid->family;
225 gmx_cpuid_model (gmx_cpuid_t cpuid)
227 return cpuid->model;
231 gmx_cpuid_stepping (gmx_cpuid_t cpuid)
233 return cpuid->stepping;
237 gmx_cpuid_feature (gmx_cpuid_t cpuid,
238 enum gmx_cpuid_feature feature)
240 return (cpuid->feature[feature] != 0);
245 gmx_cpuid_is_intel_nehalem (const gmx_cpuid_t cpuid)
247 return (cpuid->vendor == GMX_CPUID_VENDOR_INTEL &&
248 cpuid->family == 6 &&
249 (cpuid->model == 0x2E ||
250 cpuid->model == 0x1A ||
251 cpuid->model == 0x1E ||
252 cpuid->model == 0x2F ||
253 cpuid->model == 0x2C ||
254 cpuid->model == 0x25));
258 /* What type of SIMD was compiled in, if any? */
259 #ifdef GMX_SIMD_X86_AVX_512ER
260 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512ER;
261 #elif defined GMX_SIMD_X86_AVX_512F
262 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_512F;
263 #elif defined GMX_SIMD_X86_AVX2_256
264 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX2_256;
265 #elif defined GMX_SIMD_X86_AVX_256
266 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_256;
267 #elif defined GMX_SIMD_X86_AVX_128_FMA
268 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_AVX_128_FMA;
269 #elif defined GMX_SIMD_X86_SSE4_1
270 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE4_1;
271 #elif defined GMX_SIMD_X86_SSE2
272 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_X86_SSE2;
273 #elif defined GMX_SIMD_ARM_NEON
274 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON;
275 #elif defined GMX_SIMD_ARM_NEON_ASIMD
276 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
277 #elif defined GMX_SIMD_SPARC64_HPC_ACE
278 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
279 #elif defined GMX_SIMD_IBM_QPX
280 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_QPX;
281 #elif defined GMX_SIMD_IBM_VMX
282 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VMX;
283 #elif defined GMX_SIMD_IBM_VSX
284 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_IBM_VSX;
285 #elif defined GMX_SIMD_REFERENCE
286 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_REFERENCE;
287 #else
288 static const enum gmx_cpuid_simd compiled_simd = GMX_CPUID_SIMD_NONE;
289 #endif
292 enum gmx_cpuid_simd
293 gmx_compiled_simd()
295 return compiled_simd;
299 #ifdef GMX_CPUID_X86
/* Execute CPUID on x86 class CPUs. level sets function to exec, and the
 * contents of register output is returned. See Intel/AMD docs for details.
 *
 * This version supports extended information where we can also have an input
 * value in the ecx register. This is ignored for most levels, but some of them
 * (e.g. level 0xB on Intel) use it.
 *
 * level   Value loaded into eax before executing cpuid.
 * ecxval  Value loaded into ecx before executing cpuid.
 * eax, ebx, ecx, edx
 *         Output: register contents after cpuid. All four are always written,
 *         and are set to zero when cpuid cannot be executed.
 *
 * Returns 0 on success. Returns -1 when cpuid cannot be executed at all, or
 * when a non-zero ecxval was requested on an MSVC too old to provide
 * __cpuidex().
 */
static int
execute_x86cpuid(unsigned int   level,
                 unsigned int   ecxval,
                 unsigned int * eax,
                 unsigned int * ebx,
                 unsigned int * ecx,
                 unsigned int * edx)
{
    int rc = 0;

    /* Currently CPUID is only supported (1) if we can use an instruction on MSVC, or (2)
     * if the compiler handles GNU-style inline assembly.
     */

#if (defined _MSC_VER)
    int CPUInfo[4];

#if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
    /* MSVC 9.0 SP1 or later */
    __cpuidex(CPUInfo, level, ecxval);
    rc = 0;
#else
    __cpuid(CPUInfo, level);
    /* Set an error code if the user wanted a non-zero ecxval, since we did not have cpuidex */
    rc = (ecxval > 0) ? -1 : 0;
#endif
    *eax = CPUInfo[0];
    *ebx = CPUInfo[1];
    *ecx = CPUInfo[2];
    *edx = CPUInfo[3];

#elif (defined GMX_X86_GCC_INLINE_ASM)
    /* for now this means GMX_X86_GCC_INLINE_ASM should be defined,
     * but there might be more options added in the future.
     */
    *eax = level;
    *ecx = ecxval;
    *ebx = 0;
    *edx = 0;
#if defined(__i386__) && defined(__PIC__)
    /* Avoid clobbering the global offset table in 32-bit pic code (ebx register) */
    __asm__ __volatile__ ("xchgl %%ebx, %1  \n\t"
                          "cpuid            \n\t"
                          "xchgl %%ebx, %1  \n\t"
                          : "+a" (*eax), "+r" (*ebx), "+c" (*ecx), "+d" (*edx));
#else
    /* i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want :-) */
    __asm__ __volatile__ ("cpuid            \n\t"
                          : "+a" (*eax), "+b" (*ebx), "+c" (*ecx), "+d" (*edx));
#endif
    rc = 0;
#else
    /* Death and horror!
     * Apparently this is an x86 platform where we don't know how to call cpuid.
     *
     * This is REALLY bad, since we will lose all Gromacs SIMD support.
     */
    (void) level;
    (void) ecxval;

    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;

    rc = -1;
#endif
    return rc;
}
376 /* Identify CPU features common to Intel & AMD - mainly brand string,
377 * version and some features. Vendor has already been detected outside this.
379 static int
380 cpuid_check_common_x86(gmx_cpuid_t cpuid)
382 int fn, max_stdfn, max_extfn;
383 unsigned int eax, ebx, ecx, edx;
384 char str[GMX_CPUID_STRLEN];
385 char * p;
387 /* Find largest standard/extended function input value */
388 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
389 max_stdfn = eax;
390 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
391 max_extfn = eax;
393 p = str;
394 if (max_extfn >= 0x80000005)
396 /* Get CPU brand string */
397 for (fn = 0x80000002; fn < 0x80000005; fn++)
399 execute_x86cpuid(fn, 0, &eax, &ebx, &ecx, &edx);
400 memcpy(p, &eax, 4);
401 memcpy(p+4, &ebx, 4);
402 memcpy(p+8, &ecx, 4);
403 memcpy(p+12, &edx, 4);
404 p += 16;
406 *p = '\0';
408 /* Remove empty initial space */
409 p = str;
410 while (isspace(*(p)))
412 p++;
414 strncpy(cpuid->brand, p, GMX_CPUID_STRLEN);
416 else
418 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
421 /* Find basic CPU properties */
422 if (max_stdfn >= 1)
424 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
426 cpuid->family = ((eax & 0x0FF00000) >> 20) + ((eax & 0x00000F00) >> 8);
427 /* Note that extended model should be shifted left 4, so only shift right 12 iso 16. */
428 cpuid->model = ((eax & 0x000F0000) >> 12) + ((eax & 0x000000F0) >> 4);
429 cpuid->stepping = (eax & 0x0000000F);
431 /* Feature flags common to AMD and intel */
432 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE3] = (ecx & (1 << 0)) != 0;
433 cpuid->feature[GMX_CPUID_FEATURE_X86_PCLMULDQ] = (ecx & (1 << 1)) != 0;
434 cpuid->feature[GMX_CPUID_FEATURE_X86_SSSE3] = (ecx & (1 << 9)) != 0;
435 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA] = (ecx & (1 << 12)) != 0;
436 cpuid->feature[GMX_CPUID_FEATURE_X86_CX16] = (ecx & (1 << 13)) != 0;
437 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_1] = (ecx & (1 << 19)) != 0;
438 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_2] = (ecx & (1 << 20)) != 0;
439 cpuid->feature[GMX_CPUID_FEATURE_X86_POPCNT] = (ecx & (1 << 23)) != 0;
440 cpuid->feature[GMX_CPUID_FEATURE_X86_AES] = (ecx & (1 << 25)) != 0;
441 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX] = (ecx & (1 << 28)) != 0;
442 cpuid->feature[GMX_CPUID_FEATURE_X86_F16C] = (ecx & (1 << 29)) != 0;
443 cpuid->feature[GMX_CPUID_FEATURE_X86_RDRND] = (ecx & (1 << 30)) != 0;
445 cpuid->feature[GMX_CPUID_FEATURE_X86_PSE] = (edx & (1 << 3)) != 0;
446 cpuid->feature[GMX_CPUID_FEATURE_X86_MSR] = (edx & (1 << 5)) != 0;
447 cpuid->feature[GMX_CPUID_FEATURE_X86_CX8] = (edx & (1 << 8)) != 0;
448 cpuid->feature[GMX_CPUID_FEATURE_X86_APIC] = (edx & (1 << 9)) != 0;
449 cpuid->feature[GMX_CPUID_FEATURE_X86_CMOV] = (edx & (1 << 15)) != 0;
450 cpuid->feature[GMX_CPUID_FEATURE_X86_CLFSH] = (edx & (1 << 19)) != 0;
451 cpuid->feature[GMX_CPUID_FEATURE_X86_MMX] = (edx & (1 << 23)) != 0;
452 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE2] = (edx & (1 << 26)) != 0;
453 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = (edx & (1 << 28)) != 0;
455 else
457 cpuid->family = -1;
458 cpuid->model = -1;
459 cpuid->stepping = -1;
462 if (max_extfn >= 0x80000001)
464 execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
465 cpuid->feature[GMX_CPUID_FEATURE_X86_LAHF_LM] = (ecx & (1 << 0)) != 0;
466 cpuid->feature[GMX_CPUID_FEATURE_X86_PDPE1GB] = (edx & (1 << 26)) != 0;
467 cpuid->feature[GMX_CPUID_FEATURE_X86_RDTSCP] = (edx & (1 << 27)) != 0;
470 if (max_extfn >= 0x80000007)
472 execute_x86cpuid(0x80000007, 0, &eax, &ebx, &ecx, &edx);
473 cpuid->feature[GMX_CPUID_FEATURE_X86_NONSTOP_TSC] = (edx & (1 << 8)) != 0;
475 return 0;
/* This routine returns the number of unique different elements found in the array,
 * and renumbers these starting from 0. For example, the array {0,1,2,8,9,10,8,9,10,0,1,2}
 * will be rewritten to {0,1,2,3,4,5,3,4,5,0,1,2}, and it returns 6 for the
 * number of unique elements.
 *
 * data  Array of n values, rewritten in place. Each value is replaced by its
 *       rank among the distinct values in ascending order.
 * n     Number of elements in data.
 *
 * Returns the number of distinct values. Returns 0, leaving data untouched,
 * when n is not positive or the scratch array cannot be allocated.
 */
static int
cpuid_renumber_elements(int *data, int n)
{
    int *unique;
    int  i, j, nunique, found;

    if (n <= 0)
    {
        return 0;
    }

    unique = malloc(sizeof(*unique) * (size_t) n);
    if (unique == NULL)
    {
        return 0;
    }

    nunique = 0;
    for (i = 0; i < n; i++)
    {
        for (j = 0, found = 0; j < nunique && !found; j++)
        {
            found = (data[i] == unique[j]);
        }
        if (!found)
        {
            /* Insert in sorted order! */
            for (j = nunique++; j > 0 && unique[j-1] > data[i]; j--)
            {
                unique[j] = unique[j-1];
            }
            unique[j] = data[i];
        }
    }

    /* renumber */
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < nunique; j++)
        {
            if (data[i] == unique[j])
            {
                data[i] = j;
                /* Stop here: the new value j must not be compared against
                 * the remaining entries, or it could be renumbered a second
                 * time (e.g. {-5,0} would otherwise become {1,1}).
                 */
                break;
            }
        }
    }
    free(unique);
    return nunique;
}
523 /* APIC IDs, or everything you wanted to know about your x86 cores but were afraid to ask...
525 * Raw APIC IDs are unfortunately somewhat dirty. For technical reasons they are assigned
526 * in power-of-2 chunks, and even then there are no guarantees about specific numbers - all
527 * we know is that the part for each thread/core/package is unique, and how many bits are
528 * reserved for that part.
529 * This routine does internal renumbering so we get continuous indices, and also
530 * decodes the actual number of packages,cores-per-package and hwthreads-per-core.
531 * Returns: 0 on success, non-zero on failure.
533 static int
534 cpuid_x86_decode_apic_id(gmx_cpuid_t cpuid, int *apic_id, int core_bits, int hwthread_bits)
536 int i, idx;
537 int hwthread_mask, core_mask_after_shift;
539 cpuid->hwthread_id = malloc(sizeof(int)*cpuid->nproc);
540 cpuid->core_id = malloc(sizeof(int)*cpuid->nproc);
541 cpuid->package_id = malloc(sizeof(int)*cpuid->nproc);
542 cpuid->locality_order = malloc(sizeof(int)*cpuid->nproc);
544 hwthread_mask = (1 << hwthread_bits) - 1;
545 core_mask_after_shift = (1 << core_bits) - 1;
547 for (i = 0; i < cpuid->nproc; i++)
549 cpuid->hwthread_id[i] = apic_id[i] & hwthread_mask;
550 cpuid->core_id[i] = (apic_id[i] >> hwthread_bits) & core_mask_after_shift;
551 cpuid->package_id[i] = apic_id[i] >> (core_bits + hwthread_bits);
554 cpuid->npackages = cpuid_renumber_elements(cpuid->package_id, cpuid->nproc);
555 cpuid->ncores_per_package = cpuid_renumber_elements(cpuid->core_id, cpuid->nproc);
556 cpuid->nhwthreads_per_core = cpuid_renumber_elements(cpuid->hwthread_id, cpuid->nproc);
558 /* now check for consistency */
559 if ( (cpuid->npackages * cpuid->ncores_per_package *
560 cpuid->nhwthreads_per_core) != cpuid->nproc)
562 /* the packages/cores-per-package/hwthreads-per-core counts are
563 inconsistent. */
564 return -1;
567 /* Create a locality order array, i.e. first all resources in package0, which in turn
568 * are sorted so we first have all resources in core0, where threads are sorted in order, etc.
571 for (i = 0; i < cpuid->nproc; i++)
573 idx = (cpuid->package_id[i]*cpuid->ncores_per_package + cpuid->core_id[i])*cpuid->nhwthreads_per_core + cpuid->hwthread_id[i];
574 cpuid->locality_order[idx] = i;
576 return 0;
580 /* Detection of AMD-specific CPU features */
581 static int
582 cpuid_check_amd_x86(gmx_cpuid_t cpuid)
584 int max_stdfn, max_extfn, ret;
585 unsigned int eax, ebx, ecx, edx;
586 int hwthread_bits, core_bits;
587 int * apic_id;
589 cpuid_check_common_x86(cpuid);
591 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
592 max_stdfn = eax;
594 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
595 max_extfn = eax;
597 if (max_extfn >= 0x80000001)
599 execute_x86cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx);
601 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4A] = (ecx & (1 << 6)) != 0;
602 cpuid->feature[GMX_CPUID_FEATURE_X86_MISALIGNSSE] = (ecx & (1 << 7)) != 0;
603 cpuid->feature[GMX_CPUID_FEATURE_X86_XOP] = (ecx & (1 << 11)) != 0;
604 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA4] = (ecx & (1 << 16)) != 0;
607 /* Query APIC information on AMD */
608 if (max_extfn >= 0x80000008)
610 #if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
611 /* Linux */
612 unsigned int i;
613 cpu_set_t cpuset, save_cpuset;
614 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
615 apic_id = malloc(sizeof(int)*cpuid->nproc);
616 sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
617 /* Get APIC id from each core */
618 CPU_ZERO(&cpuset);
619 for (i = 0; i < cpuid->nproc; i++)
621 CPU_SET(i, &cpuset);
622 sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
623 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
624 apic_id[i] = ebx >> 24;
625 CPU_CLR(i, &cpuset);
627 /* Reset affinity to the value it had when calling this routine */
628 sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
629 #define CPUID_HAVE_APIC
630 #elif defined GMX_NATIVE_WINDOWS
631 /* Windows */
632 DWORD_PTR i;
633 SYSTEM_INFO sysinfo;
634 unsigned int save_affinity, affinity;
635 GetSystemInfo( &sysinfo );
636 cpuid->nproc = sysinfo.dwNumberOfProcessors;
637 apic_id = malloc(sizeof(int)*cpuid->nproc);
638 /* Get previous affinity mask */
639 save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
640 for (i = 0; i < cpuid->nproc; i++)
642 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
643 Sleep(0);
644 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
645 apic_id[i] = ebx >> 24;
647 SetThreadAffinityMask(GetCurrentThread(), save_affinity);
648 #define CPUID_HAVE_APIC
649 #endif
650 #ifdef CPUID_HAVE_APIC
651 /* AMD does not support SMT yet - there are no hwthread bits in apic ID */
652 hwthread_bits = 0;
653 /* Get number of core bits in apic ID - try modern extended method first */
654 execute_x86cpuid(0x80000008, 0, &eax, &ebx, &ecx, &edx);
655 core_bits = (ecx >> 12) & 0xf;
656 if (core_bits == 0)
658 /* Legacy method for old single/dual core AMD CPUs */
659 int i = ecx & 0xF;
660 for (core_bits = 0; (i>>core_bits) > 0; core_bits++)
665 ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
666 hwthread_bits);
667 cpuid->have_cpu_topology = (ret == 0);
668 #endif
670 return 0;
673 /* Detection of Intel-specific CPU features */
674 static int
675 cpuid_check_intel_x86(gmx_cpuid_t cpuid)
677 unsigned int max_stdfn, max_extfn, ret;
678 unsigned int eax, ebx, ecx, edx;
679 unsigned int max_logical_cores, max_physical_cores;
680 int hwthread_bits, core_bits;
681 int * apic_id;
683 cpuid_check_common_x86(cpuid);
685 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
686 max_stdfn = eax;
688 execute_x86cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
689 max_extfn = eax;
691 if (max_stdfn >= 1)
693 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
694 cpuid->feature[GMX_CPUID_FEATURE_X86_PDCM] = (ecx & (1 << 15)) != 0;
695 cpuid->feature[GMX_CPUID_FEATURE_X86_PCID] = (ecx & (1 << 17)) != 0;
696 cpuid->feature[GMX_CPUID_FEATURE_X86_X2APIC] = (ecx & (1 << 21)) != 0;
697 cpuid->feature[GMX_CPUID_FEATURE_X86_TDT] = (ecx & (1 << 24)) != 0;
700 if (max_stdfn >= 7)
702 execute_x86cpuid(0x7, 0, &eax, &ebx, &ecx, &edx);
703 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2] = (ebx & (1 << 5)) != 0;
704 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512F] = (ebx & (1 << 16)) != 0;
705 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512PF] = (ebx & (1 << 26)) != 0;
706 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512ER] = (ebx & (1 << 27)) != 0;
707 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX_512CD] = (ebx & (1 << 28)) != 0;
710 /* Check whether Hyper-Threading is enabled, not only supported */
711 if (cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] && max_stdfn >= 4)
713 execute_x86cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
714 max_logical_cores = (ebx >> 16) & 0x0FF;
715 execute_x86cpuid(0x4, 0, &eax, &ebx, &ecx, &edx);
716 max_physical_cores = ((eax >> 26) & 0x3F) + 1;
718 /* Clear HTT flag if we only have 1 logical core per physical */
719 if (max_logical_cores/max_physical_cores < 2)
721 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = 0;
725 if (max_stdfn >= 0xB)
727 /* Query x2 APIC information from cores */
728 #if (defined HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF && defined __linux__)
729 /* Linux */
730 unsigned int i;
731 cpu_set_t cpuset, save_cpuset;
732 cpuid->nproc = sysconf(_SC_NPROCESSORS_ONLN);
733 apic_id = malloc(sizeof(int)*cpuid->nproc);
734 sched_getaffinity(0, sizeof(cpu_set_t), &save_cpuset);
735 /* Get x2APIC ID from each hardware thread */
736 CPU_ZERO(&cpuset);
737 for (i = 0; i < cpuid->nproc; i++)
739 CPU_SET(i, &cpuset);
740 sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
741 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
742 apic_id[i] = edx;
743 CPU_CLR(i, &cpuset);
745 /* Reset affinity to the value it had when calling this routine */
746 sched_setaffinity(0, sizeof(cpu_set_t), &save_cpuset);
747 #define CPUID_HAVE_APIC
748 #elif defined GMX_NATIVE_WINDOWS
749 /* Windows */
750 DWORD_PTR i;
751 SYSTEM_INFO sysinfo;
752 unsigned int save_affinity, affinity;
753 GetSystemInfo( &sysinfo );
754 cpuid->nproc = sysinfo.dwNumberOfProcessors;
755 apic_id = malloc(sizeof(int)*cpuid->nproc);
756 /* Get previous affinity mask */
757 save_affinity = SetThreadAffinityMask(GetCurrentThread(), 1);
758 for (i = 0; i < cpuid->nproc; i++)
760 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
761 Sleep(0);
762 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
763 apic_id[i] = edx;
765 SetThreadAffinityMask(GetCurrentThread(), save_affinity);
766 #define CPUID_HAVE_APIC
767 #endif
768 #ifdef CPUID_HAVE_APIC
769 execute_x86cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
770 hwthread_bits = eax & 0x1F;
771 execute_x86cpuid(0xB, 1, &eax, &ebx, &ecx, &edx);
772 core_bits = (eax & 0x1F) - hwthread_bits;
773 ret = cpuid_x86_decode_apic_id(cpuid, apic_id, core_bits,
774 hwthread_bits);
775 cpuid->have_cpu_topology = (ret == 0);
776 #endif
778 return 0;
780 #endif /* GMX_CPUID_X86 */
/* Copy the part of 'in' that precedes the first ':' into 's', with trailing
 * whitespace removed.
 *
 * in         NUL-terminated input line.
 * s          Output buffer with room for at least maxlength chars.
 * maxlength  Size of s. At most maxlength-1 characters of 'in' are examined,
 *            and s is always NUL-terminated. Nothing is written if
 *            maxlength is not positive.
 *
 * s becomes the empty string when no colon is found within the examined part.
 */
static void
chomp_substring_before_colon(const char *in, char *s, int maxlength)
{
    char *colon;
    char *end;

    if (maxlength <= 0)
    {
        return;
    }

    /* strncpy() alone does not terminate on truncation, so do it by hand */
    strncpy(s, in, (size_t) (maxlength - 1));
    s[maxlength - 1] = '\0';

    colon = strchr(s, ':');
    if (colon == NULL)
    {
        *s = '\0';
        return;
    }

    /* Cut at the colon, then walk backwards over trailing whitespace.
     * The bounds test comes first so we never look before the buffer.
     */
    end  = colon;
    *end = '\0';
    while (end > s && isspace((unsigned char) end[-1]))
    {
        end--;
        *end = '\0';
    }
}
/* Copy the part of 'in' that follows the first ':' into 's', with leading
 * and trailing whitespace removed.
 *
 * in         NUL-terminated input line.
 * s          Output buffer with room for at least maxlength chars.
 * maxlength  Size of s. At most maxlength-1 characters are copied, and s is
 *            always NUL-terminated. Nothing is written if maxlength is not
 *            positive.
 *
 * s becomes the empty string when 'in' contains no colon.
 */
static void
chomp_substring_after_colon(const char *in, char *s, int maxlength)
{
    const char *value;
    size_t      len;

    if (maxlength <= 0)
    {
        return;
    }

    value = strchr(in, ':');
    if (value == NULL)
    {
        *s = '\0';
        return;
    }

    /* Skip the colon itself and any whitespace right after it */
    value++;
    while (isspace((unsigned char) *value))
    {
        value++;
    }

    /* strncpy() alone does not terminate on truncation, so do it by hand */
    strncpy(s, value, (size_t) (maxlength - 1));
    s[maxlength - 1] = '\0';

    /* Strip trailing whitespace. Testing the length first means an empty
     * value never makes us look before the buffer.
     */
    len = strlen(s);
    while (len > 0 && isspace((unsigned char) s[len - 1]))
    {
        len--;
        s[len] = '\0';
    }
}
828 static int
829 cpuid_check_arm(gmx_cpuid_t cpuid)
831 #if defined(__linux__) || defined(__linux)
832 FILE *fp;
833 char buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN], buffer3[GMX_CPUID_STRLEN];
835 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
837 while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
839 chomp_substring_before_colon(buffer, buffer2, GMX_CPUID_STRLEN);
840 chomp_substring_after_colon(buffer, buffer3, GMX_CPUID_STRLEN);
842 if (!strcmp(buffer2, "Processor"))
844 strncpy(cpuid->brand, buffer3, GMX_CPUID_STRLEN);
846 else if (!strcmp(buffer2, "CPU architecture"))
848 cpuid->family = strtol(buffer3, NULL, 10);
849 if (!strcmp(buffer3, "AArch64"))
851 cpuid->family = 8;
854 else if (!strcmp(buffer2, "CPU part"))
856 cpuid->model = strtol(buffer3, NULL, 16);
858 else if (!strcmp(buffer2, "CPU revision"))
860 cpuid->stepping = strtol(buffer3, NULL, 10);
862 else if (!strcmp(buffer2, "Features") && strstr(buffer3, "neon"))
864 cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 1;
866 else if (!strcmp(buffer2, "Features") && strstr(buffer3, "asimd"))
868 cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
872 fclose(fp);
873 #else
874 # ifdef __aarch64__
875 /* Strange 64-bit non-linux platform. However, since NEON ASIMD is present on all
876 * implementations of AArch64 this far, we assume it is present for now.
878 cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON_ASIMD] = 1;
879 # else
880 /* Strange 32-bit non-linux platform. We cannot assume that neon is present. */
881 cpuid->feature[GMX_CPUID_FEATURE_ARM_NEON] = 0;
882 # endif
883 #endif
884 return 0;
888 static int
889 cpuid_check_ibm(gmx_cpuid_t cpuid)
891 #if defined(__linux__) || defined(__linux)
892 FILE *fp;
893 char buffer[GMX_CPUID_STRLEN], before_colon[GMX_CPUID_STRLEN], after_colon[GMX_CPUID_STRLEN];
895 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
897 while ( (fgets(buffer, sizeof(buffer), fp) != NULL))
899 chomp_substring_before_colon(buffer, before_colon, GMX_CPUID_STRLEN);
900 chomp_substring_after_colon(buffer, after_colon, GMX_CPUID_STRLEN);
902 if (!strcmp(before_colon, "cpu") || !strcmp(before_colon, "Processor"))
904 strncpy(cpuid->brand, after_colon, GMX_CPUID_STRLEN);
906 if (!strcmp(before_colon, "model name") ||
907 !strcmp(before_colon, "model") ||
908 !strcmp(before_colon, "Processor") ||
909 !strcmp(before_colon, "cpu"))
911 if (strstr(after_colon, "altivec"))
913 cpuid->feature[GMX_CPUID_FEATURE_IBM_VMX] = 1;
915 if (!strstr(after_colon, "POWER6") && !strstr(after_colon, "Power6") &&
916 !strstr(after_colon, "power6"))
918 cpuid->feature[GMX_CPUID_FEATURE_IBM_VSX] = 1;
924 fclose(fp);
926 if (strstr(cpuid->brand, "A2"))
928 /* BlueGene/Q */
929 cpuid->feature[GMX_CPUID_FEATURE_IBM_QPX] = 1;
931 #else
932 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
933 cpuid->feature[GMX_CPUID_FEATURE_IBM_QPX] = 0;
934 cpuid->feature[GMX_CPUID_FEATURE_IBM_VMX] = 0;
935 cpuid->feature[GMX_CPUID_FEATURE_IBM_VSX] = 0;
936 #endif
937 return 0;
941 /* Try to find the vendor of the current CPU, so we know what specific
942 * detection routine to call.
944 static enum gmx_cpuid_vendor
945 cpuid_check_vendor(void)
947 enum gmx_cpuid_vendor i, vendor;
948 /* Register data used on x86 */
949 unsigned int eax, ebx, ecx, edx;
950 char vendorstring[13];
951 FILE * fp;
952 char buffer[GMX_CPUID_STRLEN];
953 char before_colon[GMX_CPUID_STRLEN];
954 char after_colon[GMX_CPUID_STRLEN];
956 /* Set default first */
957 vendor = GMX_CPUID_VENDOR_UNKNOWN;
959 #ifdef GMX_CPUID_X86
960 execute_x86cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
962 memcpy(vendorstring, &ebx, 4);
963 memcpy(vendorstring+4, &edx, 4);
964 memcpy(vendorstring+8, &ecx, 4);
966 vendorstring[12] = '\0';
968 for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
970 if (!strncmp(vendorstring, gmx_cpuid_vendor_string[i], 12))
972 vendor = i;
975 #elif defined(__linux__) || defined(__linux)
976 /* General Linux. Try to get CPU vendor from /proc/cpuinfo */
977 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
979 while ( (vendor == GMX_CPUID_VENDOR_UNKNOWN) && (fgets(buffer, sizeof(buffer), fp) != NULL))
981 chomp_substring_before_colon(buffer, before_colon, sizeof(before_colon));
982 /* Intel/AMD use "vendor_id", IBM "vendor", "model", or "cpu". Fujitsu "manufacture".
983 * On ARM there does not seem to be a vendor, but ARM or AArch64 is listed in the Processor string.
984 * Add others if you have them!
986 if (!strcmp(before_colon, "vendor_id")
987 || !strcmp(before_colon, "vendor")
988 || !strcmp(before_colon, "manufacture")
989 || !strcmp(before_colon, "model")
990 || !strcmp(before_colon, "Processor")
991 || !strcmp(before_colon, "cpu"))
993 chomp_substring_after_colon(buffer, after_colon, sizeof(after_colon));
994 for (i = GMX_CPUID_VENDOR_UNKNOWN; i < GMX_CPUID_NVENDORS; i++)
996 /* Be liberal and accept if we find the vendor
997 * string (or alternative string) anywhere. Using
998 * strcasestr() would be non-portable. */
999 if (strstr(after_colon, gmx_cpuid_vendor_string[i])
1000 || strstr(after_colon, gmx_cpuid_vendor_string_alternative[i]))
1002 vendor = i;
1005 /* If we did not find vendor yet, check if it is IBM:
1006 * On some Power/PowerPC systems it only says power, not IBM.
1008 if (vendor == GMX_CPUID_VENDOR_UNKNOWN &&
1009 ((strstr(after_colon, "POWER") || strstr(after_colon, "Power") ||
1010 strstr(after_colon, "power"))))
1012 vendor = GMX_CPUID_VENDOR_IBM;
1017 fclose(fp);
1018 #elif defined(__arm__) || defined (__arm) || defined(__aarch64__)
1019 /* If we are using ARM on something that is not linux we have to trust the compiler,
1020 * and we cannot get the extra info that might be present in /proc/cpuinfo.
1022 vendor = GMX_CPUID_VENDOR_ARM;
1023 #endif
1024 return vendor;
1030 gmx_cpuid_topology(gmx_cpuid_t cpuid,
1031 int * nprocessors,
1032 int * npackages,
1033 int * ncores_per_package,
1034 int * nhwthreads_per_core,
1035 const int ** package_id,
1036 const int ** core_id,
1037 const int ** hwthread_id,
1038 const int ** locality_order)
1040 int rc;
1042 if (cpuid->have_cpu_topology)
1044 *nprocessors = cpuid->nproc;
1045 *npackages = cpuid->npackages;
1046 *ncores_per_package = cpuid->ncores_per_package;
1047 *nhwthreads_per_core = cpuid->nhwthreads_per_core;
1048 *package_id = cpuid->package_id;
1049 *core_id = cpuid->core_id;
1050 *hwthread_id = cpuid->hwthread_id;
1051 *locality_order = cpuid->locality_order;
1052 rc = 0;
1054 else
1056 rc = -1;
1058 return rc;
1062 enum gmx_cpuid_x86_smt
1063 gmx_cpuid_x86_smt(gmx_cpuid_t cpuid)
1065 enum gmx_cpuid_x86_smt rc;
1067 if (cpuid->have_cpu_topology)
1069 rc = (cpuid->nhwthreads_per_core > 1) ? GMX_CPUID_X86_SMT_ENABLED : GMX_CPUID_X86_SMT_DISABLED;
1071 else if (cpuid->vendor == GMX_CPUID_VENDOR_AMD || gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_HTT) == 0)
1073 rc = GMX_CPUID_X86_SMT_DISABLED;
1075 else
1077 rc = GMX_CPUID_X86_SMT_CANNOTDETECT;
1079 return rc;
1084 gmx_cpuid_init (gmx_cpuid_t * pcpuid)
1086 gmx_cpuid_t cpuid;
1087 int i;
1088 FILE * fp;
1089 char buffer[GMX_CPUID_STRLEN], buffer2[GMX_CPUID_STRLEN];
1090 int found_brand;
1092 cpuid = malloc(sizeof(*cpuid));
1094 *pcpuid = cpuid;
1096 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
1098 cpuid->feature[i] = 0;
1101 cpuid->have_cpu_topology = 0;
1102 cpuid->nproc = 0;
1103 cpuid->npackages = 0;
1104 cpuid->ncores_per_package = 0;
1105 cpuid->nhwthreads_per_core = 0;
1106 cpuid->package_id = NULL;
1107 cpuid->core_id = NULL;
1108 cpuid->hwthread_id = NULL;
1109 cpuid->locality_order = NULL;
1111 cpuid->vendor = cpuid_check_vendor();
1113 switch (cpuid->vendor)
1115 #ifdef GMX_CPUID_X86
1116 case GMX_CPUID_VENDOR_INTEL:
1117 cpuid_check_intel_x86(cpuid);
1118 break;
1119 case GMX_CPUID_VENDOR_AMD:
1120 cpuid_check_amd_x86(cpuid);
1121 break;
1122 #endif
1123 case GMX_CPUID_VENDOR_ARM:
1124 cpuid_check_arm(cpuid);
1125 break;
1126 case GMX_CPUID_VENDOR_IBM:
1127 cpuid_check_ibm(cpuid);
1128 break;
1129 default:
1130 /* Default value */
1131 strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_STRLEN);
1132 #if defined(__linux__) || defined(__linux)
1133 /* General Linux. Try to get CPU type from /proc/cpuinfo */
1134 if ( (fp = fopen("/proc/cpuinfo", "r")) != NULL)
1136 found_brand = 0;
1137 while ( (found_brand == 0) && (fgets(buffer, sizeof(buffer), fp) != NULL))
1139 chomp_substring_before_colon(buffer, buffer2, sizeof(buffer2));
1140 /* Intel uses "model name", Fujitsu and IBM "cpu". */
1141 if (!strcmp(buffer2, "model name") || !strcmp(buffer2, "cpu"))
1143 chomp_substring_after_colon(buffer, cpuid->brand, GMX_CPUID_STRLEN);
1144 found_brand = 1;
1148 fclose(fp);
1149 #endif
1150 cpuid->family = 0;
1151 cpuid->model = 0;
1152 cpuid->stepping = 0;
1154 for (i = 0; i < GMX_CPUID_NFEATURES; i++)
1156 cpuid->feature[i] = 0;
1158 cpuid->feature[GMX_CPUID_FEATURE_CANNOTDETECT] = 1;
1159 break;
1161 return 0;
1166 void
1167 gmx_cpuid_done (gmx_cpuid_t cpuid)
1169 free(cpuid);
1174 gmx_cpuid_formatstring (gmx_cpuid_t cpuid,
1175 char * str,
1176 int n)
1178 int c;
1179 int i;
1180 enum gmx_cpuid_feature feature;
1182 #ifdef _MSC_VER
1183 _snprintf(str, n,
1184 " Vendor: %s\n"
1185 " Brand: %s\n"
1186 " Family: %2d model: %2d stepping: %2d\n"
1187 " CPU features:",
1188 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
1189 gmx_cpuid_brand(cpuid),
1190 gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
1191 #else
1192 snprintf(str, n,
1193 " Vendor: %s\n"
1194 " Brand: %s\n"
1195 " Family: %2d model: %2d stepping: %2d\n"
1196 " CPU features:",
1197 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
1198 gmx_cpuid_brand(cpuid),
1199 gmx_cpuid_family(cpuid), gmx_cpuid_model(cpuid), gmx_cpuid_stepping(cpuid));
1200 #endif
1202 str[n-1] = '\0';
1203 c = strlen(str);
1204 n -= c;
1205 str += c;
1207 for (feature = GMX_CPUID_FEATURE_CANNOTDETECT; feature < GMX_CPUID_NFEATURES; feature++)
1209 if (gmx_cpuid_feature(cpuid, feature) == 1)
1211 #ifdef _MSC_VER
1212 _snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
1213 #else
1214 snprintf(str, n, " %s", gmx_cpuid_feature_string[feature]);
1215 #endif
1216 str[n-1] = '\0';
1217 c = strlen(str);
1218 n -= c;
1219 str += c;
1222 #ifdef _MSC_VER
1223 _snprintf(str, n, "\n");
1224 #else
1225 snprintf(str, n, "\n");
1226 #endif
1227 str[n-1] = '\0';
1229 return 0;
1234 enum gmx_cpuid_simd
1235 gmx_cpuid_simd_suggest (gmx_cpuid_t cpuid)
1237 enum gmx_cpuid_simd tmpsimd;
1239 tmpsimd = GMX_CPUID_SIMD_NONE;
1241 if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_INTEL)
1243 /* TODO: Add check for AVX-512F & AVX-512ER here as soon as we
1244 * have implemented verlet kernels for them. Until then,
1245 * we should pick AVX2 instead for the automatic detection.
1247 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX2))
1249 tmpsimd = GMX_CPUID_SIMD_X86_AVX2_256;
1251 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
1253 tmpsimd = GMX_CPUID_SIMD_X86_AVX_256;
1255 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
1257 tmpsimd = GMX_CPUID_SIMD_X86_SSE4_1;
1259 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
1261 tmpsimd = GMX_CPUID_SIMD_X86_SSE2;
1264 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_AMD)
1266 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX2))
1268 tmpsimd = GMX_CPUID_SIMD_X86_AVX2_256;
1270 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_AVX))
1272 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_FMA))
1274 tmpsimd = GMX_CPUID_SIMD_X86_AVX_128_FMA;
1276 else
1278 tmpsimd = GMX_CPUID_SIMD_X86_AVX_256;
1281 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE4_1))
1283 tmpsimd = GMX_CPUID_SIMD_X86_SSE4_1;
1285 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_X86_SSE2))
1287 tmpsimd = GMX_CPUID_SIMD_X86_SSE2;
1290 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_FUJITSU)
1292 if (strstr(gmx_cpuid_brand(cpuid), "SPARC64"))
1294 tmpsimd = GMX_CPUID_SIMD_SPARC64_HPC_ACE;
1297 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_IBM)
1299 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_QPX))
1301 tmpsimd = GMX_CPUID_SIMD_IBM_QPX;
1303 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_VSX))
1305 /* VSX is better than VMX, so we check it first */
1306 tmpsimd = GMX_CPUID_SIMD_IBM_VSX;
1308 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_IBM_VMX))
1310 tmpsimd = GMX_CPUID_SIMD_IBM_VMX;
1313 else if (gmx_cpuid_vendor(cpuid) == GMX_CPUID_VENDOR_ARM)
1315 if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON_ASIMD))
1317 tmpsimd = GMX_CPUID_SIMD_ARM_NEON_ASIMD;
1319 else if (gmx_cpuid_feature(cpuid, GMX_CPUID_FEATURE_ARM_NEON))
1321 tmpsimd = GMX_CPUID_SIMD_ARM_NEON;
1324 return tmpsimd;
1329 gmx_cpuid_simd_check(enum gmx_cpuid_simd simd_suggest,
1330 FILE * log,
1331 int print_to_stderr)
1333 int rc;
1335 rc = (simd_suggest != compiled_simd);
1337 if (rc != 0)
1339 if (log != NULL)
1341 fprintf(log, "\nBinary not matching hardware - you might be losing performance.\n"
1342 "SIMD instructions most likely to fit this hardware: %s\n"
1343 "SIMD instructions selected at GROMACS compile time: %s\n\n",
1344 gmx_cpuid_simd_string[simd_suggest],
1345 gmx_cpuid_simd_string[compiled_simd]);
1347 if (print_to_stderr)
1349 fprintf(stderr, "Compiled SIMD instructions: %s, GROMACS could use %s on this machine, which is better\n\n",
1350 gmx_cpuid_simd_string[compiled_simd],
1351 gmx_cpuid_simd_string[simd_suggest]);
1354 return rc;
#ifdef GMX_CPUID_STANDALONE
/* Stand-alone program to enable queries of CPU features from Cmake.
 * Note that you need to check inline ASM capabilities before compiling and set
 * -DGMX_X86_GCC_INLINE_ASM for the cpuid instruction to work...
 *
 * Exactly one flag (argv[1]) is evaluated. Only its first three characters
 * are compared, so abbreviated flags are accepted. An unrecognised flag
 * prints nothing. Without arguments the usage text is printed.
 *
 * Returns 0 on success and 1 if CPU detection could not be initialised.
 */
int
main(int argc, char **argv)
{
    gmx_cpuid_t                   cpuid;
    enum gmx_cpuid_simd           simd;
    int                           i, cnt;

    if (argc < 2)
    {
        fprintf(stdout,
                "Usage:\n\n%s [flags]\n\n"
                "Available flags:\n"
                "-vendor        Print CPU vendor.\n"
                "-brand         Print CPU brand string.\n"
                "-family        Print CPU family version.\n"
                "-model         Print CPU model version.\n"
                "-stepping      Print CPU stepping version.\n"
                "-features      Print CPU feature flags.\n"
                "-simd          Print suggested GROMACS SIMD instructions.\n",
                argv[0]);
        exit(0);
    }

    /* Do not touch cpuid unless initialisation reported success */
    if (gmx_cpuid_init(&cpuid) != 0)
    {
        fprintf(stderr, "CPU detection could not be initialised.\n");
        return 1;
    }

    if (!strncmp(argv[1], "-vendor", 3))
    {
        printf("%s\n", gmx_cpuid_vendor_string[cpuid->vendor]);
    }
    else if (!strncmp(argv[1], "-brand", 3))
    {
        printf("%s\n", cpuid->brand);
    }
    else if (!strncmp(argv[1], "-family", 3))
    {
        printf("%d\n", cpuid->family);
    }
    else if (!strncmp(argv[1], "-model", 3))
    {
        printf("%d\n", cpuid->model);
    }
    else if (!strncmp(argv[1], "-stepping", 3))
    {
        printf("%d\n", cpuid->stepping);
    }
    else if (!strncmp(argv[1], "-features", 3))
    {
        /* Space-separated list of the names of all features flagged 1 */
        cnt = 0;
        for (i = 0; i < GMX_CPUID_NFEATURES; i++)
        {
            if (cpuid->feature[i] == 1)
            {
                if (cnt++ > 0)
                {
                    printf(" ");
                }
                printf("%s", gmx_cpuid_feature_string[i]);
            }
        }
        printf("\n");
    }
    else if (!strncmp(argv[1], "-simd", 3))
    {
        simd = gmx_cpuid_simd_suggest(cpuid);
        fprintf(stdout, "%s\n", gmx_cpuid_simd_string[simd]);
    }

    gmx_cpuid_done(cpuid);

    return 0;
}

#endif
1438 /*! \endcond */