Merge branch 'master' of git://git.gromacs.org/gromacs
[gromacs/adressmacs.git] / include / gmx_cyclecounter.h
blobd3a587c3fe4308d2931559a678d966b62105ce1c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This file is part of Gromacs Copyright (c) 1991-2006
5 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
15 * And Hey:
16 * Gnomes, ROck Monsters And Chili Sauce
19 #ifndef _GMX_CYCLECOUNTER_H_
20 #define _GMX_CYCLECOUNTER_H_
/** @file gmx_cyclecounter.h
 *
 *  @brief High-resolution timestamp or CPU clock cycle counters.
 *
 *  After reading the current value with gmx_cycles_read() you can add or
 *  subtract these numbers as normal integers of type gmx_cycles_t.
 */
30 #ifdef _MSC_VER
31 #include <intrin.h>
32 #endif
34 #ifdef __cplusplus
35 extern "C"
37 #endif
38 #if 0
39 } /* fixes auto-indentation problems */
40 #endif
/* Minor implementation note:
 *
 * I like to use these counters in other programs too, so to avoid making
 * it dependent on other Gromacs definitions I use the #ifdef's to set
 * architecture-specific inline macros instead of using gmx_inline from
 * gmx_types.h /Erik 2005-12-10
 *
 * The platform tests in this chain must stay in step with the ones used
 * for gmx_cycles_have_counter() and gmx_cycles_read() further down, so
 * that the type always matches the routine that produces it.
 */

#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
     (defined(__i386__) || defined(__x86_64__)))
/* x86 or x86-64 with GCC inline assembly */
typedef unsigned long long gmx_cycles_t;

#elif defined(_MSC_VER)
#include <windows.h>
typedef __int64 gmx_cycles_t;

#elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
/* HP compiler on ia64 */
#include <machine/sys/inline.h>
typedef unsigned long gmx_cycles_t;

#elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
/* Intel compiler on ia64 */
#include <ia64intrin.h>
typedef unsigned long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__ia64__)
/* ia64 with GCC inline assembly */
typedef unsigned long gmx_cycles_t;

#elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
/* HP PA-RISC, inline asm with gcc */
typedef unsigned long gmx_cycles_t;

#elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
/* HP PA-RISC, instruction when using HP compiler */
#include <machine/inline.h>
typedef unsigned long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__s390__)
/* S390, taken from FFTW who got it from James Treacy */
typedef unsigned long long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__alpha__)
/* gcc inline assembly on alpha CPUs */
typedef unsigned long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__sparc_v9__)
/* gcc inline assembly on sparc v9 */
typedef unsigned long gmx_cycles_t;

#elif defined(__DECC) && defined(__alpha)
/* Digital GEM C compiler on alpha */
#include <c_asm.h>
typedef unsigned long gmx_cycles_t;

#elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
/* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
typedef unsigned long long gmx_cycles_t;

#elif (defined(__SVR4) && defined (__SUNPRO_CC))
/* Solaris high-resolution timers.
 * hrtime_t and gethrtime() are declared in <sys/time.h>; include it here
 * so this header does not rely on the includer having done so already.
 */
#include <sys/time.h>
typedef hrtime_t gmx_cycles_t;

#elif defined(__xlC__) && defined (_AIX)
/* AIX compilers */
#include <sys/time.h>
#include <sys/systemcfg.h>
typedef unsigned long long gmx_cycles_t;

#elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
        ( defined(__powerpc__) || defined(__ppc__) ) )
/* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
typedef unsigned long long gmx_cycles_t;

#elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
/* Metrowerks on macintosh */
typedef unsigned long long gmx_cycles_t;

#else
/*! \brief Integer-like datatype for cycle counter values
 *
 *  Depending on your system this will usually be something like long long,
 *  or a special cycle datatype from the system header files. It is NOT
 *  necessarily real processor cycles - many systems count in nanoseconds
 *  or a special external time register at fixed frequency (not the CPU freq.)
 *
 *  You can subtract or add gmx_cycles_t types just as normal integers, and if
 *  you run the calibration routine you can also multiply it with a factor to
 *  translate the cycle data to seconds.
 */
typedef long gmx_cycles_t;

#endif
/*! \brief Check if high-resolution cycle counters are available
 *
 *  Not all architectures provide any way to read timestamp counters
 *  in the CPU, and on some it is broken. Although we refer to it
 *  as cycle counters, it is not necessarily given in units of
 *  cycles.
 *
 *  If you notice that your system is missing, implement support for it,
 *  find out how to detect the system during preprocessing, and send us a
 *  patch.
 *
 *  \return 1 if cycle counters are available, 0 if not.
 *
 *  \note This function does not need to be in the header for performance
 *        reasons, but it is very important that we get exactly the
 *        same detection as for the gmx_cycles_read() routines. If you
 *        compile the library with one compiler, and then use a different
 *        one when later linking to the library it might happen that the
 *        library supports cyclecounters but not the headers, or vice versa.
 */
#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
     (defined(__i386__) || defined(__x86_64__)))
static __inline__ int gmx_cycles_have_counter(void)
{
    /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
    return 1;
}
#elif (defined(_MSC_VER))
static __inline int gmx_cycles_have_counter(void)
{
    return 1;
}
#elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
static inline int gmx_cycles_have_counter(void)
{
    /* HP compiler on ia64, use special instruction to read ITC */
    return 1;
}
#elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* Intel compiler on ia64, use special instruction to read ITC */
    return 1;
}
#elif defined(__GNUC__) && defined(__ia64__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* ia64 with GCC inline assembly - ITC register */
    return 1;
}
#elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
static __inline__ int gmx_cycles_have_counter(void)
{
    /* HP PA-RISC, inline asm with gcc */
    return 1;
}
#elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
static inline int gmx_cycles_have_counter(void)
{
    /* HP PA-RISC, instruction when using HP compiler */
    return 1;
}
#elif defined(__GNUC__) && defined(__s390__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* S390, taken from FFTW who got it from James Treacy */
    return 1;
}
#elif defined(__GNUC__) && defined(__alpha__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* gcc inline assembly on alpha CPUs */
    return 1;
}
#elif defined(__GNUC__) && defined(__sparc_v9__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* gcc inline assembly on sparc v9 */
    return 1;
}
#elif defined(__DECC) && defined(__alpha)
static __inline int gmx_cycles_have_counter(void)
{
    /* Digital GEM C compiler on alpha */
    return 1;
}
#elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
static __inline int gmx_cycles_have_counter(void)
{
    /* Irix compilers on SGI hardware */
    return 1;
}
#elif (defined(__SVR4) && defined (__SUNPRO_CC))
static inline int gmx_cycles_have_counter(void)
{
    /* Solaris high-resolution timers */
    return 1;
}
#elif defined(__xlC__) && defined (_AIX)
static inline int gmx_cycles_have_counter(void)
{
    /* AIX compilers */
    return 1;
}
#elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
        ( defined(__powerpc__) || defined(__ppc__) ) )
static __inline__ int gmx_cycles_have_counter(void)
{
    /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
    return 1;
}
#elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
static __inline__ int gmx_cycles_have_counter(void)
{
    /* Metrowerks on macintosh */
    return 1;
}
#else
static int gmx_cycles_have_counter(void)
{
    /* No cycle counter that we know of on this system */
    return 0;
}
#endif
289 /*! \brief Read CPU cycle counter
291 * This routine returns an abstract datatype containing a
292 * cycle counter timestamp.
294 * \return Opaque data corresponding to a cycle reading.
296 * Please note that on most systems it takes several cycles
297 * to read and return the cycle counters. If you are measuring
298 * small intervals, you can compensate for this time by calling
299 * the routine twice and calculating what the difference is.
300 * Subtract this from your other measurements to get an accurate result.
302 * Use gmx_cycles_difference() to get a real number corresponding to
303 * the difference between two gmx_cycles_t values returned from this
304 * routine.
306 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
307 (defined(__i386__) || defined(__x86_64__)))
308 static __inline__ gmx_cycles_t gmx_cycles_read(void)
310 /* x86 with GCC inline assembly - pentium TSC register */
311 gmx_cycles_t cycle;
312 unsigned low,high;
314 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
316 cycle = ((unsigned long long)low) | (((unsigned long long)high)<<32);
318 return cycle;
320 #elif defined(_MSC_VER)
321 static __inline gmx_cycles_t gmx_cycles_read(void)
323 return __rdtsc();
325 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
326 static inline gmx_cycles_t gmx_cycles_read(void)
328 /* HP compiler on ia64 */
329 gmx_cycles_t ret;
330 ret = _Asm_mov_from_ar (_AREG_ITC);
331 return ret;
333 #elif (defined(__INTEL_COMPILER) && defined(__ia64__))
334 static __inline__ gmx_cycles_t gmx_cycles_read(void)
336 /* Intel compiler on ia64 */
337 return __getReg(_IA64_REG_AR_ITC);
339 #elif defined(__GNUC__) && defined(__ia64__)
340 static __inline__ gmx_cycles_t gmx_cycles_read(void)
342 /* ia64 with GCC inline assembly */
343 gmx_cycles_t ret;
344 __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
345 return ret;
347 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
348 static __inline__ gmx_cycles_t gmx_cycles_read(void)
350 /* HP PA-RISC, inline asm with gcc */
351 gmx_cycles_t ret;
352 __asm__ __volatile__("mfctl 16, %0": "=r" (ret));
353 /* no input, nothing else clobbered */
354 return ret;
356 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
357 static inline gmx_cycles_t gmx_cycles_read(void)
359 /* HP PA-RISC, instruction when using HP compiler */
360 gmx_cycles_t ret;
361 _MFCTL(16, ret);
362 return ret;
364 #elif defined(__GNUC__) && defined(__s390__)
365 static __inline__ gmx_cycles_t gmx_cycles_read(void)
367 /* S390, taken from FFTW who got it from James Treacy */
368 gmx_cycles_t cycle;
369 __asm__("stck 0(%0)" : : "a" (&(cycle)) : "memory", "cc");
370 return cycle;
372 #elif defined(__GNUC__) && defined(__alpha__)
373 static __inline__ gmx_cycles_t gmx_cycles_read(void)
375 /* gcc inline assembly on alpha CPUs */
376 unsigned long cycle;
377 __asm__ __volatile__ ("rpcc %0" : "=r"(cycle));
378 return (cycle & 0xFFFFFFFF);
380 #elif defined(__GNUC__) && defined(__sparc_v9__)
381 static __inline__ gmx_cycles_t gmx_cycles_read(void)
383 /* gcc inline assembly on sparc v9 */
384 unsigned long ret;
385 __asm__("rd %%tick, %0" : "=r" (ret));
386 return ret;
388 #elif defined(__DECC) && defined(__alpha)
389 static __inline gmx_cycles_t gmx_cycles_read(void)
391 /* Digital GEM C compiler on alpha */
392 unsigned long cycle;
393 cycle = asm("rpcc %v0");
394 return (cycle & 0xFFFFFFFF);
396 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
397 static __inline gmx_cycles_t gmx_cycles_read(void)
399 /* Irix compilers on SGI hardware */
400 struct timespec t;
401 clock_gettime(CLOCK_SGI_CYCLE, &t);
402 /* Return the number of nanoseconds, so we can subtract/add */
403 return ((unsigned long long)t.tv_sec)*1000000000+
404 (unsigned long long)t.tv_nsec;
406 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
407 static inline gmx_cycles_t gmx_cycles_read(void)
409 /* Solaris high-resolution timers */
410 return gethrtime();
412 #elif defined(__xlC__) && defined (_AIX)
413 static inline gmx_cycles_t gmx_cycles_read(void)
415 /* AIX compilers. Inline the calculation instead of using library functions */
416 timebasestruct_t t1;
417 read_real_time(&t1, TIMEBASE_SZ);
418 /* POWER returns real time (seconds + nanoseconds),
419 * POWER_PC returns high/low 32 bits of a counter.
421 if(t1.flag==RTC_POWER_PC)
423 return ((gmx_cycles_t)t1.tb_high)<<32 | (gmx_cycles_t)t1.tb_low;
425 else
427 return ((gmx_cycles_t)t1.tb_high)*1000000000+(gmx_cycles_t)t1.tb_low;
430 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
431 ( defined(__powerpc__) || defined(__ppc__) ) )
432 static __inline__ gmx_cycles_t gmx_cycles_read(void)
434 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc) */
435 unsigned long low, high1, high2;
438 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
439 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
440 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
442 while (high1 != high2);
444 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
446 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
447 static __inline__ gmx_cycles_t gmx_cycles_read(void)
449 /* Metrowerks on macintosh */
450 unsigned int long low, high1, high2;
453 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
454 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
455 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
457 while (high1 != high2);
459 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
461 #else
462 static gmx_cycles_t gmx_cycles_read(void)
464 return 0;
466 #endif
/*! \brief Calculate number of seconds per cycle tick on host
 *
 *  This routine runs a timer loop to calibrate the number of
 *  seconds per unit of the values returned from gmx_cycles_read().
 *
 *  To calculate the time used, call gmx_cycles_read() twice, subtract
 *  the two readings, and multiply the difference by the value returned
 *  from this routine to get a double precision number of seconds.
 *
 *  \param  sampletime Minimum number of seconds to sample.
 *          One second should give you a reasonably accurate calibration.
 *  \return Number of seconds per cycle unit. If it is not possible to
 *          calculate on this system (for whatever reason) the return value
 *          will be -1, so check that it is positive before using it.
 */
double
gmx_cycles_calibrate(double sampletime);
494 #ifdef __cplusplus
496 #endif
500 #endif /* _GMX_CYCLECOUNTER_H_ */