changed reading hint
[gromacs/adressmacs.git] / src / fftw / fftw-int.h
blobb919e3334998bb1de2e9a958fa4fcc8e6ba0b7d4
/*
 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
/* fftw-int.h -- library-internal (private) definitions */
21 /* $Id$ */
23 #ifndef FFTW_INT_H
24 #define FFTW_INT_H
25 #include "fftw.h"
27 #ifdef __cplusplus
28 extern "C" {
29 #else
30 #endif /* __cplusplus */
32 /****************************************************************************/
33 /* Private Functions */
34 /****************************************************************************/
36 extern fftw_twiddle *fftw_create_twiddle(int n, const fftw_codelet_desc *d);
37 extern void fftw_destroy_twiddle(fftw_twiddle *tw);
39 extern void fftw_strided_copy(int, fftw_complex *, int, fftw_complex *);
40 extern void fftw_executor_simple(int, const fftw_complex *, fftw_complex *,
41 fftw_plan_node *, int, int);
43 extern fftwnd_plan fftwnd_create_plan_aux(int rank, const int *n,
44 fftw_direction dir, int flags);
45 extern fftw_plan *fftwnd_new_plan_array(int rank);
46 extern fftw_plan *fftwnd_create_plans_generic(fftw_plan *plans,
47 int rank, const int *n,
48 fftw_direction dir, int flags);
49 extern fftw_plan *fftwnd_create_plans_specific(fftw_plan *plans,
50 int rank, const int *n,
51 const int *n_after,
52 fftw_direction dir, int flags,
53 fftw_complex *in, int istride,
54 fftw_complex *out, int ostride);
55 extern int fftwnd_work_size(int rank, const int *n, int flags, int ncopies);
57 extern void fftwnd_aux(fftwnd_plan p, int cur_dim,
58 fftw_complex *in, int istride,
59 fftw_complex *out, int ostride,
60 fftw_complex *work);
61 extern void fftwnd_aux_howmany(fftwnd_plan p, int cur_dim,
62 int howmany,
63 fftw_complex *in, int istride, int idist,
64 fftw_complex *out, int ostride, int odist,
65 fftw_complex *work);
/* wisdom prototypes */

/*
 * Category tag taken by fftw_wisdom_lookup() and fftw_wisdom_add().
 * No explicit values are assigned, so the enumerators number from zero
 * in declaration order.
 */
enum fftw_wisdom_category {
     FFTW_WISDOM,
     RFFTW_WISDOM
};
72 extern int fftw_wisdom_lookup(int n, int flags, fftw_direction dir,
73 enum fftw_wisdom_category category,
74 int istride, int ostride,
75 enum fftw_node_type *type,
76 int *signature, int replace_p);
77 extern void fftw_wisdom_add(int n, int flags, fftw_direction dir,
78 enum fftw_wisdom_category cat,
79 int istride, int ostride,
80 enum fftw_node_type type,
81 int signature);
83 /* Private planner functions: */
84 extern double fftw_estimate_node(fftw_plan_node *p);
85 extern fftw_plan_node *fftw_make_node_notw(int size,
86 const fftw_codelet_desc *config);
87 extern fftw_plan_node *fftw_make_node_real2hc(int size,
88 const fftw_codelet_desc *config);
89 extern fftw_plan_node *fftw_make_node_hc2real(int size,
90 const fftw_codelet_desc *config);
91 extern fftw_plan_node *fftw_make_node_twiddle(int n,
92 const fftw_codelet_desc *config,
93 fftw_plan_node *recurse,
94 int flags);
95 extern fftw_plan_node *fftw_make_node_hc2hc(int n,
96 fftw_direction dir,
97 const fftw_codelet_desc *config,
98 fftw_plan_node *recurse,
99 int flags);
100 extern fftw_plan_node *fftw_make_node_generic(int n, int size,
101 fftw_generic_codelet *codelet,
102 fftw_plan_node *recurse,
103 int flags);
104 extern fftw_plan_node *fftw_make_node_rgeneric(int n, int size,
105 fftw_direction dir,
106 fftw_rgeneric_codelet * codelet,
107 fftw_plan_node *recurse,
108 int flags);
109 extern int fftw_factor(int n);
110 extern fftw_plan_node *fftw_make_node(void);
111 extern fftw_plan fftw_make_plan(int n, fftw_direction dir,
112 fftw_plan_node *root, int flags,
113 enum fftw_node_type wisdom_type,
114 int wisdom_signature);
115 extern void fftw_use_plan(fftw_plan p);
116 extern void fftw_use_node(fftw_plan_node *p);
117 extern void fftw_destroy_plan_internal(fftw_plan p);
118 extern fftw_plan fftw_pick_better(fftw_plan p1, fftw_plan p2);
119 extern fftw_plan fftw_lookup(fftw_plan *table, int n, int flags);
120 extern void fftw_insert(fftw_plan *table, fftw_plan this_plan, int n);
121 extern void fftw_make_empty_table(fftw_plan *table);
122 extern void fftw_destroy_table(fftw_plan *table);
123 extern void fftw_complete_twiddle(fftw_plan_node *p, int n);
125 extern fftw_plan_node *fftw_make_node_rader(int n, int size,
126 fftw_direction dir,
127 fftw_plan_node *recurse,
128 int flags);
129 extern fftw_rader_data *fftw_rader_top;
/****************************************************************************/
/*                           Floating Point Types                           */
/****************************************************************************/

/*
 * We use these definitions to make it easier for people to change
 * FFTW to use long double and similar types.  You shouldn't have to
 * change this just to use float or double.
 */

/*
 * Change this if your floating-point constants need to be expressed
 * in a special way.  For example, if fftw_real is long double, you
 * will need to append L to your fp constants to make them of the
 * same precision.  Do this by changing "(x)" below to "(x##L)".
 *
 * The argument is parenthesized so that the cast applies to the whole
 * argument expression, not just to its first operand.
 */
#define FFTW_KONST(x) ((fftw_real) (x))

#define FFTW_TRIG_SIN sin
#define FFTW_TRIG_COS cos
typedef double FFTW_TRIG_REAL;	/* the argument type for sin and cos */

/* 2*pi as an fftw_real constant. */
#define FFTW_K2PI FFTW_KONST(6.2831853071795864769252867665590057683943388)
/****************************************************************************/
/*                               gcc/x86 hacks                              */
/****************************************************************************/

/*
 * gcc 2.[78].x and x86 specific hacks.  These macros align the stack
 * pointer so that the double precision temporary variables in the
 * codelets will be aligned to a multiple of 8 bytes (*way* faster on
 * pentium and pentiumpro)
 *
 * The hacks are enabled only for gcc on i386, when
 * FFTW_ENABLE_I386_HACKS is defined and FFTW_ENABLE_FLOAT is not.
 * In every other configuration the macros expand to nothing.
 */
#ifdef __GNUC__
#ifdef __i386__
#ifdef FFTW_ENABLE_I386_HACKS
#ifndef FFTW_ENABLE_FLOAT
#define FFTW_USING_I386_HACKS
/* If alloca(0) is not 8-byte aligned, allocate 4 more bytes. */
#define HACK_ALIGN_STACK_EVEN() {					\
     if ((((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4);	\
}

/* If alloca(0) is 8-byte aligned, allocate 4 more bytes. */
#define HACK_ALIGN_STACK_ODD() {					\
     if (!(((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4);	\
}

#ifdef FFTW_DEBUG_ALIGNMENT
/* Abort if a local double does not sit on an 8-byte boundary. */
#define ASSERT_ALIGNED_DOUBLE() {					\
     double fftw_align_probe;						\
     if ((((long) &fftw_align_probe) & 0x7)) abort();			\
}
#endif

#endif
#endif
#endif
#endif

/* Fallbacks: empty expansions when the hacks above are not in effect. */
#ifndef HACK_ALIGN_STACK_EVEN
#define HACK_ALIGN_STACK_EVEN()
#endif
#ifndef HACK_ALIGN_STACK_ODD
#define HACK_ALIGN_STACK_ODD()
#endif
#ifndef ASSERT_ALIGNED_DOUBLE
#define ASSERT_ALIGNED_DOUBLE()
#endif
/****************************************************************************/
/*                                  Timers                                  */
/****************************************************************************/

/*
 * Here, you can use all the nice timers available in your machine.
 */

/*
 * Things you should define to include your own clock:
 *
 * fftw_time -- the data type used to store a time
 *
 * extern fftw_time fftw_get_time(void);
 *      -- a function returning the current time.  (We have
 *         implemented this as a macro in most cases.)
 *
 * extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2);
 *      -- returns the time difference (t1 - t2).
 *         If t1 < t2, it may simply return zero (although this
 *         is not required).  (We have implemented this as a macro
 *         in most cases.)
 *
 * extern double fftw_time_to_sec(fftw_time t);
 *      -- returns the time t expressed in seconds, as a double.
 *         (Implemented as a macro in most cases.)
 *
 * FFTW_TIME_MIN -- a double-precision macro holding the minimum
 *         time interval (in seconds) for accurate time measurements.
 *         This should probably be at least 100 times the precision of
 *         your clock (we use even longer intervals, to be conservative).
 *         This will determine how long the planner takes to measure
 *         the speeds of different possible plans.
 *
 * Bracket all of your definitions with an appropriate #ifdef so that
 * they will be enabled on your machine.  If you do add your own
 * high-precision timer code, let us know (at fftw@theory.lcs.mit.edu).
 *
 * Only declarations should go in this file.  Any function definitions
 * that you need should go into timer.c.
 */
/*
 * define a symbol so that we know that we have the fftw_time_diff
 * function/macro (it did not exist prior to FFTW 1.2)
 */
#define FFTW_HAS_TIME_DIFF
/*
 * Timer selection.  Exactly one branch of the chain below is compiled.
 * Each branch provides the fftw_time type, fftw_get_time(),
 * fftw_time_diff(), fftw_time_to_sec() and FFTW_TIME_MIN.
 */

/**********************************************
 *              SOLARIS
 **********************************************/
#if defined(HAVE_GETHRTIME)

/* we use the nanosecond virtual timer */
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif

typedef hrtime_t fftw_time;

#define fftw_get_time() gethrtime()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
/* (t) is parenthesized so that the cast covers a whole expression */
#define fftw_time_to_sec(t) ((double) (t) / 1.0e9)

/*
 * a measurement is valid if it runs for at least
 * FFTW_TIME_MIN seconds.
 */
#define FFTW_TIME_MIN (1.0e-4)	/* for Solaris nanosecond timer */
#define FFTW_TIME_REPEAT 8

/**********************************************
 *        Pentium time stamp counter
 **********************************************/
#elif defined(__GNUC__) && defined(__i386__) && defined(FFTW_ENABLE_PENTIUM_TIMER)

/*
 * Use internal Pentium register (time stamp counter). Resolution
 * is 1/FFTW_CYCLES_PER_SEC seconds (e.g. 5 ns for Pentium 200 MHz).
 * (This code was contributed by Wolfgang Reimer)
 */

#ifndef FFTW_CYCLES_PER_SEC
#error "Must define FFTW_CYCLES_PER_SEC in fftw/config.h to use the Pentium cycle counter"
#endif

typedef unsigned long long fftw_time;

/*
 * Read the 64-bit time stamp counter.
 *
 * rdtsc leaves the low word in eax and the high word in edx.  A single
 * asm statement with "=a"/"=d" output constraints tells the compiler
 * about both registers, so no manual save/restore is needed.  The two
 * words are combined arithmetically rather than by type-punning a
 * struct through an fftw_time pointer.
 */
static __inline__ fftw_time read_tsc(void)
{
     unsigned long lo, hi;

     __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
     return (((fftw_time) hi) << 32) | (fftw_time) lo;
}

#define fftw_get_time() read_tsc()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
#define fftw_time_to_sec(t) (((double) (t)) / FFTW_CYCLES_PER_SEC)
#define FFTW_TIME_MIN (1.0e-4)	/* for Pentium TSC register */

/************* generic systems having gettimeofday ************/
#elif defined(HAVE_GETTIMEOFDAY) || defined(HAVE_BSDGETTIMEOFDAY)
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#define FFTW_USE_GETTIMEOFDAY

typedef struct timeval fftw_time;

extern fftw_time fftw_gettimeofday_get_time(void);
extern fftw_time fftw_gettimeofday_time_diff(fftw_time t1, fftw_time t2);
#define fftw_get_time() fftw_gettimeofday_get_time()
#define fftw_time_diff(t1, t2) fftw_gettimeofday_time_diff(t1, t2)
#define fftw_time_to_sec(t) ((double)(t).tv_sec + (double)(t).tv_usec * 1.0E-6)

#ifndef FFTW_TIME_MIN
/* this should be fine on any system claiming a microsecond timer */
#define FFTW_TIME_MIN (1.0e-2)
#endif

/**********************************************
 *              MACINTOSH
 **********************************************/
#elif defined(HAVE_MAC_TIMER)

/*
 * By default, use the microsecond-timer in the Mac Time Manager.
 * Alternatively, by defining HAVE_MAC_PCI_TIMER, you
 * can use the nanosecond timer available *only* on PCI PowerMacs.
 */
#ifndef HAVE_MAC_PCI_TIMER	/* use time manager */

/*
 * Use Macintosh Time Manager routines (maximum resolution is about 20
 * microseconds).
 */
typedef struct fftw_time_struct {
     unsigned long hi, lo;
} fftw_time;

extern fftw_time get_Mac_microseconds(void);

#define fftw_get_time() get_Mac_microseconds()

/* define as a function instead of a macro: */
extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2);

/* hi counts units of 2^32 microseconds, i.e. 4294967296.0e-6 seconds */
#define fftw_time_to_sec(t) ((t).lo * 1.0e-6 + 4294967296.0e-6 * (t).hi)

/* very conservative, since timer should be accurate to 20e-6: */
/* (although this seems not to be the case in practice) */
#define FFTW_TIME_MIN (5.0e-2)	/* for MacOS Time Manager timer */

#else				/* use nanosecond timer */

/* Use the nanosecond timer available on PCI PowerMacs. */

#include <DriverServices.h>

typedef AbsoluteTime fftw_time;
#define fftw_get_time() UpTime()
#define fftw_time_diff(t1,t2) SubAbsoluteFromAbsolute(t1,t2)
/*
 * NOTE(review): only the .lo word of the nanosecond count is used here;
 * confirm that measured intervals never exceed what .lo can hold.
 */
#define fftw_time_to_sec(t) (AbsoluteToNanoseconds(t).lo * 1.0e-9)

/* Extremely conservative minimum time: */
/* for MacOS PCI PowerMac nanosecond timer */
#define FFTW_TIME_MIN (5.0e-3)

#endif				/* use nanosecond timer */

/**********************************************
 *              WINDOWS
 **********************************************/
#elif defined(HAVE_WIN32_TIMER)

#include <time.h>

typedef unsigned long fftw_time;
extern unsigned long GetPerfTime(void);
extern double GetPerfSec(double ticks);

#define fftw_get_time() GetPerfTime()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
#define fftw_time_to_sec(t) GetPerfSec(t)

#define FFTW_TIME_MIN (5.0e-2)	/* for Win32 timer */

/**********************************************
 *              CRAY
 **********************************************/
#elif defined(_CRAYMPP)		/* Cray MPP system */

double SECONDR(void);		/*
				 * I think you have to link with -lsci to
				 * get this
				 */

typedef double fftw_time;
#define fftw_get_time() SECONDR()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
#define fftw_time_to_sec(t) (t)

#define FFTW_TIME_MIN (1.0e-1)	/* for Cray MPP SECONDR timer */

/**********************************************
 *          VANILLA UNIX/ISO C SYSTEMS
 **********************************************/
/* last resort: use good old Unix clock() */
#else

#include <time.h>

typedef clock_t fftw_time;

#ifndef CLOCKS_PER_SEC
#ifdef sun
/* stupid sunos4 prototypes */
#define CLOCKS_PER_SEC 1000000
extern long clock(void);
#else				/* not sun, we don't know CLOCKS_PER_SEC */
#error Please define CLOCKS_PER_SEC
#endif
#endif

#define fftw_get_time() clock()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
#define fftw_time_to_sec(t) (((double) (t)) / CLOCKS_PER_SEC)

/*
 * ***VERY*** conservative constant: this says that a
 * measurement must run for 200ms in order to be valid.
 * You had better check the manual of your machine
 * to discover if it can do better than this
 */
#define FFTW_TIME_MIN (2.0e-1)	/* for default clock() timer */

#endif				/* UNIX clock() */
/*
 * Measurement defaults.  Each one applies only if nothing earlier has
 * already defined the macro.
 */

/* number of timing measurements taken */
#if !defined(FFTW_TIME_REPEAT)
#  define FFTW_TIME_REPEAT 4
#endif

/* upper bound, in seconds, on the time spent measuring one FFT */
#if !defined(FFTW_TIME_LIMIT)
#  define FFTW_TIME_LIMIT 2.0
#endif
461 #ifdef __cplusplus
462 } /* extern "C" */
464 #endif /* __cplusplus */
466 #endif /* FFTW_INT_H */