/*
 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Tue May 18 13:55:35 EDT 1999 */
/* Generated by: ./genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -hc2hc-forward 9 */
/*
 * This function contains 180 FP additions, 120 FP multiplications,
 * (or, 133 additions, 73 multiplications, 47 fused multiply/add),
 * 35 stack variables, and 72 memory accesses
 */
33 static const fftw_real K342020143
= FFTW_KONST(+0.342020143325668733044099614682259580763083368);
34 static const fftw_real K813797681
= FFTW_KONST(+0.813797681349373692844693217248393223289101568);
35 static const fftw_real K939692620
= FFTW_KONST(+0.939692620785908384054109277324731469936208134);
36 static const fftw_real K296198132
= FFTW_KONST(+0.296198132726023843175338011893050938967728390);
37 static const fftw_real K852868531
= FFTW_KONST(+0.852868531952443209628250963940074071936020296);
38 static const fftw_real K173648177
= FFTW_KONST(+0.173648177666930348851716626769314796000375677);
39 static const fftw_real K556670399
= FFTW_KONST(+0.556670399226419366452912952047023132968291906);
40 static const fftw_real K766044443
= FFTW_KONST(+0.766044443118978035202392650555416673935832457);
41 static const fftw_real K984807753
= FFTW_KONST(+0.984807753012208059366743024589523013670643252);
42 static const fftw_real K150383733
= FFTW_KONST(+0.150383733180435296639271897612501926072238258);
43 static const fftw_real K642787609
= FFTW_KONST(+0.642787609686539326322643409907263432907559884);
44 static const fftw_real K663413948
= FFTW_KONST(+0.663413948168938396205421319635891297216863310);
45 static const fftw_real K866025403
= FFTW_KONST(+0.866025403784438646763723170752936183471402627);
46 static const fftw_real K500000000
= FFTW_KONST(+0.500000000000000000000000000000000000000000000);
/*
 * fftw_hc2hc_forward_9: machine-generated codelet (the file header records
 * that genfft was run with "-hc2hc-forward 9").
 *
 * Parameters, as far as the visible statements show:
 *   A        - base of the data; Y is set to A + 9 * iostride.  The
 *              initialisation of X is not visible in this listing.
 *   W        - const fftw_complex table; the loop advances it by 8 entries
 *              per iteration.
 *   iostride - multiplier used in every X[k * iostride] / Y[-k * iostride]
 *              access.
 *   m        - bound of the loop counter i (i = 2, 4, ... while i < m).
 *   dist     - per-iteration step: X moves forward and Y backward by dist.
 *
 * The function returns nothing; all results are stored through X and Y.
 *
 * NOTE(review): this listing looks incomplete.  The number at the start of
 * each statement appears to be the line number of the original file fused
 * into the text, and the gaps in that numbering coincide with things that
 * are not visible here: braces, the declarations of i, X, Y and the tmp
 * variables, and the assignments to several temporaries (for example
 * tmp131, tmp19, tmp20, tmp22, tmp33, tmp62, tmp1, tmp9, tmp12, tmp13,
 * tmp16, tmp17).  Confirm against the pristine generated file before
 * relying on this text.
 */
55 void fftw_hc2hc_forward_9(fftw_real
*A
, const fftw_complex
*W
, int iostride
, int m
, int dist
)
/* Y starts 9 * iostride elements past A. */
61 Y
= A
+ (9 * iostride
);
/*
 * Section before the loop.  In the visible statements only X is read;
 * results are stored to X[0], X[iostride] .. X[4 * iostride] and to
 * Y[-iostride] .. Y[-4 * iostride].
 */
76 ASSERT_ALIGNED_DOUBLE();
83 ASSERT_ALIGNED_DOUBLE();
/*
 * Inputs X[2 * iostride], X[5 * iostride], X[8 * iostride]:
 * tmp145 = full sum, tmp150 = first minus half the sum of the other two,
 * tmp149 = difference of the last two.
 */
84 tmp141
= X
[2 * iostride
];
85 tmp142
= X
[5 * iostride
];
86 tmp143
= X
[8 * iostride
];
87 tmp144
= tmp142
+ tmp143
;
88 tmp145
= tmp141
+ tmp144
;
89 tmp150
= tmp141
- (K500000000
* tmp144
);
90 tmp149
= tmp143
- tmp142
;
/* Inputs X[3 * iostride], X[6 * iostride]: sum (tmp134) and difference (tmp157). */
92 tmp132
= X
[3 * iostride
];
93 tmp133
= X
[6 * iostride
];
94 tmp134
= tmp132
+ tmp133
;
95 tmp157
= tmp133
- tmp132
;
101 ASSERT_ALIGNED_DOUBLE();
/*
 * Inputs X[iostride], X[4 * iostride], X[7 * iostride]: the same three
 * combinations as for the first triple (tmp140, tmp148, tmp151).
 */
102 tmp136
= X
[iostride
];
103 tmp137
= X
[4 * iostride
];
104 tmp138
= X
[7 * iostride
];
105 tmp139
= tmp137
+ tmp138
;
106 tmp140
= tmp136
+ tmp139
;
107 tmp148
= tmp136
- (K500000000
* tmp139
);
108 tmp151
= tmp138
- tmp137
;
/*
 * Outputs built from the sums only.  tmp131 is used here but its
 * assignment is not visible in this listing.
 */
110 Y
[-3 * iostride
] = K866025403
* (tmp145
- tmp140
);
111 tmp135
= tmp131
+ tmp134
;
112 tmp146
= tmp140
+ tmp145
;
113 X
[3 * iostride
] = tmp135
- (K500000000
* tmp146
);
114 X
[0] = tmp135
+ tmp146
;
124 ASSERT_ALIGNED_DOUBLE();
/* Remaining outputs: the difference terms weighted by the K constants. */
125 tmp159
= K866025403
* tmp157
;
126 tmp155
= (K663413948
* tmp151
) - (K642787609
* tmp148
);
127 tmp156
= (K150383733
* tmp149
) - (K984807753
* tmp150
);
128 tmp158
= tmp155
+ tmp156
;
129 tmp147
= tmp131
- (K500000000
* tmp134
);
130 tmp152
= (K766044443
* tmp148
) + (K556670399
* tmp151
);
131 tmp153
= (K173648177
* tmp150
) + (K852868531
* tmp149
);
132 tmp154
= tmp152
+ tmp153
;
133 X
[iostride
] = tmp147
+ tmp154
;
134 X
[4 * iostride
] = tmp147
+ (K866025403
* (tmp155
- tmp156
)) - (K500000000
* tmp154
);
135 X
[2 * iostride
] = tmp147
+ (K173648177
* tmp148
) - (K296198132
* tmp149
) - (K939692620
* tmp150
) - (K852868531
* tmp151
);
136 Y
[-iostride
] = tmp159
+ tmp158
;
137 Y
[-4 * iostride
] = (K866025403
* (tmp157
+ (tmp153
- tmp152
))) - (K500000000
* tmp158
);
138 Y
[-2 * iostride
] = (K813797681
* tmp149
) - (K342020143
* tmp150
) - (K150383733
* tmp151
) - (K984807753
* tmp148
) - tmp159
;
/*
 * Main loop: i = 2, 4, ... while i < m.  After every pass X moves forward
 * by dist, Y moves backward by dist, and W moves forward by 8 entries.
 */
143 for (i
= 2; i
< m
; i
= i
+ 2, X
= X
+ dist
, Y
= Y
- dist
, W
= W
+ 8) {
164 ASSERT_ALIGNED_DOUBLE();
170 ASSERT_ALIGNED_DOUBLE();
172 tmp117
= Y
[-8 * iostride
];
178 ASSERT_ALIGNED_DOUBLE();
/*
 * Pair (X[3 * iostride], Y[-5 * iostride]) combined with (tmp20, tmp22) as
 * a*c - b*d and b*c + a*d, i.e. the shape of a complex product.
 * NOTE(review): tmp20/tmp22 are presumably loaded from W; those loads are
 * not visible here.  The same pattern repeats for the pairs below.
 */
179 tmp21
= X
[3 * iostride
];
180 tmp23
= Y
[-5 * iostride
];
183 tmp24
= (tmp20
* tmp21
) - (tmp22
* tmp23
);
184 tmp68
= (tmp22
* tmp21
) + (tmp20
* tmp23
);
191 ASSERT_ALIGNED_DOUBLE();
/* Pair (X[6 * iostride], Y[-2 * iostride]) with (tmp25, tmp27). */
192 tmp26
= X
[6 * iostride
];
193 tmp28
= Y
[-2 * iostride
];
196 tmp29
= (tmp25
* tmp26
) - (tmp27
* tmp28
);
197 tmp69
= (tmp27
* tmp26
) + (tmp25
* tmp28
);
/*
 * Combine the two products above with tmp19 and tmp117 (tmp19's assignment
 * is not visible here).
 */
199 tmp70
= K866025403
* (tmp68
- tmp69
);
200 tmp116
= tmp68
+ tmp69
;
201 tmp123
= tmp117
- (K500000000
* tmp116
);
202 tmp122
= K866025403
* (tmp29
- tmp24
);
203 tmp30
= tmp24
+ tmp29
;
204 tmp67
= tmp19
- (K500000000
* tmp30
);
215 ASSERT_ALIGNED_DOUBLE();
221 ASSERT_ALIGNED_DOUBLE();
/* Pair (X[2 * iostride], Y[-6 * iostride]) with (tmp49, tmp51). */
222 tmp50
= X
[2 * iostride
];
223 tmp52
= Y
[-6 * iostride
];
226 tmp53
= (tmp49
* tmp50
) - (tmp51
* tmp52
);
227 tmp89
= (tmp51
* tmp50
) + (tmp49
* tmp52
);
234 ASSERT_ALIGNED_DOUBLE();
/* Pair (X[5 * iostride], Y[-3 * iostride]) with (tmp54, tmp56). */
235 tmp55
= X
[5 * iostride
];
236 tmp57
= Y
[-3 * iostride
];
239 tmp58
= (tmp54
* tmp55
) - (tmp56
* tmp57
);
240 tmp84
= (tmp56
* tmp55
) + (tmp54
* tmp57
);
247 ASSERT_ALIGNED_DOUBLE();
/*
 * Pair (X[8 * iostride], tmp62) with (tmp59, tmp61); the load of tmp62 is
 * not visible in this listing.
 */
248 tmp60
= X
[8 * iostride
];
252 tmp63
= (tmp59
* tmp60
) - (tmp61
* tmp62
);
253 tmp85
= (tmp61
* tmp60
) + (tmp59
* tmp62
);
255 tmp64
= tmp58
+ tmp63
;
256 tmp90
= tmp84
+ tmp85
;
262 ASSERT_ALIGNED_DOUBLE();
/* Sums and K500000000 / K866025403 combinations of the three products above. */
263 tmp65
= tmp53
+ tmp64
;
264 tmp83
= tmp53
- (K500000000
* tmp64
);
265 tmp86
= K866025403
* (tmp84
- tmp85
);
266 tmp87
= tmp83
+ tmp86
;
267 tmp104
= tmp83
- tmp86
;
268 tmp113
= tmp89
+ tmp90
;
269 tmp88
= K866025403
* (tmp63
- tmp58
);
270 tmp91
= tmp89
- (K500000000
* tmp90
);
271 tmp92
= tmp88
+ tmp91
;
272 tmp103
= tmp91
- tmp88
;
284 ASSERT_ALIGNED_DOUBLE();
290 ASSERT_ALIGNED_DOUBLE();
/*
 * Pair (tmp33, Y[-7 * iostride]) with (tmp32, tmp34); the load of tmp33 is
 * not visible in this listing.
 */
292 tmp35
= Y
[-7 * iostride
];
295 tmp36
= (tmp32
* tmp33
) - (tmp34
* tmp35
);
296 tmp78
= (tmp34
* tmp33
) + (tmp32
* tmp35
);
303 ASSERT_ALIGNED_DOUBLE();
/* Pair (X[4 * iostride], Y[-4 * iostride]) with (tmp37, tmp39). */
304 tmp38
= X
[4 * iostride
];
305 tmp40
= Y
[-4 * iostride
];
308 tmp41
= (tmp37
* tmp38
) - (tmp39
* tmp40
);
309 tmp73
= (tmp39
* tmp38
) + (tmp37
* tmp40
);
316 ASSERT_ALIGNED_DOUBLE();
/* Pair (X[7 * iostride], Y[-iostride]) with (tmp42, tmp44). */
317 tmp43
= X
[7 * iostride
];
318 tmp45
= Y
[-iostride
];
321 tmp46
= (tmp42
* tmp43
) - (tmp44
* tmp45
);
322 tmp74
= (tmp44
* tmp43
) + (tmp42
* tmp45
);
324 tmp47
= tmp41
+ tmp46
;
325 tmp79
= tmp73
+ tmp74
;
331 ASSERT_ALIGNED_DOUBLE();
/* Sums and K500000000 / K866025403 combinations of the three products above. */
332 tmp48
= tmp36
+ tmp47
;
333 tmp72
= tmp36
- (K500000000
* tmp47
);
334 tmp75
= K866025403
* (tmp73
- tmp74
);
335 tmp76
= tmp72
+ tmp75
;
336 tmp100
= tmp72
- tmp75
;
337 tmp112
= tmp78
+ tmp79
;
338 tmp77
= K866025403
* (tmp46
- tmp41
);
339 tmp80
= tmp78
- (K500000000
* tmp79
);
340 tmp81
= tmp77
+ tmp80
;
341 tmp101
= tmp80
- tmp77
;
349 ASSERT_ALIGNED_DOUBLE();
/* Stores X[0], X[3 * iostride], Y[-6 * iostride]. */
350 tmp114
= K866025403
* (tmp112
- tmp113
);
351 tmp31
= tmp19
+ tmp30
;
352 tmp66
= tmp48
+ tmp65
;
353 tmp111
= tmp31
- (K500000000
* tmp66
);
354 X
[0] = tmp31
+ tmp66
;
355 X
[3 * iostride
] = tmp111
+ tmp114
;
356 Y
[-6 * iostride
] = tmp111
- tmp114
;
363 ASSERT_ALIGNED_DOUBLE();
/* Stores Y[0], Y[-3 * iostride], X[6 * iostride] (the last one negated). */
364 tmp120
= K866025403
* (tmp65
- tmp48
);
365 tmp115
= tmp112
+ tmp113
;
366 tmp118
= tmp116
+ tmp117
;
367 tmp119
= tmp118
- (K500000000
* tmp115
);
368 Y
[0] = tmp115
+ tmp118
;
369 Y
[-3 * iostride
] = tmp120
+ tmp119
;
370 X
[6 * iostride
] = -(tmp119
- tmp120
);
381 ASSERT_ALIGNED_DOUBLE();
/*
 * Stores X[iostride], Y[-7 * iostride], X[4 * iostride], Y[-iostride],
 * X[7 * iostride], Y[-4 * iostride].
 */
382 tmp71
= tmp67
+ tmp70
;
383 tmp124
= tmp122
+ tmp123
;
389 ASSERT_ALIGNED_DOUBLE();
390 tmp82
= (K766044443
* tmp76
) + (K642787609
* tmp81
);
391 tmp93
= (K173648177
* tmp87
) + (K984807753
* tmp92
);
392 tmp94
= tmp82
+ tmp93
;
393 tmp126
= K866025403
* (tmp93
- tmp82
);
394 tmp96
= (K766044443
* tmp81
) - (K642787609
* tmp76
);
395 tmp97
= (K173648177
* tmp92
) - (K984807753
* tmp87
);
396 tmp98
= K866025403
* (tmp96
- tmp97
);
397 tmp121
= tmp96
+ tmp97
;
399 X
[iostride
] = tmp71
+ tmp94
;
400 tmp95
= tmp71
- (K500000000
* tmp94
);
401 Y
[-7 * iostride
] = tmp95
- tmp98
;
402 X
[4 * iostride
] = tmp95
+ tmp98
;
403 Y
[-iostride
] = tmp121
+ tmp124
;
404 tmp125
= tmp124
- (K500000000
* tmp121
);
405 X
[7 * iostride
] = -(tmp125
- tmp126
);
406 Y
[-4 * iostride
] = tmp126
+ tmp125
;
417 ASSERT_ALIGNED_DOUBLE();
/*
 * Stores X[2 * iostride], Y[-8 * iostride], Y[-5 * iostride],
 * Y[-2 * iostride], X[5 * iostride], X[8 * iostride].
 */
418 tmp99
= tmp67
- tmp70
;
419 tmp128
= tmp123
- tmp122
;
425 ASSERT_ALIGNED_DOUBLE();
426 tmp102
= (K173648177
* tmp100
) + (K984807753
* tmp101
);
427 tmp105
= (K342020143
* tmp103
) - (K939692620
* tmp104
);
428 tmp106
= tmp102
+ tmp105
;
429 tmp127
= K866025403
* (tmp105
- tmp102
);
430 tmp108
= (K173648177
* tmp101
) - (K984807753
* tmp100
);
431 tmp109
= (K342020143
* tmp104
) + (K939692620
* tmp103
);
432 tmp110
= K866025403
* (tmp108
+ tmp109
);
433 tmp129
= tmp108
- tmp109
;
435 X
[2 * iostride
] = tmp99
+ tmp106
;
436 tmp107
= tmp99
- (K500000000
* tmp106
);
437 Y
[-8 * iostride
] = tmp107
- tmp110
;
438 Y
[-5 * iostride
] = tmp107
+ tmp110
;
439 Y
[-2 * iostride
] = tmp129
+ tmp128
;
440 tmp130
= tmp128
- (K500000000
* tmp129
);
441 X
[5 * iostride
] = -(tmp127
+ tmp130
);
442 X
[8 * iostride
] = -(tmp130
- tmp127
);
/*
 * Final section.  NOTE(review): the loop's closing brace and whatever
 * condition guards this section are not visible in this listing.  The
 * visible statements read X[2 * iostride] .. X[8 * iostride] and store
 * X[0], X[iostride] .. X[4 * iostride], Y[0] and
 * Y[-iostride] .. Y[-3 * iostride].  tmp1, tmp9, tmp12, tmp13, tmp16 and
 * tmp17 are used below but their assignments are not visible.
 */
464 ASSERT_ALIGNED_DOUBLE();
465 tmp5
= X
[2 * iostride
];
466 tmp10
= X
[7 * iostride
];
467 tmp14
= tmp5
+ tmp10
;
468 tmp18
= tmp5
- tmp10
;
469 tmp7
= X
[4 * iostride
];
470 tmp8
= X
[5 * iostride
];
473 tmp6
= X
[8 * iostride
];
478 tmp3
= X
[3 * iostride
];
479 tmp2
= X
[6 * iostride
];
480 tmp11
= K866025403
* (tmp3
+ tmp2
);
481 tmp15
= tmp1
- (K500000000
* (tmp2
- tmp3
));
482 tmp4
= tmp1
+ tmp2
- tmp3
;
483 Y
[0] = -(tmp11
+ (K984807753
* tmp12
) + (K342020143
* tmp13
) + (K642787609
* tmp14
));
484 Y
[-3 * iostride
] = (K342020143
* tmp12
) + (K984807753
* tmp14
) - (K642787609
* tmp13
) - tmp11
;
485 Y
[-2 * iostride
] = tmp11
+ (K342020143
* tmp14
) - (K984807753
* tmp13
) - (K642787609
* tmp12
);
486 X
[2 * iostride
] = tmp15
+ (K173648177
* tmp17
) - (K939692620
* tmp18
) - (K766044443
* tmp16
);
487 X
[3 * iostride
] = tmp15
+ (K939692620
* tmp16
) + (K766044443
* tmp17
) + (K173648177
* tmp18
);
488 X
[0] = tmp15
+ (K766044443
* tmp18
) - (K939692620
* tmp17
) - (K173648177
* tmp16
);
489 Y
[-iostride
] = K866025403
* (tmp8
+ tmp7
- (tmp5
+ tmp6
+ tmp9
+ tmp10
));
490 X
[iostride
] = tmp4
+ (K500000000
* (tmp8
+ tmp9
+ tmp10
- (tmp5
+ tmp6
+ tmp7
)));
491 X
[4 * iostride
] = tmp4
+ tmp5
+ tmp6
+ tmp7
- (tmp8
+ tmp9
+ tmp10
);
/*
 * Table of the integers 1..8, in ascending order.
 * NOTE(review): presumably referenced by the codelet descriptor that
 * follows; its eight entries match the 8-entry step applied to W in the
 * loop of fftw_hc2hc_forward_9 -- confirm against the full descriptor.
 */
static const int twiddle_order[] = {1, 2, 3, 4, 5, 6, 7, 8};
497 fftw_codelet_desc fftw_hc2hc_forward_9_desc
=
499 "fftw_hc2hc_forward_9",
500 (void (*)()) fftw_hc2hc_forward_9
,