/*
 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Tue May 18 13:56:04 EDT 1999 */
/* Generated by: ./genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -hc2hc-backward 8 */
/*
 * This function contains 108 FP additions, 50 FP multiplications,
 * (or, 90 additions, 32 multiplications, 18 fused multiply/add),
 * 31 stack variables, and 64 memory accesses
 */
33 static const fftw_real K765366864
= FFTW_KONST(+0.765366864730179543456919968060797733522689125);
34 static const fftw_real K1_847759065
= FFTW_KONST(+1.847759065022573512256366378793576573644833252);
35 static const fftw_real K707106781
= FFTW_KONST(+0.707106781186547524400844362104849039284835938);
36 static const fftw_real K1_414213562
= FFTW_KONST(+1.414213562373095048801688724209698078569671875);
37 static const fftw_real K2_000000000
= FFTW_KONST(+2.000000000000000000000000000000000000000000000);
/*
 * fftw_hc2hc_backward_8
 *
 * Parameters, as used by the statements visible in this file:
 *   A        - array of fftw_real; it is written in place through the
 *              Y pointer derived from it here.
 *   W        - read-only fftw_complex array; the loop advances it by 7
 *              entries per iteration.
 *   iostride - multiplier applied to every element index on X and Y.
 *   m        - upper bound of the loop counter (the loop runs while i < m).
 *   dist     - amount by which X moves forward and Y moves backward after
 *              each loop iteration.
 *
 * Y is placed 8 * iostride elements past A, and later statements index it
 * with zero or negative multiples of iostride.
 *
 * NOTE(review): the opening brace, the local declarations and the
 * initialisation of X are not visible in this excerpt, and the leading
 * integers ("46", "52") look like line numbers left over from extraction
 * rather than C tokens -- confirm against the generator output.
 */
46 void fftw_hc2hc_backward_8(fftw_real
*A
, const fftw_complex
*W
, int iostride
, int m
, int dist
)
52 Y
= A
+ (8 * iostride
);
/*
 * Straight-line section that precedes the `for` loop.  With s = iostride,
 * the visible statements load X[2s], X[3s], X[4s], Y[-s], Y[-2s] and
 * Y[-3s], then store results to all of X[0], X[s], ... X[7s].  W is not
 * referenced in this section.
 *
 * NOTE(review): tmp103 and tmp109 are consumed below but their loads are
 * not visible in this excerpt; the leading integers on statement lines
 * look like extraction line numbers -- confirm against the generator output.
 */
64 ASSERT_ALIGNED_DOUBLE();
70 ASSERT_ALIGNED_DOUBLE();
/* Two inputs are simply doubled. */
71 tmp106
= X
[2 * iostride
];
72 tmp107
= K2_000000000
* tmp106
;
73 tmp117
= Y
[-2 * iostride
];
74 tmp118
= K2_000000000
* tmp117
;
/* Sum and difference of tmp103 and X[4s]. */
76 tmp104
= X
[4 * iostride
];
77 tmp105
= tmp103
+ tmp104
;
78 tmp116
= tmp103
- tmp104
;
84 ASSERT_ALIGNED_DOUBLE();
86 tmp110
= X
[3 * iostride
];
87 tmp111
= K2_000000000
* (tmp109
+ tmp110
);
88 tmp120
= tmp109
- tmp110
;
89 tmp113
= Y
[-iostride
];
90 tmp114
= Y
[-3 * iostride
];
91 tmp115
= K2_000000000
* (tmp113
- tmp114
);
92 tmp121
= tmp114
+ tmp113
;
/* Stores to the even-indexed slots X[0], X[2s], X[4s], X[6s]. */
95 tmp108
= tmp105
+ tmp107
;
96 X
[4 * iostride
] = tmp108
- tmp111
;
97 X
[0] = tmp108
+ tmp111
;
98 tmp112
= tmp105
- tmp107
;
99 X
[6 * iostride
] = tmp112
+ tmp115
;
100 X
[2 * iostride
] = tmp112
- tmp115
;
/*
 * Stores to the odd-indexed slots X[s], X[3s], X[5s], X[7s]; the
 * tmp120/tmp121 combinations are scaled by K1_414213562 first.
 */
106 ASSERT_ALIGNED_DOUBLE();
107 tmp119
= tmp116
- tmp118
;
108 tmp122
= K1_414213562
* (tmp120
- tmp121
);
109 X
[5 * iostride
] = tmp119
- tmp122
;
110 X
[iostride
] = tmp119
+ tmp122
;
111 tmp123
= tmp116
+ tmp118
;
112 tmp124
= K1_414213562
* (tmp120
+ tmp121
);
113 X
[3 * iostride
] = tmp123
- tmp124
;
114 X
[7 * iostride
] = tmp123
+ tmp124
;
/*
 * Main loop.  i starts at 2 and advances by 2 while i < m; after every
 * iteration X moves forward by dist, Y moves backward by dist, and W moves
 * forward by 7 entries.
 *
 * With s = iostride, the visible body loads elements at positive multiples
 * of s from X and at negative multiples of s from Y, forms sums and
 * differences, and stores sixteen results back into the same two arrays
 * (X[0] .. X[7s] and Y[0], Y[-s] .. Y[-7s]).
 *
 * NOTE(review): several temporaries (e.g. tmp23, tmp30, tmp40 and every
 * multiplier such as tmp37/tmp39) are used without a visible assignment,
 * and the loop's closing brace is not visible in this excerpt.  The
 * multipliers are presumably components of the W entries -- confirm
 * against the generator output.
 */
119 for (i
= 2; i
< m
; i
= i
+ 2, X
= X
+ dist
, Y
= Y
- dist
, W
= W
+ 7) {
136 ASSERT_ALIGNED_DOUBLE();
146 ASSERT_ALIGNED_DOUBLE();
152 ASSERT_ALIGNED_DOUBLE();
/* First group of loads: each pair yields both a sum and a difference. */
154 tmp24
= Y
[-4 * iostride
];
155 tmp25
= tmp23
+ tmp24
;
156 tmp68
= tmp23
- tmp24
;
158 tmp41
= X
[4 * iostride
];
159 tmp42
= tmp40
- tmp41
;
160 tmp81
= tmp40
+ tmp41
;
167 ASSERT_ALIGNED_DOUBLE();
168 tmp26
= X
[2 * iostride
];
169 tmp27
= Y
[-6 * iostride
];
170 tmp28
= tmp26
+ tmp27
;
171 tmp80
= tmp26
- tmp27
;
172 tmp43
= Y
[-2 * iostride
];
173 tmp44
= X
[6 * iostride
];
174 tmp45
= tmp43
- tmp44
;
175 tmp69
= tmp43
+ tmp44
;
177 tmp29
= tmp25
+ tmp28
;
178 tmp60
= tmp25
- tmp28
;
179 tmp46
= tmp42
+ tmp45
;
180 tmp56
= tmp42
- tmp45
;
181 tmp70
= tmp68
- tmp69
;
182 tmp96
= tmp68
+ tmp69
;
183 tmp82
= tmp80
+ tmp81
;
184 tmp92
= tmp81
- tmp80
;
195 ASSERT_ALIGNED_DOUBLE();
201 ASSERT_ALIGNED_DOUBLE();
/* Second group of loads, combined the same way. */
203 tmp31
= Y
[-5 * iostride
];
204 tmp32
= tmp30
+ tmp31
;
205 tmp71
= tmp30
- tmp31
;
206 tmp47
= Y
[-iostride
];
207 tmp48
= X
[5 * iostride
];
208 tmp49
= tmp47
- tmp48
;
209 tmp72
= tmp47
+ tmp48
;
216 ASSERT_ALIGNED_DOUBLE();
217 tmp33
= Y
[-7 * iostride
];
218 tmp34
= X
[3 * iostride
];
219 tmp35
= tmp33
+ tmp34
;
220 tmp74
= tmp33
- tmp34
;
221 tmp50
= Y
[-3 * iostride
];
222 tmp51
= X
[7 * iostride
];
223 tmp52
= tmp50
- tmp51
;
224 tmp75
= tmp50
+ tmp51
;
226 tmp36
= tmp32
+ tmp35
;
227 tmp57
= tmp32
- tmp35
;
228 tmp53
= tmp49
+ tmp52
;
229 tmp61
= tmp52
- tmp49
;
230 tmp73
= tmp71
- tmp72
;
231 tmp83
= tmp71
+ tmp72
;
232 tmp76
= tmp74
- tmp75
;
233 tmp84
= tmp74
+ tmp75
;
/* These two outputs are plain sums; no multiplier pair is applied. */
235 X
[0] = tmp29
+ tmp36
;
236 Y
[-7 * iostride
] = tmp46
+ tmp53
;
/*
 * Every remaining output is a two-product combination.  Outputs come in
 * pairs that share the same two multipliers and the same two operands,
 * one member stored through X and the other through Y.
 */
242 ASSERT_ALIGNED_DOUBLE();
243 tmp38
= tmp29
- tmp36
;
244 tmp54
= tmp46
- tmp53
;
247 X
[4 * iostride
] = (tmp37
* tmp38
) + (tmp39
* tmp54
);
248 Y
[-3 * iostride
] = (tmp37
* tmp54
) - (tmp39
* tmp38
);
255 ASSERT_ALIGNED_DOUBLE();
256 tmp64
= tmp57
+ tmp56
;
257 tmp66
= tmp60
+ tmp61
;
260 Y
[-5 * iostride
] = (tmp63
* tmp64
) - (tmp65
* tmp66
);
261 X
[2 * iostride
] = (tmp65
* tmp64
) + (tmp63
* tmp66
);
268 ASSERT_ALIGNED_DOUBLE();
269 tmp58
= tmp56
- tmp57
;
270 tmp62
= tmp60
- tmp61
;
273 Y
[-iostride
] = (tmp55
* tmp58
) - (tmp59
* tmp62
);
274 X
[6 * iostride
] = (tmp59
* tmp58
) + (tmp55
* tmp62
);
/* The next operands are first scaled by K707106781. */
283 ASSERT_ALIGNED_DOUBLE();
284 tmp93
= K707106781
* (tmp73
- tmp76
);
285 tmp94
= tmp92
+ tmp93
;
286 tmp100
= tmp92
- tmp93
;
287 tmp97
= K707106781
* (tmp83
+ tmp84
);
288 tmp98
= tmp96
- tmp97
;
289 tmp102
= tmp96
+ tmp97
;
295 ASSERT_ALIGNED_DOUBLE();
298 Y
[-4 * iostride
] = (tmp91
* tmp94
) - (tmp95
* tmp98
);
299 X
[3 * iostride
] = (tmp95
* tmp94
) + (tmp91
* tmp98
);
302 Y
[0] = (tmp99
* tmp100
) - (tmp101
* tmp102
);
303 X
[7 * iostride
] = (tmp101
* tmp100
) + (tmp99
* tmp102
);
313 ASSERT_ALIGNED_DOUBLE();
314 tmp77
= K707106781
* (tmp73
+ tmp76
);
315 tmp78
= tmp70
- tmp77
;
316 tmp88
= tmp70
+ tmp77
;
317 tmp85
= K707106781
* (tmp83
- tmp84
);
318 tmp86
= tmp82
- tmp85
;
319 tmp90
= tmp82
+ tmp85
;
325 ASSERT_ALIGNED_DOUBLE();
328 X
[5 * iostride
] = (tmp67
* tmp78
) + (tmp79
* tmp86
);
329 Y
[-2 * iostride
] = (tmp67
* tmp86
) - (tmp79
* tmp78
);
332 X
[iostride
] = (tmp87
* tmp88
) + (tmp89
* tmp90
);
333 Y
[-6 * iostride
] = (tmp87
* tmp90
) - (tmp89
* tmp88
);
/*
 * Trailing straight-line section.  With s = iostride, the visible
 * statements load X[2s], X[3s], Y[-2s] and Y[-3s], and store results to
 * all of X[0], X[s], ... X[7s], scaling by K2_000000000, K1_414213562,
 * K1_847759065 and K765366864.  W is not referenced here.
 *
 * NOTE(review): most temporaries used below (tmp3, tmp6, tmp7, tmp10,
 * tmp12, tmp13, tmp19, tmp21) have no visible assignment, and the loaded
 * values tmp2, tmp4 and tmp8 have no visible use.  Whether this section is
 * guarded by a condition, and where the preceding loop closes, cannot be
 * seen in this excerpt -- confirm against the generator output.
 */
348 ASSERT_ALIGNED_DOUBLE();
354 ASSERT_ALIGNED_DOUBLE();
356 tmp2
= X
[3 * iostride
];
360 tmp14
= Y
[-3 * iostride
];
361 tmp15
= tmp13
+ tmp14
;
362 tmp20
= tmp13
- tmp14
;
369 ASSERT_ALIGNED_DOUBLE();
370 tmp4
= X
[2 * iostride
];
374 tmp8
= Y
[-2 * iostride
];
/* Even-indexed outputs X[0], X[2s], X[4s], X[6s]. */
379 X
[0] = K2_000000000
* (tmp3
+ tmp6
);
381 tmp22
= tmp20
- tmp21
;
382 X
[2 * iostride
] = K1_414213562
* (tmp19
- tmp22
);
383 X
[6 * iostride
] = -(K1_414213562
* (tmp19
+ tmp22
));
384 X
[4 * iostride
] = -(K2_000000000
* (tmp21
+ tmp20
));
/*
 * Odd-indexed outputs X[s], X[3s], X[5s], X[7s]: each mixes two operands
 * weighted by K1_847759065 and K765366864.
 */
390 ASSERT_ALIGNED_DOUBLE();
391 tmp11
= tmp7
- tmp10
;
392 tmp16
= tmp12
+ tmp15
;
393 X
[iostride
] = (K1_847759065
* tmp11
) - (K765366864
* tmp16
);
394 X
[5 * iostride
] = -((K765366864
* tmp11
) + (K1_847759065
* tmp16
));
395 tmp17
= tmp7
+ tmp10
;
396 tmp18
= tmp15
- tmp12
;
397 X
[3 * iostride
] = (K765366864
* tmp17
) - (K1_847759065
* tmp18
);
398 X
[7 * iostride
] = -((K1_847759065
* tmp17
) + (K765366864
* tmp18
));
/*
 * Table of the seven indices 1..7, in ascending order.  Its length matches
 * the 7-entry step by which fftw_hc2hc_backward_8 advances W on each loop
 * iteration.
 *
 * NOTE(review): presumably consumed by the codelet descriptor that follows;
 * that initializer is not fully visible here -- confirm.
 *
 * The stray integers "403" / "404" that preceded the declaration and its
 * initializer were leftover line numbers, not C tokens, and have been
 * removed so the definition parses.
 */
static const int twiddle_order[] = {1, 2, 3, 4, 5, 6, 7};
405 fftw_codelet_desc fftw_hc2hc_backward_8_desc
=
407 "fftw_hc2hc_backward_8",
408 (void (*)()) fftw_hc2hc_backward_8
,