Bump clang version to 18 (#14116)
[betaflight.git] / lib / main / CMSIS / DSP / Source / TransformFunctions / arm_cfft_radix4_f32.c
blobd6f66aebf897aec232cd2caf06db1e697af7f636
1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_f32.c
4 * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
6 * $Date: 27. January 2017
7 * $Revision: V.1.5.1
9 * Target Processor: Cortex-M cores
10 * -------------------------------------------------------------------- */
12 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
14 * SPDX-License-Identifier: Apache-2.0
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
20 * www.apache.org/licenses/LICENSE-2.0
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
29 #include "arm_math.h"
31 extern void arm_bitreversal_f32(
32 float32_t * pSrc,
33 uint16_t fftSize,
34 uint16_t bitRevFactor,
35 uint16_t * pBitRevTab);
37 void arm_radix4_butterfly_f32(
38 float32_t * pSrc,
39 uint16_t fftLen,
40 float32_t * pCoef,
41 uint16_t twidCoefModifier);
43 void arm_radix4_butterfly_inverse_f32(
44 float32_t * pSrc,
45 uint16_t fftLen,
46 float32_t * pCoef,
47 uint16_t twidCoefModifier,
48 float32_t onebyfftLen);
51 /**
52 * @ingroup groupTransforms
55 /**
56 * @addtogroup ComplexFFT
57 * @{
60 /**
61 * @details
62 * @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
63 * @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed
64 * in the future.
65 * @param[in] *S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure.
66 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
67 * @return none.
70 void arm_cfft_radix4_f32(
71 const arm_cfft_radix4_instance_f32 * S,
72 float32_t * pSrc)
74 if (S->ifftFlag == 1U)
76 /* Complex IFFT radix-4 */
77 arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
79 else
81 /* Complex FFT radix-4 */
82 arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
85 if (S->bitReverseFlag == 1U)
87 /* Bit Reversal */
88 arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
93 /**
94 * @} end of ComplexFFT group
97 /* ----------------------------------------------------------------------
98 * Internal helper function used by the FFTs
99 * ---------------------------------------------------------------------- */
102 * @brief Core function for the floating-point CFFT butterfly process.
103 * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
104 * @param[in] fftLen length of the FFT.
105 * @param[in] *pCoef points to the twiddle coefficient buffer.
106 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
107 * @return none.
110 void arm_radix4_butterfly_f32(
111 float32_t * pSrc,
112 uint16_t fftLen,
113 float32_t * pCoef,
114 uint16_t twidCoefModifier)
117 float32_t co1, co2, co3, si1, si2, si3;
118 uint32_t ia1, ia2, ia3;
119 uint32_t i0, i1, i2, i3;
120 uint32_t n1, n2, j, k;
122 #if defined (ARM_MATH_DSP)
124 /* Run the below code for Cortex-M4 and Cortex-M3 */
126 float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
127 float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
128 Ybminusd;
129 float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
130 float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
131 float32_t *ptr1;
132 float32_t p0,p1,p2,p3,p4,p5;
133 float32_t a0,a1,a2,a3,a4,a5,a6,a7;
135 /* Initializations for the first stage */
136 n2 = fftLen;
137 n1 = n2;
139 /* n2 = fftLen/4 */
140 n2 >>= 2U;
141 i0 = 0U;
142 ia1 = 0U;
144 j = n2;
146 /* Calculation of first stage */
149 /* index calculation for the input as, */
150 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
151 i1 = i0 + n2;
152 i2 = i1 + n2;
153 i3 = i2 + n2;
155 xaIn = pSrc[(2U * i0)];
156 yaIn = pSrc[(2U * i0) + 1U];
158 xbIn = pSrc[(2U * i1)];
159 ybIn = pSrc[(2U * i1) + 1U];
161 xcIn = pSrc[(2U * i2)];
162 ycIn = pSrc[(2U * i2) + 1U];
164 xdIn = pSrc[(2U * i3)];
165 ydIn = pSrc[(2U * i3) + 1U];
167 /* xa + xc */
168 Xaplusc = xaIn + xcIn;
169 /* xb + xd */
170 Xbplusd = xbIn + xdIn;
171 /* ya + yc */
172 Yaplusc = yaIn + ycIn;
173 /* yb + yd */
174 Ybplusd = ybIn + ydIn;
176 /* index calculation for the coefficients */
177 ia2 = ia1 + ia1;
178 co2 = pCoef[ia2 * 2U];
179 si2 = pCoef[(ia2 * 2U) + 1U];
181 /* xa - xc */
182 Xaminusc = xaIn - xcIn;
183 /* xb - xd */
184 Xbminusd = xbIn - xdIn;
185 /* ya - yc */
186 Yaminusc = yaIn - ycIn;
187 /* yb - yd */
188 Ybminusd = ybIn - ydIn;
190 /* xa' = xa + xb + xc + xd */
191 pSrc[(2U * i0)] = Xaplusc + Xbplusd;
192 /* ya' = ya + yb + yc + yd */
193 pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
195 /* (xa - xc) + (yb - yd) */
196 Xb12C_out = (Xaminusc + Ybminusd);
197 /* (ya - yc) + (xb - xd) */
198 Yb12C_out = (Yaminusc - Xbminusd);
199 /* (xa + xc) - (xb + xd) */
200 Xc12C_out = (Xaplusc - Xbplusd);
201 /* (ya + yc) - (yb + yd) */
202 Yc12C_out = (Yaplusc - Ybplusd);
203 /* (xa - xc) - (yb - yd) */
204 Xd12C_out = (Xaminusc - Ybminusd);
205 /* (ya - yc) + (xb - xd) */
206 Yd12C_out = (Xbminusd + Yaminusc);
208 co1 = pCoef[ia1 * 2U];
209 si1 = pCoef[(ia1 * 2U) + 1U];
211 /* index calculation for the coefficients */
212 ia3 = ia2 + ia1;
213 co3 = pCoef[ia3 * 2U];
214 si3 = pCoef[(ia3 * 2U) + 1U];
216 Xb12_out = Xb12C_out * co1;
217 Yb12_out = Yb12C_out * co1;
218 Xc12_out = Xc12C_out * co2;
219 Yc12_out = Yc12C_out * co2;
220 Xd12_out = Xd12C_out * co3;
221 Yd12_out = Yd12C_out * co3;
223 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
224 //Xb12_out -= Yb12C_out * si1;
225 p0 = Yb12C_out * si1;
226 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
227 //Yb12_out += Xb12C_out * si1;
228 p1 = Xb12C_out * si1;
229 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
230 //Xc12_out -= Yc12C_out * si2;
231 p2 = Yc12C_out * si2;
232 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
233 //Yc12_out += Xc12C_out * si2;
234 p3 = Xc12C_out * si2;
235 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
236 //Xd12_out -= Yd12C_out * si3;
237 p4 = Yd12C_out * si3;
238 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
239 //Yd12_out += Xd12C_out * si3;
240 p5 = Xd12C_out * si3;
242 Xb12_out += p0;
243 Yb12_out -= p1;
244 Xc12_out += p2;
245 Yc12_out -= p3;
246 Xd12_out += p4;
247 Yd12_out -= p5;
249 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
250 pSrc[2U * i1] = Xc12_out;
252 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
253 pSrc[(2U * i1) + 1U] = Yc12_out;
255 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
256 pSrc[2U * i2] = Xb12_out;
258 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
259 pSrc[(2U * i2) + 1U] = Yb12_out;
261 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
262 pSrc[2U * i3] = Xd12_out;
264 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
265 pSrc[(2U * i3) + 1U] = Yd12_out;
267 /* Twiddle coefficients index modifier */
268 ia1 += twidCoefModifier;
270 /* Updating input index */
271 i0++;
274 while (--j);
276 twidCoefModifier <<= 2U;
278 /* Calculation of second stage to excluding last stage */
279 for (k = fftLen >> 2U; k > 4U; k >>= 2U)
281 /* Initializations for the first stage */
282 n1 = n2;
283 n2 >>= 2U;
284 ia1 = 0U;
286 /* Calculation of first stage */
287 j = 0;
290 /* index calculation for the coefficients */
291 ia2 = ia1 + ia1;
292 ia3 = ia2 + ia1;
293 co1 = pCoef[ia1 * 2U];
294 si1 = pCoef[(ia1 * 2U) + 1U];
295 co2 = pCoef[ia2 * 2U];
296 si2 = pCoef[(ia2 * 2U) + 1U];
297 co3 = pCoef[ia3 * 2U];
298 si3 = pCoef[(ia3 * 2U) + 1U];
300 /* Twiddle coefficients index modifier */
301 ia1 += twidCoefModifier;
303 i0 = j;
306 /* index calculation for the input as, */
307 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
308 i1 = i0 + n2;
309 i2 = i1 + n2;
310 i3 = i2 + n2;
312 xaIn = pSrc[(2U * i0)];
313 yaIn = pSrc[(2U * i0) + 1U];
315 xbIn = pSrc[(2U * i1)];
316 ybIn = pSrc[(2U * i1) + 1U];
318 xcIn = pSrc[(2U * i2)];
319 ycIn = pSrc[(2U * i2) + 1U];
321 xdIn = pSrc[(2U * i3)];
322 ydIn = pSrc[(2U * i3) + 1U];
324 /* xa - xc */
325 Xaminusc = xaIn - xcIn;
326 /* (xb - xd) */
327 Xbminusd = xbIn - xdIn;
328 /* ya - yc */
329 Yaminusc = yaIn - ycIn;
330 /* (yb - yd) */
331 Ybminusd = ybIn - ydIn;
333 /* xa + xc */
334 Xaplusc = xaIn + xcIn;
335 /* xb + xd */
336 Xbplusd = xbIn + xdIn;
337 /* ya + yc */
338 Yaplusc = yaIn + ycIn;
339 /* yb + yd */
340 Ybplusd = ybIn + ydIn;
342 /* (xa - xc) + (yb - yd) */
343 Xb12C_out = (Xaminusc + Ybminusd);
344 /* (ya - yc) - (xb - xd) */
345 Yb12C_out = (Yaminusc - Xbminusd);
346 /* xa + xc -(xb + xd) */
347 Xc12C_out = (Xaplusc - Xbplusd);
348 /* (ya + yc) - (yb + yd) */
349 Yc12C_out = (Yaplusc - Ybplusd);
350 /* (xa - xc) - (yb - yd) */
351 Xd12C_out = (Xaminusc - Ybminusd);
352 /* (ya - yc) + (xb - xd) */
353 Yd12C_out = (Xbminusd + Yaminusc);
355 pSrc[(2U * i0)] = Xaplusc + Xbplusd;
356 pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
358 Xb12_out = Xb12C_out * co1;
359 Yb12_out = Yb12C_out * co1;
360 Xc12_out = Xc12C_out * co2;
361 Yc12_out = Yc12C_out * co2;
362 Xd12_out = Xd12C_out * co3;
363 Yd12_out = Yd12C_out * co3;
365 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
366 //Xb12_out -= Yb12C_out * si1;
367 p0 = Yb12C_out * si1;
368 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
369 //Yb12_out += Xb12C_out * si1;
370 p1 = Xb12C_out * si1;
371 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
372 //Xc12_out -= Yc12C_out * si2;
373 p2 = Yc12C_out * si2;
374 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
375 //Yc12_out += Xc12C_out * si2;
376 p3 = Xc12C_out * si2;
377 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
378 //Xd12_out -= Yd12C_out * si3;
379 p4 = Yd12C_out * si3;
380 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
381 //Yd12_out += Xd12C_out * si3;
382 p5 = Xd12C_out * si3;
384 Xb12_out += p0;
385 Yb12_out -= p1;
386 Xc12_out += p2;
387 Yc12_out -= p3;
388 Xd12_out += p4;
389 Yd12_out -= p5;
391 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
392 pSrc[2U * i1] = Xc12_out;
394 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
395 pSrc[(2U * i1) + 1U] = Yc12_out;
397 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
398 pSrc[2U * i2] = Xb12_out;
400 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
401 pSrc[(2U * i2) + 1U] = Yb12_out;
403 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
404 pSrc[2U * i3] = Xd12_out;
406 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
407 pSrc[(2U * i3) + 1U] = Yd12_out;
409 i0 += n1;
410 } while (i0 < fftLen);
411 j++;
412 } while (j <= (n2 - 1U));
413 twidCoefModifier <<= 2U;
416 j = fftLen >> 2;
417 ptr1 = &pSrc[0];
419 /* Calculations of last stage */
422 xaIn = ptr1[0];
423 yaIn = ptr1[1];
424 xbIn = ptr1[2];
425 ybIn = ptr1[3];
426 xcIn = ptr1[4];
427 ycIn = ptr1[5];
428 xdIn = ptr1[6];
429 ydIn = ptr1[7];
431 /* xa + xc */
432 Xaplusc = xaIn + xcIn;
434 /* xa - xc */
435 Xaminusc = xaIn - xcIn;
437 /* ya + yc */
438 Yaplusc = yaIn + ycIn;
440 /* ya - yc */
441 Yaminusc = yaIn - ycIn;
443 /* xb + xd */
444 Xbplusd = xbIn + xdIn;
446 /* yb + yd */
447 Ybplusd = ybIn + ydIn;
449 /* (xb-xd) */
450 Xbminusd = xbIn - xdIn;
452 /* (yb-yd) */
453 Ybminusd = ybIn - ydIn;
455 /* xa' = xa + xb + xc + xd */
456 a0 = (Xaplusc + Xbplusd);
457 /* ya' = ya + yb + yc + yd */
458 a1 = (Yaplusc + Ybplusd);
459 /* xc' = (xa-xb+xc-xd) */
460 a2 = (Xaplusc - Xbplusd);
461 /* yc' = (ya-yb+yc-yd) */
462 a3 = (Yaplusc - Ybplusd);
463 /* xb' = (xa+yb-xc-yd) */
464 a4 = (Xaminusc + Ybminusd);
465 /* yb' = (ya-xb-yc+xd) */
466 a5 = (Yaminusc - Xbminusd);
467 /* xd' = (xa-yb-xc+yd)) */
468 a6 = (Xaminusc - Ybminusd);
469 /* yd' = (ya+xb-yc-xd) */
470 a7 = (Xbminusd + Yaminusc);
472 ptr1[0] = a0;
473 ptr1[1] = a1;
474 ptr1[2] = a2;
475 ptr1[3] = a3;
476 ptr1[4] = a4;
477 ptr1[5] = a5;
478 ptr1[6] = a6;
479 ptr1[7] = a7;
481 /* increment pointer by 8 */
482 ptr1 += 8U;
483 } while (--j);
485 #else
487 float32_t t1, t2, r1, r2, s1, s2;
489 /* Run the below code for Cortex-M0 */
491 /* Initializations for the fft calculation */
492 n2 = fftLen;
493 n1 = n2;
494 for (k = fftLen; k > 1U; k >>= 2U)
496 /* Initializations for the fft calculation */
497 n1 = n2;
498 n2 >>= 2U;
499 ia1 = 0U;
501 /* FFT Calculation */
502 j = 0;
505 /* index calculation for the coefficients */
506 ia2 = ia1 + ia1;
507 ia3 = ia2 + ia1;
508 co1 = pCoef[ia1 * 2U];
509 si1 = pCoef[(ia1 * 2U) + 1U];
510 co2 = pCoef[ia2 * 2U];
511 si2 = pCoef[(ia2 * 2U) + 1U];
512 co3 = pCoef[ia3 * 2U];
513 si3 = pCoef[(ia3 * 2U) + 1U];
515 /* Twiddle coefficients index modifier */
516 ia1 = ia1 + twidCoefModifier;
518 i0 = j;
521 /* index calculation for the input as, */
522 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
523 i1 = i0 + n2;
524 i2 = i1 + n2;
525 i3 = i2 + n2;
527 /* xa + xc */
528 r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
530 /* xa - xc */
531 r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
533 /* ya + yc */
534 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
536 /* ya - yc */
537 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
539 /* xb + xd */
540 t1 = pSrc[2U * i1] + pSrc[2U * i3];
542 /* xa' = xa + xb + xc + xd */
543 pSrc[2U * i0] = r1 + t1;
545 /* xa + xc -(xb + xd) */
546 r1 = r1 - t1;
548 /* yb + yd */
549 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
551 /* ya' = ya + yb + yc + yd */
552 pSrc[(2U * i0) + 1U] = s1 + t2;
554 /* (ya + yc) - (yb + yd) */
555 s1 = s1 - t2;
557 /* (yb - yd) */
558 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
560 /* (xb - xd) */
561 t2 = pSrc[2U * i1] - pSrc[2U * i3];
563 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
564 pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
566 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
567 pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
569 /* (xa - xc) + (yb - yd) */
570 r1 = r2 + t1;
572 /* (xa - xc) - (yb - yd) */
573 r2 = r2 - t1;
575 /* (ya - yc) - (xb - xd) */
576 s1 = s2 - t2;
578 /* (ya - yc) + (xb - xd) */
579 s2 = s2 + t2;
581 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
582 pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
584 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
585 pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
587 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
588 pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
590 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
591 pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
593 i0 += n1;
594 } while ( i0 < fftLen);
595 j++;
596 } while (j <= (n2 - 1U));
597 twidCoefModifier <<= 2U;
600 #endif /* #if defined (ARM_MATH_DSP) */
605 * @brief Core function for the floating-point CIFFT butterfly process.
606 * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
607 * @param[in] fftLen length of the FFT.
608 * @param[in] *pCoef points to twiddle coefficient buffer.
609 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
610 * @param[in] onebyfftLen value of 1/fftLen.
611 * @return none.
614 void arm_radix4_butterfly_inverse_f32(
615 float32_t * pSrc,
616 uint16_t fftLen,
617 float32_t * pCoef,
618 uint16_t twidCoefModifier,
619 float32_t onebyfftLen)
621 float32_t co1, co2, co3, si1, si2, si3;
622 uint32_t ia1, ia2, ia3;
623 uint32_t i0, i1, i2, i3;
624 uint32_t n1, n2, j, k;
626 #if defined (ARM_MATH_DSP)
628 float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
629 float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
630 Ybminusd;
631 float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
632 float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
633 float32_t *ptr1;
634 float32_t p0,p1,p2,p3,p4,p5,p6,p7;
635 float32_t a0,a1,a2,a3,a4,a5,a6,a7;
638 /* Initializations for the first stage */
639 n2 = fftLen;
640 n1 = n2;
642 /* n2 = fftLen/4 */
643 n2 >>= 2U;
644 i0 = 0U;
645 ia1 = 0U;
647 j = n2;
649 /* Calculation of first stage */
652 /* index calculation for the input as, */
653 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
654 i1 = i0 + n2;
655 i2 = i1 + n2;
656 i3 = i2 + n2;
658 /* Butterfly implementation */
659 xaIn = pSrc[(2U * i0)];
660 yaIn = pSrc[(2U * i0) + 1U];
662 xcIn = pSrc[(2U * i2)];
663 ycIn = pSrc[(2U * i2) + 1U];
665 xbIn = pSrc[(2U * i1)];
666 ybIn = pSrc[(2U * i1) + 1U];
668 xdIn = pSrc[(2U * i3)];
669 ydIn = pSrc[(2U * i3) + 1U];
671 /* xa + xc */
672 Xaplusc = xaIn + xcIn;
673 /* xb + xd */
674 Xbplusd = xbIn + xdIn;
675 /* ya + yc */
676 Yaplusc = yaIn + ycIn;
677 /* yb + yd */
678 Ybplusd = ybIn + ydIn;
680 /* index calculation for the coefficients */
681 ia2 = ia1 + ia1;
682 co2 = pCoef[ia2 * 2U];
683 si2 = pCoef[(ia2 * 2U) + 1U];
685 /* xa - xc */
686 Xaminusc = xaIn - xcIn;
687 /* xb - xd */
688 Xbminusd = xbIn - xdIn;
689 /* ya - yc */
690 Yaminusc = yaIn - ycIn;
691 /* yb - yd */
692 Ybminusd = ybIn - ydIn;
694 /* xa' = xa + xb + xc + xd */
695 pSrc[(2U * i0)] = Xaplusc + Xbplusd;
697 /* ya' = ya + yb + yc + yd */
698 pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
700 /* (xa - xc) - (yb - yd) */
701 Xb12C_out = (Xaminusc - Ybminusd);
702 /* (ya - yc) + (xb - xd) */
703 Yb12C_out = (Yaminusc + Xbminusd);
704 /* (xa + xc) - (xb + xd) */
705 Xc12C_out = (Xaplusc - Xbplusd);
706 /* (ya + yc) - (yb + yd) */
707 Yc12C_out = (Yaplusc - Ybplusd);
708 /* (xa - xc) + (yb - yd) */
709 Xd12C_out = (Xaminusc + Ybminusd);
710 /* (ya - yc) - (xb - xd) */
711 Yd12C_out = (Yaminusc - Xbminusd);
713 co1 = pCoef[ia1 * 2U];
714 si1 = pCoef[(ia1 * 2U) + 1U];
716 /* index calculation for the coefficients */
717 ia3 = ia2 + ia1;
718 co3 = pCoef[ia3 * 2U];
719 si3 = pCoef[(ia3 * 2U) + 1U];
721 Xb12_out = Xb12C_out * co1;
722 Yb12_out = Yb12C_out * co1;
723 Xc12_out = Xc12C_out * co2;
724 Yc12_out = Yc12C_out * co2;
725 Xd12_out = Xd12C_out * co3;
726 Yd12_out = Yd12C_out * co3;
728 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
729 //Xb12_out -= Yb12C_out * si1;
730 p0 = Yb12C_out * si1;
731 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
732 //Yb12_out += Xb12C_out * si1;
733 p1 = Xb12C_out * si1;
734 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
735 //Xc12_out -= Yc12C_out * si2;
736 p2 = Yc12C_out * si2;
737 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
738 //Yc12_out += Xc12C_out * si2;
739 p3 = Xc12C_out * si2;
740 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
741 //Xd12_out -= Yd12C_out * si3;
742 p4 = Yd12C_out * si3;
743 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
744 //Yd12_out += Xd12C_out * si3;
745 p5 = Xd12C_out * si3;
747 Xb12_out -= p0;
748 Yb12_out += p1;
749 Xc12_out -= p2;
750 Yc12_out += p3;
751 Xd12_out -= p4;
752 Yd12_out += p5;
754 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
755 pSrc[2U * i1] = Xc12_out;
757 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
758 pSrc[(2U * i1) + 1U] = Yc12_out;
760 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
761 pSrc[2U * i2] = Xb12_out;
763 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
764 pSrc[(2U * i2) + 1U] = Yb12_out;
766 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
767 pSrc[2U * i3] = Xd12_out;
769 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
770 pSrc[(2U * i3) + 1U] = Yd12_out;
772 /* Twiddle coefficients index modifier */
773 ia1 = ia1 + twidCoefModifier;
775 /* Updating input index */
776 i0 = i0 + 1U;
778 } while (--j);
780 twidCoefModifier <<= 2U;
782 /* Calculation of second stage to excluding last stage */
783 for (k = fftLen >> 2U; k > 4U; k >>= 2U)
785 /* Initializations for the first stage */
786 n1 = n2;
787 n2 >>= 2U;
788 ia1 = 0U;
790 /* Calculation of first stage */
791 j = 0;
794 /* index calculation for the coefficients */
795 ia2 = ia1 + ia1;
796 ia3 = ia2 + ia1;
797 co1 = pCoef[ia1 * 2U];
798 si1 = pCoef[(ia1 * 2U) + 1U];
799 co2 = pCoef[ia2 * 2U];
800 si2 = pCoef[(ia2 * 2U) + 1U];
801 co3 = pCoef[ia3 * 2U];
802 si3 = pCoef[(ia3 * 2U) + 1U];
804 /* Twiddle coefficients index modifier */
805 ia1 = ia1 + twidCoefModifier;
807 i0 = j;
810 /* index calculation for the input as, */
811 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
812 i1 = i0 + n2;
813 i2 = i1 + n2;
814 i3 = i2 + n2;
816 xaIn = pSrc[(2U * i0)];
817 yaIn = pSrc[(2U * i0) + 1U];
819 xbIn = pSrc[(2U * i1)];
820 ybIn = pSrc[(2U * i1) + 1U];
822 xcIn = pSrc[(2U * i2)];
823 ycIn = pSrc[(2U * i2) + 1U];
825 xdIn = pSrc[(2U * i3)];
826 ydIn = pSrc[(2U * i3) + 1U];
828 /* xa - xc */
829 Xaminusc = xaIn - xcIn;
830 /* (xb - xd) */
831 Xbminusd = xbIn - xdIn;
832 /* ya - yc */
833 Yaminusc = yaIn - ycIn;
834 /* (yb - yd) */
835 Ybminusd = ybIn - ydIn;
837 /* xa + xc */
838 Xaplusc = xaIn + xcIn;
839 /* xb + xd */
840 Xbplusd = xbIn + xdIn;
841 /* ya + yc */
842 Yaplusc = yaIn + ycIn;
843 /* yb + yd */
844 Ybplusd = ybIn + ydIn;
846 /* (xa - xc) - (yb - yd) */
847 Xb12C_out = (Xaminusc - Ybminusd);
848 /* (ya - yc) + (xb - xd) */
849 Yb12C_out = (Yaminusc + Xbminusd);
850 /* xa + xc -(xb + xd) */
851 Xc12C_out = (Xaplusc - Xbplusd);
852 /* (ya + yc) - (yb + yd) */
853 Yc12C_out = (Yaplusc - Ybplusd);
854 /* (xa - xc) + (yb - yd) */
855 Xd12C_out = (Xaminusc + Ybminusd);
856 /* (ya - yc) - (xb - xd) */
857 Yd12C_out = (Yaminusc - Xbminusd);
859 pSrc[(2U * i0)] = Xaplusc + Xbplusd;
860 pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
862 Xb12_out = Xb12C_out * co1;
863 Yb12_out = Yb12C_out * co1;
864 Xc12_out = Xc12C_out * co2;
865 Yc12_out = Yc12C_out * co2;
866 Xd12_out = Xd12C_out * co3;
867 Yd12_out = Yd12C_out * co3;
869 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
870 //Xb12_out -= Yb12C_out * si1;
871 p0 = Yb12C_out * si1;
872 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
873 //Yb12_out += Xb12C_out * si1;
874 p1 = Xb12C_out * si1;
875 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
876 //Xc12_out -= Yc12C_out * si2;
877 p2 = Yc12C_out * si2;
878 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
879 //Yc12_out += Xc12C_out * si2;
880 p3 = Xc12C_out * si2;
881 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
882 //Xd12_out -= Yd12C_out * si3;
883 p4 = Yd12C_out * si3;
884 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
885 //Yd12_out += Xd12C_out * si3;
886 p5 = Xd12C_out * si3;
888 Xb12_out -= p0;
889 Yb12_out += p1;
890 Xc12_out -= p2;
891 Yc12_out += p3;
892 Xd12_out -= p4;
893 Yd12_out += p5;
895 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
896 pSrc[2U * i1] = Xc12_out;
898 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
899 pSrc[(2U * i1) + 1U] = Yc12_out;
901 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
902 pSrc[2U * i2] = Xb12_out;
904 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
905 pSrc[(2U * i2) + 1U] = Yb12_out;
907 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
908 pSrc[2U * i3] = Xd12_out;
910 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
911 pSrc[(2U * i3) + 1U] = Yd12_out;
913 i0 += n1;
914 } while (i0 < fftLen);
915 j++;
916 } while (j <= (n2 - 1U));
917 twidCoefModifier <<= 2U;
919 /* Initializations of last stage */
921 j = fftLen >> 2;
922 ptr1 = &pSrc[0];
924 /* Calculations of last stage */
927 xaIn = ptr1[0];
928 yaIn = ptr1[1];
929 xbIn = ptr1[2];
930 ybIn = ptr1[3];
931 xcIn = ptr1[4];
932 ycIn = ptr1[5];
933 xdIn = ptr1[6];
934 ydIn = ptr1[7];
936 /* Butterfly implementation */
937 /* xa + xc */
938 Xaplusc = xaIn + xcIn;
940 /* xa - xc */
941 Xaminusc = xaIn - xcIn;
943 /* ya + yc */
944 Yaplusc = yaIn + ycIn;
946 /* ya - yc */
947 Yaminusc = yaIn - ycIn;
949 /* xb + xd */
950 Xbplusd = xbIn + xdIn;
952 /* yb + yd */
953 Ybplusd = ybIn + ydIn;
955 /* (xb-xd) */
956 Xbminusd = xbIn - xdIn;
958 /* (yb-yd) */
959 Ybminusd = ybIn - ydIn;
961 /* xa' = (xa+xb+xc+xd) * onebyfftLen */
962 a0 = (Xaplusc + Xbplusd);
963 /* ya' = (ya+yb+yc+yd) * onebyfftLen */
964 a1 = (Yaplusc + Ybplusd);
965 /* xc' = (xa-xb+xc-xd) * onebyfftLen */
966 a2 = (Xaplusc - Xbplusd);
967 /* yc' = (ya-yb+yc-yd) * onebyfftLen */
968 a3 = (Yaplusc - Ybplusd);
969 /* xb' = (xa-yb-xc+yd) * onebyfftLen */
970 a4 = (Xaminusc - Ybminusd);
971 /* yb' = (ya+xb-yc-xd) * onebyfftLen */
972 a5 = (Yaminusc + Xbminusd);
973 /* xd' = (xa-yb-xc+yd) * onebyfftLen */
974 a6 = (Xaminusc + Ybminusd);
975 /* yd' = (ya-xb-yc+xd) * onebyfftLen */
976 a7 = (Yaminusc - Xbminusd);
978 p0 = a0 * onebyfftLen;
979 p1 = a1 * onebyfftLen;
980 p2 = a2 * onebyfftLen;
981 p3 = a3 * onebyfftLen;
982 p4 = a4 * onebyfftLen;
983 p5 = a5 * onebyfftLen;
984 p6 = a6 * onebyfftLen;
985 p7 = a7 * onebyfftLen;
987 /* xa' = (xa+xb+xc+xd) * onebyfftLen */
988 ptr1[0] = p0;
989 /* ya' = (ya+yb+yc+yd) * onebyfftLen */
990 ptr1[1] = p1;
991 /* xc' = (xa-xb+xc-xd) * onebyfftLen */
992 ptr1[2] = p2;
993 /* yc' = (ya-yb+yc-yd) * onebyfftLen */
994 ptr1[3] = p3;
995 /* xb' = (xa-yb-xc+yd) * onebyfftLen */
996 ptr1[4] = p4;
997 /* yb' = (ya+xb-yc-xd) * onebyfftLen */
998 ptr1[5] = p5;
999 /* xd' = (xa-yb-xc+yd) * onebyfftLen */
1000 ptr1[6] = p6;
1001 /* yd' = (ya-xb-yc+xd) * onebyfftLen */
1002 ptr1[7] = p7;
1004 /* increment source pointer by 8 for next calculations */
1005 ptr1 = ptr1 + 8U;
1007 } while (--j);
1009 #else
1011 float32_t t1, t2, r1, r2, s1, s2;
1013 /* Run the below code for Cortex-M0 */
1015 /* Initializations for the first stage */
1016 n2 = fftLen;
1017 n1 = n2;
1019 /* Calculation of first stage */
1020 for (k = fftLen; k > 4U; k >>= 2U)
1022 /* Initializations for the first stage */
1023 n1 = n2;
1024 n2 >>= 2U;
1025 ia1 = 0U;
1027 /* Calculation of first stage */
1028 j = 0;
1031 /* index calculation for the coefficients */
1032 ia2 = ia1 + ia1;
1033 ia3 = ia2 + ia1;
1034 co1 = pCoef[ia1 * 2U];
1035 si1 = pCoef[(ia1 * 2U) + 1U];
1036 co2 = pCoef[ia2 * 2U];
1037 si2 = pCoef[(ia2 * 2U) + 1U];
1038 co3 = pCoef[ia3 * 2U];
1039 si3 = pCoef[(ia3 * 2U) + 1U];
1041 /* Twiddle coefficients index modifier */
1042 ia1 = ia1 + twidCoefModifier;
1044 i0 = j;
1047 /* index calculation for the input as, */
1048 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1049 i1 = i0 + n2;
1050 i2 = i1 + n2;
1051 i3 = i2 + n2;
1053 /* xa + xc */
1054 r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
1056 /* xa - xc */
1057 r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
1059 /* ya + yc */
1060 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1062 /* ya - yc */
1063 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1065 /* xb + xd */
1066 t1 = pSrc[2U * i1] + pSrc[2U * i3];
1068 /* xa' = xa + xb + xc + xd */
1069 pSrc[2U * i0] = r1 + t1;
1071 /* xa + xc -(xb + xd) */
1072 r1 = r1 - t1;
1074 /* yb + yd */
1075 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1077 /* ya' = ya + yb + yc + yd */
1078 pSrc[(2U * i0) + 1U] = s1 + t2;
1080 /* (ya + yc) - (yb + yd) */
1081 s1 = s1 - t2;
1083 /* (yb - yd) */
1084 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1086 /* (xb - xd) */
1087 t2 = pSrc[2U * i1] - pSrc[2U * i3];
1089 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1090 pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
1092 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1093 pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
1095 /* (xa - xc) - (yb - yd) */
1096 r1 = r2 - t1;
1098 /* (xa - xc) + (yb - yd) */
1099 r2 = r2 + t1;
1101 /* (ya - yc) + (xb - xd) */
1102 s1 = s2 + t2;
1104 /* (ya - yc) - (xb - xd) */
1105 s2 = s2 - t2;
1107 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1108 pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
1110 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1111 pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
1113 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1114 pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
1116 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1117 pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
1119 i0 += n1;
1120 } while ( i0 < fftLen);
1121 j++;
1122 } while (j <= (n2 - 1U));
1123 twidCoefModifier <<= 2U;
1125 /* Initializations of last stage */
1126 n1 = n2;
1127 n2 >>= 2U;
1129 /* Calculations of last stage */
1130 for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
1132 /* index calculation for the input as, */
1133 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1134 i1 = i0 + n2;
1135 i2 = i1 + n2;
1136 i3 = i2 + n2;
1138 /* Butterfly implementation */
1139 /* xa + xc */
1140 r1 = pSrc[2U * i0] + pSrc[2U * i2];
1142 /* xa - xc */
1143 r2 = pSrc[2U * i0] - pSrc[2U * i2];
1145 /* ya + yc */
1146 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1148 /* ya - yc */
1149 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1151 /* xc + xd */
1152 t1 = pSrc[2U * i1] + pSrc[2U * i3];
1154 /* xa' = xa + xb + xc + xd */
1155 pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
1157 /* (xa + xb) - (xc + xd) */
1158 r1 = r1 - t1;
1160 /* yb + yd */
1161 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1163 /* ya' = ya + yb + yc + yd */
1164 pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
1166 /* (ya + yc) - (yb + yd) */
1167 s1 = s1 - t2;
1169 /* (yb-yd) */
1170 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1172 /* (xb-xd) */
1173 t2 = pSrc[2U * i1] - pSrc[2U * i3];
1175 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1176 pSrc[2U * i1] = r1 * onebyfftLen;
1178 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1179 pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
1181 /* (xa - xc) - (yb-yd) */
1182 r1 = r2 - t1;
1184 /* (xa - xc) + (yb-yd) */
1185 r2 = r2 + t1;
1187 /* (ya - yc) + (xb-xd) */
1188 s1 = s2 + t2;
1190 /* (ya - yc) - (xb-xd) */
1191 s2 = s2 - t2;
1193 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1194 pSrc[2U * i2] = r1 * onebyfftLen;
1196 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1197 pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
1199 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1200 pSrc[2U * i3] = r2 * onebyfftLen;
1202 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1203 pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
1206 #endif /* #if defined (ARM_MATH_DSP) */