1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_q15.c
4 * Description: This file defines the Radix-4 FFT and IFFT functions and
5 * in-place bit reversal using a bit reversal table
7 * $Date: 27. January 2017
10 * Target Processor: Cortex-M cores
11 * -------------------------------------------------------------------- */
13 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
15 * SPDX-License-Identifier: Apache-2.0
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
21 * www.apache.org/licenses/LICENSE-2.0
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
33 void arm_radix4_butterfly_q15(
37 uint32_t twidCoefModifier
);
39 void arm_radix4_butterfly_inverse_q15(
43 uint32_t twidCoefModifier
);
45 void arm_bitreversal_q15(
48 uint16_t bitRevFactor
,
49 uint16_t * pBitRevTab
);
52 * @ingroup groupTransforms
56 * @addtogroup ComplexFFT
63 * @brief Processing function for the Q15 CFFT/CIFFT.
64 * @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
65 * @param[in] *S points to an instance of the Q15 CFFT/CIFFT structure.
66 * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
69 * \par Input and output formats:
71 * Internally, the input is downscaled by 2 at every stage to avoid saturation inside the CFFT/CIFFT process.
72 * Hence the output format is different for different FFT sizes.
73 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
75 * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"
76 * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"
79 void arm_cfft_radix4_q15(
80 const arm_cfft_radix4_instance_q15
* S
,
83 if (S
->ifftFlag
== 1U)
85 /* Complex IFFT radix-4 */
86 arm_radix4_butterfly_inverse_q15(pSrc
, S
->fftLen
, S
->pTwiddle
, S
->twidCoefModifier
);
90 /* Complex FFT radix-4 */
91 arm_radix4_butterfly_q15(pSrc
, S
->fftLen
, S
->pTwiddle
, S
->twidCoefModifier
);
94 if (S
->bitReverseFlag
== 1U)
97 arm_bitreversal_q15(pSrc
, S
->fftLen
, S
->bitRevFactor
, S
->pBitRevTable
);
103 * @} end of ComplexFFT group
107 * Radix-4 FFT algorithm used is :
109 * Input real and imaginary data:
111 * x(n+N/4 ) = xb + j * yb
112 * x(n+N/2 ) = xc + j * yc
113 * x(n+3N/4) = xd + j * yd
116 * Output real and imaginary data:
117 * x(4r) = xa'+ j * ya'
118 * x(4r+1) = xb'+ j * yb'
119 * x(4r+2) = xc'+ j * yc'
120 * x(4r+3) = xd'+ j * yd'
123 * Twiddle factors for radix-4 FFT:
124 * Wn = co1 + j * (- si1)
125 * W2n = co2 + j * (- si2)
126 * W3n = co3 + j * (- si3)
128 * The real and imaginary output values for the radix-4 butterfly are
129 * xa' = xa + xb + xc + xd
130 * ya' = ya + yb + yc + yd
131 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
132 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
133 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
134 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
135 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
136 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
141 * @brief Core function for the Q15 CFFT butterfly process.
142 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type.
143 * @param[in] fftLen length of the FFT.
144 * @param[in] *pCoef16 points to twiddle coefficient buffer.
145 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
149 void arm_radix4_butterfly_q15(
153 uint32_t twidCoefModifier
)
156 #if defined (ARM_MATH_DSP)
158 /* Run the below code for Cortex-M4 and Cortex-M3 */
161 q31_t C1
, C2
, C3
, out1
, out2
;
162 uint32_t n1
, n2
, ic
, i0
, j
, k
;
170 q31_t xaya
, xbyb
, xcyc
, xdyd
;
172 /* Total process is divided into three stages */
174 /* process first stage, middle stages, & last stage */
176 /* Initializations for the first stage */
183 /* Index for twiddle coefficient */
186 /* Index for input read and output write */
190 pSi1
= pSi0
+ 2 * n2
;
191 pSi2
= pSi1
+ 2 * n2
;
192 pSi3
= pSi2
+ 2 * n2
;
194 /* Input is in 1.15(q15) format */
196 /* start of first stage process */
199 /* Butterfly implementation */
201 /* Reading i0, i0+fftLen/2 inputs */
202 /* Read ya (real), xa(imag) input */
203 T
= _SIMD32_OFFSET(pSi0
);
204 T
= __SHADD16(T
, 0); // this is just a SIMD arithmetic shift right by 1
205 T
= __SHADD16(T
, 0); // it turns out doing this twice is 2 cycles, the alternative takes 3 cycles
206 //in = ((int16_t) (T & 0xFFFF)) >> 2; // alternative code that takes 3 cycles
207 //T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
209 /* Read yc (real), xc(imag) input */
210 S
= _SIMD32_OFFSET(pSi2
);
214 /* R = packed((ya + yc), (xa + xc) ) */
217 /* S = packed((ya - yc), (xa - xc) ) */
220 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
221 /* Read yb (real), xb(imag) input */
222 T
= _SIMD32_OFFSET(pSi1
);
226 /* Read yd (real), xd(imag) input */
227 U
= _SIMD32_OFFSET(pSi3
);
231 /* T = packed((yb + yd), (xb + xd) ) */
234 /* writing the butterfly processed i0 sample */
235 /* xa' = xa + xb + xc + xd */
236 /* ya' = ya + yb + yc + yd */
237 _SIMD32_OFFSET(pSi0
) = __SHADD16(R
, T
);
240 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
243 /* co2 & si2 are read from SIMD Coefficient pointer */
244 C2
= _SIMD32_OFFSET(pCoef16
+ (4U * ic
));
246 #ifndef ARM_MATH_BIG_ENDIAN
248 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
249 out1
= __SMUAD(C2
, R
) >> 16U;
250 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
251 out2
= __SMUSDX(C2
, R
);
255 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
256 out1
= __SMUSDX(R
, C2
) >> 16U;
257 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
258 out2
= __SMUAD(C2
, R
);
260 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
262 /* Reading i0+fftLen/4 */
263 /* T = packed(yb, xb) */
264 T
= _SIMD32_OFFSET(pSi1
);
268 /* writing the butterfly processed i0 + fftLen/4 sample */
269 /* writing output(xc', yc') in little endian format */
270 _SIMD32_OFFSET(pSi1
) =
271 (q31_t
) ((out2
) & 0xFFFF0000) | (out1
& 0x0000FFFF);
274 /* Butterfly calculations */
275 /* U = packed(yd, xd) */
276 U
= _SIMD32_OFFSET(pSi3
);
280 /* T = packed(yb-yd, xb-xd) */
283 #ifndef ARM_MATH_BIG_ENDIAN
285 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
287 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
292 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
294 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
297 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
299 /* co1 & si1 are read from SIMD Coefficient pointer */
300 C1
= _SIMD32_OFFSET(pCoef16
+ (2U * ic
));
301 /* Butterfly process for the i0+fftLen/2 sample */
303 #ifndef ARM_MATH_BIG_ENDIAN
305 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
306 out1
= __SMUAD(C1
, S
) >> 16U;
307 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
308 out2
= __SMUSDX(C1
, S
);
312 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
313 out1
= __SMUSDX(S
, C1
) >> 16U;
314 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
315 out2
= __SMUAD(C1
, S
);
317 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
319 /* writing output(xb', yb') in little endian format */
320 _SIMD32_OFFSET(pSi2
) =
321 ((out2
) & 0xFFFF0000) | ((out1
) & 0x0000FFFF);
325 /* co3 & si3 are read from SIMD Coefficient pointer */
326 C3
= _SIMD32_OFFSET(pCoef16
+ (6U * ic
));
327 /* Butterfly process for the i0+3fftLen/4 sample */
329 #ifndef ARM_MATH_BIG_ENDIAN
331 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
332 out1
= __SMUAD(C3
, R
) >> 16U;
333 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
334 out2
= __SMUSDX(C3
, R
);
338 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
339 out1
= __SMUSDX(R
, C3
) >> 16U;
340 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
341 out2
= __SMUAD(C3
, R
);
343 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
345 /* writing output(xd', yd') in little endian format */
346 _SIMD32_OFFSET(pSi3
) =
347 ((out2
) & 0xFFFF0000) | (out1
& 0x0000FFFF);
350 /* Twiddle coefficients index modifier */
351 ic
= ic
+ twidCoefModifier
;
354 /* data is in 4.11(q11) format */
356 /* end of first stage process */
359 /* start of middle stage process */
361 /* Twiddle coefficients index modifier */
362 twidCoefModifier
<<= 2U;
364 /* Calculation of Middle stage */
365 for (k
= fftLen
/ 4U; k
> 4U; k
>>= 2U)
367 /* Initializations for the middle stage */
372 for (j
= 0U; j
<= (n2
- 1U); j
++)
374 /* index calculation for the coefficients */
375 C1
= _SIMD32_OFFSET(pCoef16
+ (2U * ic
));
376 C2
= _SIMD32_OFFSET(pCoef16
+ (4U * ic
));
377 C3
= _SIMD32_OFFSET(pCoef16
+ (6U * ic
));
379 /* Twiddle coefficients index modifier */
380 ic
= ic
+ twidCoefModifier
;
382 pSi0
= pSrc16
+ 2 * j
;
383 pSi1
= pSi0
+ 2 * n2
;
384 pSi2
= pSi1
+ 2 * n2
;
385 pSi3
= pSi2
+ 2 * n2
;
387 /* Butterfly implementation */
388 for (i0
= j
; i0
< fftLen
; i0
+= n1
)
390 /* Reading i0, i0+fftLen/2 inputs */
391 /* Read ya (real), xa(imag) input */
392 T
= _SIMD32_OFFSET(pSi0
);
394 /* Read yc (real), xc(imag) input */
395 S
= _SIMD32_OFFSET(pSi2
);
397 /* R = packed( (ya + yc), (xa + xc)) */
400 /* S = packed((ya - yc), (xa - xc)) */
403 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
404 /* Read yb (real), xb(imag) input */
405 T
= _SIMD32_OFFSET(pSi1
);
407 /* Read yd (real), xd(imag) input */
408 U
= _SIMD32_OFFSET(pSi3
);
410 /* T = packed( (yb + yd), (xb + xd)) */
413 /* writing the butterfly processed i0 sample */
415 /* xa' = xa + xb + xc + xd */
416 /* ya' = ya + yb + yc + yd */
417 out1
= __SHADD16(R
, T
);
418 out1
= __SHADD16(out1
, 0);
419 _SIMD32_OFFSET(pSi0
) = out1
;
422 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
425 #ifndef ARM_MATH_BIG_ENDIAN
427 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
428 out1
= __SMUAD(C2
, R
) >> 16U;
430 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
431 out2
= __SMUSDX(C2
, R
);
435 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
436 out1
= __SMUSDX(R
, C2
) >> 16U;
438 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
439 out2
= __SMUAD(C2
, R
);
441 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
443 /* Reading i0+3fftLen/4 */
444 /* Read yb (real), xb(imag) input */
445 T
= _SIMD32_OFFSET(pSi1
);
447 /* writing the butterfly processed i0 + fftLen/4 sample */
448 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
449 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
450 _SIMD32_OFFSET(pSi1
) =
451 ((out2
) & 0xFFFF0000) | (out1
& 0x0000FFFF);
454 /* Butterfly calculations */
456 /* Read yd (real), xd(imag) input */
457 U
= _SIMD32_OFFSET(pSi3
);
459 /* T = packed(yb-yd, xb-xd) */
462 #ifndef ARM_MATH_BIG_ENDIAN
464 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
467 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
471 /* Butterfly process for the i0+fftLen/2 sample */
472 out1
= __SMUAD(C1
, S
) >> 16U;
473 out2
= __SMUSDX(C1
, S
);
477 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
480 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
484 /* Butterfly process for the i0+fftLen/2 sample */
485 out1
= __SMUSDX(S
, C1
) >> 16U;
486 out2
= __SMUAD(C1
, S
);
488 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
490 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
491 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
492 _SIMD32_OFFSET(pSi2
) =
493 ((out2
) & 0xFFFF0000) | (out1
& 0x0000FFFF);
496 /* Butterfly process for the i0+3fftLen/4 sample */
498 #ifndef ARM_MATH_BIG_ENDIAN
500 out1
= __SMUAD(C3
, R
) >> 16U;
501 out2
= __SMUSDX(C3
, R
);
505 out1
= __SMUSDX(R
, C3
) >> 16U;
506 out2
= __SMUAD(C3
, R
);
508 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
510 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
511 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
512 _SIMD32_OFFSET(pSi3
) =
513 ((out2
) & 0xFFFF0000) | (out1
& 0x0000FFFF);
517 /* Twiddle coefficients index modifier */
518 twidCoefModifier
<<= 2U;
520 /* end of middle stage process */
523 /* data is in 10.6(q6) format for the 1024 point */
524 /* data is in 8.8(q8) format for the 256 point */
525 /* data is in 6.10(q10) format for the 64 point */
526 /* data is in 4.12(q12) format for the 16 point */
528 /* Initializations for the last stage */
533 /* start of last stage process */
535 /* Butterfly implementation */
538 /* Read xa (real), ya(imag) input */
539 xaya
= *__SIMD32(ptr1
)++;
541 /* Read xb (real), yb(imag) input */
542 xbyb
= *__SIMD32(ptr1
)++;
544 /* Read xc (real), yc(imag) input */
545 xcyc
= *__SIMD32(ptr1
)++;
547 /* Read xd (real), yd(imag) input */
548 xdyd
= *__SIMD32(ptr1
)++;
550 /* R = packed((ya + yc), (xa + xc)) */
551 R
= __QADD16(xaya
, xcyc
);
553 /* T = packed((yb + yd), (xb + xd)) */
554 T
= __QADD16(xbyb
, xdyd
);
556 /* pointer updation for writing */
560 /* xa' = xa + xb + xc + xd */
561 /* ya' = ya + yb + yc + yd */
562 *__SIMD32(ptr1
)++ = __SHADD16(R
, T
);
564 /* T = packed((yb + yd), (xb + xd)) */
565 T
= __QADD16(xbyb
, xdyd
);
567 /* xc' = (xa-xb+xc-xd) */
568 /* yc' = (ya-yb+yc-yd) */
569 *__SIMD32(ptr1
)++ = __SHSUB16(R
, T
);
571 /* S = packed((ya - yc), (xa - xc)) */
572 S
= __QSUB16(xaya
, xcyc
);
574 /* Read yd (real), xd(imag) input */
575 /* T = packed( (yb - yd), (xb - xd)) */
576 U
= __QSUB16(xbyb
, xdyd
);
578 #ifndef ARM_MATH_BIG_ENDIAN
580 /* xb' = (xa+yb-xc-yd) */
581 /* yb' = (ya-xb-yc+xd) */
582 *__SIMD32(ptr1
)++ = __SHSAX(S
, U
);
585 /* xd' = (xa-yb-xc+yd) */
586 /* yd' = (ya+xb-yc-xd) */
587 *__SIMD32(ptr1
)++ = __SHASX(S
, U
);
591 /* xb' = (xa+yb-xc-yd) */
592 /* yb' = (ya-xb-yc+xd) */
593 *__SIMD32(ptr1
)++ = __SHASX(S
, U
);
596 /* xd' = (xa-yb-xc+yd) */
597 /* yd' = (ya+xb-yc-xd) */
598 *__SIMD32(ptr1
)++ = __SHSAX(S
, U
);
600 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
604 /* end of last stage process */
606 /* output is in 11.5(q5) format for the 1024 point */
607 /* output is in 9.7(q7) format for the 256 point */
608 /* output is in 7.9(q9) format for the 64 point */
609 /* output is in 5.11(q11) format for the 16 point */
614 /* Run the below code for Cortex-M0 */
616 q15_t R0
, R1
, S0
, S1
, T0
, T1
, U0
, U1
;
617 q15_t Co1
, Si1
, Co2
, Si2
, Co3
, Si3
, out1
, out2
;
618 uint32_t n1
, n2
, ic
, i0
, i1
, i2
, i3
, j
, k
;
620 /* Total process is divided into three stages */
622 /* process first stage, middle stages, & last stage */
624 /* Initializations for the first stage */
631 /* Index for twiddle coefficient */
634 /* Index for input read and output write */
638 /* Input is in 1.15(q15) format */
640 /* start of first stage process */
643 /* Butterfly implementation */
645 /* index calculation for the input as, */
646 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
651 /* Reading i0, i0+fftLen/2 inputs */
653 /* input is down scale by 4 to avoid overflow */
654 /* Read ya (real), xa(imag) input */
655 T0
= pSrc16
[i0
* 2U] >> 2U;
656 T1
= pSrc16
[(i0
* 2U) + 1U] >> 2U;
658 /* input is down scale by 4 to avoid overflow */
659 /* Read yc (real), xc(imag) input */
660 S0
= pSrc16
[i2
* 2U] >> 2U;
661 S1
= pSrc16
[(i2
* 2U) + 1U] >> 2U;
664 R0
= __SSAT(T0
+ S0
, 16U);
666 R1
= __SSAT(T1
+ S1
, 16U);
669 S0
= __SSAT(T0
- S0
, 16);
671 S1
= __SSAT(T1
- S1
, 16);
673 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
674 /* input is down scale by 4 to avoid overflow */
675 /* Read yb (real), xb(imag) input */
676 T0
= pSrc16
[i1
* 2U] >> 2U;
677 T1
= pSrc16
[(i1
* 2U) + 1U] >> 2U;
679 /* input is down scale by 4 to avoid overflow */
680 /* Read yd (real), xd(imag) input */
681 U0
= pSrc16
[i3
* 2U] >> 2U;
682 U1
= pSrc16
[(i3
* 2U) + 1] >> 2U;
685 T0
= __SSAT(T0
+ U0
, 16U);
687 T1
= __SSAT(T1
+ U1
, 16U);
689 /* writing the butterfly processed i0 sample */
690 /* ya' = ya + yb + yc + yd */
691 /* xa' = xa + xb + xc + xd */
692 pSrc16
[i0
* 2U] = (R0
>> 1U) + (T0
>> 1U);
693 pSrc16
[(i0
* 2U) + 1U] = (R1
>> 1U) + (T1
>> 1U);
695 /* R0 = (ya + yc) - (yb + yd) */
696 /* R1 = (xa + xc) - (xb + xd) */
697 R0
= __SSAT(R0
- T0
, 16U);
698 R1
= __SSAT(R1
- T1
, 16U);
700 /* co2 & si2 are read from Coefficient pointer */
701 Co2
= pCoef16
[2U * ic
* 2U];
702 Si2
= pCoef16
[(2U * ic
* 2U) + 1];
704 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
705 out1
= (q15_t
) ((Co2
* R0
+ Si2
* R1
) >> 16U);
706 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
707 out2
= (q15_t
) ((-Si2
* R0
+ Co2
* R1
) >> 16U);
709 /* Reading i0+fftLen/4 */
710 /* input is down scale by 4 to avoid overflow */
711 /* T0 = yb, T1 = xb */
712 T0
= pSrc16
[i1
* 2U] >> 2;
713 T1
= pSrc16
[(i1
* 2U) + 1] >> 2;
715 /* writing the butterfly processed i0 + fftLen/4 sample */
716 /* writing output(xc', yc') in little endian format */
717 pSrc16
[i1
* 2U] = out1
;
718 pSrc16
[(i1
* 2U) + 1] = out2
;
720 /* Butterfly calculations */
721 /* input is down scale by 4 to avoid overflow */
722 /* U0 = yd, U1 = xd */
723 U0
= pSrc16
[i3
* 2U] >> 2;
724 U1
= pSrc16
[(i3
* 2U) + 1] >> 2;
726 T0
= __SSAT(T0
- U0
, 16);
728 T1
= __SSAT(T1
- U1
, 16);
730 /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */
731 R0
= (q15_t
) __SSAT((q31_t
) (S0
- T1
), 16);
732 R1
= (q15_t
) __SSAT((q31_t
) (S1
+ T0
), 16);
734 /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
735 S0
= (q15_t
) __SSAT(((q31_t
) S0
+ T1
), 16U);
736 S1
= (q15_t
) __SSAT(((q31_t
) S1
- T0
), 16U);
738 /* co1 & si1 are read from Coefficient pointer */
739 Co1
= pCoef16
[ic
* 2U];
740 Si1
= pCoef16
[(ic
* 2U) + 1];
741 /* Butterfly process for the i0+fftLen/2 sample */
742 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
743 out1
= (q15_t
) ((Si1
* S1
+ Co1
* S0
) >> 16);
744 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
745 out2
= (q15_t
) ((-Si1
* S0
+ Co1
* S1
) >> 16);
747 /* writing output(xb', yb') in little endian format */
748 pSrc16
[i2
* 2U] = out1
;
749 pSrc16
[(i2
* 2U) + 1] = out2
;
751 /* Co3 & si3 are read from Coefficient pointer */
752 Co3
= pCoef16
[3U * (ic
* 2U)];
753 Si3
= pCoef16
[(3U * (ic
* 2U)) + 1];
754 /* Butterfly process for the i0+3fftLen/4 sample */
755 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
756 out1
= (q15_t
) ((Si3
* R1
+ Co3
* R0
) >> 16U);
757 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
758 out2
= (q15_t
) ((-Si3
* R0
+ Co3
* R1
) >> 16U);
759 /* writing output(xd', yd') in little endian format */
760 pSrc16
[i3
* 2U] = out1
;
761 pSrc16
[(i3
* 2U) + 1] = out2
;
763 /* Twiddle coefficients index modifier */
764 ic
= ic
+ twidCoefModifier
;
766 /* Updating input index */
770 /* data is in 4.11(q11) format */
772 /* end of first stage process */
775 /* start of middle stage process */
777 /* Twiddle coefficients index modifier */
778 twidCoefModifier
<<= 2U;
780 /* Calculation of Middle stage */
781 for (k
= fftLen
/ 4U; k
> 4U; k
>>= 2U)
783 /* Initializations for the middle stage */
788 for (j
= 0U; j
<= (n2
- 1U); j
++)
790 /* index calculation for the coefficients */
791 Co1
= pCoef16
[ic
* 2U];
792 Si1
= pCoef16
[(ic
* 2U) + 1U];
793 Co2
= pCoef16
[2U * (ic
* 2U)];
794 Si2
= pCoef16
[(2U * (ic
* 2U)) + 1U];
795 Co3
= pCoef16
[3U * (ic
* 2U)];
796 Si3
= pCoef16
[(3U * (ic
* 2U)) + 1U];
798 /* Twiddle coefficients index modifier */
799 ic
= ic
+ twidCoefModifier
;
801 /* Butterfly implementation */
802 for (i0
= j
; i0
< fftLen
; i0
+= n1
)
804 /* index calculation for the input as, */
805 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
810 /* Reading i0, i0+fftLen/2 inputs */
811 /* Read ya (real), xa(imag) input */
812 T0
= pSrc16
[i0
* 2U];
813 T1
= pSrc16
[(i0
* 2U) + 1U];
815 /* Read yc (real), xc(imag) input */
816 S0
= pSrc16
[i2
* 2U];
817 S1
= pSrc16
[(i2
* 2U) + 1U];
819 /* R0 = (ya + yc), R1 = (xa + xc) */
820 R0
= __SSAT(T0
+ S0
, 16);
821 R1
= __SSAT(T1
+ S1
, 16);
823 /* S0 = (ya - yc), S1 =(xa - xc) */
824 S0
= __SSAT(T0
- S0
, 16);
825 S1
= __SSAT(T1
- S1
, 16);
827 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
828 /* Read yb (real), xb(imag) input */
829 T0
= pSrc16
[i1
* 2U];
830 T1
= pSrc16
[(i1
* 2U) + 1U];
832 /* Read yd (real), xd(imag) input */
833 U0
= pSrc16
[i3
* 2U];
834 U1
= pSrc16
[(i3
* 2U) + 1U];
837 /* T0 = (yb + yd), T1 = (xb + xd) */
838 T0
= __SSAT(T0
+ U0
, 16);
839 T1
= __SSAT(T1
+ U1
, 16);
841 /* writing the butterfly processed i0 sample */
843 /* xa' = xa + xb + xc + xd */
844 /* ya' = ya + yb + yc + yd */
845 out1
= ((R0
>> 1U) + (T0
>> 1U)) >> 1U;
846 out2
= ((R1
>> 1U) + (T1
>> 1U)) >> 1U;
848 pSrc16
[i0
* 2U] = out1
;
849 pSrc16
[(2U * i0
) + 1U] = out2
;
851 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
852 R0
= (R0
>> 1U) - (T0
>> 1U);
853 R1
= (R1
>> 1U) - (T1
>> 1U);
855 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
856 out1
= (q15_t
) ((Co2
* R0
+ Si2
* R1
) >> 16U);
858 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
859 out2
= (q15_t
) ((-Si2
* R0
+ Co2
* R1
) >> 16U);
861 /* Reading i0+3fftLen/4 */
862 /* Read yb (real), xb(imag) input */
863 T0
= pSrc16
[i1
* 2U];
864 T1
= pSrc16
[(i1
* 2U) + 1U];
866 /* writing the butterfly processed i0 + fftLen/4 sample */
867 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
868 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
869 pSrc16
[i1
* 2U] = out1
;
870 pSrc16
[(i1
* 2U) + 1U] = out2
;
872 /* Butterfly calculations */
874 /* Read yd (real), xd(imag) input */
875 U0
= pSrc16
[i3
* 2U];
876 U1
= pSrc16
[(i3
* 2U) + 1U];
878 /* T0 = yb-yd, T1 = xb-xd */
879 T0
= __SSAT(T0
- U0
, 16);
880 T1
= __SSAT(T1
- U1
, 16);
882 /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */
883 R0
= (S0
>> 1U) - (T1
>> 1U);
884 R1
= (S1
>> 1U) + (T0
>> 1U);
886 /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */
887 S0
= (S0
>> 1U) + (T1
>> 1U);
888 S1
= (S1
>> 1U) - (T0
>> 1U);
890 /* Butterfly process for the i0+fftLen/2 sample */
891 out1
= (q15_t
) ((Co1
* S0
+ Si1
* S1
) >> 16U);
893 out2
= (q15_t
) ((-Si1
* S0
+ Co1
* S1
) >> 16U);
895 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
896 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
897 pSrc16
[i2
* 2U] = out1
;
898 pSrc16
[(i2
* 2U) + 1U] = out2
;
900 /* Butterfly process for the i0+3fftLen/4 sample */
901 out1
= (q15_t
) ((Si3
* R1
+ Co3
* R0
) >> 16U);
903 out2
= (q15_t
) ((-Si3
* R0
+ Co3
* R1
) >> 16U);
904 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
905 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
906 pSrc16
[i3
* 2U] = out1
;
907 pSrc16
[(i3
* 2U) + 1U] = out2
;
910 /* Twiddle coefficients index modifier */
911 twidCoefModifier
<<= 2U;
913 /* end of middle stage process */
916 /* data is in 10.6(q6) format for the 1024 point */
917 /* data is in 8.8(q8) format for the 256 point */
918 /* data is in 6.10(q10) format for the 64 point */
919 /* data is in 4.12(q12) format for the 16 point */
921 /* Initializations for the last stage */
925 /* start of last stage process */
927 /* Butterfly implementation */
928 for (i0
= 0U; i0
<= (fftLen
- n1
); i0
+= n1
)
930 /* index calculation for the input as, */
931 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
936 /* Reading i0, i0+fftLen/2 inputs */
937 /* Read ya (real), xa(imag) input */
938 T0
= pSrc16
[i0
* 2U];
939 T1
= pSrc16
[(i0
* 2U) + 1U];
941 /* Read yc (real), xc(imag) input */
942 S0
= pSrc16
[i2
* 2U];
943 S1
= pSrc16
[(i2
* 2U) + 1U];
945 /* R0 = (ya + yc), R1 = (xa + xc) */
946 R0
= __SSAT(T0
+ S0
, 16U);
947 R1
= __SSAT(T1
+ S1
, 16U);
949 /* S0 = (ya - yc), S1 = (xa - xc) */
950 S0
= __SSAT(T0
- S0
, 16U);
951 S1
= __SSAT(T1
- S1
, 16U);
953 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
954 /* Read yb (real), xb(imag) input */
955 T0
= pSrc16
[i1
* 2U];
956 T1
= pSrc16
[(i1
* 2U) + 1U];
957 /* Read yd (real), xd(imag) input */
958 U0
= pSrc16
[i3
* 2U];
959 U1
= pSrc16
[(i3
* 2U) + 1U];
961 /* T0 = (yb + yd), T1 = (xb + xd)) */
962 T0
= __SSAT(T0
+ U0
, 16U);
963 T1
= __SSAT(T1
+ U1
, 16U);
965 /* writing the butterfly processed i0 sample */
966 /* xa' = xa + xb + xc + xd */
967 /* ya' = ya + yb + yc + yd */
968 pSrc16
[i0
* 2U] = (R0
>> 1U) + (T0
>> 1U);
969 pSrc16
[(i0
* 2U) + 1U] = (R1
>> 1U) + (T1
>> 1U);
971 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
972 R0
= (R0
>> 1U) - (T0
>> 1U);
973 R1
= (R1
>> 1U) - (T1
>> 1U);
974 /* Read yb (real), xb(imag) input */
975 T0
= pSrc16
[i1
* 2U];
976 T1
= pSrc16
[(i1
* 2U) + 1U];
978 /* writing the butterfly processed i0 + fftLen/4 sample */
979 /* xc' = (xa-xb+xc-xd) */
980 /* yc' = (ya-yb+yc-yd) */
981 pSrc16
[i1
* 2U] = R0
;
982 pSrc16
[(i1
* 2U) + 1U] = R1
;
984 /* Read yd (real), xd(imag) input */
985 U0
= pSrc16
[i3
* 2U];
986 U1
= pSrc16
[(i3
* 2U) + 1U];
987 /* T0 = (yb - yd), T1 = (xb - xd) */
988 T0
= __SSAT(T0
- U0
, 16U);
989 T1
= __SSAT(T1
- U1
, 16U);
991 /* writing the butterfly processed i0 + fftLen/2 sample */
992 /* xb' = (xa+yb-xc-yd) */
993 /* yb' = (ya-xb-yc+xd) */
994 pSrc16
[i2
* 2U] = (S0
>> 1U) + (T1
>> 1U);
995 pSrc16
[(i2
* 2U) + 1U] = (S1
>> 1U) - (T0
>> 1U);
997 /* writing the butterfly processed i0 + 3fftLen/4 sample */
998 /* xd' = (xa-yb-xc+yd) */
999 /* yd' = (ya+xb-yc-xd) */
1000 pSrc16
[i3
* 2U] = (S0
>> 1U) - (T1
>> 1U);
1001 pSrc16
[(i3
* 2U) + 1U] = (S1
>> 1U) + (T0
>> 1U);
1005 /* end of last stage process */
1007 /* output is in 11.5(q5) format for the 1024 point */
1008 /* output is in 9.7(q7) format for the 256 point */
1009 /* output is in 7.9(q9) format for the 64 point */
1010 /* output is in 5.11(q11) format for the 16 point */
1012 #endif /* #if defined (ARM_MATH_DSP) */
1018 * @brief Core function for the Q15 CIFFT butterfly process.
1019 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type.
1020 * @param[in] fftLen length of the FFT.
1021 * @param[in] *pCoef16 points to twiddle coefficient buffer.
1022 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
1027 * Radix-4 IFFT algorithm used is :
1029 * CIFFT uses same twiddle coefficients as CFFT function
1030 * x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
1033 * IFFT is implemented with following changes in equations from FFT
1035 * Input real and imaginary data:
1036 * x(n) = xa + j * ya
1037 * x(n+N/4 ) = xb + j * yb
1038 * x(n+N/2 ) = xc + j * yc
1039 * x(n+3N/4) = xd + j * yd
1042 * Output real and imaginary data:
1043 * x(4r) = xa'+ j * ya'
1044 * x(4r+1) = xb'+ j * yb'
1045 * x(4r+2) = xc'+ j * yc'
1046 * x(4r+3) = xd'+ j * yd'
1049 * Twiddle factors for radix-4 IFFT:
1050 * Wn = co1 + j * (si1)
1051 * W2n = co2 + j * (si2)
1052 * W3n = co3 + j * (si3)
1054 * The real and imaginary output values for the radix-4 butterfly are
1055 * xa' = xa + xb + xc + xd
1056 * ya' = ya + yb + yc + yd
1057 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
1058 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
1059 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
1060 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
1061 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
1062 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
1066 void arm_radix4_butterfly_inverse_q15(
1070 uint32_t twidCoefModifier
)
1073 #if defined (ARM_MATH_DSP)
1075 /* Run the below code for Cortex-M4 and Cortex-M3 */
1078 q31_t C1
, C2
, C3
, out1
, out2
;
1079 uint32_t n1
, n2
, ic
, i0
, j
, k
;
1087 q31_t xaya
, xbyb
, xcyc
, xdyd
;
1089 /* Total process is divided into three stages */
1091 /* process first stage, middle stages, & last stage */
1093 /* Initializations for the first stage */
1100 /* Index for twiddle coefficient */
1103 /* Index for input read and output write */
1107 pSi1
= pSi0
+ 2 * n2
;
1108 pSi2
= pSi1
+ 2 * n2
;
1109 pSi3
= pSi2
+ 2 * n2
;
1111 /* Input is in 1.15(q15) format */
1113 /* start of first stage process */
1116 /* Butterfly implementation */
1118 /* Reading i0, i0+fftLen/2 inputs */
1119 /* Read ya (real), xa(imag) input */
1120 T
= _SIMD32_OFFSET(pSi0
);
1121 T
= __SHADD16(T
, 0);
1122 T
= __SHADD16(T
, 0);
1124 /* Read yc (real), xc(imag) input */
1125 S
= _SIMD32_OFFSET(pSi2
);
1126 S
= __SHADD16(S
, 0);
1127 S
= __SHADD16(S
, 0);
1129 /* R = packed((ya + yc), (xa + xc) ) */
1132 /* S = packed((ya - yc), (xa - xc) ) */
1135 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1136 /* Read yb (real), xb(imag) input */
1137 T
= _SIMD32_OFFSET(pSi1
);
1138 T
= __SHADD16(T
, 0);
1139 T
= __SHADD16(T
, 0);
1141 /* Read yd (real), xd(imag) input */
1142 U
= _SIMD32_OFFSET(pSi3
);
1143 U
= __SHADD16(U
, 0);
1144 U
= __SHADD16(U
, 0);
1146 /* T = packed((yb + yd), (xb + xd) ) */
1149 /* writing the butterfly processed i0 sample */
1150 /* xa' = xa + xb + xc + xd */
1151 /* ya' = ya + yb + yc + yd */
1152 _SIMD32_OFFSET(pSi0
) = __SHADD16(R
, T
);
1155 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
1158 /* co2 & si2 are read from SIMD Coefficient pointer */
1159 C2
= _SIMD32_OFFSET(pCoef16
+ (4U * ic
));
1161 #ifndef ARM_MATH_BIG_ENDIAN
1163 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
1164 out1
= __SMUSD(C2
, R
) >> 16U;
1165 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
1166 out2
= __SMUADX(C2
, R
);
1170 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
1171 out1
= __SMUADX(C2
, R
) >> 16U;
1172 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
1173 out2
= __SMUSD(__QSUB16(0, C2
), R
);
1175 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1177 /* Reading i0+fftLen/4 */
1178 /* T = packed(yb, xb) */
1179 T
= _SIMD32_OFFSET(pSi1
);
1180 T
= __SHADD16(T
, 0);
1181 T
= __SHADD16(T
, 0);
1183 /* writing the butterfly processed i0 + fftLen/4 sample */
1184 /* writing output(xc', yc') in little endian format */
1185 _SIMD32_OFFSET(pSi1
) =
1186 (q31_t
) ((out2
) & 0xFFFF0000) | (out1
& 0x0000FFFF);
1189 /* Butterfly calculations */
1190 /* U = packed(yd, xd) */
1191 U
= _SIMD32_OFFSET(pSi3
);
1192 U
= __SHADD16(U
, 0);
1193 U
= __SHADD16(U
, 0);
1195 /* T = packed(yb-yd, xb-xd) */
1198 #ifndef ARM_MATH_BIG_ENDIAN
1200 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
1202 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */
1207 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
1209 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
1212 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1214 /* co1 & si1 are read from SIMD Coefficient pointer */
1215 C1
= _SIMD32_OFFSET(pCoef16
+ (2U * ic
));
1216 /* Butterfly process for the i0+fftLen/2 sample */
1218 #ifndef ARM_MATH_BIG_ENDIAN
1220 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
1221 out1
= __SMUSD(C1
, S
) >> 16U;
1222 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
1223 out2
= __SMUADX(C1
, S
);
1227 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
1228 out1
= __SMUADX(C1
, S
) >> 16U;
1229 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
1230 out2
= __SMUSD(__QSUB16(0, C1
), S
);
1232 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1234 /* writing output(xb', yb') in little endian format */
1235 _SIMD32_OFFSET(pSi2
) =
1236 ((out2
) & 0xFFFF0000) | ((out1
) & 0x0000FFFF);
1240 /* co3 & si3 are read from SIMD Coefficient pointer */
1241 C3
= _SIMD32_OFFSET(pCoef16
+ (6U * ic
));
1242 /* Butterfly process for the i0+3fftLen/4 sample */
1244 #ifndef ARM_MATH_BIG_ENDIAN
1246 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
1247 out1
= __SMUSD(C3
, R
) >> 16U;
1248 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
1249 out2
= __SMUADX(C3
, R
);
1253 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
1254 out1
= __SMUADX(C3
, R
) >> 16U;
1255 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
1256 out2
= __SMUSD(__QSUB16(0, C3
), R
);
1258 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1260 /* writing output(xd', yd') in little endian format */
1261 _SIMD32_OFFSET(pSi3
) =
1262 ((out2
) & 0xFFFF0000) | (out1
& 0x0000FFFF);
1265 /* Twiddle coefficients index modifier */
1266 ic
= ic
+ twidCoefModifier
;
1269 /* data is in 4.11(q11) format */
1271 /* end of first stage process */
1274 /* start of middle stage process */
1276 /* Twiddle coefficients index modifier */
1277 twidCoefModifier
<<= 2U;
1279 /* Calculation of Middle stage */
1280 for (k
= fftLen
/ 4U; k
> 4U; k
>>= 2U)
1282 /* Initializations for the middle stage */
1287 for (j
= 0U; j
<= (n2
- 1U); j
++)
1289 /* index calculation for the coefficients */
1290 C1
= _SIMD32_OFFSET(pCoef16
+ (2U * ic
));
1291 C2
= _SIMD32_OFFSET(pCoef16
+ (4U * ic
));
1292 C3
= _SIMD32_OFFSET(pCoef16
+ (6U * ic
));
1294 /* Twiddle coefficients index modifier */
1295 ic
= ic
+ twidCoefModifier
;
1297 pSi0
= pSrc16
+ 2 * j
;
1298 pSi1
= pSi0
+ 2 * n2
;
1299 pSi2
= pSi1
+ 2 * n2
;
1300 pSi3
= pSi2
+ 2 * n2
;
1302 /* Butterfly implementation */
1303 for (i0
= j
; i0
< fftLen
; i0
+= n1
)
1305 /* Reading i0, i0+fftLen/2 inputs */
1306 /* Read ya (real), xa(imag) input */
1307 T
= _SIMD32_OFFSET(pSi0
);
1309 /* Read yc (real), xc(imag) input */
1310 S
= _SIMD32_OFFSET(pSi2
);
1312 /* R = packed( (ya + yc), (xa + xc)) */
1315 /* S = packed((ya - yc), (xa - xc)) */
1318 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1319 /* Read yb (real), xb(imag) input */
1320 T
= _SIMD32_OFFSET(pSi1
);
1322 /* Read yd (real), xd(imag) input */
1323 U
= _SIMD32_OFFSET(pSi3
);
1325 /* T = packed( (yb + yd), (xb + xd)) */
1328 /* writing the butterfly processed i0 sample */
1330 /* xa' = xa + xb + xc + xd */
1331 /* ya' = ya + yb + yc + yd */
1332 out1
= __SHADD16(R
, T
);
1333 out1
= __SHADD16(out1
, 0);
1334 _SIMD32_OFFSET(pSi0
) = out1
;
1337 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
1338 R
= __SHSUB16(R
, T
);
1340 #ifndef ARM_MATH_BIG_ENDIAN
1342 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
1343 out1
= __SMUSD(C2
, R
) >> 16U;
1345 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
1346 out2
= __SMUADX(C2
, R
);
1350 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
1351 out1
= __SMUADX(R
, C2
) >> 16U;
1353 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
1354 out2
= __SMUSD(__QSUB16(0, C2
), R
);
1356 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1358 /* Reading i0+fftLen/4 */
1359 /* Read yb (real), xb(imag) input */
1360 T
= _SIMD32_OFFSET(pSi1
);
1362 /* writing the butterfly processed i0 + fftLen/4 sample */
1363 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
1364 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
1365 _SIMD32_OFFSET(pSi1
) =
1366 ((out2
) & 0xFFFF0000) | (out1
& 0x0000FFFF);
1369 /* Butterfly calculations */
1371 /* Read yd (real), xd(imag) input */
1372 U
= _SIMD32_OFFSET(pSi3
);
1374 /* T = packed(yb-yd, xb-xd) */
1377 #ifndef ARM_MATH_BIG_ENDIAN
1379 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
1382 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
1386 /* Butterfly process for the i0+fftLen/2 sample */
1387 out1
= __SMUSD(C1
, S
) >> 16U;
1388 out2
= __SMUADX(C1
, S
);
1392 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
1395 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
1399 /* Butterfly process for the i0+fftLen/2 sample */
1400 out1
= __SMUADX(S
, C1
) >> 16U;
1401 out2
= __SMUSD(__QSUB16(0, C1
), S
);
1403 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1405 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
1406 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
1407 _SIMD32_OFFSET(pSi2
) =
1408 ((out2
) & 0xFFFF0000) | (out1
& 0x0000FFFF);
1411 /* Butterfly process for the i0+3fftLen/4 sample */
1413 #ifndef ARM_MATH_BIG_ENDIAN
1415 out1
= __SMUSD(C3
, R
) >> 16U;
1416 out2
= __SMUADX(C3
, R
);
1420 out1
= __SMUADX(C3
, R
) >> 16U;
1421 out2
= __SMUSD(__QSUB16(0, C3
), R
);
1423 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1425 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
1426 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
1427 _SIMD32_OFFSET(pSi3
) =
1428 ((out2
) & 0xFFFF0000) | (out1
& 0x0000FFFF);
1432 /* Twiddle coefficients index modifier */
1433 twidCoefModifier
<<= 2U;
1435 /* end of middle stage process */
1437 /* data is in 10.6(q6) format for the 1024 point */
1438 /* data is in 8.8(q8) format for the 256 point */
1439 /* data is in 6.10(q10) format for the 64 point */
1440 /* data is in 4.12(q12) format for the 16 point */
1442 /* Initializations for the last stage */
1447 /* start of last stage process */
1449 /* Butterfly implementation */
1452 /* Read xa (real), ya(imag) input */
1453 xaya
= *__SIMD32(ptr1
)++;
1455 /* Read xb (real), yb(imag) input */
1456 xbyb
= *__SIMD32(ptr1
)++;
1458 /* Read xc (real), yc(imag) input */
1459 xcyc
= *__SIMD32(ptr1
)++;
1461 /* Read xd (real), yd(imag) input */
1462 xdyd
= *__SIMD32(ptr1
)++;
1464 /* R = packed((ya + yc), (xa + xc)) */
1465 R
= __QADD16(xaya
, xcyc
);
1467 /* T = packed((yb + yd), (xb + xd)) */
1468 T
= __QADD16(xbyb
, xdyd
);
1470 /* pointer update for writing */
1474 /* xa' = xa + xb + xc + xd */
1475 /* ya' = ya + yb + yc + yd */
1476 *__SIMD32(ptr1
)++ = __SHADD16(R
, T
);
1478 /* T = packed((yb + yd), (xb + xd)) */
1479 T
= __QADD16(xbyb
, xdyd
);
1481 /* xc' = (xa-xb+xc-xd) */
1482 /* yc' = (ya-yb+yc-yd) */
1483 *__SIMD32(ptr1
)++ = __SHSUB16(R
, T
);
1485 /* S = packed((ya - yc), (xa - xc)) */
1486 S
= __QSUB16(xaya
, xcyc
);
1488 /* Read yd (real), xd(imag) input */
1489 /* U = packed( (yb - yd), (xb - xd)) */
1490 U
= __QSUB16(xbyb
, xdyd
);
1492 #ifndef ARM_MATH_BIG_ENDIAN
1494 /* xb' = (xa+yb-xc-yd) */
1495 /* yb' = (ya-xb-yc+xd) */
1496 *__SIMD32(ptr1
)++ = __SHASX(S
, U
);
1499 /* xd' = (xa-yb-xc+yd) */
1500 /* yd' = (ya+xb-yc-xd) */
1501 *__SIMD32(ptr1
)++ = __SHSAX(S
, U
);
1505 /* xb' = (xa+yb-xc-yd) */
1506 /* yb' = (ya-xb-yc+xd) */
1507 *__SIMD32(ptr1
)++ = __SHSAX(S
, U
);
1510 /* xd' = (xa-yb-xc+yd) */
1511 /* yd' = (ya+xb-yc-xd) */
1512 *__SIMD32(ptr1
)++ = __SHASX(S
, U
);
1515 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1519 /* end of last stage process */
1521 /* output is in 11.5(q5) format for the 1024 point */
1522 /* output is in 9.7(q7) format for the 256 point */
1523 /* output is in 7.9(q9) format for the 64 point */
1524 /* output is in 5.11(q11) format for the 16 point */
1529 /* Run the below code for Cortex-M0 */
1531 q15_t R0
, R1
, S0
, S1
, T0
, T1
, U0
, U1
;
1532 q15_t Co1
, Si1
, Co2
, Si2
, Co3
, Si3
, out1
, out2
;
1533 uint32_t n1
, n2
, ic
, i0
, i1
, i2
, i3
, j
, k
;
1535 /* Total process is divided into three stages */
1537 /* process first stage, middle stages, & last stage */
1539 /* Initializations for the first stage */
1546 /* Index for twiddle coefficient */
1549 /* Index for input read and output write */
1554 /* Input is in 1.15(q15) format */
1556 /* Start of first stage process */
1559 /* Butterfly implementation */
1561 /* index calculation for the input as, */
1562 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
1567 /* Reading i0, i0+fftLen/2 inputs */
1568 /* input is downscaled by 4 to avoid overflow */
1569 /* Read ya (real), xa(imag) input */
1570 T0
= pSrc16
[i0
* 2U] >> 2U;
1571 T1
= pSrc16
[(i0
* 2U) + 1U] >> 2U;
1572 /* input is downscaled by 4 to avoid overflow */
1573 /* Read yc (real), xc(imag) input */
1574 S0
= pSrc16
[i2
* 2U] >> 2U;
1575 S1
= pSrc16
[(i2
* 2U) + 1U] >> 2U;
1577 /* R0 = (ya + yc), R1 = (xa + xc) */
1578 R0
= __SSAT(T0
+ S0
, 16U);
1579 R1
= __SSAT(T1
+ S1
, 16U);
1580 /* S0 = (ya - yc), S1 = (xa - xc) */
1581 S0
= __SSAT(T0
- S0
, 16U);
1582 S1
= __SSAT(T1
- S1
, 16U);
1584 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1585 /* input is downscaled by 4 to avoid overflow */
1586 /* Read yb (real), xb(imag) input */
1587 T0
= pSrc16
[i1
* 2U] >> 2U;
1588 T1
= pSrc16
[(i1
* 2U) + 1U] >> 2U;
1589 /* Read yd (real), xd(imag) input */
1590 /* input is downscaled by 4 to avoid overflow */
1591 U0
= pSrc16
[i3
* 2U] >> 2U;
1592 U1
= pSrc16
[(i3
* 2U) + 1U] >> 2U;
1594 /* T0 = (yb + yd), T1 = (xb + xd) */
1595 T0
= __SSAT(T0
+ U0
, 16U);
1596 T1
= __SSAT(T1
+ U1
, 16U);
1598 /* writing the butterfly processed i0 sample */
1599 /* xa' = xa + xb + xc + xd */
1600 /* ya' = ya + yb + yc + yd */
1601 pSrc16
[i0
* 2U] = (R0
>> 1U) + (T0
>> 1U);
1602 pSrc16
[(i0
* 2U) + 1U] = (R1
>> 1U) + (T1
>> 1U);
1604 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */
1605 R0
= __SSAT(R0
- T0
, 16U);
1606 R1
= __SSAT(R1
- T1
, 16U);
1607 /* co2 & si2 are read from Coefficient pointer */
1608 Co2
= pCoef16
[2U * ic
* 2U];
1609 Si2
= pCoef16
[(2U * ic
* 2U) + 1U];
1610 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
1611 out1
= (q15_t
) ((Co2
* R0
- Si2
* R1
) >> 16U);
1612 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1613 out2
= (q15_t
) ((Si2
* R0
+ Co2
* R1
) >> 16U);
1615 /* Reading i0+fftLen/4 */
1616 /* input is downscaled by 4 to avoid overflow */
1617 /* T0 = yb, T1 = xb */
1618 T0
= pSrc16
[i1
* 2U] >> 2U;
1619 T1
= pSrc16
[(i1
* 2U) + 1U] >> 2U;
1621 /* writing the butterfly processed i0 + fftLen/4 sample */
1622 /* writing output(xc', yc') in little endian format */
1623 pSrc16
[i1
* 2U] = out1
;
1624 pSrc16
[(i1
* 2U) + 1U] = out2
;
1626 /* Butterfly calculations */
1627 /* input is downscaled by 4 to avoid overflow */
1628 /* U0 = yd, U1 = xd */
1629 U0
= pSrc16
[i3
* 2U] >> 2U;
1630 U1
= pSrc16
[(i3
* 2U) + 1U] >> 2U;
1632 /* T0 = yb-yd, T1 = xb-xd */
1633 T0
= __SSAT(T0
- U0
, 16U);
1634 T1
= __SSAT(T1
- U1
, 16U);
1635 /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */
1636 R0
= (q15_t
) __SSAT((q31_t
) (S0
+ T1
), 16);
1637 R1
= (q15_t
) __SSAT((q31_t
) (S1
- T0
), 16);
1638 /* S0 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */
1639 S0
= (q15_t
) __SSAT((q31_t
) (S0
- T1
), 16);
1640 S1
= (q15_t
) __SSAT((q31_t
) (S1
+ T0
), 16);
1642 /* co1 & si1 are read from Coefficient pointer */
1643 Co1
= pCoef16
[ic
* 2U];
1644 Si1
= pCoef16
[(ic
* 2U) + 1U];
1645 /* Butterfly process for the i0+fftLen/2 sample */
1646 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
1647 out1
= (q15_t
) ((Co1
* S0
- Si1
* S1
) >> 16U);
1648 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
1649 out2
= (q15_t
) ((Si1
* S0
+ Co1
* S1
) >> 16U);
1650 /* writing output(xb', yb') in little endian format */
1651 pSrc16
[i2
* 2U] = out1
;
1652 pSrc16
[(i2
* 2U) + 1U] = out2
;
1654 /* Co3 & si3 are read from Coefficient pointer */
1655 Co3
= pCoef16
[3U * ic
* 2U];
1656 Si3
= pCoef16
[(3U * ic
* 2U) + 1U];
1657 /* Butterfly process for the i0+3fftLen/4 sample */
1658 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
1659 out1
= (q15_t
) ((Co3
* R0
- Si3
* R1
) >> 16U);
1660 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
1661 out2
= (q15_t
) ((Si3
* R0
+ Co3
* R1
) >> 16U);
1662 /* writing output(xd', yd') in little endian format */
1663 pSrc16
[i3
* 2U] = out1
;
1664 pSrc16
[(i3
* 2U) + 1U] = out2
;
1666 /* Twiddle coefficients index modifier */
1667 ic
= ic
+ twidCoefModifier
;
1669 /* Updating input index */
1674 /* End of first stage process */
1676 /* data is in 4.11(q11) format */
1679 /* Start of Middle stage process */
1681 /* Twiddle coefficients index modifier */
1682 twidCoefModifier
<<= 2U;
1684 /* Calculation of Middle stage */
1685 for (k
= fftLen
/ 4U; k
> 4U; k
>>= 2U)
1687 /* Initializations for the middle stage */
1692 for (j
= 0U; j
<= (n2
- 1U); j
++)
1694 /* index calculation for the coefficients */
1695 Co1
= pCoef16
[ic
* 2U];
1696 Si1
= pCoef16
[(ic
* 2U) + 1U];
1697 Co2
= pCoef16
[2U * ic
* 2U];
1698 Si2
= pCoef16
[2U * ic
* 2U + 1U];
1699 Co3
= pCoef16
[3U * ic
* 2U];
1700 Si3
= pCoef16
[(3U * ic
* 2U) + 1U];
1702 /* Twiddle coefficients index modifier */
1703 ic
= ic
+ twidCoefModifier
;
1705 /* Butterfly implementation */
1706 for (i0
= j
; i0
< fftLen
; i0
+= n1
)
1708 /* index calculation for the input as, */
1709 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
1714 /* Reading i0, i0+fftLen/2 inputs */
1715 /* Read ya (real), xa(imag) input */
1716 T0
= pSrc16
[i0
* 2U];
1717 T1
= pSrc16
[(i0
* 2U) + 1U];
1719 /* Read yc (real), xc(imag) input */
1720 S0
= pSrc16
[i2
* 2U];
1721 S1
= pSrc16
[(i2
* 2U) + 1U];
1724 /* R0 = (ya + yc), R1 = (xa + xc) */
1725 R0
= __SSAT(T0
+ S0
, 16U);
1726 R1
= __SSAT(T1
+ S1
, 16U);
1727 /* S0 = (ya - yc), S1 = (xa - xc) */
1728 S0
= __SSAT(T0
- S0
, 16U);
1729 S1
= __SSAT(T1
- S1
, 16U);
1731 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1732 /* Read yb (real), xb(imag) input */
1733 T0
= pSrc16
[i1
* 2U];
1734 T1
= pSrc16
[(i1
* 2U) + 1U];
1736 /* Read yd (real), xd(imag) input */
1737 U0
= pSrc16
[i3
* 2U];
1738 U1
= pSrc16
[(i3
* 2U) + 1U];
1740 /* T0 = (yb + yd), T1 = (xb + xd) */
1741 T0
= __SSAT(T0
+ U0
, 16U);
1742 T1
= __SSAT(T1
+ U1
, 16U);
1744 /* writing the butterfly processed i0 sample */
1745 /* xa' = xa + xb + xc + xd */
1746 /* ya' = ya + yb + yc + yd */
1747 pSrc16
[i0
* 2U] = ((R0
>> 1U) + (T0
>> 1U)) >> 1U;
1748 pSrc16
[(i0
* 2U) + 1U] = ((R1
>> 1U) + (T1
>> 1U)) >> 1U;
1750 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
1751 R0
= (R0
>> 1U) - (T0
>> 1U);
1752 R1
= (R1
>> 1U) - (T1
>> 1U);
1754 /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
1755 out1
= (q15_t
) ((Co2
* R0
- Si2
* R1
) >> 16);
1756 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1757 out2
= (q15_t
) ((Si2
* R0
+ Co2
* R1
) >> 16);
1759 /* Reading i0+fftLen/4 */
1760 /* Read yb (real), xb(imag) input */
1761 T0
= pSrc16
[i1
* 2U];
1762 T1
= pSrc16
[(i1
* 2U) + 1U];
1764 /* writing the butterfly processed i0 + fftLen/4 sample */
1765 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
1766 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1767 pSrc16
[i1
* 2U] = out1
;
1768 pSrc16
[(i1
* 2U) + 1U] = out2
;
1770 /* Butterfly calculations */
1771 /* Read yd (real), xd(imag) input */
1772 U0
= pSrc16
[i3
* 2U];
1773 U1
= pSrc16
[(i3
* 2U) + 1U];
1775 /* T0 = yb-yd, T1 = xb-xd */
1776 T0
= __SSAT(T0
- U0
, 16U);
1777 T1
= __SSAT(T1
- U1
, 16U);
1779 /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */
1780 R0
= (S0
>> 1U) + (T1
>> 1U);
1781 R1
= (S1
>> 1U) - (T0
>> 1U);
1783 /* S0 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */
1784 S0
= (S0
>> 1U) - (T1
>> 1U);
1785 S1
= (S1
>> 1U) + (T0
>> 1U);
1787 /* Butterfly process for the i0+fftLen/2 sample */
1788 out1
= (q15_t
) ((Co1
* S0
- Si1
* S1
) >> 16U);
1789 out2
= (q15_t
) ((Si1
* S0
+ Co1
* S1
) >> 16U);
1790 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
1791 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
1792 pSrc16
[i2
* 2U] = out1
;
1793 pSrc16
[(i2
* 2U) + 1U] = out2
;
1795 /* Butterfly process for the i0+3fftLen/4 sample */
1796 out1
= (q15_t
) ((Co3
* R0
- Si3
* R1
) >> 16U);
1798 out2
= (q15_t
) ((Si3
* R0
+ Co3
* R1
) >> 16U);
1799 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
1800 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
1801 pSrc16
[i3
* 2U] = out1
;
1802 pSrc16
[(i3
* 2U) + 1U] = out2
;
1807 /* Twiddle coefficients index modifier */
1808 twidCoefModifier
<<= 2U;
1810 /* End of Middle stages process */
1813 /* data is in 10.6(q6) format for the 1024 point */
1814 /* data is in 8.8(q8) format for the 256 point */
1815 /* data is in 6.10(q10) format for the 64 point */
1816 /* data is in 4.12(q12) format for the 16 point */
1818 /* start of last stage process */
1821 /* Initializations for the last stage */
1825 /* Butterfly implementation */
1826 for (i0
= 0U; i0
<= (fftLen
- n1
); i0
+= n1
)
1828 /* index calculation for the input as, */
1829 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
1834 /* Reading i0, i0+fftLen/2 inputs */
1835 /* Read ya (real), xa(imag) input */
1836 T0
= pSrc16
[i0
* 2U];
1837 T1
= pSrc16
[(i0
* 2U) + 1U];
1838 /* Read yc (real), xc(imag) input */
1839 S0
= pSrc16
[i2
* 2U];
1840 S1
= pSrc16
[(i2
* 2U) + 1U];
1842 /* R0 = (ya + yc), R1 = (xa + xc) */
1843 R0
= __SSAT(T0
+ S0
, 16U);
1844 R1
= __SSAT(T1
+ S1
, 16U);
1845 /* S0 = (ya - yc), S1 = (xa - xc) */
1846 S0
= __SSAT(T0
- S0
, 16U);
1847 S1
= __SSAT(T1
- S1
, 16U);
1849 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1850 /* Read yb (real), xb(imag) input */
1851 T0
= pSrc16
[i1
* 2U];
1852 T1
= pSrc16
[(i1
* 2U) + 1U];
1853 /* Read yd (real), xd(imag) input */
1854 U0
= pSrc16
[i3
* 2U];
1855 U1
= pSrc16
[(i3
* 2U) + 1U];
1857 /* T0 = (yb + yd), T1 = (xb + xd) */
1858 T0
= __SSAT(T0
+ U0
, 16U);
1859 T1
= __SSAT(T1
+ U1
, 16U);
1861 /* writing the butterfly processed i0 sample */
1862 /* xa' = xa + xb + xc + xd */
1863 /* ya' = ya + yb + yc + yd */
1864 pSrc16
[i0
* 2U] = (R0
>> 1U) + (T0
>> 1U);
1865 pSrc16
[(i0
* 2U) + 1U] = (R1
>> 1U) + (T1
>> 1U);
1867 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
1868 R0
= (R0
>> 1U) - (T0
>> 1U);
1869 R1
= (R1
>> 1U) - (T1
>> 1U);
1871 /* Read yb (real), xb(imag) input */
1872 T0
= pSrc16
[i1
* 2U];
1873 T1
= pSrc16
[(i1
* 2U) + 1U];
1875 /* writing the butterfly processed i0 + fftLen/4 sample */
1876 /* xc' = (xa-xb+xc-xd) */
1877 /* yc' = (ya-yb+yc-yd) */
1878 pSrc16
[i1
* 2U] = R0
;
1879 pSrc16
[(i1
* 2U) + 1U] = R1
;
1881 /* Read yd (real), xd(imag) input */
1882 U0
= pSrc16
[i3
* 2U];
1883 U1
= pSrc16
[(i3
* 2U) + 1U];
1884 /* T0 = (yb - yd), T1 = (xb - xd) */
1885 T0
= __SSAT(T0
- U0
, 16U);
1886 T1
= __SSAT(T1
- U1
, 16U);
1888 /* writing the butterfly processed i0 + fftLen/2 sample */
1889 /* xb' = (xa-yb-xc+yd) */
1890 /* yb' = (ya+xb-yc-xd) */
1891 pSrc16
[i2
* 2U] = (S0
>> 1U) - (T1
>> 1U);
1892 pSrc16
[(i2
* 2U) + 1U] = (S1
>> 1U) + (T0
>> 1U);
1895 /* writing the butterfly processed i0 + 3fftLen/4 sample */
1896 /* xd' = (xa+yb-xc-yd) */
1897 /* yd' = (ya-xb-yc+xd) */
1898 pSrc16
[i3
* 2U] = (S0
>> 1U) + (T1
>> 1U);
1899 pSrc16
[(i3
* 2U) + 1U] = (S1
>> 1U) - (T0
>> 1U);
1901 /* end of last stage process */
1903 /* output is in 11.5(q5) format for the 1024 point */
1904 /* output is in 9.7(q7) format for the 256 point */
1905 /* output is in 7.9(q9) format for the 64 point */
1906 /* output is in 5.11(q11) format for the 16 point */
1908 #endif /* #if defined (ARM_MATH_DSP) */