1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_q31.c
4 * Description: This file has function definition of Radix-4 FFT & IFFT function and
5 * In-place bit reversal using bit reversal table
7 * $Date: 27. January 2017
10 * Target Processor: Cortex-M cores
11 * -------------------------------------------------------------------- */
13 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
15 * SPDX-License-Identifier: Apache-2.0
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
21 * www.apache.org/licenses/LICENSE-2.0
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
32 void arm_radix4_butterfly_inverse_q31(
36 uint32_t twidCoefModifier
);
38 void arm_radix4_butterfly_q31(
42 uint32_t twidCoefModifier
);
44 void arm_bitreversal_q31(
47 uint16_t bitRevFactor
,
48 uint16_t * pBitRevTab
);
51 * @ingroup groupTransforms
55 * @addtogroup ComplexFFT
61 * @brief Processing function for the Q31 CFFT/CIFFT.
62 * @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed
63 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure.
64 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
67 * \par Input and output formats:
69 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
70 * Hence the output format is different for different FFT sizes.
71 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
73 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
74 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
78 void arm_cfft_radix4_q31(
79 const arm_cfft_radix4_instance_q31
* S
,
82 if (S
->ifftFlag
== 1U)
84 /* Complex IFFT radix-4 */
85 arm_radix4_butterfly_inverse_q31(pSrc
, S
->fftLen
, S
->pTwiddle
, S
->twidCoefModifier
);
89 /* Complex FFT radix-4 */
90 arm_radix4_butterfly_q31(pSrc
, S
->fftLen
, S
->pTwiddle
, S
->twidCoefModifier
);
93 if (S
->bitReverseFlag
== 1U)
96 arm_bitreversal_q31(pSrc
, S
->fftLen
, S
->bitRevFactor
, S
->pBitRevTable
);
102 * @} end of ComplexFFT group
106 * Radix-4 FFT algorithm used is :
108 * Input real and imaginary data:
110 * x(n+N/4 ) = xb + j * yb
111 * x(n+N/2 ) = xc + j * yc
112 * x(n+3N 4) = xd + j * yd
115 * Output real and imaginary data:
116 * x(4r) = xa'+ j * ya'
117 * x(4r+1) = xb'+ j * yb'
118 * x(4r+2) = xc'+ j * yc'
119 * x(4r+3) = xd'+ j * yd'
122 * Twiddle factors for radix-4 FFT:
123 * Wn = co1 + j * (- si1)
124 * W2n = co2 + j * (- si2)
125 * W3n = co3 + j * (- si3)
127 * Butterfly implementation:
128 * xa' = xa + xb + xc + xd
129 * ya' = ya + yb + yc + yd
130 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
131 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
132 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
133 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
134 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
135 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
140 * @brief Core function for the Q31 CFFT butterfly process.
141 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
142 * @param[in] fftLen length of the FFT.
143 * @param[in] *pCoef points to twiddle coefficient buffer.
144 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
148 void arm_radix4_butterfly_q31(
152 uint32_t twidCoefModifier
)
154 #if defined(ARM_MATH_CM7)
155 uint32_t n1
, n2
, ia1
, ia2
, ia3
, i0
, i1
, i2
, i3
, j
, k
;
156 q31_t t1
, t2
, r1
, r2
, s1
, s2
, co1
, co2
, co3
, si1
, si2
, si3
;
158 q31_t xa
, xb
, xc
, xd
;
159 q31_t ya
, yb
, yc
, yd
;
160 q31_t xa_out
, xb_out
, xc_out
, xd_out
;
161 q31_t ya_out
, yb_out
, yc_out
, yd_out
;
164 q63_t xaya
, xbyb
, xcyc
, xdyd
;
165 /* Total process is divided into three stages */
167 /* process first stage, middle stages, & last stage */
170 /* start of first stage process */
172 /* Initializations for the first stage */
182 /* Calculation of first stage */
185 /* index calculation for the input as, */
186 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
191 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
193 /* Butterfly implementation */
195 r1
= (pSrc
[(2U * i0
)] >> 4U) + (pSrc
[(2U * i2
)] >> 4U);
197 r2
= (pSrc
[2U * i0
] >> 4U) - (pSrc
[2U * i2
] >> 4U);
200 t1
= (pSrc
[2U * i1
] >> 4U) + (pSrc
[2U * i3
] >> 4U);
203 s1
= (pSrc
[(2U * i0
) + 1U] >> 4U) + (pSrc
[(2U * i2
) + 1U] >> 4U);
205 s2
= (pSrc
[(2U * i0
) + 1U] >> 4U) - (pSrc
[(2U * i2
) + 1U] >> 4U);
207 /* xa' = xa + xb + xc + xd */
208 pSrc
[2U * i0
] = (r1
+ t1
);
209 /* (xa + xc) - (xb + xd) */
212 t2
= (pSrc
[(2U * i1
) + 1U] >> 4U) + (pSrc
[(2U * i3
) + 1U] >> 4U);
214 /* ya' = ya + yb + yc + yd */
215 pSrc
[(2U * i0
) + 1U] = (s1
+ t2
);
217 /* (ya + yc) - (yb + yd) */
221 t1
= (pSrc
[(2U * i1
) + 1U] >> 4U) - (pSrc
[(2U * i3
) + 1U] >> 4U);
223 t2
= (pSrc
[2U * i1
] >> 4U) - (pSrc
[2U * i3
] >> 4U);
225 /* index calculation for the coefficients */
227 co2
= pCoef
[ia2
* 2U];
228 si2
= pCoef
[(ia2
* 2U) + 1U];
230 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
231 pSrc
[2U * i1
] = (((int32_t) (((q63_t
) r1
* co2
) >> 32)) +
232 ((int32_t) (((q63_t
) s1
* si2
) >> 32))) << 1U;
234 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
235 pSrc
[(2U * i1
) + 1U] = (((int32_t) (((q63_t
) s1
* co2
) >> 32)) -
236 ((int32_t) (((q63_t
) r1
* si2
) >> 32))) << 1U;
238 /* (xa - xc) + (yb - yd) */
240 /* (xa - xc) - (yb - yd) */
243 /* (ya - yc) - (xb - xd) */
245 /* (ya - yc) + (xb - xd) */
248 co1
= pCoef
[ia1
* 2U];
249 si1
= pCoef
[(ia1
* 2U) + 1U];
251 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
252 pSrc
[2U * i2
] = (((int32_t) (((q63_t
) r1
* co1
) >> 32)) +
253 ((int32_t) (((q63_t
) s1
* si1
) >> 32))) << 1U;
255 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
256 pSrc
[(2U * i2
) + 1U] = (((int32_t) (((q63_t
) s1
* co1
) >> 32)) -
257 ((int32_t) (((q63_t
) r1
* si1
) >> 32))) << 1U;
259 /* index calculation for the coefficients */
261 co3
= pCoef
[ia3
* 2U];
262 si3
= pCoef
[(ia3
* 2U) + 1U];
264 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
265 pSrc
[2U * i3
] = (((int32_t) (((q63_t
) r2
* co3
) >> 32)) +
266 ((int32_t) (((q63_t
) s2
* si3
) >> 32))) << 1U;
268 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
269 pSrc
[(2U * i3
) + 1U] = (((int32_t) (((q63_t
) s2
* co3
) >> 32)) -
270 ((int32_t) (((q63_t
) r2
* si3
) >> 32))) << 1U;
272 /* Twiddle coefficients index modifier */
273 ia1
= ia1
+ twidCoefModifier
;
275 /* Updating input index */
280 /* end of first stage process */
282 /* data is in 5.27(q27) format */
285 /* start of Middle stages process */
288 /* each stage in middle stages provides two down scaling of the input */
290 twidCoefModifier
<<= 2U;
293 for (k
= fftLen
/ 4U; k
> 4U; k
>>= 2U)
295 /* Initializations for the first stage */
300 /* Calculation of first stage */
301 for (j
= 0U; j
<= (n2
- 1U); j
++)
303 /* index calculation for the coefficients */
306 co1
= pCoef
[ia1
* 2U];
307 si1
= pCoef
[(ia1
* 2U) + 1U];
308 co2
= pCoef
[ia2
* 2U];
309 si2
= pCoef
[(ia2
* 2U) + 1U];
310 co3
= pCoef
[ia3
* 2U];
311 si3
= pCoef
[(ia3
* 2U) + 1U];
312 /* Twiddle coefficients index modifier */
313 ia1
= ia1
+ twidCoefModifier
;
315 for (i0
= j
; i0
< fftLen
; i0
+= n1
)
317 /* index calculation for the input as, */
318 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
323 /* Butterfly implementation */
325 r1
= pSrc
[2U * i0
] + pSrc
[2U * i2
];
327 r2
= pSrc
[2U * i0
] - pSrc
[2U * i2
];
330 s1
= pSrc
[(2U * i0
) + 1U] + pSrc
[(2U * i2
) + 1U];
332 s2
= pSrc
[(2U * i0
) + 1U] - pSrc
[(2U * i2
) + 1U];
335 t1
= pSrc
[2U * i1
] + pSrc
[2U * i3
];
337 /* xa' = xa + xb + xc + xd */
338 pSrc
[2U * i0
] = (r1
+ t1
) >> 2U;
339 /* xa + xc -(xb + xd) */
343 t2
= pSrc
[(2U * i1
) + 1U] + pSrc
[(2U * i3
) + 1U];
344 /* ya' = ya + yb + yc + yd */
345 pSrc
[(2U * i0
) + 1U] = (s1
+ t2
) >> 2U;
347 /* (ya + yc) - (yb + yd) */
351 t1
= pSrc
[(2U * i1
) + 1U] - pSrc
[(2U * i3
) + 1U];
353 t2
= pSrc
[2U * i1
] - pSrc
[2U * i3
];
355 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
356 pSrc
[2U * i1
] = (((int32_t) (((q63_t
) r1
* co2
) >> 32)) +
357 ((int32_t) (((q63_t
) s1
* si2
) >> 32))) >> 1U;
359 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
360 pSrc
[(2U * i1
) + 1U] = (((int32_t) (((q63_t
) s1
* co2
) >> 32)) -
361 ((int32_t) (((q63_t
) r1
* si2
) >> 32))) >> 1U;
363 /* (xa - xc) + (yb - yd) */
365 /* (xa - xc) - (yb - yd) */
368 /* (ya - yc) - (xb - xd) */
370 /* (ya - yc) + (xb - xd) */
373 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
374 pSrc
[2U * i2
] = (((int32_t) (((q63_t
) r1
* co1
) >> 32)) +
375 ((int32_t) (((q63_t
) s1
* si1
) >> 32))) >> 1U;
377 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
378 pSrc
[(2U * i2
) + 1U] = (((int32_t) (((q63_t
) s1
* co1
) >> 32)) -
379 ((int32_t) (((q63_t
) r1
* si1
) >> 32))) >> 1U;
381 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
382 pSrc
[2U * i3
] = (((int32_t) (((q63_t
) r2
* co3
) >> 32)) +
383 ((int32_t) (((q63_t
) s2
* si3
) >> 32))) >> 1U;
385 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
386 pSrc
[(2U * i3
) + 1U] = (((int32_t) (((q63_t
) s2
* co3
) >> 32)) -
387 ((int32_t) (((q63_t
) r2
* si3
) >> 32))) >> 1U;
390 twidCoefModifier
<<= 2U;
393 uint32_t n1
, n2
, ia1
, ia2
, ia3
, i0
, j
, k
;
394 q31_t t1
, t2
, r1
, r2
, s1
, s2
, co1
, co2
, co3
, si1
, si2
, si3
;
396 q31_t xa
, xb
, xc
, xd
;
397 q31_t ya
, yb
, yc
, yd
;
398 q31_t xa_out
, xb_out
, xc_out
, xd_out
;
399 q31_t ya_out
, yb_out
, yc_out
, yd_out
;
406 q63_t xaya
, xbyb
, xcyc
, xdyd
;
407 /* Total process is divided into three stages */
409 /* process first stage, middle stages, & last stage */
412 /* start of first stage process */
414 /* Initializations for the first stage */
425 pSi1
= pSi0
+ 2 * n2
;
426 pSi2
= pSi1
+ 2 * n2
;
427 pSi3
= pSi2
+ 2 * n2
;
429 /* Calculation of first stage */
432 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
434 /* Butterfly implementation */
436 r1
= (pSi0
[0] >> 4U) + (pSi2
[0] >> 4U);
438 r2
= (pSi0
[0] >> 4U) - (pSi2
[0] >> 4U);
441 t1
= (pSi1
[0] >> 4U) + (pSi3
[0] >> 4U);
444 s1
= (pSi0
[1] >> 4U) + (pSi2
[1] >> 4U);
446 s2
= (pSi0
[1] >> 4U) - (pSi2
[1] >> 4U);
448 /* xa' = xa + xb + xc + xd */
450 /* (xa + xc) - (xb + xd) */
453 t2
= (pSi1
[1] >> 4U) + (pSi3
[1] >> 4U);
455 /* ya' = ya + yb + yc + yd */
458 /* (ya + yc) - (yb + yd) */
462 t1
= (pSi1
[1] >> 4U) - (pSi3
[1] >> 4U);
464 t2
= (pSi1
[0] >> 4U) - (pSi3
[0] >> 4U);
466 /* index calculation for the coefficients */
468 co2
= pCoef
[ia2
* 2U];
469 si2
= pCoef
[(ia2
* 2U) + 1U];
471 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
472 *pSi1
++ = (((int32_t) (((q63_t
) r1
* co2
) >> 32)) +
473 ((int32_t) (((q63_t
) s1
* si2
) >> 32))) << 1U;
475 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
476 *pSi1
++ = (((int32_t) (((q63_t
) s1
* co2
) >> 32)) -
477 ((int32_t) (((q63_t
) r1
* si2
) >> 32))) << 1U;
479 /* (xa - xc) + (yb - yd) */
481 /* (xa - xc) - (yb - yd) */
484 /* (ya - yc) - (xb - xd) */
486 /* (ya - yc) + (xb - xd) */
489 co1
= pCoef
[ia1
* 2U];
490 si1
= pCoef
[(ia1
* 2U) + 1U];
492 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
493 *pSi2
++ = (((int32_t) (((q63_t
) r1
* co1
) >> 32)) +
494 ((int32_t) (((q63_t
) s1
* si1
) >> 32))) << 1U;
496 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
497 *pSi2
++ = (((int32_t) (((q63_t
) s1
* co1
) >> 32)) -
498 ((int32_t) (((q63_t
) r1
* si1
) >> 32))) << 1U;
500 /* index calculation for the coefficients */
502 co3
= pCoef
[ia3
* 2U];
503 si3
= pCoef
[(ia3
* 2U) + 1U];
505 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
506 *pSi3
++ = (((int32_t) (((q63_t
) r2
* co3
) >> 32)) +
507 ((int32_t) (((q63_t
) s2
* si3
) >> 32))) << 1U;
509 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
510 *pSi3
++ = (((int32_t) (((q63_t
) s2
* co3
) >> 32)) -
511 ((int32_t) (((q63_t
) r2
* si3
) >> 32))) << 1U;
513 /* Twiddle coefficients index modifier */
514 ia1
= ia1
+ twidCoefModifier
;
518 /* end of first stage process */
520 /* data is in 5.27(q27) format */
523 /* start of Middle stages process */
526 /* each stage in middle stages provides two down scaling of the input */
528 twidCoefModifier
<<= 2U;
531 for (k
= fftLen
/ 4U; k
> 4U; k
>>= 2U)
533 /* Initializations for the first stage */
538 /* Calculation of first stage */
539 for (j
= 0U; j
<= (n2
- 1U); j
++)
541 /* index calculation for the coefficients */
544 co1
= pCoef
[ia1
* 2U];
545 si1
= pCoef
[(ia1
* 2U) + 1U];
546 co2
= pCoef
[ia2
* 2U];
547 si2
= pCoef
[(ia2
* 2U) + 1U];
548 co3
= pCoef
[ia3
* 2U];
549 si3
= pCoef
[(ia3
* 2U) + 1U];
550 /* Twiddle coefficients index modifier */
551 ia1
= ia1
+ twidCoefModifier
;
554 pSi1
= pSi0
+ 2 * n2
;
555 pSi2
= pSi1
+ 2 * n2
;
556 pSi3
= pSi2
+ 2 * n2
;
558 for (i0
= j
; i0
< fftLen
; i0
+= n1
)
560 /* Butterfly implementation */
562 r1
= pSi0
[0] + pSi2
[0];
565 r2
= pSi0
[0] - pSi2
[0];
569 s1
= pSi0
[1] + pSi2
[1];
572 s2
= pSi0
[1] - pSi2
[1];
576 t1
= pSi1
[0] + pSi3
[0];
579 /* xa' = xa + xb + xc + xd */
580 pSi0
[0] = (r1
+ t1
) >> 2U;
581 /* xa + xc -(xb + xd) */
585 t2
= pSi1
[1] + pSi3
[1];
587 /* ya' = ya + yb + yc + yd */
588 pSi0
[1] = (s1
+ t2
) >> 2U;
591 /* (ya + yc) - (yb + yd) */
595 t1
= pSi1
[1] - pSi3
[1];
598 t2
= pSi1
[0] - pSi3
[0];
601 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
602 pSi1
[0] = (((int32_t) (((q63_t
) r1
* co2
) >> 32)) +
603 ((int32_t) (((q63_t
) s1
* si2
) >> 32))) >> 1U;
605 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
606 pSi1
[1] = (((int32_t) (((q63_t
) s1
* co2
) >> 32)) -
607 ((int32_t) (((q63_t
) r1
* si2
) >> 32))) >> 1U;
610 /* (xa - xc) + (yb - yd) */
612 /* (xa - xc) - (yb - yd) */
615 /* (ya - yc) - (xb - xd) */
617 /* (ya - yc) + (xb - xd) */
620 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
621 pSi2
[0] = (((int32_t) (((q63_t
) r1
* co1
) >> 32)) +
622 ((int32_t) (((q63_t
) s1
* si1
) >> 32))) >> 1U;
624 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
625 pSi2
[1] = (((int32_t) (((q63_t
) s1
* co1
) >> 32)) -
626 ((int32_t) (((q63_t
) r1
* si1
) >> 32))) >> 1U;
629 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
630 pSi3
[0] = (((int32_t) (((q63_t
) r2
* co3
) >> 32)) +
631 ((int32_t) (((q63_t
) s2
* si3
) >> 32))) >> 1U;
633 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
634 pSi3
[1] = (((int32_t) (((q63_t
) s2
* co3
) >> 32)) -
635 ((int32_t) (((q63_t
) r2
* si3
) >> 32))) >> 1U;
639 twidCoefModifier
<<= 2U;
643 /* End of Middle stages process */
645 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
646 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
647 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
648 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
651 /* start of Last stage process */
652 /* Initializations for the last stage */
656 /* Calculations of last stage */
660 #ifndef ARM_MATH_BIG_ENDIAN
662 /* Read xa (real), ya(imag) input */
663 xaya
= *__SIMD64(ptr1
)++;
665 ya
= (q31_t
) (xaya
>> 32);
667 /* Read xb (real), yb(imag) input */
668 xbyb
= *__SIMD64(ptr1
)++;
670 yb
= (q31_t
) (xbyb
>> 32);
672 /* Read xc (real), yc(imag) input */
673 xcyc
= *__SIMD64(ptr1
)++;
675 yc
= (q31_t
) (xcyc
>> 32);
677 /* Read xc (real), yc(imag) input */
678 xdyd
= *__SIMD64(ptr1
)++;
680 yd
= (q31_t
) (xdyd
>> 32);
684 /* Read xa (real), ya(imag) input */
685 xaya
= *__SIMD64(ptr1
)++;
687 xa
= (q31_t
) (xaya
>> 32);
689 /* Read xb (real), yb(imag) input */
690 xbyb
= *__SIMD64(ptr1
)++;
692 xb
= (q31_t
) (xbyb
>> 32);
694 /* Read xc (real), yc(imag) input */
695 xcyc
= *__SIMD64(ptr1
)++;
697 xc
= (q31_t
) (xcyc
>> 32);
699 /* Read xc (real), yc(imag) input */
700 xdyd
= *__SIMD64(ptr1
)++;
702 xd
= (q31_t
) (xdyd
>> 32);
707 /* xa' = xa + xb + xc + xd */
708 xa_out
= xa
+ xb
+ xc
+ xd
;
710 /* ya' = ya + yb + yc + yd */
711 ya_out
= ya
+ yb
+ yc
+ yd
;
713 /* pointer updation for writing */
716 /* writing xa' and ya' */
720 xc_out
= (xa
- xb
+ xc
- xd
);
721 yc_out
= (ya
- yb
+ yc
- yd
);
723 /* writing xc' and yc' */
727 xb_out
= (xa
+ yb
- xc
- yd
);
728 yb_out
= (ya
- xb
- yc
+ xd
);
730 /* writing xb' and yb' */
734 xd_out
= (xa
- yb
- xc
+ yd
);
735 yd_out
= (ya
+ xb
- yc
- xd
);
737 /* writing xd' and yd' */
744 /* output is in 11.21(q21) format for the 1024 point */
745 /* output is in 9.23(q23) format for the 256 point */
746 /* output is in 7.25(q25) format for the 64 point */
747 /* output is in 5.27(q27) format for the 16 point */
749 /* End of last stage process */
755 * @brief Core function for the Q31 CIFFT butterfly process.
756 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
757 * @param[in] fftLen length of the FFT.
758 * @param[in] *pCoef points to twiddle coefficient buffer.
759 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
765 * Radix-4 IFFT algorithm used is :
767 * CIFFT uses same twiddle coefficients as CFFT Function
768 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
771 * IFFT is implemented with following changes in equations from FFT
773 * Input real and imaginary data:
775 * x(n+N/4 ) = xb + j * yb
776 * x(n+N/2 ) = xc + j * yc
777 * x(n+3N 4) = xd + j * yd
780 * Output real and imaginary data:
781 * x(4r) = xa'+ j * ya'
782 * x(4r+1) = xb'+ j * yb'
783 * x(4r+2) = xc'+ j * yc'
784 * x(4r+3) = xd'+ j * yd'
787 * Twiddle factors for radix-4 IFFT:
788 * Wn = co1 + j * (si1)
789 * W2n = co2 + j * (si2)
790 * W3n = co3 + j * (si3)
792 * The real and imaginary output values for the radix-4 butterfly are
793 * xa' = xa + xb + xc + xd
794 * ya' = ya + yb + yc + yd
795 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
796 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
797 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
798 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
799 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
800 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
804 void arm_radix4_butterfly_inverse_q31(
808 uint32_t twidCoefModifier
)
810 #if defined(ARM_MATH_CM7)
811 uint32_t n1
, n2
, ia1
, ia2
, ia3
, i0
, i1
, i2
, i3
, j
, k
;
812 q31_t t1
, t2
, r1
, r2
, s1
, s2
, co1
, co2
, co3
, si1
, si2
, si3
;
813 q31_t xa
, xb
, xc
, xd
;
814 q31_t ya
, yb
, yc
, yd
;
815 q31_t xa_out
, xb_out
, xc_out
, xd_out
;
816 q31_t ya_out
, yb_out
, yc_out
, yd_out
;
819 q63_t xaya
, xbyb
, xcyc
, xdyd
;
821 /* input is be 1.31(q31) format for all FFT sizes */
822 /* Total process is divided into three stages */
823 /* process first stage, middle stages, & last stage */
825 /* Start of first stage process */
827 /* Initializations for the first stage */
840 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
842 /* index calculation for the input as, */
843 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
848 /* Butterfly implementation */
850 r1
= (pSrc
[2U * i0
] >> 4U) + (pSrc
[2U * i2
] >> 4U);
852 r2
= (pSrc
[2U * i0
] >> 4U) - (pSrc
[2U * i2
] >> 4U);
855 t1
= (pSrc
[2U * i1
] >> 4U) + (pSrc
[2U * i3
] >> 4U);
858 s1
= (pSrc
[(2U * i0
) + 1U] >> 4U) + (pSrc
[(2U * i2
) + 1U] >> 4U);
860 s2
= (pSrc
[(2U * i0
) + 1U] >> 4U) - (pSrc
[(2U * i2
) + 1U] >> 4U);
862 /* xa' = xa + xb + xc + xd */
863 pSrc
[2U * i0
] = (r1
+ t1
);
864 /* (xa + xc) - (xb + xd) */
867 t2
= (pSrc
[(2U * i1
) + 1U] >> 4U) + (pSrc
[(2U * i3
) + 1U] >> 4U);
868 /* ya' = ya + yb + yc + yd */
869 pSrc
[(2U * i0
) + 1U] = (s1
+ t2
);
871 /* (ya + yc) - (yb + yd) */
875 t1
= (pSrc
[(2U * i1
) + 1U] >> 4U) - (pSrc
[(2U * i3
) + 1U] >> 4U);
877 t2
= (pSrc
[2U * i1
] >> 4U) - (pSrc
[2U * i3
] >> 4U);
879 /* index calculation for the coefficients */
881 co2
= pCoef
[ia2
* 2U];
882 si2
= pCoef
[(ia2
* 2U) + 1U];
884 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
885 pSrc
[2U * i1
] = (((int32_t) (((q63_t
) r1
* co2
) >> 32)) -
886 ((int32_t) (((q63_t
) s1
* si2
) >> 32))) << 1U;
888 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
889 pSrc
[2U * i1
+ 1U] = (((int32_t) (((q63_t
) s1
* co2
) >> 32)) +
890 ((int32_t) (((q63_t
) r1
* si2
) >> 32))) << 1U;
892 /* (xa - xc) - (yb - yd) */
894 /* (xa - xc) + (yb - yd) */
897 /* (ya - yc) + (xb - xd) */
899 /* (ya - yc) - (xb - xd) */
902 co1
= pCoef
[ia1
* 2U];
903 si1
= pCoef
[(ia1
* 2U) + 1U];
905 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
906 pSrc
[2U * i2
] = (((int32_t) (((q63_t
) r1
* co1
) >> 32)) -
907 ((int32_t) (((q63_t
) s1
* si1
) >> 32))) << 1U;
909 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
910 pSrc
[(2U * i2
) + 1U] = (((int32_t) (((q63_t
) s1
* co1
) >> 32)) +
911 ((int32_t) (((q63_t
) r1
* si1
) >> 32))) << 1U;
913 /* index calculation for the coefficients */
915 co3
= pCoef
[ia3
* 2U];
916 si3
= pCoef
[(ia3
* 2U) + 1U];
918 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
919 pSrc
[2U * i3
] = (((int32_t) (((q63_t
) r2
* co3
) >> 32)) -
920 ((int32_t) (((q63_t
) s2
* si3
) >> 32))) << 1U;
922 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
923 pSrc
[(2U * i3
) + 1U] = (((int32_t) (((q63_t
) s2
* co3
) >> 32)) +
924 ((int32_t) (((q63_t
) r2
* si3
) >> 32))) << 1U;
926 /* Twiddle coefficients index modifier */
927 ia1
= ia1
+ twidCoefModifier
;
929 /* Updating input index */
934 /* data is in 5.27(q27) format */
935 /* each stage provides two down scaling of the input */
938 /* Start of Middle stages process */
940 twidCoefModifier
<<= 2U;
942 /* Calculation of second stage to excluding last stage */
943 for (k
= fftLen
/ 4U; k
> 4U; k
>>= 2U)
945 /* Initializations for the first stage */
950 for (j
= 0; j
<= (n2
- 1U); j
++)
952 /* index calculation for the coefficients */
955 co1
= pCoef
[ia1
* 2U];
956 si1
= pCoef
[(ia1
* 2U) + 1U];
957 co2
= pCoef
[ia2
* 2U];
958 si2
= pCoef
[(ia2
* 2U) + 1U];
959 co3
= pCoef
[ia3
* 2U];
960 si3
= pCoef
[(ia3
* 2U) + 1U];
961 /* Twiddle coefficients index modifier */
962 ia1
= ia1
+ twidCoefModifier
;
964 for (i0
= j
; i0
< fftLen
; i0
+= n1
)
966 /* index calculation for the input as, */
967 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
972 /* Butterfly implementation */
974 r1
= pSrc
[2U * i0
] + pSrc
[2U * i2
];
976 r2
= pSrc
[2U * i0
] - pSrc
[2U * i2
];
979 s1
= pSrc
[(2U * i0
) + 1U] + pSrc
[(2U * i2
) + 1U];
981 s2
= pSrc
[(2U * i0
) + 1U] - pSrc
[(2U * i2
) + 1U];
984 t1
= pSrc
[2U * i1
] + pSrc
[2U * i3
];
986 /* xa' = xa + xb + xc + xd */
987 pSrc
[2U * i0
] = (r1
+ t1
) >> 2U;
988 /* xa + xc -(xb + xd) */
991 t2
= pSrc
[(2U * i1
) + 1U] + pSrc
[(2U * i3
) + 1U];
992 /* ya' = ya + yb + yc + yd */
993 pSrc
[(2U * i0
) + 1U] = (s1
+ t2
) >> 2U;
995 /* (ya + yc) - (yb + yd) */
999 t1
= pSrc
[(2U * i1
) + 1U] - pSrc
[(2U * i3
) + 1U];
1001 t2
= pSrc
[2U * i1
] - pSrc
[2U * i3
];
1003 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1004 pSrc
[2U * i1
] = (((int32_t) (((q63_t
) r1
* co2
) >> 32U)) -
1005 ((int32_t) (((q63_t
) s1
* si2
) >> 32U))) >> 1U;
1007 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1008 pSrc
[(2U * i1
) + 1U] =
1009 (((int32_t) (((q63_t
) s1
* co2
) >> 32U)) +
1010 ((int32_t) (((q63_t
) r1
* si2
) >> 32U))) >> 1U;
1012 /* (xa - xc) - (yb - yd) */
1014 /* (xa - xc) + (yb - yd) */
1017 /* (ya - yc) + (xb - xd) */
1019 /* (ya - yc) - (xb - xd) */
1022 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1023 pSrc
[2U * i2
] = (((int32_t) (((q63_t
) r1
* co1
) >> 32)) -
1024 ((int32_t) (((q63_t
) s1
* si1
) >> 32))) >> 1U;
1026 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1027 pSrc
[(2U * i2
) + 1U] = (((int32_t) (((q63_t
) s1
* co1
) >> 32)) +
1028 ((int32_t) (((q63_t
) r1
* si1
) >> 32))) >> 1U;
1030 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1031 pSrc
[(2U * i3
)] = (((int32_t) (((q63_t
) r2
* co3
) >> 32)) -
1032 ((int32_t) (((q63_t
) s2
* si3
) >> 32))) >> 1U;
1034 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1035 pSrc
[(2U * i3
) + 1U] = (((int32_t) (((q63_t
) s2
* co3
) >> 32)) +
1036 ((int32_t) (((q63_t
) r2
* si3
) >> 32))) >> 1U;
1039 twidCoefModifier
<<= 2U;
1042 uint32_t n1
, n2
, ia1
, ia2
, ia3
, i0
, j
, k
;
1043 q31_t t1
, t2
, r1
, r2
, s1
, s2
, co1
, co2
, co3
, si1
, si2
, si3
;
1044 q31_t xa
, xb
, xc
, xd
;
1045 q31_t ya
, yb
, yc
, yd
;
1046 q31_t xa_out
, xb_out
, xc_out
, xd_out
;
1047 q31_t ya_out
, yb_out
, yc_out
, yd_out
;
1054 q63_t xaya
, xbyb
, xcyc
, xdyd
;
1056 /* input is be 1.31(q31) format for all FFT sizes */
1057 /* Total process is divided into three stages */
1058 /* process first stage, middle stages, & last stage */
1060 /* Start of first stage process */
1062 /* Initializations for the first stage */
1073 pSi1
= pSi0
+ 2 * n2
;
1074 pSi2
= pSi1
+ 2 * n2
;
1075 pSi3
= pSi2
+ 2 * n2
;
1079 /* Butterfly implementation */
1081 r1
= (pSi0
[0] >> 4U) + (pSi2
[0] >> 4U);
1083 r2
= (pSi0
[0] >> 4U) - (pSi2
[0] >> 4U);
1086 t1
= (pSi1
[0] >> 4U) + (pSi3
[0] >> 4U);
1089 s1
= (pSi0
[1] >> 4U) + (pSi2
[1] >> 4U);
1091 s2
= (pSi0
[1] >> 4U) - (pSi2
[1] >> 4U);
1093 /* xa' = xa + xb + xc + xd */
1094 *pSi0
++ = (r1
+ t1
);
1095 /* (xa + xc) - (xb + xd) */
1098 t2
= (pSi1
[1] >> 4U) + (pSi3
[1] >> 4U);
1099 /* ya' = ya + yb + yc + yd */
1100 *pSi0
++ = (s1
+ t2
);
1102 /* (ya + yc) - (yb + yd) */
1106 t1
= (pSi1
[1] >> 4U) - (pSi3
[1] >> 4U);
1108 t2
= (pSi1
[0] >> 4U) - (pSi3
[0] >> 4U);
1110 /* index calculation for the coefficients */
1112 co2
= pCoef
[ia2
* 2U];
1113 si2
= pCoef
[(ia2
* 2U) + 1U];
1115 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1116 *pSi1
++ = (((int32_t) (((q63_t
) r1
* co2
) >> 32)) -
1117 ((int32_t) (((q63_t
) s1
* si2
) >> 32))) << 1U;
1119 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1120 *pSi1
++ = (((int32_t) (((q63_t
) s1
* co2
) >> 32)) +
1121 ((int32_t) (((q63_t
) r1
* si2
) >> 32))) << 1U;
1123 /* (xa - xc) - (yb - yd) */
1125 /* (xa - xc) + (yb - yd) */
1128 /* (ya - yc) + (xb - xd) */
1130 /* (ya - yc) - (xb - xd) */
1133 co1
= pCoef
[ia1
* 2U];
1134 si1
= pCoef
[(ia1
* 2U) + 1U];
1136 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1137 *pSi2
++ = (((int32_t) (((q63_t
) r1
* co1
) >> 32)) -
1138 ((int32_t) (((q63_t
) s1
* si1
) >> 32))) << 1U;
1140 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1141 *pSi2
++ = (((int32_t) (((q63_t
) s1
* co1
) >> 32)) +
1142 ((int32_t) (((q63_t
) r1
* si1
) >> 32))) << 1U;
1144 /* index calculation for the coefficients */
1146 co3
= pCoef
[ia3
* 2U];
1147 si3
= pCoef
[(ia3
* 2U) + 1U];
1149 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1150 *pSi3
++ = (((int32_t) (((q63_t
) r2
* co3
) >> 32)) -
1151 ((int32_t) (((q63_t
) s2
* si3
) >> 32))) << 1U;
1153 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1154 *pSi3
++ = (((int32_t) (((q63_t
) s2
* co3
) >> 32)) +
1155 ((int32_t) (((q63_t
) r2
* si3
) >> 32))) << 1U;
1157 /* Twiddle coefficients index modifier */
1158 ia1
= ia1
+ twidCoefModifier
;
1162 /* data is in 5.27(q27) format */
1163 /* each stage provides two down scaling of the input */
1166 /* Start of Middle stages process */
1168 twidCoefModifier
<<= 2U;
1170 /* Calculation of second stage to excluding last stage */
1171 for (k
= fftLen
/ 4U; k
> 4U; k
>>= 2U)
1173 /* Initializations for the first stage */
1178 for (j
= 0; j
<= (n2
- 1U); j
++)
1180 /* index calculation for the coefficients */
1183 co1
= pCoef
[ia1
* 2U];
1184 si1
= pCoef
[(ia1
* 2U) + 1U];
1185 co2
= pCoef
[ia2
* 2U];
1186 si2
= pCoef
[(ia2
* 2U) + 1U];
1187 co3
= pCoef
[ia3
* 2U];
1188 si3
= pCoef
[(ia3
* 2U) + 1U];
1189 /* Twiddle coefficients index modifier */
1190 ia1
= ia1
+ twidCoefModifier
;
1192 pSi0
= pSrc
+ 2 * j
;
1193 pSi1
= pSi0
+ 2 * n2
;
1194 pSi2
= pSi1
+ 2 * n2
;
1195 pSi3
= pSi2
+ 2 * n2
;
1197 for (i0
= j
; i0
< fftLen
; i0
+= n1
)
1199 /* Butterfly implementation */
1201 r1
= pSi0
[0] + pSi2
[0];
1204 r2
= pSi0
[0] - pSi2
[0];
1208 s1
= pSi0
[1] + pSi2
[1];
1211 s2
= pSi0
[1] - pSi2
[1];
1215 t1
= pSi1
[0] + pSi3
[0];
1218 /* xa' = xa + xb + xc + xd */
1219 pSi0
[0] = (r1
+ t1
) >> 2U;
1220 /* xa + xc -(xb + xd) */
1223 t2
= pSi1
[1] + pSi3
[1];
1225 /* ya' = ya + yb + yc + yd */
1226 pSi0
[1] = (s1
+ t2
) >> 2U;
1229 /* (ya + yc) - (yb + yd) */
1233 t1
= pSi1
[1] - pSi3
[1];
1236 t2
= pSi1
[0] - pSi3
[0];
1239 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1240 pSi1
[0] = (((int32_t) (((q63_t
) r1
* co2
) >> 32U)) -
1241 ((int32_t) (((q63_t
) s1
* si2
) >> 32U))) >> 1U;
1243 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1246 (((int32_t) (((q63_t
) s1
* co2
) >> 32U)) +
1247 ((int32_t) (((q63_t
) r1
* si2
) >> 32U))) >> 1U;
1250 /* (xa - xc) - (yb - yd) */
1252 /* (xa - xc) + (yb - yd) */
1255 /* (ya - yc) + (xb - xd) */
1257 /* (ya - yc) - (xb - xd) */
1260 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1261 pSi2
[0] = (((int32_t) (((q63_t
) r1
* co1
) >> 32)) -
1262 ((int32_t) (((q63_t
) s1
* si1
) >> 32))) >> 1U;
1264 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1265 pSi2
[1] = (((int32_t) (((q63_t
) s1
* co1
) >> 32)) +
1266 ((int32_t) (((q63_t
) r1
* si1
) >> 32))) >> 1U;
1269 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1270 pSi3
[0] = (((int32_t) (((q63_t
) r2
* co3
) >> 32)) -
1271 ((int32_t) (((q63_t
) s2
* si3
) >> 32))) >> 1U;
1273 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1274 pSi3
[1] = (((int32_t) (((q63_t
) s2
* co3
) >> 32)) +
1275 ((int32_t) (((q63_t
) r2
* si3
) >> 32))) >> 1U;
1279 twidCoefModifier
<<= 2U;
1283 /* End of Middle stages process */
1285 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
1286 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
1287 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
1288 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
1291 /* Start of last stage process */
1294 /* Initializations for the last stage */
1298 /* Calculations of last stage */
1301 #ifndef ARM_MATH_BIG_ENDIAN
1302 /* Read xa (real), ya(imag) input */
1303 xaya
= *__SIMD64(ptr1
)++;
1305 ya
= (q31_t
) (xaya
>> 32);
1307 /* Read xb (real), yb(imag) input */
1308 xbyb
= *__SIMD64(ptr1
)++;
1310 yb
= (q31_t
) (xbyb
>> 32);
1312 /* Read xc (real), yc(imag) input */
1313 xcyc
= *__SIMD64(ptr1
)++;
1315 yc
= (q31_t
) (xcyc
>> 32);
1317 /* Read xc (real), yc(imag) input */
1318 xdyd
= *__SIMD64(ptr1
)++;
1320 yd
= (q31_t
) (xdyd
>> 32);
1324 /* Read xa (real), ya(imag) input */
1325 xaya
= *__SIMD64(ptr1
)++;
1327 xa
= (q31_t
) (xaya
>> 32);
1329 /* Read xb (real), yb(imag) input */
1330 xbyb
= *__SIMD64(ptr1
)++;
1332 xb
= (q31_t
) (xbyb
>> 32);
1334 /* Read xc (real), yc(imag) input */
1335 xcyc
= *__SIMD64(ptr1
)++;
1337 xc
= (q31_t
) (xcyc
>> 32);
1339 /* Read xc (real), yc(imag) input */
1340 xdyd
= *__SIMD64(ptr1
)++;
1342 xd
= (q31_t
) (xdyd
>> 32);
1347 /* xa' = xa + xb + xc + xd */
1348 xa_out
= xa
+ xb
+ xc
+ xd
;
1350 /* ya' = ya + yb + yc + yd */
1351 ya_out
= ya
+ yb
+ yc
+ yd
;
1353 /* pointer updation for writing */
1356 /* writing xa' and ya' */
1360 xc_out
= (xa
- xb
+ xc
- xd
);
1361 yc_out
= (ya
- yb
+ yc
- yd
);
1363 /* writing xc' and yc' */
1367 xb_out
= (xa
- yb
- xc
+ yd
);
1368 yb_out
= (ya
+ xb
- yc
- xd
);
1370 /* writing xb' and yb' */
1374 xd_out
= (xa
+ yb
- xc
- yd
);
1375 yd_out
= (ya
- xb
- yc
+ xd
);
1377 /* writing xd' and yd' */
1383 /* output is in 11.21(q21) format for the 1024 point */
1384 /* output is in 9.23(q23) format for the 256 point */
1385 /* output is in 7.25(q25) format for the 64 point */
1386 /* output is in 5.27(q27) format for the 16 point */
1388 /* End of last stage process */