1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_f32.c
4 * Description: Combined Radix Decimation in Frequency CFFT Floating point processing function
6 * $Date: 27. January 2017
9 * Target Processor: Cortex-M cores
10 * -------------------------------------------------------------------- */
12 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
14 * SPDX-License-Identifier: Apache-2.0
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
20 * www.apache.org/licenses/LICENSE-2.0
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
30 #include "arm_common_tables.h"
32 extern void arm_radix8_butterfly_f32(
35 const float32_t
* pCoef
,
36 uint16_t twidCoefModifier
);
38 extern void arm_bitreversal_32(
40 const uint16_t bitRevLen
,
41 const uint16_t * pBitRevTable
);
44 * @ingroup groupTransforms
48 * @defgroup ComplexFFT Complex FFT Functions
51 * The Fast Fourier Transform (FFT) is an efficient algorithm for computing the
52 * Discrete Fourier Transform (DFT). The FFT can be orders of magnitude faster
53 * than the DFT, especially for long lengths.
54 * The algorithms described in this section
55 * operate on complex data. A separate set of functions is devoted to handling
58 * There are separate algorithms for handling floating-point, Q15, and Q31 data
59 * types. The algorithms available for each data type are described next.
61 * The FFT functions operate in-place. That is, the array holding the input data
62 * will also be used to hold the corresponding result. The input data is complex
63 * and contains <code>2*fftLen</code> interleaved values as shown below.
64 * <pre> {real[0], imag[0], real[1], imag[1],..} </pre>
65 * The FFT result will be contained in the same array and the frequency domain
66 * values will have the same interleaving.
69 * The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-8
70 * stages are performed along with a single radix-2 or radix-4 stage, as needed.
71 * The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
72 * a different twiddle factor table.
74 * The function uses the standard FFT definition and output values may grow by a
75 * factor of <code>fftLen</code> when computing the forward transform. The
76 * inverse transform includes a scale of <code>1/fftLen</code> as part of the
77 * calculation and this matches the textbook definition of the inverse FFT.
79 * Pre-initialized data structures containing twiddle factors and bit reversal
80 * tables are provided and defined in <code>arm_const_structs.h</code>. Include
81 * this header in your function and then pass one of the constant structures as
82 * an argument to arm_cfft_f32. For example:
84 * <code>arm_cfft_f32(arm_cfft_sR_f32_len64, pSrc, 1, 1)</code>
86 * computes a 64-point inverse complex FFT including bit reversal.
87 * The data structures are treated as constant data and not modified during the
88 * calculation. The same data structure can be reused for multiple transforms
89 * including mixing forward and inverse transforms.
91 * Earlier releases of the library provided separate radix-2 and radix-4
92 * algorithms that operated on floating-point data. These functions are still
93 * provided but are deprecated. The older functions are slower and less general
94 * than the new functions.
96 * An example of initialization of the constants for the arm_cfft_f32 function follows:
98 * const static arm_cfft_instance_f32 *S;
102 * S = &arm_cfft_sR_f32_len16;
105 * S = &arm_cfft_sR_f32_len32;
108 * S = &arm_cfft_sR_f32_len64;
111 * S = &arm_cfft_sR_f32_len128;
114 * S = &arm_cfft_sR_f32_len256;
117 * S = &arm_cfft_sR_f32_len512;
120 * S = &arm_cfft_sR_f32_len1024;
123 * S = &arm_cfft_sR_f32_len2048;
126 * S = &arm_cfft_sR_f32_len4096;
131 * The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-4
132 * stages are performed along with a single radix-2 stage, as needed.
133 * The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
134 * a different twiddle factor table.
136 * The function uses the standard FFT definition and output values may grow by a
137 * factor of <code>fftLen</code> when computing the forward transform. The
138 * inverse transform includes a scale of <code>1/fftLen</code> as part of the
139 * calculation and this matches the textbook definition of the inverse FFT.
141 * Pre-initialized data structures containing twiddle factors and bit reversal
142 * tables are provided and defined in <code>arm_const_structs.h</code>. Include
143 * this header in your function and then pass one of the constant structures as
144 * an argument to arm_cfft_q31. For example:
146 * <code>arm_cfft_q31(arm_cfft_sR_q31_len64, pSrc, 1, 1)</code>
148 * computes a 64-point inverse complex FFT including bit reversal.
149 * The data structures are treated as constant data and not modified during the
150 * calculation. The same data structure can be reused for multiple transforms
151 * including mixing forward and inverse transforms.
153 * Earlier releases of the library provided separate radix-2 and radix-4
154 * algorithms that operated on floating-point data. These functions are still
155 * provided but are deprecated. The older functions are slower and less general
156 * than the new functions.
158 * An example of initialization of the constants for the arm_cfft_q31 function follows:
160 * const static arm_cfft_instance_q31 *S;
164 * S = &arm_cfft_sR_q31_len16;
167 * S = &arm_cfft_sR_q31_len32;
170 * S = &arm_cfft_sR_q31_len64;
173 * S = &arm_cfft_sR_q31_len128;
176 * S = &arm_cfft_sR_q31_len256;
179 * S = &arm_cfft_sR_q31_len512;
182 * S = &arm_cfft_sR_q31_len1024;
185 * S = &arm_cfft_sR_q31_len2048;
188 * S = &arm_cfft_sR_q31_len4096;
195 void arm_cfft_radix8by2_f32( arm_cfft_instance_f32
* S
, float32_t
* p1
)
197 uint32_t L
= S
->fftLen
;
198 float32_t
* pCol1
, * pCol2
, * pMid1
, * pMid2
;
199 float32_t
* p2
= p1
+ L
;
200 const float32_t
* tw
= (float32_t
*) S
->pTwiddle
;
201 float32_t t1
[4], t2
[4], t3
[4], t4
[4], twR
, twI
;
202 float32_t m0
, m1
, m2
, m3
;
210 // Initialize mid pointers
214 // do two dot Fourier transform
215 for ( l
= L
>> 2; l
> 0; l
-- )
237 *p1
++ = t1
[0] + t2
[0];
238 *p1
++ = t1
[1] + t2
[1];
239 *p1
++ = t1
[2] + t2
[2];
240 *p1
++ = t1
[3] + t2
[3]; // col 1
242 t2
[0] = t1
[0] - t2
[0];
243 t2
[1] = t1
[1] - t2
[1];
244 t2
[2] = t1
[2] - t2
[2];
245 t2
[3] = t1
[3] - t2
[3]; // for col 2
247 *pMid1
++ = t3
[0] + t4
[0];
248 *pMid1
++ = t3
[1] + t4
[1];
249 *pMid1
++ = t3
[2] + t4
[2];
250 *pMid1
++ = t3
[3] + t4
[3]; // col 1
252 t4
[0] = t4
[0] - t3
[0];
253 t4
[1] = t4
[1] - t3
[1];
254 t4
[2] = t4
[2] - t3
[2];
255 t4
[3] = t4
[3] - t3
[3]; // for col 2
260 // multiply by twiddle factors
266 // R = R * Tr - I * Ti
268 // I = I * Tr + R * Ti
271 // use vertical symmetry
272 // 0.9988 - 0.0491i <==> -0.0491 - 0.9988i
302 arm_radix8_butterfly_f32( pCol1
, L
, (float32_t
*) S
->pTwiddle
, 2U);
304 arm_radix8_butterfly_f32( pCol2
, L
, (float32_t
*) S
->pTwiddle
, 2U);
307 void arm_cfft_radix8by4_f32( arm_cfft_instance_f32
* S
, float32_t
* p1
)
309 uint32_t L
= S
->fftLen
>> 1;
310 float32_t
* pCol1
, *pCol2
, *pCol3
, *pCol4
, *pEnd1
, *pEnd2
, *pEnd3
, *pEnd4
;
311 const float32_t
*tw2
, *tw3
, *tw4
;
312 float32_t
* p2
= p1
+ L
;
313 float32_t
* p3
= p2
+ L
;
314 float32_t
* p4
= p3
+ L
;
315 float32_t t2
[4], t3
[4], t4
[4], twR
, twI
;
316 float32_t p1ap3_0
, p1sp3_0
, p1ap3_1
, p1sp3_1
;
317 float32_t m0
, m1
, m2
, m3
;
318 uint32_t l
, twMod2
, twMod3
, twMod4
;
320 pCol1
= p1
; // points to real values by default
324 pEnd1
= p2
- 1; // points to imaginary values by default
329 tw2
= tw3
= tw4
= (float32_t
*) S
->pTwiddle
;
333 // do four dot Fourier transform
340 p1ap3_0
= p1
[0] + p3
[0];
341 p1sp3_0
= p1
[0] - p3
[0];
342 p1ap3_1
= p1
[1] + p3
[1];
343 p1sp3_1
= p1
[1] - p3
[1];
346 t2
[0] = p1sp3_0
+ p2
[1] - p4
[1];
347 t2
[1] = p1sp3_1
- p2
[0] + p4
[0];
349 t3
[0] = p1ap3_0
- p2
[0] - p4
[0];
350 t3
[1] = p1ap3_1
- p2
[1] - p4
[1];
352 t4
[0] = p1sp3_0
- p2
[1] + p4
[1];
353 t4
[1] = p1sp3_1
+ p2
[0] - p4
[0];
355 *p1
++ = p1ap3_0
+ p2
[0] + p4
[0];
356 *p1
++ = p1ap3_1
+ p2
[1] + p4
[1];
358 // Twiddle factors are ones
370 for (l
= (L
- 2) >> 1; l
> 0; l
-- )
373 p1ap3_0
= p1
[0] + p3
[0];
374 p1sp3_0
= p1
[0] - p3
[0];
375 p1ap3_1
= p1
[1] + p3
[1];
376 p1sp3_1
= p1
[1] - p3
[1];
378 t2
[0] = p1sp3_0
+ p2
[1] - p4
[1];
379 t2
[1] = p1sp3_1
- p2
[0] + p4
[0];
381 t3
[0] = p1ap3_0
- p2
[0] - p4
[0];
382 t3
[1] = p1ap3_1
- p2
[1] - p4
[1];
384 t4
[0] = p1sp3_0
- p2
[1] + p4
[1];
385 t4
[1] = p1sp3_1
+ p2
[0] - p4
[0];
387 *p1
++ = p1ap3_0
+ p2
[0] + p4
[0];
388 *p1
++ = p1ap3_1
+ p2
[1] + p4
[1];
391 p1ap3_1
= pEnd1
[-1] + pEnd3
[-1];
392 p1sp3_1
= pEnd1
[-1] - pEnd3
[-1];
393 p1ap3_0
= pEnd1
[0] + pEnd3
[0];
394 p1sp3_0
= pEnd1
[0] - pEnd3
[0];
396 t2
[2] = pEnd2
[0] - pEnd4
[0] + p1sp3_1
;
397 t2
[3] = pEnd1
[0] - pEnd3
[0] - pEnd2
[-1] + pEnd4
[-1];
399 t3
[2] = p1ap3_1
- pEnd2
[-1] - pEnd4
[-1];
400 t3
[3] = p1ap3_0
- pEnd2
[0] - pEnd4
[0];
402 t4
[2] = pEnd2
[0] - pEnd4
[0] - p1sp3_1
;
403 t4
[3] = pEnd4
[-1] - pEnd2
[-1] - p1sp3_0
;
405 *pEnd1
-- = p1ap3_0
+ pEnd2
[0] + pEnd4
[0];
406 *pEnd1
-- = p1ap3_1
+ pEnd2
[-1] + pEnd4
[-1];
409 // read twiddle factors
412 // multiply by twiddle factors
413 // let Z1 = a + i(b), Z2 = c + i(d)
414 // => Z1 * Z2 = (a*c - b*d) + i(b*c + a*d)
424 // use vertical symmetry col 2
425 // 0.9997 - 0.0245i <==> 0.0245 - 0.9997i
447 // use vertical symmetry col 3
448 // 0.9988 - 0.0491i <==> -0.9988 - 0.0491i
470 // use vertical symmetry col 4
471 // 0.9973 - 0.0736i <==> -0.0736 + 0.9973i
483 // Twiddle factors are
484 // 1.0000 0.7071-0.7071i -1.0000i -0.7071-0.7071i
485 p1ap3_0
= p1
[0] + p3
[0];
486 p1sp3_0
= p1
[0] - p3
[0];
487 p1ap3_1
= p1
[1] + p3
[1];
488 p1sp3_1
= p1
[1] - p3
[1];
491 t2
[0] = p1sp3_0
+ p2
[1] - p4
[1];
492 t2
[1] = p1sp3_1
- p2
[0] + p4
[0];
494 t3
[0] = p1ap3_0
- p2
[0] - p4
[0];
495 t3
[1] = p1ap3_1
- p2
[1] - p4
[1];
497 t4
[0] = p1sp3_0
- p2
[1] + p4
[1];
498 t4
[1] = p1sp3_1
+ p2
[0] - p4
[0];
500 *p1
++ = p1ap3_0
+ p2
[0] + p4
[0];
501 *p1
++ = p1ap3_1
+ p2
[1] + p4
[1];
538 arm_radix8_butterfly_f32( pCol1
, L
, (float32_t
*) S
->pTwiddle
, 4U);
540 arm_radix8_butterfly_f32( pCol2
, L
, (float32_t
*) S
->pTwiddle
, 4U);
542 arm_radix8_butterfly_f32( pCol3
, L
, (float32_t
*) S
->pTwiddle
, 4U);
544 arm_radix8_butterfly_f32( pCol4
, L
, (float32_t
*) S
->pTwiddle
, 4U);
548 * @addtogroup ComplexFFT
554 * @brief Processing function for the floating-point complex FFT.
555 * @param[in] *S points to an instance of the floating-point CFFT structure.
556 * @param[in, out] *p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
557 * @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
558 * @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
563 const arm_cfft_instance_f32
* S
,
566 uint8_t bitReverseFlag
)
568 uint32_t L
= S
->fftLen
, l
;
569 float32_t invL
, * pSrc
;
573 /* Conjugate input data */
587 arm_cfft_radix8by2_f32 ( (arm_cfft_instance_f32
*) S
, p1
);
592 arm_cfft_radix8by4_f32 ( (arm_cfft_instance_f32
*) S
, p1
);
597 arm_radix8_butterfly_f32( p1
, L
, (float32_t
*) S
->pTwiddle
, 1);
601 if ( bitReverseFlag
)
602 arm_bitreversal_32((uint32_t*)p1
,S
->bitRevLength
,S
->pBitRevTable
);
606 invL
= 1.0f
/(float32_t
)L
;
607 /* Conjugate and scale output data */
612 *pSrc
= -(*pSrc
) * invL
;
619 * @} end of ComplexFFT group