1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_rfft_q15.c
4 * Description: RFFT & RIFFT Q15 process function
6 * $Date: 27. January 2017
9 * Target Processor: Cortex-M cores
10 * -------------------------------------------------------------------- */
12 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
14 * SPDX-License-Identifier: Apache-2.0
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
20 * www.apache.org/licenses/LICENSE-2.0
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
31 /* ----------------------------------------------------------------------
32 * Internal functions prototypes
33 * -------------------------------------------------------------------- */
35 void arm_split_rfft_q15(
43 void arm_split_rifft_q15(
57 * @brief Processing function for the Q15 RFFT/RIFFT.
58 * @param[in] *S points to an instance of the Q15 RFFT/RIFFT structure.
59 * @param[in] *pSrc points to the input buffer.
60 * @param[out] *pDst points to the output buffer.
63 * \par Input and output formats:
65 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
66 * Hence the output format is different for different RFFT sizes.
67 * The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
69 * \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
71 * \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
/* Q15 real FFT / inverse FFT driver. S->ifftFlagR selects the path:
   when it equals 1 the inverse split stage runs first and the complex
   transform follows; otherwise the complex transform runs first and the
   forward split stage follows. */
75 const arm_rfft_instance_q15
* S
,
79 const arm_cfft_instance_q15
*S_CFFT
= S
->pCfft
;
/* L2 is half of the real FFT length; it is the length handed to both split stages. */
81 uint32_t L2
= S
->fftLenReal
>> 1;
83 /* Calculation of RIFFT of input */
84 if (S
->ifftFlagR
== 1U)
86 /* Real IFFT core process */
87 arm_split_rifft_q15(pSrc
, L2
, S
->pTwiddleAReal
,
88 S
->pTwiddleBReal
, pDst
, S
->twidCoefRModifier
);
90 /* Complex IFFT process */
/* The complex transform is given pDst, i.e. it works on the split stage's output. */
91 arm_cfft_q15(S_CFFT
, pDst
, S
->ifftFlagR
, S
->bitReverseFlagR
);
/* Double each of the fftLenReal output values.
   NOTE(review): if pDst holds signed values, left-shifting a negative
   operand is undefined behaviour in ISO C - confirm the element type and
   whether a multiply by 2 would be preferable. */
93 for(i
=0;i
<S
->fftLenReal
;i
++)
95 pDst
[i
] = pDst
[i
] << 1;
100 /* Calculation of RFFT of input */
102 /* Complex FFT process */
/* NOTE(review): pSrc itself is passed as the buffer here; if arm_cfft_q15
   transforms in place, the caller's input buffer is overwritten - confirm. */
103 arm_cfft_q15(S_CFFT
, pSrc
, S
->ifftFlagR
, S
->bitReverseFlagR
);
105 /* Real FFT core process */
106 arm_split_rfft_q15(pSrc
, L2
, S
->pTwiddleAReal
,
107 S
->pTwiddleBReal
, pDst
, S
->twidCoefRModifier
);
112 * @} end of RealFFT group
116 * @brief Core Real FFT process
117 * @param *pSrc points to the input buffer.
118 * @param fftLen length of FFT.
119 * @param *pATable points to the A twiddle Coef buffer.
120 * @param *pBTable points to the B twiddle Coef buffer.
121 * @param *pDst points to the output buffer.
122 * @param modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
124 * The function implements a Real FFT
/* Core real-FFT split stage. Each step combines one pair read through
   pSrc1 with one pair read through pSrc2, weighted by the A and B twiddle
   tables, and stores the result together with its conjugate mirror
   (imaginary part negated) in pDst. A SIMD path is used when ARM_MATH_DSP
   is defined; a scalar path is used otherwise. */
127 void arm_split_rfft_q15(
135 uint32_t i
; /* Loop Counter */
136 q31_t outR
, outI
; /* Temporary variables for output */
137 q15_t
*pCoefA
, *pCoefB
; /* Temporary pointers for twiddle factors */
138 q15_t
*pSrc1
, *pSrc2
;
139 #if defined (ARM_MATH_DSP)
143 // pSrc[2U * fftLen] = pSrc[0];
144 // pSrc[(2U * fftLen) + 1U] = pSrc[1];
/* Both twiddle pointers start at element offset 2 * modifier of their table
   and are advanced by 2 * modifier after every computed output. */
146 pCoefA
= &pATable
[modifier
* 2U];
147 pCoefB
= &pBTable
[modifier
* 2U];
/* pSrc2 starts at element index (2 * fftLen) - 2 of pSrc. */
150 pSrc2
= &pSrc
[(2U * fftLen
) - 2U];
152 #if defined (ARM_MATH_DSP)
154 /* Run the below code for Cortex-M4 and Cortex-M3 */
/* pD2 addresses the mirrored output slot, starting at pDst + (4 * fftLen) - 2. */
157 pD2
= pDst
+ (4U * fftLen
) - 2;
/* i counts down from fftLen - 1 to 1, giving fftLen - 1 iterations. */
159 for(i
= fftLen
- 1; i
> 0; i
--)
162 outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
163 + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
164 pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
167 /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
168 pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
169 pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
172 #ifndef ARM_MATH_BIG_ENDIAN
174 /* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */
175 outR
= __SMUSD(*__SIMD32(pSrc1
), *__SIMD32(pCoefA
));
179 /* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */
180 outR
= -(__SMUSD(*__SIMD32(pSrc1
), *__SIMD32(pCoefA
)));
182 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
184 /* pSrc[2 * n - 2 * i] * pBTable[2 * i] +
185 pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
186 outR
= __SMLAD(*__SIMD32(pSrc2
), *__SIMD32(pCoefB
), outR
) >> 16U;
188 /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
189 pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
191 #ifndef ARM_MATH_BIG_ENDIAN
/* The post-decrement on the 32-bit view moves pSrc2 back by one pair. */
193 outI
= __SMUSDX(*__SIMD32(pSrc2
)--, *__SIMD32(pCoefB
));
197 outI
= __SMUSDX(*__SIMD32(pCoefB
), *__SIMD32(pSrc2
)--);
199 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
201 /* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */
/* The post-increment on the 32-bit view moves pSrc1 forward by one pair. */
202 outI
= __SMLADX(*__SIMD32(pSrc1
)++, *__SIMD32(pCoefA
), outI
);
/* Store the real part, then the imaginary part taken as outI >> 16. */
205 *pD1
++ = (q15_t
) outR
;
206 *pD1
++ = outI
>> 16U;
208 /* write complex conjugate output */
209 pD2
[0] = (q15_t
) outR
;
210 pD2
[1] = -(outI
>> 16U);
213 /* update coefficient pointer */
214 pCoefB
= pCoefB
+ (2U * modifier
);
215 pCoefA
= pCoefA
+ (2U * modifier
);
/* Entries formed directly from the first input pair: pDst[2 * fftLen] gets
   (pSrc[0] - pSrc[1]) >> 1 with a zero imaginary part, and pDst[0] gets
   (pSrc[0] + pSrc[1]) >> 1. */
218 pDst
[2U * fftLen
] = (pSrc
[0] - pSrc
[1]) >> 1;
219 pDst
[(2U * fftLen
) + 1U] = 0;
221 pDst
[0] = (pSrc
[0] + pSrc
[1]) >> 1;
226 /* Run the below code for Cortex-M0 */
232 outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
233 + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
234 pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
/* Scalar form of the real part: four products of q15_t operands are
   accumulated in the q31_t outR, then the sum is shifted right by 16. */
237 outR
= *pSrc1
* *pCoefA
;
238 outR
= outR
- (*(pSrc1
+ 1) * *(pCoefA
+ 1));
239 outR
= outR
+ (*pSrc2
* *pCoefB
);
240 outR
= (outR
+ (*(pSrc2
+ 1) * *(pCoefB
+ 1))) >> 16;
243 /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
244 pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
245 pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
248 outI
= *pSrc2
* *(pCoefB
+ 1);
249 outI
= outI
- (*(pSrc2
+ 1) * *pCoefB
);
250 outI
= outI
+ (*(pSrc1
+ 1) * *pCoefA
);
251 outI
= outI
+ (*pSrc1
* *(pCoefA
+ 1));
253 /* update input pointers */
/* Indexed stores: the result goes to the pair at 2 * i and its conjugate
   mirror goes to the pair at (4 * fftLen) - (2 * i). */
258 pDst
[2U * i
] = (q15_t
) outR
;
259 pDst
[(2U * i
) + 1U] = outI
>> 16U;
261 /* write complex conjugate output */
262 pDst
[(4U * fftLen
) - (2U * i
)] = (q15_t
) outR
;
263 pDst
[((4U * fftLen
) - (2U * i
)) + 1U] = -(outI
>> 16U);
265 /* update coefficient pointer */
266 pCoefB
= pCoefB
+ (2U * modifier
);
267 pCoefA
= pCoefA
+ (2U * modifier
);
/* Same direct entries as in the SIMD path, formed from pSrc[0] and pSrc[1]. */
272 pDst
[2U * fftLen
] = (pSrc
[0] - pSrc
[1]) >> 1;
273 pDst
[(2U * fftLen
) + 1U] = 0;
275 pDst
[0] = (pSrc
[0] + pSrc
[1]) >> 1;
278 #endif /* #if defined (ARM_MATH_DSP) */
283 * @brief Core Real IFFT process
284 * @param[in] *pSrc points to the input buffer.
285 * @param[in] fftLen length of FFT.
286 * @param[in] *pATable points to the twiddle Coef A buffer.
287 * @param[in] *pBTable points to the twiddle Coef B buffer.
288 * @param[out] *pDst points to the output buffer.
289 * @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
291 * The function implements a Real IFFT
/* Core real-IFFT split stage. Each step combines one pair read through
   pSrc1 with one pair read through pSrc2, weighted by the A and B twiddle
   tables, and writes the real and imaginary results one after the other
   through pDst1, which starts at pDst[0]. A SIMD path is used when
   ARM_MATH_DSP is defined; a scalar path is used otherwise. */
293 void arm_split_rifft_q15(
301 uint32_t i
; /* Loop Counter */
302 q31_t outR
, outI
; /* Temporary variables for output */
303 q15_t
*pCoefA
, *pCoefB
; /* Temporary pointers for twiddle factors */
304 q15_t
*pSrc1
, *pSrc2
;
305 q15_t
*pDst1
= &pDst
[0];
/* The twiddle pointers start at index 0 of their tables and are advanced
   by 2 * modifier after every computed output. */
307 pCoefA
= &pATable
[0];
308 pCoefB
= &pBTable
[0];
/* pSrc2 starts at element index 2 * fftLen of pSrc and is read before it is
   moved. NOTE(review): this implies pSrc must provide at least
   (2 * fftLen) + 2 readable values - confirm against callers. */
311 pSrc2
= &pSrc
[2U * fftLen
];
313 #if defined (ARM_MATH_DSP)
315 /* Run the below code for Cortex-M4 and Cortex-M3 */
321 outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
322 pIn[2 * n - 2 * i] * pBTable[2 * i] -
323 pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
325 outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
326 pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
327 pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
331 #ifndef ARM_MATH_BIG_ENDIAN
333 /* pIn[2 * n - 2 * i] * pBTable[2 * i] -
334 pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
335 outR
= __SMUSD(*__SIMD32(pSrc2
), *__SIMD32(pCoefB
));
339 /* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] +
340 pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
341 outR
= -(__SMUSD(*__SIMD32(pSrc2
), *__SIMD32(pCoefB
)));
343 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
345 /* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
346 pIn[2 * n - 2 * i] * pBTable[2 * i] */
347 outR
= __SMLAD(*__SIMD32(pSrc1
), *__SIMD32(pCoefA
), outR
) >> 16U;
350 -pIn[2 * n - 2 * i] * pBTable[2 * i + 1] +
351 pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
/* The post-decrement on the 32-bit view moves pSrc2 back by one pair. */
352 outI
= __SMUADX(*__SIMD32(pSrc2
)--, *__SIMD32(pCoefB
));
354 /* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
356 #ifndef ARM_MATH_BIG_ENDIAN
/* The previous outI enters as a negated accumulator; the operand order is
   swapped between the two endianness variants. pSrc1 advances by one pair. */
358 outI
= __SMLSDX(*__SIMD32(pCoefA
), *__SIMD32(pSrc1
)++, -outI
);
362 outI
= __SMLSDX(*__SIMD32(pSrc1
)++, *__SIMD32(pCoefA
), -outI
);
364 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Pack outR and (outI >> 16U) into one 32-bit word and store both results
   with a single write through pDst1; the operand order depends on endianness. */
367 #ifndef ARM_MATH_BIG_ENDIAN
369 *__SIMD32(pDst1
)++ = __PKHBT(outR
, (outI
>> 16U), 16);
373 *__SIMD32(pDst1
)++ = __PKHBT((outI
>> 16U), outR
, 16);
375 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
377 /* update coefficient pointer */
378 pCoefB
= pCoefB
+ (2U * modifier
);
379 pCoefA
= pCoefA
+ (2U * modifier
);
384 /* Run the below code for Cortex-M0 */
390 outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
391 pIn[2 * n - 2 * i] * pBTable[2 * i] -
392 pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
/* Scalar form of the real part: four products of q15_t operands are
   accumulated in the q31_t outR, then the sum is shifted right by 16. */
395 outR
= *pSrc2
* *pCoefB
;
396 outR
= outR
- (*(pSrc2
+ 1) * *(pCoefB
+ 1));
397 outR
= outR
+ (*pSrc1
* *pCoefA
);
398 outR
= (outR
+ (*(pSrc1
+ 1) * *(pCoefA
+ 1))) >> 16;
401 outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
402 pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
403 pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
/* Scalar form of the imaginary part: one product is taken as the start
   value and the other three are subtracted from it. */
406 outI
= *(pSrc1
+ 1) * *pCoefA
;
407 outI
= outI
- (*pSrc1
* *(pCoefA
+ 1));
408 outI
= outI
- (*pSrc2
* *(pCoefB
+ 1));
409 outI
= outI
- (*(pSrc2
+ 1) * *(pCoefB
));
411 /* update input pointers */
/* Store the real part, then the imaginary part taken as outI >> 16. */
416 *pDst1
++ = (q15_t
) outR
;
417 *pDst1
++ = (q15_t
) (outI
>> 16);
419 /* update coefficient pointer */
420 pCoefB
= pCoefB
+ (2U * modifier
);
421 pCoefA
= pCoefA
+ (2U * modifier
);
425 #endif /* #if defined (ARM_MATH_DSP) */