lib/main/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q15.c

   1 /* ----------------------------------------------------------------------
   2  * Project:      CMSIS DSP Library
   3  * Title:        arm_rfft_q15.c
   4  * Description:  RFFT & RIFFT Q15 process function
   5  *
   6  * $Date:        27. January 2017
   7  * $Revision:    V.1.5.1
   8  *
   9  * Target Processor: Cortex-M cores
  10  * -------------------------------------------------------------------- */
  11 /*
  12  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13  *
  14  * SPDX-License-Identifier: Apache-2.0
  15  *
  16  * Licensed under the Apache License, Version 2.0 (the License); you may
  17  * not use this file except in compliance with the License.
  18  * You may obtain a copy of the License at
  19  *
  20  * www.apache.org/licenses/LICENSE-2.0
  21  *
  22  * Unless required by applicable law or agreed to in writing, software
  23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25  * See the License for the specific language governing permissions and
  26  * limitations under the License.
  27  */
  28
  29 #include "arm_math.h"
  30
  31 /* ----------------------------------------------------------------------
  32  * Internal functions prototypes
  33  * -------------------------------------------------------------------- */
  34
  35 void arm_split_rfft_q15(
  36     q15_t * pSrc,
  37     uint32_t fftLen,
  38     q15_t * pATable,
  39     q15_t * pBTable,
  40     q15_t * pDst,
  41     uint32_t modifier);
  42
  43 void arm_split_rifft_q15(
  44     q15_t * pSrc,
  45     uint32_t fftLen,
  46     q15_t * pATable,
  47     q15_t * pBTable,
  48     q15_t * pDst,
  49     uint32_t modifier);
  50
  51 /**
  52 * @addtogroup RealFFT
  53 * @{
  54 */
  55
  56 /**
  57 * @brief Processing function for the Q15 RFFT/RIFFT.
  58 * @param[in]  *S    points to an instance of the Q15 RFFT/RIFFT structure.
  59 * @param[in]  *pSrc points to the input buffer.
  60 * @param[out] *pDst points to the output buffer.
  61 * @return none.
  62 *
  63 * \par Input an output formats:
  64 * \par
  65 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
  66 * Hence the output format is different for different RFFT sizes.
  67 * The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
  68 * \par
  69 * \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
  70 * \par
  71 * \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
  72 */
  73
  74 void arm_rfft_q15(
  75     const arm_rfft_instance_q15 * S,
  76     q15_t * pSrc,
  77     q15_t * pDst)
  78 {
  79     const arm_cfft_instance_q15 *S_CFFT = S->pCfft;
  80     uint32_t i;
  81     uint32_t L2 = S->fftLenReal >> 1;
  82
  83     /* Calculation of RIFFT of input */
  84     if (S->ifftFlagR == 1U)
  85     {
  86         /*  Real IFFT core process */
  87         arm_split_rifft_q15(pSrc, L2, S->pTwiddleAReal,
  88                             S->pTwiddleBReal, pDst, S->twidCoefRModifier);
  89
  90         /* Complex IFFT process */
  91         arm_cfft_q15(S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
  92
  93         for(i=0;i<S->fftLenReal;i++)
  94         {
  95             pDst[i] = pDst[i] << 1;
  96         }
  97     }
  98     else
  99     {
 100         /* Calculation of RFFT of input */
 101
 102         /* Complex FFT process */
 103         arm_cfft_q15(S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
 104
 105         /*  Real FFT core process */
 106         arm_split_rfft_q15(pSrc, L2, S->pTwiddleAReal,
 107                             S->pTwiddleBReal, pDst, S->twidCoefRModifier);
 108     }
 109 }
 110
 111 /**
 112 * @} end of RealFFT group
 113 */
 114
 115 /**
 116 * @brief  Core Real FFT process
 117 * @param  *pSrc                                 points to the input buffer.
 118 * @param  fftLen                                length of FFT.
 119 * @param  *pATable                      points to the A twiddle Coef buffer.
 120 * @param  *pBTable                      points to the B twiddle Coef buffer.
 121 * @param  *pDst                                 points to the output buffer.
 122 * @param  modifier              twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
 123 * @return none.
 124 * The function implements a Real FFT
 125 */
 126
 127 void arm_split_rfft_q15(
 128     q15_t * pSrc,
 129     uint32_t fftLen,
 130     q15_t * pATable,
 131     q15_t * pBTable,
 132     q15_t * pDst,
 133     uint32_t modifier)
 134 {
 135     uint32_t i;                                    /* Loop Counter */
 136     q31_t outR, outI;                              /* Temporary variables for output */
 137     q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
 138     q15_t *pSrc1, *pSrc2;
 139 #if defined (ARM_MATH_DSP)
 140     q15_t *pD1, *pD2;
 141 #endif
 142
 143     //  pSrc[2U * fftLen] = pSrc[0];
 144     //  pSrc[(2U * fftLen) + 1U] = pSrc[1];
 145
 146     pCoefA = &pATable[modifier * 2U];
 147     pCoefB = &pBTable[modifier * 2U];
 148
 149     pSrc1 = &pSrc[2];
 150     pSrc2 = &pSrc[(2U * fftLen) - 2U];
 151
 152 #if defined (ARM_MATH_DSP)
 153
 154     /* Run the below code for Cortex-M4 and Cortex-M3 */
 155     i = 1U;
 156     pD1 = pDst + 2;
 157     pD2 = pDst + (4U * fftLen) - 2;
 158
 159     for(i = fftLen - 1; i > 0; i--)
 160     {
 161         /*
 162         outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
 163         + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
 164         pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
 165         */
 166
 167         /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
 168         pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
 169         pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
 170
 171
 172 #ifndef ARM_MATH_BIG_ENDIAN
 173
 174         /* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */
 175         outR = __SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA));
 176
 177 #else
 178
 179         /* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */
 180         outR = -(__SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA)));
 181
 182 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
 183
 184         /* pSrc[2 * n - 2 * i] * pBTable[2 * i] +
 185         pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
 186         outR = __SMLAD(*__SIMD32(pSrc2), *__SIMD32(pCoefB), outR) >> 16U;
 187
 188         /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
 189         pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
 190
 191 #ifndef ARM_MATH_BIG_ENDIAN
 192
 193         outI = __SMUSDX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
 194
 195 #else
 196
 197         outI = __SMUSDX(*__SIMD32(pCoefB), *__SIMD32(pSrc2)--);
 198
 199 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
 200
 201         /* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */
 202         outI = __SMLADX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), outI);
 203
 204         /* write output */
 205         *pD1++ = (q15_t) outR;
 206         *pD1++ = outI >> 16U;
 207
 208         /* write complex conjugate output */
 209         pD2[0] = (q15_t) outR;
 210         pD2[1] = -(outI >> 16U);
 211         pD2 -= 2;
 212
 213         /* update coefficient pointer */
 214         pCoefB = pCoefB + (2U * modifier);
 215         pCoefA = pCoefA + (2U * modifier);
 216     }
 217
 218     pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
 219     pDst[(2U * fftLen) + 1U] = 0;
 220
 221     pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
 222     pDst[1] = 0;
 223
 224 #else
 225
 226     /* Run the below code for Cortex-M0 */
 227     i = 1U;
 228
 229     while (i < fftLen)
 230     {
 231         /*
 232         outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
 233         + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
 234         pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
 235         */
 236
 237         outR = *pSrc1 * *pCoefA;
 238         outR = outR - (*(pSrc1 + 1) * *(pCoefA + 1));
 239         outR = outR + (*pSrc2 * *pCoefB);
 240         outR = (outR + (*(pSrc2 + 1) * *(pCoefB + 1))) >> 16;
 241
 242
 243         /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
 244         pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
 245         pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
 246         */
 247
 248         outI = *pSrc2 * *(pCoefB + 1);
 249         outI = outI - (*(pSrc2 + 1) * *pCoefB);
 250         outI = outI + (*(pSrc1 + 1) * *pCoefA);
 251         outI = outI + (*pSrc1 * *(pCoefA + 1));
 252
 253         /* update input pointers */
 254         pSrc1 += 2U;
 255         pSrc2 -= 2U;
 256
 257         /* write output */
 258         pDst[2U * i] = (q15_t) outR;
 259         pDst[(2U * i) + 1U] = outI >> 16U;
 260
 261         /* write complex conjugate output */
 262         pDst[(4U * fftLen) - (2U * i)] = (q15_t) outR;
 263         pDst[((4U * fftLen) - (2U * i)) + 1U] = -(outI >> 16U);
 264
 265         /* update coefficient pointer */
 266         pCoefB = pCoefB + (2U * modifier);
 267         pCoefA = pCoefA + (2U * modifier);
 268
 269         i++;
 270     }
 271
 272     pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
 273     pDst[(2U * fftLen) + 1U] = 0;
 274
 275     pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
 276     pDst[1] = 0;
 277
 278 #endif /* #if defined (ARM_MATH_DSP) */
 279 }
 280
 281
 282 /**
 283 * @brief  Core Real IFFT process
 284 * @param[in]   *pSrc                            points to the input buffer.
 285 * @param[in]   fftLen               length of FFT.
 286 * @param[in]   *pATable                         points to the twiddle Coef A buffer.
 287 * @param[in]   *pBTable                         points to the twiddle Coef B buffer.
 288 * @param[out]  *pDst                            points to the output buffer.
 289 * @param[in]   modifier                 twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
 290 * @return none.
 291 * The function implements a Real IFFT
 292 */
 293 void arm_split_rifft_q15(
 294     q15_t * pSrc,
 295     uint32_t fftLen,
 296     q15_t * pATable,
 297     q15_t * pBTable,
 298     q15_t * pDst,
 299     uint32_t modifier)
 300 {
 301     uint32_t i;                                    /* Loop Counter */
 302     q31_t outR, outI;                              /* Temporary variables for output */
 303     q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
 304     q15_t *pSrc1, *pSrc2;
 305     q15_t *pDst1 = &pDst[0];
 306
 307     pCoefA = &pATable[0];
 308     pCoefB = &pBTable[0];
 309
 310     pSrc1 = &pSrc[0];
 311     pSrc2 = &pSrc[2U * fftLen];
 312
 313 #if defined (ARM_MATH_DSP)
 314
 315     /* Run the below code for Cortex-M4 and Cortex-M3 */
 316     i = fftLen;
 317
 318     while (i > 0U)
 319     {
 320         /*
 321         outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
 322         pIn[2 * n - 2 * i] * pBTable[2 * i] -
 323         pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
 324
 325         outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
 326         pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
 327         pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
 328         */
 329
 330
 331 #ifndef ARM_MATH_BIG_ENDIAN
 332
 333         /* pIn[2 * n - 2 * i] * pBTable[2 * i] -
 334         pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
 335         outR = __SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB));
 336
 337 #else
 338
 339         /* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] +
 340         pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
 341         outR = -(__SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB)));
 342
 343 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
 344
 345         /* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
 346         pIn[2 * n - 2 * i] * pBTable[2 * i] */
 347         outR = __SMLAD(*__SIMD32(pSrc1), *__SIMD32(pCoefA), outR) >> 16U;
 348
 349         /*
 350         -pIn[2 * n - 2 * i] * pBTable[2 * i + 1] +
 351         pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
 352         outI = __SMUADX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
 353
 354         /* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
 355
 356 #ifndef ARM_MATH_BIG_ENDIAN
 357
 358         outI = __SMLSDX(*__SIMD32(pCoefA), *__SIMD32(pSrc1)++, -outI);
 359
 360 #else
 361
 362         outI = __SMLSDX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), -outI);
 363
 364 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
 365         /* write output */
 366
 367 #ifndef ARM_MATH_BIG_ENDIAN
 368
 369         *__SIMD32(pDst1)++ = __PKHBT(outR, (outI >> 16U), 16);
 370
 371 #else
 372
 373         *__SIMD32(pDst1)++ = __PKHBT((outI >> 16U), outR, 16);
 374
 375 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
 376
 377         /* update coefficient pointer */
 378         pCoefB = pCoefB + (2U * modifier);
 379         pCoefA = pCoefA + (2U * modifier);
 380
 381         i--;
 382     }
 383 #else
 384     /* Run the below code for Cortex-M0 */
 385     i = fftLen;
 386
 387     while (i > 0U)
 388     {
 389         /*
 390         outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
 391         pIn[2 * n - 2 * i] * pBTable[2 * i] -
 392         pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
 393         */
 394
 395         outR = *pSrc2 * *pCoefB;
 396         outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1));
 397         outR = outR + (*pSrc1 * *pCoefA);
 398         outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 16;
 399
 400         /*
 401         outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
 402         pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
 403         pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
 404         */
 405
 406         outI = *(pSrc1 + 1) * *pCoefA;
 407         outI = outI - (*pSrc1 * *(pCoefA + 1));
 408         outI = outI - (*pSrc2 * *(pCoefB + 1));
 409         outI = outI - (*(pSrc2 + 1) * *(pCoefB));
 410
 411         /* update input pointers */
 412         pSrc1 += 2U;
 413         pSrc2 -= 2U;
 414
 415         /* write output */
 416         *pDst1++ = (q15_t) outR;
 417         *pDst1++ = (q15_t) (outI >> 16);
 418
 419         /* update coefficient pointer */
 420         pCoefB = pCoefB + (2U * modifier);
 421         pCoefA = pCoefA + (2U * modifier);
 422
 423         i--;
 424     }
 425 #endif /* #if defined (ARM_MATH_DSP) */
 426 }