lib/main/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c

   1 /* ----------------------------------------------------------------------
   2  * Project:      CMSIS DSP Library
   3  * Title:        arm_cfft_radix4_f32.c
   4  * Description:  Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
   5  *
   6  * $Date:        27. January 2017
   7  * $Revision:    V.1.5.1
   8  *
   9  * Target Processor: Cortex-M cores
  10  * -------------------------------------------------------------------- */
  11 /*
  12  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13  *
  14  * SPDX-License-Identifier: Apache-2.0
  15  *
  16  * Licensed under the Apache License, Version 2.0 (the License); you may
  17  * not use this file except in compliance with the License.
  18  * You may obtain a copy of the License at
  19  *
  20  * www.apache.org/licenses/LICENSE-2.0
  21  *
  22  * Unless required by applicable law or agreed to in writing, software
  23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25  * See the License for the specific language governing permissions and
  26  * limitations under the License.
  27  */
  28
  29 #include "arm_math.h"
  30
  31 extern void arm_bitreversal_f32(
  32 float32_t * pSrc,
  33 uint16_t fftSize,
  34 uint16_t bitRevFactor,
  35 uint16_t * pBitRevTab);
  36
  37 void arm_radix4_butterfly_f32(
  38 float32_t * pSrc,
  39 uint16_t fftLen,
  40 float32_t * pCoef,
  41 uint16_t twidCoefModifier);
  42
  43 void arm_radix4_butterfly_inverse_f32(
  44 float32_t * pSrc,
  45 uint16_t fftLen,
  46 float32_t * pCoef,
  47 uint16_t twidCoefModifier,
  48 float32_t onebyfftLen);
  49
  50
  51 /**
  52 * @ingroup groupTransforms
  53 */
  54
  55 /**
  56 * @addtogroup ComplexFFT
  57 * @{
  58 */
  59
  60 /**
  61 * @details
  62 * @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
  63 * @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_f32 and will be removed
  64 * in the future.
  65 * @param[in]      *S    points to an instance of the floating-point Radix-4 CFFT/CIFFT structure.
  66 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
  67 * @return none.
  68 */
  69
  70 void arm_cfft_radix4_f32(
  71   const arm_cfft_radix4_instance_f32 * S,
  72   float32_t * pSrc)
  73 {
  74    if (S->ifftFlag == 1U)
  75    {
  76       /*  Complex IFFT radix-4  */
  77       arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
  78    }
  79    else
  80    {
  81       /*  Complex FFT radix-4  */
  82       arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  83    }
  84
  85    if (S->bitReverseFlag == 1U)
  86    {
  87       /*  Bit Reversal */
  88       arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  89    }
  90
  91 }
  92
  93 /**
  94 * @} end of ComplexFFT group
  95 */
  96
  97 /* ----------------------------------------------------------------------
  98  * Internal helper function used by the FFTs
  99  * ---------------------------------------------------------------------- */
 100
 101 /*
 102 * @brief  Core function for the floating-point CFFT butterfly process.
 103 * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
 104 * @param[in]      fftLen           length of the FFT.
 105 * @param[in]      *pCoef           points to the twiddle coefficient buffer.
 106 * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
 107 * @return none.
 108 */
 109
 110 void arm_radix4_butterfly_f32(
 111 float32_t * pSrc,
 112 uint16_t fftLen,
 113 float32_t * pCoef,
 114 uint16_t twidCoefModifier)
 115 {
 116
 117    float32_t co1, co2, co3, si1, si2, si3;
 118    uint32_t ia1, ia2, ia3;
 119    uint32_t i0, i1, i2, i3;
 120    uint32_t n1, n2, j, k;
 121
 122 #if defined (ARM_MATH_DSP)
 123
 124    /* Run the below code for Cortex-M4 and Cortex-M3 */
 125
 126    float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
 127    float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
 128    Ybminusd;
 129    float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
 130    float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
 131    float32_t *ptr1;
 132    float32_t p0,p1,p2,p3,p4,p5;
 133    float32_t a0,a1,a2,a3,a4,a5,a6,a7;
 134
 135    /*  Initializations for the first stage */
 136    n2 = fftLen;
 137    n1 = n2;
 138
 139    /* n2 = fftLen/4 */
 140    n2 >>= 2U;
 141    i0 = 0U;
 142    ia1 = 0U;
 143
 144    j = n2;
 145
 146    /*  Calculation of first stage */
 147    do
 148    {
 149       /*  index calculation for the input as, */
 150       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
 151       i1 = i0 + n2;
 152       i2 = i1 + n2;
 153       i3 = i2 + n2;
 154
 155       xaIn = pSrc[(2U * i0)];
 156       yaIn = pSrc[(2U * i0) + 1U];
 157
 158       xbIn = pSrc[(2U * i1)];
 159       ybIn = pSrc[(2U * i1) + 1U];
 160
 161       xcIn = pSrc[(2U * i2)];
 162       ycIn = pSrc[(2U * i2) + 1U];
 163
 164       xdIn = pSrc[(2U * i3)];
 165       ydIn = pSrc[(2U * i3) + 1U];
 166
 167       /* xa + xc */
 168       Xaplusc = xaIn + xcIn;
 169       /* xb + xd */
 170       Xbplusd = xbIn + xdIn;
 171       /* ya + yc */
 172       Yaplusc = yaIn + ycIn;
 173       /* yb + yd */
 174       Ybplusd = ybIn + ydIn;
 175
 176       /*  index calculation for the coefficients */
 177       ia2 = ia1 + ia1;
 178       co2 = pCoef[ia2 * 2U];
 179       si2 = pCoef[(ia2 * 2U) + 1U];
 180
 181       /* xa - xc */
 182       Xaminusc = xaIn - xcIn;
 183       /* xb - xd */
 184       Xbminusd = xbIn - xdIn;
 185       /* ya - yc */
 186       Yaminusc = yaIn - ycIn;
 187       /* yb - yd */
 188       Ybminusd = ybIn - ydIn;
 189
 190       /* xa' = xa + xb + xc + xd */
 191       pSrc[(2U * i0)] = Xaplusc + Xbplusd;
 192       /* ya' = ya + yb + yc + yd */
 193       pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
 194
 195       /* (xa - xc) + (yb - yd) */
 196       Xb12C_out = (Xaminusc + Ybminusd);
 197       /* (ya - yc) + (xb - xd) */
 198       Yb12C_out = (Yaminusc - Xbminusd);
 199       /* (xa + xc) - (xb + xd) */
 200       Xc12C_out = (Xaplusc - Xbplusd);
 201       /* (ya + yc) - (yb + yd) */
 202       Yc12C_out = (Yaplusc - Ybplusd);
 203       /* (xa - xc) - (yb - yd) */
 204       Xd12C_out = (Xaminusc - Ybminusd);
 205       /* (ya - yc) + (xb - xd) */
 206       Yd12C_out = (Xbminusd + Yaminusc);
 207
 208       co1 = pCoef[ia1 * 2U];
 209       si1 = pCoef[(ia1 * 2U) + 1U];
 210
 211       /*  index calculation for the coefficients */
 212       ia3 = ia2 + ia1;
 213       co3 = pCoef[ia3 * 2U];
 214       si3 = pCoef[(ia3 * 2U) + 1U];
 215
 216       Xb12_out = Xb12C_out * co1;
 217       Yb12_out = Yb12C_out * co1;
 218       Xc12_out = Xc12C_out * co2;
 219       Yc12_out = Yc12C_out * co2;
 220       Xd12_out = Xd12C_out * co3;
 221       Yd12_out = Yd12C_out * co3;
 222
 223       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
 224       //Xb12_out -= Yb12C_out * si1;
 225       p0 = Yb12C_out * si1;
 226       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
 227       //Yb12_out += Xb12C_out * si1;
 228       p1 = Xb12C_out * si1;
 229       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
 230       //Xc12_out -= Yc12C_out * si2;
 231       p2 = Yc12C_out * si2;
 232       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
 233       //Yc12_out += Xc12C_out * si2;
 234       p3 = Xc12C_out * si2;
 235       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
 236       //Xd12_out -= Yd12C_out * si3;
 237       p4 = Yd12C_out * si3;
 238       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
 239       //Yd12_out += Xd12C_out * si3;
 240       p5 = Xd12C_out * si3;
 241
 242       Xb12_out += p0;
 243       Yb12_out -= p1;
 244       Xc12_out += p2;
 245       Yc12_out -= p3;
 246       Xd12_out += p4;
 247       Yd12_out -= p5;
 248
 249       /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
 250       pSrc[2U * i1] = Xc12_out;
 251
 252       /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
 253       pSrc[(2U * i1) + 1U] = Yc12_out;
 254
 255       /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
 256       pSrc[2U * i2] = Xb12_out;
 257
 258       /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
 259       pSrc[(2U * i2) + 1U] = Yb12_out;
 260
 261       /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
 262       pSrc[2U * i3] = Xd12_out;
 263
 264       /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
 265       pSrc[(2U * i3) + 1U] = Yd12_out;
 266
 267       /*  Twiddle coefficients index modifier */
 268       ia1 += twidCoefModifier;
 269
 270       /*  Updating input index */
 271       i0++;
 272
 273    }
 274    while (--j);
 275
 276    twidCoefModifier <<= 2U;
 277
 278    /*  Calculation of second stage to excluding last stage */
 279    for (k = fftLen >> 2U; k > 4U; k >>= 2U)
 280    {
 281       /*  Initializations for the first stage */
 282       n1 = n2;
 283       n2 >>= 2U;
 284       ia1 = 0U;
 285
 286       /*  Calculation of first stage */
 287       j = 0;
 288       do
 289       {
 290          /*  index calculation for the coefficients */
 291          ia2 = ia1 + ia1;
 292          ia3 = ia2 + ia1;
 293          co1 = pCoef[ia1 * 2U];
 294          si1 = pCoef[(ia1 * 2U) + 1U];
 295          co2 = pCoef[ia2 * 2U];
 296          si2 = pCoef[(ia2 * 2U) + 1U];
 297          co3 = pCoef[ia3 * 2U];
 298          si3 = pCoef[(ia3 * 2U) + 1U];
 299
 300          /*  Twiddle coefficients index modifier */
 301          ia1 += twidCoefModifier;
 302
 303          i0 = j;
 304          do
 305          {
 306             /*  index calculation for the input as, */
 307             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
 308             i1 = i0 + n2;
 309             i2 = i1 + n2;
 310             i3 = i2 + n2;
 311
 312             xaIn = pSrc[(2U * i0)];
 313             yaIn = pSrc[(2U * i0) + 1U];
 314
 315             xbIn = pSrc[(2U * i1)];
 316             ybIn = pSrc[(2U * i1) + 1U];
 317
 318             xcIn = pSrc[(2U * i2)];
 319             ycIn = pSrc[(2U * i2) + 1U];
 320
 321             xdIn = pSrc[(2U * i3)];
 322             ydIn = pSrc[(2U * i3) + 1U];
 323
 324             /* xa - xc */
 325             Xaminusc = xaIn - xcIn;
 326             /* (xb - xd) */
 327             Xbminusd = xbIn - xdIn;
 328             /* ya - yc */
 329             Yaminusc = yaIn - ycIn;
 330             /* (yb - yd) */
 331             Ybminusd = ybIn - ydIn;
 332
 333             /* xa + xc */
 334             Xaplusc = xaIn + xcIn;
 335             /* xb + xd */
 336             Xbplusd = xbIn + xdIn;
 337             /* ya + yc */
 338             Yaplusc = yaIn + ycIn;
 339             /* yb + yd */
 340             Ybplusd = ybIn + ydIn;
 341
 342             /* (xa - xc) + (yb - yd) */
 343             Xb12C_out = (Xaminusc + Ybminusd);
 344             /* (ya - yc) -  (xb - xd) */
 345             Yb12C_out = (Yaminusc - Xbminusd);
 346             /* xa + xc -(xb + xd) */
 347             Xc12C_out = (Xaplusc - Xbplusd);
 348             /* (ya + yc) - (yb + yd) */
 349             Yc12C_out = (Yaplusc - Ybplusd);
 350             /* (xa - xc) - (yb - yd) */
 351             Xd12C_out = (Xaminusc - Ybminusd);
 352             /* (ya - yc) +  (xb - xd) */
 353             Yd12C_out = (Xbminusd + Yaminusc);
 354
 355             pSrc[(2U * i0)] = Xaplusc + Xbplusd;
 356             pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
 357
 358             Xb12_out = Xb12C_out * co1;
 359             Yb12_out = Yb12C_out * co1;
 360             Xc12_out = Xc12C_out * co2;
 361             Yc12_out = Yc12C_out * co2;
 362             Xd12_out = Xd12C_out * co3;
 363             Yd12_out = Yd12C_out * co3;
 364
 365             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
 366             //Xb12_out -= Yb12C_out * si1;
 367             p0 = Yb12C_out * si1;
 368             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
 369             //Yb12_out += Xb12C_out * si1;
 370             p1 = Xb12C_out * si1;
 371             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
 372             //Xc12_out -= Yc12C_out * si2;
 373             p2 = Yc12C_out * si2;
 374             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
 375             //Yc12_out += Xc12C_out * si2;
 376             p3 = Xc12C_out * si2;
 377             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
 378             //Xd12_out -= Yd12C_out * si3;
 379             p4 = Yd12C_out * si3;
 380             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
 381             //Yd12_out += Xd12C_out * si3;
 382             p5 = Xd12C_out * si3;
 383
 384             Xb12_out += p0;
 385             Yb12_out -= p1;
 386             Xc12_out += p2;
 387             Yc12_out -= p3;
 388             Xd12_out += p4;
 389             Yd12_out -= p5;
 390
 391             /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
 392             pSrc[2U * i1] = Xc12_out;
 393
 394             /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
 395             pSrc[(2U * i1) + 1U] = Yc12_out;
 396
 397             /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
 398             pSrc[2U * i2] = Xb12_out;
 399
 400             /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
 401             pSrc[(2U * i2) + 1U] = Yb12_out;
 402
 403             /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
 404             pSrc[2U * i3] = Xd12_out;
 405
 406             /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
 407             pSrc[(2U * i3) + 1U] = Yd12_out;
 408
 409             i0 += n1;
 410          } while (i0 < fftLen);
 411          j++;
 412       } while (j <= (n2 - 1U));
 413       twidCoefModifier <<= 2U;
 414    }
 415
 416    j = fftLen >> 2;
 417    ptr1 = &pSrc[0];
 418
 419    /*  Calculations of last stage */
 420    do
 421    {
 422       xaIn = ptr1[0];
 423       yaIn = ptr1[1];
 424       xbIn = ptr1[2];
 425       ybIn = ptr1[3];
 426       xcIn = ptr1[4];
 427       ycIn = ptr1[5];
 428       xdIn = ptr1[6];
 429       ydIn = ptr1[7];
 430
 431       /* xa + xc */
 432       Xaplusc = xaIn + xcIn;
 433
 434       /* xa - xc */
 435       Xaminusc = xaIn - xcIn;
 436
 437       /* ya + yc */
 438       Yaplusc = yaIn + ycIn;
 439
 440       /* ya - yc */
 441       Yaminusc = yaIn - ycIn;
 442
 443       /* xb + xd */
 444       Xbplusd = xbIn + xdIn;
 445
 446       /* yb + yd */
 447       Ybplusd = ybIn + ydIn;
 448
 449       /* (xb-xd) */
 450       Xbminusd = xbIn - xdIn;
 451
 452       /* (yb-yd) */
 453       Ybminusd = ybIn - ydIn;
 454
 455       /* xa' = xa + xb + xc + xd */
 456       a0 = (Xaplusc + Xbplusd);
 457       /* ya' = ya + yb + yc + yd */
 458       a1 = (Yaplusc + Ybplusd);
 459       /* xc' = (xa-xb+xc-xd) */
 460       a2 = (Xaplusc - Xbplusd);
 461       /* yc' = (ya-yb+yc-yd) */
 462       a3 = (Yaplusc - Ybplusd);
 463       /* xb' = (xa+yb-xc-yd) */
 464       a4 = (Xaminusc + Ybminusd);
 465       /* yb' = (ya-xb-yc+xd) */
 466       a5 = (Yaminusc - Xbminusd);
 467       /* xd' = (xa-yb-xc+yd)) */
 468       a6 = (Xaminusc - Ybminusd);
 469       /* yd' = (ya+xb-yc-xd) */
 470       a7 = (Xbminusd + Yaminusc);
 471
 472       ptr1[0] = a0;
 473       ptr1[1] = a1;
 474       ptr1[2] = a2;
 475       ptr1[3] = a3;
 476       ptr1[4] = a4;
 477       ptr1[5] = a5;
 478       ptr1[6] = a6;
 479       ptr1[7] = a7;
 480
 481       /* increment pointer by 8 */
 482       ptr1 += 8U;
 483    } while (--j);
 484
 485 #else
 486
 487    float32_t t1, t2, r1, r2, s1, s2;
 488
 489    /* Run the below code for Cortex-M0 */
 490
 491    /*  Initializations for the fft calculation */
 492    n2 = fftLen;
 493    n1 = n2;
 494    for (k = fftLen; k > 1U; k >>= 2U)
 495    {
 496       /*  Initializations for the fft calculation */
 497       n1 = n2;
 498       n2 >>= 2U;
 499       ia1 = 0U;
 500
 501       /*  FFT Calculation */
 502       j = 0;
 503       do
 504       {
 505          /*  index calculation for the coefficients */
 506          ia2 = ia1 + ia1;
 507          ia3 = ia2 + ia1;
 508          co1 = pCoef[ia1 * 2U];
 509          si1 = pCoef[(ia1 * 2U) + 1U];
 510          co2 = pCoef[ia2 * 2U];
 511          si2 = pCoef[(ia2 * 2U) + 1U];
 512          co3 = pCoef[ia3 * 2U];
 513          si3 = pCoef[(ia3 * 2U) + 1U];
 514
 515          /*  Twiddle coefficients index modifier */
 516          ia1 = ia1 + twidCoefModifier;
 517
 518          i0 = j;
 519          do
 520          {
 521             /*  index calculation for the input as, */
 522             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
 523             i1 = i0 + n2;
 524             i2 = i1 + n2;
 525             i3 = i2 + n2;
 526
 527             /* xa + xc */
 528             r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
 529
 530             /* xa - xc */
 531             r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
 532
 533             /* ya + yc */
 534             s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
 535
 536             /* ya - yc */
 537             s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
 538
 539             /* xb + xd */
 540             t1 = pSrc[2U * i1] + pSrc[2U * i3];
 541
 542             /* xa' = xa + xb + xc + xd */
 543             pSrc[2U * i0] = r1 + t1;
 544
 545             /* xa + xc -(xb + xd) */
 546             r1 = r1 - t1;
 547
 548             /* yb + yd */
 549             t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
 550
 551             /* ya' = ya + yb + yc + yd */
 552             pSrc[(2U * i0) + 1U] = s1 + t2;
 553
 554             /* (ya + yc) - (yb + yd) */
 555             s1 = s1 - t2;
 556
 557             /* (yb - yd) */
 558             t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
 559
 560             /* (xb - xd) */
 561             t2 = pSrc[2U * i1] - pSrc[2U * i3];
 562
 563             /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
 564             pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
 565
 566             /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
 567             pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
 568
 569             /* (xa - xc) + (yb - yd) */
 570             r1 = r2 + t1;
 571
 572             /* (xa - xc) - (yb - yd) */
 573             r2 = r2 - t1;
 574
 575             /* (ya - yc) -  (xb - xd) */
 576             s1 = s2 - t2;
 577
 578             /* (ya - yc) +  (xb - xd) */
 579             s2 = s2 + t2;
 580
 581             /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
 582             pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
 583
 584             /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
 585             pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
 586
 587             /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
 588             pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
 589
 590             /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
 591             pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
 592
 593             i0 += n1;
 594          } while ( i0 < fftLen);
 595          j++;
 596       } while (j <= (n2 - 1U));
 597       twidCoefModifier <<= 2U;
 598    }
 599
 600 #endif /* #if defined (ARM_MATH_DSP) */
 601
 602 }
 603
 604 /*
 605 * @brief  Core function for the floating-point CIFFT butterfly process.
 606 * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
 607 * @param[in]      fftLen           length of the FFT.
 608 * @param[in]      *pCoef           points to twiddle coefficient buffer.
 609 * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
 610 * @param[in]      onebyfftLen      value of 1/fftLen.
 611 * @return none.
 612 */
 613
 614 void arm_radix4_butterfly_inverse_f32(
 615 float32_t * pSrc,
 616 uint16_t fftLen,
 617 float32_t * pCoef,
 618 uint16_t twidCoefModifier,
 619 float32_t onebyfftLen)
 620 {
 621    float32_t co1, co2, co3, si1, si2, si3;
 622    uint32_t ia1, ia2, ia3;
 623    uint32_t i0, i1, i2, i3;
 624    uint32_t n1, n2, j, k;
 625
 626 #if defined (ARM_MATH_DSP)
 627
 628    float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
 629    float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
 630    Ybminusd;
 631    float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
 632    float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
 633    float32_t *ptr1;
 634    float32_t p0,p1,p2,p3,p4,p5,p6,p7;
 635    float32_t a0,a1,a2,a3,a4,a5,a6,a7;
 636
 637
 638    /*  Initializations for the first stage */
 639    n2 = fftLen;
 640    n1 = n2;
 641
 642    /* n2 = fftLen/4 */
 643    n2 >>= 2U;
 644    i0 = 0U;
 645    ia1 = 0U;
 646
 647    j = n2;
 648
 649    /*  Calculation of first stage */
 650    do
 651    {
 652       /*  index calculation for the input as, */
 653       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
 654       i1 = i0 + n2;
 655       i2 = i1 + n2;
 656       i3 = i2 + n2;
 657
 658       /*  Butterfly implementation */
 659       xaIn = pSrc[(2U * i0)];
 660       yaIn = pSrc[(2U * i0) + 1U];
 661
 662       xcIn = pSrc[(2U * i2)];
 663       ycIn = pSrc[(2U * i2) + 1U];
 664
 665       xbIn = pSrc[(2U * i1)];
 666       ybIn = pSrc[(2U * i1) + 1U];
 667
 668       xdIn = pSrc[(2U * i3)];
 669       ydIn = pSrc[(2U * i3) + 1U];
 670
 671       /* xa + xc */
 672       Xaplusc = xaIn + xcIn;
 673       /* xb + xd */
 674       Xbplusd = xbIn + xdIn;
 675       /* ya + yc */
 676       Yaplusc = yaIn + ycIn;
 677       /* yb + yd */
 678       Ybplusd = ybIn + ydIn;
 679
 680       /*  index calculation for the coefficients */
 681       ia2 = ia1 + ia1;
 682       co2 = pCoef[ia2 * 2U];
 683       si2 = pCoef[(ia2 * 2U) + 1U];
 684
 685       /* xa - xc */
 686       Xaminusc = xaIn - xcIn;
 687       /* xb - xd */
 688       Xbminusd = xbIn - xdIn;
 689       /* ya - yc */
 690       Yaminusc = yaIn - ycIn;
 691       /* yb - yd */
 692       Ybminusd = ybIn - ydIn;
 693
 694       /* xa' = xa + xb + xc + xd */
 695       pSrc[(2U * i0)] = Xaplusc + Xbplusd;
 696
 697       /* ya' = ya + yb + yc + yd */
 698       pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
 699
 700       /* (xa - xc) - (yb - yd) */
 701       Xb12C_out = (Xaminusc - Ybminusd);
 702       /* (ya - yc) + (xb - xd) */
 703       Yb12C_out = (Yaminusc + Xbminusd);
 704       /* (xa + xc) - (xb + xd) */
 705       Xc12C_out = (Xaplusc - Xbplusd);
 706       /* (ya + yc) - (yb + yd) */
 707       Yc12C_out = (Yaplusc - Ybplusd);
 708       /* (xa - xc) + (yb - yd) */
 709       Xd12C_out = (Xaminusc + Ybminusd);
 710       /* (ya - yc) - (xb - xd) */
 711       Yd12C_out = (Yaminusc - Xbminusd);
 712
 713       co1 = pCoef[ia1 * 2U];
 714       si1 = pCoef[(ia1 * 2U) + 1U];
 715
 716       /*  index calculation for the coefficients */
 717       ia3 = ia2 + ia1;
 718       co3 = pCoef[ia3 * 2U];
 719       si3 = pCoef[(ia3 * 2U) + 1U];
 720
 721       Xb12_out = Xb12C_out * co1;
 722       Yb12_out = Yb12C_out * co1;
 723       Xc12_out = Xc12C_out * co2;
 724       Yc12_out = Yc12C_out * co2;
 725       Xd12_out = Xd12C_out * co3;
 726       Yd12_out = Yd12C_out * co3;
 727
 728       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
 729       //Xb12_out -= Yb12C_out * si1;
 730       p0 = Yb12C_out * si1;
 731       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
 732       //Yb12_out += Xb12C_out * si1;
 733       p1 = Xb12C_out * si1;
 734       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
 735       //Xc12_out -= Yc12C_out * si2;
 736       p2 = Yc12C_out * si2;
 737       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
 738       //Yc12_out += Xc12C_out * si2;
 739       p3 = Xc12C_out * si2;
 740       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
 741       //Xd12_out -= Yd12C_out * si3;
 742       p4 = Yd12C_out * si3;
 743       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
 744       //Yd12_out += Xd12C_out * si3;
 745       p5 = Xd12C_out * si3;
 746
 747       Xb12_out -= p0;
 748       Yb12_out += p1;
 749       Xc12_out -= p2;
 750       Yc12_out += p3;
 751       Xd12_out -= p4;
 752       Yd12_out += p5;
 753
 754       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
 755       pSrc[2U * i1] = Xc12_out;
 756
 757       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
 758       pSrc[(2U * i1) + 1U] = Yc12_out;
 759
 760       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
 761       pSrc[2U * i2] = Xb12_out;
 762
 763       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
 764       pSrc[(2U * i2) + 1U] = Yb12_out;
 765
 766       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
 767       pSrc[2U * i3] = Xd12_out;
 768
 769       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
 770       pSrc[(2U * i3) + 1U] = Yd12_out;
 771
 772       /*  Twiddle coefficients index modifier */
 773       ia1 = ia1 + twidCoefModifier;
 774
 775       /*  Updating input index */
 776       i0 = i0 + 1U;
 777
 778    } while (--j);
 779
 780    twidCoefModifier <<= 2U;
 781
 782    /*  Calculation of second stage to excluding last stage */
 783    for (k = fftLen >> 2U; k > 4U; k >>= 2U)
 784    {
 785       /*  Initializations for the first stage */
 786       n1 = n2;
 787       n2 >>= 2U;
 788       ia1 = 0U;
 789
 790       /*  Calculation of first stage */
 791       j = 0;
 792       do
 793       {
 794          /*  index calculation for the coefficients */
 795          ia2 = ia1 + ia1;
 796          ia3 = ia2 + ia1;
 797          co1 = pCoef[ia1 * 2U];
 798          si1 = pCoef[(ia1 * 2U) + 1U];
 799          co2 = pCoef[ia2 * 2U];
 800          si2 = pCoef[(ia2 * 2U) + 1U];
 801          co3 = pCoef[ia3 * 2U];
 802          si3 = pCoef[(ia3 * 2U) + 1U];
 803
 804          /*  Twiddle coefficients index modifier */
 805          ia1 = ia1 + twidCoefModifier;
 806
 807          i0 = j;
 808          do
 809          {
 810             /*  index calculation for the input as, */
 811             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
 812             i1 = i0 + n2;
 813             i2 = i1 + n2;
 814             i3 = i2 + n2;
 815
 816             xaIn = pSrc[(2U * i0)];
 817             yaIn = pSrc[(2U * i0) + 1U];
 818
 819             xbIn = pSrc[(2U * i1)];
 820             ybIn = pSrc[(2U * i1) + 1U];
 821
 822             xcIn = pSrc[(2U * i2)];
 823             ycIn = pSrc[(2U * i2) + 1U];
 824
 825             xdIn = pSrc[(2U * i3)];
 826             ydIn = pSrc[(2U * i3) + 1U];
 827
 828             /* xa - xc */
 829             Xaminusc = xaIn - xcIn;
 830             /* (xb - xd) */
 831             Xbminusd = xbIn - xdIn;
 832             /* ya - yc */
 833             Yaminusc = yaIn - ycIn;
 834             /* (yb - yd) */
 835             Ybminusd = ybIn - ydIn;
 836
 837             /* xa + xc */
 838             Xaplusc = xaIn + xcIn;
 839             /* xb + xd */
 840             Xbplusd = xbIn + xdIn;
 841             /* ya + yc */
 842             Yaplusc = yaIn + ycIn;
 843             /* yb + yd */
 844             Ybplusd = ybIn + ydIn;
 845
 846             /* (xa - xc) - (yb - yd) */
 847             Xb12C_out = (Xaminusc - Ybminusd);
 848             /* (ya - yc) +  (xb - xd) */
 849             Yb12C_out = (Yaminusc + Xbminusd);
 850             /* xa + xc -(xb + xd) */
 851             Xc12C_out = (Xaplusc - Xbplusd);
 852             /* (ya + yc) - (yb + yd) */
 853             Yc12C_out = (Yaplusc - Ybplusd);
 854             /* (xa - xc) + (yb - yd) */
 855             Xd12C_out = (Xaminusc + Ybminusd);
 856             /* (ya - yc) -  (xb - xd) */
 857             Yd12C_out = (Yaminusc - Xbminusd);
 858
 859             pSrc[(2U * i0)] = Xaplusc + Xbplusd;
 860             pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
 861
 862             Xb12_out = Xb12C_out * co1;
 863             Yb12_out = Yb12C_out * co1;
 864             Xc12_out = Xc12C_out * co2;
 865             Yc12_out = Yc12C_out * co2;
 866             Xd12_out = Xd12C_out * co3;
 867             Yd12_out = Yd12C_out * co3;
 868
 869             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
 870             //Xb12_out -= Yb12C_out * si1;
 871             p0 = Yb12C_out * si1;
 872             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
 873             //Yb12_out += Xb12C_out * si1;
 874             p1 = Xb12C_out * si1;
 875             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
 876             //Xc12_out -= Yc12C_out * si2;
 877             p2 = Yc12C_out * si2;
 878             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
 879             //Yc12_out += Xc12C_out * si2;
 880             p3 = Xc12C_out * si2;
 881             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
 882             //Xd12_out -= Yd12C_out * si3;
 883             p4 = Yd12C_out * si3;
 884             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
 885             //Yd12_out += Xd12C_out * si3;
 886             p5 = Xd12C_out * si3;
 887
 888             Xb12_out -= p0;
 889             Yb12_out += p1;
 890             Xc12_out -= p2;
 891             Yc12_out += p3;
 892             Xd12_out -= p4;
 893             Yd12_out += p5;
 894
 895             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
 896             pSrc[2U * i1] = Xc12_out;
 897
 898             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
 899             pSrc[(2U * i1) + 1U] = Yc12_out;
 900
 901             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
 902             pSrc[2U * i2] = Xb12_out;
 903
 904             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
 905             pSrc[(2U * i2) + 1U] = Yb12_out;
 906
 907             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
 908             pSrc[2U * i3] = Xd12_out;
 909
 910             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
 911             pSrc[(2U * i3) + 1U] = Yd12_out;
 912
 913             i0 += n1;
 914          } while (i0 < fftLen);
 915          j++;
 916       } while (j <= (n2 - 1U));
 917       twidCoefModifier <<= 2U;
 918    }
 919    /*  Initializations of last stage */
 920
 921    j = fftLen >> 2;
 922    ptr1 = &pSrc[0];
 923
 924    /*  Calculations of last stage */
 925    do
 926    {
 927       xaIn = ptr1[0];
 928       yaIn = ptr1[1];
 929       xbIn = ptr1[2];
 930       ybIn = ptr1[3];
 931       xcIn = ptr1[4];
 932       ycIn = ptr1[5];
 933       xdIn = ptr1[6];
 934       ydIn = ptr1[7];
 935
 936       /*  Butterfly implementation */
 937       /* xa + xc */
 938       Xaplusc = xaIn + xcIn;
 939
 940       /* xa - xc */
 941       Xaminusc = xaIn - xcIn;
 942
 943       /* ya + yc */
 944       Yaplusc = yaIn + ycIn;
 945
 946       /* ya - yc */
 947       Yaminusc = yaIn - ycIn;
 948
 949       /* xb + xd */
 950       Xbplusd = xbIn + xdIn;
 951
 952       /* yb + yd */
 953       Ybplusd = ybIn + ydIn;
 954
 955       /* (xb-xd) */
 956       Xbminusd = xbIn - xdIn;
 957
 958       /* (yb-yd) */
 959       Ybminusd = ybIn - ydIn;
 960
 961       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
 962       a0 = (Xaplusc + Xbplusd);
 963       /* ya' = (ya+yb+yc+yd) * onebyfftLen */
 964       a1 = (Yaplusc + Ybplusd);
 965       /* xc' = (xa-xb+xc-xd) * onebyfftLen */
 966       a2 = (Xaplusc - Xbplusd);
 967       /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
 968       a3 = (Yaplusc - Ybplusd);
 969       /* xb' = (xa-yb-xc+yd) * onebyfftLen */
 970       a4 = (Xaminusc - Ybminusd);
 971       /* yb' = (ya+xb-yc-xd) * onebyfftLen */
 972       a5 = (Yaminusc + Xbminusd);
 973       /* xd' = (xa-yb-xc+yd) * onebyfftLen */
 974       a6 = (Xaminusc + Ybminusd);
 975       /* yd' = (ya-xb-yc+xd) * onebyfftLen */
 976       a7 = (Yaminusc - Xbminusd);
 977
 978       p0 = a0 * onebyfftLen;
 979       p1 = a1 * onebyfftLen;
 980       p2 = a2 * onebyfftLen;
 981       p3 = a3 * onebyfftLen;
 982       p4 = a4 * onebyfftLen;
 983       p5 = a5 * onebyfftLen;
 984       p6 = a6 * onebyfftLen;
 985       p7 = a7 * onebyfftLen;
 986
 987       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
 988       ptr1[0] = p0;
 989       /* ya' = (ya+yb+yc+yd) * onebyfftLen */
 990       ptr1[1] = p1;
 991       /* xc' = (xa-xb+xc-xd) * onebyfftLen */
 992       ptr1[2] = p2;
 993       /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
 994       ptr1[3] = p3;
 995       /* xb' = (xa-yb-xc+yd) * onebyfftLen */
 996       ptr1[4] = p4;
 997       /* yb' = (ya+xb-yc-xd) * onebyfftLen */
 998       ptr1[5] = p5;
 999       /* xd' = (xa-yb-xc+yd) * onebyfftLen */
1000       ptr1[6] = p6;
1001       /* yd' = (ya-xb-yc+xd) * onebyfftLen */
1002       ptr1[7] = p7;
1003
1004       /* increment source pointer by 8 for next calculations */
1005       ptr1 = ptr1 + 8U;
1006
1007    } while (--j);
1008
1009 #else
1010
1011    float32_t t1, t2, r1, r2, s1, s2;
1012
1013    /* Run the below code for Cortex-M0 */
1014
1015    /*  Initializations for the first stage */
1016    n2 = fftLen;
1017    n1 = n2;
1018
1019    /*  Calculation of first stage */
1020    for (k = fftLen; k > 4U; k >>= 2U)
1021    {
1022       /*  Initializations for the first stage */
1023       n1 = n2;
1024       n2 >>= 2U;
1025       ia1 = 0U;
1026
1027       /*  Calculation of first stage */
1028       j = 0;
1029       do
1030       {
1031          /*  index calculation for the coefficients */
1032          ia2 = ia1 + ia1;
1033          ia3 = ia2 + ia1;
1034          co1 = pCoef[ia1 * 2U];
1035          si1 = pCoef[(ia1 * 2U) + 1U];
1036          co2 = pCoef[ia2 * 2U];
1037          si2 = pCoef[(ia2 * 2U) + 1U];
1038          co3 = pCoef[ia3 * 2U];
1039          si3 = pCoef[(ia3 * 2U) + 1U];
1040
1041          /*  Twiddle coefficients index modifier */
1042          ia1 = ia1 + twidCoefModifier;
1043
1044          i0 = j;
1045          do
1046          {
1047             /*  index calculation for the input as, */
1048             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1049             i1 = i0 + n2;
1050             i2 = i1 + n2;
1051             i3 = i2 + n2;
1052
1053             /* xa + xc */
1054             r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
1055
1056             /* xa - xc */
1057             r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
1058
1059             /* ya + yc */
1060             s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1061
1062             /* ya - yc */
1063             s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1064
1065             /* xb + xd */
1066             t1 = pSrc[2U * i1] + pSrc[2U * i3];
1067
1068             /* xa' = xa + xb + xc + xd */
1069             pSrc[2U * i0] = r1 + t1;
1070
1071             /* xa + xc -(xb + xd) */
1072             r1 = r1 - t1;
1073
1074             /* yb + yd */
1075             t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1076
1077             /* ya' = ya + yb + yc + yd */
1078             pSrc[(2U * i0) + 1U] = s1 + t2;
1079
1080             /* (ya + yc) - (yb + yd) */
1081             s1 = s1 - t2;
1082
1083             /* (yb - yd) */
1084             t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1085
1086             /* (xb - xd) */
1087             t2 = pSrc[2U * i1] - pSrc[2U * i3];
1088
1089             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1090             pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
1091
1092             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1093             pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
1094
1095             /* (xa - xc) - (yb - yd) */
1096             r1 = r2 - t1;
1097
1098             /* (xa - xc) + (yb - yd) */
1099             r2 = r2 + t1;
1100
1101             /* (ya - yc) +  (xb - xd) */
1102             s1 = s2 + t2;
1103
1104             /* (ya - yc) -  (xb - xd) */
1105             s2 = s2 - t2;
1106
1107             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1108             pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
1109
1110             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1111             pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
1112
1113             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1114             pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
1115
1116             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1117             pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
1118
1119             i0 += n1;
1120          } while ( i0 < fftLen);
1121          j++;
1122       } while (j <= (n2 - 1U));
1123       twidCoefModifier <<= 2U;
1124    }
1125    /*  Initializations of last stage */
1126    n1 = n2;
1127    n2 >>= 2U;
1128
1129    /*  Calculations of last stage */
1130    for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
1131    {
1132       /*  index calculation for the input as, */
1133       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1134       i1 = i0 + n2;
1135       i2 = i1 + n2;
1136       i3 = i2 + n2;
1137
1138       /*  Butterfly implementation */
1139       /* xa + xc */
1140       r1 = pSrc[2U * i0] + pSrc[2U * i2];
1141
1142       /* xa - xc */
1143       r2 = pSrc[2U * i0] - pSrc[2U * i2];
1144
1145       /* ya + yc */
1146       s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1147
1148       /* ya - yc */
1149       s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1150
1151       /* xc + xd */
1152       t1 = pSrc[2U * i1] + pSrc[2U * i3];
1153
1154       /* xa' = xa + xb + xc + xd */
1155       pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
1156
1157       /* (xa + xb) - (xc + xd) */
1158       r1 = r1 - t1;
1159
1160       /* yb + yd */
1161       t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1162
1163       /* ya' = ya + yb + yc + yd */
1164       pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
1165
1166       /* (ya + yc) - (yb + yd) */
1167       s1 = s1 - t2;
1168
1169       /* (yb-yd) */
1170       t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1171
1172       /* (xb-xd) */
1173       t2 = pSrc[2U * i1] - pSrc[2U * i3];
1174
1175       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1176       pSrc[2U * i1] = r1 * onebyfftLen;
1177
1178       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1179       pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
1180
1181       /* (xa - xc) - (yb-yd) */
1182       r1 = r2 - t1;
1183
1184       /* (xa - xc) + (yb-yd) */
1185       r2 = r2 + t1;
1186
1187       /* (ya - yc) + (xb-xd) */
1188       s1 = s2 + t2;
1189
1190       /* (ya - yc) - (xb-xd) */
1191       s2 = s2 - t2;
1192
1193       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1194       pSrc[2U * i2] = r1 * onebyfftLen;
1195
1196       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1197       pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
1198
1199       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1200       pSrc[2U * i3] = r2 * onebyfftLen;
1201
1202       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1203       pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
1204    }
1205
1206 #endif /* #if defined (ARM_MATH_DSP) */
1207 }
1208
1209