lib/main/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c

   1 /* ----------------------------------------------------------------------
   2  * Project:      CMSIS DSP Library
   3  * Title:        arm_cmplx_mult_cmplx_f32.c
   4  * Description:  Floating-point complex-by-complex multiplication
   5  *
   6  * $Date:        27. January 2017
   7  * $Revision:    V.1.5.1
   8  *
   9  * Target Processor: Cortex-M cores
  10  * -------------------------------------------------------------------- */
  11 /*
  12  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13  *
  14  * SPDX-License-Identifier: Apache-2.0
  15  *
  16  * Licensed under the Apache License, Version 2.0 (the License); you may
  17  * not use this file except in compliance with the License.
  18  * You may obtain a copy of the License at
  19  *
  20  * www.apache.org/licenses/LICENSE-2.0
  21  *
  22  * Unless required by applicable law or agreed to in writing, software
  23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25  * See the License for the specific language governing permissions and
  26  * limitations under the License.
  27  */
  28
  29 #include "arm_math.h"
  30
  31 /**
  32  * @ingroup groupCmplxMath
  33  */
  34
  35 /**
  36  * @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
  37  *
  38  * Multiplies a complex vector by another complex vector and generates a complex result.
  39  * The data in the complex arrays is stored in an interleaved fashion
  40  * (real, imag, real, imag, ...).
  41  * The parameter <code>numSamples</code> represents the number of complex
  42  * samples processed.  The complex arrays have a total of <code>2*numSamples</code>
  43  * real values.
  44  *
  45  * The following algorithm is used:
  46  *
  47  * <pre>
  48  * for(n=0; n<numSamples; n++) {
  49  *     pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
  50  *     pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
  51  * }
  52  * </pre>
  53  *
  54  * There are separate functions for floating-point, Q15, and Q31 data types.
  55  */
  56
  57 /**
  58  * @addtogroup CmplxByCmplxMult
  59  * @{
  60  */
  61
  62
  63 /**
  64  * @brief  Floating-point complex-by-complex multiplication
  65  * @param[in]  *pSrcA points to the first input vector
  66  * @param[in]  *pSrcB points to the second input vector
  67  * @param[out]  *pDst  points to the output vector
  68  * @param[in]  numSamples number of complex samples in each vector
  69  * @return none.
  70  */
  71
  72 void arm_cmplx_mult_cmplx_f32(
  73   float32_t * pSrcA,
  74   float32_t * pSrcB,
  75   float32_t * pDst,
  76   uint32_t numSamples)
  77 {
  78   float32_t a1, b1, c1, d1;                      /* Temporary variables to store real and imaginary values */
  79   uint32_t blkCnt;                               /* loop counters */
  80
  81 #if defined (ARM_MATH_DSP)
  82
  83   /* Run the below code for Cortex-M4 and Cortex-M3 */
  84   float32_t a2, b2, c2, d2;                      /* Temporary variables to store real and imaginary values */
  85   float32_t acc1, acc2, acc3, acc4;
  86
  87
  88   /* loop Unrolling */
  89   blkCnt = numSamples >> 2U;
  90
  91   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
  92    ** a second loop below computes the remaining 1 to 3 samples. */
  93   while (blkCnt > 0U)
  94   {
  95     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
  96     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
  97     a1 = *pSrcA;                /* A[2 * i] */
  98     c1 = *pSrcB;                /* B[2 * i] */
  99
 100     b1 = *(pSrcA + 1);          /* A[2 * i + 1] */
 101     acc1 = a1 * c1;             /* acc1 = A[2 * i] * B[2 * i] */
 102
 103     a2 = *(pSrcA + 2);          /* A[2 * i + 2] */
 104     acc2 = (b1 * c1);           /* acc2 = A[2 * i + 1] * B[2 * i] */
 105
 106     d1 = *(pSrcB + 1);          /* B[2 * i + 1] */
 107     c2 = *(pSrcB + 2);          /* B[2 * i + 2] */
 108     acc1 -= b1 * d1;            /* acc1 =      A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
 109
 110     d2 = *(pSrcB + 3);          /* B[2 * i + 3] */
 111     acc3 = a2 * c2;             /* acc3 =       A[2 * i + 2] * B[2 * i + 2] */
 112
 113     b2 = *(pSrcA + 3);          /* A[2 * i + 3] */
 114     acc2 += (a1 * d1);          /* acc2 =      A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */
 115
 116     a1 = *(pSrcA + 4);          /* A[2 * i + 4] */
 117     acc4 = (a2 * d2);           /* acc4 =   A[2 * i + 2] * B[2 * i + 3] */
 118
 119     c1 = *(pSrcB + 4);          /* B[2 * i + 4] */
 120     acc3 -= (b2 * d2);          /* acc3 =       A[2 * i + 2] * B[2 * i + 2] - A[2 * i + 3] * B[2 * i + 3] */
 121     *pDst = acc1;               /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
 122
 123     b1 = *(pSrcA + 5);          /* A[2 * i + 5] */
 124     acc4 += b2 * c2;            /* acc4 =   A[2 * i + 2] * B[2 * i + 3] + A[2 * i + 3] * B[2 * i + 2] */
 125
 126     *(pDst + 1) = acc2;         /* C[2 * i + 1] = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1]  */
 127     acc1 = (a1 * c1);
 128
 129     d1 = *(pSrcB + 5);
 130     acc2 = (b1 * c1);
 131
 132     *(pDst + 2) = acc3;
 133     *(pDst + 3) = acc4;
 134
 135     a2 = *(pSrcA + 6);
 136     acc1 -= (b1 * d1);
 137
 138     c2 = *(pSrcB + 6);
 139     acc2 += (a1 * d1);
 140
 141     b2 = *(pSrcA + 7);
 142     acc3 = (a2 * c2);
 143
 144     d2 = *(pSrcB + 7);
 145     acc4 = (b2 * c2);
 146
 147     *(pDst + 4) = acc1;
 148     pSrcA += 8U;
 149
 150     acc3 -= (b2 * d2);
 151     acc4 += (a2 * d2);
 152
 153     *(pDst + 5) = acc2;
 154     pSrcB += 8U;
 155
 156     *(pDst + 6) = acc3;
 157     *(pDst + 7) = acc4;
 158
 159     pDst += 8U;
 160
 161     /* Decrement the numSamples loop counter */
 162     blkCnt--;
 163   }
 164
 165   /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
 166    ** No loop unrolling is used. */
 167   blkCnt = numSamples % 0x4U;
 168
 169 #else
 170
 171   /* Run the below code for Cortex-M0 */
 172   blkCnt = numSamples;
 173
 174 #endif /* #if defined (ARM_MATH_DSP) */
 175
 176   while (blkCnt > 0U)
 177   {
 178     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
 179     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
 180     a1 = *pSrcA++;
 181     b1 = *pSrcA++;
 182     c1 = *pSrcB++;
 183     d1 = *pSrcB++;
 184
 185     /* store the result in the destination buffer. */
 186     *pDst++ = (a1 * c1) - (b1 * d1);
 187     *pDst++ = (a1 * d1) + (b1 * c1);
 188
 189     /* Decrement the numSamples loop counter */
 190     blkCnt--;
 191   }
 192 }
 193
 194 /**
 195  * @} end of CmplxByCmplxMult group
 196  */