lib/main/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c

   1 /* ----------------------------------------------------------------------
   2  * Project:      CMSIS DSP Library
   3  * Title:        arm_cmplx_mult_cmplx_q31.c
   4  * Description:  Q31 complex-by-complex multiplication
   5  *
   6  * $Date:        27. January 2017
   7  * $Revision:    V.1.5.1
   8  *
   9  * Target Processor: Cortex-M cores
  10  * -------------------------------------------------------------------- */
  11 /*
  12  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13  *
  14  * SPDX-License-Identifier: Apache-2.0
  15  *
  16  * Licensed under the Apache License, Version 2.0 (the License); you may
  17  * not use this file except in compliance with the License.
  18  * You may obtain a copy of the License at
  19  *
  20  * www.apache.org/licenses/LICENSE-2.0
  21  *
  22  * Unless required by applicable law or agreed to in writing, software
  23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25  * See the License for the specific language governing permissions and
  26  * limitations under the License.
  27  */
  28
  29 #include "arm_math.h"
  30
  31 /**
  32  * @ingroup groupCmplxMath
  33  */
  34
  35 /**
  36  * @addtogroup CmplxByCmplxMult
  37  * @{
  38  */
  39
  40
  41 /**
  42  * @brief  Q31 complex-by-complex multiplication
  43  * @param[in]  *pSrcA points to the first input vector
  44  * @param[in]  *pSrcB points to the second input vector
  45  * @param[out]  *pDst  points to the output vector
  46  * @param[in]  numSamples number of complex samples in each vector
  47  * @return none.
  48  *
  49  * <b>Scaling and Overflow Behavior:</b>
  50  * \par
  51  * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
  52  * Input down scaling is not required.
  53  */
  54
  55 void arm_cmplx_mult_cmplx_q31(
  56   q31_t * pSrcA,
  57   q31_t * pSrcB,
  58   q31_t * pDst,
  59   uint32_t numSamples)
  60 {
  61   q31_t a, b, c, d;                              /* Temporary variables to store real and imaginary values */
  62   uint32_t blkCnt;                               /* loop counters */
  63   q31_t mul1, mul2, mul3, mul4;
  64   q31_t out1, out2;
  65
  66 #if defined (ARM_MATH_DSP)
  67
  68   /* Run the below code for Cortex-M4 and Cortex-M3 */
  69
  70   /* loop Unrolling */
  71   blkCnt = numSamples >> 2U;
  72
  73   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
  74    ** a second loop below computes the remaining 1 to 3 samples. */
  75   while (blkCnt > 0U)
  76   {
  77     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
  78     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
  79     a = *pSrcA++;
  80     b = *pSrcA++;
  81     c = *pSrcB++;
  82     d = *pSrcB++;
  83
  84     mul1 = (q31_t) (((q63_t) a * c) >> 32);
  85     mul2 = (q31_t) (((q63_t) b * d) >> 32);
  86     mul3 = (q31_t) (((q63_t) a * d) >> 32);
  87     mul4 = (q31_t) (((q63_t) b * c) >> 32);
  88
  89     mul1 = (mul1 >> 1);
  90     mul2 = (mul2 >> 1);
  91     mul3 = (mul3 >> 1);
  92     mul4 = (mul4 >> 1);
  93
  94     out1 = mul1 - mul2;
  95     out2 = mul3 + mul4;
  96
  97     /* store the real result in 3.29 format in the destination buffer. */
  98     *pDst++ = out1;
  99     /* store the imag result in 3.29 format in the destination buffer. */
 100     *pDst++ = out2;
 101
 102     a = *pSrcA++;
 103     b = *pSrcA++;
 104     c = *pSrcB++;
 105     d = *pSrcB++;
 106
 107     mul1 = (q31_t) (((q63_t) a * c) >> 32);
 108     mul2 = (q31_t) (((q63_t) b * d) >> 32);
 109     mul3 = (q31_t) (((q63_t) a * d) >> 32);
 110     mul4 = (q31_t) (((q63_t) b * c) >> 32);
 111
 112     mul1 = (mul1 >> 1);
 113     mul2 = (mul2 >> 1);
 114     mul3 = (mul3 >> 1);
 115     mul4 = (mul4 >> 1);
 116
 117     out1 = mul1 - mul2;
 118     out2 = mul3 + mul4;
 119
 120     /* store the real result in 3.29 format in the destination buffer. */
 121     *pDst++ = out1;
 122     /* store the imag result in 3.29 format in the destination buffer. */
 123     *pDst++ = out2;
 124
 125     a = *pSrcA++;
 126     b = *pSrcA++;
 127     c = *pSrcB++;
 128     d = *pSrcB++;
 129
 130     mul1 = (q31_t) (((q63_t) a * c) >> 32);
 131     mul2 = (q31_t) (((q63_t) b * d) >> 32);
 132     mul3 = (q31_t) (((q63_t) a * d) >> 32);
 133     mul4 = (q31_t) (((q63_t) b * c) >> 32);
 134
 135     mul1 = (mul1 >> 1);
 136     mul2 = (mul2 >> 1);
 137     mul3 = (mul3 >> 1);
 138     mul4 = (mul4 >> 1);
 139
 140     out1 = mul1 - mul2;
 141     out2 = mul3 + mul4;
 142
 143     /* store the real result in 3.29 format in the destination buffer. */
 144     *pDst++ = out1;
 145     /* store the imag result in 3.29 format in the destination buffer. */
 146     *pDst++ = out2;
 147
 148     a = *pSrcA++;
 149     b = *pSrcA++;
 150     c = *pSrcB++;
 151     d = *pSrcB++;
 152
 153     mul1 = (q31_t) (((q63_t) a * c) >> 32);
 154     mul2 = (q31_t) (((q63_t) b * d) >> 32);
 155     mul3 = (q31_t) (((q63_t) a * d) >> 32);
 156     mul4 = (q31_t) (((q63_t) b * c) >> 32);
 157
 158     mul1 = (mul1 >> 1);
 159     mul2 = (mul2 >> 1);
 160     mul3 = (mul3 >> 1);
 161     mul4 = (mul4 >> 1);
 162
 163     out1 = mul1 - mul2;
 164     out2 = mul3 + mul4;
 165
 166     /* store the real result in 3.29 format in the destination buffer. */
 167     *pDst++ = out1;
 168     /* store the imag result in 3.29 format in the destination buffer. */
 169     *pDst++ = out2;
 170
 171     /* Decrement the blockSize loop counter */
 172     blkCnt--;
 173   }
 174
 175   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
 176    ** No loop unrolling is used. */
 177   blkCnt = numSamples % 0x4U;
 178
 179   while (blkCnt > 0U)
 180   {
 181     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
 182     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
 183     a = *pSrcA++;
 184     b = *pSrcA++;
 185     c = *pSrcB++;
 186     d = *pSrcB++;
 187
 188     mul1 = (q31_t) (((q63_t) a * c) >> 32);
 189     mul2 = (q31_t) (((q63_t) b * d) >> 32);
 190     mul3 = (q31_t) (((q63_t) a * d) >> 32);
 191     mul4 = (q31_t) (((q63_t) b * c) >> 32);
 192
 193     mul1 = (mul1 >> 1);
 194     mul2 = (mul2 >> 1);
 195     mul3 = (mul3 >> 1);
 196     mul4 = (mul4 >> 1);
 197
 198     out1 = mul1 - mul2;
 199     out2 = mul3 + mul4;
 200
 201     /* store the real result in 3.29 format in the destination buffer. */
 202     *pDst++ = out1;
 203     /* store the imag result in 3.29 format in the destination buffer. */
 204     *pDst++ = out2;
 205
 206     /* Decrement the blockSize loop counter */
 207     blkCnt--;
 208   }
 209
 210 #else
 211
 212   /* Run the below code for Cortex-M0 */
 213
 214   /* loop Unrolling */
 215   blkCnt = numSamples >> 1U;
 216
 217   /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.
 218    ** a second loop below computes the remaining 1 sample. */
 219   while (blkCnt > 0U)
 220   {
 221     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
 222     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
 223     a = *pSrcA++;
 224     b = *pSrcA++;
 225     c = *pSrcB++;
 226     d = *pSrcB++;
 227
 228     mul1 = (q31_t) (((q63_t) a * c) >> 32);
 229     mul2 = (q31_t) (((q63_t) b * d) >> 32);
 230     mul3 = (q31_t) (((q63_t) a * d) >> 32);
 231     mul4 = (q31_t) (((q63_t) b * c) >> 32);
 232
 233     mul1 = (mul1 >> 1);
 234     mul2 = (mul2 >> 1);
 235     mul3 = (mul3 >> 1);
 236     mul4 = (mul4 >> 1);
 237
 238     out1 = mul1 - mul2;
 239     out2 = mul3 + mul4;
 240
 241     /* store the real result in 3.29 format in the destination buffer. */
 242     *pDst++ = out1;
 243     /* store the imag result in 3.29 format in the destination buffer. */
 244     *pDst++ = out2;
 245
 246     a = *pSrcA++;
 247     b = *pSrcA++;
 248     c = *pSrcB++;
 249     d = *pSrcB++;
 250
 251     mul1 = (q31_t) (((q63_t) a * c) >> 32);
 252     mul2 = (q31_t) (((q63_t) b * d) >> 32);
 253     mul3 = (q31_t) (((q63_t) a * d) >> 32);
 254     mul4 = (q31_t) (((q63_t) b * c) >> 32);
 255
 256     mul1 = (mul1 >> 1);
 257     mul2 = (mul2 >> 1);
 258     mul3 = (mul3 >> 1);
 259     mul4 = (mul4 >> 1);
 260
 261     out1 = mul1 - mul2;
 262     out2 = mul3 + mul4;
 263
 264     /* store the real result in 3.29 format in the destination buffer. */
 265     *pDst++ = out1;
 266     /* store the imag result in 3.29 format in the destination buffer. */
 267     *pDst++ = out2;
 268
 269     /* Decrement the blockSize loop counter */
 270     blkCnt--;
 271   }
 272
 273   /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
 274    ** No loop unrolling is used. */
 275   blkCnt = numSamples % 0x2U;
 276
 277   while (blkCnt > 0U)
 278   {
 279     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
 280     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
 281     a = *pSrcA++;
 282     b = *pSrcA++;
 283     c = *pSrcB++;
 284     d = *pSrcB++;
 285
 286     mul1 = (q31_t) (((q63_t) a * c) >> 32);
 287     mul2 = (q31_t) (((q63_t) b * d) >> 32);
 288     mul3 = (q31_t) (((q63_t) a * d) >> 32);
 289     mul4 = (q31_t) (((q63_t) b * c) >> 32);
 290
 291     mul1 = (mul1 >> 1);
 292     mul2 = (mul2 >> 1);
 293     mul3 = (mul3 >> 1);
 294     mul4 = (mul4 >> 1);
 295
 296     out1 = mul1 - mul2;
 297     out2 = mul3 + mul4;
 298
 299     /* store the real result in 3.29 format in the destination buffer. */
 300     *pDst++ = out1;
 301     /* store the imag result in 3.29 format in the destination buffer. */
 302     *pDst++ = out2;
 303
 304     /* Decrement the blockSize loop counter */
 305     blkCnt--;
 306   }
 307
 308 #endif /* #if defined (ARM_MATH_DSP) */
 309
 310 }
 311
 312 /**
 313  * @} end of CmplxByCmplxMult group
 314  */