lib/main/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c

   1 /* ----------------------------------------------------------------------
   2  * Project:      CMSIS DSP Library
   3  * Title:        arm_cmplx_mult_real_q15.c
   4  * Description:  Q15 complex by real multiplication
   5  *
   6  * $Date:        27. January 2017
   7  * $Revision:    V.1.5.1
   8  *
   9  * Target Processor: Cortex-M cores
  10  * -------------------------------------------------------------------- */
  11 /*
  12  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13  *
  14  * SPDX-License-Identifier: Apache-2.0
  15  *
  16  * Licensed under the Apache License, Version 2.0 (the License); you may
  17  * not use this file except in compliance with the License.
  18  * You may obtain a copy of the License at
  19  *
  20  * www.apache.org/licenses/LICENSE-2.0
  21  *
  22  * Unless required by applicable law or agreed to in writing, software
  23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25  * See the License for the specific language governing permissions and
  26  * limitations under the License.
  27  */
  28
  29 #include "arm_math.h"
  30
  31 /**
  32  * @ingroup groupCmplxMath
  33  */
  34
  35 /**
  36  * @addtogroup CmplxByRealMult
  37  * @{
  38  */
  39
  40
  41 /**
  42  * @brief  Q15 complex-by-real multiplication
  43  * @param[in]  *pSrcCmplx points to the complex input vector
  44  * @param[in]  *pSrcReal points to the real input vector
  45  * @param[out]  *pCmplxDst points to the complex output vector
  46  * @param[in]  numSamples number of samples in each vector
  47  * @return none.
  48  *
  49  * <b>Scaling and Overflow Behavior:</b>
  50  * \par
  51  * The function uses saturating arithmetic.
  52  * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
  53  */
  54
  55 void arm_cmplx_mult_real_q15(
  56   q15_t * pSrcCmplx,
  57   q15_t * pSrcReal,
  58   q15_t * pCmplxDst,
  59   uint32_t numSamples)
  60 {
  61   q15_t in;                                      /* Temporary variable to store input value */
  62
  63 #if defined (ARM_MATH_DSP)
  64
  65   /* Run the below code for Cortex-M4 and Cortex-M3 */
  66   uint32_t blkCnt;                               /* loop counters */
  67   q31_t inA1, inA2;                              /* Temporary variables to hold input data */
  68   q31_t inB1;                                    /* Temporary variables to hold input data */
  69   q15_t out1, out2, out3, out4;                  /* Temporary variables to hold output data */
  70   q31_t mul1, mul2, mul3, mul4;                  /* Temporary variables to hold intermediate data */
  71
  72   /* loop Unrolling */
  73   blkCnt = numSamples >> 2U;
  74
  75   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
  76    ** a second loop below computes the remaining 1 to 3 samples. */
  77   while (blkCnt > 0U)
  78   {
  79     /* C[2 * i] = A[2 * i] * B[i].            */
  80     /* C[2 * i + 1] = A[2 * i + 1] * B[i].        */
  81     /* read complex number both real and imaginary from complex input buffer */
  82     inA1 = *__SIMD32(pSrcCmplx)++;
  83     /* read two real values at a time from real input buffer */
  84     inB1 = *__SIMD32(pSrcReal)++;
  85     /* read complex number both real and imaginary from complex input buffer */
  86     inA2 = *__SIMD32(pSrcCmplx)++;
  87
  88     /* multiply complex number with real numbers */
  89 #ifndef ARM_MATH_BIG_ENDIAN
  90
  91     mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1));
  92     mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
  93     mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16));
  94     mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
  95
  96 #else
  97
  98     mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
  99     mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16));
 100     mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
 101     mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1);
 102
 103 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
 104
 105     /* saturate the result */
 106     out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
 107     out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
 108     out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
 109     out4 = (q15_t) __SSAT(mul4 >> 15U, 16);
 110
 111     /* pack real and imaginary outputs and store them to destination */
 112     *__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
 113     *__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16);
 114
 115     inA1 = *__SIMD32(pSrcCmplx)++;
 116     inB1 = *__SIMD32(pSrcReal)++;
 117     inA2 = *__SIMD32(pSrcCmplx)++;
 118
 119 #ifndef ARM_MATH_BIG_ENDIAN
 120
 121     mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1));
 122     mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
 123     mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16));
 124     mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
 125
 126 #else
 127
 128     mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
 129     mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16));
 130     mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
 131     mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1);
 132
 133 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
 134
 135     out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
 136     out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
 137     out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
 138     out4 = (q15_t) __SSAT(mul4 >> 15U, 16);
 139
 140     *__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
 141     *__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16);
 142
 143     /* Decrement the numSamples loop counter */
 144     blkCnt--;
 145   }
 146
 147   /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
 148    ** No loop unrolling is used. */
 149   blkCnt = numSamples % 0x4U;
 150
 151   while (blkCnt > 0U)
 152   {
 153     /* C[2 * i] = A[2 * i] * B[i].            */
 154     /* C[2 * i + 1] = A[2 * i + 1] * B[i].        */
 155     in = *pSrcReal++;
 156     /* store the result in the destination buffer. */
 157     *pCmplxDst++ =
 158       (q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
 159     *pCmplxDst++ =
 160       (q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
 161
 162     /* Decrement the numSamples loop counter */
 163     blkCnt--;
 164   }
 165
 166 #else
 167
 168   /* Run the below code for Cortex-M0 */
 169
 170   while (numSamples > 0U)
 171   {
 172     /* realOut = realA * realB.            */
 173     /* imagOut = imagA * realB.                */
 174     in = *pSrcReal++;
 175     /* store the result in the destination buffer. */
 176     *pCmplxDst++ =
 177       (q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
 178     *pCmplxDst++ =
 179       (q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
 180
 181     /* Decrement the numSamples loop counter */
 182     numSamples--;
 183   }
 184
 185 #endif /* #if defined (ARM_MATH_DSP) */
 186
 187 }
 188
 189 /**
 190  * @} end of CmplxByRealMult group
 191  */