lib/main/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c

   1 /* ----------------------------------------------------------------------
   2  * Project:      CMSIS DSP Library
   3  * Title:        arm_cmplx_mag_squared_q15.c
   4  * Description:  Q15 complex magnitude squared
   5  *
   6  * $Date:        27. January 2017
   7  * $Revision:    V.1.5.1
   8  *
   9  * Target Processor: Cortex-M cores
  10  * -------------------------------------------------------------------- */
  11 /*
  12  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13  *
  14  * SPDX-License-Identifier: Apache-2.0
  15  *
  16  * Licensed under the Apache License, Version 2.0 (the License); you may
  17  * not use this file except in compliance with the License.
  18  * You may obtain a copy of the License at
  19  *
  20  * www.apache.org/licenses/LICENSE-2.0
  21  *
  22  * Unless required by applicable law or agreed to in writing, software
  23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25  * See the License for the specific language governing permissions and
  26  * limitations under the License.
  27  */
  28
  29 #include "arm_math.h"
  30
  31 /**
  32  * @ingroup groupCmplxMath
  33  */
  34
  35 /**
  36  * @addtogroup cmplx_mag_squared
  37  * @{
  38  */
  39
  40 /**
  41  * @brief  Q15 complex magnitude squared
  42  * @param  *pSrc points to the complex input vector
  43  * @param  *pDst points to the real output vector
  44  * @param  numSamples number of complex samples in the input vector
  45  * @return none.
  46  *
  47  * <b>Scaling and Overflow Behavior:</b>
  48  * \par
  49  * The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
  50  */
  51
  52 void arm_cmplx_mag_squared_q15(
  53   q15_t * pSrc,
  54   q15_t * pDst,
  55   uint32_t numSamples)
  56 {
  57   q31_t acc0, acc1;                              /* Accumulators */
  58
  59 #if defined (ARM_MATH_DSP)
  60
  61   /* Run the below code for Cortex-M4 and Cortex-M3 */
  62   uint32_t blkCnt;                               /* loop counter */
  63   q31_t in1, in2, in3, in4;
  64   q31_t acc2, acc3;
  65
  66   /*loop Unrolling */
  67   blkCnt = numSamples >> 2U;
  68
  69   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
  70    ** a second loop below computes the remaining 1 to 3 samples. */
  71   while (blkCnt > 0U)
  72   {
  73     /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
  74     in1 = *__SIMD32(pSrc)++;
  75     in2 = *__SIMD32(pSrc)++;
  76     in3 = *__SIMD32(pSrc)++;
  77     in4 = *__SIMD32(pSrc)++;
  78
  79     acc0 = __SMUAD(in1, in1);
  80     acc1 = __SMUAD(in2, in2);
  81     acc2 = __SMUAD(in3, in3);
  82     acc3 = __SMUAD(in4, in4);
  83
  84     /* store the result in 3.13 format in the destination buffer. */
  85     *pDst++ = (q15_t) (acc0 >> 17);
  86     *pDst++ = (q15_t) (acc1 >> 17);
  87     *pDst++ = (q15_t) (acc2 >> 17);
  88     *pDst++ = (q15_t) (acc3 >> 17);
  89
  90     /* Decrement the loop counter */
  91     blkCnt--;
  92   }
  93
  94   /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
  95    ** No loop unrolling is used. */
  96   blkCnt = numSamples % 0x4U;
  97
  98   while (blkCnt > 0U)
  99   {
 100     /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
 101     in1 = *__SIMD32(pSrc)++;
 102     acc0 = __SMUAD(in1, in1);
 103
 104     /* store the result in 3.13 format in the destination buffer. */
 105     *pDst++ = (q15_t) (acc0 >> 17);
 106
 107     /* Decrement the loop counter */
 108     blkCnt--;
 109   }
 110
 111 #else
 112
 113   /* Run the below code for Cortex-M0 */
 114   q15_t real, imag;                              /* Temporary variables to store real and imaginary values */
 115
 116   while (numSamples > 0U)
 117   {
 118     /* out = ((real * real) + (imag * imag)) */
 119     real = *pSrc++;
 120     imag = *pSrc++;
 121     acc0 = (real * real);
 122     acc1 = (imag * imag);
 123     /* store the result in 3.13 format in the destination buffer. */
 124     *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
 125
 126     /* Decrement the loop counter */
 127     numSamples--;
 128   }
 129
 130 #endif /* #if defined (ARM_MATH_DSP) */
 131
 132 }
 133
 134 /**
 135  * @} end of cmplx_mag_squared group
 136  */