lib/main/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q7.c

   1 /* ----------------------------------------------------------------------
   2  * Project:      CMSIS DSP Library
   3  * Title:        arm_shift_q7.c
   4  * Description:  Processing function for the Q7 Shifting
   5  *
   6  * $Date:        27. January 2017
   7  * $Revision:    V.1.5.1
   8  *
   9  * Target Processor: Cortex-M cores
  10  * -------------------------------------------------------------------- */
  11 /*
  12  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13  *
  14  * SPDX-License-Identifier: Apache-2.0
  15  *
  16  * Licensed under the Apache License, Version 2.0 (the License); you may
  17  * not use this file except in compliance with the License.
  18  * You may obtain a copy of the License at
  19  *
  20  * www.apache.org/licenses/LICENSE-2.0
  21  *
  22  * Unless required by applicable law or agreed to in writing, software
  23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25  * See the License for the specific language governing permissions and
  26  * limitations under the License.
  27  */
  28
  29 #include "arm_math.h"
  30
  31 /**
  32  * @ingroup groupMath
  33  */
  34
  35 /**
  36  * @addtogroup shift
  37  * @{
  38  */
  39
  40
  41 /**
  42  * @brief  Shifts the elements of a Q7 vector a specified number of bits.
  43  * @param[in]  *pSrc points to the input vector
  44  * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
  45  * @param[out]  *pDst points to the output vector
  46  * @param[in]  blockSize number of samples in the vector
  47  * @return none.
  48  *
  49  * \par Conditions for optimum performance
  50  *  Input and output buffers should be aligned by 32-bit
  51  *
  52  *
  53  * <b>Scaling and Overflow Behavior:</b>
  54  * \par
  55  * The function uses saturating arithmetic.
   56  * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
  57  */
  58
  59 void arm_shift_q7(
  60   q7_t * pSrc,
  61   int8_t shiftBits,
  62   q7_t * pDst,
  63   uint32_t blockSize)
  64 {
  65   uint32_t blkCnt;                               /* loop counter */
  66   uint8_t sign;                                  /* Sign of shiftBits */
  67
  68 #if defined (ARM_MATH_DSP)
  69
  70 /* Run the below code for Cortex-M4 and Cortex-M3 */
  71   q7_t in1;                                      /* Input value1 */
  72   q7_t in2;                                      /* Input value2 */
  73   q7_t in3;                                      /* Input value3 */
  74   q7_t in4;                                      /* Input value4 */
  75
  76
  77   /*loop Unrolling */
  78   blkCnt = blockSize >> 2U;
  79
  80   /* Getting the sign of shiftBits */
  81   sign = (shiftBits & 0x80);
  82
  83   /* If the shift value is positive then do right shift else left shift */
  84   if (sign == 0U)
  85   {
  86     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
  87      ** a second loop below computes the remaining 1 to 3 samples. */
  88     while (blkCnt > 0U)
  89     {
  90       /* C = A << shiftBits */
  91       /* Read 4 inputs */
  92       in1 = *pSrc;
  93       in2 = *(pSrc + 1);
  94       in3 = *(pSrc + 2);
  95       in4 = *(pSrc + 3);
  96
  97       /* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
  98       *__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shiftBits), 8),
  99                                    __SSAT((in2 << shiftBits), 8),
 100                                    __SSAT((in3 << shiftBits), 8),
 101                                    __SSAT((in4 << shiftBits), 8));
 102       /* Update source pointer to process next sampels */
 103       pSrc += 4U;
 104
 105       /* Decrement the loop counter */
 106       blkCnt--;
 107     }
 108
 109     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
 110      ** No loop unrolling is used. */
 111     blkCnt = blockSize % 0x4U;
 112
 113     while (blkCnt > 0U)
 114     {
 115       /* C = A << shiftBits */
 116       /* Shift the input and then store the result in the destination buffer. */
 117       *pDst++ = (q7_t) __SSAT((*pSrc++ << shiftBits), 8);
 118
 119       /* Decrement the loop counter */
 120       blkCnt--;
 121     }
 122   }
 123   else
 124   {
 125     shiftBits = -shiftBits;
 126     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
 127      ** a second loop below computes the remaining 1 to 3 samples. */
 128     while (blkCnt > 0U)
 129     {
 130       /* C = A >> shiftBits */
 131       /* Read 4 inputs */
 132       in1 = *pSrc;
 133       in2 = *(pSrc + 1);
 134       in3 = *(pSrc + 2);
 135       in4 = *(pSrc + 3);
 136
 137       /* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
 138       *__SIMD32(pDst)++ = __PACKq7((in1 >> shiftBits), (in2 >> shiftBits),
 139                                    (in3 >> shiftBits), (in4 >> shiftBits));
 140
 141
 142       pSrc += 4U;
 143
 144       /* Decrement the loop counter */
 145       blkCnt--;
 146     }
 147
 148     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
 149      ** No loop unrolling is used. */
 150     blkCnt = blockSize % 0x4U;
 151
 152     while (blkCnt > 0U)
 153     {
 154       /* C = A >> shiftBits */
 155       /* Shift the input and then store the result in the destination buffer. */
 156       in1 = *pSrc++;
 157       *pDst++ = (in1 >> shiftBits);
 158
 159       /* Decrement the loop counter */
 160       blkCnt--;
 161     }
 162   }
 163
 164 #else
 165
 166   /* Run the below code for Cortex-M0 */
 167
 168   /* Getting the sign of shiftBits */
 169   sign = (shiftBits & 0x80);
 170
 171   /* If the shift value is positive then do right shift else left shift */
 172   if (sign == 0U)
 173   {
 174     /* Initialize blkCnt with number of samples */
 175     blkCnt = blockSize;
 176
 177     while (blkCnt > 0U)
 178     {
 179       /* C = A << shiftBits */
 180       /* Shift the input and then store the result in the destination buffer. */
 181       *pDst++ = (q7_t) __SSAT(((q15_t) * pSrc++ << shiftBits), 8);
 182
 183       /* Decrement the loop counter */
 184       blkCnt--;
 185     }
 186   }
 187   else
 188   {
 189     /* Initialize blkCnt with number of samples */
 190     blkCnt = blockSize;
 191
 192     while (blkCnt > 0U)
 193     {
 194       /* C = A >> shiftBits */
 195       /* Shift the input and then store the result in the destination buffer. */
 196       *pDst++ = (*pSrc++ >> -shiftBits);
 197
 198       /* Decrement the loop counter */
 199       blkCnt--;
 200     }
 201   }
 202
 203 #endif /* #if defined (ARM_MATH_DSP) */
 204 }
 205
 206 /**
 207  * @} end of shift group
 208  */