Merge pull request #10417 from HGLRC-T/HGLRCF405V2
[inav.git] / lib / main / CMSIS / DSP / Source / ComplexMathFunctions / arm_cmplx_mult_cmplx_q15.c
blob2869837d2fbdafc4219be4aa379e35e0cc57f1f6
1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cmplx_mult_cmplx_q15.c
4 * Description: Q15 complex-by-complex multiplication
6 * $Date: 27. January 2017
7 * $Revision: V.1.5.1
9 * Target Processor: Cortex-M cores
10 * -------------------------------------------------------------------- */
/*
 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
29 #include "arm_math.h"
/**
 * @ingroup groupCmplxMath
 */

/**
 * @addtogroup CmplxByCmplxMult
 * @{
 */
40 /**
41 * @brief Q15 complex-by-complex multiplication
42 * @param[in] *pSrcA points to the first input vector
43 * @param[in] *pSrcB points to the second input vector
44 * @param[out] *pDst points to the output vector
45 * @param[in] numSamples number of complex samples in each vector
46 * @return none.
48 * <b>Scaling and Overflow Behavior:</b>
49 * \par
50 * The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
53 void arm_cmplx_mult_cmplx_q15(
54 q15_t * pSrcA,
55 q15_t * pSrcB,
56 q15_t * pDst,
57 uint32_t numSamples)
59 q15_t a, b, c, d; /* Temporary variables to store real and imaginary values */
61 #if defined (ARM_MATH_DSP)
63 /* Run the below code for Cortex-M4 and Cortex-M3 */
64 uint32_t blkCnt; /* loop counters */
66 /* loop Unrolling */
67 blkCnt = numSamples >> 2U;
69 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
70 ** a second loop below computes the remaining 1 to 3 samples. */
71 while (blkCnt > 0U)
73 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
74 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
75 a = *pSrcA++;
76 b = *pSrcA++;
77 c = *pSrcB++;
78 d = *pSrcB++;
80 /* store the result in 3.13 format in the destination buffer. */
81 *pDst++ =
82 (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
83 /* store the result in 3.13 format in the destination buffer. */
84 *pDst++ =
85 (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
87 a = *pSrcA++;
88 b = *pSrcA++;
89 c = *pSrcB++;
90 d = *pSrcB++;
92 /* store the result in 3.13 format in the destination buffer. */
93 *pDst++ =
94 (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
95 /* store the result in 3.13 format in the destination buffer. */
96 *pDst++ =
97 (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
99 a = *pSrcA++;
100 b = *pSrcA++;
101 c = *pSrcB++;
102 d = *pSrcB++;
104 /* store the result in 3.13 format in the destination buffer. */
105 *pDst++ =
106 (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
107 /* store the result in 3.13 format in the destination buffer. */
108 *pDst++ =
109 (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
111 a = *pSrcA++;
112 b = *pSrcA++;
113 c = *pSrcB++;
114 d = *pSrcB++;
116 /* store the result in 3.13 format in the destination buffer. */
117 *pDst++ =
118 (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
119 /* store the result in 3.13 format in the destination buffer. */
120 *pDst++ =
121 (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
123 /* Decrement the blockSize loop counter */
124 blkCnt--;
127 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
128 ** No loop unrolling is used. */
129 blkCnt = numSamples % 0x4U;
131 while (blkCnt > 0U)
133 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
134 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
135 a = *pSrcA++;
136 b = *pSrcA++;
137 c = *pSrcB++;
138 d = *pSrcB++;
140 /* store the result in 3.13 format in the destination buffer. */
141 *pDst++ =
142 (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
143 /* store the result in 3.13 format in the destination buffer. */
144 *pDst++ =
145 (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
147 /* Decrement the blockSize loop counter */
148 blkCnt--;
151 #else
153 /* Run the below code for Cortex-M0 */
155 while (numSamples > 0U)
157 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
158 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
159 a = *pSrcA++;
160 b = *pSrcA++;
161 c = *pSrcB++;
162 d = *pSrcB++;
164 /* store the result in 3.13 format in the destination buffer. */
165 *pDst++ =
166 (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
167 /* store the result in 3.13 format in the destination buffer. */
168 *pDst++ =
169 (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
171 /* Decrement the blockSize loop counter */
172 numSamples--;
175 #endif /* #if defined (ARM_MATH_DSP) */
/**
 * @} end of CmplxByCmplxMult group
 */