before merging master
[inav.git] / lib / main / CMSIS / DSP / Source / ComplexMathFunctions / arm_cmplx_mult_cmplx_q31.c
blobb01c4f675d6c085f5b395646b74ad3cc355ab244
/* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        arm_cmplx_mult_cmplx_q31.c
 * Description:  Q31 complex-by-complex multiplication
 *
 * $Date:        27. January 2017
 * $Revision:    V.1.5.1
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
/*
 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
29 #include "arm_math.h"
/**
 * @ingroup groupCmplxMath
 */
/**
 * @addtogroup CmplxByCmplxMult
 * @{
 */
41 /**
42 * @brief Q31 complex-by-complex multiplication
43 * @param[in] *pSrcA points to the first input vector
44 * @param[in] *pSrcB points to the second input vector
45 * @param[out] *pDst points to the output vector
46 * @param[in] numSamples number of complex samples in each vector
47 * @return none.
49 * <b>Scaling and Overflow Behavior:</b>
50 * \par
51 * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
52 * Input down scaling is not required.
55 void arm_cmplx_mult_cmplx_q31(
56 q31_t * pSrcA,
57 q31_t * pSrcB,
58 q31_t * pDst,
59 uint32_t numSamples)
61 q31_t a, b, c, d; /* Temporary variables to store real and imaginary values */
62 uint32_t blkCnt; /* loop counters */
63 q31_t mul1, mul2, mul3, mul4;
64 q31_t out1, out2;
66 #if defined (ARM_MATH_DSP)
68 /* Run the below code for Cortex-M4 and Cortex-M3 */
70 /* loop Unrolling */
71 blkCnt = numSamples >> 2U;
73 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
74 ** a second loop below computes the remaining 1 to 3 samples. */
75 while (blkCnt > 0U)
77 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
78 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
79 a = *pSrcA++;
80 b = *pSrcA++;
81 c = *pSrcB++;
82 d = *pSrcB++;
84 mul1 = (q31_t) (((q63_t) a * c) >> 32);
85 mul2 = (q31_t) (((q63_t) b * d) >> 32);
86 mul3 = (q31_t) (((q63_t) a * d) >> 32);
87 mul4 = (q31_t) (((q63_t) b * c) >> 32);
89 mul1 = (mul1 >> 1);
90 mul2 = (mul2 >> 1);
91 mul3 = (mul3 >> 1);
92 mul4 = (mul4 >> 1);
94 out1 = mul1 - mul2;
95 out2 = mul3 + mul4;
97 /* store the real result in 3.29 format in the destination buffer. */
98 *pDst++ = out1;
99 /* store the imag result in 3.29 format in the destination buffer. */
100 *pDst++ = out2;
102 a = *pSrcA++;
103 b = *pSrcA++;
104 c = *pSrcB++;
105 d = *pSrcB++;
107 mul1 = (q31_t) (((q63_t) a * c) >> 32);
108 mul2 = (q31_t) (((q63_t) b * d) >> 32);
109 mul3 = (q31_t) (((q63_t) a * d) >> 32);
110 mul4 = (q31_t) (((q63_t) b * c) >> 32);
112 mul1 = (mul1 >> 1);
113 mul2 = (mul2 >> 1);
114 mul3 = (mul3 >> 1);
115 mul4 = (mul4 >> 1);
117 out1 = mul1 - mul2;
118 out2 = mul3 + mul4;
120 /* store the real result in 3.29 format in the destination buffer. */
121 *pDst++ = out1;
122 /* store the imag result in 3.29 format in the destination buffer. */
123 *pDst++ = out2;
125 a = *pSrcA++;
126 b = *pSrcA++;
127 c = *pSrcB++;
128 d = *pSrcB++;
130 mul1 = (q31_t) (((q63_t) a * c) >> 32);
131 mul2 = (q31_t) (((q63_t) b * d) >> 32);
132 mul3 = (q31_t) (((q63_t) a * d) >> 32);
133 mul4 = (q31_t) (((q63_t) b * c) >> 32);
135 mul1 = (mul1 >> 1);
136 mul2 = (mul2 >> 1);
137 mul3 = (mul3 >> 1);
138 mul4 = (mul4 >> 1);
140 out1 = mul1 - mul2;
141 out2 = mul3 + mul4;
143 /* store the real result in 3.29 format in the destination buffer. */
144 *pDst++ = out1;
145 /* store the imag result in 3.29 format in the destination buffer. */
146 *pDst++ = out2;
148 a = *pSrcA++;
149 b = *pSrcA++;
150 c = *pSrcB++;
151 d = *pSrcB++;
153 mul1 = (q31_t) (((q63_t) a * c) >> 32);
154 mul2 = (q31_t) (((q63_t) b * d) >> 32);
155 mul3 = (q31_t) (((q63_t) a * d) >> 32);
156 mul4 = (q31_t) (((q63_t) b * c) >> 32);
158 mul1 = (mul1 >> 1);
159 mul2 = (mul2 >> 1);
160 mul3 = (mul3 >> 1);
161 mul4 = (mul4 >> 1);
163 out1 = mul1 - mul2;
164 out2 = mul3 + mul4;
166 /* store the real result in 3.29 format in the destination buffer. */
167 *pDst++ = out1;
168 /* store the imag result in 3.29 format in the destination buffer. */
169 *pDst++ = out2;
171 /* Decrement the blockSize loop counter */
172 blkCnt--;
175 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
176 ** No loop unrolling is used. */
177 blkCnt = numSamples % 0x4U;
179 while (blkCnt > 0U)
181 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
182 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
183 a = *pSrcA++;
184 b = *pSrcA++;
185 c = *pSrcB++;
186 d = *pSrcB++;
188 mul1 = (q31_t) (((q63_t) a * c) >> 32);
189 mul2 = (q31_t) (((q63_t) b * d) >> 32);
190 mul3 = (q31_t) (((q63_t) a * d) >> 32);
191 mul4 = (q31_t) (((q63_t) b * c) >> 32);
193 mul1 = (mul1 >> 1);
194 mul2 = (mul2 >> 1);
195 mul3 = (mul3 >> 1);
196 mul4 = (mul4 >> 1);
198 out1 = mul1 - mul2;
199 out2 = mul3 + mul4;
201 /* store the real result in 3.29 format in the destination buffer. */
202 *pDst++ = out1;
203 /* store the imag result in 3.29 format in the destination buffer. */
204 *pDst++ = out2;
206 /* Decrement the blockSize loop counter */
207 blkCnt--;
210 #else
212 /* Run the below code for Cortex-M0 */
214 /* loop Unrolling */
215 blkCnt = numSamples >> 1U;
217 /* First part of the processing with loop unrolling. Compute 2 outputs at a time.
218 ** a second loop below computes the remaining 1 sample. */
219 while (blkCnt > 0U)
221 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
222 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
223 a = *pSrcA++;
224 b = *pSrcA++;
225 c = *pSrcB++;
226 d = *pSrcB++;
228 mul1 = (q31_t) (((q63_t) a * c) >> 32);
229 mul2 = (q31_t) (((q63_t) b * d) >> 32);
230 mul3 = (q31_t) (((q63_t) a * d) >> 32);
231 mul4 = (q31_t) (((q63_t) b * c) >> 32);
233 mul1 = (mul1 >> 1);
234 mul2 = (mul2 >> 1);
235 mul3 = (mul3 >> 1);
236 mul4 = (mul4 >> 1);
238 out1 = mul1 - mul2;
239 out2 = mul3 + mul4;
241 /* store the real result in 3.29 format in the destination buffer. */
242 *pDst++ = out1;
243 /* store the imag result in 3.29 format in the destination buffer. */
244 *pDst++ = out2;
246 a = *pSrcA++;
247 b = *pSrcA++;
248 c = *pSrcB++;
249 d = *pSrcB++;
251 mul1 = (q31_t) (((q63_t) a * c) >> 32);
252 mul2 = (q31_t) (((q63_t) b * d) >> 32);
253 mul3 = (q31_t) (((q63_t) a * d) >> 32);
254 mul4 = (q31_t) (((q63_t) b * c) >> 32);
256 mul1 = (mul1 >> 1);
257 mul2 = (mul2 >> 1);
258 mul3 = (mul3 >> 1);
259 mul4 = (mul4 >> 1);
261 out1 = mul1 - mul2;
262 out2 = mul3 + mul4;
264 /* store the real result in 3.29 format in the destination buffer. */
265 *pDst++ = out1;
266 /* store the imag result in 3.29 format in the destination buffer. */
267 *pDst++ = out2;
269 /* Decrement the blockSize loop counter */
270 blkCnt--;
273 /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
274 ** No loop unrolling is used. */
275 blkCnt = numSamples % 0x2U;
277 while (blkCnt > 0U)
279 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
280 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
281 a = *pSrcA++;
282 b = *pSrcA++;
283 c = *pSrcB++;
284 d = *pSrcB++;
286 mul1 = (q31_t) (((q63_t) a * c) >> 32);
287 mul2 = (q31_t) (((q63_t) b * d) >> 32);
288 mul3 = (q31_t) (((q63_t) a * d) >> 32);
289 mul4 = (q31_t) (((q63_t) b * c) >> 32);
291 mul1 = (mul1 >> 1);
292 mul2 = (mul2 >> 1);
293 mul3 = (mul3 >> 1);
294 mul4 = (mul4 >> 1);
296 out1 = mul1 - mul2;
297 out2 = mul3 + mul4;
299 /* store the real result in 3.29 format in the destination buffer. */
300 *pDst++ = out1;
301 /* store the imag result in 3.29 format in the destination buffer. */
302 *pDst++ = out2;
304 /* Decrement the blockSize loop counter */
305 blkCnt--;
308 #endif /* #if defined (ARM_MATH_DSP) */
/**
 * @} end of CmplxByCmplxMult group
 */