1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cmplx_mult_cmplx_q31.c
4 * Description: Q31 complex-by-complex multiplication
6 * $Date: 27. January 2017
9 * Target Processor: Cortex-M cores
10 * -------------------------------------------------------------------- */
12 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
14 * SPDX-License-Identifier: Apache-2.0
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
20 * www.apache.org/licenses/LICENSE-2.0
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
32 * @ingroup groupCmplxMath
36 * @addtogroup CmplxByCmplxMult
42 * @brief Q31 complex-by-complex multiplication
43 * @param[in] *pSrcA points to the first input vector
44 * @param[in] *pSrcB points to the second input vector
45 * @param[out] *pDst points to the output vector
46 * @param[in] numSamples number of complex samples in each vector
49 * <b>Scaling and Overflow Behavior:</b>
51 * The function implements 1.31 by 1.31 multiplications, and the final output is converted into 3.29 format.
52 * Input down scaling is not required.
/*
 * Q31 complex-by-complex multiplication.
 *
 * For every complex sample the four cross products a*c, b*d, a*d and b*c are
 * formed in q63_t precision, shifted right by 32 bits and narrowed to q31_t.
 * The formulas these products feed are given in the comments inside each loop.
 * Two code paths exist, selected at compile time by ARM_MATH_DSP.
 */
55 void arm_cmplx_mult_cmplx_q31(
61 q31_t a
, b
, c
, d
; /* Operands of the four products below. NOTE(review): presumably a/b are the real/imaginary parts of the A sample and c/d those of the B sample, judging by the a*c, b*d, a*d, b*c pattern and the formulas in the loop comments - confirm against the loads. */
62 uint32_t blkCnt
; /* Loop counter */
63 q31_t mul1
, mul2
, mul3
, mul4
; /* Partial products a*c, b*d, a*d and b*c, each shifted right by 32 bits */
66 #if defined (ARM_MATH_DSP)
68 /* Path compiled when ARM_MATH_DSP is defined (originally described as the Cortex-M4 and Cortex-M3 path) */
/* Number of groups of four complex samples */
71 blkCnt
= numSamples
>> 2U;
73 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
74 ** a second loop below computes the remaining 1 to 3 samples. */
77 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
78 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
/* Sample 1 of 4: partial products, each widened to q63_t and shifted right by 32 */
84 mul1
= (q31_t
) (((q63_t
) a
* c
) >> 32);
85 mul2
= (q31_t
) (((q63_t
) b
* d
) >> 32);
86 mul3
= (q31_t
) (((q63_t
) a
* d
) >> 32);
87 mul4
= (q31_t
) (((q63_t
) b
* c
) >> 32);
97 /* Store the real result in 3.29 format in the destination buffer. */
99 /* Store the imaginary result in 3.29 format in the destination buffer. */
/* Sample 2 of 4: same four partial products */
107 mul1
= (q31_t
) (((q63_t
) a
* c
) >> 32);
108 mul2
= (q31_t
) (((q63_t
) b
* d
) >> 32);
109 mul3
= (q31_t
) (((q63_t
) a
* d
) >> 32);
110 mul4
= (q31_t
) (((q63_t
) b
* c
) >> 32);
120 /* Store the real result in 3.29 format in the destination buffer. */
122 /* Store the imaginary result in 3.29 format in the destination buffer. */
/* Sample 3 of 4: same four partial products */
130 mul1
= (q31_t
) (((q63_t
) a
* c
) >> 32);
131 mul2
= (q31_t
) (((q63_t
) b
* d
) >> 32);
132 mul3
= (q31_t
) (((q63_t
) a
* d
) >> 32);
133 mul4
= (q31_t
) (((q63_t
) b
* c
) >> 32);
143 /* Store the real result in 3.29 format in the destination buffer. */
145 /* Store the imaginary result in 3.29 format in the destination buffer. */
/* Sample 4 of 4: same four partial products */
153 mul1
= (q31_t
) (((q63_t
) a
* c
) >> 32);
154 mul2
= (q31_t
) (((q63_t
) b
* d
) >> 32);
155 mul3
= (q31_t
) (((q63_t
) a
* d
) >> 32);
156 mul4
= (q31_t
) (((q63_t
) b
* c
) >> 32);
166 /* Store the real result in 3.29 format in the destination buffer. */
168 /* Store the imaginary result in 3.29 format in the destination buffer. */
171 /* Decrement the loop counter */
175 /* If numSamples is not a multiple of 4, compute any remaining output samples here.
176 ** No loop unrolling is used. */
177 blkCnt
= numSamples
% 0x4U
;
181 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
182 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
/* One sample per iteration: same four partial products */
188 mul1
= (q31_t
) (((q63_t
) a
* c
) >> 32);
189 mul2
= (q31_t
) (((q63_t
) b
* d
) >> 32);
190 mul3
= (q31_t
) (((q63_t
) a
* d
) >> 32);
191 mul4
= (q31_t
) (((q63_t
) b
* c
) >> 32);
201 /* Store the real result in 3.29 format in the destination buffer. */
203 /* Store the imaginary result in 3.29 format in the destination buffer. */
206 /* Decrement the loop counter */
212 /* Run the below code for Cortex-M0. NOTE(review): presumably the branch taken when ARM_MATH_DSP is not defined - confirm. */
/* Number of pairs of complex samples */
215 blkCnt
= numSamples
>> 1U;
217 /* First part of the processing with loop unrolling. Compute 2 outputs at a time.
218 ** a second loop below computes the remaining 1 sample. */
221 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
222 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
/* Sample 1 of 2: partial products, each widened to q63_t and shifted right by 32 */
228 mul1
= (q31_t
) (((q63_t
) a
* c
) >> 32);
229 mul2
= (q31_t
) (((q63_t
) b
* d
) >> 32);
230 mul3
= (q31_t
) (((q63_t
) a
* d
) >> 32);
231 mul4
= (q31_t
) (((q63_t
) b
* c
) >> 32);
241 /* Store the real result in 3.29 format in the destination buffer. */
243 /* Store the imaginary result in 3.29 format in the destination buffer. */
/* Sample 2 of 2: same four partial products */
251 mul1
= (q31_t
) (((q63_t
) a
* c
) >> 32);
252 mul2
= (q31_t
) (((q63_t
) b
* d
) >> 32);
253 mul3
= (q31_t
) (((q63_t
) a
* d
) >> 32);
254 mul4
= (q31_t
) (((q63_t
) b
* c
) >> 32);
264 /* Store the real result in 3.29 format in the destination buffer. */
266 /* Store the imaginary result in 3.29 format in the destination buffer. */
269 /* Decrement the loop counter */
273 /* If numSamples is not a multiple of 2, compute any remaining output samples here.
274 ** No loop unrolling is used. */
275 blkCnt
= numSamples
% 0x2U
;
279 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
280 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
/* One sample per iteration: same four partial products */
286 mul1
= (q31_t
) (((q63_t
) a
* c
) >> 32);
287 mul2
= (q31_t
) (((q63_t
) b
* d
) >> 32);
288 mul3
= (q31_t
) (((q63_t
) a
* d
) >> 32);
289 mul4
= (q31_t
) (((q63_t
) b
* c
) >> 32);
299 /* Store the real result in 3.29 format in the destination buffer. */
301 /* Store the imaginary result in 3.29 format in the destination buffer. */
304 /* Decrement the loop counter */
308 #endif /* #if defined (ARM_MATH_DSP) */
313 * @} end of CmplxByCmplxMult group