 * This program computes a simple vector dot product using hard
 * wired input buffers of 128 samples each. These values are in
8 .include "testutils.inc"
	// load buffer addresses into pointer regs
	// number of loop iterations is 2^N with r4 |= 1<<N
	// to process 128 samples we need 64 iterations
// Arm hardware loop 0. Both the loop-top and loop-bottom addresses are
// loop1, so the loop body is that single instruction; the iteration
// count LC0 is taken from P4. Anything between this LSETUP and loop1
// executes once, before the first pass through the loop body.
	LSETUP (loop1, loop1) LC0 = P4;
	// For now, serialize the two 32-bit loads.
	// These should be done in parallel with the dual MAC.
// Prime the pipeline: fetch the first 32-bit word through each index
// register (post-incrementing I0 and I1) so that R0 and R1 already hold
// operands when the first dual MAC at loop1 executes.
	R0 = [I0++];
	R1 = [I1++];
// loop1 is the whole hardware-loop body: one multi-issue instruction.
//   A1 += R0.H * R1.H   accumulate the product of the high halves
//   A0 += R0.L * R1.L   accumulate the product of the low halves
//   R0 = [I0++]         fetch the next word through I0
//   R1 = [I1++]         fetch the next word through I1
// The two loads are issued in parallel with the dual MAC, so the MAC
// works on the words fetched by the previous load.
loop1:
	A1 += R0.H * R1.H, A0 += R0.L * R1.L
	|| R0 = [I0++]
	|| R1 = [I1++];

// Drain: the last pass through loop1 loaded one more pair of words that
// has not been accumulated yet; this final dual MAC consumes them.
// NOTE(review): with the priming loads and this trailing MAC, the dual
// MAC (and the load pair) runs P4 + 1 times in total. P4 is set outside
// the visible lines -- confirm it matches the buffer length.
	A1 += R0.H * R1.H, A0 += R0.L * R1.L;
	// extract the two partial results from the accumulators
	// and do the final addition
// Verify the result in R0 one 16-bit half at a time using the DBGA
// debug-assert instruction: low half first, then high half.
// (R0 is presumably loaded with the final sum by code outside the
// visible lines -- confirm.)
	DBGA (R0.L, 0x5600);	// 0x00545600 = 0.002574 fract
	DBGA (R0.H, 0x0054);