/* { dg-do compile } */
/* { dg-additional-options "-Ofast" } */
/* { dg-require-effective-target vect_double } */
/* Complex update kernel used as an SLP vectorization test case.

   Data is laid out in groups of eight doubles: four real parts followed
   by the four matching imaginary parts.  For every group, and for each of
   the four lanes k, the loop computes

     y0[c] = y0[c] - x0[c]*conj(f00) - x1[c]*conj(f10);
     y1[c] = y1[c] - x0[c]*conj(f01) - x1[c]*conj(f11);

   where y0 is the first clen2 doubles of Y and y1 the clen2 doubles that
   follow, and the four complex factors f00/f01/f10/f11 are read from the
   heads of X0 and X1 before the loop starts.

   y     in/out buffer; elements [0, 2*clen2) are read and written.
   x0    first input; elements [0, max (clen2, 6 + xi)) are read.
   x1    second input; same extent as x0.
   clen  length parameter; clen2 = (clen + xi) * 2 doubles are processed
         per output half.  */
void foo(double* restrict y, const double* restrict x0,
         const double* restrict x1, int clen)
{
  /* NOTE(review): the declaration of xi was lost from the extracted
     text; `clen & 2' is assumed here -- confirm against the upstream
     test before committing.  */
  int xi = clen & 2;

  /* Complex factors: real parts sit at offset 0, imaginary parts at
     offset 4 of the leading group, two adjacent lanes per input.  */
  double f00_re = x0[0 + xi + 0];
  double f10_re = x1[0 + xi + 0];
  double f01_re = x0[0 + xi + 1];
  double f11_re = x1[0 + xi + 1];
  double f00_im = x0[4 + xi + 0];
  double f10_im = x1[4 + xi + 0];
  double f01_im = x0[4 + xi + 1];
  double f11_im = x1[4 + xi + 1];

  int clen2 = (clen + xi) * 2;

  /* The two output halves live back to back inside Y.  */
  double* y0 = &y[0];
  double* y1 = &y[clen2];

  /* NOTE(review): the residual numbering in the extracted text shows one
     dropped line immediately before each `for' below; if upstream had a
     loop directive there it must be restored by hand.  */
  for (int c = 0; c < clen2; c += 8) {
    // y0[c] = y0[c] - x0[c]*conj(f00) - x1[c]*conj(f10);
    // y1[c] = y1[c] - x0[c]*conj(f01) - x1[c]*conj(f11);
    for (int k = 0; k < 4; ++k) {
      double x0_re = x0[c + 0 + k];
      double x0_im = x0[c + 4 + k];
      double y0_re = y0[c + 0 + k];
      double y0_im = y0[c + 4 + k];
      double y1_re = y1[c + 0 + k];
      double y1_im = y1[c + 4 + k];

      /* Subtract the x0 contribution.  */
      y0_re = y0_re - x0_re * f00_re - x0_im * f00_im;
      y0_im = y0_im + x0_re * f00_im - x0_im * f00_re;
      y1_re = y1_re - x0_re * f01_re - x0_im * f01_im;
      y1_im = y1_im + x0_re * f01_im - x0_im * f01_re;

      double x1_re = x1[c + 0 + k];
      double x1_im = x1[c + 4 + k];

      /* Subtract the x1 contribution.  */
      y0_re = y0_re - x1_re * f10_re - x1_im * f10_im;
      y0_im = y0_im + x1_re * f10_im - x1_im * f10_re;
      y1_re = y1_re - x1_re * f11_re - x1_im * f11_im;
      y1_im = y1_im + x1_re * f11_im - x1_im * f11_re;

      /* Write the updated lanes back; without these stores the whole
         loop nest would be dead code.  */
      y0[c + 0 + k] = y0_re;
      y0[c + 4 + k] = y0_im;
      y1[c + 0 + k] = y1_re;
      y1[c + 4 + k] = y1_im;
    }
  }
}

/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { ! { vect_load_lanes && vect_strided8 } } } } } */
/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" { target { ! { vect_load_lanes && vect_strided8 } } } } } */