1 /* { dg-do compile } */
2 /* { dg-require-effective-target vect_double } */
3 /* { dg-additional-options "--param vect-epilogues-nomask=0" { target riscv*-*-* } } */
/* One complex number: a real and an imaginary double stored adjacently.  */
typedef struct { double re, im; } dcmlx_t;

/* Four complex numbers in split layout: the four real parts first,
   followed by the four imaginary parts.  */
typedef struct { double re[4], im[4]; } dcmlx4_t;

/* Repack interleaved (re, im) pairs from SRC into split-layout DST.

   Iteration I reads two runs of four consecutive elements,
   src[4*I .. 4*I+3] and src[4*I+N .. 4*I+N+3], and writes them to
   dst[2*I] and dst[2*I+1] respectively.  For N >= 1 the highest index
   read is src[5*N-1] and the highest index written is dst[2*N-1]; the
   caller must provide arrays at least that large.  Nothing is accessed
   when N <= 0.

   The accesses are deliberately written out one statement at a time and
   must stay that way: the dg-final scans in this file expect the
   vectorizer to see a load group of 8 doubles and a store group of
   16 doubles per iteration.  */
void foo_i2(dcmlx4_t dst[], const dcmlx_t src[], int n)
{
  for (int i = 0; i < n; ++i) {
    /* First run of four complex inputs.  */
    dcmlx_t s00 = src[i*4+0];
    dcmlx_t s01 = src[i*4+1];
    dcmlx_t s02 = src[i*4+2];
    dcmlx_t s03 = src[i*4+3];

    /* Second run, starting N elements after the first.  */
    dcmlx_t s10 = src[i*4+0+n];
    dcmlx_t s11 = src[i*4+1+n];
    dcmlx_t s12 = src[i*4+2+n];
    dcmlx_t s13 = src[i*4+3+n];

    /* First run goes to the even-indexed output element.  */
    dst[i*2+0].re[0] = s00.re;
    dst[i*2+0].re[1] = s01.re;
    dst[i*2+0].re[2] = s02.re;
    dst[i*2+0].re[3] = s03.re;
    dst[i*2+0].im[0] = s00.im;
    dst[i*2+0].im[1] = s01.im;
    dst[i*2+0].im[2] = s02.im;
    dst[i*2+0].im[3] = s03.im;

    /* Second run goes to the odd-indexed output element.  */
    dst[i*2+1].re[0] = s10.re;
    dst[i*2+1].re[1] = s11.re;
    dst[i*2+1].re[2] = s12.re;
    dst[i*2+1].re[3] = s13.re;
    dst[i*2+1].im[0] = s10.im;
    dst[i*2+1].im[1] = s11.im;
    dst[i*2+1].im[2] = s12.im;
    dst[i*2+1].im[3] = s13.im;
  }
}
41 /* The first step to produce optimal code is to appropriately detect the
42 load and store groups. */
43 /* { dg-final { scan-tree-dump "Detected interleaving load of size 8" "vect" } } */
44 /* { dg-final { scan-tree-dump "Detected interleaving store of size 16" "vect" } } */
/* For targets without vect_hw_misalign we currently cannot make the accesses
   well-aligned, hence the xfail below; it would be possible by peeling the
   loop to align the stores and using re-aligning loads.  */
47 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { ! vect_hw_misalign } } } } */
48 /* { dg-final { scan-tree-dump-not "gap of 6 elements" "vect" } } */