1 /* { dg-require-effective-target vect_int } */
/* N is the element count of every array declared in main and the upper
   bound of its initialisation and comparison loops; foo and the reference
   loop in main iterate N / 3 times.  It is derived from VECTOR_BITS, which
   is not defined in the visible lines.
   NOTE(review): any preprocessor conditional around this definition is not
   visible here -- confirm against the full file.  */
22 #define N (VECTOR_BITS * 3 / 32 + 4)
/* foo: kernel under test.  The visible statements are a counted loop of
   N / 3 iterations and five post-incrementing stores: three through
   pOutput and two through pOutput2.  All four pointer parameters are
   __restrict__-qualified.
   NOTE(review): the function braces, the declarations of i, a, b, c, d, e,
   the statements that assign a..e (presumably loads through pInput and
   pInput2), and the M and K coefficient definitions are not in the
   visible lines -- confirm against the full file.  */
27 void foo (int *__restrict__ pInput
, int *__restrict__ pOutput
,
28 int *__restrict__ pInput2
, int *__restrict__ pOutput2
)
/* Counter i runs from 0 up to, but not including, N / 3.  */
32 for (i
= 0; i
< N
/ 3; i
++)
/* Three consecutive stores through pOutput (presumably the loop body;
   braces not visible): coefficient rows (M00 M01 M02), (M10 M11 M12) and
   (M20 M21 M22), each multiplied element-wise with (a, b, c) and summed.  */
41 *pOutput
++ = M00
* a
+ M01
* b
+ M02
* c
;
42 *pOutput
++ = M10
* a
+ M11
* b
+ M12
* c
;
43 *pOutput
++ = M20
* a
+ M21
* b
+ M22
* c
;
/* Two consecutive stores through pOutput2: rows (K00 K01) and (K10 K11),
   each multiplied element-wise with (d, e) and summed.  */
45 *pOutput2
++ = K00
* d
+ K01
* e
;
46 *pOutput2
++ = K10
* d
+ K11
* e
;
/* main: test driver.  Declares the input and output arrays, obtains the
   expected values, calls foo once, and compares both output arrays with
   the expected values element by element.
   NOTE(review): braces, the body of the initialisation loop, the
   declaration of a, and the action taken on a mismatch are not in the
   visible lines -- confirm against the full file.  */
50 int main (int argc
, const char* argv
[])
52 int input
[N
], output
[N
], i
;
53 int input2
[N
], output2
[N
];
/* Initialisation loop over all N elements.  The only visible body
   statement is an empty volatile asm, presumably there to keep the
   compiler from vectorizing or otherwise transforming this loop -- TODO
   confirm.  */
59 for (i
= 0; i
< N
; i
++)
65 __asm__
volatile ("");
/* Expected values, variant 1: literal tables.
   NOTE(review): check_results and check_results2 are declared a second
   time further down, so the two variants are presumably selected by
   preprocessor conditionals that are not visible here.  */
69 int check_results
[N
] = { 1470, 395, 28271, 5958, 1655, 111653, 10446, 2915,
70 195035, 14934, 4175, 278417, 19422, 5435, 361799,
72 int check_results2
[N
] = { 4322, 135, 13776, 629, 23230, 1123, 32684, 1617,
73 42138, 2111, 0, 0, 0, 0, 0, 0 };
/* Expected values, variant 2: volatile arrays, zero-initialised here and
   filled by the reference loop that follows.  */
75 volatile int check_results
[N
] = {};
76 volatile int check_results2
[N
] = {};
/* Reference computation: N / 3 iterations using the same M and K
   expressions as foo, but with explicit indexing (stride 3 into input and
   check_results, stride 2 into input2 and check_results2).  The loop
   declares its own i, distinct from the i declared at the top of main.  */
78 for (int i
= 0; i
< N
/ 3; i
++)
81 int b
= input
[i
* 3 + 1];
82 int c
= input
[i
* 3 + 2];
83 int d
= input2
[i
* 2];
84 int e
= input2
[i
* 2 + 1];
86 check_results
[i
* 3] = M00
* a
+ M01
* b
+ M02
* c
;
87 check_results
[i
* 3 + 1] = M10
* a
+ M11
* b
+ M12
* c
;
88 check_results
[i
* 3 + 2] = M20
* a
+ M21
* b
+ M22
* c
;
90 check_results2
[i
* 2] = K00
* d
+ K01
* e
;
91 check_results2
[i
* 2 + 1] = K10
* d
+ K11
* e
;
/* Empty asm with a "memory" clobber: a compiler-level memory barrier.  */
93 asm volatile ("" ::: "memory");
/* Run the kernel under test once on the prepared arrays.  */
97 foo (input
, output
, input2
, output2
);
/* Compare all N elements of both outputs with the expected values; the
   condition is true when either array differs at index i.  */
100 for (i
= 0; i
< N
; i
++)
101 if (output
[i
] != check_results
[i
] || output2
[i
] != check_results2
[i
])
107 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
108 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int || vect_load_lanes } } } } */
109 /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
110 /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */