1 /* { dg-require-effective-target vect_int } */
2 /* { dg-require-effective-target vect_perm } */
/* N: element count used for the test arrays, derived from VECTOR_BITS.
   The loops in this file treat the data as N / 5 groups of five elements.
   NOTE(review): VECTOR_BITS is defined outside this excerpt; the divisor 32
   is presumably the bit width of unsigned int -- confirm.  */
38 #define N (VECTOR_BITS * 5 / 32)
/* foo: on each of N / 5 iterations, stores five consecutive values through
   pOutput.  Each stored value is a weighted sum of a, b, c, d and e that
   uses one row of the coefficients M00..M44.
   NOTE(review): the statements that assign a..e (presumably loads from
   pInput), the definitions of the Mxy coefficients and the braces are not
   visible in this excerpt -- confirm against the full file.  */
43 void foo (unsigned int *__restrict__ pInput
, unsigned int *__restrict__ pOutput
)
45 unsigned int i
, a
, b
, c
, d
, e
;
/* One iteration per group of five elements.  */
47 for (i
= 0; i
< N
/ 5; i
++)
/* First output of the group: coefficients M00..M04.  */
55 *pOutput
++ = M00
* a
+ M01
* b
+ M02
* c
+ M03
* d
+ M04
* e
;
/* Second output of the group: coefficients M10..M14.  */
56 *pOutput
++ = M10
* a
+ M11
* b
+ M12
* c
+ M13
* d
+ M14
* e
;
/* Third output of the group: coefficients M20..M24.  */
57 *pOutput
++ = M20
* a
+ M21
* b
+ M22
* c
+ M23
* d
+ M24
* e
;
/* Fourth output of the group: coefficients M30..M34.  */
58 *pOutput
++ = M30
* a
+ M31
* b
+ M32
* c
+ M33
* d
+ M34
* e
;
/* Fifth output of the group: coefficients M40..M44.  */
59 *pOutput
++ = M40
* a
+ M41
* b
+ M42
* c
+ M43
* d
+ M44
* e
;
/* main: test driver.  Declares input and output buffers of N elements,
   obtains the expected results in check_results and compares output with
   check_results element by element.
   NOTE(review): the call that fills output, the body of the first loop and
   the action taken on a mismatch are not visible in this excerpt -- confirm
   against the full file.  */
63 int main (int argc
, const char* argv
[])
65 unsigned int input
[N
], output
[N
], i
;
/* Loop over all N elements; its body is not visible in this excerpt
   (presumably it initialises input -- confirm).  */
69 for (i
= 0; i
< N
; i
++)
/* Empty asm statement with a "memory" clobber: the compiler must assume
   memory may have been modified at this point.  */
73 asm volatile ("" ::: "memory");
/* check_results declared with 20 literal expected values.
   NOTE(review): a second, volatile declaration of the same name follows
   without an initializer; presumably the two are alternatives selected by a
   preprocessor conditional that is not visible here -- confirm.  */
77 unsigned int check_results
[N
]
78 = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399,
79 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619};
81 volatile unsigned int check_results
[N
];
/* Compute the expected values: one iteration per group of five inputs.  */
83 for (i
= 0; i
< N
/ 5; i
++)
/* Read the five inputs of group i.  */
85 unsigned int a
= input
[i
* 5];
86 unsigned int b
= input
[i
* 5 + 1];
87 unsigned int c
= input
[i
* 5 + 2];
88 unsigned int d
= input
[i
* 5 + 3];
89 unsigned int e
= input
[i
* 5 + 4];
/* Expected first output of the group: coefficients M00..M04.  */
91 check_results
[i
* 5] = M00
* a
+ M01
* b
+ M02
* c
+ M03
* d
+ M04
* e
;
/* NOTE(review): the four expressions below are cut off in this excerpt
   after the c term; the remaining terms and the closing parenthesis are not
   visible.  */
92 check_results
[i
* 5 + 1] = (M10
* a
+ M11
* b
+ M12
* c
94 check_results
[i
* 5 + 2] = (M20
* a
+ M21
* b
+ M22
* c
96 check_results
[i
* 5 + 3] = (M30
* a
+ M31
* b
+ M32
* c
98 check_results
[i
* 5 + 4] = (M40
* a
+ M41
* b
+ M42
* c
/* Compare every output element with its expected value; the statement
   executed on a mismatch is not visible in this excerpt.  */
107 for (i
= 0; i
< N
; i
++)
108 if (output
[i
] != check_results
[i
])
114 /* Currently interleaving is not supported for a group-size of 5. */
116 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
117 /* { dg-final { scan-tree-dump-times "gaps requires scalar epilogue loop" 0 "vect" } } */
118 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */