1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
5 declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
6 declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
7 declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
8 declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
10 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
11 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
13 ; Tests showing replacement of variable rotates with immediate splat versions.
; Merge-masked (%x1 passthru), zero-masked, and unmasked calls to the variable
; rotate-left intrinsic with splat amounts 5/6/7 must each lower to the
; immediate form vprold; only the mask-register move differs between KNL
; (kmovw) and SKX (kmovd).
15 define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
16 ; KNL-LABEL: test_splat_rol_v16i32:
18 ; KNL-NEXT: vmovdqa64 %zmm1, %zmm3
19 ; KNL-NEXT: kmovw %edi, %k1
20 ; KNL-NEXT: vprold $5, %zmm0, %zmm3 {%k1}
21 ; KNL-NEXT: vprold $6, %zmm0, %zmm1 {%k1} {z}
22 ; KNL-NEXT: vprold $7, %zmm0, %zmm2
23 ; KNL-NEXT: vmovdqa64 %zmm3, %zmm0
26 ; SKX-LABEL: test_splat_rol_v16i32:
28 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm3
29 ; SKX-NEXT: kmovd %edi, %k1
30 ; SKX-NEXT: vprold $5, %zmm0, %zmm3 {%k1}
31 ; SKX-NEXT: vprold $6, %zmm0, %zmm1 {%k1} {z}
32 ; SKX-NEXT: vprold $7, %zmm0, %zmm2
33 ; SKX-NEXT: vmovdqa64 %zmm3, %zmm0
35 %res0 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
36 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>, <16 x i32> zeroinitializer, i16 %x2)
37 %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32> %x1, i16 -1)
38 %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
39 %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1
40 %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2
41 ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
; 64-bit element variant of the splat rotate-left test: prolv.q.512 with splat
; amounts 5/6/7 lowers to immediate vprolq in merge-masked, zero-masked and
; unmasked forms.
44 define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
45 ; KNL-LABEL: test_splat_rol_v8i64:
47 ; KNL-NEXT: vmovdqa64 %zmm1, %zmm3
48 ; KNL-NEXT: kmovw %edi, %k1
49 ; KNL-NEXT: vprolq $5, %zmm0, %zmm3 {%k1}
50 ; KNL-NEXT: vprolq $6, %zmm0, %zmm1 {%k1} {z}
51 ; KNL-NEXT: vprolq $7, %zmm0, %zmm2
52 ; KNL-NEXT: vmovdqa64 %zmm3, %zmm0
55 ; SKX-LABEL: test_splat_rol_v8i64:
57 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm3
58 ; SKX-NEXT: kmovd %edi, %k1
59 ; SKX-NEXT: vprolq $5, %zmm0, %zmm3 {%k1}
60 ; SKX-NEXT: vprolq $6, %zmm0, %zmm1 {%k1} {z}
61 ; SKX-NEXT: vprolq $7, %zmm0, %zmm2
62 ; SKX-NEXT: vmovdqa64 %zmm3, %zmm0
64 %res0 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
65 %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>, <8 x i64> zeroinitializer, i8 %x2)
66 %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>, <8 x i64> %x1, i8 -1)
67 %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
68 %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1
69 %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2
70 ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
; Rotate-right counterpart: prorv.d.512 with splat amounts 5/6/7 lowers to
; immediate vprord in merge-masked, zero-masked and unmasked forms.
73 define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
74 ; KNL-LABEL: test_splat_ror_v16i32:
76 ; KNL-NEXT: vmovdqa64 %zmm1, %zmm3
77 ; KNL-NEXT: kmovw %edi, %k1
78 ; KNL-NEXT: vprord $5, %zmm0, %zmm3 {%k1}
79 ; KNL-NEXT: vprord $6, %zmm0, %zmm1 {%k1} {z}
80 ; KNL-NEXT: vprord $7, %zmm0, %zmm2
81 ; KNL-NEXT: vmovdqa64 %zmm3, %zmm0
84 ; SKX-LABEL: test_splat_ror_v16i32:
86 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm3
87 ; SKX-NEXT: kmovd %edi, %k1
88 ; SKX-NEXT: vprord $5, %zmm0, %zmm3 {%k1}
89 ; SKX-NEXT: vprord $6, %zmm0, %zmm1 {%k1} {z}
90 ; SKX-NEXT: vprord $7, %zmm0, %zmm2
91 ; SKX-NEXT: vmovdqa64 %zmm3, %zmm0
93 %res0 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
94 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>, <16 x i32> zeroinitializer, i16 %x2)
95 %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32> %x1, i16 -1)
96 %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
97 %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1
98 %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2
99 ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
; 64-bit element rotate-right: prorv.q.512 with splat amounts 5/6/7 lowers to
; immediate vprorq in merge-masked, zero-masked and unmasked forms.
102 define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
103 ; KNL-LABEL: test_splat_ror_v8i64:
105 ; KNL-NEXT: vmovdqa64 %zmm1, %zmm3
106 ; KNL-NEXT: kmovw %edi, %k1
107 ; KNL-NEXT: vprorq $5, %zmm0, %zmm3 {%k1}
108 ; KNL-NEXT: vprorq $6, %zmm0, %zmm1 {%k1} {z}
109 ; KNL-NEXT: vprorq $7, %zmm0, %zmm2
110 ; KNL-NEXT: vmovdqa64 %zmm3, %zmm0
113 ; SKX-LABEL: test_splat_ror_v8i64:
115 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm3
116 ; SKX-NEXT: kmovd %edi, %k1
117 ; SKX-NEXT: vprorq $5, %zmm0, %zmm3 {%k1}
118 ; SKX-NEXT: vprorq $6, %zmm0, %zmm1 {%k1} {z}
119 ; SKX-NEXT: vprorq $7, %zmm0, %zmm2
120 ; SKX-NEXT: vmovdqa64 %zmm3, %zmm0
122 %res0 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
123 %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>, <8 x i64> zeroinitializer, i8 %x2)
124 %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>, <8 x i64> %x1, i8 -1)
125 %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
126 %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1
127 %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2
128 ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
131 ; Tests showing replacement of out-of-bounds variable rotates with in-bounds immediate splat versions.
; Out-of-range splat amounts must be reduced modulo the 32-bit element width:
; 33 -> 1, -1 -> 31, 65534 -> 30, each still selecting the immediate vprold.
133 define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
134 ; KNL-LABEL: test_splat_bounds_rol_v16i32:
136 ; KNL-NEXT: vmovdqa64 %zmm1, %zmm3
137 ; KNL-NEXT: kmovw %edi, %k1
138 ; KNL-NEXT: vprold $1, %zmm0, %zmm3 {%k1}
139 ; KNL-NEXT: vprold $31, %zmm0, %zmm1 {%k1} {z}
140 ; KNL-NEXT: vprold $30, %zmm0, %zmm2
141 ; KNL-NEXT: vmovdqa64 %zmm3, %zmm0
144 ; SKX-LABEL: test_splat_bounds_rol_v16i32:
146 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm3
147 ; SKX-NEXT: kmovd %edi, %k1
148 ; SKX-NEXT: vprold $1, %zmm0, %zmm3 {%k1}
149 ; SKX-NEXT: vprold $31, %zmm0, %zmm1 {%k1} {z}
150 ; SKX-NEXT: vprold $30, %zmm0, %zmm2
151 ; SKX-NEXT: vmovdqa64 %zmm3, %zmm0
153 %res0 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
154 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
155 %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
156 %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
157 %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1
158 %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2
159 ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
; Out-of-range splat amounts reduced modulo the 64-bit element width:
; 65534 -> 62, 65 -> 1, -1 -> 63, each still selecting the immediate vprolq.
162 define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
163 ; KNL-LABEL: test_splat_bounds_rol_v8i64:
165 ; KNL-NEXT: vmovdqa64 %zmm1, %zmm3
166 ; KNL-NEXT: kmovw %edi, %k1
167 ; KNL-NEXT: vprolq $62, %zmm0, %zmm3 {%k1}
168 ; KNL-NEXT: vprolq $1, %zmm0, %zmm1 {%k1} {z}
169 ; KNL-NEXT: vprolq $63, %zmm0, %zmm2
170 ; KNL-NEXT: vmovdqa64 %zmm3, %zmm0
173 ; SKX-LABEL: test_splat_bounds_rol_v8i64:
175 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm3
176 ; SKX-NEXT: kmovd %edi, %k1
177 ; SKX-NEXT: vprolq $62, %zmm0, %zmm3 {%k1}
178 ; SKX-NEXT: vprolq $1, %zmm0, %zmm1 {%k1} {z}
179 ; SKX-NEXT: vprolq $63, %zmm0, %zmm2
180 ; SKX-NEXT: vmovdqa64 %zmm3, %zmm0
182 %res0 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
183 %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
184 %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
185 %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
186 %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1
187 %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2
188 ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
; Rotate-right with out-of-range splat amounts, reduced modulo 32:
; 33 -> 1, -1 -> 31, 65534 -> 30, each still selecting the immediate vprord.
191 define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
192 ; KNL-LABEL: test_splat_bounds_ror_v16i32:
194 ; KNL-NEXT: vmovdqa64 %zmm1, %zmm3
195 ; KNL-NEXT: kmovw %edi, %k1
196 ; KNL-NEXT: vprord $1, %zmm0, %zmm3 {%k1}
197 ; KNL-NEXT: vprord $31, %zmm0, %zmm1 {%k1} {z}
198 ; KNL-NEXT: vprord $30, %zmm0, %zmm2
199 ; KNL-NEXT: vmovdqa64 %zmm3, %zmm0
202 ; SKX-LABEL: test_splat_bounds_ror_v16i32:
204 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm3
205 ; SKX-NEXT: kmovd %edi, %k1
206 ; SKX-NEXT: vprord $1, %zmm0, %zmm3 {%k1}
207 ; SKX-NEXT: vprord $31, %zmm0, %zmm1 {%k1} {z}
208 ; SKX-NEXT: vprord $30, %zmm0, %zmm2
209 ; SKX-NEXT: vmovdqa64 %zmm3, %zmm0
211 %res0 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
212 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
213 %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
214 %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
215 %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1
216 %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2
217 ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
; Rotate-right with out-of-range splat amounts, reduced modulo 64:
; 65534 -> 62, 65 -> 1, -1 -> 63, each still selecting the immediate vprorq.
220 define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
221 ; KNL-LABEL: test_splat_bounds_ror_v8i64:
223 ; KNL-NEXT: vmovdqa64 %zmm1, %zmm3
224 ; KNL-NEXT: kmovw %edi, %k1
225 ; KNL-NEXT: vprorq $62, %zmm0, %zmm3 {%k1}
226 ; KNL-NEXT: vprorq $1, %zmm0, %zmm1 {%k1} {z}
227 ; KNL-NEXT: vprorq $63, %zmm0, %zmm2
228 ; KNL-NEXT: vmovdqa64 %zmm3, %zmm0
231 ; SKX-LABEL: test_splat_bounds_ror_v8i64:
233 ; SKX-NEXT: vmovdqa64 %zmm1, %zmm3
234 ; SKX-NEXT: kmovd %edi, %k1
235 ; SKX-NEXT: vprorq $62, %zmm0, %zmm3 {%k1}
236 ; SKX-NEXT: vprorq $1, %zmm0, %zmm1 {%k1} {z}
237 ; SKX-NEXT: vprorq $63, %zmm0, %zmm2
238 ; SKX-NEXT: vmovdqa64 %zmm3, %zmm0
240 %res0 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
241 %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
242 %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
243 %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
244 %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1
245 %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2
246 ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
250 ; We also test with a target shuffle so that this can't be constant folded upon creation, it must
251 ; wait until the target shuffle has been constant folded in combineX86ShufflesRecursively.
; Both rotate operands are constant, so the whole rotate constant-folds into a
; single constant-pool load. Expected vector is 1 rol'd by each amount mod 64
; (0,1,2,63,65,65534,65535,-1 -> 0,1,2,63,1,62,63,63).
253 define <8 x i64> @test_fold_rol_v8i64() {
254 ; CHECK-LABEL: test_fold_rol_v8i64:
256 ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [1,2,4,9223372036854775808,2,4611686018427387904,9223372036854775808,9223372036854775808]
258 %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> <i64 0, i64 1, i64 2, i64 63, i64 65, i64 65534, i64 65535, i64 -1>, <8 x i64> zeroinitializer, i8 -1)
; The vpermt2var shuffle of a splat-of-1 constant must be folded (by target
; shuffle combining) into a plain broadcast of 1; the rotate amounts 0..15 are
; not uniform, so the rotate stays as a variable vprolvd reading its amounts
; from the constant pool rather than using an immediate form.
262 define <16 x i32> @test_fold_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
263 ; CHECK-LABEL: test_fold_rol_v16i32:
265 ; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
266 ; CHECK-NEXT: vprolvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
268 %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
269 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
; Same pattern as test_fold_rol_v16i32 but 64-bit rotate-right: the
; vpermt2var of a splat-of-1 constant (with an undef shuffle index) folds to a
; broadcast, and the non-uniform amounts 0..7 keep the variable vprorvq form
; with a constant-pool operand.
273 define <8 x i64> @test_fold_ror_v8i64() {
274 ; CHECK-LABEL: test_fold_ror_v8i64:
276 ; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
277 ; CHECK-NEXT: vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
279 %res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> zeroinitializer, i8 -1)
280 %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %res0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> zeroinitializer, i8 -1)
; 32-bit rotate-right counterpart of test_fold_rol_v16i32: the vpermt2var of a
; splat-of-1 constant folds to a broadcast, and the non-uniform amounts 0..15
; keep the variable vprorvd form with a constant-pool operand.
284 define <16 x i32> @test_fold_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
285 ; CHECK-LABEL: test_fold_ror_v16i32:
287 ; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
288 ; CHECK-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
290 %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
291 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)