; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; AVX-512 masked variable-rotate intrinsics used by the tests in this file.
; Operands as used below: value to rotate, per-element rotate amount,
; passthrough vector, and lane mask (i16 for 16 x i32, i8 for 8 x i64).
declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; Zero-masked two-source permute intrinsics, used by the constant-folding tests
; to produce a value through a target shuffle.
declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Tests showing that variable rotates by a splat constant amount are replaced with the immediate rotate forms.
; Rotate-left of 16 x i32 by the splat amounts 5, 6 and 7 through the
; variable-rotate intrinsic. The three calls use, in order: mask %x2 with
; passthrough %x1 (merge masking), mask %x2 with a zero passthrough (zero
; masking), and an all-ones mask (unmasked). The assertions expect the
; immediate form (vprold $imm) for every call.
define <16 x i32> @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_rol_v16i32:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprold $6, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprold $7, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_rol_v16i32:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprold $6, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprold $7, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32> %x1, i16 -1)
  ; Sum the three results so that none of the rotates is dead.
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}
; Rotate-left of 8 x i64 by the splat amounts 5, 6 and 7 through the
; variable-rotate intrinsic. The three calls use, in order: mask %x2 with
; passthrough %x1 (merge masking), mask %x2 with a zero passthrough (zero
; masking), and an all-ones mask (unmasked). The assertions expect the
; immediate form (vprolq $imm) for every call.
define <8 x i64>@test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_rol_v8i64:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprolq $6, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprolq $7, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_rol_v8i64:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprolq $6, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprolq $7, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>, <8 x i64> %x1, i8 -1)
  ; Sum the three results so that none of the rotates is dead.
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}
; Rotate-right of 16 x i32 by the splat amounts 5, 6 and 7 through the
; variable-rotate intrinsic. The three calls use, in order: mask %x2 with
; passthrough %x1 (merge masking), mask %x2 with a zero passthrough (zero
; masking), and an all-ones mask (unmasked). The assertions expect the
; immediate form (vprord $imm) for every call.
define <16 x i32> @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_ror_v16i32:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprord $6, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprord $7, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_ror_v16i32:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprord $6, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprord $7, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32> %x1, i16 -1)
  ; Sum the three results so that none of the rotates is dead.
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}
; Rotate-right of 8 x i64 by the splat amounts 5, 6 and 7 through the
; variable-rotate intrinsic. The three calls use, in order: mask %x2 with
; passthrough %x1 (merge masking), mask %x2 with a zero passthrough (zero
; masking), and an all-ones mask (unmasked). The assertions expect the
; immediate form (vprorq $imm) for every call.
define <8 x i64>@test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_ror_v8i64:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprorq $6, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprorq $7, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_ror_v8i64:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprorq $6, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprorq $7, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>, <8 x i64> %x1, i8 -1)
  ; Sum the three results so that none of the rotates is dead.
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}
; Tests showing that out-of-bounds splat rotate amounts are replaced with the equivalent in-bounds immediate rotate forms.
; Rotate-left of 16 x i32 by the out-of-range splat amounts 33, -1 and 65534.
; The assertions expect immediate rotates by 1, 31 and 30, i.e. each amount
; reduced modulo the 32-bit element width. Masking follows the same pattern as
; the in-range tests: merge mask, zero mask, then all-ones mask.
define <16 x i32> @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_bounds_rol_v16i32:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprold $30, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_rol_v16i32:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprold $30, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
  ; Sum the three results so that none of the rotates is dead.
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}
; Rotate-left of 8 x i64 by the out-of-range splat amounts 65534, 65 and -1.
; The assertions expect immediate rotates by 62, 1 and 63, i.e. each amount
; reduced modulo the 64-bit element width. Masking follows the same pattern as
; the in-range tests: merge mask, zero mask, then all-ones mask.
define <8 x i64>@test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_bounds_rol_v8i64:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprolq $63, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_rol_v8i64:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprolq $63, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
  ; Sum the three results so that none of the rotates is dead.
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}
; Rotate-right of 16 x i32 by the out-of-range splat amounts 33, -1 and 65534.
; The assertions expect immediate rotates by 1, 31 and 30, i.e. each amount
; reduced modulo the 32-bit element width. Masking follows the same pattern as
; the in-range tests: merge mask, zero mask, then all-ones mask.
define <16 x i32> @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_bounds_ror_v16i32:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprord $30, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_ror_v16i32:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprord $30, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
  ; Sum the three results so that none of the rotates is dead.
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}
; Rotate-right of 8 x i64 by the out-of-range splat amounts 65534, 65 and -1.
; The assertions expect immediate rotates by 62, 1 and 63, i.e. each amount
; reduced modulo the 64-bit element width. Masking follows the same pattern as
; the in-range tests: merge mask, zero mask, then all-ones mask.
define <8 x i64>@test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_bounds_ror_v8i64:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprorq $63, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_ror_v8i64:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprorq $63, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
  ; Sum the three results so that none of the rotates is dead.
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}
; We also test with a target shuffle so that this can't be constant folded upon creation; it must
; wait until the target shuffle has been constant folded in combineX86ShufflesRecursively.
; Rotate-left of a constant splat of 1 by the constant per-lane amounts
; 0, 1, 2, 63, 65, 65534, 65535 and -1 with an all-ones mask. The assertions
; expect the whole rotate to be folded into a single constant vector load.
define <8 x i64> @test_fold_rol_v8i64() {
; CHECK-LABEL: test_fold_rol_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps {{.*#+}} zmm0 = [1,2,4,9223372036854775808,2,4611686018427387904,9223372036854775808,9223372036854775808]
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> <i64 0, i64 1, i64 2, i64 63, i64 65, i64 65534, i64 65535, i64 -1>, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}
; Rotate-left by the constant per-lane amounts 0..15 of a value produced by a
; zero-masked vpermt2var of constants (an index vector and a splat of 1).
; The assertions expect the shuffle to become a broadcast of 1, followed by a
; variable rotate (vprolvd) whose amounts are loaded from the constant pool.
; The arguments %x0 and %x1 are not used by the body.
define <16 x i32> @test_fold_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_fold_rol_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT:    vprolvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res1
}
; Rotate-right by the constant per-lane amounts 0..7 of a value produced by a
; zero-masked vpermt2var of constants (an index vector whose first element is
; undef, and a splat of 1). The assertions expect the shuffle to become a
; broadcast of 1, followed by a variable rotate (vprorvq) whose amounts are
; loaded from the constant pool.
define <8 x i64> @test_fold_ror_v8i64() {
; CHECK-LABEL: test_fold_ror_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT:    vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %res0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res1
}
; Rotate-right by the constant per-lane amounts 0..15 of a value produced by a
; zero-masked vpermt2var of constants (an index vector and a splat of 1).
; The assertions expect the shuffle to become a broadcast of 1, followed by a
; variable rotate (vprorvd) whose amounts are loaded from the constant pool.
; The arguments %x0 and %x1 are not used by the body.
define <16 x i32> @test_fold_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_fold_ror_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res1
}