1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; ---------------------------------------------------------------------------
; AVX2 gather intrinsics under speculative load hardening (attributes #0,
; "+avx2"). Each test calls a gather with a zero passthrough and checks that
; SLH hardens the load on both of its address components:
;   * a misspeculation predicate is derived from %rsp (movq %rsp / sarq $63),
;   * the scalar base pointer is poisoned (orq %rax, %rdi),
;   * the vector index is poisoned too (vmovq/vpbroadcastq of the predicate,
;     then vpor into the index register), and
;   * %rsp is re-poisoned before return (shlq $47 / orq %rax, %rsp).
; NOTE(review): this chunk appears to be missing interleaved source lines
; (the entry:/ret/closing-brace lines of each define are absent -- original
; line numbers jump). Do not hand-edit the CHECK lines; regenerate with
; utils/update_llc_test_checks.py as stated in the file header.
; ---------------------------------------------------------------------------
4 declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8)
6 define <4 x float> @test_llvm_x86_avx2_gather_d_ps(i8* %b, <4 x i32> %iv, <4 x float> %mask) #0 {
7 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps:
8 ; CHECK: # %bb.0: # %entry
9 ; CHECK-NEXT: movq %rsp, %rax
10 ; CHECK-NEXT: movq $-1, %rcx
11 ; CHECK-NEXT: sarq $63, %rax
12 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
13 ; CHECK-NEXT: orq %rax, %rdi
14 ; CHECK-NEXT: vmovq %rax, %xmm3
15 ; CHECK-NEXT: vpbroadcastq %xmm3, %xmm3
16 ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
17 ; CHECK-NEXT: vgatherdps %xmm1, (%rdi,%xmm0), %xmm2
18 ; CHECK-NEXT: shlq $47, %rax
19 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
20 ; CHECK-NEXT: orq %rax, %rsp
23 %v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x float> %mask, i8 1)
27 declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8)
29 define <4 x float> @test_llvm_x86_avx2_gather_q_ps(i8* %b, <2 x i64> %iv, <4 x float> %mask) #0 {
30 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps:
31 ; CHECK: # %bb.0: # %entry
32 ; CHECK-NEXT: movq %rsp, %rax
33 ; CHECK-NEXT: movq $-1, %rcx
34 ; CHECK-NEXT: sarq $63, %rax
35 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
36 ; CHECK-NEXT: orq %rax, %rdi
37 ; CHECK-NEXT: vmovq %rax, %xmm3
38 ; CHECK-NEXT: vpbroadcastq %xmm3, %xmm3
39 ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
40 ; CHECK-NEXT: vgatherqps %xmm1, (%rdi,%xmm0), %xmm2
41 ; CHECK-NEXT: shlq $47, %rax
42 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
43 ; CHECK-NEXT: orq %rax, %rsp
46 %v = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, i8* %b, <2 x i64> %iv, <4 x float> %mask, i8 1)
50 declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8)
52 define <2 x double> @test_llvm_x86_avx2_gather_d_pd(i8* %b, <4 x i32> %iv, <2 x double> %mask) #0 {
53 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd:
54 ; CHECK: # %bb.0: # %entry
55 ; CHECK-NEXT: movq %rsp, %rax
56 ; CHECK-NEXT: movq $-1, %rcx
57 ; CHECK-NEXT: sarq $63, %rax
58 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
59 ; CHECK-NEXT: orq %rax, %rdi
60 ; CHECK-NEXT: vmovq %rax, %xmm3
61 ; CHECK-NEXT: vpbroadcastq %xmm3, %xmm3
62 ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
63 ; CHECK-NEXT: vgatherdpd %xmm1, (%rdi,%xmm0), %xmm2
64 ; CHECK-NEXT: shlq $47, %rax
65 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
66 ; CHECK-NEXT: orq %rax, %rsp
69 %v = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %b, <4 x i32> %iv, <2 x double> %mask, i8 1)
73 declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8)
75 define <2 x double> @test_llvm_x86_avx2_gather_q_pd(i8* %b, <2 x i64> %iv, <2 x double> %mask) #0 {
76 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd:
77 ; CHECK: # %bb.0: # %entry
78 ; CHECK-NEXT: movq %rsp, %rax
79 ; CHECK-NEXT: movq $-1, %rcx
80 ; CHECK-NEXT: sarq $63, %rax
81 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
82 ; CHECK-NEXT: orq %rax, %rdi
83 ; CHECK-NEXT: vmovq %rax, %xmm3
84 ; CHECK-NEXT: vpbroadcastq %xmm3, %xmm3
85 ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
86 ; CHECK-NEXT: vgatherqpd %xmm1, (%rdi,%xmm0), %xmm2
87 ; CHECK-NEXT: shlq $47, %rax
88 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
89 ; CHECK-NEXT: orq %rax, %rsp
92 %v = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, i8* %b, <2 x i64> %iv, <2 x double> %mask, i8 1)
96 declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8)
98 define <8 x float> @test_llvm_x86_avx2_gather_d_ps_256(i8* %b, <8 x i32> %iv, <8 x float> %mask) #0 {
99 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps_256:
100 ; CHECK: # %bb.0: # %entry
101 ; CHECK-NEXT: movq %rsp, %rax
102 ; CHECK-NEXT: movq $-1, %rcx
103 ; CHECK-NEXT: sarq $63, %rax
104 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
105 ; CHECK-NEXT: orq %rax, %rdi
106 ; CHECK-NEXT: vmovq %rax, %xmm3
107 ; CHECK-NEXT: vpbroadcastq %xmm3, %ymm3
108 ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0
109 ; CHECK-NEXT: vgatherdps %ymm1, (%rdi,%ymm0), %ymm2
110 ; CHECK-NEXT: shlq $47, %rax
111 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
112 ; CHECK-NEXT: orq %rax, %rsp
115 %v = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %b, <8 x i32> %iv, <8 x float> %mask, i8 1)
119 declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8)
121 define <4 x float> @test_llvm_x86_avx2_gather_q_ps_256(i8* %b, <4 x i64> %iv, <4 x float> %mask) #0 {
122 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps_256:
123 ; CHECK: # %bb.0: # %entry
124 ; CHECK-NEXT: movq %rsp, %rax
125 ; CHECK-NEXT: movq $-1, %rcx
126 ; CHECK-NEXT: sarq $63, %rax
127 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
128 ; CHECK-NEXT: orq %rax, %rdi
129 ; CHECK-NEXT: vmovq %rax, %xmm3
130 ; CHECK-NEXT: vpbroadcastq %xmm3, %ymm3
131 ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0
132 ; CHECK-NEXT: vgatherqps %xmm1, (%rdi,%ymm0), %xmm2
133 ; CHECK-NEXT: shlq $47, %rax
134 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
135 ; CHECK-NEXT: orq %rax, %rsp
136 ; CHECK-NEXT: vzeroupper
139 %v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x float> %mask, i8 1)
143 declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8)
145 define <4 x double> @test_llvm_x86_avx2_gather_d_pd_256(i8* %b, <4 x i32> %iv, <4 x double> %mask) #0 {
146 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd_256:
147 ; CHECK: # %bb.0: # %entry
148 ; CHECK-NEXT: movq %rsp, %rax
149 ; CHECK-NEXT: movq $-1, %rcx
150 ; CHECK-NEXT: sarq $63, %rax
151 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
152 ; CHECK-NEXT: orq %rax, %rdi
153 ; CHECK-NEXT: vmovq %rax, %xmm3
154 ; CHECK-NEXT: vpbroadcastq %xmm3, %xmm3
155 ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
156 ; CHECK-NEXT: vgatherdpd %ymm1, (%rdi,%xmm0), %ymm2
157 ; CHECK-NEXT: shlq $47, %rax
158 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
159 ; CHECK-NEXT: orq %rax, %rsp
162 %v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x double> %mask, i8 1)
166 declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8)
168 define <4 x double> @test_llvm_x86_avx2_gather_q_pd_256(i8* %b, <4 x i64> %iv, <4 x double> %mask) #0 {
169 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd_256:
170 ; CHECK: # %bb.0: # %entry
171 ; CHECK-NEXT: movq %rsp, %rax
172 ; CHECK-NEXT: movq $-1, %rcx
173 ; CHECK-NEXT: sarq $63, %rax
174 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
175 ; CHECK-NEXT: orq %rax, %rdi
176 ; CHECK-NEXT: vmovq %rax, %xmm3
177 ; CHECK-NEXT: vpbroadcastq %xmm3, %ymm3
178 ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0
179 ; CHECK-NEXT: vgatherqpd %ymm1, (%rdi,%ymm0), %ymm2
180 ; CHECK-NEXT: shlq $47, %rax
181 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
182 ; CHECK-NEXT: orq %rax, %rsp
185 %v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x double> %mask, i8 1)
189 declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8)
191 define <4 x i32> @test_llvm_x86_avx2_gather_d_d(i8* %b, <4 x i32> %iv, <4 x i32> %mask) #0 {
192 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d:
193 ; CHECK: # %bb.0: # %entry
194 ; CHECK-NEXT: movq %rsp, %rax
195 ; CHECK-NEXT: movq $-1, %rcx
196 ; CHECK-NEXT: sarq $63, %rax
197 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
198 ; CHECK-NEXT: orq %rax, %rdi
199 ; CHECK-NEXT: vmovq %rax, %xmm3
200 ; CHECK-NEXT: vpbroadcastq %xmm3, %xmm3
201 ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
202 ; CHECK-NEXT: vpgatherdd %xmm1, (%rdi,%xmm0), %xmm2
203 ; CHECK-NEXT: shlq $47, %rax
204 ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
205 ; CHECK-NEXT: orq %rax, %rsp
208 %v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x i32> %mask, i8 1)
212 declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8)
214 define <4 x i32> @test_llvm_x86_avx2_gather_q_d(i8* %b, <2 x i64> %iv, <4 x i32> %mask) #0 {
215 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d:
216 ; CHECK: # %bb.0: # %entry
217 ; CHECK-NEXT: movq %rsp, %rax
218 ; CHECK-NEXT: movq $-1, %rcx
219 ; CHECK-NEXT: sarq $63, %rax
220 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
221 ; CHECK-NEXT: orq %rax, %rdi
222 ; CHECK-NEXT: vmovq %rax, %xmm3
223 ; CHECK-NEXT: vpbroadcastq %xmm3, %xmm3
224 ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
225 ; CHECK-NEXT: vpgatherqd %xmm1, (%rdi,%xmm0), %xmm2
226 ; CHECK-NEXT: shlq $47, %rax
227 ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
228 ; CHECK-NEXT: orq %rax, %rsp
231 %v = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> zeroinitializer, i8* %b, <2 x i64> %iv, <4 x i32> %mask, i8 1)
235 declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8)
237 define <2 x i64> @test_llvm_x86_avx2_gather_d_q(i8* %b, <4 x i32> %iv, <2 x i64> %mask) #0 {
238 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q:
239 ; CHECK: # %bb.0: # %entry
240 ; CHECK-NEXT: movq %rsp, %rax
241 ; CHECK-NEXT: movq $-1, %rcx
242 ; CHECK-NEXT: sarq $63, %rax
243 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
244 ; CHECK-NEXT: orq %rax, %rdi
245 ; CHECK-NEXT: vmovq %rax, %xmm3
246 ; CHECK-NEXT: vpbroadcastq %xmm3, %xmm3
247 ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
248 ; CHECK-NEXT: vpgatherdq %xmm1, (%rdi,%xmm0), %xmm2
249 ; CHECK-NEXT: shlq $47, %rax
250 ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
251 ; CHECK-NEXT: orq %rax, %rsp
254 %v = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, <2 x i64> %mask, i8 1)
258 declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8)
260 define <2 x i64> @test_llvm_x86_avx2_gather_q_q(i8* %b, <2 x i64> %iv, <2 x i64> %mask) #0 {
261 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q:
262 ; CHECK: # %bb.0: # %entry
263 ; CHECK-NEXT: movq %rsp, %rax
264 ; CHECK-NEXT: movq $-1, %rcx
265 ; CHECK-NEXT: sarq $63, %rax
266 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
267 ; CHECK-NEXT: orq %rax, %rdi
268 ; CHECK-NEXT: vmovq %rax, %xmm3
269 ; CHECK-NEXT: vpbroadcastq %xmm3, %xmm3
270 ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
271 ; CHECK-NEXT: vpgatherqq %xmm1, (%rdi,%xmm0), %xmm2
272 ; CHECK-NEXT: shlq $47, %rax
273 ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
274 ; CHECK-NEXT: orq %rax, %rsp
277 %v = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, i8* %b, <2 x i64> %iv, <2 x i64> %mask, i8 1)
281 declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8)
283 define <8 x i32> @test_llvm_x86_avx2_gather_d_d_256(i8* %b, <8 x i32> %iv, <8 x i32> %mask) #0 {
284 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d_256:
285 ; CHECK: # %bb.0: # %entry
286 ; CHECK-NEXT: movq %rsp, %rax
287 ; CHECK-NEXT: movq $-1, %rcx
288 ; CHECK-NEXT: sarq $63, %rax
289 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
290 ; CHECK-NEXT: orq %rax, %rdi
291 ; CHECK-NEXT: vmovq %rax, %xmm3
292 ; CHECK-NEXT: vpbroadcastq %xmm3, %ymm3
293 ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0
294 ; CHECK-NEXT: vpgatherdd %ymm1, (%rdi,%ymm0), %ymm2
295 ; CHECK-NEXT: shlq $47, %rax
296 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
297 ; CHECK-NEXT: orq %rax, %rsp
300 %v = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> zeroinitializer, i8* %b, <8 x i32> %iv, <8 x i32> %mask, i8 1)
304 declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8)
306 define <4 x i32> @test_llvm_x86_avx2_gather_q_d_256(i8* %b, <4 x i64> %iv, <4 x i32> %mask) #0 {
307 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d_256:
308 ; CHECK: # %bb.0: # %entry
309 ; CHECK-NEXT: movq %rsp, %rax
310 ; CHECK-NEXT: movq $-1, %rcx
311 ; CHECK-NEXT: sarq $63, %rax
312 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
313 ; CHECK-NEXT: orq %rax, %rdi
314 ; CHECK-NEXT: vmovq %rax, %xmm3
315 ; CHECK-NEXT: vpbroadcastq %xmm3, %ymm3
316 ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0
317 ; CHECK-NEXT: vpgatherqd %xmm1, (%rdi,%ymm0), %xmm2
318 ; CHECK-NEXT: shlq $47, %rax
319 ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
320 ; CHECK-NEXT: orq %rax, %rsp
321 ; CHECK-NEXT: vzeroupper
324 %v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x i32> %mask, i8 1)
328 declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8)
330 define <4 x i64> @test_llvm_x86_avx2_gather_d_q_256(i8* %b, <4 x i32> %iv, <4 x i64> %mask) #0 {
331 ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q_256:
332 ; CHECK: # %bb.0: # %entry
333 ; CHECK-NEXT: movq %rsp, %rax
334 ; CHECK-NEXT: movq $-1, %rcx
335 ; CHECK-NEXT: sarq $63, %rax
336 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
337 ; CHECK-NEXT: orq %rax, %rdi
338 ; CHECK-NEXT: vmovq %rax, %xmm3
339 ; CHECK-NEXT: vpbroadcastq %xmm3, %xmm3
340 ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
341 ; CHECK-NEXT: vpgatherdq %ymm1, (%rdi,%xmm0), %ymm2
342 ; CHECK-NEXT: shlq $47, %rax
343 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
344 ; CHECK-NEXT: orq %rax, %rsp
347 %v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x i64> %mask, i8 1)
351 declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8)
353 define <4 x i64> @test_llvm_x86_avx2_gather_q_q_256(i8* %b, <4 x i64> %iv, <4 x i64> %mask) #0 {
354 ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q_256:
355 ; CHECK: # %bb.0: # %entry
356 ; CHECK-NEXT: movq %rsp, %rax
357 ; CHECK-NEXT: movq $-1, %rcx
358 ; CHECK-NEXT: sarq $63, %rax
359 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
360 ; CHECK-NEXT: orq %rax, %rdi
361 ; CHECK-NEXT: vmovq %rax, %xmm3
362 ; CHECK-NEXT: vpbroadcastq %xmm3, %ymm3
363 ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0
364 ; CHECK-NEXT: vpgatherqq %ymm1, (%rdi,%ymm0), %ymm2
365 ; CHECK-NEXT: shlq $47, %rax
366 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
367 ; CHECK-NEXT: orq %rax, %rsp
370 %v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x i64> %mask, i8 1)
; ---------------------------------------------------------------------------
; AVX-512F gather (and gather-prefetch) intrinsics under speculative load
; hardening (attributes #1, "+avx512f"). Unlike the AVX2 tests above, the
; all-ones mask is materialized as a k-register (kxnorw %k0, %k0, %k1), and
; the index poison is broadcast straight from %rax (vpbroadcastq %rax, %zmm)
; where the index is 512-bit; 256-bit indices still go through vmovq +
; vpbroadcastq on xmm/ymm. The base-pointer poison (orq %rax, %rdi) and the
; %rsp re-poison on exit (shlq $47 / orq %rax, %rsp) match the AVX2 cases.
; NOTE(review): interleaved source lines (entry:/ret/}) are missing from
; this chunk -- regenerate with utils/update_llc_test_checks.py rather than
; editing CHECK lines by hand.
; ---------------------------------------------------------------------------
374 declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, i8*, <16 x i32>, i16, i32)
376 define <16 x float> @test_llvm_x86_avx512_gather_dps_512(i8* %b, <16 x i32> %iv) #1 {
377 ; CHECK-LABEL: test_llvm_x86_avx512_gather_dps_512:
378 ; CHECK: # %bb.0: # %entry
379 ; CHECK-NEXT: movq %rsp, %rax
380 ; CHECK-NEXT: movq $-1, %rcx
381 ; CHECK-NEXT: sarq $63, %rax
382 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
383 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
384 ; CHECK-NEXT: orq %rax, %rdi
385 ; CHECK-NEXT: vpbroadcastq %rax, %zmm2
386 ; CHECK-NEXT: vporq %zmm0, %zmm2, %zmm0
387 ; CHECK-NEXT: vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
388 ; CHECK-NEXT: shlq $47, %rax
389 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
390 ; CHECK-NEXT: orq %rax, %rsp
393 %v = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> zeroinitializer, i8* %b, <16 x i32> %iv, i16 -1, i32 1)
397 declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, i8*, <8 x i32>, i8, i32)
399 define <8 x double> @test_llvm_x86_avx512_gather_dpd_512(i8* %b, <8 x i32> %iv) #1 {
400 ; CHECK-LABEL: test_llvm_x86_avx512_gather_dpd_512:
401 ; CHECK: # %bb.0: # %entry
402 ; CHECK-NEXT: movq %rsp, %rax
403 ; CHECK-NEXT: movq $-1, %rcx
404 ; CHECK-NEXT: sarq $63, %rax
405 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
406 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
407 ; CHECK-NEXT: orq %rax, %rdi
408 ; CHECK-NEXT: vmovq %rax, %xmm2
409 ; CHECK-NEXT: vpbroadcastq %xmm2, %ymm2
410 ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0
411 ; CHECK-NEXT: vgatherdpd (%rdi,%ymm0), %zmm1 {%k1}
412 ; CHECK-NEXT: shlq $47, %rax
413 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
414 ; CHECK-NEXT: orq %rax, %rsp
417 %v = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
421 declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, i8*, <8 x i64>, i8, i32)
423 define <8 x float> @test_llvm_x86_avx512_gather_qps_512(i8* %b, <8 x i64> %iv) #1 {
424 ; CHECK-LABEL: test_llvm_x86_avx512_gather_qps_512:
425 ; CHECK: # %bb.0: # %entry
426 ; CHECK-NEXT: movq %rsp, %rax
427 ; CHECK-NEXT: movq $-1, %rcx
428 ; CHECK-NEXT: sarq $63, %rax
429 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
430 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
431 ; CHECK-NEXT: orq %rax, %rdi
432 ; CHECK-NEXT: vpbroadcastq %rax, %zmm2
433 ; CHECK-NEXT: vporq %zmm0, %zmm2, %zmm0
434 ; CHECK-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
435 ; CHECK-NEXT: shlq $47, %rax
436 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
437 ; CHECK-NEXT: orq %rax, %rsp
440 %v = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
444 declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, i8*, <8 x i64>, i8, i32)
446 define <8 x double> @test_llvm_x86_avx512_gather_qpd_512(i8* %b, <8 x i64> %iv) #1 {
447 ; CHECK-LABEL: test_llvm_x86_avx512_gather_qpd_512:
448 ; CHECK: # %bb.0: # %entry
449 ; CHECK-NEXT: movq %rsp, %rax
450 ; CHECK-NEXT: movq $-1, %rcx
451 ; CHECK-NEXT: sarq $63, %rax
452 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
453 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
454 ; CHECK-NEXT: orq %rax, %rdi
455 ; CHECK-NEXT: vpbroadcastq %rax, %zmm2
456 ; CHECK-NEXT: vporq %zmm0, %zmm2, %zmm0
457 ; CHECK-NEXT: vgatherqpd (%rdi,%zmm0), %zmm1 {%k1}
458 ; CHECK-NEXT: shlq $47, %rax
459 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
460 ; CHECK-NEXT: orq %rax, %rsp
463 %v = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
467 declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, i8*, <16 x i32>, i16, i32)
469 define <16 x i32> @test_llvm_x86_avx512_gather_dpi_512(i8* %b, <16 x i32> %iv) #1 {
470 ; CHECK-LABEL: test_llvm_x86_avx512_gather_dpi_512:
471 ; CHECK: # %bb.0: # %entry
472 ; CHECK-NEXT: movq %rsp, %rax
473 ; CHECK-NEXT: movq $-1, %rcx
474 ; CHECK-NEXT: sarq $63, %rax
475 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
476 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
477 ; CHECK-NEXT: orq %rax, %rdi
478 ; CHECK-NEXT: vpbroadcastq %rax, %zmm2
479 ; CHECK-NEXT: vporq %zmm0, %zmm2, %zmm0
480 ; CHECK-NEXT: vpgatherdd (%rdi,%zmm0), %zmm1 {%k1}
481 ; CHECK-NEXT: shlq $47, %rax
482 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
483 ; CHECK-NEXT: orq %rax, %rsp
486 %v = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> zeroinitializer, i8* %b, <16 x i32> %iv, i16 -1, i32 1)
490 declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, i8*, <8 x i32>, i8, i32)
492 define <8 x i64> @test_llvm_x86_avx512_gather_dpq_512(i8* %b, <8 x i32> %iv) #1 {
493 ; CHECK-LABEL: test_llvm_x86_avx512_gather_dpq_512:
494 ; CHECK: # %bb.0: # %entry
495 ; CHECK-NEXT: movq %rsp, %rax
496 ; CHECK-NEXT: movq $-1, %rcx
497 ; CHECK-NEXT: sarq $63, %rax
498 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
499 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
500 ; CHECK-NEXT: orq %rax, %rdi
501 ; CHECK-NEXT: vmovq %rax, %xmm2
502 ; CHECK-NEXT: vpbroadcastq %xmm2, %ymm2
503 ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0
504 ; CHECK-NEXT: vpgatherdq (%rdi,%ymm0), %zmm1 {%k1}
505 ; CHECK-NEXT: shlq $47, %rax
506 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
507 ; CHECK-NEXT: orq %rax, %rsp
510 %v = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
515 declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, i8*, <8 x i64>, i8, i32)
517 define <8 x i32> @test_llvm_x86_avx512_gather_qpi_512(i8* %b, <8 x i64> %iv) #1 {
518 ; CHECK-LABEL: test_llvm_x86_avx512_gather_qpi_512:
519 ; CHECK: # %bb.0: # %entry
520 ; CHECK-NEXT: movq %rsp, %rax
521 ; CHECK-NEXT: movq $-1, %rcx
522 ; CHECK-NEXT: sarq $63, %rax
523 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
524 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
525 ; CHECK-NEXT: orq %rax, %rdi
526 ; CHECK-NEXT: vpbroadcastq %rax, %zmm2
527 ; CHECK-NEXT: vporq %zmm0, %zmm2, %zmm0
528 ; CHECK-NEXT: vpgatherqd (%rdi,%zmm0), %ymm1 {%k1}
529 ; CHECK-NEXT: shlq $47, %rax
530 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
531 ; CHECK-NEXT: orq %rax, %rsp
534 %v = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
538 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, i8*, <8 x i64>, i8, i32)
540 define <8 x i64> @test_llvm_x86_avx512_gather_qpq_512(i8* %b, <8 x i64> %iv) #1 {
541 ; CHECK-LABEL: test_llvm_x86_avx512_gather_qpq_512:
542 ; CHECK: # %bb.0: # %entry
543 ; CHECK-NEXT: movq %rsp, %rax
544 ; CHECK-NEXT: movq $-1, %rcx
545 ; CHECK-NEXT: sarq $63, %rax
546 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
547 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
548 ; CHECK-NEXT: orq %rax, %rdi
549 ; CHECK-NEXT: vpbroadcastq %rax, %zmm2
550 ; CHECK-NEXT: vporq %zmm0, %zmm2, %zmm0
551 ; CHECK-NEXT: vpgatherqq (%rdi,%zmm0), %zmm1 {%k1}
552 ; CHECK-NEXT: shlq $47, %rax
553 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
554 ; CHECK-NEXT: orq %rax, %rsp
557 %v = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
561 declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8*, i32, i32);
563 define void @test_llvm_x86_avx512_gatherpf_qps_512(<8 x i64> %iv, i8* %b) #1 {
564 ; CHECK-LABEL: test_llvm_x86_avx512_gatherpf_qps_512:
565 ; CHECK: # %bb.0: # %entry
566 ; CHECK-NEXT: movq %rsp, %rax
567 ; CHECK-NEXT: movq $-1, %rcx
568 ; CHECK-NEXT: sarq $63, %rax
569 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
570 ; CHECK-NEXT: orq %rax, %rdi
571 ; CHECK-NEXT: vpbroadcastq %rax, %zmm1
572 ; CHECK-NEXT: vporq %zmm0, %zmm1, %zmm0
573 ; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
574 ; CHECK-NEXT: shlq $47, %rax
575 ; CHECK-NEXT: orq %rax, %rsp
576 ; CHECK-NEXT: vzeroupper
579 call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %iv, i8* %b, i32 4, i32 3)
; ---------------------------------------------------------------------------
; AVX-512VL "gather3" intrinsics (128/256-bit gathers with k-register masks)
; under speculative load hardening (attributes #2, "+avx512vl"). The hardening
; pattern mirrors the AVX-512F tests: kxnorw builds the all-ones mask, the
; predicate in %rax is broadcast with vpbroadcastq %rax into an xmm/ymm and
; OR'ed into the index vector, the base pointer is poisoned via
; orq %rax, %rdi, and %rsp is re-poisoned on exit (shlq $47 / orq %rax, %rsp).
; NOTE(review): interleaved source lines (entry:/ret/}) are missing from
; this chunk -- regenerate with utils/update_llc_test_checks.py rather than
; editing CHECK lines by hand.
; ---------------------------------------------------------------------------
583 declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)
585 define <4 x float> @test_llvm_x86_avx512_gather3siv4_sf(i8* %b, <4 x i32> %iv) #2 {
586 ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_sf:
587 ; CHECK: # %bb.0: # %entry
588 ; CHECK-NEXT: movq %rsp, %rax
589 ; CHECK-NEXT: movq $-1, %rcx
590 ; CHECK-NEXT: sarq $63, %rax
591 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
592 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
593 ; CHECK-NEXT: orq %rax, %rdi
594 ; CHECK-NEXT: vpbroadcastq %rax, %xmm2
595 ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0
596 ; CHECK-NEXT: vgatherdps (%rdi,%xmm0), %xmm1 {%k1}
597 ; CHECK-NEXT: shlq $47, %rax
598 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
599 ; CHECK-NEXT: orq %rax, %rsp
602 %v = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
606 declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)
608 define <4 x float> @test_llvm_x86_avx512_gather3div4_sf(i8* %b, <2 x i64> %iv) #2 {
609 ; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_sf:
610 ; CHECK: # %bb.0: # %entry
611 ; CHECK-NEXT: movq %rsp, %rax
612 ; CHECK-NEXT: movq $-1, %rcx
613 ; CHECK-NEXT: sarq $63, %rax
614 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
615 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
616 ; CHECK-NEXT: orq %rax, %rdi
617 ; CHECK-NEXT: vpbroadcastq %rax, %xmm2
618 ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0
619 ; CHECK-NEXT: vgatherqps (%rdi,%xmm0), %xmm1 {%k1}
620 ; CHECK-NEXT: shlq $47, %rax
621 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
622 ; CHECK-NEXT: orq %rax, %rsp
625 %v = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
629 declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)
631 define <2 x double> @test_llvm_x86_avx512_gather3siv2_df(i8* %b, <4 x i32> %iv) #2 {
632 ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_df:
633 ; CHECK: # %bb.0: # %entry
634 ; CHECK-NEXT: movq %rsp, %rax
635 ; CHECK-NEXT: movq $-1, %rcx
636 ; CHECK-NEXT: sarq $63, %rax
637 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
638 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
639 ; CHECK-NEXT: orq %rax, %rdi
640 ; CHECK-NEXT: vpbroadcastq %rax, %xmm2
641 ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0
642 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm0), %xmm1 {%k1}
643 ; CHECK-NEXT: shlq $47, %rax
644 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
645 ; CHECK-NEXT: orq %rax, %rsp
648 %v = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
652 declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)
654 define <2 x double> @test_llvm_x86_avx512_gather3div2_df(i8* %b, <2 x i64> %iv) #2 {
655 ; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_df:
656 ; CHECK: # %bb.0: # %entry
657 ; CHECK-NEXT: movq %rsp, %rax
658 ; CHECK-NEXT: movq $-1, %rcx
659 ; CHECK-NEXT: sarq $63, %rax
660 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
661 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
662 ; CHECK-NEXT: orq %rax, %rdi
663 ; CHECK-NEXT: vpbroadcastq %rax, %xmm2
664 ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0
665 ; CHECK-NEXT: vgatherqpd (%rdi,%xmm0), %xmm1 {%k1}
666 ; CHECK-NEXT: shlq $47, %rax
667 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
668 ; CHECK-NEXT: orq %rax, %rsp
671 %v = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
675 declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)
677 define <8 x float> @test_llvm_x86_avx512_gather3siv8_sf(i8* %b, <8 x i32> %iv) #2 {
678 ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_sf:
679 ; CHECK: # %bb.0: # %entry
680 ; CHECK-NEXT: movq %rsp, %rax
681 ; CHECK-NEXT: movq $-1, %rcx
682 ; CHECK-NEXT: sarq $63, %rax
683 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
684 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
685 ; CHECK-NEXT: orq %rax, %rdi
686 ; CHECK-NEXT: vpbroadcastq %rax, %ymm2
687 ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0
688 ; CHECK-NEXT: vgatherdps (%rdi,%ymm0), %ymm1 {%k1}
689 ; CHECK-NEXT: shlq $47, %rax
690 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
691 ; CHECK-NEXT: orq %rax, %rsp
694 %v = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
698 declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)
700 define <4 x float> @test_llvm_x86_avx512_gather3div8_sf(i8* %b, <4 x i64> %iv) #2 {
701 ; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_sf:
702 ; CHECK: # %bb.0: # %entry
703 ; CHECK-NEXT: movq %rsp, %rax
704 ; CHECK-NEXT: movq $-1, %rcx
705 ; CHECK-NEXT: sarq $63, %rax
706 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
707 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
708 ; CHECK-NEXT: orq %rax, %rdi
709 ; CHECK-NEXT: vpbroadcastq %rax, %ymm2
710 ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0
711 ; CHECK-NEXT: vgatherqps (%rdi,%ymm0), %xmm1 {%k1}
712 ; CHECK-NEXT: shlq $47, %rax
713 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
714 ; CHECK-NEXT: orq %rax, %rsp
715 ; CHECK-NEXT: vzeroupper
718 %v = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
722 declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)
724 define <4 x double> @test_llvm_x86_avx512_gather3siv4_df(i8* %b, <4 x i32> %iv) #2 {
725 ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_df:
726 ; CHECK: # %bb.0: # %entry
727 ; CHECK-NEXT: movq %rsp, %rax
728 ; CHECK-NEXT: movq $-1, %rcx
729 ; CHECK-NEXT: sarq $63, %rax
730 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
731 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
732 ; CHECK-NEXT: orq %rax, %rdi
733 ; CHECK-NEXT: vpbroadcastq %rax, %xmm2
734 ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0
735 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm0), %ymm1 {%k1}
736 ; CHECK-NEXT: shlq $47, %rax
737 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
738 ; CHECK-NEXT: orq %rax, %rsp
741 %v = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
745 declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)
747 define <4 x double> @test_llvm_x86_avx512_gather3div4_df(i8* %b, <4 x i64> %iv) #2 {
748 ; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_df:
749 ; CHECK: # %bb.0: # %entry
750 ; CHECK-NEXT: movq %rsp, %rax
751 ; CHECK-NEXT: movq $-1, %rcx
752 ; CHECK-NEXT: sarq $63, %rax
753 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
754 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
755 ; CHECK-NEXT: orq %rax, %rdi
756 ; CHECK-NEXT: vpbroadcastq %rax, %ymm2
757 ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0
758 ; CHECK-NEXT: vgatherqpd (%rdi,%ymm0), %ymm1 {%k1}
759 ; CHECK-NEXT: shlq $47, %rax
760 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
761 ; CHECK-NEXT: orq %rax, %rsp
764 %v = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
768 declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)
770 define <4 x i32> @test_llvm_x86_avx512_gather3siv4_si(i8* %b, <4 x i32> %iv) #2 {
771 ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_si:
772 ; CHECK: # %bb.0: # %entry
773 ; CHECK-NEXT: movq %rsp, %rax
774 ; CHECK-NEXT: movq $-1, %rcx
775 ; CHECK-NEXT: sarq $63, %rax
776 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
777 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
778 ; CHECK-NEXT: orq %rax, %rdi
779 ; CHECK-NEXT: vpbroadcastq %rax, %xmm2
780 ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0
781 ; CHECK-NEXT: vpgatherdd (%rdi,%xmm0), %xmm1 {%k1}
782 ; CHECK-NEXT: shlq $47, %rax
783 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
784 ; CHECK-NEXT: orq %rax, %rsp
787 %v = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
791 declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)
793 define <4 x i32> @test_llvm_x86_avx512_gather3div4_si(i8* %b, <2 x i64> %iv) #2 {
794 ; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_si:
795 ; CHECK: # %bb.0: # %entry
796 ; CHECK-NEXT: movq %rsp, %rax
797 ; CHECK-NEXT: movq $-1, %rcx
798 ; CHECK-NEXT: sarq $63, %rax
799 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
800 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
801 ; CHECK-NEXT: orq %rax, %rdi
802 ; CHECK-NEXT: vpbroadcastq %rax, %xmm2
803 ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0
804 ; CHECK-NEXT: vpgatherqd (%rdi,%xmm0), %xmm1 {%k1}
805 ; CHECK-NEXT: shlq $47, %rax
806 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
807 ; CHECK-NEXT: orq %rax, %rsp
810 %v = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
814 declare <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)
816 define <2 x i64> @test_llvm_x86_avx512_gather3siv2_di(i8* %b, <4 x i32> %iv) #2 {
817 ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_di:
818 ; CHECK: # %bb.0: # %entry
819 ; CHECK-NEXT: movq %rsp, %rax
820 ; CHECK-NEXT: movq $-1, %rcx
821 ; CHECK-NEXT: sarq $63, %rax
822 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
823 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
824 ; CHECK-NEXT: orq %rax, %rdi
825 ; CHECK-NEXT: vpbroadcastq %rax, %xmm2
826 ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0
827 ; CHECK-NEXT: vpgatherdq (%rdi,%xmm0), %xmm1 {%k1}
828 ; CHECK-NEXT: shlq $47, %rax
829 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
830 ; CHECK-NEXT: orq %rax, %rsp
833 %v = call <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
837 declare <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)
839 define <2 x i64> @test_llvm_x86_avx512_gather3div2_di(i8* %b, <2 x i64> %iv) #2 {
840 ; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_di:
841 ; CHECK: # %bb.0: # %entry
842 ; CHECK-NEXT: movq %rsp, %rax
843 ; CHECK-NEXT: movq $-1, %rcx
844 ; CHECK-NEXT: sarq $63, %rax
845 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
846 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
847 ; CHECK-NEXT: orq %rax, %rdi
848 ; CHECK-NEXT: vpbroadcastq %rax, %xmm2
849 ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0
850 ; CHECK-NEXT: vpgatherqq (%rdi,%xmm0), %xmm1 {%k1}
851 ; CHECK-NEXT: shlq $47, %rax
852 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
853 ; CHECK-NEXT: orq %rax, %rsp
856 %v = call <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
860 declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)
862 define <8 x i32> @test_llvm_x86_avx512_gather3siv8_si(i8* %b, <8 x i32> %iv) #2 {
863 ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_si:
864 ; CHECK: # %bb.0: # %entry
865 ; CHECK-NEXT: movq %rsp, %rax
866 ; CHECK-NEXT: movq $-1, %rcx
867 ; CHECK-NEXT: sarq $63, %rax
868 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
869 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
870 ; CHECK-NEXT: orq %rax, %rdi
871 ; CHECK-NEXT: vpbroadcastq %rax, %ymm2
872 ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0
873 ; CHECK-NEXT: vpgatherdd (%rdi,%ymm0), %ymm1 {%k1}
874 ; CHECK-NEXT: shlq $47, %rax
875 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
876 ; CHECK-NEXT: orq %rax, %rsp
879 %v = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
883 declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)
885 define <4 x i32> @test_llvm_x86_avx512_gather3div8_si(i8* %b, <4 x i64> %iv) #2 {
886 ; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_si:
887 ; CHECK: # %bb.0: # %entry
888 ; CHECK-NEXT: movq %rsp, %rax
889 ; CHECK-NEXT: movq $-1, %rcx
890 ; CHECK-NEXT: sarq $63, %rax
891 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
892 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
893 ; CHECK-NEXT: orq %rax, %rdi
894 ; CHECK-NEXT: vpbroadcastq %rax, %ymm2
895 ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0
896 ; CHECK-NEXT: vpgatherqd (%rdi,%ymm0), %xmm1 {%k1}
897 ; CHECK-NEXT: shlq $47, %rax
898 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
899 ; CHECK-NEXT: orq %rax, %rsp
900 ; CHECK-NEXT: vzeroupper
903 %v = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
907 declare <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)
909 define <4 x i64> @test_llvm_x86_avx512_gather3siv4_di(i8* %b, <4 x i32> %iv) #2 {
910 ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_di:
911 ; CHECK: # %bb.0: # %entry
912 ; CHECK-NEXT: movq %rsp, %rax
913 ; CHECK-NEXT: movq $-1, %rcx
914 ; CHECK-NEXT: sarq $63, %rax
915 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
916 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
917 ; CHECK-NEXT: orq %rax, %rdi
918 ; CHECK-NEXT: vpbroadcastq %rax, %xmm2
919 ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0
920 ; CHECK-NEXT: vpgatherdq (%rdi,%xmm0), %ymm1 {%k1}
921 ; CHECK-NEXT: shlq $47, %rax
922 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
923 ; CHECK-NEXT: orq %rax, %rsp
926 %v = call <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
930 declare <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)
932 define <4 x i64> @test_llvm_x86_avx512_gather3div4_di(i8* %b, <4 x i64> %iv) #2 {
933 ; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_di:
934 ; CHECK: # %bb.0: # %entry
935 ; CHECK-NEXT: movq %rsp, %rax
936 ; CHECK-NEXT: movq $-1, %rcx
937 ; CHECK-NEXT: sarq $63, %rax
938 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
939 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
940 ; CHECK-NEXT: orq %rax, %rdi
941 ; CHECK-NEXT: vpbroadcastq %rax, %ymm2
942 ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0
943 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm0), %ymm1 {%k1}
944 ; CHECK-NEXT: shlq $47, %rax
945 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
946 ; CHECK-NEXT: orq %rax, %rsp
949 %v = call <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
; Attribute groups: every test function opts into speculative load hardening;
; the three groups differ only in the ISA feature set needed by the gathers
; they exercise (#0 AVX2, #1 AVX-512F, #2 AVX-512VL).
953 attributes #0 = { nounwind speculative_load_hardening "target-features"="+avx2" }
954 attributes #1 = { nounwind speculative_load_hardening "target-features"="+avx512f" }
955 attributes #2 = { nounwind speculative_load_hardening "target-features"="+avx512vl" }