; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
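; Check that speculative load hardening (the speculative_load_hardening
; attribute on #0/#1/#2 below) hardens the x86 AVX2 and AVX-512 gather
; intrinsics: the poison mask derived from %rsp is OR'ed into the base
; pointer in %rdi and broadcast into the vector index before each gather,
; then merged back into %rsp before returning.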

declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, ptr, <4 x i32>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_d_ps(ptr %b, <4 x i32> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdps %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, ptr %b, <4 x i32> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, ptr, <2 x i64>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_q_ps(ptr %b, <2 x i64> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, ptr %b, <2 x i64> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, ptr, <4 x i32>, <2 x double>, i8)

define <2 x double> @test_llvm_x86_avx2_gather_d_pd(ptr %b, <4 x i32> %iv, <2 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdpd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, ptr %b, <4 x i32> %iv, <2 x double> %mask, i8 1)
  ret <2 x double> %v
}

declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, ptr, <2 x i64>, <2 x double>, i8)

define <2 x double> @test_llvm_x86_avx2_gather_q_pd(ptr %b, <2 x i64> %iv, <2 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherqpd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, ptr %b, <2 x i64> %iv, <2 x double> %mask, i8 1)
  ret <2 x double> %v
}

declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, ptr, <8 x i32>, <8 x float>, i8)

define <8 x float> @test_llvm_x86_avx2_gather_d_ps_256(ptr %b, <8 x i32> %iv, <8 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps_256:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherdps %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, ptr %b, <8 x i32> %iv, <8 x float> %mask, i8 1)
  ret <8 x float> %v
}

declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, ptr, <4 x i64>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_q_ps_256(ptr %b, <4 x i64> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps_256:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%ymm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, ptr %b, <4 x i64> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, ptr, <4 x i32>, <4 x double>, i8)

define <4 x double> @test_llvm_x86_avx2_gather_d_pd_256(ptr %b, <4 x i32> %iv, <4 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd_256:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdpd %ymm1, (%rdi,%xmm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, ptr %b, <4 x i32> %iv, <4 x double> %mask, i8 1)
  ret <4 x double> %v
}

declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, ptr, <4 x i64>, <4 x double>, i8)

define <4 x double> @test_llvm_x86_avx2_gather_q_pd_256(ptr %b, <4 x i64> %iv, <4 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd_256:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherqpd %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, ptr %b, <4 x i64> %iv, <4 x double> %mask, i8 1)
  ret <4 x double> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, ptr, <4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_d_d(ptr %b, <4 x i32> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, ptr %b, <4 x i32> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, ptr, <2 x i64>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_q_d(ptr %b, <2 x i64> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> zeroinitializer, ptr %b, <2 x i64> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, ptr, <4 x i32>, <2 x i64>, i8)

define <2 x i64> @test_llvm_x86_avx2_gather_d_q(ptr %b, <4 x i32> %iv, <2 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdq %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, ptr %b, <4 x i32> %iv, <2 x i64> %mask, i8 1)
  ret <2 x i64> %v
}

declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, ptr, <2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_llvm_x86_avx2_gather_q_q(ptr %b, <2 x i64> %iv, <2 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherqq %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, ptr %b, <2 x i64> %iv, <2 x i64> %mask, i8 1)
  ret <2 x i64> %v
}

declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, ptr, <8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_llvm_x86_avx2_gather_d_d_256(ptr %b, <8 x i32> %iv, <8 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d_256:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherdd %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> zeroinitializer, ptr %b, <8 x i32> %iv, <8 x i32> %mask, i8 1)
  ret <8 x i32> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, ptr, <4 x i64>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_q_d_256(ptr %b, <4 x i64> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d_256:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%ymm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> zeroinitializer, ptr %b, <4 x i64> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, ptr, <4 x i32>, <4 x i64>, i8)

define <4 x i64> @test_llvm_x86_avx2_gather_d_q_256(ptr %b, <4 x i32> %iv, <4 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q_256:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdq %ymm1, (%rdi,%xmm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, ptr %b, <4 x i32> %iv, <4 x i64> %mask, i8 1)
  ret <4 x i64> %v
}

declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, ptr, <4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_llvm_x86_avx2_gather_q_q_256(ptr %b, <4 x i64> %iv, <4 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q_256:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherqq %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, ptr %b, <4 x i64> %iv, <4 x i64> %mask, i8 1)
  ret <4 x i64> %v
}

declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, ptr, <16 x i32>, i16, i32)

define <16 x float> @test_llvm_x86_avx512_gather_dps_512(ptr %b, <16 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dps_512:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> zeroinitializer, ptr %b, <16 x i32> %iv, i16 -1, i32 1)
  ret <16 x float> %v
}

declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, ptr, <8 x i32>, i8, i32)

define <8 x double> @test_llvm_x86_avx512_gather_dpd_512(ptr %b, <8 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpd_512:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm2
; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherdpd (%rdi,%ymm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> zeroinitializer, ptr %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x double> %v
}

declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, ptr, <8 x i64>, i8, i32)

define <8 x float> @test_llvm_x86_avx512_gather_qps_512(ptr %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qps_512:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> zeroinitializer, ptr %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x float> %v
}

declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, ptr, <8 x i64>, i8, i32)

define <8 x double> @test_llvm_x86_avx512_gather_qpd_512(ptr %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpd_512:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> zeroinitializer, ptr %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x double> %v
}

declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, ptr, <16 x i32>, i16, i32)

define <16 x i32> @test_llvm_x86_avx512_gather_dpi_512(ptr %b, <16 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpi_512:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> zeroinitializer, ptr %b, <16 x i32> %iv, i16 -1, i32 1)
  ret <16 x i32> %v
}

declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, ptr, <8 x i32>, i8, i32)

define <8 x i64> @test_llvm_x86_avx512_gather_dpq_512(ptr %b, <8 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpq_512:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm2
; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdq (%rdi,%ymm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> zeroinitializer, ptr %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i64> %v
}

declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, ptr, <8 x i64>, i8, i32)

define <8 x i32> @test_llvm_x86_avx512_gather_qpi_512(ptr %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpi_512:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%zmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> zeroinitializer, ptr %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}

declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, ptr, <8 x i64>, i8, i32)

define <8 x i64> @test_llvm_x86_avx512_gather_qpq_512(ptr %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpq_512:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> zeroinitializer, ptr %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x i64> %v
}

declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, ptr, <4 x i32>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3siv4_sf(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_sf:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdps (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, ptr, <2 x i64>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3div4_sf(ptr %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_sf:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherqps (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> zeroinitializer, ptr %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, ptr, <4 x i32>, i8, i32)

define <2 x double> @test_llvm_x86_avx512_gather3siv2_df(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_df:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x double> %v
}

declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, ptr, <2 x i64>, i8, i32)

define <2 x double> @test_llvm_x86_avx512_gather3div2_df(ptr %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_df:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> zeroinitializer, ptr %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x double> %v
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, ptr, <8 x i32>, i8, i32)

define <8 x float> @test_llvm_x86_avx512_gather3siv8_sf(ptr %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_sf:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherdps (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> zeroinitializer, ptr %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x float> %v
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, ptr, <4 x i64>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3div8_sf(ptr %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_sf:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqps (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> zeroinitializer, ptr %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, ptr, <4 x i32>, i8, i32)

define <4 x double> @test_llvm_x86_avx512_gather3siv4_df(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_df:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, ptr, <4 x i64>, i8, i32)

define <4 x double> @test_llvm_x86_avx512_gather3div4_df(ptr %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_df:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> zeroinitializer, ptr %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, ptr, <4 x i32>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3siv4_si(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_si:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, ptr, <2 x i64>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3div4_si(ptr %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_si:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> zeroinitializer, ptr %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, ptr, <4 x i32>, i8, i32)

define <2 x i64> @test_llvm_x86_avx512_gather3siv2_di(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_di:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}

declare <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64>, ptr, <2 x i64>, i8, i32)

define <2 x i64> @test_llvm_x86_avx512_gather3div2_di(ptr %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_di:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64> zeroinitializer, ptr %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, ptr, <8 x i32>, i8, i32)

define <8 x i32> @test_llvm_x86_avx512_gather3siv8_si(ptr %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_si:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> zeroinitializer, ptr %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, ptr, <4 x i64>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3div8_si(ptr %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_si:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> zeroinitializer, ptr %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, ptr, <4 x i32>, i8, i32)

define <4 x i64> @test_llvm_x86_avx512_gather3siv4_di(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_di:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}

declare <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64>, ptr, <4 x i64>, i8, i32)

define <4 x i64> @test_llvm_x86_avx512_gather3div4_di(ptr %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_di:
; CHECK:         # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64> zeroinitializer, ptr %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}

attributes #0 = { nounwind speculative_load_hardening "target-features"="+avx2" }
attributes #1 = { nounwind speculative_load_hardening "target-features"="+avx512f" }
attributes #2 = { nounwind speculative_load_hardening "target-features"="+avx512vl" }