1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mcpu=skylake -mtriple=i386-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X86 %s
3 ; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X64 %s
4 ; RUN: llc < %s -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-avx512f | FileCheck --check-prefix=X64 %s
5 ; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=-avx2 | FileCheck --check-prefix=NOGATHER %s
; Masked gather, 2 x i32: for each lane whose mask bit is set, load an i32 from
; the matching pointer in %ptrs; lanes with a clear mask keep the %passthro value
; (see the scalarized NOGATHER expansion below: testb on each mask lane, then a
; conditional load into the passthru register). %align is the pointer alignment.
declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i32> %passthro)
9 define <2 x i32> @masked_gather_v2i32(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
10 ; X86-LABEL: masked_gather_v2i32:
11 ; X86: # %bb.0: # %entry
12 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
13 ; X86-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
14 ; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
15 ; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
16 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
17 ; X86-NEXT: vpgatherdd %xmm0, (,%xmm2), %xmm1
18 ; X86-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
21 ; X64-LABEL: masked_gather_v2i32:
22 ; X64: # %bb.0: # %entry
23 ; X64-NEXT: vmovdqa (%rdi), %xmm2
24 ; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
25 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
26 ; X64-NEXT: vpslld $31, %xmm0, %xmm0
27 ; X64-NEXT: vpgatherqd %xmm0, (,%xmm2), %xmm1
28 ; X64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
31 ; NOGATHER-LABEL: masked_gather_v2i32:
32 ; NOGATHER: # %bb.0: # %entry
33 ; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
34 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
35 ; NOGATHER-NEXT: testb $1, %al
36 ; NOGATHER-NEXT: je .LBB0_2
37 ; NOGATHER-NEXT: # %bb.1: # %cond.load
38 ; NOGATHER-NEXT: vmovq %xmm2, %rax
39 ; NOGATHER-NEXT: movl (%rax), %eax
40 ; NOGATHER-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
41 ; NOGATHER-NEXT: .LBB0_2: # %else
42 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
43 ; NOGATHER-NEXT: testb $1, %al
44 ; NOGATHER-NEXT: je .LBB0_4
45 ; NOGATHER-NEXT: # %bb.3: # %cond.load1
46 ; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
47 ; NOGATHER-NEXT: movl (%rax), %eax
48 ; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm1, %xmm1
49 ; NOGATHER-NEXT: .LBB0_4: # %else2
50 ; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0
53 %ld = load <2 x i32*>, <2 x i32*>* %ptr
54 %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
58 define <4 x i32> @masked_gather_v2i32_concat(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
59 ; X86-LABEL: masked_gather_v2i32_concat:
60 ; X86: # %bb.0: # %entry
61 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
62 ; X86-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
63 ; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
64 ; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
65 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
66 ; X86-NEXT: vpgatherdd %xmm0, (,%xmm2), %xmm1
67 ; X86-NEXT: vmovdqa %xmm1, %xmm0
70 ; X64-LABEL: masked_gather_v2i32_concat:
71 ; X64: # %bb.0: # %entry
72 ; X64-NEXT: vmovdqa (%rdi), %xmm2
73 ; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
74 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
75 ; X64-NEXT: vpslld $31, %xmm0, %xmm0
76 ; X64-NEXT: vpgatherqd %xmm0, (,%xmm2), %xmm1
77 ; X64-NEXT: vmovdqa %xmm1, %xmm0
80 ; NOGATHER-LABEL: masked_gather_v2i32_concat:
81 ; NOGATHER: # %bb.0: # %entry
82 ; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
83 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
84 ; NOGATHER-NEXT: testb $1, %al
85 ; NOGATHER-NEXT: je .LBB1_2
86 ; NOGATHER-NEXT: # %bb.1: # %cond.load
87 ; NOGATHER-NEXT: vmovq %xmm2, %rax
88 ; NOGATHER-NEXT: movl (%rax), %eax
89 ; NOGATHER-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
90 ; NOGATHER-NEXT: .LBB1_2: # %else
91 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
92 ; NOGATHER-NEXT: testb $1, %al
93 ; NOGATHER-NEXT: je .LBB1_4
94 ; NOGATHER-NEXT: # %bb.3: # %cond.load1
95 ; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
96 ; NOGATHER-NEXT: movl (%rax), %eax
97 ; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm1, %xmm1
98 ; NOGATHER-NEXT: .LBB1_4: # %else2
99 ; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
100 ; NOGATHER-NEXT: retq
102 %ld = load <2 x i32*>, <2 x i32*>* %ptr
103 %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
104 %res2 = shufflevector <2 x i32> %res, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Masked gather, 2 x float: set mask lanes load a float from the matching
; pointer in %ptrs; clear lanes keep the %passthro lane. %align is the
; pointer alignment (0 in the callers below).
declare <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthro)
110 define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
111 ; X86-LABEL: masked_gather_v2float:
112 ; X86: # %bb.0: # %entry
113 ; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
114 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
115 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
116 ; X86-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
117 ; X86-NEXT: vgatherdps %xmm0, (,%xmm2), %xmm1
118 ; X86-NEXT: vmovaps %xmm1, %xmm0
121 ; X64-LABEL: masked_gather_v2float:
122 ; X64: # %bb.0: # %entry
123 ; X64-NEXT: vmovaps (%rdi), %xmm2
124 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
125 ; X64-NEXT: vpslld $31, %xmm0, %xmm0
126 ; X64-NEXT: vgatherqps %xmm0, (,%xmm2), %xmm1
127 ; X64-NEXT: vmovaps %xmm1, %xmm0
130 ; NOGATHER-LABEL: masked_gather_v2float:
131 ; NOGATHER: # %bb.0: # %entry
132 ; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
133 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
134 ; NOGATHER-NEXT: testb $1, %al
135 ; NOGATHER-NEXT: je .LBB2_2
136 ; NOGATHER-NEXT: # %bb.1: # %cond.load
137 ; NOGATHER-NEXT: vmovq %xmm2, %rax
138 ; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
139 ; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
140 ; NOGATHER-NEXT: .LBB2_2: # %else
141 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
142 ; NOGATHER-NEXT: testb $1, %al
143 ; NOGATHER-NEXT: je .LBB2_4
144 ; NOGATHER-NEXT: # %bb.3: # %cond.load1
145 ; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
146 ; NOGATHER-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
147 ; NOGATHER-NEXT: .LBB2_4: # %else2
148 ; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
149 ; NOGATHER-NEXT: retq
151 %ld = load <2 x float*>, <2 x float*>* %ptr
152 %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
156 define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
157 ; X86-LABEL: masked_gather_v2float_concat:
158 ; X86: # %bb.0: # %entry
159 ; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
160 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
161 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
162 ; X86-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
163 ; X86-NEXT: vgatherdps %xmm0, (,%xmm2), %xmm1
164 ; X86-NEXT: vmovaps %xmm1, %xmm0
167 ; X64-LABEL: masked_gather_v2float_concat:
168 ; X64: # %bb.0: # %entry
169 ; X64-NEXT: vmovaps (%rdi), %xmm2
170 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
171 ; X64-NEXT: vpslld $31, %xmm0, %xmm0
172 ; X64-NEXT: vgatherqps %xmm0, (,%xmm2), %xmm1
173 ; X64-NEXT: vmovaps %xmm1, %xmm0
176 ; NOGATHER-LABEL: masked_gather_v2float_concat:
177 ; NOGATHER: # %bb.0: # %entry
178 ; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
179 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
180 ; NOGATHER-NEXT: testb $1, %al
181 ; NOGATHER-NEXT: je .LBB3_2
182 ; NOGATHER-NEXT: # %bb.1: # %cond.load
183 ; NOGATHER-NEXT: vmovq %xmm2, %rax
184 ; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
185 ; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
186 ; NOGATHER-NEXT: .LBB3_2: # %else
187 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
188 ; NOGATHER-NEXT: testb $1, %al
189 ; NOGATHER-NEXT: je .LBB3_4
190 ; NOGATHER-NEXT: # %bb.3: # %cond.load1
191 ; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
192 ; NOGATHER-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
193 ; NOGATHER-NEXT: .LBB3_4: # %else2
194 ; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
195 ; NOGATHER-NEXT: retq
197 %ld = load <2 x float*>, <2 x float*>* %ptr
198 %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
199 %res2 = shufflevector <2 x float> %res, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
200 ret <4 x float> %res2
; Masked gather, 4 x i32: per-lane conditional load from %ptrs under %masks,
; falling back to the %passthro lane when the mask bit is clear.
declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthro)
206 define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i32> %passthro) {
207 ; X86-LABEL: masked_gather_v4i32:
208 ; X86: # %bb.0: # %entry
209 ; X86-NEXT: vpslld $31, %xmm1, %xmm1
210 ; X86-NEXT: vpgatherdd %xmm1, (,%xmm0), %xmm2
211 ; X86-NEXT: vmovdqa %xmm2, %xmm0
214 ; X64-LABEL: masked_gather_v4i32:
215 ; X64: # %bb.0: # %entry
216 ; X64-NEXT: vpslld $31, %xmm1, %xmm1
217 ; X64-NEXT: vpgatherqd %xmm1, (,%ymm0), %xmm2
218 ; X64-NEXT: vmovdqa %xmm2, %xmm0
219 ; X64-NEXT: vzeroupper
222 ; NOGATHER-LABEL: masked_gather_v4i32:
223 ; NOGATHER: # %bb.0: # %entry
224 ; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax
225 ; NOGATHER-NEXT: testb $1, %al
226 ; NOGATHER-NEXT: je .LBB4_2
227 ; NOGATHER-NEXT: # %bb.1: # %cond.load
228 ; NOGATHER-NEXT: vmovq %xmm0, %rax
229 ; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm2, %xmm2
230 ; NOGATHER-NEXT: .LBB4_2: # %else
231 ; NOGATHER-NEXT: vpextrb $4, %xmm1, %eax
232 ; NOGATHER-NEXT: testb $1, %al
233 ; NOGATHER-NEXT: je .LBB4_4
234 ; NOGATHER-NEXT: # %bb.3: # %cond.load1
235 ; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
236 ; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm2, %xmm2
237 ; NOGATHER-NEXT: .LBB4_4: # %else2
238 ; NOGATHER-NEXT: vpextrb $8, %xmm1, %eax
239 ; NOGATHER-NEXT: testb $1, %al
240 ; NOGATHER-NEXT: je .LBB4_6
241 ; NOGATHER-NEXT: # %bb.5: # %cond.load4
242 ; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm3
243 ; NOGATHER-NEXT: vmovq %xmm3, %rax
244 ; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm2, %xmm2
245 ; NOGATHER-NEXT: .LBB4_6: # %else5
246 ; NOGATHER-NEXT: vpextrb $12, %xmm1, %eax
247 ; NOGATHER-NEXT: testb $1, %al
248 ; NOGATHER-NEXT: je .LBB4_8
249 ; NOGATHER-NEXT: # %bb.7: # %cond.load7
250 ; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0
251 ; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
252 ; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm2, %xmm2
253 ; NOGATHER-NEXT: .LBB4_8: # %else8
254 ; NOGATHER-NEXT: vmovdqa %xmm2, %xmm0
255 ; NOGATHER-NEXT: vzeroupper
256 ; NOGATHER-NEXT: retq
258 %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 0, <4 x i1> %masks, <4 x i32> %passthro)
; Masked gather, 4 x float: per-lane conditional load from %ptrs under %masks,
; falling back to the %passthro lane when the mask bit is clear.
declare <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 %align, <4 x i1> %masks, <4 x float> %passthro)
264 define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <4 x float> %passthro) {
265 ; X86-LABEL: masked_gather_v4float:
266 ; X86: # %bb.0: # %entry
267 ; X86-NEXT: vpslld $31, %xmm1, %xmm1
268 ; X86-NEXT: vgatherdps %xmm1, (,%xmm0), %xmm2
269 ; X86-NEXT: vmovaps %xmm2, %xmm0
272 ; X64-LABEL: masked_gather_v4float:
273 ; X64: # %bb.0: # %entry
274 ; X64-NEXT: vpslld $31, %xmm1, %xmm1
275 ; X64-NEXT: vgatherqps %xmm1, (,%ymm0), %xmm2
276 ; X64-NEXT: vmovaps %xmm2, %xmm0
277 ; X64-NEXT: vzeroupper
280 ; NOGATHER-LABEL: masked_gather_v4float:
281 ; NOGATHER: # %bb.0: # %entry
282 ; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax
283 ; NOGATHER-NEXT: testb $1, %al
284 ; NOGATHER-NEXT: je .LBB5_2
285 ; NOGATHER-NEXT: # %bb.1: # %cond.load
286 ; NOGATHER-NEXT: vmovq %xmm0, %rax
287 ; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
288 ; NOGATHER-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
289 ; NOGATHER-NEXT: .LBB5_2: # %else
290 ; NOGATHER-NEXT: vpextrb $4, %xmm1, %eax
291 ; NOGATHER-NEXT: testb $1, %al
292 ; NOGATHER-NEXT: je .LBB5_4
293 ; NOGATHER-NEXT: # %bb.3: # %cond.load1
294 ; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
295 ; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
296 ; NOGATHER-NEXT: .LBB5_4: # %else2
297 ; NOGATHER-NEXT: vpextrb $8, %xmm1, %eax
298 ; NOGATHER-NEXT: testb $1, %al
299 ; NOGATHER-NEXT: je .LBB5_6
300 ; NOGATHER-NEXT: # %bb.5: # %cond.load4
301 ; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm3
302 ; NOGATHER-NEXT: vmovq %xmm3, %rax
303 ; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
304 ; NOGATHER-NEXT: .LBB5_6: # %else5
305 ; NOGATHER-NEXT: vpextrb $12, %xmm1, %eax
306 ; NOGATHER-NEXT: testb $1, %al
307 ; NOGATHER-NEXT: je .LBB5_8
308 ; NOGATHER-NEXT: # %bb.7: # %cond.load7
309 ; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0
310 ; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
311 ; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
312 ; NOGATHER-NEXT: .LBB5_8: # %else8
313 ; NOGATHER-NEXT: vmovaps %xmm2, %xmm0
314 ; NOGATHER-NEXT: vzeroupper
315 ; NOGATHER-NEXT: retq
317 %res = call <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 0, <4 x i1> %masks, <4 x float> %passthro)
; Masked gather, 8 x i32: per-lane conditional load from %ptrs under %masks,
; falling back to the %passthro lane when the mask bit is clear. On X64 this
; splits into two vpgatherqd ops since 8 x 64-bit pointers span two ymm registers.
declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 %align, <8 x i1> %masks, <8 x i32> %passthro)
323 define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i32> %passthro) {
324 ; X86-LABEL: masked_gather_v8i32:
325 ; X86: # %bb.0: # %entry
326 ; X86-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
327 ; X86-NEXT: vpslld $31, %ymm0, %ymm0
328 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
329 ; X86-NEXT: vmovdqa (%eax), %ymm2
330 ; X86-NEXT: vpgatherdd %ymm0, (,%ymm2), %ymm1
331 ; X86-NEXT: vmovdqa %ymm1, %ymm0
334 ; X64-LABEL: masked_gather_v8i32:
335 ; X64: # %bb.0: # %entry
336 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
337 ; X64-NEXT: vpslld $31, %ymm0, %ymm0
338 ; X64-NEXT: vpsrad $31, %ymm0, %ymm0
339 ; X64-NEXT: vmovdqa (%rdi), %ymm2
340 ; X64-NEXT: vmovdqa 32(%rdi), %ymm3
341 ; X64-NEXT: vextracti128 $1, %ymm1, %xmm4
342 ; X64-NEXT: vextracti128 $1, %ymm0, %xmm5
343 ; X64-NEXT: vpgatherqd %xmm5, (,%ymm3), %xmm4
344 ; X64-NEXT: vpgatherqd %xmm0, (,%ymm2), %xmm1
345 ; X64-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm0
348 ; NOGATHER-LABEL: masked_gather_v8i32:
349 ; NOGATHER: # %bb.0: # %entry
350 ; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
351 ; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2
352 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
353 ; NOGATHER-NEXT: testb $1, %al
354 ; NOGATHER-NEXT: je .LBB6_2
355 ; NOGATHER-NEXT: # %bb.1: # %cond.load
356 ; NOGATHER-NEXT: vmovq %xmm3, %rax
357 ; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm1, %xmm4
358 ; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
359 ; NOGATHER-NEXT: .LBB6_2: # %else
360 ; NOGATHER-NEXT: vpextrb $2, %xmm0, %eax
361 ; NOGATHER-NEXT: testb $1, %al
362 ; NOGATHER-NEXT: je .LBB6_4
363 ; NOGATHER-NEXT: # %bb.3: # %cond.load1
364 ; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
365 ; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm1, %xmm4
366 ; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
367 ; NOGATHER-NEXT: .LBB6_4: # %else2
368 ; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
369 ; NOGATHER-NEXT: testb $1, %al
370 ; NOGATHER-NEXT: je .LBB6_6
371 ; NOGATHER-NEXT: # %bb.5: # %cond.load4
372 ; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
373 ; NOGATHER-NEXT: vmovq %xmm4, %rax
374 ; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm1, %xmm4
375 ; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
376 ; NOGATHER-NEXT: .LBB6_6: # %else5
377 ; NOGATHER-NEXT: vpextrb $6, %xmm0, %eax
378 ; NOGATHER-NEXT: testb $1, %al
379 ; NOGATHER-NEXT: je .LBB6_8
380 ; NOGATHER-NEXT: # %bb.7: # %cond.load7
381 ; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
382 ; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
383 ; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm1, %xmm3
384 ; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
385 ; NOGATHER-NEXT: .LBB6_8: # %else8
386 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
387 ; NOGATHER-NEXT: testb $1, %al
388 ; NOGATHER-NEXT: je .LBB6_10
389 ; NOGATHER-NEXT: # %bb.9: # %cond.load10
390 ; NOGATHER-NEXT: vmovq %xmm2, %rax
391 ; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
392 ; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
393 ; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
394 ; NOGATHER-NEXT: .LBB6_10: # %else11
395 ; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax
396 ; NOGATHER-NEXT: testb $1, %al
397 ; NOGATHER-NEXT: je .LBB6_12
398 ; NOGATHER-NEXT: # %bb.11: # %cond.load13
399 ; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
400 ; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
401 ; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
402 ; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
403 ; NOGATHER-NEXT: .LBB6_12: # %else14
404 ; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
405 ; NOGATHER-NEXT: testb $1, %al
406 ; NOGATHER-NEXT: je .LBB6_14
407 ; NOGATHER-NEXT: # %bb.13: # %cond.load16
408 ; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
409 ; NOGATHER-NEXT: vmovq %xmm3, %rax
410 ; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
411 ; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
412 ; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
413 ; NOGATHER-NEXT: .LBB6_14: # %else17
414 ; NOGATHER-NEXT: vpextrb $14, %xmm0, %eax
415 ; NOGATHER-NEXT: testb $1, %al
416 ; NOGATHER-NEXT: je .LBB6_16
417 ; NOGATHER-NEXT: # %bb.15: # %cond.load19
418 ; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
419 ; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
420 ; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
421 ; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm0, %xmm0
422 ; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
423 ; NOGATHER-NEXT: .LBB6_16: # %else20
424 ; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
425 ; NOGATHER-NEXT: retq
427 %ld = load <8 x i32*>, <8 x i32*>* %ptr
428 %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ld, i32 0, <8 x i1> %masks, <8 x i32> %passthro)
; Masked gather, 8 x float: per-lane conditional load from %ptrs under %masks,
; falling back to the %passthro lane when the mask bit is clear. On X64 this
; splits into two vgatherqps ops since 8 x 64-bit pointers span two ymm registers.
declare <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ptrs, i32 %align, <8 x i1> %masks, <8 x float> %passthro)
434 define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <8 x float> %passthro) {
435 ; X86-LABEL: masked_gather_v8float:
436 ; X86: # %bb.0: # %entry
437 ; X86-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
438 ; X86-NEXT: vpslld $31, %ymm0, %ymm0
439 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
440 ; X86-NEXT: vmovaps (%eax), %ymm2
441 ; X86-NEXT: vgatherdps %ymm0, (,%ymm2), %ymm1
442 ; X86-NEXT: vmovaps %ymm1, %ymm0
445 ; X64-LABEL: masked_gather_v8float:
446 ; X64: # %bb.0: # %entry
447 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
448 ; X64-NEXT: vpslld $31, %ymm0, %ymm0
449 ; X64-NEXT: vpsrad $31, %ymm0, %ymm0
450 ; X64-NEXT: vmovaps (%rdi), %ymm2
451 ; X64-NEXT: vmovaps 32(%rdi), %ymm3
452 ; X64-NEXT: vextractf128 $1, %ymm1, %xmm4
453 ; X64-NEXT: vextracti128 $1, %ymm0, %xmm5
454 ; X64-NEXT: vgatherqps %xmm5, (,%ymm3), %xmm4
455 ; X64-NEXT: vgatherqps %xmm0, (,%ymm2), %xmm1
456 ; X64-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm0
459 ; NOGATHER-LABEL: masked_gather_v8float:
460 ; NOGATHER: # %bb.0: # %entry
461 ; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
462 ; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2
463 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
464 ; NOGATHER-NEXT: testb $1, %al
465 ; NOGATHER-NEXT: je .LBB7_2
466 ; NOGATHER-NEXT: # %bb.1: # %cond.load
467 ; NOGATHER-NEXT: vmovq %xmm3, %rax
468 ; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
469 ; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0],ymm1[1,2,3,4,5,6,7]
470 ; NOGATHER-NEXT: .LBB7_2: # %else
471 ; NOGATHER-NEXT: vpextrb $2, %xmm0, %eax
472 ; NOGATHER-NEXT: testb $1, %al
473 ; NOGATHER-NEXT: je .LBB7_4
474 ; NOGATHER-NEXT: # %bb.3: # %cond.load1
475 ; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
476 ; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm1[0],mem[0],xmm1[2,3]
477 ; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
478 ; NOGATHER-NEXT: .LBB7_4: # %else2
479 ; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
480 ; NOGATHER-NEXT: testb $1, %al
481 ; NOGATHER-NEXT: je .LBB7_6
482 ; NOGATHER-NEXT: # %bb.5: # %cond.load4
483 ; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
484 ; NOGATHER-NEXT: vmovq %xmm4, %rax
485 ; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm1[0,1],mem[0],xmm1[3]
486 ; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
487 ; NOGATHER-NEXT: .LBB7_6: # %else5
488 ; NOGATHER-NEXT: vpextrb $6, %xmm0, %eax
489 ; NOGATHER-NEXT: testb $1, %al
490 ; NOGATHER-NEXT: je .LBB7_8
491 ; NOGATHER-NEXT: # %bb.7: # %cond.load7
492 ; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
493 ; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
494 ; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],mem[0]
495 ; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
496 ; NOGATHER-NEXT: .LBB7_8: # %else8
497 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
498 ; NOGATHER-NEXT: testb $1, %al
499 ; NOGATHER-NEXT: je .LBB7_10
500 ; NOGATHER-NEXT: # %bb.9: # %cond.load10
501 ; NOGATHER-NEXT: vmovq %xmm2, %rax
502 ; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
503 ; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm4
504 ; NOGATHER-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3]
505 ; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
506 ; NOGATHER-NEXT: .LBB7_10: # %else11
507 ; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax
508 ; NOGATHER-NEXT: testb $1, %al
509 ; NOGATHER-NEXT: je .LBB7_12
510 ; NOGATHER-NEXT: # %bb.11: # %cond.load13
511 ; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
512 ; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
513 ; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
514 ; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
515 ; NOGATHER-NEXT: .LBB7_12: # %else14
516 ; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
517 ; NOGATHER-NEXT: testb $1, %al
518 ; NOGATHER-NEXT: je .LBB7_14
519 ; NOGATHER-NEXT: # %bb.13: # %cond.load16
520 ; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
521 ; NOGATHER-NEXT: vmovq %xmm3, %rax
522 ; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
523 ; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
524 ; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
525 ; NOGATHER-NEXT: .LBB7_14: # %else17
526 ; NOGATHER-NEXT: vpextrb $14, %xmm0, %eax
527 ; NOGATHER-NEXT: testb $1, %al
528 ; NOGATHER-NEXT: je .LBB7_16
529 ; NOGATHER-NEXT: # %bb.15: # %cond.load19
530 ; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
531 ; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
532 ; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
533 ; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
534 ; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
535 ; NOGATHER-NEXT: .LBB7_16: # %else20
536 ; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
537 ; NOGATHER-NEXT: retq
539 %ld = load <8 x float*>, <8 x float*>* %ptr
540 %res = call <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ld, i32 0, <8 x i1> %masks, <8 x float> %passthro)
; Masked gather, 4 x i64: per-lane conditional load from %ptrs under %masks,
; falling back to the %passthro lane when the mask bit is clear.
declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i64> %passthro)
546 define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i64> %passthro) {
547 ; X86-LABEL: masked_gather_v4i64:
548 ; X86: # %bb.0: # %entry
549 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
550 ; X86-NEXT: vpmovsxdq %xmm0, %ymm0
551 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
552 ; X86-NEXT: vmovdqa (%eax), %xmm2
553 ; X86-NEXT: vpgatherdq %ymm0, (,%xmm2), %ymm1
554 ; X86-NEXT: vmovdqa %ymm1, %ymm0
557 ; X64-LABEL: masked_gather_v4i64:
558 ; X64: # %bb.0: # %entry
559 ; X64-NEXT: vpslld $31, %xmm0, %xmm0
560 ; X64-NEXT: vpmovsxdq %xmm0, %ymm0
561 ; X64-NEXT: vmovdqa (%rdi), %ymm2
562 ; X64-NEXT: vpgatherqq %ymm0, (,%ymm2), %ymm1
563 ; X64-NEXT: vmovdqa %ymm1, %ymm0
566 ; NOGATHER-LABEL: masked_gather_v4i64:
567 ; NOGATHER: # %bb.0: # %entry
568 ; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2
569 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
570 ; NOGATHER-NEXT: testb $1, %al
571 ; NOGATHER-NEXT: je .LBB8_2
572 ; NOGATHER-NEXT: # %bb.1: # %cond.load
573 ; NOGATHER-NEXT: vmovq %xmm2, %rax
574 ; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm3
575 ; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
576 ; NOGATHER-NEXT: .LBB8_2: # %else
577 ; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
578 ; NOGATHER-NEXT: testb $1, %al
579 ; NOGATHER-NEXT: je .LBB8_4
580 ; NOGATHER-NEXT: # %bb.3: # %cond.load1
581 ; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
582 ; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm3
583 ; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
584 ; NOGATHER-NEXT: .LBB8_4: # %else2
585 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
586 ; NOGATHER-NEXT: testb $1, %al
587 ; NOGATHER-NEXT: je .LBB8_6
588 ; NOGATHER-NEXT: # %bb.5: # %cond.load4
589 ; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
590 ; NOGATHER-NEXT: vmovq %xmm3, %rax
591 ; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
592 ; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm3, %xmm3
593 ; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
594 ; NOGATHER-NEXT: .LBB8_6: # %else5
595 ; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
596 ; NOGATHER-NEXT: testb $1, %al
597 ; NOGATHER-NEXT: je .LBB8_8
598 ; NOGATHER-NEXT: # %bb.7: # %cond.load7
599 ; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
600 ; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
601 ; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
602 ; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm0, %xmm0
603 ; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
604 ; NOGATHER-NEXT: .LBB8_8: # %else8
605 ; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
606 ; NOGATHER-NEXT: retq
608 %ld = load <4 x i64*>, <4 x i64*>* %ptr
609 %res = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ld, i32 0, <4 x i1> %masks, <4 x i64> %passthro)
; Masked gather, 4 x double: per-lane conditional load from %ptrs under %masks,
; falling back to the %passthro lane when the mask bit is clear.
declare <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ptrs, i32 %align, <4 x i1> %masks, <4 x double> %passthro)
615 define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks, <4 x double> %passthro) {
616 ; X86-LABEL: masked_gather_v4double:
617 ; X86: # %bb.0: # %entry
618 ; X86-NEXT: vpslld $31, %xmm0, %xmm0
619 ; X86-NEXT: vpmovsxdq %xmm0, %ymm0
620 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
621 ; X86-NEXT: vmovapd (%eax), %xmm2
622 ; X86-NEXT: vgatherdpd %ymm0, (,%xmm2), %ymm1
623 ; X86-NEXT: vmovapd %ymm1, %ymm0
626 ; X64-LABEL: masked_gather_v4double:
627 ; X64: # %bb.0: # %entry
628 ; X64-NEXT: vpslld $31, %xmm0, %xmm0
629 ; X64-NEXT: vpmovsxdq %xmm0, %ymm0
630 ; X64-NEXT: vmovapd (%rdi), %ymm2
631 ; X64-NEXT: vgatherqpd %ymm0, (,%ymm2), %ymm1
632 ; X64-NEXT: vmovapd %ymm1, %ymm0
635 ; NOGATHER-LABEL: masked_gather_v4double:
636 ; NOGATHER: # %bb.0: # %entry
637 ; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2
638 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
639 ; NOGATHER-NEXT: testb $1, %al
640 ; NOGATHER-NEXT: je .LBB9_2
641 ; NOGATHER-NEXT: # %bb.1: # %cond.load
642 ; NOGATHER-NEXT: vmovq %xmm2, %rax
643 ; NOGATHER-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
644 ; NOGATHER-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0],ymm1[1,2,3]
645 ; NOGATHER-NEXT: .LBB9_2: # %else
646 ; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
647 ; NOGATHER-NEXT: testb $1, %al
648 ; NOGATHER-NEXT: je .LBB9_4
649 ; NOGATHER-NEXT: # %bb.3: # %cond.load1
650 ; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
651 ; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm3 = xmm1[0],mem[0]
652 ; NOGATHER-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2,3]
653 ; NOGATHER-NEXT: .LBB9_4: # %else2
654 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
655 ; NOGATHER-NEXT: testb $1, %al
656 ; NOGATHER-NEXT: je .LBB9_6
657 ; NOGATHER-NEXT: # %bb.5: # %cond.load4
658 ; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
659 ; NOGATHER-NEXT: vmovq %xmm3, %rax
660 ; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
661 ; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm3 = mem[0],xmm3[1]
662 ; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
663 ; NOGATHER-NEXT: .LBB9_6: # %else5
664 ; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
665 ; NOGATHER-NEXT: testb $1, %al
666 ; NOGATHER-NEXT: je .LBB9_8
667 ; NOGATHER-NEXT: # %bb.7: # %cond.load7
668 ; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
669 ; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
670 ; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
671 ; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
672 ; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
673 ; NOGATHER-NEXT: .LBB9_8: # %else8
674 ; NOGATHER-NEXT: vmovapd %ymm1, %ymm0
675 ; NOGATHER-NEXT: retq
677 %ld = load <4 x double*>, <4 x double*>* %ptr
678 %res = call <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ld, i32 0, <4 x i1> %masks, <4 x double> %passthro)
679 ret <4 x double> %res
; Masked gather, 2 x i64: per-lane conditional load from %ptrs under %masks,
; falling back to the %passthro lane when the mask bit is clear.
declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i64> %passthro)
684 define <2 x i64> @masked_gather_v2i64(<2 x i64*>* %ptr, <2 x i1> %masks, <2 x i64> %passthro) {
685 ; X86-LABEL: masked_gather_v2i64:
686 ; X86: # %bb.0: # %entry
687 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
688 ; X86-NEXT: vpmovsxdq (%eax), %xmm2
689 ; X86-NEXT: vpsllq $63, %xmm0, %xmm0
690 ; X86-NEXT: vpgatherqq %xmm0, (,%xmm2), %xmm1
691 ; X86-NEXT: vmovdqa %xmm1, %xmm0
694 ; X64-LABEL: masked_gather_v2i64:
695 ; X64: # %bb.0: # %entry
696 ; X64-NEXT: vpsllq $63, %xmm0, %xmm0
697 ; X64-NEXT: vmovdqa (%rdi), %xmm2
698 ; X64-NEXT: vpgatherqq %xmm0, (,%xmm2), %xmm1
699 ; X64-NEXT: vmovdqa %xmm1, %xmm0
702 ; NOGATHER-LABEL: masked_gather_v2i64:
703 ; NOGATHER: # %bb.0: # %entry
704 ; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
705 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
706 ; NOGATHER-NEXT: testb $1, %al
707 ; NOGATHER-NEXT: je .LBB10_2
708 ; NOGATHER-NEXT: # %bb.1: # %cond.load
709 ; NOGATHER-NEXT: vmovq %xmm2, %rax
710 ; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm1
711 ; NOGATHER-NEXT: .LBB10_2: # %else
712 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
713 ; NOGATHER-NEXT: testb $1, %al
714 ; NOGATHER-NEXT: je .LBB10_4
715 ; NOGATHER-NEXT: # %bb.3: # %cond.load1
716 ; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
717 ; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm1
718 ; NOGATHER-NEXT: .LBB10_4: # %else2
719 ; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0
720 ; NOGATHER-NEXT: retq
722 %ld = load <2 x i64*>, <2 x i64*>* %ptr
723 %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ld, i32 0, <2 x i1> %masks, <2 x i64> %passthro)
; Masked gather, 2 x double: per-lane conditional load from %ptrs under %masks,
; falling back to the %passthro lane when the mask bit is clear.
declare <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ptrs, i32 %align, <2 x i1> %masks, <2 x double> %passthro)
729 define <2 x double> @masked_gather_v2double(<2 x double*>* %ptr, <2 x i1> %masks, <2 x double> %passthro) {
730 ; X86-LABEL: masked_gather_v2double:
731 ; X86: # %bb.0: # %entry
732 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
733 ; X86-NEXT: vpmovsxdq (%eax), %xmm2
734 ; X86-NEXT: vpsllq $63, %xmm0, %xmm0
735 ; X86-NEXT: vgatherqpd %xmm0, (,%xmm2), %xmm1
736 ; X86-NEXT: vmovapd %xmm1, %xmm0
739 ; X64-LABEL: masked_gather_v2double:
740 ; X64: # %bb.0: # %entry
741 ; X64-NEXT: vpsllq $63, %xmm0, %xmm0
742 ; X64-NEXT: vmovapd (%rdi), %xmm2
743 ; X64-NEXT: vgatherqpd %xmm0, (,%xmm2), %xmm1
744 ; X64-NEXT: vmovapd %xmm1, %xmm0
747 ; NOGATHER-LABEL: masked_gather_v2double:
748 ; NOGATHER: # %bb.0: # %entry
749 ; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
750 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
751 ; NOGATHER-NEXT: testb $1, %al
752 ; NOGATHER-NEXT: je .LBB11_2
753 ; NOGATHER-NEXT: # %bb.1: # %cond.load
754 ; NOGATHER-NEXT: vmovq %xmm2, %rax
755 ; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
756 ; NOGATHER-NEXT: .LBB11_2: # %else
757 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
758 ; NOGATHER-NEXT: testb $1, %al
759 ; NOGATHER-NEXT: je .LBB11_4
760 ; NOGATHER-NEXT: # %bb.3: # %cond.load1
761 ; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
762 ; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
763 ; NOGATHER-NEXT: .LBB11_4: # %else2
764 ; NOGATHER-NEXT: vmovapd %xmm1, %xmm0
765 ; NOGATHER-NEXT: retq
767 %ld = load <2 x double*>, <2 x double*>* %ptr
768 %res = call <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ld, i32 0, <2 x i1> %masks, <2 x double> %passthro)
769 ret <2 x double> %res