; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=skylake -mtriple=i386-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X86 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-avx512f | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=-avx2 | FileCheck --check-prefix=NOGATHER %s

declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i32> %passthro)
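
; A v2i32 gather is widened to the xmm register form: the AVX2 runs gather
; through dword pointers (vpgatherdd) or qword pointers (vpgatherqd) and
; zero-extend the gathered dwords back into v2i64 layout; NOGATHER
; scalarizes into per-lane conditional loads.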
define <2 x i32> @masked_gather_v2i32(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB0_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    movl (%rax), %eax
; NOGATHER-NEXT:    vpinsrq $0, %rax, %xmm1, %xmm1
; NOGATHER-NEXT:  .LBB0_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB0_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    movl (%rax), %eax
; NOGATHER-NEXT:    vpinsrq $1, %rax, %xmm1, %xmm1
; NOGATHER-NEXT:  .LBB0_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x i32*>, <2 x i32*>* %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  ret <2 x i32> %res
}

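; The same v2i32 gather feeding a shufflevector that concatenates the result
; up to <4 x i32>; only the handling of the gathered value after the gather
; itself changes.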
define <4 x i32> @masked_gather_v2i32_concat(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB1_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    movl (%rax), %eax
; NOGATHER-NEXT:    vpinsrq $0, %rax, %xmm1, %xmm1
; NOGATHER-NEXT:  .LBB1_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB1_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    movl (%rax), %eax
; NOGATHER-NEXT:    vpinsrq $1, %rax, %xmm1, %xmm1
; NOGATHER-NEXT:  .LBB1_4: # %else2
; NOGATHER-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x i32*>, <2 x i32*>* %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  %res2 = shufflevector <2 x i32> %res, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res2
}

declare <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthro)
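
; The v2float variant selects the floating-point gathers
; vgatherdps/vgatherqps; NOGATHER assembles the result with scalar
; vmovss/vinsertps loads.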
define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB2_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; NOGATHER-NEXT:  .LBB2_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB2_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:  .LBB2_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x float*>, <2 x float*>* %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  ret <2 x float> %res
}

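; As above, but the v2float gather result is concatenated to <4 x float>
; by a shufflevector.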
define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB3_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; NOGATHER-NEXT:  .LBB3_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB3_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:  .LBB3_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x float*>, <2 x float*>* %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  %res2 = shufflevector <2 x float> %res, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthro)
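
; A legal-width v4i32 gather with the pointer vector passed in registers:
; X86 indexes with one xmm of dword pointers (vpgatherdd); X64 needs a ymm
; of qword pointers (vpgatherqd) and a trailing vzeroupper.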
define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i32> %passthro) {
; X86-LABEL: masked_gather_v4i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vpgatherdd %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovdqa %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vpgatherqd %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovdqa %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpextrb $0, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $0, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB4_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB4_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB4_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB4_8: # %else8
; NOGATHER-NEXT:    vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 0, <4 x i1> %masks, <4 x i32> %passthro)
  ret <4 x i32> %res
}

declare <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 %align, <4 x i1> %masks, <4 x float> %passthro)
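
; The v4float analogue of the previous test, selecting vgatherdps on X86
; and vgatherqps plus vzeroupper on X64.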
define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <4 x float> %passthro) {
; X86-LABEL: masked_gather_v4float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vgatherdps %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vgatherqps %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpextrb $0, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; NOGATHER-NEXT:  .LBB5_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:  .LBB5_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT:  .LBB5_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; NOGATHER-NEXT:  .LBB5_8: # %else8
; NOGATHER-NEXT:    vmovaps %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 0, <4 x i1> %masks, <4 x float> %passthro)
  ret <4 x float> %res
}

declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 %align, <8 x i1> %masks, <8 x i32> %passthro)
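
; For v8i32 the eight qword pointers no longer fit in one ymm on X64, so
; the gather is split into two vpgatherqd ops recombined with vinserti128;
; X86 still gathers all eight lanes with a single vpgatherdd.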
define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i32> %passthro) {
; X86-LABEL: masked_gather_v8i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %ymm2
; X86-NEXT:    vpgatherdd %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vpsrad $31, %ymm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vmovdqa 32(%rdi), %ymm3
; X64-NEXT:    vextracti128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vpgatherqd %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vpgatherqd %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm2
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vpinsrd $0, (%rax), %xmm1, %xmm4
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_2: # %else
; NOGATHER-NEXT:    vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm1, %xmm4
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_4: # %else2
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm1, %xmm4
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_6: # %else5
; NOGATHER-NEXT:    vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm1, %xmm3
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_8: # %else8
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT:    vpinsrd $0, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB6_10: # %else11
; NOGATHER-NEXT:    vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB6_12: # %else14
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_14
; NOGATHER-NEXT:  # %bb.13: # %cond.load16
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB6_14: # %else17
; NOGATHER-NEXT:    vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_16
; NOGATHER-NEXT:  # %bb.15: # %cond.load19
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB6_16: # %else20
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <8 x i32*>, <8 x i32*>* %ptr
  %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ld, i32 0, <8 x i1> %masks, <8 x i32> %passthro)
  ret <8 x i32> %res
}

declare <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ptrs, i32 %align, <8 x i1> %masks, <8 x float> %passthro)
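
; The v8float version of the split gather: X64 emits two vgatherqps ops
; joined with vinsertf128, X86 a single vgatherdps.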
define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <8 x float> %passthro) {
; X86-LABEL: masked_gather_v8float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovaps (%eax), %ymm2
; X86-NEXT:    vgatherdps %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovaps %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vpsrad $31, %ymm0, %ymm0
; X64-NEXT:    vmovaps (%rdi), %ymm2
; X64-NEXT:    vmovaps 32(%rdi), %ymm3
; X64-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vgatherqps %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vgatherqps %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm2
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm4[0],ymm1[1,2,3,4,5,6,7]
; NOGATHER-NEXT:  .LBB7_2: # %else
; NOGATHER-NEXT:    vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm4 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_4: # %else2
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm3, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm4 = xmm1[0,1],mem[0],xmm1[3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_6: # %else5
; NOGATHER-NEXT:    vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],mem[0]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_8: # %else8
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB7_10: # %else11
; NOGATHER-NEXT:    vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB7_12: # %else14
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_14
; NOGATHER-NEXT:  # %bb.13: # %cond.load16
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB7_14: # %else17
; NOGATHER-NEXT:    vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_16
; NOGATHER-NEXT:  # %bb.15: # %cond.load19
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB7_16: # %else20
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <8 x float*>, <8 x float*>* %ptr
  %res = call <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ld, i32 0, <8 x i1> %masks, <8 x float> %passthro)
  ret <8 x float> %res
}

declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i64> %passthro)
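
; With 64-bit elements the <4 x i1> mask is shifted and sign-extended to
; qwords (vpslld/vpmovsxdq) before feeding vpgatherdq/vpgatherqq.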
define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i64> %passthro) {
; X86-LABEL: masked_gather_v4i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %xmm2
; X86-NEXT:    vpgatherdq %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vpgatherqq %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    vpinsrq $0, (%rax), %xmm1, %xmm3
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm1, %xmm3
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT:    vpinsrq $0, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB8_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB8_8: # %else8
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <4 x i64*>, <4 x i64*>* %ptr
  %res = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ld, i32 0, <4 x i1> %masks, <4 x i64> %passthro)
  ret <4 x i64> %res
}

declare <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ptrs, i32 %align, <4 x i1> %masks, <4 x double> %passthro)
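
; Same mask widening as the v4i64 test, selecting the floating-point
; gathers vgatherdpd/vgatherqpd.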
define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks, <4 x double> %passthro) {
; X86-LABEL: masked_gather_v4double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovapd (%eax), %xmm2
; X86-NEXT:    vgatherdpd %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovapd %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovapd (%rdi), %ymm2
; X64-NEXT:    vgatherqpd %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovapd %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm3[0,1],ymm1[2,3,4,5,6,7]
; NOGATHER-NEXT:  .LBB9_2: # %else
; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm3 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB9_4: # %else2
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm2
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT:    vmovlps {{.*#+}} xmm3 = mem[0,1],xmm3[2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB9_6: # %else5
; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB9_8: # %else8
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <4 x double*>, <4 x double*>* %ptr
  %res = call <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ld, i32 0, <4 x i1> %masks, <4 x double> %passthro)
  ret <4 x double> %res
}

declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i64> %passthro)
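
; On X86 the two 32-bit pointers are sign-extended to qwords (vpmovsxdq)
; so vpgatherqq can address through them; the mask is built with vpsllq $63.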
define <2 x i64> @masked_gather_v2i64(<2 x i64*>* %ptr, <2 x i1> %masks, <2 x i64> %passthro) {
; X86-LABEL: masked_gather_v2i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxdq (%eax), %xmm2
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    vpgatherqq %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpgatherqq %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB10_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    vpinsrq $0, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:  .LBB10_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB10_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:  .LBB10_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x i64*>, <2 x i64*>* %ptr
  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ld, i32 0, <2 x i1> %masks, <2 x i64> %passthro)
  ret <2 x i64> %res
}

declare <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ptrs, i32 %align, <2 x i1> %masks, <2 x double> %passthro)
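
; The v2double analogue: the same pointer and mask preparation feeding
; vgatherqpd.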
define <2 x double> @masked_gather_v2double(<2 x double*>* %ptr, <2 x i1> %masks, <2 x double> %passthro) {
; X86-LABEL: masked_gather_v2double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxdq (%eax), %xmm2
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    vgatherqpd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovapd %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovapd (%rdi), %xmm2
; X64-NEXT:    vgatherqpd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovapd %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB11_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rax
; NOGATHER-NEXT:    vmovlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; NOGATHER-NEXT:  .LBB11_2: # %else
; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB11_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT:  .LBB11_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x double*>, <2 x double*>* %ptr
  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ld, i32 0, <2 x i1> %masks, <2 x double> %passthro)
  ret <2 x double> %res
}

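; A known all-zero mask folds the gather away entirely: every run simply
; forwards the passthru operand.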
define <2 x double> @masked_gather_zeromask(<2 x double*>* %ptr, <2 x double> %dummy, <2 x double> %passthru) {
; X86-LABEL: masked_gather_zeromask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_zeromask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_zeromask:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x double*>, <2 x double*>* %ptr
  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ld, i32 0, <2 x i1> zeroinitializer, <2 x double> %passthru)
  ret <2 x double> %res
}