1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-64
3 ; RUN: llc -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-32
; test_fcmp_storefloat: branch on %cond; each arm performs an ordered-equal
; float compare on a different operand pair (%f3/%f4 or %f5/%f6). The two i1
; results are merged by a phi and used as the condition of a scalar float
; select between %f1 and %f2, whose result is stored through %fptr.
; Expected code (both targets): each arm's vcmpeqss writes mask register %k1
; directly, and the select is a vmovss masked by {%k1} after the merge.
5 define void @test_fcmp_storefloat(i1 %cond, float* %fptr, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
6 ; X86-64-LABEL: test_fcmp_storefloat:
7 ; X86-64: # %bb.0: # %entry
8 ; X86-64-NEXT: testb $1, %dil
9 ; X86-64-NEXT: je .LBB0_2
10 ; X86-64-NEXT: # %bb.1: # %if
11 ; X86-64-NEXT: vcmpeqss %xmm3, %xmm2, %k1
12 ; X86-64-NEXT: jmp .LBB0_3
13 ; X86-64-NEXT: .LBB0_2: # %else
14 ; X86-64-NEXT: vcmpeqss %xmm5, %xmm4, %k1
15 ; X86-64-NEXT: .LBB0_3: # %exit
16 ; X86-64-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
17 ; X86-64-NEXT: vmovss %xmm1, (%rsi)
20 ; X86-32-LABEL: test_fcmp_storefloat:
21 ; X86-32: # %bb.0: # %entry
22 ; X86-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
23 ; X86-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
24 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax
25 ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp)
26 ; X86-32-NEXT: je .LBB0_2
27 ; X86-32-NEXT: # %bb.1: # %if
28 ; X86-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
29 ; X86-32-NEXT: vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1
30 ; X86-32-NEXT: jmp .LBB0_3
31 ; X86-32-NEXT: .LBB0_2: # %else
32 ; X86-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
33 ; X86-32-NEXT: vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1
34 ; X86-32-NEXT: .LBB0_3: # %exit
35 ; X86-32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
36 ; X86-32-NEXT: vmovss %xmm0, (%eax)
; IR under test: the i1 phi is fed by a compare in each predecessor and
; consumed only by the select.
39 br i1 %cond, label %if, label %else
42 %cmp1 = fcmp oeq float %f3, %f4
46 %cmp2 = fcmp oeq float %f5, %f6
50 %val = phi i1 [%cmp1, %if], [%cmp2, %else]
51 %selected = select i1 %val, float %f1, float %f2
52 store float %selected, float* %fptr
; test_fcmp_storei1: branch on %cond; each arm performs an ordered-equal
; float compare (%f1/%f2 or %f3/%f4). The i1 results are merged by a phi and
; the merged bit is stored through %iptr.
; Expected code (both targets): each arm's vcmpeqss writes %k0 and is
; followed by a kmovb of %k0 to the destination address.
; Review note - %fptr is not referenced by the instructions shown below.
56 define void @test_fcmp_storei1(i1 %cond, float* %fptr, i1* %iptr, float %f1, float %f2, float %f3, float %f4) {
57 ; X86-64-LABEL: test_fcmp_storei1:
58 ; X86-64: # %bb.0: # %entry
59 ; X86-64-NEXT: testb $1, %dil
60 ; X86-64-NEXT: je .LBB1_2
61 ; X86-64-NEXT: # %bb.1: # %if
62 ; X86-64-NEXT: vcmpeqss %xmm1, %xmm0, %k0
63 ; X86-64-NEXT: kmovb %k0, (%rdx)
65 ; X86-64-NEXT: .LBB1_2: # %else
66 ; X86-64-NEXT: vcmpeqss %xmm3, %xmm2, %k0
67 ; X86-64-NEXT: kmovb %k0, (%rdx)
70 ; X86-32-LABEL: test_fcmp_storei1:
71 ; X86-32: # %bb.0: # %entry
72 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax
73 ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp)
74 ; X86-32-NEXT: je .LBB1_2
75 ; X86-32-NEXT: # %bb.1: # %if
76 ; X86-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
77 ; X86-32-NEXT: vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0
78 ; X86-32-NEXT: kmovb %k0, (%eax)
80 ; X86-32-NEXT: .LBB1_2: # %else
81 ; X86-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
82 ; X86-32-NEXT: vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0
83 ; X86-32-NEXT: kmovb %k0, (%eax)
; IR under test: the i1 phi is fed by compares and consumed by an i1 store.
86 br i1 %cond, label %if, label %else
89 %cmp1 = fcmp oeq float %f1, %f2
93 %cmp2 = fcmp oeq float %f3, %f4
97 %val = phi i1 [%cmp1, %if], [%cmp2, %else]
98 store i1 %val, i1* %iptr
; test_load_add: the %if arm loads an i1 from each of %iptr1 and %iptr2 and
; adds them; the %else arm loads an i1 from %iptr2 only. The phi of the two
; i1 values drives a scalar float select between %f1 and %f2, stored to %fptr.
; Expected code (both targets): the loads and the add are byte operations in
; a general-purpose register (movb/addb), and a single kmovd moves the merged
; value into %k1 in the exit block, just before the masked vmovss.
102 define void @test_load_add(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float %f1, float %f2) {
103 ; X86-64-LABEL: test_load_add:
104 ; X86-64: # %bb.0: # %entry
105 ; X86-64-NEXT: testb $1, %dil
106 ; X86-64-NEXT: je .LBB2_2
107 ; X86-64-NEXT: # %bb.1: # %if
108 ; X86-64-NEXT: movb (%rdx), %al
109 ; X86-64-NEXT: addb (%rcx), %al
110 ; X86-64-NEXT: jmp .LBB2_3
111 ; X86-64-NEXT: .LBB2_2: # %else
112 ; X86-64-NEXT: movb (%rcx), %al
113 ; X86-64-NEXT: .LBB2_3: # %exit
114 ; X86-64-NEXT: kmovd %eax, %k1
115 ; X86-64-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
116 ; X86-64-NEXT: vmovss %xmm1, (%rsi)
119 ; X86-32-LABEL: test_load_add:
120 ; X86-32: # %bb.0: # %entry
121 ; X86-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
122 ; X86-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
123 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
124 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax
125 ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp)
126 ; X86-32-NEXT: je .LBB2_2
127 ; X86-32-NEXT: # %bb.1: # %if
128 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx
129 ; X86-32-NEXT: movb (%edx), %dl
130 ; X86-32-NEXT: addb (%ecx), %dl
131 ; X86-32-NEXT: jmp .LBB2_3
132 ; X86-32-NEXT: .LBB2_2: # %else
133 ; X86-32-NEXT: movb (%ecx), %dl
134 ; X86-32-NEXT: .LBB2_3: # %exit
135 ; X86-32-NEXT: kmovd %edx, %k1
136 ; X86-32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
137 ; X86-32-NEXT: vmovss %xmm0, (%eax)
; IR under test: one phi input is an i1 add of two loads, the other a plain
; i1 load; the phi is consumed only by the select.
140 br i1 %cond, label %if, label %else
143 %loaded1 = load i1, i1* %iptr1
144 %loaded2if = load i1, i1* %iptr2
145 %added = add i1 %loaded1, %loaded2if
149 %loaded2else = load i1, i1* %iptr2
153 %val = phi i1 [%added, %if], [%loaded2else, %else]
154 %selected = select i1 %val, float %f1, float %f2
155 store float %selected, float* %fptr
; test_load_i1: each arm loads an i1 from a different pointer (%iptr1 in %if,
; %iptr2 in %else). The phi of the two loads drives a scalar float select
; between %f1 and %f2, stored to %fptr.
; Expected code, 64-bit target: each arm loads straight into %k1 with kmovb.
; Expected code, 32-bit target: the arms only pick the pointer (movl into
; %ecx) and a single kmovb from (%ecx) into %k1 is done in the exit block.
159 define void @test_load_i1(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float %f1, float %f2) {
160 ; X86-64-LABEL: test_load_i1:
161 ; X86-64: # %bb.0: # %entry
162 ; X86-64-NEXT: testb $1, %dil
163 ; X86-64-NEXT: je .LBB3_2
164 ; X86-64-NEXT: # %bb.1: # %if
165 ; X86-64-NEXT: kmovb (%rdx), %k1
166 ; X86-64-NEXT: jmp .LBB3_3
167 ; X86-64-NEXT: .LBB3_2: # %else
168 ; X86-64-NEXT: kmovb (%rcx), %k1
169 ; X86-64-NEXT: .LBB3_3: # %exit
170 ; X86-64-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
171 ; X86-64-NEXT: vmovss %xmm1, (%rsi)
174 ; X86-32-LABEL: test_load_i1:
175 ; X86-32: # %bb.0: # %entry
176 ; X86-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
177 ; X86-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
178 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax
179 ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp)
180 ; X86-32-NEXT: je .LBB3_2
181 ; X86-32-NEXT: # %bb.1: # %if
182 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
183 ; X86-32-NEXT: jmp .LBB3_3
184 ; X86-32-NEXT: .LBB3_2: # %else
185 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
186 ; X86-32-NEXT: .LBB3_3: # %exit
187 ; X86-32-NEXT: kmovb (%ecx), %k1
188 ; X86-32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
189 ; X86-32-NEXT: vmovss %xmm0, (%eax)
; IR under test: both phi inputs are plain i1 loads; the phi is consumed only
; by the select.
192 br i1 %cond, label %if, label %else
195 %loaded1 = load i1, i1* %iptr1
199 %loaded2 = load i1, i1* %iptr2
203 %val = phi i1 [%loaded1, %if], [%loaded2, %else]
204 %selected = select i1 %val, float %f1, float %f2
205 store float %selected, float* %fptr
; test_loadi1_storei1: each arm loads an i1 from a different pointer (%iptr1
; or %iptr2); the phi of the two loads is stored through %iptr3.
; Expected code (both targets): only general-purpose-register instructions
; appear in the checks below - a byte load, andb $1, and a byte store. No
; k-register instruction is expected for this load-to-store pattern.
209 define void @test_loadi1_storei1(i1 %cond, i1* %iptr1, i1* %iptr2, i1* %iptr3) {
210 ; X86-64-LABEL: test_loadi1_storei1:
211 ; X86-64: # %bb.0: # %entry
212 ; X86-64-NEXT: testb $1, %dil
213 ; X86-64-NEXT: je .LBB4_2
214 ; X86-64-NEXT: # %bb.1: # %if
215 ; X86-64-NEXT: movb (%rsi), %al
216 ; X86-64-NEXT: jmp .LBB4_3
217 ; X86-64-NEXT: .LBB4_2: # %else
218 ; X86-64-NEXT: movb (%rdx), %al
219 ; X86-64-NEXT: .LBB4_3: # %exit
220 ; X86-64-NEXT: andb $1, %al
221 ; X86-64-NEXT: movb %al, (%rcx)
224 ; X86-32-LABEL: test_loadi1_storei1:
225 ; X86-32: # %bb.0: # %entry
226 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax
227 ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp)
228 ; X86-32-NEXT: je .LBB4_2
229 ; X86-32-NEXT: # %bb.1: # %if
230 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
231 ; X86-32-NEXT: jmp .LBB4_3
232 ; X86-32-NEXT: .LBB4_2: # %else
233 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
234 ; X86-32-NEXT: .LBB4_3: # %exit
235 ; X86-32-NEXT: movb (%ecx), %cl
236 ; X86-32-NEXT: andb $1, %cl
237 ; X86-32-NEXT: movb %cl, (%eax)
; IR under test: the i1 phi is fed by loads and consumed by an i1 store, so
; no instruction here requires a mask operand.
240 br i1 %cond, label %if, label %else
243 %loaded1 = load i1, i1* %iptr1
247 %loaded2 = load i1, i1* %iptr2
251 %val = phi i1 [%loaded1, %if], [%loaded2, %else]
252 store i1 %val, i1* %iptr3
; test_shl1: the %if arm loads an i8 from %ptr1 and shifts it left by 1; the
; %else arm loads an i8 from %ptr2. The i8 phi is bitcast to <8 x i1> and
; used as the mask of an <8 x float> select between %fvec1 and %fvec2, whose
; result is stored through %fptrvec.
; Expected code (both targets): the %if arm loads into %k0 with kmovb and
; produces %k1 with kaddb of %k0 with itself; the %else arm loads directly
; into %k1. The select is a vmovaps on zmm registers masked by {%k1}, and
; the ymm half is stored.
256 define void @test_shl1(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
257 ; X86-64-LABEL: test_shl1:
258 ; X86-64: # %bb.0: # %entry
259 ; X86-64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
260 ; X86-64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
261 ; X86-64-NEXT: testb $1, %dil
262 ; X86-64-NEXT: je .LBB5_2
263 ; X86-64-NEXT: # %bb.1: # %if
264 ; X86-64-NEXT: kmovb (%rsi), %k0
265 ; X86-64-NEXT: kaddb %k0, %k0, %k1
266 ; X86-64-NEXT: jmp .LBB5_3
267 ; X86-64-NEXT: .LBB5_2: # %else
268 ; X86-64-NEXT: kmovb (%rdx), %k1
269 ; X86-64-NEXT: .LBB5_3: # %exit
270 ; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1}
271 ; X86-64-NEXT: vmovaps %ymm1, (%rcx)
272 ; X86-64-NEXT: vzeroupper
275 ; X86-32-LABEL: test_shl1:
276 ; X86-32: # %bb.0: # %entry
277 ; X86-32-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
278 ; X86-32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
279 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax
280 ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp)
281 ; X86-32-NEXT: je .LBB5_2
282 ; X86-32-NEXT: # %bb.1: # %if
283 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
284 ; X86-32-NEXT: kmovb (%ecx), %k0
285 ; X86-32-NEXT: kaddb %k0, %k0, %k1
286 ; X86-32-NEXT: jmp .LBB5_3
287 ; X86-32-NEXT: .LBB5_2: # %else
288 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
289 ; X86-32-NEXT: kmovb (%ecx), %k1
290 ; X86-32-NEXT: .LBB5_3: # %exit
291 ; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1}
292 ; X86-32-NEXT: vmovaps %ymm1, (%eax)
293 ; X86-32-NEXT: vzeroupper
; IR under test: shl by 1 on one phi input, a plain load on the other; the
; i8 phi is reinterpreted as an 8-lane mask.
296 br i1 %cond, label %if, label %else
299 %loaded1 = load i8, i8* %ptr1
300 %shifted = shl i8 %loaded1, 1
304 %loaded2 = load i8, i8* %ptr2
308 %val = phi i8 [%shifted, %if], [%loaded2, %else]
309 %mask = bitcast i8 %val to <8 x i1>
310 %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
311 store <8 x float> %selected, <8 x float>* %fptrvec
; test_shr1: the %if arm loads an i8 from %ptr1 and logically shifts it right
; by 1; the %else arm loads an i8 from %ptr2. The i8 phi is bitcast to
; <8 x i1> and used as the mask of an <8 x float> select between %fvec1 and
; %fvec2, whose result is stored through %fptrvec.
; Expected code (both targets): the loads and the shift stay in a
; general-purpose register (movb/shrb), and a single kmovd moves the merged
; value into %k1 in the exit block.
; Review note - unlike test_shr2 (shift by 2), no k-register shift is
; expected here; the reason is not visible in this file.
315 define void @test_shr1(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
316 ; X86-64-LABEL: test_shr1:
317 ; X86-64: # %bb.0: # %entry
318 ; X86-64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
319 ; X86-64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
320 ; X86-64-NEXT: testb $1, %dil
321 ; X86-64-NEXT: je .LBB6_2
322 ; X86-64-NEXT: # %bb.1: # %if
323 ; X86-64-NEXT: movb (%rsi), %al
324 ; X86-64-NEXT: shrb %al
325 ; X86-64-NEXT: jmp .LBB6_3
326 ; X86-64-NEXT: .LBB6_2: # %else
327 ; X86-64-NEXT: movb (%rdx), %al
328 ; X86-64-NEXT: .LBB6_3: # %exit
329 ; X86-64-NEXT: kmovd %eax, %k1
330 ; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1}
331 ; X86-64-NEXT: vmovaps %ymm1, (%rcx)
332 ; X86-64-NEXT: vzeroupper
335 ; X86-32-LABEL: test_shr1:
336 ; X86-32: # %bb.0: # %entry
337 ; X86-32-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
338 ; X86-32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
339 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax
340 ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp)
341 ; X86-32-NEXT: je .LBB6_2
342 ; X86-32-NEXT: # %bb.1: # %if
343 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
344 ; X86-32-NEXT: movb (%ecx), %cl
345 ; X86-32-NEXT: shrb %cl
346 ; X86-32-NEXT: jmp .LBB6_3
347 ; X86-32-NEXT: .LBB6_2: # %else
348 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
349 ; X86-32-NEXT: movb (%ecx), %cl
350 ; X86-32-NEXT: .LBB6_3: # %exit
351 ; X86-32-NEXT: kmovd %ecx, %k1
352 ; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1}
353 ; X86-32-NEXT: vmovaps %ymm1, (%eax)
354 ; X86-32-NEXT: vzeroupper
; IR under test: lshr by 1 on one phi input, a plain load on the other; the
; i8 phi is reinterpreted as an 8-lane mask.
357 br i1 %cond, label %if, label %else
360 %loaded1 = load i8, i8* %ptr1
361 %shifted = lshr i8 %loaded1, 1
365 %loaded2 = load i8, i8* %ptr2
369 %val = phi i8 [%shifted, %if], [%loaded2, %else]
370 %mask = bitcast i8 %val to <8 x i1>
371 %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
372 store <8 x float> %selected, <8 x float>* %fptrvec
; test_shr2: the %if arm loads an i8 from %ptr1 and logically shifts it right
; by 2; the %else arm loads an i8 from %ptr2. The i8 phi is bitcast to
; <8 x i1> and used as the mask of an <8 x float> select between %fvec1 and
; %fvec2, whose result is stored through %fptrvec.
; Expected code (both targets): the %if arm loads into %k0 with kmovb and
; shifts with kshiftrb $2 into %k1; the %else arm loads directly into %k1.
; The select is a vmovaps on zmm registers masked by {%k1}.
376 define void @test_shr2(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
377 ; X86-64-LABEL: test_shr2:
378 ; X86-64: # %bb.0: # %entry
379 ; X86-64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
380 ; X86-64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
381 ; X86-64-NEXT: testb $1, %dil
382 ; X86-64-NEXT: je .LBB7_2
383 ; X86-64-NEXT: # %bb.1: # %if
384 ; X86-64-NEXT: kmovb (%rsi), %k0
385 ; X86-64-NEXT: kshiftrb $2, %k0, %k1
386 ; X86-64-NEXT: jmp .LBB7_3
387 ; X86-64-NEXT: .LBB7_2: # %else
388 ; X86-64-NEXT: kmovb (%rdx), %k1
389 ; X86-64-NEXT: .LBB7_3: # %exit
390 ; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1}
391 ; X86-64-NEXT: vmovaps %ymm1, (%rcx)
392 ; X86-64-NEXT: vzeroupper
395 ; X86-32-LABEL: test_shr2:
396 ; X86-32: # %bb.0: # %entry
397 ; X86-32-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
398 ; X86-32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
399 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax
400 ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp)
401 ; X86-32-NEXT: je .LBB7_2
402 ; X86-32-NEXT: # %bb.1: # %if
403 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
404 ; X86-32-NEXT: kmovb (%ecx), %k0
405 ; X86-32-NEXT: kshiftrb $2, %k0, %k1
406 ; X86-32-NEXT: jmp .LBB7_3
407 ; X86-32-NEXT: .LBB7_2: # %else
408 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
409 ; X86-32-NEXT: kmovb (%ecx), %k1
410 ; X86-32-NEXT: .LBB7_3: # %exit
411 ; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1}
412 ; X86-32-NEXT: vmovaps %ymm1, (%eax)
413 ; X86-32-NEXT: vzeroupper
; IR under test: lshr by 2 on one phi input, a plain load on the other; the
; i8 phi is reinterpreted as an 8-lane mask.
416 br i1 %cond, label %if, label %else
419 %loaded1 = load i8, i8* %ptr1
420 %shifted = lshr i8 %loaded1, 2
424 %loaded2 = load i8, i8* %ptr2
428 %val = phi i8 [%shifted, %if], [%loaded2, %else]
429 %mask = bitcast i8 %val to <8 x i1>
430 %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
431 store <8 x float> %selected, <8 x float>* %fptrvec
; test_shl: the %if arm loads an i8 from %ptr1 and shifts it left by 6; the
; %else arm loads an i8 from %ptr2. The i8 phi is bitcast to <8 x i1> and
; used as the mask of an <8 x float> select between %fvec1 and %fvec2, whose
; result is stored through %fptrvec.
; Expected code (both targets): the %if arm loads into %k0 with kmovb and
; shifts with kshiftlb $6 into %k1; the %else arm loads directly into %k1.
; The select is a vmovaps on zmm registers masked by {%k1}.
435 define void @test_shl(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
436 ; X86-64-LABEL: test_shl:
437 ; X86-64: # %bb.0: # %entry
438 ; X86-64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
439 ; X86-64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
440 ; X86-64-NEXT: testb $1, %dil
441 ; X86-64-NEXT: je .LBB8_2
442 ; X86-64-NEXT: # %bb.1: # %if
443 ; X86-64-NEXT: kmovb (%rsi), %k0
444 ; X86-64-NEXT: kshiftlb $6, %k0, %k1
445 ; X86-64-NEXT: jmp .LBB8_3
446 ; X86-64-NEXT: .LBB8_2: # %else
447 ; X86-64-NEXT: kmovb (%rdx), %k1
448 ; X86-64-NEXT: .LBB8_3: # %exit
449 ; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1}
450 ; X86-64-NEXT: vmovaps %ymm1, (%rcx)
451 ; X86-64-NEXT: vzeroupper
454 ; X86-32-LABEL: test_shl:
455 ; X86-32: # %bb.0: # %entry
456 ; X86-32-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
457 ; X86-32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
458 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax
459 ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp)
460 ; X86-32-NEXT: je .LBB8_2
461 ; X86-32-NEXT: # %bb.1: # %if
462 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
463 ; X86-32-NEXT: kmovb (%ecx), %k0
464 ; X86-32-NEXT: kshiftlb $6, %k0, %k1
465 ; X86-32-NEXT: jmp .LBB8_3
466 ; X86-32-NEXT: .LBB8_2: # %else
467 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
468 ; X86-32-NEXT: kmovb (%ecx), %k1
469 ; X86-32-NEXT: .LBB8_3: # %exit
470 ; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1}
471 ; X86-32-NEXT: vmovaps %ymm1, (%eax)
472 ; X86-32-NEXT: vzeroupper
; IR under test: shl by 6 on one phi input, a plain load on the other; the
; i8 phi is reinterpreted as an 8-lane mask.
475 br i1 %cond, label %if, label %else
478 %loaded1 = load i8, i8* %ptr1
479 %shifted = shl i8 %loaded1, 6
483 %loaded2 = load i8, i8* %ptr2
487 %val = phi i8 [%shifted, %if], [%loaded2, %else]
488 %mask = bitcast i8 %val to <8 x i1>
489 %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
490 store <8 x float> %selected, <8 x float>* %fptrvec
; test_add: both i8 values are loaded (from %ptr1 and %ptr2) before the
; branch on %cond. The %if arm ANDs them and the %else arm adds them. The i8
; phi is bitcast to <8 x i1> and used as the mask of an <8 x float> select
; between %fvec1 and %fvec2, whose result is stored through %fptrvec.
; Expected code (both targets): both loads are kmovb into %k0/%k1 ahead of
; the testb; the arms are kandb and kaddb respectively, each writing %k1;
; the select is a vmovaps on zmm registers masked by {%k1}.
494 define void @test_add(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
495 ; X86-64-LABEL: test_add:
496 ; X86-64: # %bb.0: # %entry
497 ; X86-64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
498 ; X86-64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
499 ; X86-64-NEXT: kmovb (%rsi), %k0
500 ; X86-64-NEXT: kmovb (%rdx), %k1
501 ; X86-64-NEXT: testb $1, %dil
502 ; X86-64-NEXT: je .LBB9_2
503 ; X86-64-NEXT: # %bb.1: # %if
504 ; X86-64-NEXT: kandb %k1, %k0, %k1
505 ; X86-64-NEXT: jmp .LBB9_3
506 ; X86-64-NEXT: .LBB9_2: # %else
507 ; X86-64-NEXT: kaddb %k1, %k0, %k1
508 ; X86-64-NEXT: .LBB9_3: # %exit
509 ; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1}
510 ; X86-64-NEXT: vmovaps %ymm1, (%rcx)
511 ; X86-64-NEXT: vzeroupper
514 ; X86-32-LABEL: test_add:
515 ; X86-32: # %bb.0: # %entry
516 ; X86-32-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
517 ; X86-32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
518 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax
519 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
520 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx
521 ; X86-32-NEXT: kmovb (%edx), %k0
522 ; X86-32-NEXT: kmovb (%ecx), %k1
523 ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp)
524 ; X86-32-NEXT: je .LBB9_2
525 ; X86-32-NEXT: # %bb.1: # %if
526 ; X86-32-NEXT: kandb %k1, %k0, %k1
527 ; X86-32-NEXT: jmp .LBB9_3
528 ; X86-32-NEXT: .LBB9_2: # %else
529 ; X86-32-NEXT: kaddb %k1, %k0, %k1
530 ; X86-32-NEXT: .LBB9_3: # %exit
531 ; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1}
532 ; X86-32-NEXT: vmovaps %ymm1, (%eax)
533 ; X86-32-NEXT: vzeroupper
; IR under test: the loads are shared by both arms; only the binary operator
; (and vs. add) differs between the two phi inputs.
536 %loaded1 = load i8, i8* %ptr1
537 %loaded2 = load i8, i8* %ptr2
538 br i1 %cond, label %if, label %else
541 %and = and i8 %loaded1, %loaded2
545 %add = add i8 %loaded1, %loaded2
549 %val = phi i8 [%and, %if], [%add, %else]
550 %mask = bitcast i8 %val to <8 x i1>
551 %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
552 store <8 x float> %selected, <8 x float>* %fptrvec