1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx | FileCheck %s --check-prefix=ALL_X64 --check-prefix=SKX
4 ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32
; test1: returns an all-zero <16 x i1> constant.
; The assertions expect the zero mask to be materialized with a vxorps of
; %xmm0 on both the 64-bit and the 32-bit run lines.
6 define <16 x i1> @test1() {
7 ; ALL_X64-LABEL: test1:
9 ; ALL_X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
12 ; KNL_X32-LABEL: test1:
14 ; KNL_X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
16 ret <16 x i1> zeroinitializer
; test2: bitwise AND of two <16 x i1> arguments.
; The assertions expect the AND to be done with vandps on %xmm1/%xmm0 for
; both the 64-bit and the 32-bit run lines.
19 define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) {
20 ; ALL_X64-LABEL: test2:
22 ; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0
25 ; KNL_X32-LABEL: test2:
27 ; KNL_X32-NEXT: vandps %xmm1, %xmm0, %xmm0
29 %c = and <16 x i1>%a, %b
; test3: bitwise AND of two <8 x i1> arguments.
; Same expected instruction as the <16 x i1> case: vandps on %xmm1/%xmm0.
33 define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) {
34 ; ALL_X64-LABEL: test3:
36 ; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0
39 ; KNL_X32-LABEL: test3:
41 ; KNL_X32-NEXT: vandps %xmm1, %xmm0, %xmm0
43 %c = and <8 x i1>%a, %b
; test4: bitwise AND of two <4 x i1> arguments.
; Same expected instruction as the wider mask cases: vandps on %xmm1/%xmm0.
47 define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) {
48 ; ALL_X64-LABEL: test4:
50 ; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0
53 ; KNL_X32-LABEL: test4:
55 ; KNL_X32-NEXT: vandps %xmm1, %xmm0, %xmm0
57 %c = and <4 x i1>%a, %b
; External function that takes and returns an <8 x i1> mask; it is the callee
; used by test5 and test7a.
61 declare <8 x i1> @func8xi1(<8 x i1> %a)
; test5: signed greater-than compare of two <8 x i32> vectors, passes the
; resulting <8 x i1> mask to @func8xi1, then sign-extends the returned mask
; to <8 x i32>.
; Expected code before the call: the knl runs compare into a vector register
; and narrow with vpmovdw; the skx run compares into %k0 and expands with
; vpmovm2w. After the call every run widens %xmm0 with vpmovzxwd and rebuilds
; the sign-extended lanes with vpslld/vpsrad by 31.
63 define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) {
66 ; KNL-NEXT: pushq %rax
67 ; KNL-NEXT: .cfi_def_cfa_offset 16
68 ; KNL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
69 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
70 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
71 ; KNL-NEXT: callq _func8xi1
72 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
73 ; KNL-NEXT: vpslld $31, %ymm0, %ymm0
74 ; KNL-NEXT: vpsrad $31, %ymm0, %ymm0
80 ; SKX-NEXT: pushq %rax
81 ; SKX-NEXT: .cfi_def_cfa_offset 16
82 ; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
83 ; SKX-NEXT: vpmovm2w %k0, %xmm0
84 ; SKX-NEXT: vzeroupper
85 ; SKX-NEXT: callq _func8xi1
86 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
87 ; SKX-NEXT: vpslld $31, %ymm0, %ymm0
88 ; SKX-NEXT: vpsrad $31, %ymm0, %ymm0
92 ; KNL_X32-LABEL: test5:
94 ; KNL_X32-NEXT: subl $12, %esp
95 ; KNL_X32-NEXT: .cfi_def_cfa_offset 16
96 ; KNL_X32-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
97 ; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0
98 ; KNL_X32-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
99 ; KNL_X32-NEXT: calll _func8xi1
100 ; KNL_X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
101 ; KNL_X32-NEXT: vpslld $31, %ymm0, %ymm0
102 ; KNL_X32-NEXT: vpsrad $31, %ymm0, %ymm0
103 ; KNL_X32-NEXT: addl $12, %esp
105 %cmpRes = icmp sgt <8 x i32>%a, %b
106 %resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes)
107 %res = sext <8 x i1>%resi to <8 x i32>
; External function that takes and returns a <16 x i1> mask; it is the callee
; used by test6.
111 declare <16 x i1> @func16xi1(<16 x i1> %a)
; test6: signed greater-than compare of two <16 x i32> vectors, passes the
; resulting <16 x i1> mask to @func16xi1, then sign-extends the returned mask
; to <16 x i32>.
; Expected code before the call: the knl runs compare into %k1, expand the
; mask with a zero-masked vpternlogd and narrow with vpmovdb; the skx run
; compares into %k0 and expands with vpmovm2b. After the call every run
; widens %xmm0 with vpmovzxbd and rebuilds the lanes with vpslld/vpsrad by 31.
113 define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) {
116 ; KNL-NEXT: pushq %rax
117 ; KNL-NEXT: .cfi_def_cfa_offset 16
118 ; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
119 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
120 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
121 ; KNL-NEXT: callq _func16xi1
122 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
123 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
124 ; KNL-NEXT: vpsrad $31, %zmm0, %zmm0
125 ; KNL-NEXT: popq %rax
130 ; SKX-NEXT: pushq %rax
131 ; SKX-NEXT: .cfi_def_cfa_offset 16
132 ; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
133 ; SKX-NEXT: vpmovm2b %k0, %xmm0
134 ; SKX-NEXT: vzeroupper
135 ; SKX-NEXT: callq _func16xi1
136 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
137 ; SKX-NEXT: vpslld $31, %zmm0, %zmm0
138 ; SKX-NEXT: vpsrad $31, %zmm0, %zmm0
139 ; SKX-NEXT: popq %rax
142 ; KNL_X32-LABEL: test6:
144 ; KNL_X32-NEXT: subl $12, %esp
145 ; KNL_X32-NEXT: .cfi_def_cfa_offset 16
146 ; KNL_X32-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
147 ; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
148 ; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0
149 ; KNL_X32-NEXT: calll _func16xi1
150 ; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
151 ; KNL_X32-NEXT: vpslld $31, %zmm0, %zmm0
152 ; KNL_X32-NEXT: vpsrad $31, %zmm0, %zmm0
153 ; KNL_X32-NEXT: addl $12, %esp
155 %cmpRes = icmp sgt <16 x i32>%a, %b
156 %resi = call <16 x i1> @func16xi1(<16 x i1> %cmpRes)
157 %res = sext <16 x i1>%resi to <16 x i32>
; External function that takes and returns a <4 x i1> mask; it is the callee
; used by test7.
161 declare <4 x i1> @func4xi1(<4 x i1> %a)
; test7: signed greater-than compare of two <4 x i32> vectors, passes the
; resulting <4 x i1> mask to @func4xi1, then sign-extends the returned mask
; to <4 x i32>.
; The expected code feeds the vpcmpgtd result in %xmm0 straight into the call
; (no narrowing step between compare and call in the assertions), and uses
; vpslld/vpsrad by 31 on the returned value. Both 64-bit CPUs share one set
; of assertions here.
163 define <4 x i32> @test7(<4 x i32>%a, <4 x i32>%b) {
164 ; ALL_X64-LABEL: test7:
166 ; ALL_X64-NEXT: pushq %rax
167 ; ALL_X64-NEXT: .cfi_def_cfa_offset 16
168 ; ALL_X64-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
169 ; ALL_X64-NEXT: callq _func4xi1
170 ; ALL_X64-NEXT: vpslld $31, %xmm0, %xmm0
171 ; ALL_X64-NEXT: vpsrad $31, %xmm0, %xmm0
172 ; ALL_X64-NEXT: popq %rax
175 ; KNL_X32-LABEL: test7:
177 ; KNL_X32-NEXT: subl $12, %esp
178 ; KNL_X32-NEXT: .cfi_def_cfa_offset 16
179 ; KNL_X32-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
180 ; KNL_X32-NEXT: calll _func4xi1
181 ; KNL_X32-NEXT: vpslld $31, %xmm0, %xmm0
182 ; KNL_X32-NEXT: vpsrad $31, %xmm0, %xmm0
183 ; KNL_X32-NEXT: addl $12, %esp
185 %cmpRes = icmp sgt <4 x i32>%a, %b
186 %resi = call <4 x i1> @func4xi1(<4 x i1> %cmpRes)
187 %res = sext <4 x i1>%resi to <4 x i32>
; test7a: like test5 up to the call (compare <8 x i32>, pass the <8 x i1>
; mask to @func8xi1), but the returned mask is ANDed with a constant
; alternating true/false pattern instead of being sign-extended.
; The expected code applies the constant with a memory-operand AND: vandps on
; the knl runs (absolute LCPI7_0 on the 32-bit target, rip-relative on the
; 64-bit target) and vpand on the skx run.
191 define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
194 ; KNL-NEXT: pushq %rax
195 ; KNL-NEXT: .cfi_def_cfa_offset 16
196 ; KNL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
197 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
198 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
199 ; KNL-NEXT: callq _func8xi1
200 ; KNL-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
201 ; KNL-NEXT: popq %rax
206 ; SKX-NEXT: pushq %rax
207 ; SKX-NEXT: .cfi_def_cfa_offset 16
208 ; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
209 ; SKX-NEXT: vpmovm2w %k0, %xmm0
210 ; SKX-NEXT: vzeroupper
211 ; SKX-NEXT: callq _func8xi1
212 ; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
213 ; SKX-NEXT: popq %rax
216 ; KNL_X32-LABEL: test7a:
218 ; KNL_X32-NEXT: subl $12, %esp
219 ; KNL_X32-NEXT: .cfi_def_cfa_offset 16
220 ; KNL_X32-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
221 ; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0
222 ; KNL_X32-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
223 ; KNL_X32-NEXT: calll _func8xi1
224 ; KNL_X32-NEXT: vandps LCPI7_0, %xmm0, %xmm0
225 ; KNL_X32-NEXT: addl $12, %esp
227 %cmpRes = icmp sgt <8 x i32>%a, %b
228 %resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes)
229 %res = and <8 x i1>%resi, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
; test8: selects between two <16 x i8> vectors on a scalar i1 argument.
; The expected code tests bit 0 of the condition (in %dil on the 64-bit
; target, on the stack on the 32-bit target) and branches around a vmovaps
; that copies %xmm1 into %xmm0.
233 define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) {
234 ; ALL_X64-LABEL: test8:
236 ; ALL_X64-NEXT: testb $1, %dil
237 ; ALL_X64-NEXT: jne LBB8_2
238 ; ALL_X64-NEXT: ## %bb.1:
239 ; ALL_X64-NEXT: vmovaps %xmm1, %xmm0
240 ; ALL_X64-NEXT: LBB8_2:
243 ; KNL_X32-LABEL: test8:
245 ; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
246 ; KNL_X32-NEXT: jne LBB8_2
247 ; KNL_X32-NEXT: ## %bb.1:
248 ; KNL_X32-NEXT: vmovaps %xmm1, %xmm0
249 ; KNL_X32-NEXT: LBB8_2:
251 %res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2
; test9: unordered-or-greater-than compare (fcmp ugt) of two doubles, with
; an i1 return type.
; The expected code is a vucomisd followed by setb into %al; on the 32-bit
; target the first operand is loaded from the stack with vmovsd and the
; second is used as a memory operand.
255 define i1 @test9(double %a, double %b) {
256 ; ALL_X64-LABEL: test9:
258 ; ALL_X64-NEXT: vucomisd %xmm0, %xmm1
259 ; ALL_X64-NEXT: setb %al
262 ; KNL_X32-LABEL: test9:
264 ; KNL_X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
265 ; KNL_X32-NEXT: vucomisd {{[0-9]+}}(%esp), %xmm0
266 ; KNL_X32-NEXT: setb %al
268 %c = fcmp ugt double %a, %b
; test10: selects between two i32 arguments on a scalar i1 argument.
; Expected 64-bit code: test bit 0 of %dl and use cmovel on the values.
; Expected 32-bit code: take the addresses of both stack arguments with leal,
; choose one address with cmovnel, then load the selected value.
; This function is also called by test12.
272 define i32 @test10(i32 %a, i32 %b, i1 %cond) {
273 ; ALL_X64-LABEL: test10:
275 ; ALL_X64-NEXT: movl %edi, %eax
276 ; ALL_X64-NEXT: testb $1, %dl
277 ; ALL_X64-NEXT: cmovel %esi, %eax
280 ; KNL_X32-LABEL: test10:
282 ; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
283 ; KNL_X32-NEXT: leal {{[0-9]+}}(%esp), %eax
284 ; KNL_X32-NEXT: leal {{[0-9]+}}(%esp), %ecx
285 ; KNL_X32-NEXT: cmovnel %eax, %ecx
286 ; KNL_X32-NEXT: movl (%ecx), %eax
288 %c = select i1 %cond, i32 %a, i32 %b
; test11: signed greater-than compare of two i32 arguments, with an i1
; return type.
; The expected code is a cmpl followed by setg into %al.
; This function is also called by test12.
292 define i1 @test11(i32 %a, i32 %b) {
293 ; ALL_X64-LABEL: test11:
295 ; ALL_X64-NEXT: cmpl %esi, %edi
296 ; ALL_X64-NEXT: setg %al
299 ; KNL_X32-LABEL: test11:
301 ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax
302 ; KNL_X32-NEXT: cmpl {{[0-9]+}}(%esp), %eax
303 ; KNL_X32-NEXT: setg %al
305 %c = icmp sgt i32 %a, %b
; test12: chains the scalar i1 producers and consumers defined earlier in the
; file. It calls @test11(%a1, %b1) to get an i1, passes that i1 as the
; condition argument of @test10(%a1, %a2, ...), and finally selects between
; the @test10 result and 0 using the same i1.
; In the expected code the i1 returned in %al is zero-extended with movzbl
; before being passed on, and it is kept in a callee-saved register
; (%rbx / %ebx) across the second call so that bit 0 can be tested for the
; final cmovel.
309 define i32 @test12(i32 %a1, i32 %a2, i32 %b1) {
310 ; ALL_X64-LABEL: test12:
312 ; ALL_X64-NEXT: pushq %rbp
313 ; ALL_X64-NEXT: .cfi_def_cfa_offset 16
314 ; ALL_X64-NEXT: pushq %r14
315 ; ALL_X64-NEXT: .cfi_def_cfa_offset 24
316 ; ALL_X64-NEXT: pushq %rbx
317 ; ALL_X64-NEXT: .cfi_def_cfa_offset 32
318 ; ALL_X64-NEXT: .cfi_offset %rbx, -32
319 ; ALL_X64-NEXT: .cfi_offset %r14, -24
320 ; ALL_X64-NEXT: .cfi_offset %rbp, -16
321 ; ALL_X64-NEXT: movl %esi, %r14d
322 ; ALL_X64-NEXT: movl %edi, %ebp
323 ; ALL_X64-NEXT: movl %edx, %esi
324 ; ALL_X64-NEXT: callq _test11
325 ; ALL_X64-NEXT: movzbl %al, %ebx
326 ; ALL_X64-NEXT: movl %ebp, %edi
327 ; ALL_X64-NEXT: movl %r14d, %esi
328 ; ALL_X64-NEXT: movl %ebx, %edx
329 ; ALL_X64-NEXT: callq _test10
330 ; ALL_X64-NEXT: xorl %ecx, %ecx
331 ; ALL_X64-NEXT: testb $1, %bl
332 ; ALL_X64-NEXT: cmovel %ecx, %eax
333 ; ALL_X64-NEXT: popq %rbx
334 ; ALL_X64-NEXT: popq %r14
335 ; ALL_X64-NEXT: popq %rbp
338 ; KNL_X32-LABEL: test12:
340 ; KNL_X32-NEXT: pushl %ebx
341 ; KNL_X32-NEXT: .cfi_def_cfa_offset 8
342 ; KNL_X32-NEXT: pushl %edi
343 ; KNL_X32-NEXT: .cfi_def_cfa_offset 12
344 ; KNL_X32-NEXT: pushl %esi
345 ; KNL_X32-NEXT: .cfi_def_cfa_offset 16
346 ; KNL_X32-NEXT: subl $16, %esp
347 ; KNL_X32-NEXT: .cfi_def_cfa_offset 32
348 ; KNL_X32-NEXT: .cfi_offset %esi, -16
349 ; KNL_X32-NEXT: .cfi_offset %edi, -12
350 ; KNL_X32-NEXT: .cfi_offset %ebx, -8
351 ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %esi
352 ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %edi
353 ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax
354 ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
355 ; KNL_X32-NEXT: movl %edi, (%esp)
356 ; KNL_X32-NEXT: calll _test11
357 ; KNL_X32-NEXT: movl %eax, %ebx
358 ; KNL_X32-NEXT: movzbl %al, %eax
359 ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
360 ; KNL_X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
361 ; KNL_X32-NEXT: movl %edi, (%esp)
362 ; KNL_X32-NEXT: calll _test10
363 ; KNL_X32-NEXT: xorl %ecx, %ecx
364 ; KNL_X32-NEXT: testb $1, %bl
365 ; KNL_X32-NEXT: cmovel %ecx, %eax
366 ; KNL_X32-NEXT: addl $16, %esp
367 ; KNL_X32-NEXT: popl %esi
368 ; KNL_X32-NEXT: popl %edi
369 ; KNL_X32-NEXT: popl %ebx
371 %cond = call i1 @test11(i32 %a1, i32 %b1)
372 %res = call i32 @test10(i32 %a1, i32 %a2, i1 %cond)
373 %res1 = select i1 %cond, i32 %res, i32 0
377 define <1 x i1> @test13(<1 x i1>* %foo) {
380 ; KNL-NEXT: movzbl (%rdi), %eax
381 ; KNL-NEXT: ## kill: def $al killed $al killed $eax
386 ; SKX-NEXT: kmovb (%rdi), %k0
387 ; SKX-NEXT: kmovd %k0, %eax
388 ; SKX-NEXT: ## kill: def $al killed $al killed $eax
391 ; KNL_X32-LABEL: test13:
393 ; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax
394 ; KNL_X32-NEXT: movzbl (%eax), %eax
395 ; KNL_X32-NEXT: ## kill: def $al killed $al killed $eax
397 %bar = load <1 x i1>, <1 x i1>* %foo