1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mcpu=skx | FileCheck %s --check-prefixes=CHECK,CHECK-SKX
3 ; RUN: llc < %s -mcpu=knl | FileCheck %s --check-prefixes=CHECK,CHECK-KNL
5 target triple = "x86_64-unknown-unknown"
; test1: select between the <8 x i64> vectors %a and %b using a mask built by
; truncating every i64 lane of %m to i1, so only the lowest bit of each lane
; decides the result. For both -mcpu settings the expected code shifts that bit
; into the top of each lane (vpsllq $63), converts the lanes into mask register
; %k1 (vpmovq2m for skx, vptestmq for knl) and finishes with one masked blend.
7 define <8 x i64> @test1(<8 x i64> %m, <8 x i64> %a, <8 x i64> %b) {
8 ; CHECK-SKX-LABEL: test1:
9 ; CHECK-SKX: # %bb.0: # %entry
10 ; CHECK-SKX-NEXT: vpsllq $63, %zmm0, %zmm0
11 ; CHECK-SKX-NEXT: vpmovq2m %zmm0, %k1
12 ; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
13 ; CHECK-SKX-NEXT: retq
15 ; CHECK-KNL-LABEL: test1:
16 ; CHECK-KNL: # %bb.0: # %entry
17 ; CHECK-KNL-NEXT: vpsllq $63, %zmm0, %zmm0
18 ; CHECK-KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
19 ; CHECK-KNL-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
20 ; CHECK-KNL-NEXT: retq
; Narrow each 64-bit lane to a single bit, then use it as the per-lane selector.
22 %m.trunc = trunc <8 x i64> %m to <8 x i1>
23 %ret = select <8 x i1> %m.trunc, <8 x i64> %a, <8 x i64> %b
27 ; This is a very contrived test case to trick the legalizer into splitting the
28 ; v16i1 masks in the select during type legalization, and in so doing extend them
29 ; into two v8i64 types. This lets us ensure that the lowering code can handle
30 ; both formulations of vselect. All of this trickery is because we can't
31 ; directly form an SDAG input to the lowering.
32 define <16 x double> @test2(<16 x float> %x, <16 x float> %y, <16 x double> %a, <16 x double> %b) {
34 ; CHECK: # %bb.0: # %entry
35 ; CHECK-NEXT: vxorps %xmm6, %xmm6, %xmm6
36 ; CHECK-NEXT: vcmpltps %zmm0, %zmm6, %k0
37 ; CHECK-NEXT: vcmpltps %zmm6, %zmm1, %k1
38 ; CHECK-NEXT: korw %k1, %k0, %k1
39 ; CHECK-NEXT: vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
40 ; CHECK-NEXT: kshiftrw $8, %k1, %k1
41 ; CHECK-NEXT: vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
; The 16-lane mask is (%x > 0.0) | (%y < 0.0), both as ordered float compares.
; It selects per lane between the <16 x double> operands %a and %b. The
; expected code above blends the first 512-bit half under %k1, then shifts the
; mask right by 8 bits so the same register drives the blend of the second half.
44 %gt.m = fcmp ogt <16 x float> %x, zeroinitializer
45 %lt.m = fcmp olt <16 x float> %y, zeroinitializer
46 %m.or = or <16 x i1> %gt.m, %lt.m
47 %ret = select <16 x i1> %m.or, <16 x double> %a, <16 x double> %b
48 ret <16 x double> %ret
; test3: a 16-lane mask produced by comparing <16 x i8> %x against zero selects
; between the <16 x i64> operands %a and %b, each of which occupies two 512-bit
; registers. Expected skx code forms the mask directly with vptestnmb; expected
; knl code compares bytes with vpcmpeqb, sign-extends the result to 32-bit lanes
; and tests those to obtain %k1. Both then blend one half, shift the mask right
; by 8 and blend the other half.
51 define <16 x i64> @test3(<16 x i8> %x, <16 x i64> %a, <16 x i64> %b) {
52 ; CHECK-SKX-LABEL: test3:
54 ; CHECK-SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1
55 ; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
56 ; CHECK-SKX-NEXT: kshiftrw $8, %k1, %k1
57 ; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
58 ; CHECK-SKX-NEXT: retq
60 ; CHECK-KNL-LABEL: test3:
62 ; CHECK-KNL-NEXT: vpxor %xmm5, %xmm5, %xmm5
63 ; CHECK-KNL-NEXT: vpcmpeqb %xmm5, %xmm0, %xmm0
64 ; CHECK-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
65 ; CHECK-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
66 ; CHECK-KNL-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
67 ; CHECK-KNL-NEXT: kshiftrw $8, %k1, %k1
68 ; CHECK-KNL-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
69 ; CHECK-KNL-NEXT: retq
; Lane is true where the corresponding byte of %x equals zero.
70 %c = icmp eq <16 x i8> %x, zeroinitializer
71 %ret = select <16 x i1> %c, <16 x i64> %a, <16 x i64> %b
; test4: same shape as a 16-lane select of <16 x i64> values, but the mask comes
; from comparing <16 x i16> %x against zero. Expected skx code uses vptestnmw on
; the 256-bit input; expected knl code uses vpcmpeqw, sign-extends the result to
; 32-bit lanes and tests those to obtain %k1. Both then blend one half, shift
; the mask right by 8 and blend the other half.
75 define <16 x i64> @test4(<16 x i16> %x, <16 x i64> %a, <16 x i64> %b) {
76 ; CHECK-SKX-LABEL: test4:
78 ; CHECK-SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1
79 ; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
80 ; CHECK-SKX-NEXT: kshiftrw $8, %k1, %k1
81 ; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
82 ; CHECK-SKX-NEXT: retq
84 ; CHECK-KNL-LABEL: test4:
86 ; CHECK-KNL-NEXT: vpxor %xmm5, %xmm5, %xmm5
87 ; CHECK-KNL-NEXT: vpcmpeqw %ymm5, %ymm0, %ymm0
88 ; CHECK-KNL-NEXT: vpmovsxwd %ymm0, %zmm0
89 ; CHECK-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
90 ; CHECK-KNL-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
91 ; CHECK-KNL-NEXT: kshiftrw $8, %k1, %k1
92 ; CHECK-KNL-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
93 ; CHECK-KNL-NEXT: retq
; Lane is true where the corresponding 16-bit element of %x equals zero.
94 %c = icmp eq <16 x i16> %x, zeroinitializer
95 %ret = select <16 x i1> %c, <16 x i64> %a, <16 x i64> %b
; test5: 16-lane select of <16 x i64> values where the mask comes from comparing
; <16 x i32> %x against zero. The assertions use the shared check prefix, so the
; same output is expected for both -mcpu settings: vptestnmd builds %k1 from the
; full 512-bit input, one half is blended, the mask is shifted right by 8 and
; the other half is blended.
99 define <16 x i64> @test5(<16 x i32> %x, <16 x i64> %a, <16 x i64> %b) {
100 ; CHECK-LABEL: test5:
102 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
103 ; CHECK-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
104 ; CHECK-NEXT: kshiftrw $8, %k1, %k1
105 ; CHECK-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
; Lane is true where the corresponding 32-bit element of %x equals zero.
107 %c = icmp eq <16 x i32> %x, zeroinitializer
108 %ret = select <16 x i1> %c, <16 x i64> %a, <16 x i64> %b
; test6: 32-lane select of <32 x i32> values (two 512-bit registers per operand)
; with the mask produced by comparing <32 x i8> %x against zero. Expected skx
; code builds one 32-bit mask with vptestnmb and shifts it right by 16 for the
; second blend. Expected knl code compares all 32 bytes with vpcmpeqb, then
; sign-extends and tests each 128-bit half separately, giving two 16-lane masks
; (%k2 for the first blend, %k1 for the second).
112 define <32 x i32> @test6(<32 x i8> %x, <32 x i32> %a, <32 x i32> %b) {
113 ; CHECK-SKX-LABEL: test6:
114 ; CHECK-SKX: # %bb.0:
115 ; CHECK-SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1
116 ; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k1}
117 ; CHECK-SKX-NEXT: kshiftrd $16, %k1, %k1
118 ; CHECK-SKX-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1}
119 ; CHECK-SKX-NEXT: retq
121 ; CHECK-KNL-LABEL: test6:
122 ; CHECK-KNL: # %bb.0:
123 ; CHECK-KNL-NEXT: vpxor %xmm5, %xmm5, %xmm5
124 ; CHECK-KNL-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm0
125 ; CHECK-KNL-NEXT: vextracti128 $1, %ymm0, %xmm5
126 ; CHECK-KNL-NEXT: vpmovsxbd %xmm5, %zmm5
127 ; CHECK-KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
128 ; CHECK-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
129 ; CHECK-KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
130 ; CHECK-KNL-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k2}
131 ; CHECK-KNL-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1}
132 ; CHECK-KNL-NEXT: retq
; Lane is true where the corresponding byte of %x equals zero.
133 %c = icmp eq <32 x i8> %x, zeroinitializer
134 %ret = select <32 x i1> %c, <32 x i32> %a, <32 x i32> %b
; test7: 32-lane select of <32 x i32> values with the mask produced by comparing
; <32 x i16> %x against zero. Expected skx code builds one 32-bit mask with
; vptestnmw on the full 512-bit input and shifts it right by 16 for the second
; blend. Expected knl code extracts the upper 256 bits of %x, compares each
; 256-bit half with vpcmpeqw, and sign-extends and tests each result to get two
; 16-lane masks (%k2 for the first blend, %k1 for the second).
138 define <32 x i32> @test7(<32 x i16> %x, <32 x i32> %a, <32 x i32> %b) {
139 ; CHECK-SKX-LABEL: test7:
140 ; CHECK-SKX: # %bb.0:
141 ; CHECK-SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1
142 ; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k1}
143 ; CHECK-SKX-NEXT: kshiftrd $16, %k1, %k1
144 ; CHECK-SKX-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1}
145 ; CHECK-SKX-NEXT: retq
147 ; CHECK-KNL-LABEL: test7:
148 ; CHECK-KNL: # %bb.0:
149 ; CHECK-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
150 ; CHECK-KNL-NEXT: vpxor %xmm6, %xmm6, %xmm6
151 ; CHECK-KNL-NEXT: vpcmpeqw %ymm6, %ymm5, %ymm5
152 ; CHECK-KNL-NEXT: vpmovsxwd %ymm5, %zmm5
153 ; CHECK-KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
154 ; CHECK-KNL-NEXT: vpcmpeqw %ymm6, %ymm0, %ymm0
155 ; CHECK-KNL-NEXT: vpmovsxwd %ymm0, %zmm0
156 ; CHECK-KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
157 ; CHECK-KNL-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k2}
158 ; CHECK-KNL-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1}
159 ; CHECK-KNL-NEXT: retq
; Lane is true where the corresponding 16-bit element of %x equals zero.
160 %c = icmp eq <32 x i16> %x, zeroinitializer
161 %ret = select <32 x i1> %c, <32 x i32> %a, <32 x i32> %b
165 define <64 x i16> @test8(<64 x i8> %x, <64 x i16> %a, <64 x i16> %b) {
166 ; CHECK-SKX-LABEL: test8:
167 ; CHECK-SKX: # %bb.0:
168 ; CHECK-SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1
169 ; CHECK-SKX-NEXT: vpblendmw %zmm1, %zmm3, %zmm0 {%k1}
170 ; CHECK-SKX-NEXT: kshiftrq $32, %k1, %k1
171 ; CHECK-SKX-NEXT: vpblendmw %zmm2, %zmm4, %zmm1 {%k1}
172 ; CHECK-SKX-NEXT: retq
174 ; CHECK-KNL-LABEL: test8:
175 ; CHECK-KNL: # %bb.0:
176 ; CHECK-KNL-NEXT: pushq %rbp
177 ; CHECK-KNL-NEXT: .cfi_def_cfa_offset 16
178 ; CHECK-KNL-NEXT: .cfi_offset %rbp, -16
179 ; CHECK-KNL-NEXT: movq %rsp, %rbp
180 ; CHECK-KNL-NEXT: .cfi_def_cfa_register %rbp
181 ; CHECK-KNL-NEXT: andq $-32, %rsp
182 ; CHECK-KNL-NEXT: subq $32, %rsp
183 ; CHECK-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm8
184 ; CHECK-KNL-NEXT: vmovdqa 16(%rbp), %ymm9
185 ; CHECK-KNL-NEXT: vpxor %xmm10, %xmm10, %xmm10
186 ; CHECK-KNL-NEXT: vpcmpeqb %ymm10, %ymm0, %ymm11
187 ; CHECK-KNL-NEXT: vpmovsxbw %xmm11, %ymm0
188 ; CHECK-KNL-NEXT: vpblendvb %ymm0, %ymm1, %ymm5, %ymm0
189 ; CHECK-KNL-NEXT: vextracti128 $1, %ymm11, %xmm1
190 ; CHECK-KNL-NEXT: vpmovsxbw %xmm1, %ymm1
191 ; CHECK-KNL-NEXT: vpblendvb %ymm1, %ymm2, %ymm6, %ymm1
192 ; CHECK-KNL-NEXT: vpcmpeqb %ymm10, %ymm8, %ymm5
193 ; CHECK-KNL-NEXT: vpmovsxbw %xmm5, %ymm2
194 ; CHECK-KNL-NEXT: vpblendvb %ymm2, %ymm3, %ymm7, %ymm2
195 ; CHECK-KNL-NEXT: vextracti128 $1, %ymm5, %xmm3
196 ; CHECK-KNL-NEXT: vpmovsxbw %xmm3, %ymm3
197 ; CHECK-KNL-NEXT: vpblendvb %ymm3, %ymm4, %ymm9, %ymm3
198 ; CHECK-KNL-NEXT: movq %rbp, %rsp
199 ; CHECK-KNL-NEXT: popq %rbp
200 ; CHECK-KNL-NEXT: .cfi_def_cfa %rsp, 8
201 ; CHECK-KNL-NEXT: retq
202 %c = icmp eq <64 x i8> %x, zeroinitializer
203 %ret = select <64 x i1> %c, <64 x i16> %a, <64 x i16> %b