1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
; 256-bit <4 x i64> add of two register operands; the expected vpaddq is
; emitted with the shorter VEX encoding (EVEX-to-VEX compression).
6 define <4 x i64> @vpaddq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
7 ; CHECK-LABEL: vpaddq256_test:
9 ; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc1]
10 ; CHECK-NEXT: retq ## encoding: [0xc3]
11 %x = add <4 x i64> %i, %j
; 256-bit <4 x i64> add whose second operand is a full-vector load (align 4);
; the load is expected to fold into vpaddq as a (%rdi) memory operand.
15 define <4 x i64> @vpaddq256_fold_test(<4 x i64> %i, <4 x i64>* %j) nounwind {
16 ; CHECK-LABEL: vpaddq256_fold_test:
18 ; CHECK-NEXT: vpaddq (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0x07]
19 ; CHECK-NEXT: retq ## encoding: [0xc3]
20 %tmp = load <4 x i64>, <4 x i64>* %j, align 4
21 %x = add <4 x i64> %i, %tmp
; 256-bit <4 x i64> add with a splat constant (2); the constant is expected to
; come from the constant pool as a {1to4} embedded-broadcast memory operand.
25 define <4 x i64> @vpaddq256_broadcast_test(<4 x i64> %i) nounwind {
26 ; CHECK-LABEL: vpaddq256_broadcast_test:
28 ; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xd4,0x05,A,A,A,A]
29 ; CHECK-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
30 ; CHECK-NEXT: retq ## encoding: [0xc3]
31 %x = add <4 x i64> %i, <i64 2, i64 2, i64 2, i64 2>
; 256-bit <4 x i64> add with a scalar i64 loaded from memory and splatted via
; insertelement + shufflevector; expected to fold into a (%rdi){1to4} operand.
35 define <4 x i64> @vpaddq256_broadcast2_test(<4 x i64> %i, i64* %j.ptr) nounwind {
36 ; CHECK-LABEL: vpaddq256_broadcast2_test:
38 ; CHECK-NEXT: vpaddq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xd4,0x07]
39 ; CHECK-NEXT: retq ## encoding: [0xc3]
40 %j = load i64, i64* %j.ptr
41 %j.0 = insertelement <4 x i64> undef, i64 %j, i32 0
42 %j.v = shufflevector <4 x i64> %j.0, <4 x i64> undef, <4 x i32> zeroinitializer
43 %x = add <4 x i64> %i, %j.v
; 256-bit <8 x i32> add of two register operands; the expected vpaddd uses the
; VEX encoding (EVEX-to-VEX compression).
47 define <8 x i32> @vpaddd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
48 ; CHECK-LABEL: vpaddd256_test:
50 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
51 ; CHECK-NEXT: retq ## encoding: [0xc3]
52 %x = add <8 x i32> %i, %j
; 256-bit <8 x i32> add whose second operand is a full-vector load (align 4);
; the load is expected to fold into vpaddd as a (%rdi) memory operand.
56 define <8 x i32> @vpaddd256_fold_test(<8 x i32> %i, <8 x i32>* %j) nounwind {
57 ; CHECK-LABEL: vpaddd256_fold_test:
59 ; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x07]
60 ; CHECK-NEXT: retq ## encoding: [0xc3]
61 %tmp = load <8 x i32>, <8 x i32>* %j, align 4
62 %x = add <8 x i32> %i, %tmp
; 256-bit <8 x i32> add with a splat constant (3); the constant is expected to
; be a constant-pool {1to8} embedded-broadcast operand (EVEX encoding).
66 define <8 x i32> @vpaddd256_broadcast_test(<8 x i32> %i) nounwind {
67 ; CHECK-LABEL: vpaddd256_broadcast_test:
69 ; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
70 ; CHECK-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
71 ; CHECK-NEXT: retq ## encoding: [0xc3]
72 %x = add <8 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Merge-masked 256-bit <8 x i32> add: lanes where %mask1 is non-zero take the
; sum, the rest keep %i. Expected: vptestmd builds %k1, then vpaddd {%k1}.
76 define <8 x i32> @vpaddd256_mask_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
77 ; CHECK-LABEL: vpaddd256_mask_test:
79 ; CHECK-NEXT: vptestmd %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x28,0x27,0xca]
80 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xc1]
81 ; CHECK-NEXT: retq ## encoding: [0xc3]
82 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
83 %x = add <8 x i32> %i, %j
84 %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
; Zero-masked 256-bit <8 x i32> add: lanes where %mask1 is zero become 0.
; Expected: vptestmd builds %k1, then vpaddd {%k1} {z}.
88 define <8 x i32> @vpaddd256_maskz_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
89 ; CHECK-LABEL: vpaddd256_maskz_test:
91 ; CHECK-NEXT: vptestmd %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x28,0x27,0xca]
92 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
93 ; CHECK-NEXT: retq ## encoding: [0xc3]
94 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
95 %x = add <8 x i32> %i, %j
96 %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Merge-masked 256-bit <8 x i32> add with the second operand loaded from
; %j.ptr; the load is expected to fold into the masked vpaddd as (%rdi).
; The function reads memory through %j.ptr, so it must not be marked
; `readnone` (that attribute forbids dereferencing pointer arguments).
100 define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind {
101 ; CHECK-LABEL: vpaddd256_mask_fold_test:
103 ; CHECK-NEXT: vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
104 ; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x07]
105 ; CHECK-NEXT: retq ## encoding: [0xc3]
106 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
107 %j = load <8 x i32>, <8 x i32>* %j.ptr
108 %x = add <8 x i32> %i, %j
109 %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
; Merge-masked 256-bit <8 x i32> add with a splat constant (4); expected to use
; a constant-pool {1to8} broadcast operand under mask %k1, passthrough %i.
113 define <8 x i32> @vpaddd256_mask_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
114 ; CHECK-LABEL: vpaddd256_mask_broadcast_test:
116 ; CHECK-NEXT: vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
117 ; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x05,A,A,A,A]
118 ; CHECK-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
119 ; CHECK-NEXT: retq ## encoding: [0xc3]
120 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
121 %x = add <8 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
122 %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
; Zero-masked 256-bit <8 x i32> add with the second operand loaded from
; %j.ptr; the load is expected to fold into vpaddd {%k1} {z} as (%rdi).
; The function reads memory through %j.ptr, so it must not be marked
; `readnone` (that attribute forbids dereferencing pointer arguments).
126 define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind {
127 ; CHECK-LABEL: vpaddd256_maskz_fold_test:
129 ; CHECK-NEXT: vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
130 ; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
131 ; CHECK-NEXT: retq ## encoding: [0xc3]
132 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
133 %j = load <8 x i32>, <8 x i32>* %j.ptr
134 %x = add <8 x i32> %i, %j
135 %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Zero-masked 256-bit <8 x i32> add with a splat constant (5); expected to use
; a constant-pool {1to8} broadcast operand with {%k1} {z}.
139 define <8 x i32> @vpaddd256_maskz_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
140 ; CHECK-LABEL: vpaddd256_maskz_broadcast_test:
142 ; CHECK-NEXT: vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
143 ; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x05,A,A,A,A]
144 ; CHECK-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
145 ; CHECK-NEXT: retq ## encoding: [0xc3]
146 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
147 %x = add <8 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
148 %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; 256-bit <4 x i64> subtract of two register operands; expected vpsubq with
; the VEX encoding (EVEX-to-VEX compression).
152 define <4 x i64> @vpsubq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
153 ; CHECK-LABEL: vpsubq256_test:
155 ; CHECK-NEXT: vpsubq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfb,0xc1]
156 ; CHECK-NEXT: retq ## encoding: [0xc3]
157 %x = sub <4 x i64> %i, %j
; 256-bit <8 x i32> subtract of two register operands; expected vpsubd with
; the VEX encoding (EVEX-to-VEX compression).
161 define <8 x i32> @vpsubd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
162 ; CHECK-LABEL: vpsubd256_test:
164 ; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
165 ; CHECK-NEXT: retq ## encoding: [0xc3]
166 %x = sub <8 x i32> %i, %j
; 256-bit <8 x i32> multiply of two register operands; expected vpmulld with
; the VEX encoding (EVEX-to-VEX compression).
170 define <8 x i32> @vpmulld256_test(<8 x i32> %i, <8 x i32> %j) {
171 ; CHECK-LABEL: vpmulld256_test:
173 ; CHECK-NEXT: vpmulld %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x40,0xc1]
174 ; CHECK-NEXT: retq ## encoding: [0xc3]
175 %x = mul <8 x i32> %i, %j
; 256-bit <4 x double> fadd of two register operands. The IR adds %x + %y
; (arguments are declared as %y, %x), so the expected vaddpd reads
; %ymm0, %ymm1 in that order; VEX encoding is expected.
179 define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) {
180 ; CHECK-LABEL: test_vaddpd_256:
181 ; CHECK: ## %bb.0: ## %entry
182 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
183 ; CHECK-NEXT: retq ## encoding: [0xc3]
185 %add.i = fadd <4 x double> %x, %y
186 ret <4 x double> %add.i
; 256-bit <4 x double> fadd with a non-splat constant vector; the constant is
; expected to be a full-width constant-pool memory operand (no broadcast), so
; the VEX-compressed encoding applies.
189 define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) {
190 ; CHECK-LABEL: test_fold_vaddpd_256:
191 ; CHECK: ## %bb.0: ## %entry
192 ; CHECK-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0x05,A,A,A,A]
193 ; CHECK-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
194 ; CHECK-NEXT: retq ## encoding: [0xc3]
196 %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 4.500000e+00, double 5.600000e+00>
197 ret <4 x double> %add.i
; 256-bit <8 x float> fadd with a splat float constant; expected to use a
; constant-pool {1to8} embedded-broadcast operand.
; NOTE(review): the function name says "vaddpd" but the operands are
; <8 x float> and the expected instruction is vaddps.
200 define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind {
201 ; CHECK-LABEL: test_broadcast_vaddpd_256:
203 ; CHECK-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x58,0x05,A,A,A,A]
204 ; CHECK-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
205 ; CHECK-NEXT: retq ## encoding: [0xc3]
206 %b = fadd <8 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
; Merge-masked 256-bit <8 x float> fadd: lanes with non-zero %mask1 take
; %i + %j, the rest keep %dst. Expected: vptestmd -> %k1, vaddps {%k1}.
210 define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
211 ; CHECK-LABEL: test_mask_vaddps_256:
213 ; CHECK-NEXT: vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
214 ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x58,0xc2]
215 ; CHECK-NEXT: retq ## encoding: [0xc3]
216 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
217 %x = fadd <8 x float> %i, %j
218 %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
; Merge-masked 256-bit <8 x float> fmul: lanes with non-zero %mask1 take
; %i * %j, the rest keep %dst. Expected: vptestmd -> %k1, vmulps {%k1}.
222 define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
223 ; CHECK-LABEL: test_mask_vmulps_256:
225 ; CHECK-NEXT: vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
226 ; CHECK-NEXT: vmulps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x59,0xc2]
227 ; CHECK-NEXT: retq ## encoding: [0xc3]
228 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
229 %x = fmul <8 x float> %i, %j
230 %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
; Merge-masked 256-bit <8 x float> minimum written as fcmp olt + select; the
; pair is expected to become a single vminps under %k1, passthrough %dst.
234 define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1)nounwind readnone {
235 ; CHECK-LABEL: test_mask_vminps_256:
237 ; CHECK-NEXT: vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
238 ; CHECK-NEXT: vminps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5d,0xc2]
239 ; CHECK-NEXT: retq ## encoding: [0xc3]
240 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
241 %cmp_res = fcmp olt <8 x float> %i, %j
242 %min = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
243 %r = select <8 x i1> %mask, <8 x float> %min, <8 x float> %dst
; Merge-masked 256-bit <8 x float> maximum written as fcmp ogt + select; the
; pair is expected to become a single vmaxps under %k1, passthrough %dst.
247 define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
248 ; CHECK-LABEL: test_mask_vmaxps_256:
250 ; CHECK-NEXT: vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
251 ; CHECK-NEXT: vmaxps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5f,0xc2]
252 ; CHECK-NEXT: retq ## encoding: [0xc3]
253 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
254 %cmp_res = fcmp ogt <8 x float> %i, %j
255 %max = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
256 %r = select <8 x i1> %mask, <8 x float> %max, <8 x float> %dst
; Merge-masked 256-bit <8 x float> fsub: lanes with non-zero %mask1 take
; %i - %j, the rest keep %dst. Expected: vptestmd -> %k1, vsubps {%k1}.
260 define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
261 ; CHECK-LABEL: test_mask_vsubps_256:
263 ; CHECK-NEXT: vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
264 ; CHECK-NEXT: vsubps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5c,0xc2]
265 ; CHECK-NEXT: retq ## encoding: [0xc3]
266 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
267 %x = fsub <8 x float> %i, %j
268 %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
; Merge-masked 256-bit <8 x float> fdiv: lanes with non-zero %mask1 take
; %i / %j, the rest keep %dst. Expected: vptestmd -> %k1, vdivps {%k1}.
272 define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
273 ; CHECK-LABEL: test_mask_vdivps_256:
275 ; CHECK-NEXT: vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
276 ; CHECK-NEXT: vdivps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5e,0xc2]
277 ; CHECK-NEXT: retq ## encoding: [0xc3]
278 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
279 %x = fdiv <8 x float> %i, %j
280 %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
; Merge-masked 256-bit <4 x double> fmul with a <4 x i64> mask source.
; Expected: vptestmq -> %k1, vmulpd {%k1}, passthrough %dst.
284 define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
285 ; CHECK-LABEL: test_mask_vmulpd_256:
287 ; CHECK-NEXT: vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
288 ; CHECK-NEXT: vmulpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x59,0xc2]
289 ; CHECK-NEXT: retq ## encoding: [0xc3]
290 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
291 %x = fmul <4 x double> %i, %j
292 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
; Merge-masked 256-bit <4 x double> minimum written as fcmp olt + select;
; expected to become a single vminpd under %k1, passthrough %dst.
296 define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
297 ; CHECK-LABEL: test_mask_vminpd_256:
299 ; CHECK-NEXT: vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
300 ; CHECK-NEXT: vminpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5d,0xc2]
301 ; CHECK-NEXT: retq ## encoding: [0xc3]
302 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
303 %cmp_res = fcmp olt <4 x double> %i, %j
304 %min = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
305 %r = select <4 x i1> %mask, <4 x double> %min, <4 x double> %dst
; Merge-masked 256-bit <4 x double> maximum written as fcmp ogt + select;
; expected to become a single vmaxpd under %k1, passthrough %dst.
309 define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
310 ; CHECK-LABEL: test_mask_vmaxpd_256:
312 ; CHECK-NEXT: vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
313 ; CHECK-NEXT: vmaxpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5f,0xc2]
314 ; CHECK-NEXT: retq ## encoding: [0xc3]
315 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
316 %cmp_res = fcmp ogt <4 x double> %i, %j
317 %max = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
318 %r = select <4 x i1> %mask, <4 x double> %max, <4 x double> %dst
; Merge-masked 256-bit <4 x double> fsub with a <4 x i64> mask source.
; Expected: vptestmq -> %k1, vsubpd {%k1}, passthrough %dst.
322 define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
323 ; CHECK-LABEL: test_mask_vsubpd_256:
325 ; CHECK-NEXT: vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
326 ; CHECK-NEXT: vsubpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5c,0xc2]
327 ; CHECK-NEXT: retq ## encoding: [0xc3]
328 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
329 %x = fsub <4 x double> %i, %j
330 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
; Merge-masked 256-bit <4 x double> fdiv with a <4 x i64> mask source.
; Expected: vptestmq -> %k1, vdivpd {%k1}, passthrough %dst.
334 define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
335 ; CHECK-LABEL: test_mask_vdivpd_256:
337 ; CHECK-NEXT: vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
338 ; CHECK-NEXT: vdivpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5e,0xc2]
339 ; CHECK-NEXT: retq ## encoding: [0xc3]
340 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
341 %x = fdiv <4 x double> %i, %j
342 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
; Merge-masked 256-bit <4 x double> fadd with a <4 x i64> mask source.
; Expected: vptestmq -> %k1, vaddpd {%k1}, passthrough %dst.
346 define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
347 ; CHECK-LABEL: test_mask_vaddpd_256:
349 ; CHECK-NEXT: vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
350 ; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x58,0xc2]
351 ; CHECK-NEXT: retq ## encoding: [0xc3]
352 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
353 %x = fadd <4 x double> %i, %j
354 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
; Zero-masked 256-bit <4 x double> fadd: lanes where %mask1 is zero become 0.
; Expected: vptestmq -> %k1, vaddpd {%k1} {z}.
358 define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
359 ; CHECK-LABEL: test_maskz_vaddpd_256:
361 ; CHECK-NEXT: vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
362 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x58,0xc1]
363 ; CHECK-NEXT: retq ## encoding: [0xc3]
364 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
365 %x = fadd <4 x double> %i, %j
366 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
; Merge-masked 256-bit <4 x double> fadd whose second operand is loaded from
; %j; the load is expected to fold into vaddpd {%k1} as (%rdi), passthrough
; %dst.
370 define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i, <4 x double>* %j, <4 x i64> %mask1) nounwind {
371 ; CHECK-LABEL: test_mask_fold_vaddpd_256:
373 ; CHECK-NEXT: vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
374 ; CHECK-NEXT: vaddpd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x58,0x07]
375 ; CHECK-NEXT: retq ## encoding: [0xc3]
376 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
377 %tmp = load <4 x double>, <4 x double>* %j
378 %x = fadd <4 x double> %i, %tmp
379 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
; Zero-masked 256-bit <4 x double> fadd whose second operand is loaded from
; %j; the load is expected to fold into vaddpd {%k1} {z} as (%rdi).
383 define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j, <4 x i64> %mask1) nounwind {
384 ; CHECK-LABEL: test_maskz_fold_vaddpd_256:
386 ; CHECK-NEXT: vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
387 ; CHECK-NEXT: vaddpd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x58,0x07]
388 ; CHECK-NEXT: retq ## encoding: [0xc3]
389 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
390 %tmp = load <4 x double>, <4 x double>* %j
391 %x = fadd <4 x double> %i, %tmp
392 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
; 256-bit <4 x double> fadd with a scalar double loaded from %j and splatted
; via insertelement + shufflevector; expected to fold into (%rdi){1to4}.
396 define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind {
397 ; CHECK-LABEL: test_broadcast2_vaddpd_256:
399 ; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0x58,0x07]
400 ; CHECK-NEXT: retq ## encoding: [0xc3]
401 %tmp = load double, double* %j
402 %b = insertelement <4 x double> undef, double %tmp, i32 0
403 %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
404 %x = fadd <4 x double> %c, %i
; Merge-masked 256-bit <4 x double> fadd with a scalar double loaded from %j
; and splatted; expected to fold into a masked (%rdi){1to4} operand.
; NOTE(review): the select passthrough is %i, not %dst (%dst is unused in the
; visible body), which is why a vmovapd %ymm1 -> %ymm0 copy is expected before
; the masked add. Confirm this is intended; the sibling mask tests merge into
; %dst.
408 define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i, double* %j, <4 x i64> %mask1) nounwind {
409 ; CHECK-LABEL: test_mask_broadcast_vaddpd_256:
411 ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
412 ; CHECK-NEXT: vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
413 ; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x39,0x58,0x07]
414 ; CHECK-NEXT: retq ## encoding: [0xc3]
415 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
416 %tmp = load double, double* %j
417 %b = insertelement <4 x double> undef, double %tmp, i32 0
418 %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
419 %x = fadd <4 x double> %c, %i
420 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %i
; Zero-masked 256-bit <4 x double> fadd with a scalar double loaded from %j
; and splatted; expected to fold into (%rdi){1to4} with {%k1} {z}.
424 define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j, <4 x i64> %mask1) nounwind {
425 ; CHECK-LABEL: test_maskz_broadcast_vaddpd_256:
427 ; CHECK-NEXT: vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
428 ; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x58,0x07]
429 ; CHECK-NEXT: retq ## encoding: [0xc3]
430 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
431 %tmp = load double, double* %j
432 %b = insertelement <4 x double> undef, double %tmp, i32 0
433 %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
434 %x = fadd <4 x double> %c, %i
435 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
; 128-bit <2 x i64> add of two register operands; expected vpaddq on xmm
; registers with the VEX encoding (EVEX-to-VEX compression).
441 define <2 x i64> @vpaddq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
442 ; CHECK-LABEL: vpaddq128_test:
444 ; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
445 ; CHECK-NEXT: retq ## encoding: [0xc3]
446 %x = add <2 x i64> %i, %j
; 128-bit <2 x i64> add whose second operand is a full-vector load (align 4);
; the load is expected to fold into vpaddq as a (%rdi) memory operand.
450 define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, <2 x i64>* %j) nounwind {
451 ; CHECK-LABEL: vpaddq128_fold_test:
453 ; CHECK-NEXT: vpaddq (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0x07]
454 ; CHECK-NEXT: retq ## encoding: [0xc3]
455 %tmp = load <2 x i64>, <2 x i64>* %j, align 4
456 %x = add <2 x i64> %i, %tmp
; 128-bit <2 x i64> add with a scalar i64 loaded from %j and placed in both
; lanes via two insertelements; expected to fold into (%rdi){1to2}.
460 define <2 x i64> @vpaddq128_broadcast2_test(<2 x i64> %i, i64* %j) nounwind {
461 ; CHECK-LABEL: vpaddq128_broadcast2_test:
463 ; CHECK-NEXT: vpaddq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xd4,0x07]
464 ; CHECK-NEXT: retq ## encoding: [0xc3]
465 %tmp = load i64, i64* %j
466 %j.0 = insertelement <2 x i64> undef, i64 %tmp, i32 0
467 %j.1 = insertelement <2 x i64> %j.0, i64 %tmp, i32 1
468 %x = add <2 x i64> %i, %j.1
; 128-bit <4 x i32> add of two register operands; expected vpaddd on xmm
; registers with the VEX encoding (EVEX-to-VEX compression).
472 define <4 x i32> @vpaddd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
473 ; CHECK-LABEL: vpaddd128_test:
475 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
476 ; CHECK-NEXT: retq ## encoding: [0xc3]
477 %x = add <4 x i32> %i, %j
; 128-bit <4 x i32> add whose second operand is a full-vector load (align 4);
; the load is expected to fold into vpaddd as a (%rdi) memory operand.
481 define <4 x i32> @vpaddd128_fold_test(<4 x i32> %i, <4 x i32>* %j) nounwind {
482 ; CHECK-LABEL: vpaddd128_fold_test:
484 ; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x07]
485 ; CHECK-NEXT: retq ## encoding: [0xc3]
486 %tmp = load <4 x i32>, <4 x i32>* %j, align 4
487 %x = add <4 x i32> %i, %tmp
; 128-bit <4 x i32> add with a splat constant (6); expected to use a
; constant-pool {1to4} embedded-broadcast operand (EVEX encoding).
491 define <4 x i32> @vpaddd128_broadcast_test(<4 x i32> %i) nounwind {
492 ; CHECK-LABEL: vpaddd128_broadcast_test:
494 ; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
495 ; CHECK-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
496 ; CHECK-NEXT: retq ## encoding: [0xc3]
497 %x = add <4 x i32> %i, <i32 6, i32 6, i32 6, i32 6>
; Merge-masked 128-bit <4 x i32> add: lanes where %mask1 is non-zero take the
; sum, the rest keep %i. Expected: vptestmd -> %k1, vpaddd {%k1}.
501 define <4 x i32> @vpaddd128_mask_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
502 ; CHECK-LABEL: vpaddd128_mask_test:
504 ; CHECK-NEXT: vptestmd %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x08,0x27,0xca]
505 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xc1]
506 ; CHECK-NEXT: retq ## encoding: [0xc3]
507 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
508 %x = add <4 x i32> %i, %j
509 %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
; Zero-masked 128-bit <4 x i32> add: lanes where %mask1 is zero become 0.
; Expected: vptestmd -> %k1, vpaddd {%k1} {z}.
513 define <4 x i32> @vpaddd128_maskz_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
514 ; CHECK-LABEL: vpaddd128_maskz_test:
516 ; CHECK-NEXT: vptestmd %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x08,0x27,0xca]
517 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
518 ; CHECK-NEXT: retq ## encoding: [0xc3]
519 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
520 %x = add <4 x i32> %i, %j
521 %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Merge-masked 128-bit <4 x i32> add with the second operand loaded from
; %j.ptr; the load is expected to fold into the masked vpaddd as (%rdi).
; The function reads memory through %j.ptr, so it must not be marked
; `readnone` (that attribute forbids dereferencing pointer arguments).
525 define <4 x i32> @vpaddd128_mask_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind {
526 ; CHECK-LABEL: vpaddd128_mask_fold_test:
528 ; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
529 ; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x07]
530 ; CHECK-NEXT: retq ## encoding: [0xc3]
531 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
532 %j = load <4 x i32>, <4 x i32>* %j.ptr
533 %x = add <4 x i32> %i, %j
534 %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
; Merge-masked 128-bit <4 x i32> add with a splat constant (7); expected to use
; a constant-pool {1to4} broadcast operand under mask %k1, passthrough %i.
538 define <4 x i32> @vpaddd128_mask_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
539 ; CHECK-LABEL: vpaddd128_mask_broadcast_test:
541 ; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
542 ; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x05,A,A,A,A]
543 ; CHECK-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
544 ; CHECK-NEXT: retq ## encoding: [0xc3]
545 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
546 %x = add <4 x i32> %i, <i32 7, i32 7, i32 7, i32 7>
547 %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
; Zero-masked 128-bit <4 x i32> add with the second operand loaded from
; %j.ptr; the load is expected to fold into vpaddd {%k1} {z} as (%rdi).
; The function reads memory through %j.ptr, so it must not be marked
; `readnone` (that attribute forbids dereferencing pointer arguments).
551 define <4 x i32> @vpaddd128_maskz_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind {
552 ; CHECK-LABEL: vpaddd128_maskz_fold_test:
554 ; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
555 ; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
556 ; CHECK-NEXT: retq ## encoding: [0xc3]
557 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
558 %j = load <4 x i32>, <4 x i32>* %j.ptr
559 %x = add <4 x i32> %i, %j
560 %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Zero-masked 128-bit <4 x i32> add with a splat constant (8); expected to use
; a constant-pool {1to4} broadcast operand with {%k1} {z}.
564 define <4 x i32> @vpaddd128_maskz_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
565 ; CHECK-LABEL: vpaddd128_maskz_broadcast_test:
567 ; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
568 ; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x05,A,A,A,A]
569 ; CHECK-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
570 ; CHECK-NEXT: retq ## encoding: [0xc3]
571 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
572 %x = add <4 x i32> %i, <i32 8, i32 8, i32 8, i32 8>
573 %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; 128-bit <2 x i64> subtract of two register operands; expected vpsubq with
; the VEX encoding (EVEX-to-VEX compression).
577 define <2 x i64> @vpsubq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
578 ; CHECK-LABEL: vpsubq128_test:
580 ; CHECK-NEXT: vpsubq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
581 ; CHECK-NEXT: retq ## encoding: [0xc3]
582 %x = sub <2 x i64> %i, %j
; 128-bit <4 x i32> subtract of two register operands; expected vpsubd with
; the VEX encoding (EVEX-to-VEX compression).
586 define <4 x i32> @vpsubd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
587 ; CHECK-LABEL: vpsubd128_test:
589 ; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
590 ; CHECK-NEXT: retq ## encoding: [0xc3]
591 %x = sub <4 x i32> %i, %j
; 128-bit <4 x i32> multiply of two register operands; expected vpmulld with
; the VEX encoding (EVEX-to-VEX compression).
595 define <4 x i32> @vpmulld128_test(<4 x i32> %i, <4 x i32> %j) {
596 ; CHECK-LABEL: vpmulld128_test:
598 ; CHECK-NEXT: vpmulld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x40,0xc1]
599 ; CHECK-NEXT: retq ## encoding: [0xc3]
600 %x = mul <4 x i32> %i, %j
; 128-bit <2 x double> fadd of two register operands. The IR adds %x + %y
; (arguments are declared as %y, %x), so the expected vaddpd reads
; %xmm0, %xmm1 in that order; VEX encoding is expected.
604 define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) {
605 ; CHECK-LABEL: test_vaddpd_128:
606 ; CHECK: ## %bb.0: ## %entry
607 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
608 ; CHECK-NEXT: retq ## encoding: [0xc3]
610 %add.i = fadd <2 x double> %x, %y
611 ret <2 x double> %add.i
; 128-bit <2 x double> fadd with a non-splat constant vector; the constant is
; expected to be a full-width constant-pool memory operand (no broadcast), so
; the VEX-compressed encoding applies.
614 define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) {
615 ; CHECK-LABEL: test_fold_vaddpd_128:
616 ; CHECK: ## %bb.0: ## %entry
617 ; CHECK-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0x05,A,A,A,A]
618 ; CHECK-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
619 ; CHECK-NEXT: retq ## encoding: [0xc3]
621 %add.i = fadd <2 x double> %y, <double 4.500000e+00, double 3.400000e+00>
622 ret <2 x double> %add.i
; 128-bit <4 x float> fadd with a splat float constant; expected to use a
; constant-pool {1to4} embedded-broadcast operand.
; NOTE(review): the function name says "vaddpd" but the operands are
; <4 x float> and the expected instruction is vaddps.
625 define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind {
626 ; CHECK-LABEL: test_broadcast_vaddpd_128:
628 ; CHECK-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x58,0x05,A,A,A,A]
629 ; CHECK-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
630 ; CHECK-NEXT: retq ## encoding: [0xc3]
631 %b = fadd <4 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
; Merge-masked 128-bit <4 x float> fadd: lanes with non-zero %mask1 take
; %i + %j, the rest keep %dst. Expected: vptestmd -> %k1, vaddps {%k1}.
635 define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
636 ; CHECK-LABEL: test_mask_vaddps_128:
638 ; CHECK-NEXT: vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
639 ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x58,0xc2]
640 ; CHECK-NEXT: retq ## encoding: [0xc3]
641 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
642 %x = fadd <4 x float> %i, %j
643 %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
; Merge-masked 128-bit <4 x float> fmul: lanes with non-zero %mask1 take
; %i * %j, the rest keep %dst. Expected: vptestmd -> %k1, vmulps {%k1}.
647 define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
648 ; CHECK-LABEL: test_mask_vmulps_128:
650 ; CHECK-NEXT: vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
651 ; CHECK-NEXT: vmulps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x59,0xc2]
652 ; CHECK-NEXT: retq ## encoding: [0xc3]
653 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
654 %x = fmul <4 x float> %i, %j
655 %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
; Merge-masked 128-bit <4 x float> minimum written as fcmp olt + select;
; expected to become a single vminps under %k1, passthrough %dst.
659 define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
660 ; CHECK-LABEL: test_mask_vminps_128:
662 ; CHECK-NEXT: vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
663 ; CHECK-NEXT: vminps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5d,0xc2]
664 ; CHECK-NEXT: retq ## encoding: [0xc3]
665 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
666 %cmp_res = fcmp olt <4 x float> %i, %j
667 %min = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
668 %r = select <4 x i1> %mask, <4 x float> %min, <4 x float> %dst
; Merge-masked 128-bit <4 x float> maximum written as fcmp ogt + select;
; expected to become a single vmaxps under %k1, passthrough %dst.
672 define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
673 ; CHECK-LABEL: test_mask_vmaxps_128:
675 ; CHECK-NEXT: vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
676 ; CHECK-NEXT: vmaxps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5f,0xc2]
677 ; CHECK-NEXT: retq ## encoding: [0xc3]
678 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
679 %cmp_res = fcmp ogt <4 x float> %i, %j
680 %max = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
681 %r = select <4 x i1> %mask, <4 x float> %max, <4 x float> %dst
; Merge-masked 128-bit <4 x float> fsub: lanes with non-zero %mask1 take
; %i - %j, the rest keep %dst. Expected: vptestmd -> %k1, vsubps {%k1}.
685 define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
686 ; CHECK-LABEL: test_mask_vsubps_128:
688 ; CHECK-NEXT: vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
689 ; CHECK-NEXT: vsubps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5c,0xc2]
690 ; CHECK-NEXT: retq ## encoding: [0xc3]
691 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
692 %x = fsub <4 x float> %i, %j
693 %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
; Merge-masked 128-bit <4 x float> fdiv: lanes with non-zero %mask1 take
; %i / %j, the rest keep %dst. Expected: vptestmd -> %k1, vdivps {%k1}.
698 define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
699 ; CHECK-LABEL: test_mask_vdivps_128:
701 ; CHECK-NEXT: vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
702 ; CHECK-NEXT: vdivps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5e,0xc2]
703 ; CHECK-NEXT: retq ## encoding: [0xc3]
704 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
705 %x = fdiv <4 x float> %i, %j
706 %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
; Merge-masked 128-bit <2 x double> fmul with a <2 x i64> mask source.
; Expected: vptestmq -> %k1, vmulpd {%k1}, passthrough %dst.
710 define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
711 ; CHECK-LABEL: test_mask_vmulpd_128:
713 ; CHECK-NEXT: vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
714 ; CHECK-NEXT: vmulpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x59,0xc2]
715 ; CHECK-NEXT: retq ## encoding: [0xc3]
716 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
717 %x = fmul <2 x double> %i, %j
718 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
; Merge-masked 128-bit double-precision minimum. The minimum is written as an
; "fcmp olt" feeding a select of %i over %j; a second select then keeps %dst in
; lanes whose %mask1 element is zero.
; Expected lowering: vptestmq builds %k1, then a single vminpd under {%k1}.
722 define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
723 ; CHECK-LABEL: test_mask_vminpd_128:
725 ; CHECK-NEXT: vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
726 ; CHECK-NEXT: vminpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5d,0xc2]
727 ; CHECK-NEXT: retq ## encoding: [0xc3]
728 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
729 %cmp_res = fcmp olt <2 x double> %i, %j
730 %min = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
731 %r = select <2 x i1> %mask, <2 x double> %min, <2 x double> %dst
; Merge-masked 128-bit double-precision maximum. The maximum is written as an
; "fcmp ogt" feeding a select of %i over %j; a second select then keeps %dst in
; lanes whose %mask1 element is zero.
; Expected lowering: vptestmq builds %k1, then a single vmaxpd under {%k1}.
735 define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
736 ; CHECK-LABEL: test_mask_vmaxpd_128:
738 ; CHECK-NEXT: vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
739 ; CHECK-NEXT: vmaxpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5f,0xc2]
740 ; CHECK-NEXT: retq ## encoding: [0xc3]
741 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
742 %cmp_res = fcmp ogt <2 x double> %i, %j
743 %max = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
744 %r = select <2 x i1> %mask, <2 x double> %max, <2 x double> %dst
; Merge-masked 128-bit double-precision subtract: lanes whose %mask1 element is
; non-zero take %i - %j, the remaining lanes keep %dst.
; Expected lowering: vptestmq builds %k1, then a single vsubpd under {%k1}.
748 define <2 x double> @test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
749 ; CHECK-LABEL: test_mask_vsubpd_128:
751 ; CHECK-NEXT: vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
752 ; CHECK-NEXT: vsubpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5c,0xc2]
753 ; CHECK-NEXT: retq ## encoding: [0xc3]
754 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
755 %x = fsub <2 x double> %i, %j
756 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
; Merge-masked 128-bit double-precision divide: lanes whose %mask1 element is
; non-zero take %i / %j, the remaining lanes keep %dst.
; Expected lowering: vptestmq builds %k1, then a single vdivpd under {%k1}.
760 define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
761 ; CHECK-LABEL: test_mask_vdivpd_128:
763 ; CHECK-NEXT: vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
764 ; CHECK-NEXT: vdivpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5e,0xc2]
765 ; CHECK-NEXT: retq ## encoding: [0xc3]
766 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
767 %x = fdiv <2 x double> %i, %j
768 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
; Merge-masked 128-bit double-precision add: lanes whose %mask1 element is
; non-zero take %i + %j, the remaining lanes keep %dst.
; Expected lowering: vptestmq builds %k1, then a single vaddpd under {%k1}.
772 define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
773 ; CHECK-LABEL: test_mask_vaddpd_128:
775 ; CHECK-NEXT: vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
776 ; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x58,0xc2]
777 ; CHECK-NEXT: retq ## encoding: [0xc3]
778 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
779 %x = fadd <2 x double> %i, %j
780 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
; Zero-masked 128-bit double-precision add: lanes whose %mask1 element is
; non-zero take %i + %j, the remaining lanes are zeroinitializer.
; Expected lowering: vptestmq builds %k1, then a single vaddpd under {%k1} {z}.
; The parameter list is split over two lines; the autogenerated assertion lines
; sit between the two halves of the signature.
784 define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j,
785 ; CHECK-LABEL: test_maskz_vaddpd_128:
787 ; CHECK-NEXT: vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca]
788 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x58,0xc1]
789 ; CHECK-NEXT: retq ## encoding: [0xc3]
790 <2 x i64> %mask1) nounwind readnone {
791 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
792 %x = fadd <2 x double> %i, %j
793 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
; Merge-masked 128-bit double-precision add whose second operand is loaded from
; the pointer %j; lanes whose %mask1 element is zero keep %dst.
; Expected lowering: the load shows up as the (%rdi) memory operand of the
; masked vaddpd, with vptestmq producing %k1 beforehand.
797 define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i, <2 x double>* %j, <2 x i64> %mask1) nounwind {
798 ; CHECK-LABEL: test_mask_fold_vaddpd_128:
800 ; CHECK-NEXT: vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca]
801 ; CHECK-NEXT: vaddpd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x58,0x07]
802 ; CHECK-NEXT: retq ## encoding: [0xc3]
803 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
804 %tmp = load <2 x double>, <2 x double>* %j
805 %x = fadd <2 x double> %i, %tmp
806 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
; Zero-masked 128-bit double-precision add whose second operand is loaded from
; the pointer %j; lanes whose %mask1 element is zero become zeroinitializer.
; Expected lowering: the load shows up as the (%rdi) memory operand of a
; vaddpd under {%k1} {z}, with vptestmq producing %k1 beforehand.
810 define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j, <2 x i64> %mask1) nounwind {
811 ; CHECK-LABEL: test_maskz_fold_vaddpd_128:
813 ; CHECK-NEXT: vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
814 ; CHECK-NEXT: vaddpd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x58,0x07]
815 ; CHECK-NEXT: retq ## encoding: [0xc3]
816 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
817 %tmp = load <2 x double>, <2 x double>* %j
818 %x = fadd <2 x double> %i, %tmp
819 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
; Unmasked 128-bit double-precision add of %i with a scalar double loaded from
; %j and placed in both lanes through two insertelement instructions.
; Expected lowering: a single vaddpd with an embedded {1to2} broadcast of
; (%rdi); the expected encoding is EVEX (leading 0x62 byte).
823 define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind {
824 ; CHECK-LABEL: test_broadcast2_vaddpd_128:
826 ; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0x58,0x07]
827 ; CHECK-NEXT: retq ## encoding: [0xc3]
828 %tmp = load double, double* %j
829 %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
830 %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
831 %x = fadd <2 x double> %j.1, %i
; Merge-masked 128-bit double-precision add of %i with a scalar double loaded
; from %j and placed in both lanes through two insertelement instructions.
; The passthrough operand of the final select is %i, so %dst is never read by
; the visible body. The expected output starts with a vmovapd %xmm1, %xmm0
; ahead of the masked {1to2} vaddpd, presumably to seed the result register
; with the passthrough value.
; NOTE(review): the sibling test_mask_* functions in this file pass %dst
; through instead; confirm that selecting %i here is intentional.
835 define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i, double* %j, <2 x i64> %mask1) nounwind {
836 ; CHECK-LABEL: test_mask_broadcast_vaddpd_128:
838 ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
839 ; CHECK-NEXT: vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca]
840 ; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x19,0x58,0x07]
841 ; CHECK-NEXT: retq ## encoding: [0xc3]
842 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
843 %tmp = load double, double* %j
844 %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
845 %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
846 %x = fadd <2 x double> %j.1, %i
847 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %i
; Zero-masked 128-bit double-precision add of %i with a scalar double loaded
; from %j and placed in both lanes through two insertelement instructions;
; lanes whose %mask1 element is zero become zeroinitializer.
; Expected lowering: vptestmq builds %k1, then a single vaddpd with an embedded
; {1to2} broadcast of (%rdi) under {%k1} {z}.
851 define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j, <2 x i64> %mask1) nounwind {
852 ; CHECK-LABEL: test_maskz_broadcast_vaddpd_128:
854 ; CHECK-NEXT: vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
855 ; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x58,0x07]
856 ; CHECK-NEXT: retq ## encoding: [0xc3]
857 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
858 %tmp = load double, double* %j
859 %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
860 %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
861 %x = fadd <2 x double> %j.1, %i
862 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer