1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s
4 declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
5 declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
6 declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
7 declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
8 declare <8 x double> @llvm.floor.v8f64(<8 x double> %p)
9 declare <16 x float> @llvm.floor.v16f32(<16 x float> %p)
10 declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
11 declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
12 declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
13 declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
14 declare <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
15 declare <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
16 declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
17 declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
18 declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
19 declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
20 declare <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
21 declare <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
22 declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)
23 declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)
24 declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)
25 declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)
26 declare <8 x double> @llvm.rint.v8f64(<8 x double> %p)
27 declare <16 x float> @llvm.rint.v16f32(<16 x float> %p)
28 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
29 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
30 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
31 declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
32 declare <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
33 declare <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
; Applies llvm.floor to a <2 x double> argument.
; The check below expects `vroundpd $9` from %xmm0 into %xmm0.
35 define <2 x double> @floor_v2f64(<2 x double> %p) {
36 ; CHECK-LABEL: floor_v2f64:
38 ; CHECK-NEXT: vroundpd $9, %xmm0, %xmm0
40 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
; Applies llvm.floor to a <4 x float> argument.
; The check below expects `vroundps $9` from %xmm0 into %xmm0.
44 define <4 x float> @floor_v4f32(<4 x float> %p) {
45 ; CHECK-LABEL: floor_v4f32:
47 ; CHECK-NEXT: vroundps $9, %xmm0, %xmm0
49 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
; Applies llvm.floor to a <4 x double> argument.
; The check below expects `vroundpd $9` from %ymm0 into %ymm0.
53 define <4 x double> @floor_v4f64(<4 x double> %p){
54 ; CHECK-LABEL: floor_v4f64:
56 ; CHECK-NEXT: vroundpd $9, %ymm0, %ymm0
58 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
; Applies llvm.floor to an <8 x float> argument.
; The check below expects `vroundps $9` from %ymm0 into %ymm0.
62 define <8 x float> @floor_v8f32(<8 x float> %p) {
63 ; CHECK-LABEL: floor_v8f32:
65 ; CHECK-NEXT: vroundps $9, %ymm0, %ymm0
67 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
; Applies llvm.floor to an <8 x double> argument.
; The check below expects `vrndscalepd $9` from %zmm0 into %zmm0.
71 define <8 x double> @floor_v8f64(<8 x double> %p){
72 ; CHECK-LABEL: floor_v8f64:
74 ; CHECK-NEXT: vrndscalepd $9, %zmm0, %zmm0
76 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
; Applies llvm.floor to a <16 x float> argument.
; The check below expects `vrndscaleps $9` from %zmm0 into %zmm0.
80 define <16 x float> @floor_v16f32(<16 x float> %p) {
81 ; CHECK-LABEL: floor_v16f32:
83 ; CHECK-NEXT: vrndscaleps $9, %zmm0, %zmm0
85 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
; Loads a <2 x double> from %ptr and applies llvm.floor to it.
; The check below expects `vroundpd $9` to read its source directly from (%rdi).
89 define <2 x double> @floor_v2f64_load(ptr %ptr) {
90 ; CHECK-LABEL: floor_v2f64_load:
92 ; CHECK-NEXT: vroundpd $9, (%rdi), %xmm0
94 %p = load <2 x double>, ptr %ptr
95 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
; Loads a <4 x float> from %ptr and applies llvm.floor to it.
; The check below expects `vroundps $9` to read its source directly from (%rdi).
99 define <4 x float> @floor_v4f32_load(ptr %ptr) {
100 ; CHECK-LABEL: floor_v4f32_load:
102 ; CHECK-NEXT: vroundps $9, (%rdi), %xmm0
104 %p = load <4 x float>, ptr %ptr
105 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
; Loads a <4 x double> from %ptr and applies llvm.floor to it.
; The check below expects `vroundpd $9` to read its source directly from (%rdi).
109 define <4 x double> @floor_v4f64_load(ptr %ptr){
110 ; CHECK-LABEL: floor_v4f64_load:
112 ; CHECK-NEXT: vroundpd $9, (%rdi), %ymm0
114 %p = load <4 x double>, ptr %ptr
115 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
; Loads an <8 x float> from %ptr and applies llvm.floor to it.
; The check below expects `vroundps $9` to read its source directly from (%rdi).
119 define <8 x float> @floor_v8f32_load(ptr %ptr) {
120 ; CHECK-LABEL: floor_v8f32_load:
122 ; CHECK-NEXT: vroundps $9, (%rdi), %ymm0
124 %p = load <8 x float>, ptr %ptr
125 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
; Loads an <8 x double> from %ptr and applies llvm.floor to it.
; The check below expects `vrndscalepd $9` to read its source directly from (%rdi).
129 define <8 x double> @floor_v8f64_load(ptr %ptr){
130 ; CHECK-LABEL: floor_v8f64_load:
132 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %zmm0
134 %p = load <8 x double>, ptr %ptr
135 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
; Loads a <16 x float> from %ptr and applies llvm.floor to it.
; The check below expects `vrndscaleps $9` to read its source directly from (%rdi).
139 define <16 x float> @floor_v16f32_load(ptr %ptr) {
140 ; CHECK-LABEL: floor_v16f32_load:
142 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %zmm0
144 %p = load <16 x float>, ptr %ptr
145 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
; Per lane, selects llvm.floor(%p) where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmq to set %k1 from %xmm2, `vrndscalepd $9` to
; write %xmm1 under {%k1}, and vmovapd to copy %xmm1 into %xmm0.
149 define <2 x double> @floor_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
150 ; CHECK-LABEL: floor_v2f64_mask:
152 ; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1
153 ; CHECK-NEXT: vrndscalepd $9, %xmm0, %xmm1 {%k1}
154 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
156 %c = icmp eq <2 x i64> %cmp, zeroinitializer
157 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
158 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; Per lane, selects llvm.floor(%p) where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmd to set %k1 from %xmm2, `vrndscaleps $9` to
; write %xmm1 under {%k1}, and vmovaps to copy %xmm1 into %xmm0.
162 define <4 x float> @floor_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
163 ; CHECK-LABEL: floor_v4f32_mask:
165 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
166 ; CHECK-NEXT: vrndscaleps $9, %xmm0, %xmm1 {%k1}
167 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
169 %c = icmp eq <4 x i32> %cmp, zeroinitializer
170 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
171 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; Per lane, selects llvm.floor(%p) where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmq to set %k1 from %ymm2, `vrndscalepd $9` to
; write %ymm1 under {%k1}, and vmovapd to copy %ymm1 into %ymm0.
175 define <4 x double> @floor_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
176 ; CHECK-LABEL: floor_v4f64_mask:
178 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
179 ; CHECK-NEXT: vrndscalepd $9, %ymm0, %ymm1 {%k1}
180 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
182 %c = icmp eq <4 x i64> %cmp, zeroinitializer
183 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
184 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; Per lane, selects llvm.floor(%p) where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmd to set %k1 from %ymm2, `vrndscaleps $9` to
; write %ymm1 under {%k1}, and vmovaps to copy %ymm1 into %ymm0.
188 define <8 x float> @floor_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
189 ; CHECK-LABEL: floor_v8f32_mask:
191 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
192 ; CHECK-NEXT: vrndscaleps $9, %ymm0, %ymm1 {%k1}
193 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
195 %c = icmp eq <8 x i32> %cmp, zeroinitializer
196 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
197 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; Per lane, selects llvm.floor(%p) where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmq to set %k1 from %zmm2, `vrndscalepd $9` to
; write %zmm1 under {%k1}, and vmovapd to copy %zmm1 into %zmm0.
201 define <8 x double> @floor_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
202 ; CHECK-LABEL: floor_v8f64_mask:
204 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
205 ; CHECK-NEXT: vrndscalepd $9, %zmm0, %zmm1 {%k1}
206 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
208 %c = icmp eq <8 x i64> %cmp, zeroinitializer
209 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
210 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; Per lane, selects llvm.floor(%p) where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmd to set %k1 from %zmm2, `vrndscaleps $9` to
; write %zmm1 under {%k1}, and vmovaps to copy %zmm1 into %zmm0.
214 define <16 x float> @floor_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
215 ; CHECK-LABEL: floor_v16f32_mask:
217 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
218 ; CHECK-NEXT: vrndscaleps $9, %zmm0, %zmm1 {%k1}
219 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
221 %c = icmp eq <16 x i32> %cmp, zeroinitializer
222 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
223 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; Per lane, selects llvm.floor(%p) where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmq to set %k1 from %xmm1, then `vrndscalepd $9`
; on %xmm0 with {%k1} {z}.
227 define <2 x double> @floor_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
228 ; CHECK-LABEL: floor_v2f64_maskz:
230 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
231 ; CHECK-NEXT: vrndscalepd $9, %xmm0, %xmm0 {%k1} {z}
233 %c = icmp eq <2 x i64> %cmp, zeroinitializer
234 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
235 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; Per lane, selects llvm.floor(%p) where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmd to set %k1 from %xmm1, then `vrndscaleps $9`
; on %xmm0 with {%k1} {z}.
239 define <4 x float> @floor_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
240 ; CHECK-LABEL: floor_v4f32_maskz:
242 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
243 ; CHECK-NEXT: vrndscaleps $9, %xmm0, %xmm0 {%k1} {z}
245 %c = icmp eq <4 x i32> %cmp, zeroinitializer
246 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
247 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; Per lane, selects llvm.floor(%p) where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmq to set %k1 from %ymm1, then `vrndscalepd $9`
; on %ymm0 with {%k1} {z}.
251 define <4 x double> @floor_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
252 ; CHECK-LABEL: floor_v4f64_maskz:
254 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
255 ; CHECK-NEXT: vrndscalepd $9, %ymm0, %ymm0 {%k1} {z}
257 %c = icmp eq <4 x i64> %cmp, zeroinitializer
258 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
259 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; Per lane, selects llvm.floor(%p) where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmd to set %k1 from %ymm1, then `vrndscaleps $9`
; on %ymm0 with {%k1} {z}.
263 define <8 x float> @floor_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
264 ; CHECK-LABEL: floor_v8f32_maskz:
266 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
267 ; CHECK-NEXT: vrndscaleps $9, %ymm0, %ymm0 {%k1} {z}
269 %c = icmp eq <8 x i32> %cmp, zeroinitializer
270 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
271 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; Per lane, selects llvm.floor(%p) where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmq to set %k1 from %zmm1, then `vrndscalepd $9`
; on %zmm0 with {%k1} {z}.
275 define <8 x double> @floor_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
276 ; CHECK-LABEL: floor_v8f64_maskz:
278 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
279 ; CHECK-NEXT: vrndscalepd $9, %zmm0, %zmm0 {%k1} {z}
281 %c = icmp eq <8 x i64> %cmp, zeroinitializer
282 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
283 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; Per lane, selects llvm.floor(%p) where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmd to set %k1 from %zmm1, then `vrndscaleps $9`
; on %zmm0 with {%k1} {z}.
287 define <16 x float> @floor_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
288 ; CHECK-LABEL: floor_v16f32_maskz:
290 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
291 ; CHECK-NEXT: vrndscaleps $9, %zmm0, %zmm0 {%k1} {z}
293 %c = icmp eq <16 x i32> %cmp, zeroinitializer
294 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
295 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; Loads a <2 x double> from %ptr, then per lane selects llvm.floor of the loaded
; value where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmq to set %k1 from %xmm1, then `vrndscalepd $9`
; reading (%rdi) and writing %xmm0 under {%k1}.
299 define <2 x double> @floor_v2f64_mask_load(ptr %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
300 ; CHECK-LABEL: floor_v2f64_mask_load:
302 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
303 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %xmm0 {%k1}
305 %c = icmp eq <2 x i64> %cmp, zeroinitializer
306 %p = load <2 x double>, ptr %ptr
307 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
308 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; Loads a <4 x float> from %ptr, then per lane selects llvm.floor of the loaded
; value where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmd to set %k1 from %xmm1, then `vrndscaleps $9`
; reading (%rdi) and writing %xmm0 under {%k1}.
312 define <4 x float> @floor_v4f32_mask_load(ptr %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
313 ; CHECK-LABEL: floor_v4f32_mask_load:
315 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
316 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %xmm0 {%k1}
318 %c = icmp eq <4 x i32> %cmp, zeroinitializer
319 %p = load <4 x float>, ptr %ptr
320 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
321 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; Loads a <4 x double> from %ptr, then per lane selects llvm.floor of the loaded
; value where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmq to set %k1 from %ymm1, then `vrndscalepd $9`
; reading (%rdi) and writing %ymm0 under {%k1}.
325 define <4 x double> @floor_v4f64_mask_load(ptr %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
326 ; CHECK-LABEL: floor_v4f64_mask_load:
328 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
329 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %ymm0 {%k1}
331 %c = icmp eq <4 x i64> %cmp, zeroinitializer
332 %p = load <4 x double>, ptr %ptr
333 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
334 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; Loads an <8 x float> from %ptr, then per lane selects llvm.floor of the loaded
; value where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmd to set %k1 from %ymm1, then `vrndscaleps $9`
; reading (%rdi) and writing %ymm0 under {%k1}.
338 define <8 x float> @floor_v8f32_mask_load(ptr %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
339 ; CHECK-LABEL: floor_v8f32_mask_load:
341 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
342 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %ymm0 {%k1}
344 %c = icmp eq <8 x i32> %cmp, zeroinitializer
345 %p = load <8 x float>, ptr %ptr
346 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
347 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; Loads an <8 x double> from %ptr, then per lane selects llvm.floor of the loaded
; value where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmq to set %k1 from %zmm1, then `vrndscalepd $9`
; reading (%rdi) and writing %zmm0 under {%k1}.
351 define <8 x double> @floor_v8f64_mask_load(ptr %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
352 ; CHECK-LABEL: floor_v8f64_mask_load:
354 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
355 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %zmm0 {%k1}
357 %c = icmp eq <8 x i64> %cmp, zeroinitializer
358 %p = load <8 x double>, ptr %ptr
359 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
360 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; Loads a <16 x float> from %ptr, then per lane selects llvm.floor of the loaded
; value where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmd to set %k1 from %zmm1, then `vrndscaleps $9`
; reading (%rdi) and writing %zmm0 under {%k1}.
364 define <16 x float> @floor_v16f32_mask_load(ptr %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
365 ; CHECK-LABEL: floor_v16f32_mask_load:
367 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
368 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %zmm0 {%k1}
370 %c = icmp eq <16 x i32> %cmp, zeroinitializer
371 %p = load <16 x float>, ptr %ptr
372 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
373 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; Loads a <2 x double> from %ptr, then per lane selects llvm.floor of the loaded
; value where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmq to set %k1 from %xmm0, then `vrndscalepd $9`
; reading (%rdi) and writing %xmm0 with {%k1} {z}.
377 define <2 x double> @floor_v2f64_maskz_load(ptr %ptr, <2 x i64> %cmp) {
378 ; CHECK-LABEL: floor_v2f64_maskz_load:
380 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
381 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %xmm0 {%k1} {z}
383 %c = icmp eq <2 x i64> %cmp, zeroinitializer
384 %p = load <2 x double>, ptr %ptr
385 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
386 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; Loads a <4 x float> from %ptr, then per lane selects llvm.floor of the loaded
; value where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmd to set %k1 from %xmm0, then `vrndscaleps $9`
; reading (%rdi) and writing %xmm0 with {%k1} {z}.
390 define <4 x float> @floor_v4f32_maskz_load(ptr %ptr, <4 x i32> %cmp) {
391 ; CHECK-LABEL: floor_v4f32_maskz_load:
393 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
394 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %xmm0 {%k1} {z}
396 %c = icmp eq <4 x i32> %cmp, zeroinitializer
397 %p = load <4 x float>, ptr %ptr
398 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
399 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; Loads a <4 x double> from %ptr, then per lane selects llvm.floor of the loaded
; value where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmq to set %k1 from %ymm0, then `vrndscalepd $9`
; reading (%rdi) and writing %ymm0 with {%k1} {z}.
403 define <4 x double> @floor_v4f64_maskz_load(ptr %ptr, <4 x i64> %cmp) {
404 ; CHECK-LABEL: floor_v4f64_maskz_load:
406 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
407 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %ymm0 {%k1} {z}
409 %c = icmp eq <4 x i64> %cmp, zeroinitializer
410 %p = load <4 x double>, ptr %ptr
411 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
412 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; Loads an <8 x float> from %ptr, then per lane selects llvm.floor of the loaded
; value where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmd to set %k1 from %ymm0, then `vrndscaleps $9`
; reading (%rdi) and writing %ymm0 with {%k1} {z}.
416 define <8 x float> @floor_v8f32_maskz_load(ptr %ptr, <8 x i32> %cmp) {
417 ; CHECK-LABEL: floor_v8f32_maskz_load:
419 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
420 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %ymm0 {%k1} {z}
422 %c = icmp eq <8 x i32> %cmp, zeroinitializer
423 %p = load <8 x float>, ptr %ptr
424 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
425 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; Loads an <8 x double> from %ptr, then per lane selects llvm.floor of the loaded
; value where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmq to set %k1 from %zmm0, then `vrndscalepd $9`
; reading (%rdi) and writing %zmm0 with {%k1} {z}.
429 define <8 x double> @floor_v8f64_maskz_load(ptr %ptr, <8 x i64> %cmp) {
430 ; CHECK-LABEL: floor_v8f64_maskz_load:
432 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
433 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %zmm0 {%k1} {z}
435 %c = icmp eq <8 x i64> %cmp, zeroinitializer
436 %p = load <8 x double>, ptr %ptr
437 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
438 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; Loads a <16 x float> from %ptr, then per lane selects llvm.floor of the loaded
; value where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmd to set %k1 from %zmm0, then `vrndscaleps $9`
; reading (%rdi) and writing %zmm0 with {%k1} {z}.
442 define <16 x float> @floor_v16f32_maskz_load(ptr %ptr, <16 x i32> %cmp) {
443 ; CHECK-LABEL: floor_v16f32_maskz_load:
445 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
446 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %zmm0 {%k1} {z}
448 %c = icmp eq <16 x i32> %cmp, zeroinitializer
449 %p = load <16 x float>, ptr %ptr
450 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
451 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; Loads one double from %ptr, splats it across a <2 x double> (insertelement
; into lane 0, then shufflevector with an all-zero mask), and applies llvm.floor.
; The check below expects `vrndscalepd $9` with a (%rdi){1to2} source operand.
455 define <2 x double> @floor_v2f64_broadcast(ptr %ptr) {
456 ; CHECK-LABEL: floor_v2f64_broadcast:
458 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to2}, %xmm0
460 %ps = load double, ptr %ptr
461 %pins = insertelement <2 x double> undef, double %ps, i32 0
462 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
463 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
; Loads one float from %ptr, splats it across a <4 x float> (insertelement
; into lane 0, then shufflevector with an all-zero mask), and applies llvm.floor.
; The check below expects `vrndscaleps $9` with a (%rdi){1to4} source operand.
467 define <4 x float> @floor_v4f32_broadcast(ptr %ptr) {
468 ; CHECK-LABEL: floor_v4f32_broadcast:
470 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to4}, %xmm0
472 %ps = load float, ptr %ptr
473 %pins = insertelement <4 x float> undef, float %ps, i32 0
474 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
475 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
; Loads one double from %ptr, splats it across a <4 x double> (insertelement
; into lane 0, then shufflevector with an all-zero mask), and applies llvm.floor.
; The check below expects `vrndscalepd $9` with a (%rdi){1to4} source operand.
479 define <4 x double> @floor_v4f64_broadcast(ptr %ptr){
480 ; CHECK-LABEL: floor_v4f64_broadcast:
482 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to4}, %ymm0
484 %ps = load double, ptr %ptr
485 %pins = insertelement <4 x double> undef, double %ps, i32 0
486 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
487 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
; Loads one float from %ptr, splats it across an <8 x float> (insertelement
; into lane 0, then shufflevector with an all-zero mask), and applies llvm.floor.
; The check below expects `vrndscaleps $9` with a (%rdi){1to8} source operand.
491 define <8 x float> @floor_v8f32_broadcast(ptr %ptr) {
492 ; CHECK-LABEL: floor_v8f32_broadcast:
494 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to8}, %ymm0
496 %ps = load float, ptr %ptr
497 %pins = insertelement <8 x float> undef, float %ps, i32 0
498 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
499 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
; Loads one double from %ptr, splats it across an <8 x double> (insertelement
; into lane 0, then shufflevector with an all-zero mask), and applies llvm.floor.
; The check below expects `vrndscalepd $9` with a (%rdi){1to8} source operand.
503 define <8 x double> @floor_v8f64_broadcast(ptr %ptr){
504 ; CHECK-LABEL: floor_v8f64_broadcast:
506 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to8}, %zmm0
508 %ps = load double, ptr %ptr
509 %pins = insertelement <8 x double> undef, double %ps, i32 0
510 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
511 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
; Loads one float from %ptr, splats it across a <16 x float> (insertelement
; into lane 0, then shufflevector with an all-zero mask), and applies llvm.floor.
; The check below expects `vrndscaleps $9` with a (%rdi){1to16} source operand.
515 define <16 x float> @floor_v16f32_broadcast(ptr %ptr) {
516 ; CHECK-LABEL: floor_v16f32_broadcast:
518 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to16}, %zmm0
520 %ps = load float, ptr %ptr
521 %pins = insertelement <16 x float> undef, float %ps, i32 0
522 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
523 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
; Splats one double loaded from %ptr across a <2 x double>, then per lane selects
; llvm.floor of the splat where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmq to set %k1 from %xmm1, then `vrndscalepd $9`
; with a (%rdi){1to2} source writing %xmm0 under {%k1}.
527 define <2 x double> @floor_v2f64_mask_broadcast(ptr %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
528 ; CHECK-LABEL: floor_v2f64_mask_broadcast:
530 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
531 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to2}, %xmm0 {%k1}
533 %c = icmp eq <2 x i64> %cmp, zeroinitializer
534 %ps = load double, ptr %ptr
535 %pins = insertelement <2 x double> undef, double %ps, i32 0
536 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
537 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
538 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; Splats one float loaded from %ptr across a <4 x float>, then per lane selects
; llvm.floor of the splat where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmd to set %k1 from %xmm1, then `vrndscaleps $9`
; with a (%rdi){1to4} source writing %xmm0 under {%k1}.
542 define <4 x float> @floor_v4f32_mask_broadcast(ptr %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
543 ; CHECK-LABEL: floor_v4f32_mask_broadcast:
545 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
546 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to4}, %xmm0 {%k1}
548 %c = icmp eq <4 x i32> %cmp, zeroinitializer
549 %ps = load float, ptr %ptr
550 %pins = insertelement <4 x float> undef, float %ps, i32 0
551 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
552 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
553 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; Splats one double loaded from %ptr across a <4 x double>, then per lane selects
; llvm.floor of the splat where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmq to set %k1 from %ymm1, then `vrndscalepd $9`
; with a (%rdi){1to4} source writing %ymm0 under {%k1}.
557 define <4 x double> @floor_v4f64_mask_broadcast(ptr %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
558 ; CHECK-LABEL: floor_v4f64_mask_broadcast:
560 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
561 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to4}, %ymm0 {%k1}
563 %c = icmp eq <4 x i64> %cmp, zeroinitializer
564 %ps = load double, ptr %ptr
565 %pins = insertelement <4 x double> undef, double %ps, i32 0
566 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
567 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
568 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; Splats one float loaded from %ptr across an <8 x float>, then per lane selects
; llvm.floor of the splat where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmd to set %k1 from %ymm1, then `vrndscaleps $9`
; with a (%rdi){1to8} source writing %ymm0 under {%k1}.
572 define <8 x float> @floor_v8f32_mask_broadcast(ptr %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
573 ; CHECK-LABEL: floor_v8f32_mask_broadcast:
575 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
576 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to8}, %ymm0 {%k1}
578 %c = icmp eq <8 x i32> %cmp, zeroinitializer
579 %ps = load float, ptr %ptr
580 %pins = insertelement <8 x float> undef, float %ps, i32 0
581 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
582 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
583 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; Splats one double loaded from %ptr across an <8 x double>, then per lane selects
; llvm.floor of the splat where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmq to set %k1 from %zmm1, then `vrndscalepd $9`
; with a (%rdi){1to8} source writing %zmm0 under {%k1}.
587 define <8 x double> @floor_v8f64_mask_broadcast(ptr %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
588 ; CHECK-LABEL: floor_v8f64_mask_broadcast:
590 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
591 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to8}, %zmm0 {%k1}
593 %c = icmp eq <8 x i64> %cmp, zeroinitializer
594 %ps = load double, ptr %ptr
595 %pins = insertelement <8 x double> undef, double %ps, i32 0
596 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
597 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
598 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; Splats one float loaded from %ptr across a <16 x float>, then per lane selects
; llvm.floor of the splat where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmd to set %k1 from %zmm1, then `vrndscaleps $9`
; with a (%rdi){1to16} source writing %zmm0 under {%k1}.
602 define <16 x float> @floor_v16f32_mask_broadcast(ptr %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
603 ; CHECK-LABEL: floor_v16f32_mask_broadcast:
605 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
606 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to16}, %zmm0 {%k1}
608 %c = icmp eq <16 x i32> %cmp, zeroinitializer
609 %ps = load float, ptr %ptr
610 %pins = insertelement <16 x float> undef, float %ps, i32 0
611 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
612 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
613 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; Splats one double loaded from %ptr across a <2 x double>, then per lane selects
; llvm.floor of the splat where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmq to set %k1 from %xmm0, then `vrndscalepd $9`
; with a (%rdi){1to2} source writing %xmm0 with {%k1} {z}.
617 define <2 x double> @floor_v2f64_maskz_broadcast(ptr %ptr, <2 x i64> %cmp) {
618 ; CHECK-LABEL: floor_v2f64_maskz_broadcast:
620 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
621 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to2}, %xmm0 {%k1} {z}
623 %c = icmp eq <2 x i64> %cmp, zeroinitializer
624 %ps = load double, ptr %ptr
625 %pins = insertelement <2 x double> undef, double %ps, i32 0
626 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
627 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
628 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; Splats one float loaded from %ptr across a <4 x float>, then per lane selects
; llvm.floor of the splat where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmd to set %k1 from %xmm0, then `vrndscaleps $9`
; with a (%rdi){1to4} source writing %xmm0 with {%k1} {z}.
632 define <4 x float> @floor_v4f32_maskz_broadcast(ptr %ptr, <4 x i32> %cmp) {
633 ; CHECK-LABEL: floor_v4f32_maskz_broadcast:
635 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
636 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to4}, %xmm0 {%k1} {z}
638 %c = icmp eq <4 x i32> %cmp, zeroinitializer
639 %ps = load float, ptr %ptr
640 %pins = insertelement <4 x float> undef, float %ps, i32 0
641 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
642 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
643 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; Splats one double loaded from %ptr across a <4 x double>, then per lane selects
; llvm.floor of the splat where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmq to set %k1 from %ymm0, then `vrndscalepd $9`
; with a (%rdi){1to4} source writing %ymm0 with {%k1} {z}.
647 define <4 x double> @floor_v4f64_maskz_broadcast(ptr %ptr, <4 x i64> %cmp) {
648 ; CHECK-LABEL: floor_v4f64_maskz_broadcast:
650 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
651 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to4}, %ymm0 {%k1} {z}
653 %c = icmp eq <4 x i64> %cmp, zeroinitializer
654 %ps = load double, ptr %ptr
655 %pins = insertelement <4 x double> undef, double %ps, i32 0
656 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
657 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
658 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; Splats one float loaded from %ptr across an <8 x float>, then per lane selects
; llvm.floor of the splat where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmd to set %k1 from %ymm0, then `vrndscaleps $9`
; with a (%rdi){1to8} source writing %ymm0 with {%k1} {z}.
662 define <8 x float> @floor_v8f32_maskz_broadcast(ptr %ptr, <8 x i32> %cmp) {
663 ; CHECK-LABEL: floor_v8f32_maskz_broadcast:
665 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
666 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to8}, %ymm0 {%k1} {z}
668 %c = icmp eq <8 x i32> %cmp, zeroinitializer
669 %ps = load float, ptr %ptr
670 %pins = insertelement <8 x float> undef, float %ps, i32 0
671 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
672 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
673 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; Splats one double loaded from %ptr across an <8 x double>, then per lane selects
; llvm.floor of the splat where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmq to set %k1 from %zmm0, then `vrndscalepd $9`
; with a (%rdi){1to8} source writing %zmm0 with {%k1} {z}.
677 define <8 x double> @floor_v8f64_maskz_broadcast(ptr %ptr, <8 x i64> %cmp) {
678 ; CHECK-LABEL: floor_v8f64_maskz_broadcast:
680 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
681 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to8}, %zmm0 {%k1} {z}
683 %c = icmp eq <8 x i64> %cmp, zeroinitializer
684 %ps = load double, ptr %ptr
685 %pins = insertelement <8 x double> undef, double %ps, i32 0
686 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
687 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
688 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; Splats one float loaded from %ptr across a <16 x float>, then per lane selects
; llvm.floor of the splat where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmd to set %k1 from %zmm0, then `vrndscaleps $9`
; with a (%rdi){1to16} source writing %zmm0 with {%k1} {z}.
692 define <16 x float> @floor_v16f32_maskz_broadcast(ptr %ptr, <16 x i32> %cmp) {
693 ; CHECK-LABEL: floor_v16f32_maskz_broadcast:
695 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
696 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to16}, %zmm0 {%k1} {z}
698 %c = icmp eq <16 x i32> %cmp, zeroinitializer
699 %ps = load float, ptr %ptr
700 %pins = insertelement <16 x float> undef, float %ps, i32 0
701 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
702 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
703 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; Applies llvm.ceil to a <2 x double> argument.
; The check below expects `vroundpd $10` from %xmm0 into %xmm0.
707 define <2 x double> @ceil_v2f64(<2 x double> %p) {
708 ; CHECK-LABEL: ceil_v2f64:
710 ; CHECK-NEXT: vroundpd $10, %xmm0, %xmm0
712 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
; Applies llvm.ceil to a <4 x float> argument.
; The check below expects `vroundps $10` from %xmm0 into %xmm0.
716 define <4 x float> @ceil_v4f32(<4 x float> %p) {
717 ; CHECK-LABEL: ceil_v4f32:
719 ; CHECK-NEXT: vroundps $10, %xmm0, %xmm0
721 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
; Applies llvm.ceil to a <4 x double> argument.
; The check below expects `vroundpd $10` from %ymm0 into %ymm0.
725 define <4 x double> @ceil_v4f64(<4 x double> %p){
726 ; CHECK-LABEL: ceil_v4f64:
728 ; CHECK-NEXT: vroundpd $10, %ymm0, %ymm0
730 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
; Applies llvm.ceil to an <8 x float> argument.
; The check below expects `vroundps $10` from %ymm0 into %ymm0.
734 define <8 x float> @ceil_v8f32(<8 x float> %p) {
735 ; CHECK-LABEL: ceil_v8f32:
737 ; CHECK-NEXT: vroundps $10, %ymm0, %ymm0
739 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
; Applies llvm.ceil to an <8 x double> argument.
; The check below expects `vrndscalepd $10` from %zmm0 into %zmm0.
743 define <8 x double> @ceil_v8f64(<8 x double> %p){
744 ; CHECK-LABEL: ceil_v8f64:
746 ; CHECK-NEXT: vrndscalepd $10, %zmm0, %zmm0
748 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
; Applies llvm.ceil to a <16 x float> argument.
; The check below expects `vrndscaleps $10` from %zmm0 into %zmm0.
752 define <16 x float> @ceil_v16f32(<16 x float> %p) {
753 ; CHECK-LABEL: ceil_v16f32:
755 ; CHECK-NEXT: vrndscaleps $10, %zmm0, %zmm0
757 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
; Loads a <2 x double> from %ptr and applies llvm.ceil to it.
; The check below expects `vroundpd $10` to read its source directly from (%rdi).
761 define <2 x double> @ceil_v2f64_load(ptr %ptr) {
762 ; CHECK-LABEL: ceil_v2f64_load:
764 ; CHECK-NEXT: vroundpd $10, (%rdi), %xmm0
766 %p = load <2 x double>, ptr %ptr
767 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
; Loads a <4 x float> from %ptr and applies llvm.ceil to it.
; The check below expects `vroundps $10` to read its source directly from (%rdi).
771 define <4 x float> @ceil_v4f32_load(ptr %ptr) {
772 ; CHECK-LABEL: ceil_v4f32_load:
774 ; CHECK-NEXT: vroundps $10, (%rdi), %xmm0
776 %p = load <4 x float>, ptr %ptr
777 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
; Loads a <4 x double> from %ptr and applies llvm.ceil to it.
; The check below expects `vroundpd $10` to read its source directly from (%rdi).
781 define <4 x double> @ceil_v4f64_load(ptr %ptr){
782 ; CHECK-LABEL: ceil_v4f64_load:
784 ; CHECK-NEXT: vroundpd $10, (%rdi), %ymm0
786 %p = load <4 x double>, ptr %ptr
787 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
; Loads an <8 x float> from %ptr and applies llvm.ceil to it.
; The check below expects `vroundps $10` to read its source directly from (%rdi).
791 define <8 x float> @ceil_v8f32_load(ptr %ptr) {
792 ; CHECK-LABEL: ceil_v8f32_load:
794 ; CHECK-NEXT: vroundps $10, (%rdi), %ymm0
796 %p = load <8 x float>, ptr %ptr
797 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
; Loads an <8 x double> from %ptr and applies llvm.ceil to it.
; The check below expects `vrndscalepd $10` to read its source directly from (%rdi).
801 define <8 x double> @ceil_v8f64_load(ptr %ptr){
802 ; CHECK-LABEL: ceil_v8f64_load:
804 ; CHECK-NEXT: vrndscalepd $10, (%rdi), %zmm0
806 %p = load <8 x double>, ptr %ptr
807 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
; Loads a <16 x float> from %ptr and applies llvm.ceil to it.
; The check below expects `vrndscaleps $10` to read its source directly from (%rdi).
811 define <16 x float> @ceil_v16f32_load(ptr %ptr) {
812 ; CHECK-LABEL: ceil_v16f32_load:
814 ; CHECK-NEXT: vrndscaleps $10, (%rdi), %zmm0
816 %p = load <16 x float>, ptr %ptr
817 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
; Per lane, selects llvm.ceil(%p) where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmq to set %k1 from %xmm2, `vrndscalepd $10` to
; write %xmm1 under {%k1}, and vmovapd to copy %xmm1 into %xmm0.
821 define <2 x double> @ceil_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
822 ; CHECK-LABEL: ceil_v2f64_mask:
824 ; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1
825 ; CHECK-NEXT: vrndscalepd $10, %xmm0, %xmm1 {%k1}
826 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
828 %c = icmp eq <2 x i64> %cmp, zeroinitializer
829 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
830 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; Per lane, selects llvm.ceil(%p) where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmd to set %k1 from %xmm2, `vrndscaleps $10` to
; write %xmm1 under {%k1}, and vmovaps to copy %xmm1 into %xmm0.
834 define <4 x float> @ceil_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
835 ; CHECK-LABEL: ceil_v4f32_mask:
837 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
838 ; CHECK-NEXT: vrndscaleps $10, %xmm0, %xmm1 {%k1}
839 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
841 %c = icmp eq <4 x i32> %cmp, zeroinitializer
842 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
843 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; Per lane, selects llvm.ceil(%p) where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmq to set %k1 from %ymm2, `vrndscalepd $10` to
; write %ymm1 under {%k1}, and vmovapd to copy %ymm1 into %ymm0.
847 define <4 x double> @ceil_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
848 ; CHECK-LABEL: ceil_v4f64_mask:
850 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
851 ; CHECK-NEXT: vrndscalepd $10, %ymm0, %ymm1 {%k1}
852 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
854 %c = icmp eq <4 x i64> %cmp, zeroinitializer
855 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
856 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; Per lane, selects llvm.ceil(%p) where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmd to set %k1 from %ymm2, `vrndscaleps $10` to
; write %ymm1 under {%k1}, and vmovaps to copy %ymm1 into %ymm0.
860 define <8 x float> @ceil_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
861 ; CHECK-LABEL: ceil_v8f32_mask:
863 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
864 ; CHECK-NEXT: vrndscaleps $10, %ymm0, %ymm1 {%k1}
865 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
867 %c = icmp eq <8 x i32> %cmp, zeroinitializer
868 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
869 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; Per lane, selects llvm.ceil(%p) where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmq to set %k1 from %zmm2, `vrndscalepd $10` to
; write %zmm1 under {%k1}, and vmovapd to copy %zmm1 into %zmm0.
873 define <8 x double> @ceil_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
874 ; CHECK-LABEL: ceil_v8f64_mask:
876 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
877 ; CHECK-NEXT: vrndscalepd $10, %zmm0, %zmm1 {%k1}
878 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
880 %c = icmp eq <8 x i64> %cmp, zeroinitializer
881 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
882 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; Per lane, selects llvm.ceil(%p) where %cmp equals zero and %passthru otherwise.
; The checks below expect vptestnmd to set %k1 from %zmm2, `vrndscaleps $10` to
; write %zmm1 under {%k1}, and vmovaps to copy %zmm1 into %zmm0.
886 define <16 x float> @ceil_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
887 ; CHECK-LABEL: ceil_v16f32_mask:
889 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
890 ; CHECK-NEXT: vrndscaleps $10, %zmm0, %zmm1 {%k1}
891 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
893 %c = icmp eq <16 x i32> %cmp, zeroinitializer
894 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
895 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; Per lane, selects llvm.ceil(%p) where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmq to set %k1 from %xmm1, then `vrndscalepd $10`
; on %xmm0 with {%k1} {z}.
899 define <2 x double> @ceil_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
900 ; CHECK-LABEL: ceil_v2f64_maskz:
902 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
903 ; CHECK-NEXT: vrndscalepd $10, %xmm0, %xmm0 {%k1} {z}
905 %c = icmp eq <2 x i64> %cmp, zeroinitializer
906 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
907 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; Per lane, selects llvm.ceil(%p) where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmd to set %k1 from %xmm1, then `vrndscaleps $10`
; on %xmm0 with {%k1} {z}.
911 define <4 x float> @ceil_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
912 ; CHECK-LABEL: ceil_v4f32_maskz:
914 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
915 ; CHECK-NEXT: vrndscaleps $10, %xmm0, %xmm0 {%k1} {z}
917 %c = icmp eq <4 x i32> %cmp, zeroinitializer
918 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
919 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; Per lane, selects llvm.ceil(%p) where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmq to set %k1 from %ymm1, then `vrndscalepd $10`
; on %ymm0 with {%k1} {z}.
923 define <4 x double> @ceil_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
924 ; CHECK-LABEL: ceil_v4f64_maskz:
926 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
927 ; CHECK-NEXT: vrndscalepd $10, %ymm0, %ymm0 {%k1} {z}
929 %c = icmp eq <4 x i64> %cmp, zeroinitializer
930 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
931 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; Per lane, selects llvm.ceil(%p) where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmd to set %k1 from %ymm1, then `vrndscaleps $10`
; on %ymm0 with {%k1} {z}.
935 define <8 x float> @ceil_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
936 ; CHECK-LABEL: ceil_v8f32_maskz:
938 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
939 ; CHECK-NEXT: vrndscaleps $10, %ymm0, %ymm0 {%k1} {z}
941 %c = icmp eq <8 x i32> %cmp, zeroinitializer
942 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
943 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; Per lane, selects llvm.ceil(%p) where %cmp equals zero and zeroinitializer otherwise.
; The checks below expect vptestnmq to set %k1 from %zmm1, then `vrndscalepd $10`
; on %zmm0 with {%k1} {z}.
947 define <8 x double> @ceil_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
948 ; CHECK-LABEL: ceil_v8f64_maskz:
950 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
951 ; CHECK-NEXT: vrndscalepd $10, %zmm0, %zmm0 {%k1} {z}
953 %c = icmp eq <8 x i64> %cmp, zeroinitializer
954 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
955 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; Zero-masked ceil of <16 x float>: lanes where %cmp is zero select ceil(%p), the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 on zmm with {%k1} {z}.
959 define <16 x float> @ceil_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
960 ; CHECK-LABEL: ceil_v16f32_maskz:
962 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
963 ; CHECK-NEXT: vrndscaleps $10, %zmm0, %zmm0 {%k1} {z}
965 %c = icmp eq <16 x i32> %cmp, zeroinitializer
966 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
967 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; Merge-masked ceil of a <2 x double> loaded from %ptr: lanes where %cmp is zero select the ceil result, the rest select %passthru.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $10 takes (%rdi) as its source under {%k1}.
971 define <2 x double> @ceil_v2f64_mask_load(ptr %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
972 ; CHECK-LABEL: ceil_v2f64_mask_load:
974 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
975 ; CHECK-NEXT: vrndscalepd $10, (%rdi), %xmm0 {%k1}
977 %c = icmp eq <2 x i64> %cmp, zeroinitializer
978 %p = load <2 x double>, ptr %ptr
979 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
980 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; Merge-masked ceil of a <4 x float> loaded from %ptr: lanes where %cmp is zero select the ceil result, the rest select %passthru.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 takes (%rdi) as its source under {%k1}.
984 define <4 x float> @ceil_v4f32_mask_load(ptr %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
985 ; CHECK-LABEL: ceil_v4f32_mask_load:
987 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
988 ; CHECK-NEXT: vrndscaleps $10, (%rdi), %xmm0 {%k1}
990 %c = icmp eq <4 x i32> %cmp, zeroinitializer
991 %p = load <4 x float>, ptr %ptr
992 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
993 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; Merge-masked ceil of a <4 x double> loaded from %ptr: lanes where %cmp is zero select the ceil result, the rest select %passthru.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $10 takes (%rdi) as its source under {%k1}.
997 define <4 x double> @ceil_v4f64_mask_load(ptr %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
998 ; CHECK-LABEL: ceil_v4f64_mask_load:
1000 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1001 ; CHECK-NEXT: vrndscalepd $10, (%rdi), %ymm0 {%k1}
1003 %c = icmp eq <4 x i64> %cmp, zeroinitializer
1004 %p = load <4 x double>, ptr %ptr
1005 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
1006 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; Merge-masked ceil of an <8 x float> loaded from %ptr: lanes where %cmp is zero select the ceil result, the rest select %passthru.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 takes (%rdi) as its source under {%k1}.
1010 define <8 x float> @ceil_v8f32_mask_load(ptr %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
1011 ; CHECK-LABEL: ceil_v8f32_mask_load:
1013 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
1014 ; CHECK-NEXT: vrndscaleps $10, (%rdi), %ymm0 {%k1}
1016 %c = icmp eq <8 x i32> %cmp, zeroinitializer
1017 %p = load <8 x float>, ptr %ptr
1018 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
1019 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; Merge-masked ceil of an <8 x double> loaded from %ptr: lanes where %cmp is zero select the ceil result, the rest select %passthru.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $10 takes (%rdi) as its source under {%k1}.
1023 define <8 x double> @ceil_v8f64_mask_load(ptr %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
1024 ; CHECK-LABEL: ceil_v8f64_mask_load:
1026 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1027 ; CHECK-NEXT: vrndscalepd $10, (%rdi), %zmm0 {%k1}
1029 %c = icmp eq <8 x i64> %cmp, zeroinitializer
1030 %p = load <8 x double>, ptr %ptr
1031 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
1032 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; Merge-masked ceil of a <16 x float> loaded from %ptr: lanes where %cmp is zero select the ceil result, the rest select %passthru.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 takes (%rdi) as its source under {%k1}.
1036 define <16 x float> @ceil_v16f32_mask_load(ptr %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
1037 ; CHECK-LABEL: ceil_v16f32_mask_load:
1039 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1040 ; CHECK-NEXT: vrndscaleps $10, (%rdi), %zmm0 {%k1}
1042 %c = icmp eq <16 x i32> %cmp, zeroinitializer
1043 %p = load <16 x float>, ptr %ptr
1044 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
1045 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; Zero-masked ceil of a <2 x double> loaded from %ptr: lanes where %cmp is zero select the ceil result, the rest select zero.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $10 takes (%rdi) as its source with {%k1} {z}.
1049 define <2 x double> @ceil_v2f64_maskz_load(ptr %ptr, <2 x i64> %cmp) {
1050 ; CHECK-LABEL: ceil_v2f64_maskz_load:
1052 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
1053 ; CHECK-NEXT: vrndscalepd $10, (%rdi), %xmm0 {%k1} {z}
1055 %c = icmp eq <2 x i64> %cmp, zeroinitializer
1056 %p = load <2 x double>, ptr %ptr
1057 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
1058 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; Zero-masked ceil of a <4 x float> loaded from %ptr: lanes where %cmp is zero select the ceil result, the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 takes (%rdi) as its source with {%k1} {z}.
1062 define <4 x float> @ceil_v4f32_maskz_load(ptr %ptr, <4 x i32> %cmp) {
1063 ; CHECK-LABEL: ceil_v4f32_maskz_load:
1065 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
1066 ; CHECK-NEXT: vrndscaleps $10, (%rdi), %xmm0 {%k1} {z}
1068 %c = icmp eq <4 x i32> %cmp, zeroinitializer
1069 %p = load <4 x float>, ptr %ptr
1070 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
1071 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; Zero-masked ceil of a <4 x double> loaded from %ptr: lanes where %cmp is zero select the ceil result, the rest select zero.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $10 takes (%rdi) as its source with {%k1} {z}.
1075 define <4 x double> @ceil_v4f64_maskz_load(ptr %ptr, <4 x i64> %cmp) {
1076 ; CHECK-LABEL: ceil_v4f64_maskz_load:
1078 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
1079 ; CHECK-NEXT: vrndscalepd $10, (%rdi), %ymm0 {%k1} {z}
1081 %c = icmp eq <4 x i64> %cmp, zeroinitializer
1082 %p = load <4 x double>, ptr %ptr
1083 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
1084 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; Zero-masked ceil of an <8 x float> loaded from %ptr: lanes where %cmp is zero select the ceil result, the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 takes (%rdi) as its source with {%k1} {z}.
1088 define <8 x float> @ceil_v8f32_maskz_load(ptr %ptr, <8 x i32> %cmp) {
1089 ; CHECK-LABEL: ceil_v8f32_maskz_load:
1091 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
1092 ; CHECK-NEXT: vrndscaleps $10, (%rdi), %ymm0 {%k1} {z}
1094 %c = icmp eq <8 x i32> %cmp, zeroinitializer
1095 %p = load <8 x float>, ptr %ptr
1096 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
1097 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; Zero-masked ceil of an <8 x double> loaded from %ptr: lanes where %cmp is zero select the ceil result, the rest select zero.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $10 takes (%rdi) as its source with {%k1} {z}.
1101 define <8 x double> @ceil_v8f64_maskz_load(ptr %ptr, <8 x i64> %cmp) {
1102 ; CHECK-LABEL: ceil_v8f64_maskz_load:
1104 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1105 ; CHECK-NEXT: vrndscalepd $10, (%rdi), %zmm0 {%k1} {z}
1107 %c = icmp eq <8 x i64> %cmp, zeroinitializer
1108 %p = load <8 x double>, ptr %ptr
1109 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
1110 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; Zero-masked ceil of a <16 x float> loaded from %ptr: lanes where %cmp is zero select the ceil result, the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 takes (%rdi) as its source with {%k1} {z}.
1114 define <16 x float> @ceil_v16f32_maskz_load(ptr %ptr, <16 x i32> %cmp) {
1115 ; CHECK-LABEL: ceil_v16f32_maskz_load:
1117 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
1118 ; CHECK-NEXT: vrndscaleps $10, (%rdi), %zmm0 {%k1} {z}
1120 %c = icmp eq <16 x i32> %cmp, zeroinitializer
1121 %p = load <16 x float>, ptr %ptr
1122 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
1123 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; ceil of a <2 x double> splat: one double is loaded from %ptr, inserted at lane 0 and shuffled with an all-zero mask.
; Expected asm: the splat is folded into vrndscalepd $10 as a (%rdi){1to2} operand.
; NOTE(review): the splat idiom uses undef placeholders; switching to poison would need the checks regenerated - confirm before changing.
1127 define <2 x double> @ceil_v2f64_broadcast(ptr %ptr) {
1128 ; CHECK-LABEL: ceil_v2f64_broadcast:
1130 ; CHECK-NEXT: vrndscalepd $10, (%rdi){1to2}, %xmm0
1132 %ps = load double, ptr %ptr
1133 %pins = insertelement <2 x double> undef, double %ps, i32 0
1134 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
1135 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
; ceil of a <4 x float> splat: one float is loaded from %ptr, inserted at lane 0 and shuffled with an all-zero mask.
; Expected asm: the splat is folded into vrndscaleps $10 as a (%rdi){1to4} operand.
1139 define <4 x float> @ceil_v4f32_broadcast(ptr %ptr) {
1140 ; CHECK-LABEL: ceil_v4f32_broadcast:
1142 ; CHECK-NEXT: vrndscaleps $10, (%rdi){1to4}, %xmm0
1144 %ps = load float, ptr %ptr
1145 %pins = insertelement <4 x float> undef, float %ps, i32 0
1146 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
1147 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
; ceil of a <4 x double> splat: one double is loaded from %ptr, inserted at lane 0 and shuffled with an all-zero mask.
; Expected asm: the splat is folded into vrndscalepd $10 as a (%rdi){1to4} operand.
1151 define <4 x double> @ceil_v4f64_broadcast(ptr %ptr){
1152 ; CHECK-LABEL: ceil_v4f64_broadcast:
1154 ; CHECK-NEXT: vrndscalepd $10, (%rdi){1to4}, %ymm0
1156 %ps = load double, ptr %ptr
1157 %pins = insertelement <4 x double> undef, double %ps, i32 0
1158 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
1159 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
; ceil of an <8 x float> splat: one float is loaded from %ptr, inserted at lane 0 and shuffled with an all-zero mask.
; Expected asm: the splat is folded into vrndscaleps $10 as a (%rdi){1to8} operand.
1163 define <8 x float> @ceil_v8f32_broadcast(ptr %ptr) {
1164 ; CHECK-LABEL: ceil_v8f32_broadcast:
1166 ; CHECK-NEXT: vrndscaleps $10, (%rdi){1to8}, %ymm0
1168 %ps = load float, ptr %ptr
1169 %pins = insertelement <8 x float> undef, float %ps, i32 0
1170 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
1171 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
; ceil of an <8 x double> splat: one double is loaded from %ptr, inserted at lane 0 and shuffled with an all-zero mask.
; Expected asm: the splat is folded into vrndscalepd $10 as a (%rdi){1to8} operand.
1175 define <8 x double> @ceil_v8f64_broadcast(ptr %ptr){
1176 ; CHECK-LABEL: ceil_v8f64_broadcast:
1178 ; CHECK-NEXT: vrndscalepd $10, (%rdi){1to8}, %zmm0
1180 %ps = load double, ptr %ptr
1181 %pins = insertelement <8 x double> undef, double %ps, i32 0
1182 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
1183 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
; ceil of a <16 x float> splat: one float is loaded from %ptr, inserted at lane 0 and shuffled with an all-zero mask.
; Expected asm: the splat is folded into vrndscaleps $10 as a (%rdi){1to16} operand.
1187 define <16 x float> @ceil_v16f32_broadcast(ptr %ptr) {
1188 ; CHECK-LABEL: ceil_v16f32_broadcast:
1190 ; CHECK-NEXT: vrndscaleps $10, (%rdi){1to16}, %zmm0
1192 %ps = load float, ptr %ptr
1193 %pins = insertelement <16 x float> undef, float %ps, i32 0
1194 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
1195 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
; Merge-masked ceil of a <2 x double> splat of the double at %ptr: lanes where %cmp is zero select the ceil result, the rest select %passthru.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $10 takes a (%rdi){1to2} operand under {%k1}.
1199 define <2 x double> @ceil_v2f64_mask_broadcast(ptr %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
1200 ; CHECK-LABEL: ceil_v2f64_mask_broadcast:
1202 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
1203 ; CHECK-NEXT: vrndscalepd $10, (%rdi){1to2}, %xmm0 {%k1}
1205 %c = icmp eq <2 x i64> %cmp, zeroinitializer
1206 %ps = load double, ptr %ptr
1207 %pins = insertelement <2 x double> undef, double %ps, i32 0
1208 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
1209 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
1210 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; Merge-masked ceil of a <4 x float> splat of the float at %ptr: lanes where %cmp is zero select the ceil result, the rest select %passthru.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 takes a (%rdi){1to4} operand under {%k1}.
1214 define <4 x float> @ceil_v4f32_mask_broadcast(ptr %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
1215 ; CHECK-LABEL: ceil_v4f32_mask_broadcast:
1217 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
1218 ; CHECK-NEXT: vrndscaleps $10, (%rdi){1to4}, %xmm0 {%k1}
1220 %c = icmp eq <4 x i32> %cmp, zeroinitializer
1221 %ps = load float, ptr %ptr
1222 %pins = insertelement <4 x float> undef, float %ps, i32 0
1223 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
1224 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
1225 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; Merge-masked ceil of a <4 x double> splat of the double at %ptr: lanes where %cmp is zero select the ceil result, the rest select %passthru.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $10 takes a (%rdi){1to4} operand under {%k1}.
1229 define <4 x double> @ceil_v4f64_mask_broadcast(ptr %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
1230 ; CHECK-LABEL: ceil_v4f64_mask_broadcast:
1232 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1233 ; CHECK-NEXT: vrndscalepd $10, (%rdi){1to4}, %ymm0 {%k1}
1235 %c = icmp eq <4 x i64> %cmp, zeroinitializer
1236 %ps = load double, ptr %ptr
1237 %pins = insertelement <4 x double> undef, double %ps, i32 0
1238 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
1239 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
1240 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; Merge-masked ceil of an <8 x float> splat of the float at %ptr: lanes where %cmp is zero select the ceil result, the rest select %passthru.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 takes a (%rdi){1to8} operand under {%k1}.
1244 define <8 x float> @ceil_v8f32_mask_broadcast(ptr %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
1245 ; CHECK-LABEL: ceil_v8f32_mask_broadcast:
1247 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
1248 ; CHECK-NEXT: vrndscaleps $10, (%rdi){1to8}, %ymm0 {%k1}
1250 %c = icmp eq <8 x i32> %cmp, zeroinitializer
1251 %ps = load float, ptr %ptr
1252 %pins = insertelement <8 x float> undef, float %ps, i32 0
1253 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
1254 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
1255 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; Merge-masked ceil of an <8 x double> splat of the double at %ptr: lanes where %cmp is zero select the ceil result, the rest select %passthru.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $10 takes a (%rdi){1to8} operand under {%k1}.
1259 define <8 x double> @ceil_v8f64_mask_broadcast(ptr %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
1260 ; CHECK-LABEL: ceil_v8f64_mask_broadcast:
1262 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1263 ; CHECK-NEXT: vrndscalepd $10, (%rdi){1to8}, %zmm0 {%k1}
1265 %c = icmp eq <8 x i64> %cmp, zeroinitializer
1266 %ps = load double, ptr %ptr
1267 %pins = insertelement <8 x double> undef, double %ps, i32 0
1268 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
1269 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
1270 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; Merge-masked ceil of a <16 x float> splat of the float at %ptr: lanes where %cmp is zero select the ceil result, the rest select %passthru.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 takes a (%rdi){1to16} operand under {%k1}.
1274 define <16 x float> @ceil_v16f32_mask_broadcast(ptr %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
1275 ; CHECK-LABEL: ceil_v16f32_mask_broadcast:
1277 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1278 ; CHECK-NEXT: vrndscaleps $10, (%rdi){1to16}, %zmm0 {%k1}
1280 %c = icmp eq <16 x i32> %cmp, zeroinitializer
1281 %ps = load float, ptr %ptr
1282 %pins = insertelement <16 x float> undef, float %ps, i32 0
1283 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
1284 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
1285 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; Zero-masked ceil of a <2 x double> splat of the double at %ptr: lanes where %cmp is zero select the ceil result, the rest select zero.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $10 takes a (%rdi){1to2} operand with {%k1} {z}.
1289 define <2 x double> @ceil_v2f64_maskz_broadcast(ptr %ptr, <2 x i64> %cmp) {
1290 ; CHECK-LABEL: ceil_v2f64_maskz_broadcast:
1292 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
1293 ; CHECK-NEXT: vrndscalepd $10, (%rdi){1to2}, %xmm0 {%k1} {z}
1295 %c = icmp eq <2 x i64> %cmp, zeroinitializer
1296 %ps = load double, ptr %ptr
1297 %pins = insertelement <2 x double> undef, double %ps, i32 0
1298 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
1299 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
1300 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; Zero-masked ceil of a <4 x float> splat of the float at %ptr: lanes where %cmp is zero select the ceil result, the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 takes a (%rdi){1to4} operand with {%k1} {z}.
1304 define <4 x float> @ceil_v4f32_maskz_broadcast(ptr %ptr, <4 x i32> %cmp) {
1305 ; CHECK-LABEL: ceil_v4f32_maskz_broadcast:
1307 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
1308 ; CHECK-NEXT: vrndscaleps $10, (%rdi){1to4}, %xmm0 {%k1} {z}
1310 %c = icmp eq <4 x i32> %cmp, zeroinitializer
1311 %ps = load float, ptr %ptr
1312 %pins = insertelement <4 x float> undef, float %ps, i32 0
1313 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
1314 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
1315 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; Zero-masked ceil of a <4 x double> splat of the double at %ptr: lanes where %cmp is zero select the ceil result, the rest select zero.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $10 takes a (%rdi){1to4} operand with {%k1} {z}.
1319 define <4 x double> @ceil_v4f64_maskz_broadcast(ptr %ptr, <4 x i64> %cmp) {
1320 ; CHECK-LABEL: ceil_v4f64_maskz_broadcast:
1322 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
1323 ; CHECK-NEXT: vrndscalepd $10, (%rdi){1to4}, %ymm0 {%k1} {z}
1325 %c = icmp eq <4 x i64> %cmp, zeroinitializer
1326 %ps = load double, ptr %ptr
1327 %pins = insertelement <4 x double> undef, double %ps, i32 0
1328 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
1329 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
1330 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; Zero-masked ceil of an <8 x float> splat of the float at %ptr: lanes where %cmp is zero select the ceil result, the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 takes a (%rdi){1to8} operand with {%k1} {z}.
1334 define <8 x float> @ceil_v8f32_maskz_broadcast(ptr %ptr, <8 x i32> %cmp) {
1335 ; CHECK-LABEL: ceil_v8f32_maskz_broadcast:
1337 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
1338 ; CHECK-NEXT: vrndscaleps $10, (%rdi){1to8}, %ymm0 {%k1} {z}
1340 %c = icmp eq <8 x i32> %cmp, zeroinitializer
1341 %ps = load float, ptr %ptr
1342 %pins = insertelement <8 x float> undef, float %ps, i32 0
1343 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
1344 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
1345 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; Zero-masked ceil of an <8 x double> splat of the double at %ptr: lanes where %cmp is zero select the ceil result, the rest select zero.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $10 takes a (%rdi){1to8} operand with {%k1} {z}.
1349 define <8 x double> @ceil_v8f64_maskz_broadcast(ptr %ptr, <8 x i64> %cmp) {
1350 ; CHECK-LABEL: ceil_v8f64_maskz_broadcast:
1352 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1353 ; CHECK-NEXT: vrndscalepd $10, (%rdi){1to8}, %zmm0 {%k1} {z}
1355 %c = icmp eq <8 x i64> %cmp, zeroinitializer
1356 %ps = load double, ptr %ptr
1357 %pins = insertelement <8 x double> undef, double %ps, i32 0
1358 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
1359 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
1360 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; Zero-masked ceil of a <16 x float> splat of the float at %ptr: lanes where %cmp is zero select the ceil result, the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $10 takes a (%rdi){1to16} operand with {%k1} {z}.
1364 define <16 x float> @ceil_v16f32_maskz_broadcast(ptr %ptr, <16 x i32> %cmp) {
1365 ; CHECK-LABEL: ceil_v16f32_maskz_broadcast:
1367 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
1368 ; CHECK-NEXT: vrndscaleps $10, (%rdi){1to16}, %zmm0 {%k1} {z}
1370 %c = icmp eq <16 x i32> %cmp, zeroinitializer
1371 %ps = load float, ptr %ptr
1372 %pins = insertelement <16 x float> undef, float %ps, i32 0
1373 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
1374 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
1375 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; Unmasked trunc of a <2 x double> register argument.
; Expected asm: a single vroundpd $11 on xmm (the non-EVEX form, unlike the masked variants).
1379 define <2 x double> @trunc_v2f64(<2 x double> %p) {
1380 ; CHECK-LABEL: trunc_v2f64:
1382 ; CHECK-NEXT: vroundpd $11, %xmm0, %xmm0
1384 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
; Unmasked trunc of a <4 x float> register argument.
; Expected asm: a single vroundps $11 on xmm.
1388 define <4 x float> @trunc_v4f32(<4 x float> %p) {
1389 ; CHECK-LABEL: trunc_v4f32:
1391 ; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
1393 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
; Unmasked trunc of a <4 x double> register argument.
; Expected asm: a single vroundpd $11 on ymm.
1397 define <4 x double> @trunc_v4f64(<4 x double> %p){
1398 ; CHECK-LABEL: trunc_v4f64:
1400 ; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0
1402 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
; Unmasked trunc of an <8 x float> register argument.
; Expected asm: a single vroundps $11 on ymm.
1406 define <8 x float> @trunc_v8f32(<8 x float> %p) {
1407 ; CHECK-LABEL: trunc_v8f32:
1409 ; CHECK-NEXT: vroundps $11, %ymm0, %ymm0
1411 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
; Unmasked trunc of an <8 x double> register argument.
; Expected asm: a single vrndscalepd $11 on zmm (the 512-bit case uses vrndscale rather than vround).
1415 define <8 x double> @trunc_v8f64(<8 x double> %p){
1416 ; CHECK-LABEL: trunc_v8f64:
1418 ; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
1420 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
; Unmasked trunc of a <16 x float> register argument.
; Expected asm: a single vrndscaleps $11 on zmm (the 512-bit case uses vrndscale rather than vround).
1424 define <16 x float> @trunc_v16f32(<16 x float> %p) {
1425 ; CHECK-LABEL: trunc_v16f32:
1427 ; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0
1429 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
; Unmasked trunc of a <2 x double> loaded from %ptr.
; Expected asm: the load is folded into vroundpd $11 as a (%rdi) memory operand.
1433 define <2 x double> @trunc_v2f64_load(ptr %ptr) {
1434 ; CHECK-LABEL: trunc_v2f64_load:
1436 ; CHECK-NEXT: vroundpd $11, (%rdi), %xmm0
1438 %p = load <2 x double>, ptr %ptr
1439 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
; Unmasked trunc of a <4 x float> loaded from %ptr.
; Expected asm: the load is folded into vroundps $11 as a (%rdi) memory operand.
1443 define <4 x float> @trunc_v4f32_load(ptr %ptr) {
1444 ; CHECK-LABEL: trunc_v4f32_load:
1446 ; CHECK-NEXT: vroundps $11, (%rdi), %xmm0
1448 %p = load <4 x float>, ptr %ptr
1449 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
; Unmasked trunc of a <4 x double> loaded from %ptr.
; Expected asm: the load is folded into vroundpd $11 as a (%rdi) memory operand.
1453 define <4 x double> @trunc_v4f64_load(ptr %ptr){
1454 ; CHECK-LABEL: trunc_v4f64_load:
1456 ; CHECK-NEXT: vroundpd $11, (%rdi), %ymm0
1458 %p = load <4 x double>, ptr %ptr
1459 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
; Unmasked trunc of an <8 x float> loaded from %ptr.
; Expected asm: the load is folded into vroundps $11 as a (%rdi) memory operand.
1463 define <8 x float> @trunc_v8f32_load(ptr %ptr) {
1464 ; CHECK-LABEL: trunc_v8f32_load:
1466 ; CHECK-NEXT: vroundps $11, (%rdi), %ymm0
1468 %p = load <8 x float>, ptr %ptr
1469 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
; Unmasked trunc of an <8 x double> loaded from %ptr.
; Expected asm: the load is folded into vrndscalepd $11 as a (%rdi) memory operand.
1473 define <8 x double> @trunc_v8f64_load(ptr %ptr){
1474 ; CHECK-LABEL: trunc_v8f64_load:
1476 ; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0
1478 %p = load <8 x double>, ptr %ptr
1479 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
; Unmasked trunc of a <16 x float> loaded from %ptr.
; Expected asm: the load is folded into vrndscaleps $11 as a (%rdi) memory operand.
1483 define <16 x float> @trunc_v16f32_load(ptr %ptr) {
1484 ; CHECK-LABEL: trunc_v16f32_load:
1486 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %zmm0
1488 %p = load <16 x float>, ptr %ptr
1489 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
; Merge-masked trunc of <2 x double>: lanes where %cmp is zero select trunc(%p), the rest select %passthru.
; Expected asm: vptestnmq forms %k1, vrndscalepd $11 writes %xmm1 under {%k1}, then vmovapd copies %xmm1 to %xmm0.
1493 define <2 x double> @trunc_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
1494 ; CHECK-LABEL: trunc_v2f64_mask:
1496 ; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1
1497 ; CHECK-NEXT: vrndscalepd $11, %xmm0, %xmm1 {%k1}
1498 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
1500 %c = icmp eq <2 x i64> %cmp, zeroinitializer
1501 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1502 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; Merge-masked trunc of <4 x float>: lanes where %cmp is zero select trunc(%p), the rest select %passthru.
; Expected asm: vptestnmd forms %k1, vrndscaleps $11 writes %xmm1 under {%k1}, then vmovaps copies %xmm1 to %xmm0.
1506 define <4 x float> @trunc_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
1507 ; CHECK-LABEL: trunc_v4f32_mask:
1509 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
1510 ; CHECK-NEXT: vrndscaleps $11, %xmm0, %xmm1 {%k1}
1511 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
1513 %c = icmp eq <4 x i32> %cmp, zeroinitializer
1514 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1515 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; Merge-masked trunc of <4 x double>: lanes where %cmp is zero select trunc(%p), the rest select %passthru.
; Expected asm: vptestnmq forms %k1, vrndscalepd $11 writes %ymm1 under {%k1}, then vmovapd copies %ymm1 to %ymm0.
1519 define <4 x double> @trunc_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
1520 ; CHECK-LABEL: trunc_v4f64_mask:
1522 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1523 ; CHECK-NEXT: vrndscalepd $11, %ymm0, %ymm1 {%k1}
1524 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
1526 %c = icmp eq <4 x i64> %cmp, zeroinitializer
1527 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1528 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; Merge-masked trunc of <8 x float>: lanes where %cmp is zero select trunc(%p), the rest select %passthru.
; Expected asm: vptestnmd forms %k1, vrndscaleps $11 writes %ymm1 under {%k1}, then vmovaps copies %ymm1 to %ymm0.
1532 define <8 x float> @trunc_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
1533 ; CHECK-LABEL: trunc_v8f32_mask:
1535 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
1536 ; CHECK-NEXT: vrndscaleps $11, %ymm0, %ymm1 {%k1}
1537 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1539 %c = icmp eq <8 x i32> %cmp, zeroinitializer
1540 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1541 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; Merge-masked trunc of <8 x double>: lanes where %cmp is zero select trunc(%p), the rest select %passthru.
; Expected asm: vptestnmq forms %k1, vrndscalepd $11 writes %zmm1 under {%k1}, then vmovapd copies %zmm1 to %zmm0.
1545 define <8 x double> @trunc_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
1546 ; CHECK-LABEL: trunc_v8f64_mask:
1548 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1549 ; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm1 {%k1}
1550 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
1552 %c = icmp eq <8 x i64> %cmp, zeroinitializer
1553 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1554 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; Merge-masked trunc of <16 x float>: lanes where %cmp is zero select trunc(%p), the rest select %passthru.
; Expected asm: vptestnmd forms %k1, vrndscaleps $11 writes %zmm1 under {%k1}, then vmovaps copies %zmm1 to %zmm0.
1558 define <16 x float> @trunc_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
1559 ; CHECK-LABEL: trunc_v16f32_mask:
1561 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
1562 ; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm1 {%k1}
1563 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1565 %c = icmp eq <16 x i32> %cmp, zeroinitializer
1566 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1567 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; Zero-masked trunc of <2 x double>: lanes where %cmp is zero select trunc(%p), the rest select zero.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $11 on xmm with {%k1} {z}.
1571 define <2 x double> @trunc_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
1572 ; CHECK-LABEL: trunc_v2f64_maskz:
1574 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
1575 ; CHECK-NEXT: vrndscalepd $11, %xmm0, %xmm0 {%k1} {z}
1577 %c = icmp eq <2 x i64> %cmp, zeroinitializer
1578 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1579 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; Zero-masked trunc of <4 x float>: lanes where %cmp is zero select trunc(%p), the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $11 on xmm with {%k1} {z}.
1583 define <4 x float> @trunc_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
1584 ; CHECK-LABEL: trunc_v4f32_maskz:
1586 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
1587 ; CHECK-NEXT: vrndscaleps $11, %xmm0, %xmm0 {%k1} {z}
1589 %c = icmp eq <4 x i32> %cmp, zeroinitializer
1590 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1591 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; Zero-masked trunc of <4 x double>: lanes where %cmp is zero select trunc(%p), the rest select zero.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $11 on ymm with {%k1} {z}.
1595 define <4 x double> @trunc_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
1596 ; CHECK-LABEL: trunc_v4f64_maskz:
1598 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1599 ; CHECK-NEXT: vrndscalepd $11, %ymm0, %ymm0 {%k1} {z}
1601 %c = icmp eq <4 x i64> %cmp, zeroinitializer
1602 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1603 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; Zero-masked trunc of <8 x float>: lanes where %cmp is zero select trunc(%p), the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $11 on ymm with {%k1} {z}.
1607 define <8 x float> @trunc_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
1608 ; CHECK-LABEL: trunc_v8f32_maskz:
1610 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
1611 ; CHECK-NEXT: vrndscaleps $11, %ymm0, %ymm0 {%k1} {z}
1613 %c = icmp eq <8 x i32> %cmp, zeroinitializer
1614 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1615 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; Zero-masked trunc of <8 x double>: lanes where %cmp is zero select trunc(%p), the rest select zero.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $11 on zmm with {%k1} {z}.
1619 define <8 x double> @trunc_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
1620 ; CHECK-LABEL: trunc_v8f64_maskz:
1622 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1623 ; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0 {%k1} {z}
1625 %c = icmp eq <8 x i64> %cmp, zeroinitializer
1626 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1627 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; Zero-masked trunc of <16 x float>: lanes where %cmp is zero select trunc(%p), the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $11 on zmm with {%k1} {z}.
1631 define <16 x float> @trunc_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
1632 ; CHECK-LABEL: trunc_v16f32_maskz:
1634 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1635 ; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0 {%k1} {z}
1637 %c = icmp eq <16 x i32> %cmp, zeroinitializer
1638 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1639 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; Merge-masked trunc of a <2 x double> loaded from %ptr: lanes where %cmp is zero select the trunc result, the rest select %passthru.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $11 takes (%rdi) as its source under {%k1}.
1643 define <2 x double> @trunc_v2f64_mask_load(ptr %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
1644 ; CHECK-LABEL: trunc_v2f64_mask_load:
1646 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
1647 ; CHECK-NEXT: vrndscalepd $11, (%rdi), %xmm0 {%k1}
1649 %c = icmp eq <2 x i64> %cmp, zeroinitializer
1650 %p = load <2 x double>, ptr %ptr
1651 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1652 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; Merge-masked trunc of a <4 x float> loaded from %ptr: lanes where %cmp is zero select the trunc result, the rest select %passthru.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $11 takes (%rdi) as its source under {%k1}.
1656 define <4 x float> @trunc_v4f32_mask_load(ptr %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
1657 ; CHECK-LABEL: trunc_v4f32_mask_load:
1659 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
1660 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %xmm0 {%k1}
1662 %c = icmp eq <4 x i32> %cmp, zeroinitializer
1663 %p = load <4 x float>, ptr %ptr
1664 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1665 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; Merge-masked trunc of a <4 x double> loaded from %ptr: lanes where %cmp is zero select the trunc result, the rest select %passthru.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $11 takes (%rdi) as its source under {%k1}.
1669 define <4 x double> @trunc_v4f64_mask_load(ptr %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
1670 ; CHECK-LABEL: trunc_v4f64_mask_load:
1672 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1673 ; CHECK-NEXT: vrndscalepd $11, (%rdi), %ymm0 {%k1}
1675 %c = icmp eq <4 x i64> %cmp, zeroinitializer
1676 %p = load <4 x double>, ptr %ptr
1677 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1678 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; Merge-masked trunc of an <8 x float> loaded from %ptr: lanes where %cmp is zero select the trunc result, the rest select %passthru.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $11 takes (%rdi) as its source under {%k1}.
1682 define <8 x float> @trunc_v8f32_mask_load(ptr %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
1683 ; CHECK-LABEL: trunc_v8f32_mask_load:
1685 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
1686 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %ymm0 {%k1}
1688 %c = icmp eq <8 x i32> %cmp, zeroinitializer
1689 %p = load <8 x float>, ptr %ptr
1690 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1691 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; Merge-masked trunc of an <8 x double> loaded from %ptr: lanes where %cmp is zero select the trunc result, the rest select %passthru.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $11 takes (%rdi) as its source under {%k1}.
1695 define <8 x double> @trunc_v8f64_mask_load(ptr %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
1696 ; CHECK-LABEL: trunc_v8f64_mask_load:
1698 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1699 ; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0 {%k1}
1701 %c = icmp eq <8 x i64> %cmp, zeroinitializer
1702 %p = load <8 x double>, ptr %ptr
1703 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1704 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; Merge-masked trunc of a <16 x float> loaded from %ptr: lanes where %cmp is zero select the trunc result, the rest select %passthru.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $11 takes (%rdi) as its source under {%k1}.
1708 define <16 x float> @trunc_v16f32_mask_load(ptr %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
1709 ; CHECK-LABEL: trunc_v16f32_mask_load:
1711 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1712 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %zmm0 {%k1}
1714 %c = icmp eq <16 x i32> %cmp, zeroinitializer
1715 %p = load <16 x float>, ptr %ptr
1716 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1717 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; Zero-masked trunc of a <2 x double> loaded from %ptr: lanes where %cmp is zero select the trunc result, the rest select zero.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $11 takes (%rdi) as its source with {%k1} {z}.
1721 define <2 x double> @trunc_v2f64_maskz_load(ptr %ptr, <2 x i64> %cmp) {
1722 ; CHECK-LABEL: trunc_v2f64_maskz_load:
1724 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
1725 ; CHECK-NEXT: vrndscalepd $11, (%rdi), %xmm0 {%k1} {z}
1727 %c = icmp eq <2 x i64> %cmp, zeroinitializer
1728 %p = load <2 x double>, ptr %ptr
1729 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1730 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; Zero-masked trunc of a <4 x float> loaded from %ptr: lanes where %cmp is zero select the trunc result, the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $11 takes (%rdi) as its source with {%k1} {z}.
1734 define <4 x float> @trunc_v4f32_maskz_load(ptr %ptr, <4 x i32> %cmp) {
1735 ; CHECK-LABEL: trunc_v4f32_maskz_load:
1737 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
1738 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %xmm0 {%k1} {z}
1740 %c = icmp eq <4 x i32> %cmp, zeroinitializer
1741 %p = load <4 x float>, ptr %ptr
1742 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1743 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; Zero-masked trunc of a <4 x double> loaded from %ptr: lanes where %cmp is zero select the trunc result, the rest select zero.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $11 takes (%rdi) as its source with {%k1} {z}.
1747 define <4 x double> @trunc_v4f64_maskz_load(ptr %ptr, <4 x i64> %cmp) {
1748 ; CHECK-LABEL: trunc_v4f64_maskz_load:
1750 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
1751 ; CHECK-NEXT: vrndscalepd $11, (%rdi), %ymm0 {%k1} {z}
1753 %c = icmp eq <4 x i64> %cmp, zeroinitializer
1754 %p = load <4 x double>, ptr %ptr
1755 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1756 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; Zero-masked trunc of an <8 x float> loaded from %ptr: lanes where %cmp is zero select the trunc result, the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $11 takes (%rdi) as its source with {%k1} {z}.
1760 define <8 x float> @trunc_v8f32_maskz_load(ptr %ptr, <8 x i32> %cmp) {
1761 ; CHECK-LABEL: trunc_v8f32_maskz_load:
1763 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
1764 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %ymm0 {%k1} {z}
1766 %c = icmp eq <8 x i32> %cmp, zeroinitializer
1767 %p = load <8 x float>, ptr %ptr
1768 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1769 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; Zero-masked trunc of an <8 x double> loaded from %ptr: lanes where %cmp is zero select the trunc result, the rest select zero.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $11 takes (%rdi) as its source with {%k1} {z}.
1773 define <8 x double> @trunc_v8f64_maskz_load(ptr %ptr, <8 x i64> %cmp) {
1774 ; CHECK-LABEL: trunc_v8f64_maskz_load:
1776 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1777 ; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0 {%k1} {z}
1779 %c = icmp eq <8 x i64> %cmp, zeroinitializer
1780 %p = load <8 x double>, ptr %ptr
1781 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1782 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; Zero-masked trunc of a <16 x float> loaded from %ptr: lanes where %cmp is zero select the trunc result, the rest select zero.
; Expected asm: vptestnmd forms %k1, then vrndscaleps $11 takes (%rdi) as its source with {%k1} {z}.
1786 define <16 x float> @trunc_v16f32_maskz_load(ptr %ptr, <16 x i32> %cmp) {
1787 ; CHECK-LABEL: trunc_v16f32_maskz_load:
1789 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
1790 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %zmm0 {%k1} {z}
1792 %c = icmp eq <16 x i32> %cmp, zeroinitializer
1793 %p = load <16 x float>, ptr %ptr
1794 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1795 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; trunc of a <2 x double> splat: one double is loaded from %ptr, inserted at lane 0 and shuffled with an all-zero mask.
; Expected asm: the splat is folded into vrndscalepd $11 as a (%rdi){1to2} operand.
1799 define <2 x double> @trunc_v2f64_broadcast(ptr %ptr) {
1800 ; CHECK-LABEL: trunc_v2f64_broadcast:
1802 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to2}, %xmm0
1804 %ps = load double, ptr %ptr
1805 %pins = insertelement <2 x double> undef, double %ps, i32 0
1806 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
1807 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
; trunc of a <4 x float> splat: one float is loaded from %ptr, inserted at lane 0 and shuffled with an all-zero mask.
; Expected asm: the splat is folded into vrndscaleps $11 as a (%rdi){1to4} operand.
1811 define <4 x float> @trunc_v4f32_broadcast(ptr %ptr) {
1812 ; CHECK-LABEL: trunc_v4f32_broadcast:
1814 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to4}, %xmm0
1816 %ps = load float, ptr %ptr
1817 %pins = insertelement <4 x float> undef, float %ps, i32 0
1818 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
1819 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
; trunc of a <4 x double> splat: one double is loaded from %ptr, inserted at lane 0 and shuffled with an all-zero mask.
; Expected asm: the splat is folded into vrndscalepd $11 as a (%rdi){1to4} operand.
1823 define <4 x double> @trunc_v4f64_broadcast(ptr %ptr){
1824 ; CHECK-LABEL: trunc_v4f64_broadcast:
1826 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to4}, %ymm0
1828 %ps = load double, ptr %ptr
1829 %pins = insertelement <4 x double> undef, double %ps, i32 0
1830 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
1831 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
; trunc of an <8 x float> splat: one float is loaded from %ptr, inserted at lane 0 and shuffled with an all-zero mask.
; Expected asm: the splat is folded into vrndscaleps $11 as a (%rdi){1to8} operand.
1835 define <8 x float> @trunc_v8f32_broadcast(ptr %ptr) {
1836 ; CHECK-LABEL: trunc_v8f32_broadcast:
1838 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to8}, %ymm0
1840 %ps = load float, ptr %ptr
1841 %pins = insertelement <8 x float> undef, float %ps, i32 0
1842 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
1843 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
; trunc of an <8 x double> splat: one double is loaded from %ptr, inserted at lane 0 and shuffled with an all-zero mask.
; Expected asm: the splat is folded into vrndscalepd $11 as a (%rdi){1to8} operand.
1847 define <8 x double> @trunc_v8f64_broadcast(ptr %ptr){
1848 ; CHECK-LABEL: trunc_v8f64_broadcast:
1850 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to8}, %zmm0
1852 %ps = load double, ptr %ptr
1853 %pins = insertelement <8 x double> undef, double %ps, i32 0
1854 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
1855 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
; trunc of a <16 x float> splat: one float is loaded from %ptr, inserted at lane 0 and shuffled with an all-zero mask.
; Expected asm: the splat is folded into vrndscaleps $11 as a (%rdi){1to16} operand.
1859 define <16 x float> @trunc_v16f32_broadcast(ptr %ptr) {
1860 ; CHECK-LABEL: trunc_v16f32_broadcast:
1862 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to16}, %zmm0
1864 %ps = load float, ptr %ptr
1865 %pins = insertelement <16 x float> undef, float %ps, i32 0
1866 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
1867 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
; Merge-masked trunc of a <2 x double> splat of the double at %ptr: lanes where %cmp is zero select the trunc result, the rest select %passthru.
; Expected asm: vptestnmq forms %k1, then vrndscalepd $11 takes a (%rdi){1to2} operand under {%k1}.
1871 define <2 x double> @trunc_v2f64_mask_broadcast(ptr %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
1872 ; CHECK-LABEL: trunc_v2f64_mask_broadcast:
1874 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
1875 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to2}, %xmm0 {%k1}
1877 %c = icmp eq <2 x i64> %cmp, zeroinitializer
1878 %ps = load double, ptr %ptr
1879 %pins = insertelement <2 x double> undef, double %ps, i32 0
1880 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
1881 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1882 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; Test: llvm.trunc.v4f32 on a splat of the float at %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmd builds %k1; vrndscaleps $11 reads a {1to4} broadcast of (%rdi) under {%k1}.
1886 define <4 x float> @trunc_v4f32_mask_broadcast(ptr %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
1887 ; CHECK-LABEL: trunc_v4f32_mask_broadcast:
1889 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
1890 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to4}, %xmm0 {%k1}
1892 %c = icmp eq <4 x i32> %cmp, zeroinitializer
1893 %ps = load float, ptr %ptr
1894 %pins = insertelement <4 x float> undef, float %ps, i32 0
1895 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
1896 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1897 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; Test: llvm.trunc.v4f64 on a splat of the double at %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmq builds %k1; vrndscalepd $11 reads a {1to4} broadcast of (%rdi) under {%k1}.
1901 define <4 x double> @trunc_v4f64_mask_broadcast(ptr %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
1902 ; CHECK-LABEL: trunc_v4f64_mask_broadcast:
1904 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1905 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to4}, %ymm0 {%k1}
1907 %c = icmp eq <4 x i64> %cmp, zeroinitializer
1908 %ps = load double, ptr %ptr
1909 %pins = insertelement <4 x double> undef, double %ps, i32 0
1910 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
1911 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1912 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; Test: llvm.trunc.v8f32 on a splat of the float at %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmd builds %k1; vrndscaleps $11 reads a {1to8} broadcast of (%rdi) under {%k1}.
1916 define <8 x float> @trunc_v8f32_mask_broadcast(ptr %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
1917 ; CHECK-LABEL: trunc_v8f32_mask_broadcast:
1919 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
1920 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to8}, %ymm0 {%k1}
1922 %c = icmp eq <8 x i32> %cmp, zeroinitializer
1923 %ps = load float, ptr %ptr
1924 %pins = insertelement <8 x float> undef, float %ps, i32 0
1925 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
1926 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1927 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; Test: llvm.trunc.v8f64 on a splat of the double at %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmq builds %k1; vrndscalepd $11 reads a {1to8} broadcast of (%rdi) under {%k1}.
1931 define <8 x double> @trunc_v8f64_mask_broadcast(ptr %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
1932 ; CHECK-LABEL: trunc_v8f64_mask_broadcast:
1934 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1935 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to8}, %zmm0 {%k1}
1937 %c = icmp eq <8 x i64> %cmp, zeroinitializer
1938 %ps = load double, ptr %ptr
1939 %pins = insertelement <8 x double> undef, double %ps, i32 0
1940 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
1941 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1942 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; Test: llvm.trunc.v16f32 on a splat of the float at %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmd builds %k1; vrndscaleps $11 reads a {1to16} broadcast of (%rdi) under {%k1}.
1946 define <16 x float> @trunc_v16f32_mask_broadcast(ptr %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
1947 ; CHECK-LABEL: trunc_v16f32_mask_broadcast:
1949 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1950 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to16}, %zmm0 {%k1}
1952 %c = icmp eq <16 x i32> %cmp, zeroinitializer
1953 %ps = load float, ptr %ptr
1954 %pins = insertelement <16 x float> undef, float %ps, i32 0
1955 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
1956 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1957 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; Test: llvm.trunc.v2f64 on a splat of the double at %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmq builds %k1; vrndscalepd $11 reads a {1to2} broadcast of (%rdi) under {%k1} {z}.
1961 define <2 x double> @trunc_v2f64_maskz_broadcast(ptr %ptr, <2 x i64> %cmp) {
1962 ; CHECK-LABEL: trunc_v2f64_maskz_broadcast:
1964 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
1965 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to2}, %xmm0 {%k1} {z}
1967 %c = icmp eq <2 x i64> %cmp, zeroinitializer
1968 %ps = load double, ptr %ptr
1969 %pins = insertelement <2 x double> undef, double %ps, i32 0
1970 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
1971 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1972 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; Test: llvm.trunc.v4f32 on a splat of the float at %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmd builds %k1; vrndscaleps $11 reads a {1to4} broadcast of (%rdi) under {%k1} {z}.
1976 define <4 x float> @trunc_v4f32_maskz_broadcast(ptr %ptr, <4 x i32> %cmp) {
1977 ; CHECK-LABEL: trunc_v4f32_maskz_broadcast:
1979 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
1980 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to4}, %xmm0 {%k1} {z}
1982 %c = icmp eq <4 x i32> %cmp, zeroinitializer
1983 %ps = load float, ptr %ptr
1984 %pins = insertelement <4 x float> undef, float %ps, i32 0
1985 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
1986 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1987 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; Test: llvm.trunc.v4f64 on a splat of the double at %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmq builds %k1; vrndscalepd $11 reads a {1to4} broadcast of (%rdi) under {%k1} {z}.
1991 define <4 x double> @trunc_v4f64_maskz_broadcast(ptr %ptr, <4 x i64> %cmp) {
1992 ; CHECK-LABEL: trunc_v4f64_maskz_broadcast:
1994 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
1995 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to4}, %ymm0 {%k1} {z}
1997 %c = icmp eq <4 x i64> %cmp, zeroinitializer
1998 %ps = load double, ptr %ptr
1999 %pins = insertelement <4 x double> undef, double %ps, i32 0
2000 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
2001 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
2002 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; Test: llvm.trunc.v8f32 on a splat of the float at %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmd builds %k1; vrndscaleps $11 reads a {1to8} broadcast of (%rdi) under {%k1} {z}.
2006 define <8 x float> @trunc_v8f32_maskz_broadcast(ptr %ptr, <8 x i32> %cmp) {
2007 ; CHECK-LABEL: trunc_v8f32_maskz_broadcast:
2009 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
2010 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to8}, %ymm0 {%k1} {z}
2012 %c = icmp eq <8 x i32> %cmp, zeroinitializer
2013 %ps = load float, ptr %ptr
2014 %pins = insertelement <8 x float> undef, float %ps, i32 0
2015 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
2016 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
2017 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; Test: llvm.trunc.v8f64 on a splat of the double at %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmq builds %k1; vrndscalepd $11 reads a {1to8} broadcast of (%rdi) under {%k1} {z}.
2021 define <8 x double> @trunc_v8f64_maskz_broadcast(ptr %ptr, <8 x i64> %cmp) {
2022 ; CHECK-LABEL: trunc_v8f64_maskz_broadcast:
2024 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
2025 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to8}, %zmm0 {%k1} {z}
2027 %c = icmp eq <8 x i64> %cmp, zeroinitializer
2028 %ps = load double, ptr %ptr
2029 %pins = insertelement <8 x double> undef, double %ps, i32 0
2030 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
2031 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
2032 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; Test: llvm.trunc.v16f32 on a splat of the float at %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmd builds %k1; vrndscaleps $11 reads a {1to16} broadcast of (%rdi) under {%k1} {z}.
2036 define <16 x float> @trunc_v16f32_maskz_broadcast(ptr %ptr, <16 x i32> %cmp) {
2037 ; CHECK-LABEL: trunc_v16f32_maskz_broadcast:
2039 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
2040 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to16}, %zmm0 {%k1} {z}
2042 %c = icmp eq <16 x i32> %cmp, zeroinitializer
2043 %ps = load float, ptr %ptr
2044 %pins = insertelement <16 x float> undef, float %ps, i32 0
2045 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
2046 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
2047 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; Test: llvm.rint.v2f64 on a register argument; expected asm is vroundpd $4 on %xmm0.
2051 define <2 x double> @rint_v2f64(<2 x double> %p) {
2052 ; CHECK-LABEL: rint_v2f64:
2054 ; CHECK-NEXT: vroundpd $4, %xmm0, %xmm0
2056 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
; Test: llvm.rint.v4f32 on a register argument; expected asm is vroundps $4 on %xmm0.
2060 define <4 x float> @rint_v4f32(<4 x float> %p) {
2061 ; CHECK-LABEL: rint_v4f32:
2063 ; CHECK-NEXT: vroundps $4, %xmm0, %xmm0
2065 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
; Test: llvm.rint.v4f64 on a register argument; expected asm is vroundpd $4 on %ymm0.
2069 define <4 x double> @rint_v4f64(<4 x double> %p){
2070 ; CHECK-LABEL: rint_v4f64:
2072 ; CHECK-NEXT: vroundpd $4, %ymm0, %ymm0
2074 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
; Test: llvm.rint.v8f32 on a register argument; expected asm is vroundps $4 on %ymm0.
2078 define <8 x float> @rint_v8f32(<8 x float> %p) {
2079 ; CHECK-LABEL: rint_v8f32:
2081 ; CHECK-NEXT: vroundps $4, %ymm0, %ymm0
2083 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
; Test: llvm.rint.v8f64 on a register argument; the 512-bit case expects vrndscalepd $4 on %zmm0.
2087 define <8 x double> @rint_v8f64(<8 x double> %p){
2088 ; CHECK-LABEL: rint_v8f64:
2090 ; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm0
2092 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
; Test: llvm.rint.v16f32 on a register argument; the 512-bit case expects vrndscaleps $4 on %zmm0.
2096 define <16 x float> @rint_v16f32(<16 x float> %p) {
2097 ; CHECK-LABEL: rint_v16f32:
2099 ; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm0
2101 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
; Test: llvm.rint.v2f64 on a vector loaded from %ptr; expected asm is vroundpd $4 with (%rdi) as its source operand.
2105 define <2 x double> @rint_v2f64_load(ptr %ptr) {
2106 ; CHECK-LABEL: rint_v2f64_load:
2108 ; CHECK-NEXT: vroundpd $4, (%rdi), %xmm0
2110 %p = load <2 x double>, ptr %ptr
2111 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
; Test: llvm.rint.v4f32 on a vector loaded from %ptr; expected asm is vroundps $4 with (%rdi) as its source operand.
2115 define <4 x float> @rint_v4f32_load(ptr %ptr) {
2116 ; CHECK-LABEL: rint_v4f32_load:
2118 ; CHECK-NEXT: vroundps $4, (%rdi), %xmm0
2120 %p = load <4 x float>, ptr %ptr
2121 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
; Test: llvm.rint.v4f64 on a vector loaded from %ptr; expected asm is vroundpd $4 with (%rdi) as its source operand.
2125 define <4 x double> @rint_v4f64_load(ptr %ptr){
2126 ; CHECK-LABEL: rint_v4f64_load:
2128 ; CHECK-NEXT: vroundpd $4, (%rdi), %ymm0
2130 %p = load <4 x double>, ptr %ptr
2131 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
; Test: llvm.rint.v8f32 on a vector loaded from %ptr; expected asm is vroundps $4 with (%rdi) as its source operand.
2135 define <8 x float> @rint_v8f32_load(ptr %ptr) {
2136 ; CHECK-LABEL: rint_v8f32_load:
2138 ; CHECK-NEXT: vroundps $4, (%rdi), %ymm0
2140 %p = load <8 x float>, ptr %ptr
2141 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
; Test: llvm.rint.v8f64 on a vector loaded from %ptr; expected asm is vrndscalepd $4 with (%rdi) as its source operand.
2145 define <8 x double> @rint_v8f64_load(ptr %ptr){
2146 ; CHECK-LABEL: rint_v8f64_load:
2148 ; CHECK-NEXT: vrndscalepd $4, (%rdi), %zmm0
2150 %p = load <8 x double>, ptr %ptr
2151 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
; Test: llvm.rint.v16f32 on a vector loaded from %ptr; expected asm is vrndscaleps $4 with (%rdi) as its source operand.
2155 define <16 x float> @rint_v16f32_load(ptr %ptr) {
2156 ; CHECK-LABEL: rint_v16f32_load:
2158 ; CHECK-NEXT: vrndscaleps $4, (%rdi), %zmm0
2160 %p = load <16 x float>, ptr %ptr
2161 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
; Test: llvm.rint.v2f64 on %p; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmq builds %k1, vrndscalepd $4 writes %xmm1 under {%k1}, then vmovapd copies %xmm1 to %xmm0.
2165 define <2 x double> @rint_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
2166 ; CHECK-LABEL: rint_v2f64_mask:
2168 ; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1
2169 ; CHECK-NEXT: vrndscalepd $4, %xmm0, %xmm1 {%k1}
2170 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
2172 %c = icmp eq <2 x i64> %cmp, zeroinitializer
2173 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2174 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; Test: llvm.rint.v4f32 on %p; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmd builds %k1, vrndscaleps $4 writes %xmm1 under {%k1}, then vmovaps copies %xmm1 to %xmm0.
2178 define <4 x float> @rint_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
2179 ; CHECK-LABEL: rint_v4f32_mask:
2181 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
2182 ; CHECK-NEXT: vrndscaleps $4, %xmm0, %xmm1 {%k1}
2183 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
2185 %c = icmp eq <4 x i32> %cmp, zeroinitializer
2186 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2187 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; Test: llvm.rint.v4f64 on %p; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmq builds %k1, vrndscalepd $4 writes %ymm1 under {%k1}, then vmovapd copies %ymm1 to %ymm0.
2191 define <4 x double> @rint_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
2192 ; CHECK-LABEL: rint_v4f64_mask:
2194 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
2195 ; CHECK-NEXT: vrndscalepd $4, %ymm0, %ymm1 {%k1}
2196 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
2198 %c = icmp eq <4 x i64> %cmp, zeroinitializer
2199 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2200 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; Test: llvm.rint.v8f32 on %p; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmd builds %k1, vrndscaleps $4 writes %ymm1 under {%k1}, then vmovaps copies %ymm1 to %ymm0.
2204 define <8 x float> @rint_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
2205 ; CHECK-LABEL: rint_v8f32_mask:
2207 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
2208 ; CHECK-NEXT: vrndscaleps $4, %ymm0, %ymm1 {%k1}
2209 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
2211 %c = icmp eq <8 x i32> %cmp, zeroinitializer
2212 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2213 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; Test: llvm.rint.v8f64 on %p; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmq builds %k1, vrndscalepd $4 writes %zmm1 under {%k1}, then vmovapd copies %zmm1 to %zmm0.
2217 define <8 x double> @rint_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
2218 ; CHECK-LABEL: rint_v8f64_mask:
2220 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
2221 ; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm1 {%k1}
2222 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2224 %c = icmp eq <8 x i64> %cmp, zeroinitializer
2225 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2226 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; Test: llvm.rint.v16f32 on %p; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmd builds %k1, vrndscaleps $4 writes %zmm1 under {%k1}, then vmovaps copies %zmm1 to %zmm0.
2230 define <16 x float> @rint_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
2231 ; CHECK-LABEL: rint_v16f32_mask:
2233 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
2234 ; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm1 {%k1}
2235 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2237 %c = icmp eq <16 x i32> %cmp, zeroinitializer
2238 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2239 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; Test: llvm.rint.v2f64 on %p; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 on %xmm0 runs under {%k1} {z}.
2243 define <2 x double> @rint_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
2244 ; CHECK-LABEL: rint_v2f64_maskz:
2246 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
2247 ; CHECK-NEXT: vrndscalepd $4, %xmm0, %xmm0 {%k1} {z}
2249 %c = icmp eq <2 x i64> %cmp, zeroinitializer
2250 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2251 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; Test: llvm.rint.v4f32 on %p; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 on %xmm0 runs under {%k1} {z}.
2255 define <4 x float> @rint_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
2256 ; CHECK-LABEL: rint_v4f32_maskz:
2258 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
2259 ; CHECK-NEXT: vrndscaleps $4, %xmm0, %xmm0 {%k1} {z}
2261 %c = icmp eq <4 x i32> %cmp, zeroinitializer
2262 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2263 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; Test: llvm.rint.v4f64 on %p; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 on %ymm0 runs under {%k1} {z}.
2267 define <4 x double> @rint_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
2268 ; CHECK-LABEL: rint_v4f64_maskz:
2270 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
2271 ; CHECK-NEXT: vrndscalepd $4, %ymm0, %ymm0 {%k1} {z}
2273 %c = icmp eq <4 x i64> %cmp, zeroinitializer
2274 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2275 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; Test: llvm.rint.v8f32 on %p; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 on %ymm0 runs under {%k1} {z}.
2279 define <8 x float> @rint_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
2280 ; CHECK-LABEL: rint_v8f32_maskz:
2282 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
2283 ; CHECK-NEXT: vrndscaleps $4, %ymm0, %ymm0 {%k1} {z}
2285 %c = icmp eq <8 x i32> %cmp, zeroinitializer
2286 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2287 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; Test: llvm.rint.v8f64 on %p; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 on %zmm0 runs under {%k1} {z}.
2291 define <8 x double> @rint_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
2292 ; CHECK-LABEL: rint_v8f64_maskz:
2294 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
2295 ; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm0 {%k1} {z}
2297 %c = icmp eq <8 x i64> %cmp, zeroinitializer
2298 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2299 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; Test: llvm.rint.v16f32 on %p; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 on %zmm0 runs under {%k1} {z}.
2303 define <16 x float> @rint_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
2304 ; CHECK-LABEL: rint_v16f32_maskz:
2306 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
2307 ; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm0 {%k1} {z}
2309 %c = icmp eq <16 x i32> %cmp, zeroinitializer
2310 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2311 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; Test: llvm.rint.v2f64 on a vector loaded from %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 reads (%rdi) and writes %xmm0 under {%k1}.
2315 define <2 x double> @rint_v2f64_mask_load(ptr %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
2316 ; CHECK-LABEL: rint_v2f64_mask_load:
2318 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
2319 ; CHECK-NEXT: vrndscalepd $4, (%rdi), %xmm0 {%k1}
2321 %c = icmp eq <2 x i64> %cmp, zeroinitializer
2322 %p = load <2 x double>, ptr %ptr
2323 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2324 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; Test: llvm.rint.v4f32 on a vector loaded from %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 reads (%rdi) and writes %xmm0 under {%k1}.
2328 define <4 x float> @rint_v4f32_mask_load(ptr %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
2329 ; CHECK-LABEL: rint_v4f32_mask_load:
2331 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
2332 ; CHECK-NEXT: vrndscaleps $4, (%rdi), %xmm0 {%k1}
2334 %c = icmp eq <4 x i32> %cmp, zeroinitializer
2335 %p = load <4 x float>, ptr %ptr
2336 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2337 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; Test: llvm.rint.v4f64 on a vector loaded from %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 reads (%rdi) and writes %ymm0 under {%k1}.
2341 define <4 x double> @rint_v4f64_mask_load(ptr %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
2342 ; CHECK-LABEL: rint_v4f64_mask_load:
2344 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
2345 ; CHECK-NEXT: vrndscalepd $4, (%rdi), %ymm0 {%k1}
2347 %c = icmp eq <4 x i64> %cmp, zeroinitializer
2348 %p = load <4 x double>, ptr %ptr
2349 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2350 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; Test: llvm.rint.v8f32 on a vector loaded from %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 reads (%rdi) and writes %ymm0 under {%k1}.
2354 define <8 x float> @rint_v8f32_mask_load(ptr %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
2355 ; CHECK-LABEL: rint_v8f32_mask_load:
2357 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
2358 ; CHECK-NEXT: vrndscaleps $4, (%rdi), %ymm0 {%k1}
2360 %c = icmp eq <8 x i32> %cmp, zeroinitializer
2361 %p = load <8 x float>, ptr %ptr
2362 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2363 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; Test: llvm.rint.v8f64 on a vector loaded from %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 reads (%rdi) and writes %zmm0 under {%k1}.
2367 define <8 x double> @rint_v8f64_mask_load(ptr %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
2368 ; CHECK-LABEL: rint_v8f64_mask_load:
2370 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
2371 ; CHECK-NEXT: vrndscalepd $4, (%rdi), %zmm0 {%k1}
2373 %c = icmp eq <8 x i64> %cmp, zeroinitializer
2374 %p = load <8 x double>, ptr %ptr
2375 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2376 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; Test: llvm.rint.v16f32 on a vector loaded from %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 reads (%rdi) and writes %zmm0 under {%k1}.
2380 define <16 x float> @rint_v16f32_mask_load(ptr %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
2381 ; CHECK-LABEL: rint_v16f32_mask_load:
2383 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
2384 ; CHECK-NEXT: vrndscaleps $4, (%rdi), %zmm0 {%k1}
2386 %c = icmp eq <16 x i32> %cmp, zeroinitializer
2387 %p = load <16 x float>, ptr %ptr
2388 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2389 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; Test: llvm.rint.v2f64 on a vector loaded from %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 reads (%rdi) under {%k1} {z}.
2393 define <2 x double> @rint_v2f64_maskz_load(ptr %ptr, <2 x i64> %cmp) {
2394 ; CHECK-LABEL: rint_v2f64_maskz_load:
2396 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
2397 ; CHECK-NEXT: vrndscalepd $4, (%rdi), %xmm0 {%k1} {z}
2399 %c = icmp eq <2 x i64> %cmp, zeroinitializer
2400 %p = load <2 x double>, ptr %ptr
2401 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2402 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; Test: llvm.rint.v4f32 on a vector loaded from %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 reads (%rdi) under {%k1} {z}.
2406 define <4 x float> @rint_v4f32_maskz_load(ptr %ptr, <4 x i32> %cmp) {
2407 ; CHECK-LABEL: rint_v4f32_maskz_load:
2409 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
2410 ; CHECK-NEXT: vrndscaleps $4, (%rdi), %xmm0 {%k1} {z}
2412 %c = icmp eq <4 x i32> %cmp, zeroinitializer
2413 %p = load <4 x float>, ptr %ptr
2414 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2415 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; Test: llvm.rint.v4f64 on a vector loaded from %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 reads (%rdi) under {%k1} {z}.
2419 define <4 x double> @rint_v4f64_maskz_load(ptr %ptr, <4 x i64> %cmp) {
2420 ; CHECK-LABEL: rint_v4f64_maskz_load:
2422 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
2423 ; CHECK-NEXT: vrndscalepd $4, (%rdi), %ymm0 {%k1} {z}
2425 %c = icmp eq <4 x i64> %cmp, zeroinitializer
2426 %p = load <4 x double>, ptr %ptr
2427 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2428 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; Test: llvm.rint.v8f32 on a vector loaded from %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 reads (%rdi) under {%k1} {z}.
2432 define <8 x float> @rint_v8f32_maskz_load(ptr %ptr, <8 x i32> %cmp) {
2433 ; CHECK-LABEL: rint_v8f32_maskz_load:
2435 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
2436 ; CHECK-NEXT: vrndscaleps $4, (%rdi), %ymm0 {%k1} {z}
2438 %c = icmp eq <8 x i32> %cmp, zeroinitializer
2439 %p = load <8 x float>, ptr %ptr
2440 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2441 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; Test: llvm.rint.v8f64 on a vector loaded from %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 reads (%rdi) under {%k1} {z}.
2445 define <8 x double> @rint_v8f64_maskz_load(ptr %ptr, <8 x i64> %cmp) {
2446 ; CHECK-LABEL: rint_v8f64_maskz_load:
2448 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
2449 ; CHECK-NEXT: vrndscalepd $4, (%rdi), %zmm0 {%k1} {z}
2451 %c = icmp eq <8 x i64> %cmp, zeroinitializer
2452 %p = load <8 x double>, ptr %ptr
2453 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2454 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; Test: llvm.rint.v16f32 on a vector loaded from %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 reads (%rdi) under {%k1} {z}.
2458 define <16 x float> @rint_v16f32_maskz_load(ptr %ptr, <16 x i32> %cmp) {
2459 ; CHECK-LABEL: rint_v16f32_maskz_load:
2461 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
2462 ; CHECK-NEXT: vrndscaleps $4, (%rdi), %zmm0 {%k1} {z}
2464 %c = icmp eq <16 x i32> %cmp, zeroinitializer
2465 %p = load <16 x float>, ptr %ptr
2466 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2467 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; Test: llvm.rint.v2f64 on a splat of the double loaded from %ptr.
; Expected asm: vrndscalepd $4 reading (%rdi) as a {1to2} broadcast operand.
2471 define <2 x double> @rint_v2f64_broadcast(ptr %ptr) {
2472 ; CHECK-LABEL: rint_v2f64_broadcast:
2474 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to2}, %xmm0
2476 %ps = load double, ptr %ptr
2477 %pins = insertelement <2 x double> undef, double %ps, i32 0
2478 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
2479 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
; Test: llvm.rint.v4f32 on a splat of the float loaded from %ptr.
; Expected asm: vrndscaleps $4 reading (%rdi) as a {1to4} broadcast operand.
2483 define <4 x float> @rint_v4f32_broadcast(ptr %ptr) {
2484 ; CHECK-LABEL: rint_v4f32_broadcast:
2486 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to4}, %xmm0
2488 %ps = load float, ptr %ptr
2489 %pins = insertelement <4 x float> undef, float %ps, i32 0
2490 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
2491 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
; Test: llvm.rint.v4f64 on a splat of the double loaded from %ptr.
; Expected asm: vrndscalepd $4 reading (%rdi) as a {1to4} broadcast operand.
2495 define <4 x double> @rint_v4f64_broadcast(ptr %ptr){
2496 ; CHECK-LABEL: rint_v4f64_broadcast:
2498 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to4}, %ymm0
2500 %ps = load double, ptr %ptr
2501 %pins = insertelement <4 x double> undef, double %ps, i32 0
2502 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
2503 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
; Test: llvm.rint.v8f32 on a splat of the float loaded from %ptr.
; Expected asm: vrndscaleps $4 reading (%rdi) as a {1to8} broadcast operand.
2507 define <8 x float> @rint_v8f32_broadcast(ptr %ptr) {
2508 ; CHECK-LABEL: rint_v8f32_broadcast:
2510 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to8}, %ymm0
2512 %ps = load float, ptr %ptr
2513 %pins = insertelement <8 x float> undef, float %ps, i32 0
2514 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
2515 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
; Test: llvm.rint.v8f64 on a splat of the double loaded from %ptr.
; Expected asm: vrndscalepd $4 reading (%rdi) as a {1to8} broadcast operand into %zmm0.
2519 define <8 x double> @rint_v8f64_broadcast(ptr %ptr){
2520 ; CHECK-LABEL: rint_v8f64_broadcast:
2522 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to8}, %zmm0
2524 %ps = load double, ptr %ptr
2525 %pins = insertelement <8 x double> undef, double %ps, i32 0
2526 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
2527 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
; Test: llvm.rint.v16f32 on a splat of the float loaded from %ptr.
; Expected asm: vrndscaleps $4 reading (%rdi) as a {1to16} broadcast operand into %zmm0.
2531 define <16 x float> @rint_v16f32_broadcast(ptr %ptr) {
2532 ; CHECK-LABEL: rint_v16f32_broadcast:
2534 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to16}, %zmm0
2536 %ps = load float, ptr %ptr
2537 %pins = insertelement <16 x float> undef, float %ps, i32 0
2538 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
2539 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
; Test: llvm.rint.v2f64 on a splat of the double at %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 reads a {1to2} broadcast of (%rdi) under {%k1}.
2543 define <2 x double> @rint_v2f64_mask_broadcast(ptr %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
2544 ; CHECK-LABEL: rint_v2f64_mask_broadcast:
2546 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
2547 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to2}, %xmm0 {%k1}
2549 %c = icmp eq <2 x i64> %cmp, zeroinitializer
2550 %ps = load double, ptr %ptr
2551 %pins = insertelement <2 x double> undef, double %ps, i32 0
2552 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
2553 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2554 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; Test: llvm.rint.v4f32 on a splat of the float at %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 reads a {1to4} broadcast of (%rdi) under {%k1}.
2558 define <4 x float> @rint_v4f32_mask_broadcast(ptr %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
2559 ; CHECK-LABEL: rint_v4f32_mask_broadcast:
2561 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
2562 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to4}, %xmm0 {%k1}
2564 %c = icmp eq <4 x i32> %cmp, zeroinitializer
2565 %ps = load float, ptr %ptr
2566 %pins = insertelement <4 x float> undef, float %ps, i32 0
2567 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
2568 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2569 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; Test: llvm.rint.v4f64 on a splat of the double at %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 reads a {1to4} broadcast of (%rdi) under {%k1}.
2573 define <4 x double> @rint_v4f64_mask_broadcast(ptr %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
2574 ; CHECK-LABEL: rint_v4f64_mask_broadcast:
2576 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
2577 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to4}, %ymm0 {%k1}
2579 %c = icmp eq <4 x i64> %cmp, zeroinitializer
2580 %ps = load double, ptr %ptr
2581 %pins = insertelement <4 x double> undef, double %ps, i32 0
2582 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
2583 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2584 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; Test: llvm.rint.v8f32 on a splat of the float at %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 reads a {1to8} broadcast of (%rdi) under {%k1}.
2588 define <8 x float> @rint_v8f32_mask_broadcast(ptr %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
2589 ; CHECK-LABEL: rint_v8f32_mask_broadcast:
2591 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
2592 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to8}, %ymm0 {%k1}
2594 %c = icmp eq <8 x i32> %cmp, zeroinitializer
2595 %ps = load float, ptr %ptr
2596 %pins = insertelement <8 x float> undef, float %ps, i32 0
2597 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
2598 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2599 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; Test: llvm.rint.v8f64 on a splat of the double at %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 reads a {1to8} broadcast of (%rdi) under {%k1}.
2603 define <8 x double> @rint_v8f64_mask_broadcast(ptr %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
2604 ; CHECK-LABEL: rint_v8f64_mask_broadcast:
2606 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
2607 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to8}, %zmm0 {%k1}
2609 %c = icmp eq <8 x i64> %cmp, zeroinitializer
2610 %ps = load double, ptr %ptr
2611 %pins = insertelement <8 x double> undef, double %ps, i32 0
2612 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
2613 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2614 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; Test: llvm.rint.v16f32 on a splat of the float at %ptr; lanes where %cmp == 0 take the result, others %passthru.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 reads a {1to16} broadcast of (%rdi) under {%k1}.
2618 define <16 x float> @rint_v16f32_mask_broadcast(ptr %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
2619 ; CHECK-LABEL: rint_v16f32_mask_broadcast:
2621 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
2622 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to16}, %zmm0 {%k1}
2624 %c = icmp eq <16 x i32> %cmp, zeroinitializer
2625 %ps = load float, ptr %ptr
2626 %pins = insertelement <16 x float> undef, float %ps, i32 0
2627 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
2628 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2629 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; Test: llvm.rint.v2f64 on a splat of the double at %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 reads a {1to2} broadcast of (%rdi) under {%k1} {z}.
2633 define <2 x double> @rint_v2f64_maskz_broadcast(ptr %ptr, <2 x i64> %cmp) {
2634 ; CHECK-LABEL: rint_v2f64_maskz_broadcast:
2636 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
2637 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to2}, %xmm0 {%k1} {z}
2639 %c = icmp eq <2 x i64> %cmp, zeroinitializer
2640 %ps = load double, ptr %ptr
2641 %pins = insertelement <2 x double> undef, double %ps, i32 0
2642 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
2643 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2644 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; Test: llvm.rint.v4f32 on a splat of the float at %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 reads a {1to4} broadcast of (%rdi) under {%k1} {z}.
2648 define <4 x float> @rint_v4f32_maskz_broadcast(ptr %ptr, <4 x i32> %cmp) {
2649 ; CHECK-LABEL: rint_v4f32_maskz_broadcast:
2651 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
2652 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to4}, %xmm0 {%k1} {z}
2654 %c = icmp eq <4 x i32> %cmp, zeroinitializer
2655 %ps = load float, ptr %ptr
2656 %pins = insertelement <4 x float> undef, float %ps, i32 0
2657 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
2658 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2659 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; Test: llvm.rint.v4f64 on a splat of the double at %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 reads a {1to4} broadcast of (%rdi) under {%k1} {z}.
2663 define <4 x double> @rint_v4f64_maskz_broadcast(ptr %ptr, <4 x i64> %cmp) {
2664 ; CHECK-LABEL: rint_v4f64_maskz_broadcast:
2666 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
2667 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to4}, %ymm0 {%k1} {z}
2669 %c = icmp eq <4 x i64> %cmp, zeroinitializer
2670 %ps = load double, ptr %ptr
2671 %pins = insertelement <4 x double> undef, double %ps, i32 0
2672 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
2673 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2674 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; Test: llvm.rint.v8f32 on a splat of the float at %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 reads a {1to8} broadcast of (%rdi) under {%k1} {z}.
2678 define <8 x float> @rint_v8f32_maskz_broadcast(ptr %ptr, <8 x i32> %cmp) {
2679 ; CHECK-LABEL: rint_v8f32_maskz_broadcast:
2681 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
2682 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to8}, %ymm0 {%k1} {z}
2684 %c = icmp eq <8 x i32> %cmp, zeroinitializer
2685 %ps = load float, ptr %ptr
2686 %pins = insertelement <8 x float> undef, float %ps, i32 0
2687 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
2688 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2689 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; Test: llvm.rint.v8f64 on a splat of the double at %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmq builds %k1; vrndscalepd $4 reads a {1to8} broadcast of (%rdi) under {%k1} {z}.
2693 define <8 x double> @rint_v8f64_maskz_broadcast(ptr %ptr, <8 x i64> %cmp) {
2694 ; CHECK-LABEL: rint_v8f64_maskz_broadcast:
2696 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
2697 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to8}, %zmm0 {%k1} {z}
2699 %c = icmp eq <8 x i64> %cmp, zeroinitializer
2700 %ps = load double, ptr %ptr
2701 %pins = insertelement <8 x double> undef, double %ps, i32 0
2702 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
2703 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2704 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; Test: llvm.rint.v16f32 on a splat of the float at %ptr; lanes where %cmp == 0 take the result, others are zero.
; Expected asm: vptestnmd builds %k1; vrndscaleps $4 reads a {1to16} broadcast of (%rdi) under {%k1} {z}.
2708 define <16 x float> @rint_v16f32_maskz_broadcast(ptr %ptr, <16 x i32> %cmp) {
2709 ; CHECK-LABEL: rint_v16f32_maskz_broadcast:
2711 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
2712 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to16}, %zmm0 {%k1} {z}
2714 %c = icmp eq <16 x i32> %cmp, zeroinitializer
2715 %ps = load float, ptr %ptr
2716 %pins = insertelement <16 x float> undef, float %ps, i32 0
2717 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
2718 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2719 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; Test: llvm.nearbyint.v2f64 on a register argument; expected asm is vroundpd $12 on %xmm0.
2723 define <2 x double> @nearbyint_v2f64(<2 x double> %p) {
2724 ; CHECK-LABEL: nearbyint_v2f64:
2726 ; CHECK-NEXT: vroundpd $12, %xmm0, %xmm0
2728 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
; Test: llvm.nearbyint.v4f32 on a register argument; expected asm is vroundps $12 on %xmm0.
2732 define <4 x float> @nearbyint_v4f32(<4 x float> %p) {
2733 ; CHECK-LABEL: nearbyint_v4f32:
2735 ; CHECK-NEXT: vroundps $12, %xmm0, %xmm0
2737 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
; Test: llvm.nearbyint.v4f64 on a register argument; expected asm is vroundpd $12 on %ymm0.
2741 define <4 x double> @nearbyint_v4f64(<4 x double> %p){
2742 ; CHECK-LABEL: nearbyint_v4f64:
2744 ; CHECK-NEXT: vroundpd $12, %ymm0, %ymm0
2746 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
; Test: llvm.nearbyint.v8f32 on a register argument; expected asm is vroundps $12 on %ymm0.
2750 define <8 x float> @nearbyint_v8f32(<8 x float> %p) {
2751 ; CHECK-LABEL: nearbyint_v8f32:
2753 ; CHECK-NEXT: vroundps $12, %ymm0, %ymm0
2755 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
; Test: llvm.nearbyint.v8f64 on a register argument; the 512-bit case expects vrndscalepd $12 on %zmm0.
2759 define <8 x double> @nearbyint_v8f64(<8 x double> %p){
2760 ; CHECK-LABEL: nearbyint_v8f64:
2762 ; CHECK-NEXT: vrndscalepd $12, %zmm0, %zmm0
2764 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
; Test: llvm.nearbyint.v16f32 on a register argument; the 512-bit case expects vrndscaleps $12 on %zmm0.
2768 define <16 x float> @nearbyint_v16f32(<16 x float> %p) {
2769 ; CHECK-LABEL: nearbyint_v16f32:
2771 ; CHECK-NEXT: vrndscaleps $12, %zmm0, %zmm0
2773 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
; Test: llvm.nearbyint.v2f64 on a vector loaded from %ptr; expected asm is vroundpd $12 with (%rdi) as its source operand.
2777 define <2 x double> @nearbyint_v2f64_load(ptr %ptr) {
2778 ; CHECK-LABEL: nearbyint_v2f64_load:
2780 ; CHECK-NEXT: vroundpd $12, (%rdi), %xmm0
2782 %p = load <2 x double>, ptr %ptr
2783 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
; Test: llvm.nearbyint.v4f32 on a vector loaded from %ptr; expected asm is vroundps $12 with (%rdi) as its source operand.
2787 define <4 x float> @nearbyint_v4f32_load(ptr %ptr) {
2788 ; CHECK-LABEL: nearbyint_v4f32_load:
2790 ; CHECK-NEXT: vroundps $12, (%rdi), %xmm0
2792 %p = load <4 x float>, ptr %ptr
2793 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
; Test: llvm.nearbyint.v4f64 on a vector loaded from %ptr; expected asm is vroundpd $12 with (%rdi) as its source operand.
2797 define <4 x double> @nearbyint_v4f64_load(ptr %ptr){
2798 ; CHECK-LABEL: nearbyint_v4f64_load:
2800 ; CHECK-NEXT: vroundpd $12, (%rdi), %ymm0
2802 %p = load <4 x double>, ptr %ptr
2803 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
; llvm.nearbyint on an <8 x float> loaded from %ptr.
; Expected asm: vroundps (immediate 12) takes (%rdi) directly as its source operand and writes %ymm0.
2807 define <8 x float> @nearbyint_v8f32_load(ptr %ptr) {
2808 ; CHECK-LABEL: nearbyint_v8f32_load:
2810 ; CHECK-NEXT: vroundps $12, (%rdi), %ymm0
2812 %p = load <8 x float>, ptr %ptr
2813 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
; llvm.nearbyint on an <8 x double> loaded from %ptr.
; Expected asm: vrndscalepd (immediate 12) takes (%rdi) directly as its source operand and writes %zmm0.
2817 define <8 x double> @nearbyint_v8f64_load(ptr %ptr){
2818 ; CHECK-LABEL: nearbyint_v8f64_load:
2820 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %zmm0
2822 %p = load <8 x double>, ptr %ptr
2823 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
; llvm.nearbyint on a <16 x float> loaded from %ptr.
; Expected asm: vrndscaleps (immediate 12) takes (%rdi) directly as its source operand and writes %zmm0.
2827 define <16 x float> @nearbyint_v16f32_load(ptr %ptr) {
2828 ; CHECK-LABEL: nearbyint_v16f32_load:
2830 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %zmm0
2832 %p = load <16 x float>, ptr %ptr
2833 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
; llvm.nearbyint on %p followed by a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take %passthru.
; Expected asm: vptestnmq produces %k1, vrndscalepd (immediate 12) writes %xmm1 under {%k1},
; and vmovapd then copies %xmm1 into %xmm0.
2837 define <2 x double> @nearbyint_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
2838 ; CHECK-LABEL: nearbyint_v2f64_mask:
2840 ; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1
2841 ; CHECK-NEXT: vrndscalepd $12, %xmm0, %xmm1 {%k1}
2842 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
2844 %c = icmp eq <2 x i64> %cmp, zeroinitializer
2845 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
2846 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; llvm.nearbyint on %p followed by a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take %passthru.
; Expected asm: vptestnmd produces %k1, vrndscaleps (immediate 12) writes %xmm1 under {%k1},
; and vmovaps then copies %xmm1 into %xmm0.
2850 define <4 x float> @nearbyint_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
2851 ; CHECK-LABEL: nearbyint_v4f32_mask:
2853 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
2854 ; CHECK-NEXT: vrndscaleps $12, %xmm0, %xmm1 {%k1}
2855 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
2857 %c = icmp eq <4 x i32> %cmp, zeroinitializer
2858 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
2859 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; llvm.nearbyint on %p followed by a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take %passthru.
; Expected asm: vptestnmq produces %k1, vrndscalepd (immediate 12) writes %ymm1 under {%k1},
; and vmovapd then copies %ymm1 into %ymm0.
2863 define <4 x double> @nearbyint_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
2864 ; CHECK-LABEL: nearbyint_v4f64_mask:
2866 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
2867 ; CHECK-NEXT: vrndscalepd $12, %ymm0, %ymm1 {%k1}
2868 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
2870 %c = icmp eq <4 x i64> %cmp, zeroinitializer
2871 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
2872 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; llvm.nearbyint on %p followed by a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take %passthru.
; Expected asm: vptestnmd produces %k1, vrndscaleps (immediate 12) writes %ymm1 under {%k1},
; and vmovaps then copies %ymm1 into %ymm0.
2876 define <8 x float> @nearbyint_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
2877 ; CHECK-LABEL: nearbyint_v8f32_mask:
2879 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
2880 ; CHECK-NEXT: vrndscaleps $12, %ymm0, %ymm1 {%k1}
2881 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
2883 %c = icmp eq <8 x i32> %cmp, zeroinitializer
2884 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
2885 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; llvm.nearbyint on %p followed by a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take %passthru.
; Expected asm: vptestnmq produces %k1, vrndscalepd (immediate 12) writes %zmm1 under {%k1},
; and vmovapd then copies %zmm1 into %zmm0.
2889 define <8 x double> @nearbyint_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
2890 ; CHECK-LABEL: nearbyint_v8f64_mask:
2892 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
2893 ; CHECK-NEXT: vrndscalepd $12, %zmm0, %zmm1 {%k1}
2894 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2896 %c = icmp eq <8 x i64> %cmp, zeroinitializer
2897 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
2898 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; llvm.nearbyint on %p followed by a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take %passthru.
; Expected asm: vptestnmd produces %k1, vrndscaleps (immediate 12) writes %zmm1 under {%k1},
; and vmovaps then copies %zmm1 into %zmm0.
2902 define <16 x float> @nearbyint_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
2903 ; CHECK-LABEL: nearbyint_v16f32_mask:
2905 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
2906 ; CHECK-NEXT: vrndscaleps $12, %zmm0, %zmm1 {%k1}
2907 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2909 %c = icmp eq <16 x i32> %cmp, zeroinitializer
2910 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
2911 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; llvm.nearbyint on %p followed by a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take zero.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) writes %xmm0 under {%k1} {z}.
2915 define <2 x double> @nearbyint_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
2916 ; CHECK-LABEL: nearbyint_v2f64_maskz:
2918 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
2919 ; CHECK-NEXT: vrndscalepd $12, %xmm0, %xmm0 {%k1} {z}
2921 %c = icmp eq <2 x i64> %cmp, zeroinitializer
2922 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
2923 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; llvm.nearbyint on %p followed by a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take zero.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) writes %xmm0 under {%k1} {z}.
2927 define <4 x float> @nearbyint_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
2928 ; CHECK-LABEL: nearbyint_v4f32_maskz:
2930 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
2931 ; CHECK-NEXT: vrndscaleps $12, %xmm0, %xmm0 {%k1} {z}
2933 %c = icmp eq <4 x i32> %cmp, zeroinitializer
2934 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
2935 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; llvm.nearbyint on %p followed by a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take zero.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) writes %ymm0 under {%k1} {z}.
2939 define <4 x double> @nearbyint_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
2940 ; CHECK-LABEL: nearbyint_v4f64_maskz:
2942 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
2943 ; CHECK-NEXT: vrndscalepd $12, %ymm0, %ymm0 {%k1} {z}
2945 %c = icmp eq <4 x i64> %cmp, zeroinitializer
2946 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
2947 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; llvm.nearbyint on %p followed by a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take zero.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) writes %ymm0 under {%k1} {z}.
2951 define <8 x float> @nearbyint_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
2952 ; CHECK-LABEL: nearbyint_v8f32_maskz:
2954 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
2955 ; CHECK-NEXT: vrndscaleps $12, %ymm0, %ymm0 {%k1} {z}
2957 %c = icmp eq <8 x i32> %cmp, zeroinitializer
2958 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
2959 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; llvm.nearbyint on %p followed by a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take zero.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) writes %zmm0 under {%k1} {z}.
2963 define <8 x double> @nearbyint_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
2964 ; CHECK-LABEL: nearbyint_v8f64_maskz:
2966 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
2967 ; CHECK-NEXT: vrndscalepd $12, %zmm0, %zmm0 {%k1} {z}
2969 %c = icmp eq <8 x i64> %cmp, zeroinitializer
2970 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
2971 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; llvm.nearbyint on %p followed by a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take zero.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) writes %zmm0 under {%k1} {z}.
2975 define <16 x float> @nearbyint_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
2976 ; CHECK-LABEL: nearbyint_v16f32_maskz:
2978 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
2979 ; CHECK-NEXT: vrndscaleps $12, %zmm0, %zmm0 {%k1} {z}
2981 %c = icmp eq <16 x i32> %cmp, zeroinitializer
2982 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
2983 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; llvm.nearbyint on a <2 x double> loaded from %ptr, then a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take %passthru.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) reads (%rdi) and writes %xmm0 under {%k1}.
2987 define <2 x double> @nearbyint_v2f64_mask_load(ptr %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
2988 ; CHECK-LABEL: nearbyint_v2f64_mask_load:
2990 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
2991 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %xmm0 {%k1}
2993 %c = icmp eq <2 x i64> %cmp, zeroinitializer
2994 %p = load <2 x double>, ptr %ptr
2995 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
2996 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; llvm.nearbyint on a <4 x float> loaded from %ptr, then a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take %passthru.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) reads (%rdi) and writes %xmm0 under {%k1}.
3000 define <4 x float> @nearbyint_v4f32_mask_load(ptr %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
3001 ; CHECK-LABEL: nearbyint_v4f32_mask_load:
3003 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
3004 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %xmm0 {%k1}
3006 %c = icmp eq <4 x i32> %cmp, zeroinitializer
3007 %p = load <4 x float>, ptr %ptr
3008 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
3009 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; llvm.nearbyint on a <4 x double> loaded from %ptr, then a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take %passthru.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) reads (%rdi) and writes %ymm0 under {%k1}.
3013 define <4 x double> @nearbyint_v4f64_mask_load(ptr %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
3014 ; CHECK-LABEL: nearbyint_v4f64_mask_load:
3016 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
3017 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %ymm0 {%k1}
3019 %c = icmp eq <4 x i64> %cmp, zeroinitializer
3020 %p = load <4 x double>, ptr %ptr
3021 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
3022 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; llvm.nearbyint on an <8 x float> loaded from %ptr, then a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take %passthru.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) reads (%rdi) and writes %ymm0 under {%k1}.
3026 define <8 x float> @nearbyint_v8f32_mask_load(ptr %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
3027 ; CHECK-LABEL: nearbyint_v8f32_mask_load:
3029 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
3030 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %ymm0 {%k1}
3032 %c = icmp eq <8 x i32> %cmp, zeroinitializer
3033 %p = load <8 x float>, ptr %ptr
3034 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
3035 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; llvm.nearbyint on an <8 x double> loaded from %ptr, then a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take %passthru.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) reads (%rdi) and writes %zmm0 under {%k1}.
3039 define <8 x double> @nearbyint_v8f64_mask_load(ptr %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
3040 ; CHECK-LABEL: nearbyint_v8f64_mask_load:
3042 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
3043 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %zmm0 {%k1}
3045 %c = icmp eq <8 x i64> %cmp, zeroinitializer
3046 %p = load <8 x double>, ptr %ptr
3047 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
3048 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; llvm.nearbyint on a <16 x float> loaded from %ptr, then a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take %passthru.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) reads (%rdi) and writes %zmm0 under {%k1}.
3052 define <16 x float> @nearbyint_v16f32_mask_load(ptr %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
3053 ; CHECK-LABEL: nearbyint_v16f32_mask_load:
3055 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
3056 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %zmm0 {%k1}
3058 %c = icmp eq <16 x i32> %cmp, zeroinitializer
3059 %p = load <16 x float>, ptr %ptr
3060 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
3061 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; llvm.nearbyint on a <2 x double> loaded from %ptr, then a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take zero.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) reads (%rdi) and writes %xmm0 under {%k1} {z}.
3065 define <2 x double> @nearbyint_v2f64_maskz_load(ptr %ptr, <2 x i64> %cmp) {
3066 ; CHECK-LABEL: nearbyint_v2f64_maskz_load:
3068 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
3069 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %xmm0 {%k1} {z}
3071 %c = icmp eq <2 x i64> %cmp, zeroinitializer
3072 %p = load <2 x double>, ptr %ptr
3073 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
3074 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; llvm.nearbyint on a <4 x float> loaded from %ptr, then a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take zero.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) reads (%rdi) and writes %xmm0 under {%k1} {z}.
3078 define <4 x float> @nearbyint_v4f32_maskz_load(ptr %ptr, <4 x i32> %cmp) {
3079 ; CHECK-LABEL: nearbyint_v4f32_maskz_load:
3081 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
3082 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %xmm0 {%k1} {z}
3084 %c = icmp eq <4 x i32> %cmp, zeroinitializer
3085 %p = load <4 x float>, ptr %ptr
3086 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
3087 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; llvm.nearbyint on a <4 x double> loaded from %ptr, then a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take zero.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) reads (%rdi) and writes %ymm0 under {%k1} {z}.
3091 define <4 x double> @nearbyint_v4f64_maskz_load(ptr %ptr, <4 x i64> %cmp) {
3092 ; CHECK-LABEL: nearbyint_v4f64_maskz_load:
3094 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
3095 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %ymm0 {%k1} {z}
3097 %c = icmp eq <4 x i64> %cmp, zeroinitializer
3098 %p = load <4 x double>, ptr %ptr
3099 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
3100 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; llvm.nearbyint on an <8 x float> loaded from %ptr, then a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take zero.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) reads (%rdi) and writes %ymm0 under {%k1} {z}.
3104 define <8 x float> @nearbyint_v8f32_maskz_load(ptr %ptr, <8 x i32> %cmp) {
3105 ; CHECK-LABEL: nearbyint_v8f32_maskz_load:
3107 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
3108 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %ymm0 {%k1} {z}
3110 %c = icmp eq <8 x i32> %cmp, zeroinitializer
3111 %p = load <8 x float>, ptr %ptr
3112 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
3113 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; llvm.nearbyint on an <8 x double> loaded from %ptr, then a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take zero.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) reads (%rdi) and writes %zmm0 under {%k1} {z}.
3117 define <8 x double> @nearbyint_v8f64_maskz_load(ptr %ptr, <8 x i64> %cmp) {
3118 ; CHECK-LABEL: nearbyint_v8f64_maskz_load:
3120 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
3121 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %zmm0 {%k1} {z}
3123 %c = icmp eq <8 x i64> %cmp, zeroinitializer
3124 %p = load <8 x double>, ptr %ptr
3125 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
3126 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; llvm.nearbyint on a <16 x float> loaded from %ptr, then a lane-wise select on (%cmp == 0):
; true lanes take the rounded value, false lanes take zero.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) reads (%rdi) and writes %zmm0 under {%k1} {z}.
3130 define <16 x float> @nearbyint_v16f32_maskz_load(ptr %ptr, <16 x i32> %cmp) {
3131 ; CHECK-LABEL: nearbyint_v16f32_maskz_load:
3133 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
3134 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %zmm0 {%k1} {z}
3136 %c = icmp eq <16 x i32> %cmp, zeroinitializer
3137 %p = load <16 x float>, ptr %ptr
3138 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
3139 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
; llvm.nearbyint on a splat: a scalar double is loaded from %ptr, inserted into lane 0,
; and replicated to all lanes by a shufflevector with an all-zero index mask.
; Expected asm: vrndscalepd (immediate 12) with the (%rdi){1to2} broadcast memory operand, writing %xmm0.
3143 define <2 x double> @nearbyint_v2f64_broadcast(ptr %ptr) {
3144 ; CHECK-LABEL: nearbyint_v2f64_broadcast:
3146 ; CHECK-NEXT: vrndscalepd $12, (%rdi){1to2}, %xmm0
3148 %ps = load double, ptr %ptr
3149 %pins = insertelement <2 x double> undef, double %ps, i32 0
3150 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
3151 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
; llvm.nearbyint on a splat: a scalar float is loaded from %ptr, inserted into lane 0,
; and replicated to all lanes by a shufflevector with an all-zero index mask.
; Expected asm: vrndscaleps (immediate 12) with the (%rdi){1to4} broadcast memory operand, writing %xmm0.
3155 define <4 x float> @nearbyint_v4f32_broadcast(ptr %ptr) {
3156 ; CHECK-LABEL: nearbyint_v4f32_broadcast:
3158 ; CHECK-NEXT: vrndscaleps $12, (%rdi){1to4}, %xmm0
3160 %ps = load float, ptr %ptr
3161 %pins = insertelement <4 x float> undef, float %ps, i32 0
3162 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
3163 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
; llvm.nearbyint on a splat: a scalar double is loaded from %ptr, inserted into lane 0,
; and replicated to all lanes by a shufflevector with an all-zero index mask.
; Expected asm: vrndscalepd (immediate 12) with the (%rdi){1to4} broadcast memory operand, writing %ymm0.
3167 define <4 x double> @nearbyint_v4f64_broadcast(ptr %ptr){
3168 ; CHECK-LABEL: nearbyint_v4f64_broadcast:
3170 ; CHECK-NEXT: vrndscalepd $12, (%rdi){1to4}, %ymm0
3172 %ps = load double, ptr %ptr
3173 %pins = insertelement <4 x double> undef, double %ps, i32 0
3174 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
3175 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
; llvm.nearbyint on a splat: a scalar float is loaded from %ptr, inserted into lane 0,
; and replicated to all lanes by a shufflevector with an all-zero index mask.
; Expected asm: vrndscaleps (immediate 12) with the (%rdi){1to8} broadcast memory operand, writing %ymm0.
3179 define <8 x float> @nearbyint_v8f32_broadcast(ptr %ptr) {
3180 ; CHECK-LABEL: nearbyint_v8f32_broadcast:
3182 ; CHECK-NEXT: vrndscaleps $12, (%rdi){1to8}, %ymm0
3184 %ps = load float, ptr %ptr
3185 %pins = insertelement <8 x float> undef, float %ps, i32 0
3186 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
3187 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
; llvm.nearbyint on a splat: a scalar double is loaded from %ptr, inserted into lane 0,
; and replicated to all lanes by a shufflevector with an all-zero index mask.
; Expected asm: vrndscalepd (immediate 12) with the (%rdi){1to8} broadcast memory operand, writing %zmm0.
3191 define <8 x double> @nearbyint_v8f64_broadcast(ptr %ptr){
3192 ; CHECK-LABEL: nearbyint_v8f64_broadcast:
3194 ; CHECK-NEXT: vrndscalepd $12, (%rdi){1to8}, %zmm0
3196 %ps = load double, ptr %ptr
3197 %pins = insertelement <8 x double> undef, double %ps, i32 0
3198 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
3199 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
; llvm.nearbyint on a splat: a scalar float is loaded from %ptr, inserted into lane 0,
; and replicated to all lanes by a shufflevector with an all-zero index mask.
; Expected asm: vrndscaleps (immediate 12) with the (%rdi){1to16} broadcast memory operand, writing %zmm0.
3203 define <16 x float> @nearbyint_v16f32_broadcast(ptr %ptr) {
3204 ; CHECK-LABEL: nearbyint_v16f32_broadcast:
3206 ; CHECK-NEXT: vrndscaleps $12, (%rdi){1to16}, %zmm0
3208 %ps = load float, ptr %ptr
3209 %pins = insertelement <16 x float> undef, float %ps, i32 0
3210 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
3211 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
; llvm.nearbyint on a splat of the scalar double loaded from %ptr (insertelement into lane 0 plus an
; all-zero shuffle mask), then a lane-wise select on (%cmp == 0): true lanes take the rounded value,
; false lanes take %passthru.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) reads (%rdi){1to2}, writing %xmm0 under {%k1}.
3215 define <2 x double> @nearbyint_v2f64_mask_broadcast(ptr %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
3216 ; CHECK-LABEL: nearbyint_v2f64_mask_broadcast:
3218 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
3219 ; CHECK-NEXT: vrndscalepd $12, (%rdi){1to2}, %xmm0 {%k1}
3221 %c = icmp eq <2 x i64> %cmp, zeroinitializer
3222 %ps = load double, ptr %ptr
3223 %pins = insertelement <2 x double> undef, double %ps, i32 0
3224 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
3225 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
3226 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
; llvm.nearbyint on a splat of the scalar float loaded from %ptr (insertelement into lane 0 plus an
; all-zero shuffle mask), then a lane-wise select on (%cmp == 0): true lanes take the rounded value,
; false lanes take %passthru.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) reads (%rdi){1to4}, writing %xmm0 under {%k1}.
3230 define <4 x float> @nearbyint_v4f32_mask_broadcast(ptr %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
3231 ; CHECK-LABEL: nearbyint_v4f32_mask_broadcast:
3233 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
3234 ; CHECK-NEXT: vrndscaleps $12, (%rdi){1to4}, %xmm0 {%k1}
3236 %c = icmp eq <4 x i32> %cmp, zeroinitializer
3237 %ps = load float, ptr %ptr
3238 %pins = insertelement <4 x float> undef, float %ps, i32 0
3239 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
3240 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
3241 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
; llvm.nearbyint on a splat of the scalar double loaded from %ptr (insertelement into lane 0 plus an
; all-zero shuffle mask), then a lane-wise select on (%cmp == 0): true lanes take the rounded value,
; false lanes take %passthru.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) reads (%rdi){1to4}, writing %ymm0 under {%k1}.
3245 define <4 x double> @nearbyint_v4f64_mask_broadcast(ptr %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
3246 ; CHECK-LABEL: nearbyint_v4f64_mask_broadcast:
3248 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
3249 ; CHECK-NEXT: vrndscalepd $12, (%rdi){1to4}, %ymm0 {%k1}
3251 %c = icmp eq <4 x i64> %cmp, zeroinitializer
3252 %ps = load double, ptr %ptr
3253 %pins = insertelement <4 x double> undef, double %ps, i32 0
3254 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
3255 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
3256 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
; llvm.nearbyint on a splat of the scalar float loaded from %ptr (insertelement into lane 0 plus an
; all-zero shuffle mask), then a lane-wise select on (%cmp == 0): true lanes take the rounded value,
; false lanes take %passthru.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) reads (%rdi){1to8}, writing %ymm0 under {%k1}.
3260 define <8 x float> @nearbyint_v8f32_mask_broadcast(ptr %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
3261 ; CHECK-LABEL: nearbyint_v8f32_mask_broadcast:
3263 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
3264 ; CHECK-NEXT: vrndscaleps $12, (%rdi){1to8}, %ymm0 {%k1}
3266 %c = icmp eq <8 x i32> %cmp, zeroinitializer
3267 %ps = load float, ptr %ptr
3268 %pins = insertelement <8 x float> undef, float %ps, i32 0
3269 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
3270 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
3271 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
; llvm.nearbyint on a splat of the scalar double loaded from %ptr (insertelement into lane 0 plus an
; all-zero shuffle mask), then a lane-wise select on (%cmp == 0): true lanes take the rounded value,
; false lanes take %passthru.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) reads (%rdi){1to8}, writing %zmm0 under {%k1}.
3275 define <8 x double> @nearbyint_v8f64_mask_broadcast(ptr %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
3276 ; CHECK-LABEL: nearbyint_v8f64_mask_broadcast:
3278 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
3279 ; CHECK-NEXT: vrndscalepd $12, (%rdi){1to8}, %zmm0 {%k1}
3281 %c = icmp eq <8 x i64> %cmp, zeroinitializer
3282 %ps = load double, ptr %ptr
3283 %pins = insertelement <8 x double> undef, double %ps, i32 0
3284 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
3285 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
3286 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
; llvm.nearbyint on a splat of the scalar float loaded from %ptr (insertelement into lane 0 plus an
; all-zero shuffle mask), then a lane-wise select on (%cmp == 0): true lanes take the rounded value,
; false lanes take %passthru.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) reads (%rdi){1to16}, writing %zmm0 under {%k1}.
3290 define <16 x float> @nearbyint_v16f32_mask_broadcast(ptr %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
3291 ; CHECK-LABEL: nearbyint_v16f32_mask_broadcast:
3293 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
3294 ; CHECK-NEXT: vrndscaleps $12, (%rdi){1to16}, %zmm0 {%k1}
3296 %c = icmp eq <16 x i32> %cmp, zeroinitializer
3297 %ps = load float, ptr %ptr
3298 %pins = insertelement <16 x float> undef, float %ps, i32 0
3299 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
3300 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
3301 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
; llvm.nearbyint on a splat of the scalar double loaded from %ptr (insertelement into lane 0 plus an
; all-zero shuffle mask), then a lane-wise select on (%cmp == 0): true lanes take the rounded value,
; false lanes take zero.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) reads (%rdi){1to2}, writing %xmm0 under {%k1} {z}.
3305 define <2 x double> @nearbyint_v2f64_maskz_broadcast(ptr %ptr, <2 x i64> %cmp) {
3306 ; CHECK-LABEL: nearbyint_v2f64_maskz_broadcast:
3308 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
3309 ; CHECK-NEXT: vrndscalepd $12, (%rdi){1to2}, %xmm0 {%k1} {z}
3311 %c = icmp eq <2 x i64> %cmp, zeroinitializer
3312 %ps = load double, ptr %ptr
3313 %pins = insertelement <2 x double> undef, double %ps, i32 0
3314 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
3315 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
3316 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
; llvm.nearbyint on a splat of the scalar float loaded from %ptr (insertelement into lane 0 plus an
; all-zero shuffle mask), then a lane-wise select on (%cmp == 0): true lanes take the rounded value,
; false lanes take zero.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) reads (%rdi){1to4}, writing %xmm0 under {%k1} {z}.
3320 define <4 x float> @nearbyint_v4f32_maskz_broadcast(ptr %ptr, <4 x i32> %cmp) {
3321 ; CHECK-LABEL: nearbyint_v4f32_maskz_broadcast:
3323 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
3324 ; CHECK-NEXT: vrndscaleps $12, (%rdi){1to4}, %xmm0 {%k1} {z}
3326 %c = icmp eq <4 x i32> %cmp, zeroinitializer
3327 %ps = load float, ptr %ptr
3328 %pins = insertelement <4 x float> undef, float %ps, i32 0
3329 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
3330 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
3331 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
; llvm.nearbyint on a splat of the scalar double loaded from %ptr (insertelement into lane 0 plus an
; all-zero shuffle mask), then a lane-wise select on (%cmp == 0): true lanes take the rounded value,
; false lanes take zero.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) reads (%rdi){1to4}, writing %ymm0 under {%k1} {z}.
3335 define <4 x double> @nearbyint_v4f64_maskz_broadcast(ptr %ptr, <4 x i64> %cmp) {
3336 ; CHECK-LABEL: nearbyint_v4f64_maskz_broadcast:
3338 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
3339 ; CHECK-NEXT: vrndscalepd $12, (%rdi){1to4}, %ymm0 {%k1} {z}
3341 %c = icmp eq <4 x i64> %cmp, zeroinitializer
3342 %ps = load double, ptr %ptr
3343 %pins = insertelement <4 x double> undef, double %ps, i32 0
3344 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
3345 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
3346 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
; llvm.nearbyint on a splat of the scalar float loaded from %ptr (insertelement into lane 0 plus an
; all-zero shuffle mask), then a lane-wise select on (%cmp == 0): true lanes take the rounded value,
; false lanes take zero.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) reads (%rdi){1to8}, writing %ymm0 under {%k1} {z}.
3350 define <8 x float> @nearbyint_v8f32_maskz_broadcast(ptr %ptr, <8 x i32> %cmp) {
3351 ; CHECK-LABEL: nearbyint_v8f32_maskz_broadcast:
3353 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
3354 ; CHECK-NEXT: vrndscaleps $12, (%rdi){1to8}, %ymm0 {%k1} {z}
3356 %c = icmp eq <8 x i32> %cmp, zeroinitializer
3357 %ps = load float, ptr %ptr
3358 %pins = insertelement <8 x float> undef, float %ps, i32 0
3359 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
3360 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
3361 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
; llvm.nearbyint on a splat of the scalar double loaded from %ptr (insertelement into lane 0 plus an
; all-zero shuffle mask), then a lane-wise select on (%cmp == 0): true lanes take the rounded value,
; false lanes take zero.
; Expected asm: vptestnmq produces %k1 and vrndscalepd (immediate 12) reads (%rdi){1to8}, writing %zmm0 under {%k1} {z}.
3365 define <8 x double> @nearbyint_v8f64_maskz_broadcast(ptr %ptr, <8 x i64> %cmp) {
3366 ; CHECK-LABEL: nearbyint_v8f64_maskz_broadcast:
3368 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
3369 ; CHECK-NEXT: vrndscalepd $12, (%rdi){1to8}, %zmm0 {%k1} {z}
3371 %c = icmp eq <8 x i64> %cmp, zeroinitializer
3372 %ps = load double, ptr %ptr
3373 %pins = insertelement <8 x double> undef, double %ps, i32 0
3374 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
3375 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
3376 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
; llvm.nearbyint on a splat of the scalar float loaded from %ptr (insertelement into lane 0 plus an
; all-zero shuffle mask), then a lane-wise select on (%cmp == 0): true lanes take the rounded value,
; false lanes take zero.
; Expected asm: vptestnmd produces %k1 and vrndscaleps (immediate 12) reads (%rdi){1to16}, writing %zmm0 under {%k1} {z}.
3380 define <16 x float> @nearbyint_v16f32_maskz_broadcast(ptr %ptr, <16 x i32> %cmp) {
3381 ; CHECK-LABEL: nearbyint_v16f32_maskz_broadcast:
3383 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
3384 ; CHECK-NEXT: vrndscaleps $12, (%rdi){1to16}, %zmm0 {%k1} {z}
3386 %c = icmp eq <16 x i32> %cmp, zeroinitializer
3387 %ps = load float, ptr %ptr
3388 %pins = insertelement <16 x float> undef, float %ps, i32 0
3389 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
3390 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
3391 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer