; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rsqrt14_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}
define <4 x float> @test_rsqrt14_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
; CHECK-LABEL: test_rsqrt14_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14ss (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <4 x float>, <4 x float>* %a1ptr
  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rcp14_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}
define <4 x float> @test_rcp14_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
; CHECK-LABEL: test_rcp14_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14ss (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <4 x float>, <4 x float>* %a1ptr
  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) {
; CHECK-LABEL: test_rsqrt14_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14sd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}
define <2 x double> @test_rsqrt14_sd_load(<2 x double> %a0, <2 x double>* %a1ptr) {
; CHECK-LABEL: test_rsqrt14_sd_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrsqrt14sd (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
define <2 x double> @test_rcp14_sd(<2 x double> %a0) {
; CHECK-LABEL: test_rcp14_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14sd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}
define <2 x double> @test_rcp14_sd_load(<2 x double> %a0, <2 x double>* %a1ptr) {
; CHECK-LABEL: test_rcp14_sd_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrcp14sd (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; SKX-LABEL: test_int_x86_avx512_mask_scalef_ss:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vscalefss %xmm1, %xmm0, %xmm2 {%k1}
; SKX-NEXT:    vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
; SKX-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test_int_x86_avx512_mask_scalef_ss:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vscalefss %xmm1, %xmm0, %xmm2 {%k1}
; KNL-NEXT:    vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; KNL-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}
define <4 x float>@test_int_x86_avx512_mask_scalef_ss_load(<4 x float> %x0, <4 x float>* %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vscalefss (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <4 x float>, <4 x float>* %x1ptr
  %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> undef, i8 -1, i32 4)
  ret <4 x float> %res
}
declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; SKX-LABEL: test_int_x86_avx512_mask_scalef_sd:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
; SKX-NEXT:    vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
; SKX-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test_int_x86_avx512_mask_scalef_sd:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
; KNL-NEXT:    vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; KNL-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}
define <2 x double>@test_int_x86_avx512_mask_scalef_sd_load(<2 x double> %x0, <2 x double>* %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vscalefsd (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <2 x double>, <2 x double>* %x1ptr
  %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> undef, i8 -1, i32 4)
  ret <2 x double> %res
}