1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; test_vcvtph2ps_128 converts two <8 x i16> inputs with
; @llvm.x86.vcvtph2ps.128 -- one loaded through %a1, one passed directly as
; %a0 -- and adds the two <4 x float> results. The expected assembly therefore
; contains both the memory-operand and the register-operand form of vcvtph2ps,
; followed by a 128-bit vaddps.
; Each expectation line also pins the "# sched: [..]" annotation printed for
; that instruction on the CPU chosen by the matching llc invocation at the top
; of the file. The assertions are marked as autogenerated there, so regenerate
; them with the update script instead of editing them by hand.
; NOTE(review): the znver1 expectations for both vcvtph2ps forms read
; "[100:?]", which looks like a placeholder rather than a modelled value --
; confirm against the znver1 scheduling model.
10 define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) {
11 ; GENERIC-LABEL: test_vcvtph2ps_128:
13 ; GENERIC-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00]
14 ; GENERIC-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
15 ; GENERIC-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
16 ; GENERIC-NEXT: retq # sched: [1:1.00]
18 ; IVY-LABEL: test_vcvtph2ps_128:
20 ; IVY-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00]
21 ; IVY-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
22 ; IVY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
23 ; IVY-NEXT: retq # sched: [1:1.00]
25 ; HASWELL-LABEL: test_vcvtph2ps_128:
27 ; HASWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [1:1.00]
28 ; HASWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00]
29 ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
30 ; HASWELL-NEXT: retq # sched: [2:1.00]
32 ; BROADWELL-LABEL: test_vcvtph2ps_128:
34 ; BROADWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [6:1.00]
35 ; BROADWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00]
36 ; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
37 ; BROADWELL-NEXT: retq # sched: [7:1.00]
39 ; SKYLAKE-LABEL: test_vcvtph2ps_128:
41 ; SKYLAKE-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [9:0.50]
42 ; SKYLAKE-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [5:1.00]
43 ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
44 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
46 ; BTVER2-LABEL: test_vcvtph2ps_128:
48 ; BTVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
49 ; BTVER2-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
50 ; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
51 ; BTVER2-NEXT: retq # sched: [4:1.00]
53 ; ZNVER1-LABEL: test_vcvtph2ps_128:
55 ; ZNVER1-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [100:?]
56 ; ZNVER1-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [100:?]
57 ; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
58 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %2 is the conversion of the loaded vector (memory form in the
; expected assembly), %3 the conversion of %a0 (register form).
59 %1 = load <8 x i16>, <8 x i16> *%a1
60 %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
61 %3 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
62 %4 = fadd <4 x float> %2, %3
; NOTE(review): no ret instruction or closing brace is visible after the fadd
; in this listing -- confirm the function is terminated in the actual file.
; Declaration of the intrinsic called by the function above.
65 declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
; test_vcvtph2ps_256 is the 256-bit-result counterpart of the 128-bit
; vcvtph2ps test: two <8 x i16> inputs -- one loaded through %a1, one passed
; directly as %a0 -- are converted with @llvm.x86.vcvtph2ps.256 and the two
; <8 x float> results are added. The expected assembly contains the
; memory-operand and register-operand forms of vcvtph2ps writing a ymm
; register, followed by a ymm vaddps.
; Each expectation line also pins the "# sched: [..]" annotation printed for
; that instruction on the CPU chosen by the matching llc invocation at the top
; of the file. The assertions are marked as autogenerated there, so regenerate
; them with the update script instead of editing them by hand.
; NOTE(review): the znver1 expectations for both vcvtph2ps forms read
; "[100:?]", which looks like a placeholder rather than a modelled value --
; confirm against the znver1 scheduling model.
67 define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) {
68 ; GENERIC-LABEL: test_vcvtph2ps_256:
70 ; GENERIC-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
71 ; GENERIC-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
72 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
73 ; GENERIC-NEXT: retq # sched: [1:1.00]
75 ; IVY-LABEL: test_vcvtph2ps_256:
77 ; IVY-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
78 ; IVY-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
79 ; IVY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
80 ; IVY-NEXT: retq # sched: [1:1.00]
82 ; HASWELL-LABEL: test_vcvtph2ps_256:
84 ; HASWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [1:1.00]
85 ; HASWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00]
86 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
87 ; HASWELL-NEXT: retq # sched: [2:1.00]
89 ; BROADWELL-LABEL: test_vcvtph2ps_256:
91 ; BROADWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [6:1.00]
92 ; BROADWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00]
93 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
94 ; BROADWELL-NEXT: retq # sched: [7:1.00]
96 ; SKYLAKE-LABEL: test_vcvtph2ps_256:
98 ; SKYLAKE-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [10:0.50]
99 ; SKYLAKE-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [7:1.00]
100 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
101 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
103 ; BTVER2-LABEL: test_vcvtph2ps_256:
105 ; BTVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:2.00]
106 ; BTVER2-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:2.00]
107 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
108 ; BTVER2-NEXT: retq # sched: [4:1.00]
110 ; ZNVER1-LABEL: test_vcvtph2ps_256:
112 ; ZNVER1-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [100:?]
113 ; ZNVER1-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [100:?]
114 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
115 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %2 is the conversion of the loaded vector (memory form in the
; expected assembly), %3 the conversion of %a0 (register form).
116 %1 = load <8 x i16>, <8 x i16> *%a1
117 %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
118 %3 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
119 %4 = fadd <8 x float> %2, %3
; NOTE(review): no ret instruction or closing brace is visible after the fadd
; in this listing -- confirm the function is terminated in the actual file.
; Declaration of the intrinsic called by the function above.
122 declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
; test_vcvtps2ph_128 calls @llvm.x86.vcvtps2ph.128 twice with immediate 0:
; once on %a0 and once on %a1. The low four i16 elements of the second result
; are extracted with a shufflevector and stored through %a2. The expected
; assembly contains the register-destination form of vcvtps2ph (xmm0 to xmm0)
; and the memory-destination form (xmm1 to the address in rdi).
; Each expectation line also pins the "# sched: [..]" annotation printed for
; that instruction on the CPU chosen by the matching llc invocation at the top
; of the file. The assertions are marked as autogenerated there, so regenerate
; them with the update script instead of editing them by hand.
; NOTE(review): the znver1 expectations for both vcvtps2ph forms read
; "[100:?]", which looks like a placeholder rather than a modelled value --
; confirm against the znver1 scheduling model.
124 define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16> *%a2) {
125 ; GENERIC-LABEL: test_vcvtps2ph_128:
127 ; GENERIC-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
128 ; GENERIC-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [7:1.00]
129 ; GENERIC-NEXT: retq # sched: [1:1.00]
131 ; IVY-LABEL: test_vcvtps2ph_128:
133 ; IVY-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
134 ; IVY-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [7:1.00]
135 ; IVY-NEXT: retq # sched: [1:1.00]
137 ; HASWELL-LABEL: test_vcvtps2ph_128:
139 ; HASWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
140 ; HASWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
141 ; HASWELL-NEXT: retq # sched: [2:1.00]
143 ; BROADWELL-LABEL: test_vcvtps2ph_128:
145 ; BROADWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
146 ; BROADWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
147 ; BROADWELL-NEXT: retq # sched: [7:1.00]
149 ; SKYLAKE-LABEL: test_vcvtps2ph_128:
151 ; SKYLAKE-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [5:1.00]
152 ; SKYLAKE-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [6:1.00]
153 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
155 ; BTVER2-LABEL: test_vcvtps2ph_128:
157 ; BTVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
158 ; BTVER2-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [3:1.00]
159 ; BTVER2-NEXT: retq # sched: [4:1.00]
161 ; ZNVER1-LABEL: test_vcvtps2ph_128:
163 ; ZNVER1-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [100:?]
164 ; ZNVER1-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [100:?]
165 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1 converts %a0; %2 converts %a1 and only its low four
; elements (%3) reach memory, matching the <4 x i16> pointee of %a2.
; NOTE(review): %1 has no visible use here; presumably it is the return value
; (the register form writes xmm0) -- confirm against the full file.
166 %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
167 %2 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a1, i32 0)
168 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
169 store <4 x i16> %3, <4 x i16> *%a2
; NOTE(review): no ret instruction or closing brace is visible after the store
; in this listing -- confirm the function is terminated in the actual file.
; Declaration of the intrinsic called by the function above.
172 declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32)
; test_vcvtps2ph_256 calls @llvm.x86.vcvtps2ph.256 twice with immediate 0:
; once on %a0 and once on %a1, storing the second <8 x i16> result through
; %a2. The expected assembly contains the register-destination form of
; vcvtps2ph (ymm0 to xmm0) and the memory-destination form (ymm1 to the
; address in rdi).
; Every CPU except btver2 is also expected to emit vzeroupper before retq; the
; generic and ivybridge expectations for vzeroupper carry no "# sched"
; annotation, while the others do.
; Each annotated expectation line pins the "# sched: [..]" text printed for
; that instruction on the CPU chosen by the matching llc invocation at the top
; of the file. The assertions are marked as autogenerated there, so regenerate
; them with the update script instead of editing them by hand.
; NOTE(review): the znver1 expectations for both vcvtps2ph forms and for
; vzeroupper read "[100:?]", which looks like a placeholder rather than a
; modelled value -- confirm against the znver1 scheduling model.
174 define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16> *%a2) {
175 ; GENERIC-LABEL: test_vcvtps2ph_256:
177 ; GENERIC-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
178 ; GENERIC-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
179 ; GENERIC-NEXT: vzeroupper
180 ; GENERIC-NEXT: retq # sched: [1:1.00]
182 ; IVY-LABEL: test_vcvtps2ph_256:
184 ; IVY-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
185 ; IVY-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
186 ; IVY-NEXT: vzeroupper
187 ; IVY-NEXT: retq # sched: [1:1.00]
189 ; HASWELL-LABEL: test_vcvtps2ph_256:
191 ; HASWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
192 ; HASWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [6:1.00]
193 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
194 ; HASWELL-NEXT: retq # sched: [2:1.00]
196 ; BROADWELL-LABEL: test_vcvtps2ph_256:
198 ; BROADWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
199 ; BROADWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
200 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
201 ; BROADWELL-NEXT: retq # sched: [7:1.00]
203 ; SKYLAKE-LABEL: test_vcvtps2ph_256:
205 ; SKYLAKE-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [7:1.00]
206 ; SKYLAKE-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
207 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
208 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
210 ; BTVER2-LABEL: test_vcvtps2ph_256:
212 ; BTVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:2.00]
213 ; BTVER2-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [11:2.00]
214 ; BTVER2-NEXT: retq # sched: [4:1.00]
216 ; ZNVER1-LABEL: test_vcvtps2ph_256:
218 ; ZNVER1-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [100:?]
219 ; ZNVER1-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [100:?]
220 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
221 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1 converts %a0; %2 converts %a1 and is stored whole.
; NOTE(review): %1 has no visible use here; presumably it is the return value
; (the register form writes xmm0) -- confirm against the full file.
222 %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
223 %2 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a1, i32 0)
224 store <8 x i16> %2, <8 x i16> *%a2
; NOTE(review): no ret instruction or closing brace is visible after the store
; in this listing -- confirm the function is terminated in the actual file.
; Declaration of the intrinsic called by the function above.
227 declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32)