1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2 ; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK-NO_FP16
3 ; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512fp16 | FileCheck %s --check-prefixes=CHECK-WITH_FP16
5 ; Note: We could check more configurations, but anything with software emulation
6 ; of fp16 generates a ton of assembly code and is not particularly useful here.
9 ;----------------------------------------
11 ;----------------------------------------
14 ; - Go from i8 to i32: zext
15 ; - Convert i32 to float
; Scalar unsigned i8 -> float (uitofp).
; The expected code is identical with and without avx512fp16: the byte is
; zero-extended into eax (movzbl) and converted with vcvtsi2ss.
16 define float @uint8ToFloat(i8 %int8) {
17 ; CHECK-NO_FP16-LABEL: uint8ToFloat:
18 ; CHECK-NO_FP16: # %bb.0:
19 ; CHECK-NO_FP16-NEXT: movzbl %dil, %eax
20 ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
21 ; CHECK-NO_FP16-NEXT: retq
23 ; CHECK-WITH_FP16-LABEL: uint8ToFloat:
24 ; CHECK-WITH_FP16: # %bb.0:
25 ; CHECK-WITH_FP16-NEXT: movzbl %dil, %eax
26 ; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
27 ; CHECK-WITH_FP16-NEXT: retq
28 %fp32 = uitofp i8 %int8 to float
32 ; vector uint8_t to float.
33 ; Same as @uint8ToFloat but with vector types.
; <16 x i8> -> <16 x float>, unsigned (uitofp).
; The expected code is identical with and without avx512fp16: vpmovzxbd
; zero-extends each byte of xmm0 into a 32-bit lane of zmm0, then vcvtdq2ps
; converts the 16 lanes to float.
34 define <16 x float> @vector_uint8ToFloat(<16 x i8> %int8) {
35 ; CHECK-NO_FP16-LABEL: vector_uint8ToFloat:
36 ; CHECK-NO_FP16: # %bb.0:
37 ; CHECK-NO_FP16-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
38 ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
39 ; CHECK-NO_FP16-NEXT: retq
41 ; CHECK-WITH_FP16-LABEL: vector_uint8ToFloat:
42 ; CHECK-WITH_FP16: # %bb.0:
43 ; CHECK-WITH_FP16-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
44 ; CHECK-WITH_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
45 ; CHECK-WITH_FP16-NEXT: retq
46 %fp32 = uitofp <16 x i8> %int8 to <16 x float>
47 ret <16 x float> %fp32
; If no half support:
54 ; - Go from i8 to i32: zext
55 ; - Convert i32 to float
56 ; - Trunc from float to half
58 ; Else if half support:
59 ; - Go from i8 to i32: zext
60 ; - Convert i32 to half
; Scalar unsigned i8 -> half (uitofp).
; Without avx512fp16 the expected code zero-extends (movzbl), converts to
; float (vcvtsi2ss), narrows to half (vcvtps2ph) and then moves the result
; through eax (vmovd + vpinsrw). With avx512fp16 the zero-extension is
; followed by a single vcvtsi2sh.
; Note: the IR result is named %fp32 although its type is half.
61 define half @uint8ToHalf(i8 %int8) {
62 ; CHECK-NO_FP16-LABEL: uint8ToHalf:
63 ; CHECK-NO_FP16: # %bb.0:
64 ; CHECK-NO_FP16-NEXT: movzbl %dil, %eax
65 ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
66 ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
67 ; CHECK-NO_FP16-NEXT: vmovd %xmm0, %eax
68 ; CHECK-NO_FP16-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
69 ; CHECK-NO_FP16-NEXT: retq
71 ; CHECK-WITH_FP16-LABEL: uint8ToHalf:
72 ; CHECK-WITH_FP16: # %bb.0:
73 ; CHECK-WITH_FP16-NEXT: movzbl %dil, %eax
74 ; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
75 ; CHECK-WITH_FP16-NEXT: retq
76 %fp32 = uitofp i8 %int8 to half
80 ; vector uint8_t to half.
; If no half support:
83 ; - Go from i8 to i32: zext
84 ; - Convert i32 to float
85 ; - Trunc from float to half
87 ; Else if half support:
88 ; - Go from i8 to i16: zext
89 ; - Convert i16 to half
91 ; The difference with the scalar version (uint8ToHalf) is that we use i16
92 ; for the intermediate type when we have half support.
; <16 x i8> -> <16 x half>, unsigned (uitofp).
; Without avx512fp16 the expected code is vpmovzxbd (bytes to 32-bit lanes),
; vcvtdq2ps, then vcvtps2ph from zmm0 down to ymm0. With avx512fp16 it is
; vpmovzxbw (bytes to 16-bit lanes of ymm0) followed by vcvtw2ph; note that
; the expected instruction is vcvtw2ph rather than vcvtuw2ph, applied to
; already zero-extended values.
; Note: the IR result is named %fp32 although its type is <16 x half>.
93 define <16 x half> @vector_uint8ToHalf(<16 x i8> %int8) {
94 ; CHECK-NO_FP16-LABEL: vector_uint8ToHalf:
95 ; CHECK-NO_FP16: # %bb.0:
96 ; CHECK-NO_FP16-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
97 ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
98 ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %zmm0, %ymm0
99 ; CHECK-NO_FP16-NEXT: retq
101 ; CHECK-WITH_FP16-LABEL: vector_uint8ToHalf:
102 ; CHECK-WITH_FP16: # %bb.0:
103 ; CHECK-WITH_FP16-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
104 ; CHECK-WITH_FP16-NEXT: vcvtw2ph %ymm0, %ymm0
105 ; CHECK-WITH_FP16-NEXT: retq
106 %fp32 = uitofp <16 x i8> %int8 to <16 x half>
107 ret <16 x half> %fp32
110 ; Same as uint8_t but with the signed variant.
111 ; I.e., use sext instead of zext.
; Scalar signed i8 -> float (sitofp).
; The expected code is identical with and without avx512fp16: the byte is
; sign-extended into eax (movsbl) and converted with vcvtsi2ss.
112 define float @sint8ToFloat(i8 %int8) {
113 ; CHECK-NO_FP16-LABEL: sint8ToFloat:
114 ; CHECK-NO_FP16: # %bb.0:
115 ; CHECK-NO_FP16-NEXT: movsbl %dil, %eax
116 ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
117 ; CHECK-NO_FP16-NEXT: retq
119 ; CHECK-WITH_FP16-LABEL: sint8ToFloat:
120 ; CHECK-WITH_FP16: # %bb.0:
121 ; CHECK-WITH_FP16-NEXT: movsbl %dil, %eax
122 ; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
123 ; CHECK-WITH_FP16-NEXT: retq
124 %fp32 = sitofp i8 %int8 to float
; <16 x i8> -> <16 x float>, signed (sitofp).
; The expected code is identical with and without avx512fp16: vpmovsxbd
; sign-extends the bytes of xmm0 into 32-bit lanes of zmm0, then vcvtdq2ps
; converts them to float.
128 define <16 x float> @vector_sint8ToFloat(<16 x i8> %int8) {
129 ; CHECK-NO_FP16-LABEL: vector_sint8ToFloat:
130 ; CHECK-NO_FP16: # %bb.0:
131 ; CHECK-NO_FP16-NEXT: vpmovsxbd %xmm0, %zmm0
132 ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
133 ; CHECK-NO_FP16-NEXT: retq
135 ; CHECK-WITH_FP16-LABEL: vector_sint8ToFloat:
136 ; CHECK-WITH_FP16: # %bb.0:
137 ; CHECK-WITH_FP16-NEXT: vpmovsxbd %xmm0, %zmm0
138 ; CHECK-WITH_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
139 ; CHECK-WITH_FP16-NEXT: retq
140 %fp32 = sitofp <16 x i8> %int8 to <16 x float>
141 ret <16 x float> %fp32
; Scalar signed i8 -> half (sitofp).
; Without avx512fp16 the expected code sign-extends (movsbl), converts to
; float (vcvtsi2ss), narrows to half (vcvtps2ph) and then moves the result
; through eax (vmovd + vpinsrw). With avx512fp16 the sign-extension is
; followed by a single vcvtsi2sh.
; Note: the IR result is named %fp32 although its type is half.
144 define half @sint8ToHalf(i8 %int8) {
145 ; CHECK-NO_FP16-LABEL: sint8ToHalf:
146 ; CHECK-NO_FP16: # %bb.0:
147 ; CHECK-NO_FP16-NEXT: movsbl %dil, %eax
148 ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
149 ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
150 ; CHECK-NO_FP16-NEXT: vmovd %xmm0, %eax
151 ; CHECK-NO_FP16-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
152 ; CHECK-NO_FP16-NEXT: retq
154 ; CHECK-WITH_FP16-LABEL: sint8ToHalf:
155 ; CHECK-WITH_FP16: # %bb.0:
156 ; CHECK-WITH_FP16-NEXT: movsbl %dil, %eax
157 ; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
158 ; CHECK-WITH_FP16-NEXT: retq
159 %fp32 = sitofp i8 %int8 to half
; <16 x i8> -> <16 x half>, signed (sitofp).
; Without avx512fp16 the expected code is vpmovsxbd (bytes to 32-bit lanes),
; vcvtdq2ps, then vcvtps2ph from zmm0 down to ymm0. With avx512fp16 it is
; vpmovsxbw (bytes to 16-bit lanes of ymm0) followed by vcvtw2ph.
; Note: the IR result is named %fp32 although its type is <16 x half>.
163 define <16 x half> @vector_sint8ToHalf(<16 x i8> %int8) {
164 ; CHECK-NO_FP16-LABEL: vector_sint8ToHalf:
165 ; CHECK-NO_FP16: # %bb.0:
166 ; CHECK-NO_FP16-NEXT: vpmovsxbd %xmm0, %zmm0
167 ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
168 ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %zmm0, %ymm0
169 ; CHECK-NO_FP16-NEXT: retq
171 ; CHECK-WITH_FP16-LABEL: vector_sint8ToHalf:
172 ; CHECK-WITH_FP16: # %bb.0:
173 ; CHECK-WITH_FP16-NEXT: vpmovsxbw %xmm0, %ymm0
174 ; CHECK-WITH_FP16-NEXT: vcvtw2ph %ymm0, %ymm0
175 ; CHECK-WITH_FP16-NEXT: retq
176 %fp32 = sitofp <16 x i8> %int8 to <16 x half>
177 ret <16 x half> %fp32
181 ;----------------------------------------
183 ;----------------------------------------
185 ; Similar lowering as i8, but with i16 as the input type.
; Scalar unsigned i16 -> float (uitofp).
; The expected code is identical with and without avx512fp16: the 16-bit
; value is zero-extended into eax (movzwl) and converted with vcvtsi2ss.
187 define float @uint16ToFloat(i16 %int16) {
188 ; CHECK-NO_FP16-LABEL: uint16ToFloat:
189 ; CHECK-NO_FP16: # %bb.0:
190 ; CHECK-NO_FP16-NEXT: movzwl %di, %eax
191 ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
192 ; CHECK-NO_FP16-NEXT: retq
194 ; CHECK-WITH_FP16-LABEL: uint16ToFloat:
195 ; CHECK-WITH_FP16: # %bb.0:
196 ; CHECK-WITH_FP16-NEXT: movzwl %di, %eax
197 ; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
198 ; CHECK-WITH_FP16-NEXT: retq
199 %fp32 = uitofp i16 %int16 to float
; <16 x i16> -> <16 x float>, unsigned (uitofp).
; The expected code is identical with and without avx512fp16: vpmovzxwd
; zero-extends each 16-bit element of ymm0 into a 32-bit lane of zmm0, then
; vcvtdq2ps converts the 16 lanes to float.
203 define <16 x float> @vector_uint16ToFloat(<16 x i16> %int16) {
204 ; CHECK-NO_FP16-LABEL: vector_uint16ToFloat:
205 ; CHECK-NO_FP16: # %bb.0:
206 ; CHECK-NO_FP16-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
207 ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
208 ; CHECK-NO_FP16-NEXT: retq
210 ; CHECK-WITH_FP16-LABEL: vector_uint16ToFloat:
211 ; CHECK-WITH_FP16: # %bb.0:
212 ; CHECK-WITH_FP16-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
213 ; CHECK-WITH_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
214 ; CHECK-WITH_FP16-NEXT: retq
215 %fp32 = uitofp <16 x i16> %int16 to <16 x float>
216 ret <16 x float> %fp32
; Scalar unsigned i16 -> half (uitofp).
; Without avx512fp16 the expected code zero-extends (movzwl), converts to
; float (vcvtsi2ss), narrows to half (vcvtps2ph) and then moves the result
; through eax (vmovd + vpinsrw). With avx512fp16 the zero-extension is
; followed by a single vcvtsi2sh.
; Note: the IR result is named %fp32 although its type is half.
219 define half @uint16ToHalf(i16 %int16) {
220 ; CHECK-NO_FP16-LABEL: uint16ToHalf:
221 ; CHECK-NO_FP16: # %bb.0:
222 ; CHECK-NO_FP16-NEXT: movzwl %di, %eax
223 ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
224 ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
225 ; CHECK-NO_FP16-NEXT: vmovd %xmm0, %eax
226 ; CHECK-NO_FP16-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
227 ; CHECK-NO_FP16-NEXT: retq
229 ; CHECK-WITH_FP16-LABEL: uint16ToHalf:
230 ; CHECK-WITH_FP16: # %bb.0:
231 ; CHECK-WITH_FP16-NEXT: movzwl %di, %eax
232 ; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
233 ; CHECK-WITH_FP16-NEXT: retq
234 %fp32 = uitofp i16 %int16 to half
; <16 x i16> -> <16 x half>, unsigned (uitofp).
; Without avx512fp16 the expected code is vpmovzxwd (16-bit elements to
; 32-bit lanes), vcvtdq2ps, then vcvtps2ph from zmm0 down to ymm0. With
; avx512fp16 no extension is expected: a single vcvtuw2ph converts ymm0
; in place.
; Note: the IR result is named %fp32 although its type is <16 x half>.
238 define <16 x half> @vector_uint16ToHalf(<16 x i16> %int16) {
239 ; CHECK-NO_FP16-LABEL: vector_uint16ToHalf:
240 ; CHECK-NO_FP16: # %bb.0:
241 ; CHECK-NO_FP16-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
242 ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
243 ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %zmm0, %ymm0
244 ; CHECK-NO_FP16-NEXT: retq
246 ; CHECK-WITH_FP16-LABEL: vector_uint16ToHalf:
247 ; CHECK-WITH_FP16: # %bb.0:
248 ; CHECK-WITH_FP16-NEXT: vcvtuw2ph %ymm0, %ymm0
249 ; CHECK-WITH_FP16-NEXT: retq
250 %fp32 = uitofp <16 x i16> %int16 to <16 x half>
251 ret <16 x half> %fp32
; Scalar signed i16 -> float (sitofp).
; The expected code is identical with and without avx512fp16: the 16-bit
; value is sign-extended into eax (movswl) and converted with vcvtsi2ss.
254 define float @sint16ToFloat(i16 %int16) {
255 ; CHECK-NO_FP16-LABEL: sint16ToFloat:
256 ; CHECK-NO_FP16: # %bb.0:
257 ; CHECK-NO_FP16-NEXT: movswl %di, %eax
258 ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
259 ; CHECK-NO_FP16-NEXT: retq
261 ; CHECK-WITH_FP16-LABEL: sint16ToFloat:
262 ; CHECK-WITH_FP16: # %bb.0:
263 ; CHECK-WITH_FP16-NEXT: movswl %di, %eax
264 ; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
265 ; CHECK-WITH_FP16-NEXT: retq
266 %fp32 = sitofp i16 %int16 to float
; <16 x i16> -> <16 x float>, signed (sitofp).
; The expected code is identical with and without avx512fp16: vpmovsxwd
; sign-extends the 16-bit elements of ymm0 into 32-bit lanes of zmm0, then
; vcvtdq2ps converts them to float.
270 define <16 x float> @vector_sint16ToFloat(<16 x i16> %int16) {
271 ; CHECK-NO_FP16-LABEL: vector_sint16ToFloat:
272 ; CHECK-NO_FP16: # %bb.0:
273 ; CHECK-NO_FP16-NEXT: vpmovsxwd %ymm0, %zmm0
274 ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
275 ; CHECK-NO_FP16-NEXT: retq
277 ; CHECK-WITH_FP16-LABEL: vector_sint16ToFloat:
278 ; CHECK-WITH_FP16: # %bb.0:
279 ; CHECK-WITH_FP16-NEXT: vpmovsxwd %ymm0, %zmm0
280 ; CHECK-WITH_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
281 ; CHECK-WITH_FP16-NEXT: retq
282 %fp32 = sitofp <16 x i16> %int16 to <16 x float>
283 ret <16 x float> %fp32
; Scalar signed i16 -> half (sitofp).
; Without avx512fp16 the expected code sign-extends (movswl), converts to
; float (vcvtsi2ss), narrows to half (vcvtps2ph) and then moves the result
; through eax (vmovd + vpinsrw). With avx512fp16 the sign-extension is
; followed by a single vcvtsi2sh.
; Note: the IR result is named %fp32 although its type is half.
286 define half @sint16ToHalf(i16 %int16) {
287 ; CHECK-NO_FP16-LABEL: sint16ToHalf:
288 ; CHECK-NO_FP16: # %bb.0:
289 ; CHECK-NO_FP16-NEXT: movswl %di, %eax
290 ; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
291 ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
292 ; CHECK-NO_FP16-NEXT: vmovd %xmm0, %eax
293 ; CHECK-NO_FP16-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
294 ; CHECK-NO_FP16-NEXT: retq
296 ; CHECK-WITH_FP16-LABEL: sint16ToHalf:
297 ; CHECK-WITH_FP16: # %bb.0:
298 ; CHECK-WITH_FP16-NEXT: movswl %di, %eax
299 ; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
300 ; CHECK-WITH_FP16-NEXT: retq
301 %fp32 = sitofp i16 %int16 to half
; <16 x i16> -> <16 x half>, signed (sitofp).
; Without avx512fp16 the expected code is vpmovsxwd (16-bit elements to
; 32-bit lanes), vcvtdq2ps, then vcvtps2ph from zmm0 down to ymm0. With
; avx512fp16 no extension is expected: a single vcvtw2ph converts ymm0
; in place.
; Note: the IR result is named %fp32 although its type is <16 x half>.
305 define <16 x half> @vector_sint16ToHalf(<16 x i16> %int16) {
306 ; CHECK-NO_FP16-LABEL: vector_sint16ToHalf:
307 ; CHECK-NO_FP16: # %bb.0:
308 ; CHECK-NO_FP16-NEXT: vpmovsxwd %ymm0, %zmm0
309 ; CHECK-NO_FP16-NEXT: vcvtdq2ps %zmm0, %zmm0
310 ; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %zmm0, %ymm0
311 ; CHECK-NO_FP16-NEXT: retq
313 ; CHECK-WITH_FP16-LABEL: vector_sint16ToHalf:
314 ; CHECK-WITH_FP16: # %bb.0:
315 ; CHECK-WITH_FP16-NEXT: vcvtw2ph %ymm0, %ymm0
316 ; CHECK-WITH_FP16-NEXT: retq
317 %fp32 = sitofp <16 x i16> %int16 to <16 x half>
318 ret <16 x half> %fp32