1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 < %s | FileCheck -check-prefix=NOFP16 %s
4 declare void @f16_user(half)
5 declare half @f16_result()
7 declare void @v2f16_user(<2 x half>)
8 declare <2 x half> @v2f16_result()
10 declare void @v4f16_user(<4 x half>)
11 declare <4 x half> @v4f16_result()
13 declare void @v8f16_user(<8 x half>)
14 declare <8 x half> @v8f16_result()
; Scalar half argument with FP disabled (-fp-armv8): the half arrives in the
; low 16 bits of w0, is masked with #0xffff, extended to float by the
; __gnu_h2f_ieee soft-float libcall (strictfp constrained fpext), and the
; resulting f32 bits are stored through the pointer (saved in x19 across
; the call, since x1 is caller-clobbered).
16 define void @f16_arg(half %arg, ptr %ptr) #0 {
17 ; NOFP16-LABEL: f16_arg:
19 ; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
20 ; NOFP16-NEXT: .cfi_def_cfa_offset 16
21 ; NOFP16-NEXT: .cfi_offset w19, -8
22 ; NOFP16-NEXT: .cfi_offset w30, -16
23 ; NOFP16-NEXT: and w0, w0, #0xffff
24 ; NOFP16-NEXT: mov x19, x1
25 ; NOFP16-NEXT: bl __gnu_h2f_ieee
26 ; NOFP16-NEXT: str w0, [x19]
27 ; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
29 %fpext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
30 store float %fpext, ptr %ptr
; <2 x half> argument: without FP registers the two halves are passed in
; w0 and w1 and converted one at a time through __gnu_h2f_ieee; the second
; element is parked in w20 across the first libcall, the first result in
; w21 across the second, and both f32 results are stored with a single stp.
34 define void @v2f16_arg(<2 x half> %arg, ptr %ptr) #0 {
35 ; NOFP16-LABEL: v2f16_arg:
37 ; NOFP16-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
38 ; NOFP16-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
39 ; NOFP16-NEXT: .cfi_def_cfa_offset 32
40 ; NOFP16-NEXT: .cfi_offset w19, -8
41 ; NOFP16-NEXT: .cfi_offset w20, -16
42 ; NOFP16-NEXT: .cfi_offset w21, -24
43 ; NOFP16-NEXT: .cfi_offset w30, -32
44 ; NOFP16-NEXT: and w0, w0, #0xffff
45 ; NOFP16-NEXT: mov x19, x2
46 ; NOFP16-NEXT: mov w20, w1
47 ; NOFP16-NEXT: bl __gnu_h2f_ieee
48 ; NOFP16-NEXT: mov w21, w0
49 ; NOFP16-NEXT: and w0, w20, #0xffff
50 ; NOFP16-NEXT: bl __gnu_h2f_ieee
51 ; NOFP16-NEXT: stp w21, w0, [x19]
52 ; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
53 ; NOFP16-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
55 %fpext = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
56 store <2 x float> %fpext, ptr %ptr
; <3 x half> argument (non-power-of-two vector): three separate
; __gnu_h2f_ieee libcalls. The first two f32 results are packed into one
; 64-bit register (orr x21, x8, x22, lsl #32) and stored as a single str x,
; then the third result is stored as a 32-bit str at offset #8.
60 define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
61 ; NOFP16-LABEL: v3f16_arg:
63 ; NOFP16-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
64 ; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
65 ; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
66 ; NOFP16-NEXT: .cfi_def_cfa_offset 48
67 ; NOFP16-NEXT: .cfi_offset w19, -8
68 ; NOFP16-NEXT: .cfi_offset w20, -16
69 ; NOFP16-NEXT: .cfi_offset w21, -24
70 ; NOFP16-NEXT: .cfi_offset w22, -32
71 ; NOFP16-NEXT: .cfi_offset w30, -48
72 ; NOFP16-NEXT: mov w21, w0
73 ; NOFP16-NEXT: and w0, w1, #0xffff
74 ; NOFP16-NEXT: mov x19, x3
75 ; NOFP16-NEXT: mov w20, w2
76 ; NOFP16-NEXT: bl __gnu_h2f_ieee
77 ; NOFP16-NEXT: mov w22, w0
78 ; NOFP16-NEXT: and w0, w21, #0xffff
79 ; NOFP16-NEXT: bl __gnu_h2f_ieee
80 ; NOFP16-NEXT: mov w8, w0
81 ; NOFP16-NEXT: and w0, w20, #0xffff
82 ; NOFP16-NEXT: orr x21, x8, x22, lsl #32
83 ; NOFP16-NEXT: bl __gnu_h2f_ieee
84 ; NOFP16-NEXT: str x21, [x19]
85 ; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
86 ; NOFP16-NEXT: str w0, [x19, #8]
87 ; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
88 ; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
90 %fpext = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
91 store <3 x float> %fpext, ptr %ptr
; <4 x half> argument: four elements arrive in w0-w3 and are converted by
; four sequential __gnu_h2f_ieee calls, with pending elements and finished
; results rotated through the callee-saved w20-w23; the four f32 results
; are written back as two stp pairs at [x19] and [x19, #8].
95 define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
96 ; NOFP16-LABEL: v4f16_arg:
98 ; NOFP16-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
99 ; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
100 ; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
101 ; NOFP16-NEXT: .cfi_def_cfa_offset 48
102 ; NOFP16-NEXT: .cfi_offset w19, -8
103 ; NOFP16-NEXT: .cfi_offset w20, -16
104 ; NOFP16-NEXT: .cfi_offset w21, -24
105 ; NOFP16-NEXT: .cfi_offset w22, -32
106 ; NOFP16-NEXT: .cfi_offset w23, -40
107 ; NOFP16-NEXT: .cfi_offset w30, -48
108 ; NOFP16-NEXT: and w0, w0, #0xffff
109 ; NOFP16-NEXT: mov x19, x4
110 ; NOFP16-NEXT: mov w20, w3
111 ; NOFP16-NEXT: mov w21, w2
112 ; NOFP16-NEXT: mov w22, w1
113 ; NOFP16-NEXT: bl __gnu_h2f_ieee
114 ; NOFP16-NEXT: mov w23, w0
115 ; NOFP16-NEXT: and w0, w22, #0xffff
116 ; NOFP16-NEXT: bl __gnu_h2f_ieee
117 ; NOFP16-NEXT: mov w22, w0
118 ; NOFP16-NEXT: and w0, w21, #0xffff
119 ; NOFP16-NEXT: bl __gnu_h2f_ieee
120 ; NOFP16-NEXT: mov w21, w0
121 ; NOFP16-NEXT: and w0, w20, #0xffff
122 ; NOFP16-NEXT: bl __gnu_h2f_ieee
123 ; NOFP16-NEXT: stp w21, w0, [x19, #8]
124 ; NOFP16-NEXT: stp w23, w22, [x19]
125 ; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
126 ; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
127 ; NOFP16-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
129 %fpext = call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %arg, metadata !"fpexcept.strict")
130 store <4 x float> %fpext, ptr %ptr
; Scalar half return: the incoming float (in w0 under the soft-float ABI)
; is truncated by the __gnu_f2h_ieee libcall, whose half result in w0 is
; returned directly; only lr needs saving around the call.
134 define half @f16_return(float %arg) #0 {
135 ; NOFP16-LABEL: f16_return:
137 ; NOFP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
138 ; NOFP16-NEXT: .cfi_def_cfa_offset 16
139 ; NOFP16-NEXT: .cfi_offset w30, -16
140 ; NOFP16-NEXT: bl __gnu_f2h_ieee
141 ; NOFP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
143 %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; <2 x half> return: two floats in w0/w1 are truncated via two
; __gnu_f2h_ieee calls. Element 1 is converted first (its result parked in
; w20) so the final call leaves element 0 in w0, ready for the return, and
; element 1 is moved into w1 afterwards.
147 define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
148 ; NOFP16-LABEL: v2f16_return:
150 ; NOFP16-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
151 ; NOFP16-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
152 ; NOFP16-NEXT: .cfi_def_cfa_offset 32
153 ; NOFP16-NEXT: .cfi_offset w19, -8
154 ; NOFP16-NEXT: .cfi_offset w20, -16
155 ; NOFP16-NEXT: .cfi_offset w30, -32
156 ; NOFP16-NEXT: mov w19, w0
157 ; NOFP16-NEXT: mov w0, w1
158 ; NOFP16-NEXT: bl __gnu_f2h_ieee
159 ; NOFP16-NEXT: mov w20, w0
160 ; NOFP16-NEXT: mov w0, w19
161 ; NOFP16-NEXT: bl __gnu_f2h_ieee
162 ; NOFP16-NEXT: mov w1, w20
163 ; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
164 ; NOFP16-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
166 %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
167 ret <2 x half> %fptrunc
; <3 x half> return: three __gnu_f2h_ieee truncations, converted in
; reverse element order (2, 1, 0) so the last result lands in w0; results
; for elements 2 and 1 are held in w21/w19 and moved into w2/w1 before the
; return.
170 define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
171 ; NOFP16-LABEL: v3f16_return:
173 ; NOFP16-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
174 ; NOFP16-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
175 ; NOFP16-NEXT: .cfi_def_cfa_offset 32
176 ; NOFP16-NEXT: .cfi_offset w19, -8
177 ; NOFP16-NEXT: .cfi_offset w20, -16
178 ; NOFP16-NEXT: .cfi_offset w21, -24
179 ; NOFP16-NEXT: .cfi_offset w30, -32
180 ; NOFP16-NEXT: mov w20, w0
181 ; NOFP16-NEXT: mov w0, w2
182 ; NOFP16-NEXT: mov w19, w1
183 ; NOFP16-NEXT: bl __gnu_f2h_ieee
184 ; NOFP16-NEXT: mov w21, w0
185 ; NOFP16-NEXT: mov w0, w19
186 ; NOFP16-NEXT: bl __gnu_f2h_ieee
187 ; NOFP16-NEXT: mov w19, w0
188 ; NOFP16-NEXT: mov w0, w20
189 ; NOFP16-NEXT: bl __gnu_f2h_ieee
190 ; NOFP16-NEXT: mov w1, w19
191 ; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
192 ; NOFP16-NEXT: mov w2, w21
193 ; NOFP16-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
195 %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
196 ret <3 x half> %fptrunc
; <4 x half> return: four __gnu_f2h_ieee truncations, again in reverse
; element order (3, 2, 1, 0). Intermediate half results are staged in the
; callee-saved w19/w20/w22 and shuffled into the w1-w3 return registers
; after the final call; element 0's result is already in w0.
199 define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
200 ; NOFP16-LABEL: v4f16_return:
202 ; NOFP16-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
203 ; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
204 ; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
205 ; NOFP16-NEXT: .cfi_def_cfa_offset 48
206 ; NOFP16-NEXT: .cfi_offset w19, -8
207 ; NOFP16-NEXT: .cfi_offset w20, -16
208 ; NOFP16-NEXT: .cfi_offset w21, -24
209 ; NOFP16-NEXT: .cfi_offset w22, -32
210 ; NOFP16-NEXT: .cfi_offset w30, -48
211 ; NOFP16-NEXT: mov w21, w0
212 ; NOFP16-NEXT: mov w0, w3
213 ; NOFP16-NEXT: mov w19, w2
214 ; NOFP16-NEXT: mov w20, w1
215 ; NOFP16-NEXT: bl __gnu_f2h_ieee
216 ; NOFP16-NEXT: mov w22, w0
217 ; NOFP16-NEXT: mov w0, w19
218 ; NOFP16-NEXT: bl __gnu_f2h_ieee
219 ; NOFP16-NEXT: mov w19, w0
220 ; NOFP16-NEXT: mov w0, w20
221 ; NOFP16-NEXT: bl __gnu_f2h_ieee
222 ; NOFP16-NEXT: mov w20, w0
223 ; NOFP16-NEXT: mov w0, w21
224 ; NOFP16-NEXT: bl __gnu_f2h_ieee
225 ; NOFP16-NEXT: mov w1, w20
226 ; NOFP16-NEXT: mov w2, w19
227 ; NOFP16-NEXT: mov w3, w22
228 ; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
229 ; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
230 ; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
232 %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
233 ret <4 x half> %fptrunc
237 ; define void @outgoing_f16_arg(ptr %ptr) #0 {
238 ; %val = load half, ptr %ptr
239 ; call void @f16_user(half %val)
243 ; define void @outgoing_v2f16_arg(ptr %ptr) #0 {
244 ; %val = load <2 x half>, ptr %ptr
245 ; call void @v2f16_user(<2 x half> %val)
249 ; define void @outgoing_f16_return(ptr %ptr) #0 {
250 ; %val = call half @f16_result()
251 ; store half %val, ptr %ptr
255 ; define void @outgoing_v2f16_return(ptr %ptr) #0 {
256 ; %val = call <2 x half> @v2f16_result()
257 ; store <2 x half> %val, ptr %ptr
; Receiving a <4 x half> from a call: v4f16_result returns the four halves
; in w0-w3 (soft-float ABI), which are stored individually with strh at
; 2-byte offsets through the pointer preserved in x19.
261 define void @outgoing_v4f16_return(ptr %ptr) #0 {
262 ; NOFP16-LABEL: outgoing_v4f16_return:
264 ; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
265 ; NOFP16-NEXT: .cfi_def_cfa_offset 16
266 ; NOFP16-NEXT: .cfi_offset w19, -8
267 ; NOFP16-NEXT: .cfi_offset w30, -16
268 ; NOFP16-NEXT: mov x19, x0
269 ; NOFP16-NEXT: bl v4f16_result
270 ; NOFP16-NEXT: strh w2, [x19, #4]
271 ; NOFP16-NEXT: strh w3, [x19, #6]
272 ; NOFP16-NEXT: strh w1, [x19, #2]
273 ; NOFP16-NEXT: strh w0, [x19]
274 ; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
276 %val = call <4 x half> @v4f16_result() #0
277 store <4 x half> %val, ptr %ptr
; Receiving a <8 x half> from a call: all eight halves come back in w0-w7
; and are stored with eight strh instructions at consecutive 2-byte
; offsets from the pointer kept in x19.
281 define void @outgoing_v8f16_return(ptr %ptr) #0 {
282 ; NOFP16-LABEL: outgoing_v8f16_return:
284 ; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
285 ; NOFP16-NEXT: .cfi_def_cfa_offset 16
286 ; NOFP16-NEXT: .cfi_offset w19, -8
287 ; NOFP16-NEXT: .cfi_offset w30, -16
288 ; NOFP16-NEXT: mov x19, x0
289 ; NOFP16-NEXT: bl v8f16_result
290 ; NOFP16-NEXT: strh w5, [x19, #10]
291 ; NOFP16-NEXT: strh w7, [x19, #14]
292 ; NOFP16-NEXT: strh w6, [x19, #12]
293 ; NOFP16-NEXT: strh w4, [x19, #8]
294 ; NOFP16-NEXT: strh w3, [x19, #6]
295 ; NOFP16-NEXT: strh w2, [x19, #4]
296 ; NOFP16-NEXT: strh w1, [x19, #2]
297 ; NOFP16-NEXT: strh w0, [x19]
298 ; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
300 %val = call <8 x half> @v8f16_result() #0
301 store <8 x half> %val, ptr %ptr
; A <8 x half> call result whose element 0 is used outside the calling
; block (the IR here spans more than one basic block; intermediate lines,
; including the terminators, are not visible in this chunk). Element 0 of
; the soft-float return is already in w0, so the generated code needs no
; extra moves — just the call with lr saved around it.
305 define half @call_split_type_used_outside_block_v8f16() #0 {
306 ; NOFP16-LABEL: call_split_type_used_outside_block_v8f16:
307 ; NOFP16: // %bb.0: // %bb0
308 ; NOFP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
309 ; NOFP16-NEXT: .cfi_def_cfa_offset 16
310 ; NOFP16-NEXT: .cfi_offset w30, -16
311 ; NOFP16-NEXT: bl v8f16_result
312 ; NOFP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
315 %split.ret.type = call <8 x half> @v8f16_result() #0
319 %extract = extractelement <8 x half> %split.ret.type, i32 0
323 declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #0
324 declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #0
325 declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #0
326 declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata) #0
328 declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #0
329 declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #0
330 declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #0
331 declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata) #0
333 attributes #0 = { strictfp }