1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
; fpext_4: widen a <4 x half> argument to <4 x float>.
; The expected output is four scalar vcvtt/vcvtb.f32.f16 conversions that read
; s1 and s0 and write s3 down to s0.
4 define arm_aapcs_vfpcc <4 x float> @fpext_4(<4 x half> %src1) {
5 ; CHECK-LABEL: fpext_4:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vcvtt.f32.f16 s3, s1
8 ; CHECK-NEXT: vcvtb.f32.f16 s2, s1
9 ; CHECK-NEXT: vcvtt.f32.f16 s1, s0
10 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
13 %out = fpext <4 x half> %src1 to <4 x float>
; fpext_8: widen an <8 x half> argument to <8 x float>.
; The expected output is eight scalar vcvtt/vcvtb.f32.f16 conversions writing
; s11 down to s4, followed by a vmov of q2 into q0.
17 define arm_aapcs_vfpcc <8 x float> @fpext_8(<8 x half> %src1) {
18 ; CHECK-LABEL: fpext_8:
19 ; CHECK: @ %bb.0: @ %entry
20 ; CHECK-NEXT: vcvtt.f32.f16 s11, s1
21 ; CHECK-NEXT: vcvtb.f32.f16 s10, s1
22 ; CHECK-NEXT: vcvtt.f32.f16 s9, s0
23 ; CHECK-NEXT: vcvtb.f32.f16 s8, s0
24 ; CHECK-NEXT: vcvtt.f32.f16 s7, s3
25 ; CHECK-NEXT: vcvtb.f32.f16 s6, s3
26 ; CHECK-NEXT: vcvtt.f32.f16 s5, s2
27 ; CHECK-NEXT: vcvtb.f32.f16 s4, s2
28 ; CHECK-NEXT: vmov q0, q2
31 %out = fpext <8 x half> %src1 to <8 x float>
; fptrunc_4: narrow a <4 x float> argument to <4 x half>.
; The expected output is four scalar vcvtb/vcvtt.f16.f32 conversions that read
; s0..s3 and pack the results into s0 and s1.
36 define arm_aapcs_vfpcc <4 x half> @fptrunc_4(<4 x float> %src1) {
37 ; CHECK-LABEL: fptrunc_4:
38 ; CHECK: @ %bb.0: @ %entry
39 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
40 ; CHECK-NEXT: vcvtt.f16.f32 s0, s1
41 ; CHECK-NEXT: vcvtb.f16.f32 s1, s2
42 ; CHECK-NEXT: vcvtt.f16.f32 s1, s3
45 %out = fptrunc <4 x float> %src1 to <4 x half>
; fptrunc_8: narrow an <8 x float> argument to <8 x half>.
; The expected output is eight scalar vcvtb/vcvtt.f16.f32 conversions that read
; s0..s7 and pack the results into s0..s3; the exact instruction order below is
; part of what the test pins.
49 define arm_aapcs_vfpcc <8 x half> @fptrunc_8(<8 x float> %src1) {
50 ; CHECK-LABEL: fptrunc_8:
51 ; CHECK: @ %bb.0: @ %entry
52 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
53 ; CHECK-NEXT: vcvtt.f16.f32 s0, s1
54 ; CHECK-NEXT: vcvtb.f16.f32 s1, s2
55 ; CHECK-NEXT: vcvtb.f16.f32 s2, s4
56 ; CHECK-NEXT: vcvtt.f16.f32 s1, s3
57 ; CHECK-NEXT: vcvtb.f16.f32 s3, s6
58 ; CHECK-NEXT: vcvtt.f16.f32 s2, s5
59 ; CHECK-NEXT: vcvtt.f16.f32 s3, s7
62 %out = fptrunc <8 x float> %src1 to <8 x half>
; shuffle_trunc1: interleave the lanes of %src1 and %src2 (mask 0,4,1,5,2,6,3,7)
; and then narrow the <8 x float> result to <8 x half>.
; The expected output is a vector vcvtb.f16.f32 from q0 followed by a vector
; vcvtt.f16.f32 from q1, both writing q0.
67 define arm_aapcs_vfpcc <8 x half> @shuffle_trunc1(<4 x float> %src1, <4 x float> %src2) {
68 ; CHECK-LABEL: shuffle_trunc1:
69 ; CHECK: @ %bb.0: @ %entry
70 ; CHECK-NEXT: vcvtb.f16.f32 q0, q0
71 ; CHECK-NEXT: vcvtt.f16.f32 q0, q1
74 %strided.vec = shufflevector <4 x float> %src1, <4 x float> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
75 %out = fptrunc <8 x float> %strided.vec to <8 x half>
; shuffle_trunc2: interleave with %src2 lanes first (mask 4,0,5,1,6,2,7,3) and
; then narrow the <8 x float> result to <8 x half>.
; The expected output builds the result in q1 (vcvtb from q1, vcvtt from q0)
; and then moves q1 into q0.
79 define arm_aapcs_vfpcc <8 x half> @shuffle_trunc2(<4 x float> %src1, <4 x float> %src2) {
80 ; CHECK-LABEL: shuffle_trunc2:
81 ; CHECK: @ %bb.0: @ %entry
82 ; CHECK-NEXT: vcvtb.f16.f32 q1, q1
83 ; CHECK-NEXT: vcvtt.f16.f32 q1, q0
84 ; CHECK-NEXT: vmov q0, q1
87 %strided.vec = shufflevector <4 x float> %src1, <4 x float> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
88 %out = fptrunc <8 x float> %strided.vec to <8 x half>
; shuffle_trunc3: interleave two <8 x float> arguments lane by lane
; (mask 0,8,1,9,...,7,15) and narrow the <16 x float> result to <16 x half>.
; The expected output is two vector vcvtb.f16.f32 (q0, q1) followed by two
; vector vcvtt.f16.f32 taking q2 and q3 as sources.
92 define arm_aapcs_vfpcc <16 x half> @shuffle_trunc3(<8 x float> %src1, <8 x float> %src2) {
93 ; CHECK-LABEL: shuffle_trunc3:
94 ; CHECK: @ %bb.0: @ %entry
95 ; CHECK-NEXT: vcvtb.f16.f32 q0, q0
96 ; CHECK-NEXT: vcvtb.f16.f32 q1, q1
97 ; CHECK-NEXT: vcvtt.f16.f32 q0, q2
98 ; CHECK-NEXT: vcvtt.f16.f32 q1, q3
101 %strided.vec = shufflevector <8 x float> %src1, <8 x float> %src2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
102 %out = fptrunc <16 x float> %strided.vec to <16 x half>
; shuffle_trunc4: interleave two <8 x float> arguments with %src2 lanes first
; (mask 8,0,9,1,...,15,7) and narrow the <16 x float> result to <16 x half>.
; The expected output builds the result in q2/q3 (vcvtb from q2/q3, vcvtt from
; q0/q1) and then moves q2 and q3 into q0 and q1.
106 define arm_aapcs_vfpcc <16 x half> @shuffle_trunc4(<8 x float> %src1, <8 x float> %src2) {
107 ; CHECK-LABEL: shuffle_trunc4:
108 ; CHECK: @ %bb.0: @ %entry
109 ; CHECK-NEXT: vcvtb.f16.f32 q2, q2
110 ; CHECK-NEXT: vcvtb.f16.f32 q3, q3
111 ; CHECK-NEXT: vcvtt.f16.f32 q2, q0
112 ; CHECK-NEXT: vcvtt.f16.f32 q3, q1
113 ; CHECK-NEXT: vmov q0, q2
114 ; CHECK-NEXT: vmov q1, q3
117 %strided.vec = shufflevector <8 x float> %src1, <8 x float> %src2, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
118 %out = fptrunc <16 x float> %strided.vec to <16 x half>
; shuffle_trunc5: narrow %src1 and %src2 to <4 x half> separately, then
; interleave the two half vectors (mask 0,4,1,5,2,6,3,7).
; The expected output is a vector vcvtb.f16.f32 from q0 followed by a vector
; vcvtt.f16.f32 from q1, both writing q0.
122 define arm_aapcs_vfpcc <8 x half> @shuffle_trunc5(<4 x float> %src1, <4 x float> %src2) {
123 ; CHECK-LABEL: shuffle_trunc5:
124 ; CHECK: @ %bb.0: @ %entry
125 ; CHECK-NEXT: vcvtb.f16.f32 q0, q0
126 ; CHECK-NEXT: vcvtt.f16.f32 q0, q1
129 %out1 = fptrunc <4 x float> %src1 to <4 x half>
130 %out2 = fptrunc <4 x float> %src2 to <4 x half>
131 %s = shufflevector <4 x half> %out1, <4 x half> %out2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; shuffle_trunc6: narrow %src1 and %src2 to <4 x half> separately, then
; interleave with the %src2 result first (mask 4,0,5,1,6,2,7,3).
; The expected output builds the result in q1 (vcvtb from q1, vcvtt from q0)
; and then moves q1 into q0.
135 define arm_aapcs_vfpcc <8 x half> @shuffle_trunc6(<4 x float> %src1, <4 x float> %src2) {
136 ; CHECK-LABEL: shuffle_trunc6:
137 ; CHECK: @ %bb.0: @ %entry
138 ; CHECK-NEXT: vcvtb.f16.f32 q1, q1
139 ; CHECK-NEXT: vcvtt.f16.f32 q1, q0
140 ; CHECK-NEXT: vmov q0, q1
143 %out1 = fptrunc <4 x float> %src1 to <4 x half>
144 %out2 = fptrunc <4 x float> %src2 to <4 x half>
145 %s = shufflevector <4 x half> %out1, <4 x half> %out2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
; shuffle_trunc7: narrow two <8 x float> arguments to <8 x half> separately,
; then interleave the half vectors lane by lane (mask 0,8,1,9,...,7,15).
; The expected output is two vector vcvtb.f16.f32 (q0, q1) followed by two
; vector vcvtt.f16.f32 taking q2 and q3 as sources.
149 define arm_aapcs_vfpcc <16 x half> @shuffle_trunc7(<8 x float> %src1, <8 x float> %src2) {
150 ; CHECK-LABEL: shuffle_trunc7:
151 ; CHECK: @ %bb.0: @ %entry
152 ; CHECK-NEXT: vcvtb.f16.f32 q0, q0
153 ; CHECK-NEXT: vcvtb.f16.f32 q1, q1
154 ; CHECK-NEXT: vcvtt.f16.f32 q0, q2
155 ; CHECK-NEXT: vcvtt.f16.f32 q1, q3
158 %out1 = fptrunc <8 x float> %src1 to <8 x half>
159 %out2 = fptrunc <8 x float> %src2 to <8 x half>
160 %s = shufflevector <8 x half> %out1, <8 x half> %out2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
; shuffle_trunc8: narrow two <8 x float> arguments to <8 x half> separately,
; then interleave with the %src2 result first (mask 8,0,9,1,...,15,7).
; The expected output builds the result in q2/q3 (vcvtb from q2/q3, vcvtt from
; q0/q1) and then moves q2 and q3 into q0 and q1.
164 define arm_aapcs_vfpcc <16 x half> @shuffle_trunc8(<8 x float> %src1, <8 x float> %src2) {
165 ; CHECK-LABEL: shuffle_trunc8:
166 ; CHECK: @ %bb.0: @ %entry
167 ; CHECK-NEXT: vcvtb.f16.f32 q2, q2
168 ; CHECK-NEXT: vcvtb.f16.f32 q3, q3
169 ; CHECK-NEXT: vcvtt.f16.f32 q2, q0
170 ; CHECK-NEXT: vcvtt.f16.f32 q3, q1
171 ; CHECK-NEXT: vmov q0, q2
172 ; CHECK-NEXT: vmov q1, q3
175 %out1 = fptrunc <8 x float> %src1 to <8 x half>
176 %out2 = fptrunc <8 x float> %src2 to <8 x half>
177 %s = shufflevector <8 x half> %out1, <8 x half> %out2, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
; load_ext_4: load a <4 x half> from %src (align 4) and widen it to <4 x float>.
; The expected output is one vldrh.u32 from [r0] followed by one vector
; vcvtb.f32.f16 on q0.
184 define arm_aapcs_vfpcc <4 x float> @load_ext_4(ptr %src) {
185 ; CHECK-LABEL: load_ext_4:
186 ; CHECK: @ %bb.0: @ %entry
187 ; CHECK-NEXT: vldrh.u32 q0, [r0]
188 ; CHECK-NEXT: vcvtb.f32.f16 q0, q0
191 %wide.load = load <4 x half>, ptr %src, align 4
192 %e = fpext <4 x half> %wide.load to <4 x float>
; load_ext_8: load an <8 x half> from %src (align 4) and widen it to
; <8 x float>.
; The expected output is two vldrh.u32 loads (offsets 0 and 8 from r0), each
; followed later by a vector vcvtb.f32.f16 on the loaded register.
196 define arm_aapcs_vfpcc <8 x float> @load_ext_8(ptr %src) {
197 ; CHECK-LABEL: load_ext_8:
198 ; CHECK: @ %bb.0: @ %entry
199 ; CHECK-NEXT: vldrh.u32 q0, [r0]
200 ; CHECK-NEXT: vldrh.u32 q1, [r0, #8]
201 ; CHECK-NEXT: vcvtb.f32.f16 q0, q0
202 ; CHECK-NEXT: vcvtb.f32.f16 q1, q1
205 %wide.load = load <8 x half>, ptr %src, align 4
206 %e = fpext <8 x half> %wide.load to <8 x float>
; load_ext_16: load a <16 x half> from %src (align 4) and widen it to
; <16 x float>.
; The expected output is four vldrh.u32 loads (offsets 0, 8, 16 and 24 from r0)
; followed by four vector vcvtb.f32.f16 conversions on q0..q3.
210 define arm_aapcs_vfpcc <16 x float> @load_ext_16(ptr %src) {
211 ; CHECK-LABEL: load_ext_16:
212 ; CHECK: @ %bb.0: @ %entry
213 ; CHECK-NEXT: vldrh.u32 q0, [r0]
214 ; CHECK-NEXT: vldrh.u32 q1, [r0, #8]
215 ; CHECK-NEXT: vldrh.u32 q2, [r0, #16]
216 ; CHECK-NEXT: vldrh.u32 q3, [r0, #24]
217 ; CHECK-NEXT: vcvtb.f32.f16 q0, q0
218 ; CHECK-NEXT: vcvtb.f32.f16 q1, q1
219 ; CHECK-NEXT: vcvtb.f32.f16 q2, q2
220 ; CHECK-NEXT: vcvtb.f32.f16 q3, q3
223 %wide.load = load <16 x half>, ptr %src, align 4
224 %e = fpext <16 x half> %wide.load to <16 x float>
; load_shuffleext_8: load an <8 x half> from %src (align 4), keep the even
; lanes (mask 0,2,4,6) and widen them to <4 x float>.
; The expected output is a single vldrw.u32 from [r0] followed by one vector
; vcvtb.f32.f16 on q0.
228 define arm_aapcs_vfpcc <4 x float> @load_shuffleext_8(ptr %src) {
229 ; CHECK-LABEL: load_shuffleext_8:
230 ; CHECK: @ %bb.0: @ %entry
231 ; CHECK-NEXT: vldrw.u32 q0, [r0]
232 ; CHECK-NEXT: vcvtb.f32.f16 q0, q0
235 %wide.load = load <8 x half>, ptr %src, align 4
236 %sh = shufflevector <8 x half> %wide.load, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
237 %e = fpext <4 x half> %sh to <4 x float>
; load_shuffleext_16: load a <16 x half> from %src (align 4), keep the even
; lanes (mask 0,2,...,14) and widen them to <8 x float>.
; The expected output is a vld20.16/vld21.16 pair into {q2, q3} followed by
; eight scalar vcvtt/vcvtb.f32.f16 conversions reading s8..s11 and writing
; s0..s7.
241 define arm_aapcs_vfpcc <8 x float> @load_shuffleext_16(ptr %src) {
242 ; CHECK-LABEL: load_shuffleext_16:
243 ; CHECK: @ %bb.0: @ %entry
244 ; CHECK-NEXT: vld20.16 {q2, q3}, [r0]
245 ; CHECK-NEXT: vld21.16 {q2, q3}, [r0]
246 ; CHECK-NEXT: vcvtt.f32.f16 s3, s9
247 ; CHECK-NEXT: vcvtb.f32.f16 s2, s9
248 ; CHECK-NEXT: vcvtt.f32.f16 s1, s8
249 ; CHECK-NEXT: vcvtb.f32.f16 s0, s8
250 ; CHECK-NEXT: vcvtt.f32.f16 s7, s11
251 ; CHECK-NEXT: vcvtb.f32.f16 s6, s11
252 ; CHECK-NEXT: vcvtt.f32.f16 s5, s10
253 ; CHECK-NEXT: vcvtb.f32.f16 s4, s10
256 %wide.load = load <16 x half>, ptr %src, align 4
257 %sh = shufflevector <16 x half> %wide.load, <16 x half> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
258 %e = fpext <8 x half> %sh to <8 x float>
; store_trunc_4: narrow %val from <4 x float> to <4 x half> and store it to the
; pointer argument (align 4).
; The expected output is one vector vcvtb.f16.f32 on q0 followed by a vstrh.32
; to [r0].
265 define arm_aapcs_vfpcc void @store_trunc_4(ptr %src, <4 x float> %val) {
266 ; CHECK-LABEL: store_trunc_4:
267 ; CHECK: @ %bb.0: @ %entry
268 ; CHECK-NEXT: vcvtb.f16.f32 q0, q0
269 ; CHECK-NEXT: vstrh.32 q0, [r0]
272 %e = fptrunc <4 x float> %val to <4 x half>
273 store <4 x half> %e, ptr %src, align 4
; store_trunc_8: narrow %val from <8 x float> to <8 x half> and store it to the
; pointer argument (align 4).
; The expected output is two vector vcvtb.f16.f32 conversions (q1, q0)
; followed by two vstrh.32 stores at offsets 8 and 0 from r0.
277 define arm_aapcs_vfpcc void @store_trunc_8(ptr %src, <8 x float> %val) {
278 ; CHECK-LABEL: store_trunc_8:
279 ; CHECK: @ %bb.0: @ %entry
280 ; CHECK-NEXT: vcvtb.f16.f32 q1, q1
281 ; CHECK-NEXT: vcvtb.f16.f32 q0, q0
282 ; CHECK-NEXT: vstrh.32 q1, [r0, #8]
283 ; CHECK-NEXT: vstrh.32 q0, [r0]
286 %e = fptrunc <8 x float> %val to <8 x half>
287 store <8 x half> %e, ptr %src, align 4
; store_trunc_16: narrow %val from <16 x float> to <16 x half> and store it to
; the pointer argument (align 4).
; The expected output is four vector vcvtb.f16.f32 conversions (q3 down to q0)
; followed by four vstrh.32 stores at offsets 24, 16, 8 and 0 from r0.
291 define arm_aapcs_vfpcc void @store_trunc_16(ptr %src, <16 x float> %val) {
292 ; CHECK-LABEL: store_trunc_16:
293 ; CHECK: @ %bb.0: @ %entry
294 ; CHECK-NEXT: vcvtb.f16.f32 q3, q3
295 ; CHECK-NEXT: vcvtb.f16.f32 q2, q2
296 ; CHECK-NEXT: vcvtb.f16.f32 q1, q1
297 ; CHECK-NEXT: vcvtb.f16.f32 q0, q0
298 ; CHECK-NEXT: vstrh.32 q3, [r0, #24]
299 ; CHECK-NEXT: vstrh.32 q2, [r0, #16]
300 ; CHECK-NEXT: vstrh.32 q1, [r0, #8]
301 ; CHECK-NEXT: vstrh.32 q0, [r0]
304 %e = fptrunc <16 x float> %val to <16 x half>
305 store <16 x half> %e, ptr %src, align 4
; store_shuffletrunc_8: interleave %val1 and %val2 (mask 0,4,1,5,2,6,3,7),
; narrow the <8 x float> result to <8 x half> and store it to the pointer
; argument (align 4).
; The expected output is a vector vcvtb.f16.f32 from q0, a vector vcvtt.f16.f32
; from q1 into q0, and a single vstrw.32 of q0 to [r0].
309 define arm_aapcs_vfpcc void @store_shuffletrunc_8(ptr %src, <4 x float> %val1, <4 x float> %val2) {
310 ; CHECK-LABEL: store_shuffletrunc_8:
311 ; CHECK: @ %bb.0: @ %entry
312 ; CHECK-NEXT: vcvtb.f16.f32 q0, q0
313 ; CHECK-NEXT: vcvtt.f16.f32 q0, q1
314 ; CHECK-NEXT: vstrw.32 q0, [r0]
317 %strided.vec = shufflevector <4 x float> %val1, <4 x float> %val2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
318 %out = fptrunc <8 x float> %strided.vec to <8 x half>
319 store <8 x half> %out, ptr %src, align 4
; store_shuffletrunc_16: interleave two <8 x float> arguments lane by lane
; (mask 0,8,1,9,...,7,15), narrow the <16 x float> result to <16 x half> and
; store it to the pointer argument (align 4).
; The expected output is two vector vcvtb.f16.f32 (q1, q0), two vector
; vcvtt.f16.f32 taking q3 and q2 as sources, and two vstrw.32 stores at offsets
; 16 and 0 from r0.
323 define arm_aapcs_vfpcc void @store_shuffletrunc_16(ptr %src, <8 x float> %val1, <8 x float> %val2) {
324 ; CHECK-LABEL: store_shuffletrunc_16:
325 ; CHECK: @ %bb.0: @ %entry
326 ; CHECK-NEXT: vcvtb.f16.f32 q1, q1
327 ; CHECK-NEXT: vcvtb.f16.f32 q0, q0
328 ; CHECK-NEXT: vcvtt.f16.f32 q1, q3
329 ; CHECK-NEXT: vcvtt.f16.f32 q0, q2
330 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
331 ; CHECK-NEXT: vstrw.32 q0, [r0]
334 %strided.vec = shufflevector <8 x float> %val1, <8 x float> %val2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
335 %out = fptrunc <16 x float> %strided.vec to <16 x half>
336 store <16 x half> %out, ptr %src, align 4