1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
; add_int8_t: plain `add` of two <16 x i8> arguments (%src1 + %src2).
; The assertions are shared by both RUN lines and expect one vector
; instruction, `vadd.i8 q0, q0, q1`, right after the entry-block marker.
5 define arm_aapcs_vfpcc <16 x i8> @add_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
6 ; CHECK-LABEL: add_int8_t:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vadd.i8 q0, q0, q1
11 %0 = add <16 x i8> %src1, %src2
; add_int16_t: plain `add` of two <8 x i16> arguments (%src1 + %src2).
; The assertions are shared by both RUN lines and expect one vector
; instruction, `vadd.i16 q0, q0, q1`, right after the entry-block marker.
15 define arm_aapcs_vfpcc <8 x i16> @add_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
16 ; CHECK-LABEL: add_int16_t:
17 ; CHECK: @ %bb.0: @ %entry
18 ; CHECK-NEXT: vadd.i16 q0, q0, q1
21 %0 = add <8 x i16> %src1, %src2
; add_int32_t: `add nsw` of two <4 x i32> arguments (%src1 + %src2).
; The assertions are shared by both RUN lines and expect one vector
; instruction, `vadd.i32 q0, q0, q1`, right after the entry-block marker.
25 define arm_aapcs_vfpcc <4 x i32> @add_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
26 ; CHECK-LABEL: add_int32_t:
27 ; CHECK: @ %bb.0: @ %entry
28 ; CHECK-NEXT: vadd.i32 q0, q0, q1
31 %0 = add nsw <4 x i32> %src1, %src2
; add_int64_t: `add nsw` of two <2 x i64> arguments (%src1 + %src2).
; The expected output (shared by both RUN lines) contains no vector add.
; Instead it:
;   - pushes r4, r5, r7, lr,
;   - moves d0..d3 into core-register pairs,
;   - adds each 64-bit lane with an add-with-flags / add-with-carry pair,
;   - rebuilds q0 with two lane-pair `vmov`s and pops back into pc.
35 define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
36 ; CHECK-LABEL: add_int64_t:
37 ; CHECK: @ %bb.0: @ %entry
38 ; CHECK-NEXT: .save {r4, r5, r7, lr}
39 ; CHECK-NEXT: push {r4, r5, r7, lr}
40 ; CHECK-NEXT: vmov lr, r12, d3
41 ; CHECK-NEXT: vmov r2, r3, d1
42 ; CHECK-NEXT: vmov r1, r0, d2
43 ; CHECK-NEXT: vmov r4, r5, d0
44 ; CHECK-NEXT: adds.w r2, r2, lr
45 ; CHECK-NEXT: adc.w r3, r3, r12
46 ; CHECK-NEXT: adds r1, r1, r4
47 ; CHECK-NEXT: adcs r0, r5
48 ; CHECK-NEXT: vmov q0[2], q0[0], r1, r2
49 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r3
50 ; CHECK-NEXT: pop {r4, r5, r7, pc}
52 %0 = add nsw <2 x i64> %src1, %src2
; add_float32_t: `fadd nnan ninf nsz` of two <4 x float> arguments, written
; as %src2 + %src1. The two RUN lines have separate expectations:
;   - +mve,+fullfp16 run: four scalar `vadd.f32` on s0..s3 / s4..s7, then
;     `bx lr`.
;   - +mve.fp run: a single `vadd.f32 q0, q1, q0`, then `bx lr`.
56 define arm_aapcs_vfpcc <4 x float> @add_float32_t(<4 x float> %src1, <4 x float> %src2) {
57 ; CHECK-MVE-LABEL: add_float32_t:
58 ; CHECK-MVE: @ %bb.0: @ %entry
59 ; CHECK-MVE-NEXT: vadd.f32 s3, s7, s3
60 ; CHECK-MVE-NEXT: vadd.f32 s2, s6, s2
61 ; CHECK-MVE-NEXT: vadd.f32 s1, s5, s1
62 ; CHECK-MVE-NEXT: vadd.f32 s0, s4, s0
63 ; CHECK-MVE-NEXT: bx lr
65 ; CHECK-MVEFP-LABEL: add_float32_t:
66 ; CHECK-MVEFP: @ %bb.0: @ %entry
67 ; CHECK-MVEFP-NEXT: vadd.f32 q0, q1, q0
68 ; CHECK-MVEFP-NEXT: bx lr
70 %0 = fadd nnan ninf nsz <4 x float> %src2, %src1
; add_float16_t: `fadd nnan ninf nsz` of two <8 x half> arguments, written
; as %src2 + %src1. The two RUN lines have separate expectations:
;   - +mve,+fullfp16 run: for each of s0..s3 the sequence is
;       `vmovx.f16` on both operands,
;       two scalar `vadd.f16`,
;       `vins.f16` to write the second result back into the destination
;       s-register;
;     then `bx lr`.
;   - +mve.fp run: a single `vadd.f16 q0, q1, q0`, then `bx lr`.
74 define arm_aapcs_vfpcc <8 x half> @add_float16_t(<8 x half> %src1, <8 x half> %src2) {
75 ; CHECK-MVE-LABEL: add_float16_t:
76 ; CHECK-MVE: @ %bb.0: @ %entry
77 ; CHECK-MVE-NEXT: vmovx.f16 s8, s0
78 ; CHECK-MVE-NEXT: vmovx.f16 s10, s4
79 ; CHECK-MVE-NEXT: vadd.f16 s0, s4, s0
80 ; CHECK-MVE-NEXT: vadd.f16 s8, s10, s8
81 ; CHECK-MVE-NEXT: vins.f16 s0, s8
82 ; CHECK-MVE-NEXT: vmovx.f16 s4, s1
83 ; CHECK-MVE-NEXT: vmovx.f16 s8, s5
84 ; CHECK-MVE-NEXT: vadd.f16 s1, s5, s1
85 ; CHECK-MVE-NEXT: vadd.f16 s4, s8, s4
86 ; CHECK-MVE-NEXT: vmovx.f16 s8, s6
87 ; CHECK-MVE-NEXT: vins.f16 s1, s4
88 ; CHECK-MVE-NEXT: vmovx.f16 s4, s2
89 ; CHECK-MVE-NEXT: vadd.f16 s2, s6, s2
90 ; CHECK-MVE-NEXT: vadd.f16 s4, s8, s4
91 ; CHECK-MVE-NEXT: vins.f16 s2, s4
92 ; CHECK-MVE-NEXT: vmovx.f16 s4, s3
93 ; CHECK-MVE-NEXT: vmovx.f16 s6, s7
94 ; CHECK-MVE-NEXT: vadd.f16 s3, s7, s3
95 ; CHECK-MVE-NEXT: vadd.f16 s4, s6, s4
96 ; CHECK-MVE-NEXT: vins.f16 s3, s4
97 ; CHECK-MVE-NEXT: bx lr
99 ; CHECK-MVEFP-LABEL: add_float16_t:
100 ; CHECK-MVEFP: @ %bb.0: @ %entry
101 ; CHECK-MVEFP-NEXT: vadd.f16 q0, q1, q0
102 ; CHECK-MVEFP-NEXT: bx lr
104 %0 = fadd nnan ninf nsz <8 x half> %src2, %src1
; add_float64_t: `fadd nnan ninf nsz` of two <2 x double> arguments, written
; as %src2 + %src1. The expected output (shared by both RUN lines) contains
; no vector or scalar FP add. Instead it:
;   - saves r7/lr and d8..d11,
;   - copies q1 into q5 and q0 into q4,
;   - issues two `bl __aeabi_dadd` calls, one per 64-bit lane, with operands
;     passed in r0..r3,
;   - places the two results in d9 and d8, then moves q4 into q0 before the
;     registers are restored.
108 define arm_aapcs_vfpcc <2 x double> @add_float64_t(<2 x double> %src1, <2 x double> %src2) {
109 ; CHECK-LABEL: add_float64_t:
110 ; CHECK: @ %bb.0: @ %entry
111 ; CHECK-NEXT: .save {r7, lr}
112 ; CHECK-NEXT: push {r7, lr}
113 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
114 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
115 ; CHECK-NEXT: vmov q5, q1
116 ; CHECK-NEXT: vmov q4, q0
117 ; CHECK-NEXT: vmov r0, r1, d11
118 ; CHECK-NEXT: vmov r2, r3, d9
119 ; CHECK-NEXT: bl __aeabi_dadd
120 ; CHECK-NEXT: vmov lr, r12, d10
121 ; CHECK-NEXT: vmov r2, r3, d8
122 ; CHECK-NEXT: vmov d9, r0, r1
123 ; CHECK-NEXT: mov r0, lr
124 ; CHECK-NEXT: mov r1, r12
125 ; CHECK-NEXT: bl __aeabi_dadd
126 ; CHECK-NEXT: vmov d8, r0, r1
127 ; CHECK-NEXT: vmov q0, q4
128 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
129 ; CHECK-NEXT: pop {r7, pc}
131 %0 = fadd nnan ninf nsz <2 x double> %src2, %src1
; sub_int8_t: plain `sub` of two <16 x i8> arguments. Note the operand
; order: the IR computes %src2 - %src1.
; The assertions are shared by both RUN lines and expect one vector
; instruction, `vsub.i8 q0, q1, q0`, right after the entry-block marker.
136 define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
137 ; CHECK-LABEL: sub_int8_t:
138 ; CHECK: @ %bb.0: @ %entry
139 ; CHECK-NEXT: vsub.i8 q0, q1, q0
142 %0 = sub <16 x i8> %src2, %src1
; sub_int16_t: plain `sub` of two <8 x i16> arguments. Note the operand
; order: the IR computes %src2 - %src1.
; The assertions are shared by both RUN lines and expect one vector
; instruction, `vsub.i16 q0, q1, q0`, right after the entry-block marker.
146 define arm_aapcs_vfpcc <8 x i16> @sub_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
147 ; CHECK-LABEL: sub_int16_t:
148 ; CHECK: @ %bb.0: @ %entry
149 ; CHECK-NEXT: vsub.i16 q0, q1, q0
152 %0 = sub <8 x i16> %src2, %src1
; sub_int32_t: `sub nsw` of two <4 x i32> arguments. Note the operand
; order: the IR computes %src2 - %src1.
; The assertions are shared by both RUN lines and expect one vector
; instruction, `vsub.i32 q0, q1, q0`, right after the entry-block marker.
156 define arm_aapcs_vfpcc <4 x i32> @sub_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
157 ; CHECK-LABEL: sub_int32_t:
158 ; CHECK: @ %bb.0: @ %entry
159 ; CHECK-NEXT: vsub.i32 q0, q1, q0
162 %0 = sub nsw <4 x i32> %src2, %src1
; sub_int64_t: `sub nsw` of two <2 x i64> arguments, computed as
; %src2 - %src1. The expected output (shared by both RUN lines) contains no
; vector subtract. Instead it:
;   - pushes r4, r5, r7, lr,
;   - moves d0..d3 into core-register pairs,
;   - subtracts each 64-bit lane with a `subs` / `sbc` pair,
;   - rebuilds q0 with two lane-pair `vmov`s and pops back into pc.
166 define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
167 ; CHECK-LABEL: sub_int64_t:
168 ; CHECK: @ %bb.0: @ %entry
169 ; CHECK-NEXT: .save {r4, r5, r7, lr}
170 ; CHECK-NEXT: push {r4, r5, r7, lr}
171 ; CHECK-NEXT: vmov lr, r12, d1
172 ; CHECK-NEXT: vmov r2, r3, d3
173 ; CHECK-NEXT: vmov r1, r0, d0
174 ; CHECK-NEXT: vmov r4, r5, d2
175 ; CHECK-NEXT: subs.w r2, r2, lr
176 ; CHECK-NEXT: sbc.w r3, r3, r12
177 ; CHECK-NEXT: subs r1, r4, r1
178 ; CHECK-NEXT: sbc.w r0, r5, r0
179 ; CHECK-NEXT: vmov q0[2], q0[0], r1, r2
180 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r3
181 ; CHECK-NEXT: pop {r4, r5, r7, pc}
183 %0 = sub nsw <2 x i64> %src2, %src1
; sub_float32_t: `fsub nnan ninf nsz` of two <4 x float> arguments, computed
; as %src2 - %src1. The two RUN lines have separate expectations:
;   - +mve,+fullfp16 run: four scalar `vsub.f32` on s0..s3 / s4..s7, then
;     `bx lr`.
;   - +mve.fp run: a single `vsub.f32 q0, q1, q0`, then `bx lr`.
187 define arm_aapcs_vfpcc <4 x float> @sub_float32_t(<4 x float> %src1, <4 x float> %src2) {
188 ; CHECK-MVE-LABEL: sub_float32_t:
189 ; CHECK-MVE: @ %bb.0: @ %entry
190 ; CHECK-MVE-NEXT: vsub.f32 s3, s7, s3
191 ; CHECK-MVE-NEXT: vsub.f32 s2, s6, s2
192 ; CHECK-MVE-NEXT: vsub.f32 s1, s5, s1
193 ; CHECK-MVE-NEXT: vsub.f32 s0, s4, s0
194 ; CHECK-MVE-NEXT: bx lr
196 ; CHECK-MVEFP-LABEL: sub_float32_t:
197 ; CHECK-MVEFP: @ %bb.0: @ %entry
198 ; CHECK-MVEFP-NEXT: vsub.f32 q0, q1, q0
199 ; CHECK-MVEFP-NEXT: bx lr
201 %0 = fsub nnan ninf nsz <4 x float> %src2, %src1
; sub_float16_t: `fsub nnan ninf nsz` of two <8 x half> arguments, computed
; as %src2 - %src1. The two RUN lines have separate expectations:
;   - +mve,+fullfp16 run: for each of s0..s3 the sequence is
;       `vmovx.f16` on both operands,
;       two scalar `vsub.f16`,
;       `vins.f16` to write the second result back into the destination
;       s-register;
;     then `bx lr`.
;   - +mve.fp run: a single `vsub.f16 q0, q1, q0`, then `bx lr`.
205 define arm_aapcs_vfpcc <8 x half> @sub_float16_t(<8 x half> %src1, <8 x half> %src2) {
206 ; CHECK-MVE-LABEL: sub_float16_t:
207 ; CHECK-MVE: @ %bb.0: @ %entry
208 ; CHECK-MVE-NEXT: vmovx.f16 s8, s0
209 ; CHECK-MVE-NEXT: vmovx.f16 s10, s4
210 ; CHECK-MVE-NEXT: vsub.f16 s0, s4, s0
211 ; CHECK-MVE-NEXT: vsub.f16 s8, s10, s8
212 ; CHECK-MVE-NEXT: vins.f16 s0, s8
213 ; CHECK-MVE-NEXT: vmovx.f16 s4, s1
214 ; CHECK-MVE-NEXT: vmovx.f16 s8, s5
215 ; CHECK-MVE-NEXT: vsub.f16 s1, s5, s1
216 ; CHECK-MVE-NEXT: vsub.f16 s4, s8, s4
217 ; CHECK-MVE-NEXT: vmovx.f16 s8, s6
218 ; CHECK-MVE-NEXT: vins.f16 s1, s4
219 ; CHECK-MVE-NEXT: vmovx.f16 s4, s2
220 ; CHECK-MVE-NEXT: vsub.f16 s2, s6, s2
221 ; CHECK-MVE-NEXT: vsub.f16 s4, s8, s4
222 ; CHECK-MVE-NEXT: vins.f16 s2, s4
223 ; CHECK-MVE-NEXT: vmovx.f16 s4, s3
224 ; CHECK-MVE-NEXT: vmovx.f16 s6, s7
225 ; CHECK-MVE-NEXT: vsub.f16 s3, s7, s3
226 ; CHECK-MVE-NEXT: vsub.f16 s4, s6, s4
227 ; CHECK-MVE-NEXT: vins.f16 s3, s4
228 ; CHECK-MVE-NEXT: bx lr
230 ; CHECK-MVEFP-LABEL: sub_float16_t:
231 ; CHECK-MVEFP: @ %bb.0: @ %entry
232 ; CHECK-MVEFP-NEXT: vsub.f16 q0, q1, q0
233 ; CHECK-MVEFP-NEXT: bx lr
235 %0 = fsub nnan ninf nsz <8 x half> %src2, %src1
; sub_float64_t: `fsub nnan ninf nsz` of two <2 x double> arguments, computed
; as %src2 - %src1. The expected output (shared by both RUN lines) contains
; no vector or scalar FP subtract. Instead it:
;   - saves r7/lr and d8..d11,
;   - copies q1 into q5 and q0 into q4,
;   - issues two `bl __aeabi_dsub` calls, one per 64-bit lane, with operands
;     passed in r0..r3,
;   - places the two results in d9 and d8, then moves q4 into q0 before the
;     registers are restored.
239 define arm_aapcs_vfpcc <2 x double> @sub_float64_t(<2 x double> %src1, <2 x double> %src2) {
240 ; CHECK-LABEL: sub_float64_t:
241 ; CHECK: @ %bb.0: @ %entry
242 ; CHECK-NEXT: .save {r7, lr}
243 ; CHECK-NEXT: push {r7, lr}
244 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
245 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
246 ; CHECK-NEXT: vmov q5, q1
247 ; CHECK-NEXT: vmov q4, q0
248 ; CHECK-NEXT: vmov r0, r1, d11
249 ; CHECK-NEXT: vmov r2, r3, d9
250 ; CHECK-NEXT: bl __aeabi_dsub
251 ; CHECK-NEXT: vmov lr, r12, d10
252 ; CHECK-NEXT: vmov r2, r3, d8
253 ; CHECK-NEXT: vmov d9, r0, r1
254 ; CHECK-NEXT: mov r0, lr
255 ; CHECK-NEXT: mov r1, r12
256 ; CHECK-NEXT: bl __aeabi_dsub
257 ; CHECK-NEXT: vmov d8, r0, r1
258 ; CHECK-NEXT: vmov q0, q4
259 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
260 ; CHECK-NEXT: pop {r7, pc}
262 %0 = fsub nnan ninf nsz <2 x double> %src2, %src1
; mul_int8_t: plain `mul` of two <16 x i8> arguments (%src1 * %src2).
; The assertions are shared by both RUN lines and expect one vector
; instruction, `vmul.i8 q0, q0, q1`, right after the entry-block marker.
267 define arm_aapcs_vfpcc <16 x i8> @mul_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
268 ; CHECK-LABEL: mul_int8_t:
269 ; CHECK: @ %bb.0: @ %entry
270 ; CHECK-NEXT: vmul.i8 q0, q0, q1
273 %0 = mul <16 x i8> %src1, %src2
; mul_int16_t: plain `mul` of two <8 x i16> arguments (%src1 * %src2).
; The assertions are shared by both RUN lines and expect one vector
; instruction, `vmul.i16 q0, q0, q1`, right after the entry-block marker.
277 define arm_aapcs_vfpcc <8 x i16> @mul_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
278 ; CHECK-LABEL: mul_int16_t:
279 ; CHECK: @ %bb.0: @ %entry
280 ; CHECK-NEXT: vmul.i16 q0, q0, q1
283 %0 = mul <8 x i16> %src1, %src2
; mul_int32_t: `mul nsw` of two <4 x i32> arguments (%src1 * %src2).
; The assertions are shared by both RUN lines and expect one vector
; instruction, `vmul.i32 q0, q0, q1`, right after the entry-block marker.
287 define arm_aapcs_vfpcc <4 x i32> @mul_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
288 ; CHECK-LABEL: mul_int32_t:
289 ; CHECK: @ %bb.0: @ %entry
290 ; CHECK-NEXT: vmul.i32 q0, q0, q1
293 %0 = mul nsw <4 x i32> %src1, %src2
; mul_int64_t: `mul nsw` of two <2 x i64> arguments (%src1 * %src2).
; The expected output (shared by both RUN lines) contains no vector
; multiply. Instead it:
;   - pushes r4..r7 and lr,
;   - moves d0..d3 into core-register pairs,
;   - forms each 64-bit lane product from one `umull` plus two `mla`
;     instructions,
;   - rebuilds q0 with two lane-pair `vmov`s and pops back into pc.
297 define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
298 ; CHECK-LABEL: mul_int64_t:
299 ; CHECK: @ %bb.0: @ %entry
300 ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
301 ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
302 ; CHECK-NEXT: vmov r0, r1, d2
303 ; CHECK-NEXT: vmov r2, lr, d0
304 ; CHECK-NEXT: vmov r4, r5, d3
305 ; CHECK-NEXT: umull r12, r3, r2, r0
306 ; CHECK-NEXT: mla r1, r2, r1, r3
307 ; CHECK-NEXT: vmov r2, r3, d1
308 ; CHECK-NEXT: mla r0, lr, r0, r1
309 ; CHECK-NEXT: umull r6, r7, r2, r4
310 ; CHECK-NEXT: mla r2, r2, r5, r7
311 ; CHECK-NEXT: vmov q0[2], q0[0], r12, r6
312 ; CHECK-NEXT: mla r2, r3, r4, r2
313 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
314 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
316 %0 = mul nsw <2 x i64> %src1, %src2
; mul_float16_t: `fmul nnan ninf nsz` of two <8 x half> arguments, written
; as %src2 * %src1. The two RUN lines have separate expectations:
;   - +mve,+fullfp16 run: for each of s0..s3 the sequence is
;       `vmovx.f16` on both operands,
;       two scalar `vmul.f16`,
;       `vins.f16` to write the second result back into the destination
;       s-register;
;     then `bx lr`.
;   - +mve.fp run: a single `vmul.f16 q0, q1, q0`, then `bx lr`.
320 define arm_aapcs_vfpcc <8 x half> @mul_float16_t(<8 x half> %src1, <8 x half> %src2) {
321 ; CHECK-MVE-LABEL: mul_float16_t:
322 ; CHECK-MVE: @ %bb.0: @ %entry
323 ; CHECK-MVE-NEXT: vmovx.f16 s8, s0
324 ; CHECK-MVE-NEXT: vmovx.f16 s10, s4
325 ; CHECK-MVE-NEXT: vmul.f16 s0, s4, s0
326 ; CHECK-MVE-NEXT: vmul.f16 s8, s10, s8
327 ; CHECK-MVE-NEXT: vins.f16 s0, s8
328 ; CHECK-MVE-NEXT: vmovx.f16 s4, s1
329 ; CHECK-MVE-NEXT: vmovx.f16 s8, s5
330 ; CHECK-MVE-NEXT: vmul.f16 s1, s5, s1
331 ; CHECK-MVE-NEXT: vmul.f16 s4, s8, s4
332 ; CHECK-MVE-NEXT: vmovx.f16 s8, s6
333 ; CHECK-MVE-NEXT: vins.f16 s1, s4
334 ; CHECK-MVE-NEXT: vmovx.f16 s4, s2
335 ; CHECK-MVE-NEXT: vmul.f16 s2, s6, s2
336 ; CHECK-MVE-NEXT: vmul.f16 s4, s8, s4
337 ; CHECK-MVE-NEXT: vins.f16 s2, s4
338 ; CHECK-MVE-NEXT: vmovx.f16 s4, s3
339 ; CHECK-MVE-NEXT: vmovx.f16 s6, s7
340 ; CHECK-MVE-NEXT: vmul.f16 s3, s7, s3
341 ; CHECK-MVE-NEXT: vmul.f16 s4, s6, s4
342 ; CHECK-MVE-NEXT: vins.f16 s3, s4
343 ; CHECK-MVE-NEXT: bx lr
345 ; CHECK-MVEFP-LABEL: mul_float16_t:
346 ; CHECK-MVEFP: @ %bb.0: @ %entry
347 ; CHECK-MVEFP-NEXT: vmul.f16 q0, q1, q0
348 ; CHECK-MVEFP-NEXT: bx lr
350 %0 = fmul nnan ninf nsz <8 x half> %src2, %src1
; mul_float32_t: `fmul nnan ninf nsz` of two <4 x float> arguments, written
; as %src2 * %src1. The two RUN lines have separate expectations:
;   - +mve,+fullfp16 run: four scalar `vmul.f32` on s0..s3 / s4..s7, then
;     `bx lr`.
;   - +mve.fp run: a single `vmul.f32 q0, q1, q0`, then `bx lr`.
354 define arm_aapcs_vfpcc <4 x float> @mul_float32_t(<4 x float> %src1, <4 x float> %src2) {
355 ; CHECK-MVE-LABEL: mul_float32_t:
356 ; CHECK-MVE: @ %bb.0: @ %entry
357 ; CHECK-MVE-NEXT: vmul.f32 s3, s7, s3
358 ; CHECK-MVE-NEXT: vmul.f32 s2, s6, s2
359 ; CHECK-MVE-NEXT: vmul.f32 s1, s5, s1
360 ; CHECK-MVE-NEXT: vmul.f32 s0, s4, s0
361 ; CHECK-MVE-NEXT: bx lr
363 ; CHECK-MVEFP-LABEL: mul_float32_t:
364 ; CHECK-MVEFP: @ %bb.0: @ %entry
365 ; CHECK-MVEFP-NEXT: vmul.f32 q0, q1, q0
366 ; CHECK-MVEFP-NEXT: bx lr
368 %0 = fmul nnan ninf nsz <4 x float> %src2, %src1
; mul_float64_t: `fmul nnan ninf nsz` of two <2 x double> arguments, written
; as %src2 * %src1. The expected output (shared by both RUN lines) contains
; no vector or scalar FP multiply. Instead it:
;   - saves r7/lr and d8..d11,
;   - copies q1 into q5 and q0 into q4,
;   - issues two `bl __aeabi_dmul` calls, one per 64-bit lane, with operands
;     passed in r0..r3,
;   - places the two results in d9 and d8, then moves q4 into q0 before the
;     registers are restored.
372 define arm_aapcs_vfpcc <2 x double> @mul_float64_t(<2 x double> %src1, <2 x double> %src2) {
373 ; CHECK-LABEL: mul_float64_t:
374 ; CHECK: @ %bb.0: @ %entry
375 ; CHECK-NEXT: .save {r7, lr}
376 ; CHECK-NEXT: push {r7, lr}
377 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
378 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
379 ; CHECK-NEXT: vmov q5, q1
380 ; CHECK-NEXT: vmov q4, q0
381 ; CHECK-NEXT: vmov r0, r1, d11
382 ; CHECK-NEXT: vmov r2, r3, d9
383 ; CHECK-NEXT: bl __aeabi_dmul
384 ; CHECK-NEXT: vmov lr, r12, d10
385 ; CHECK-NEXT: vmov r2, r3, d8
386 ; CHECK-NEXT: vmov d9, r0, r1
387 ; CHECK-NEXT: mov r0, lr
388 ; CHECK-NEXT: mov r1, r12
389 ; CHECK-NEXT: bl __aeabi_dmul
390 ; CHECK-NEXT: vmov d8, r0, r1
391 ; CHECK-NEXT: vmov q0, q4
392 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
393 ; CHECK-NEXT: pop {r7, pc}
395 %0 = fmul nnan ninf nsz <2 x double> %src2, %src1