1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
; extret1_f16_sf: default calling convention (no arm_aapcs_vfpcc).
; Adds two <8 x half> vectors, extracts lane 1 of the sum and stores it
; through %p. The function is declared to return half; the ret itself is not
; visible here - presumably it returns %e (TODO confirm).
; Expected code builds d0 from r0/r1, loads q1 from the stack and the pointer
; from [sp, #16], then uses vmovx.f16 to bring the upper half of s0 (lane 1)
; into the low half before the store and the move to r0.
4 define half @extret1_f16_sf(<8 x half> %a, <8 x half> %b, ptr nocapture %p) {
5 ; CHECK-LABEL: extret1_f16_sf:
7 ; CHECK-NEXT: vmov d0, r0, r1
8 ; CHECK-NEXT: mov r0, sp
9 ; CHECK-NEXT: vldrw.u32 q1, [r0]
10 ; CHECK-NEXT: ldr r0, [sp, #16]
11 ; CHECK-NEXT: vadd.f16 q0, q0, q1
12 ; CHECK-NEXT: vmovx.f16 s0, s0
13 ; CHECK-NEXT: vstr.16 s0, [r0]
14 ; CHECK-NEXT: vmov r0, s0
16 %c = fadd <8 x half> %a, %b
17 %e = extractelement <8 x half> %c, i32 1
18 store half %e, ptr %p, align 2
; extret4_f16_sf: default calling convention (no arm_aapcs_vfpcc).
; Adds two <8 x half> vectors, extracts lane 4 of the sum and stores it
; through %p. Declared to return half; the ret is not visible here -
; presumably it returns %e (TODO confirm).
; Lane 4 is the low half of s2, so the expected code only builds d1 (from
; r2/r3) and needs no vmovx before the store and the move to r0.
22 define half @extret4_f16_sf(<8 x half> %a, <8 x half> %b, ptr nocapture %p) {
23 ; CHECK-LABEL: extret4_f16_sf:
25 ; CHECK-NEXT: mov r0, sp
26 ; CHECK-NEXT: vmov d1, r2, r3
27 ; CHECK-NEXT: vldrw.u32 q1, [r0]
28 ; CHECK-NEXT: ldr r0, [sp, #16]
29 ; CHECK-NEXT: vadd.f16 q0, q0, q1
30 ; CHECK-NEXT: vstr.16 s2, [r0]
31 ; CHECK-NEXT: vmov r0, s2
33 %c = fadd <8 x half> %a, %b
34 %e = extractelement <8 x half> %c, i32 4
35 store half %e, ptr %p, align 2
; extret1_f16_hf: arm_aapcs_vfpcc variant of the lane-1 half extract.
; Adds two <8 x half> vectors, extracts lane 1 of the sum and stores it
; through %p. Declared to return half; the ret is not visible here -
; presumably it returns %e (TODO confirm).
; Expected code operates directly on q0/q1 with the pointer in r0; vmovx.f16
; moves the upper half of s0 (lane 1) into the low half of s0 before the store.
39 define arm_aapcs_vfpcc half @extret1_f16_hf(<8 x half> %a, <8 x half> %b, ptr nocapture %p) {
40 ; CHECK-LABEL: extret1_f16_hf:
42 ; CHECK-NEXT: vadd.f16 q0, q0, q1
43 ; CHECK-NEXT: vmovx.f16 s0, s0
44 ; CHECK-NEXT: vstr.16 s0, [r0]
46 %c = fadd <8 x half> %a, %b
47 %e = extractelement <8 x half> %c, i32 1
48 store half %e, ptr %p, align 2
; extret4_f16_hf: arm_aapcs_vfpcc variant of the lane-4 half extract.
; Adds two <8 x half> vectors, extracts lane 4 of the sum and stores it
; through %p. Declared to return half; the ret is not visible here -
; presumably it returns %e (TODO confirm).
; Lane 4 is the low half of s2; the expected code copies s2 into s0 and
; stores the half from s2 through r0.
52 define arm_aapcs_vfpcc half @extret4_f16_hf(<8 x half> %a, <8 x half> %b, ptr nocapture %p) {
53 ; CHECK-LABEL: extret4_f16_hf:
55 ; CHECK-NEXT: vadd.f16 q0, q0, q1
56 ; CHECK-NEXT: vmov.f32 s0, s2
57 ; CHECK-NEXT: vstr.16 s2, [r0]
59 %c = fadd <8 x half> %a, %b
60 %e = extractelement <8 x half> %c, i32 4
61 store half %e, ptr %p, align 2
; extret1_v8f16_hf: extract lane 1, store it, and splat it.
; Adds two <8 x half> vectors, extracts lane 1 of the sum, stores it through
; %p, then inserts it into lane 0 of an undef vector and shuffles with an
; all-zero mask (a splat of %e). Declared to return <8 x half>; the ret is
; not visible here - presumably it returns %s (TODO confirm).
; Expected code moves the lane into r1 once and reuses r1 for both the
; vdup.16 and the strh.
65 define arm_aapcs_vfpcc <8 x half> @extret1_v8f16_hf(<8 x half> %a, <8 x half> %b, ptr nocapture %p) {
66 ; CHECK-LABEL: extret1_v8f16_hf:
68 ; CHECK-NEXT: vadd.f16 q0, q0, q1
69 ; CHECK-NEXT: vmov.u16 r1, q0[1]
70 ; CHECK-NEXT: vdup.16 q0, r1
71 ; CHECK-NEXT: strh r1, [r0]
73 %c = fadd <8 x half> %a, %b
74 %e = extractelement <8 x half> %c, i32 1
75 store half %e, ptr %p, align 2
76 %i = insertelement <8 x half> undef, half %e, i32 0
77 %s = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
; extret4_v8f16_hf: extract lane 4, store it, and splat it.
; Adds two <8 x half> vectors, extracts lane 4 of the sum, stores it through
; %p, then inserts it into lane 0 of an undef vector and shuffles with an
; all-zero mask (a splat of %e). Declared to return <8 x half>; the ret is
; not visible here - presumably it returns %s (TODO confirm).
; Expected code moves the lane into r1 once and reuses r1 for both the
; vdup.16 and the strh.
81 define arm_aapcs_vfpcc <8 x half> @extret4_v8f16_hf(<8 x half> %a, <8 x half> %b, ptr nocapture %p) {
82 ; CHECK-LABEL: extret4_v8f16_hf:
84 ; CHECK-NEXT: vadd.f16 q0, q0, q1
85 ; CHECK-NEXT: vmov.u16 r1, q0[4]
86 ; CHECK-NEXT: vdup.16 q0, r1
87 ; CHECK-NEXT: strh r1, [r0]
89 %c = fadd <8 x half> %a, %b
90 %e = extractelement <8 x half> %c, i32 4
91 store half %e, ptr %p, align 2
92 %i = insertelement <8 x half> undef, half %e, i32 0
93 %s = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
; extret1_f32_sf: default calling convention (no arm_aapcs_vfpcc).
; Adds two <4 x float> vectors, extracts lane 1 of the sum and stores it
; through %p. Declared to return float; the ret is not visible here -
; presumably it returns %e (TODO confirm).
; Expected code builds d0 from r0/r1, loads q1 from the stack and the pointer
; from [sp, #16] into r1; lane 1 is s1, which is moved to r0 and stored.
98 define float @extret1_f32_sf(<4 x float> %a, <4 x float> %b, ptr nocapture %p) {
99 ; CHECK-LABEL: extret1_f32_sf:
101 ; CHECK-NEXT: vmov d0, r0, r1
102 ; CHECK-NEXT: mov r0, sp
103 ; CHECK-NEXT: vldrw.u32 q1, [r0]
104 ; CHECK-NEXT: ldr r1, [sp, #16]
105 ; CHECK-NEXT: vadd.f32 q0, q0, q1
106 ; CHECK-NEXT: vmov r0, s1
107 ; CHECK-NEXT: vstr s1, [r1]
109 %c = fadd <4 x float> %a, %b
110 %e = extractelement <4 x float> %c, i32 1
111 store float %e, ptr %p, align 4
; extret2_f32_sf: default calling convention (no arm_aapcs_vfpcc).
; Adds two <4 x float> vectors, extracts lane 2 of the sum and stores it
; through %p. Declared to return float; the ret is not visible here -
; presumably it returns %e (TODO confirm).
; Lane 2 is s2, so the expected code only builds d1 (from r2/r3) before the
; add, then moves s2 to r0 and stores it through r1.
115 define float @extret2_f32_sf(<4 x float> %a, <4 x float> %b, ptr nocapture %p) {
116 ; CHECK-LABEL: extret2_f32_sf:
118 ; CHECK-NEXT: mov r0, sp
119 ; CHECK-NEXT: vmov d1, r2, r3
120 ; CHECK-NEXT: vldrw.u32 q1, [r0]
121 ; CHECK-NEXT: ldr r1, [sp, #16]
122 ; CHECK-NEXT: vadd.f32 q0, q0, q1
123 ; CHECK-NEXT: vmov r0, s2
124 ; CHECK-NEXT: vstr s2, [r1]
126 %c = fadd <4 x float> %a, %b
127 %e = extractelement <4 x float> %c, i32 2
128 store float %e, ptr %p, align 4
; extret1_f32_hf: arm_aapcs_vfpcc variant of the lane-1 float extract.
; Adds two <4 x float> vectors, extracts lane 1 of the sum and stores it
; through %p. Declared to return float; the ret is not visible here -
; presumably it returns %e (TODO confirm).
; Expected code copies s1 (lane 1) into s0 and stores s1 through r0.
132 define arm_aapcs_vfpcc float @extret1_f32_hf(<4 x float> %a, <4 x float> %b, ptr nocapture %p) {
133 ; CHECK-LABEL: extret1_f32_hf:
135 ; CHECK-NEXT: vadd.f32 q0, q0, q1
136 ; CHECK-NEXT: vmov.f32 s0, s1
137 ; CHECK-NEXT: vstr s1, [r0]
139 %c = fadd <4 x float> %a, %b
140 %e = extractelement <4 x float> %c, i32 1
141 store float %e, ptr %p, align 4
; extret2_f32_hf: arm_aapcs_vfpcc variant of the lane-2 float extract.
; Adds two <4 x float> vectors, extracts lane 2 of the sum and stores it
; through %p. Declared to return float; the ret is not visible here -
; presumably it returns %e (TODO confirm).
; Expected code copies s2 (lane 2) into s0 and stores s2 through r0.
146 define arm_aapcs_vfpcc float @extret2_f32_hf(<4 x float> %a, <4 x float> %b, ptr nocapture %p) {
147 ; CHECK-LABEL: extret2_f32_hf:
149 ; CHECK-NEXT: vadd.f32 q0, q0, q1
150 ; CHECK-NEXT: vmov.f32 s0, s2
151 ; CHECK-NEXT: vstr s2, [r0]
153 %c = fadd <4 x float> %a, %b
154 %e = extractelement <4 x float> %c, i32 2
155 store float %e, ptr %p, align 4
; extret1_v4f32_hf: extract lane 1, store it, and splat it.
; Adds two <4 x float> vectors, extracts lane 1 of the sum, stores it through
; %p, then inserts it into lane 0 of an undef vector and shuffles with an
; all-zero mask (a splat of %e). Declared to return <4 x float>; the ret is
; not visible here - presumably it returns %s (TODO confirm).
; Expected code puts the sum in q1, so lane 1 is s5; it is stored directly
; from s5 and also moved to r1 to feed vdup.32 into q0.
159 define arm_aapcs_vfpcc <4 x float> @extret1_v4f32_hf(<4 x float> %a, <4 x float> %b, ptr nocapture %p) {
160 ; CHECK-LABEL: extret1_v4f32_hf:
162 ; CHECK-NEXT: vadd.f32 q1, q0, q1
163 ; CHECK-NEXT: vmov r1, s5
164 ; CHECK-NEXT: vstr s5, [r0]
165 ; CHECK-NEXT: vdup.32 q0, r1
167 %c = fadd <4 x float> %a, %b
168 %e = extractelement <4 x float> %c, i32 1
169 store float %e, ptr %p, align 4
170 %i = insertelement <4 x float> undef, float %e, i32 0
171 %s = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
; extret2_v4f32_hf: extract lane 2, store it, and splat it.
; Adds two <4 x float> vectors, extracts lane 2 of the sum, stores it through
; %p, then inserts it into lane 0 of an undef vector and shuffles with an
; all-zero mask (a splat of %e). Declared to return <4 x float>; the ret is
; not visible here - presumably it returns %s (TODO confirm).
; Expected code puts the sum in q1, so lane 2 is s6; it is stored directly
; from s6 and also moved to r1 to feed vdup.32 into q0.
175 define arm_aapcs_vfpcc <4 x float> @extret2_v4f32_hf(<4 x float> %a, <4 x float> %b, ptr nocapture %p) {
176 ; CHECK-LABEL: extret2_v4f32_hf:
178 ; CHECK-NEXT: vadd.f32 q1, q0, q1
179 ; CHECK-NEXT: vmov r1, s6
180 ; CHECK-NEXT: vstr s6, [r0]
181 ; CHECK-NEXT: vdup.32 q0, r1
183 %c = fadd <4 x float> %a, %b
184 %e = extractelement <4 x float> %c, i32 2
185 store float %e, ptr %p, align 4
186 %i = insertelement <4 x float> undef, float %e, i32 0
187 %s = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer