1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Scalar double llvm.pow. The GlobalISel checks expect a `bl pow` libcall
; wrapped in an x30 (link register) spill before the call and a reload after.
5 define double @pow_f64(double %a, double %b) {
6 ; CHECK-SD-LABEL: pow_f64:
7 ; CHECK-SD: // %bb.0: // %entry
10 ; CHECK-GI-LABEL: pow_f64:
11 ; CHECK-GI: // %bb.0: // %entry
12 ; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
13 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
14 ; CHECK-GI-NEXT: .cfi_offset w30, -16
15 ; CHECK-GI-NEXT: bl pow
16 ; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
19 %c = call double @llvm.pow.f64(double %a, double %b)
; Scalar float llvm.pow. SelectionDAG is expected to emit a bare `b powf`
; branch, while GlobalISel is expected to emit `bl powf` wrapped in an x30
; (link register) spill/reload.
23 define float @pow_f32(float %a, float %b) {
24 ; CHECK-SD-LABEL: pow_f32:
25 ; CHECK-SD: // %bb.0: // %entry
26 ; CHECK-SD-NEXT: b powf
28 ; CHECK-GI-LABEL: pow_f32:
29 ; CHECK-GI: // %bb.0: // %entry
30 ; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
31 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
32 ; CHECK-GI-NEXT: .cfi_offset w30, -16
33 ; CHECK-GI-NEXT: bl powf
34 ; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
37 %c = call float @llvm.pow.f32(float %a, float %b)
; Scalar half llvm.pow. Both RUN lines share the plain CHECK prefix here, so
; SelectionDAG and GlobalISel are expected to produce the same code: the
; operands are widened with `fcvt s0, h0` / `fcvt s1, h1` and the result is
; narrowed back with `fcvt h0, s0`, all inside an x30 spill/reload.
41 define half @pow_f16(half %a, half %b) {
42 ; CHECK-LABEL: pow_f16:
43 ; CHECK: // %bb.0: // %entry
44 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
45 ; CHECK-NEXT: .cfi_def_cfa_offset 16
46 ; CHECK-NEXT: .cfi_offset w30, -16
47 ; CHECK-NEXT: fcvt s0, h0
48 ; CHECK-NEXT: fcvt s1, h1
50 ; CHECK-NEXT: fcvt h0, s0
51 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
54 %c = call half @llvm.pow.f16(half %a, half %b)
; <2 x double> llvm.pow, expected to be scalarized into two `bl pow` calls.
; SelectionDAG spills both q operands, handles lane 1 first and inserts that
; result into lane 1 of v0. GlobalISel keeps lane 1 of each operand in d8/d9
; (saved in the prologue), calls pow on lane 0 first, and assembles the result
; in v1 before copying it to v0.
58 define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) {
59 ; CHECK-SD-LABEL: pow_v2f64:
60 ; CHECK-SD: // %bb.0: // %entry
61 ; CHECK-SD-NEXT: sub sp, sp, #64
62 ; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
63 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
64 ; CHECK-SD-NEXT: .cfi_offset w30, -16
65 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
66 ; CHECK-SD-NEXT: mov d0, v0.d[1]
67 ; CHECK-SD-NEXT: mov d1, v1.d[1]
68 ; CHECK-SD-NEXT: bl pow
69 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
70 ; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
71 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
72 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
73 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
74 ; CHECK-SD-NEXT: bl pow
75 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
76 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
77 ; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
78 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
79 ; CHECK-SD-NEXT: add sp, sp, #64
82 ; CHECK-GI-LABEL: pow_v2f64:
83 ; CHECK-GI: // %bb.0: // %entry
84 ; CHECK-GI-NEXT: sub sp, sp, #48
85 ; CHECK-GI-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
86 ; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
87 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
88 ; CHECK-GI-NEXT: .cfi_offset w30, -16
89 ; CHECK-GI-NEXT: .cfi_offset b8, -24
90 ; CHECK-GI-NEXT: .cfi_offset b9, -32
91 ; CHECK-GI-NEXT: mov d8, v0.d[1]
92 ; CHECK-GI-NEXT: mov d9, v1.d[1]
93 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
94 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
95 ; CHECK-GI-NEXT: bl pow
96 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
97 ; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
98 ; CHECK-GI-NEXT: fmov d1, d9
99 ; CHECK-GI-NEXT: fmov d0, d8
100 ; CHECK-GI-NEXT: bl pow
101 ; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
102 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
103 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
104 ; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
105 ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
106 ; CHECK-GI-NEXT: mov v0.16b, v1.16b
107 ; CHECK-GI-NEXT: add sp, sp, #48
110 %c = call <2 x double> @llvm.pow.v2f64(<2 x double> %a, <2 x double> %b)
; <3 x double> llvm.pow, expected to become three `bl pow` calls. In both
; check sequences the first call uses d0 with d3 (moved into d1), and the other
; two pair the incoming d1 with d4 and d2 with d5; the three results end up in
; d0, d1 and d2. d8-d12 are saved in the prologue and restored before return.
114 define <3 x double> @pow_v3f64(<3 x double> %a, <3 x double> %b) {
115 ; CHECK-SD-LABEL: pow_v3f64:
116 ; CHECK-SD: // %bb.0: // %entry
117 ; CHECK-SD-NEXT: str d12, [sp, #-48]! // 8-byte Folded Spill
118 ; CHECK-SD-NEXT: stp d11, d10, [sp, #8] // 16-byte Folded Spill
119 ; CHECK-SD-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill
120 ; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
121 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
122 ; CHECK-SD-NEXT: .cfi_offset w30, -8
123 ; CHECK-SD-NEXT: .cfi_offset b8, -16
124 ; CHECK-SD-NEXT: .cfi_offset b9, -24
125 ; CHECK-SD-NEXT: .cfi_offset b10, -32
126 ; CHECK-SD-NEXT: .cfi_offset b11, -40
127 ; CHECK-SD-NEXT: .cfi_offset b12, -48
128 ; CHECK-SD-NEXT: fmov d11, d1
129 ; CHECK-SD-NEXT: fmov d1, d3
130 ; CHECK-SD-NEXT: fmov d8, d5
131 ; CHECK-SD-NEXT: fmov d9, d4
132 ; CHECK-SD-NEXT: fmov d10, d2
133 ; CHECK-SD-NEXT: bl pow
134 ; CHECK-SD-NEXT: fmov d12, d0
135 ; CHECK-SD-NEXT: fmov d0, d11
136 ; CHECK-SD-NEXT: fmov d1, d9
137 ; CHECK-SD-NEXT: bl pow
138 ; CHECK-SD-NEXT: fmov d9, d0
139 ; CHECK-SD-NEXT: fmov d0, d10
140 ; CHECK-SD-NEXT: fmov d1, d8
141 ; CHECK-SD-NEXT: bl pow
142 ; CHECK-SD-NEXT: fmov d1, d9
143 ; CHECK-SD-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
144 ; CHECK-SD-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload
145 ; CHECK-SD-NEXT: fmov d2, d0
146 ; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
147 ; CHECK-SD-NEXT: fmov d0, d12
148 ; CHECK-SD-NEXT: ldr d12, [sp], #48 // 8-byte Folded Reload
151 ; CHECK-GI-LABEL: pow_v3f64:
152 ; CHECK-GI: // %bb.0: // %entry
153 ; CHECK-GI-NEXT: str d12, [sp, #-48]! // 8-byte Folded Spill
154 ; CHECK-GI-NEXT: stp d11, d10, [sp, #8] // 16-byte Folded Spill
155 ; CHECK-GI-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill
156 ; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
157 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
158 ; CHECK-GI-NEXT: .cfi_offset w30, -8
159 ; CHECK-GI-NEXT: .cfi_offset b8, -16
160 ; CHECK-GI-NEXT: .cfi_offset b9, -24
161 ; CHECK-GI-NEXT: .cfi_offset b10, -32
162 ; CHECK-GI-NEXT: .cfi_offset b11, -40
163 ; CHECK-GI-NEXT: .cfi_offset b12, -48
164 ; CHECK-GI-NEXT: fmov d8, d1
165 ; CHECK-GI-NEXT: fmov d1, d3
166 ; CHECK-GI-NEXT: fmov d9, d2
167 ; CHECK-GI-NEXT: fmov d10, d4
168 ; CHECK-GI-NEXT: fmov d11, d5
169 ; CHECK-GI-NEXT: bl pow
170 ; CHECK-GI-NEXT: fmov d12, d0
171 ; CHECK-GI-NEXT: fmov d0, d8
172 ; CHECK-GI-NEXT: fmov d1, d10
173 ; CHECK-GI-NEXT: bl pow
174 ; CHECK-GI-NEXT: fmov d8, d0
175 ; CHECK-GI-NEXT: fmov d0, d9
176 ; CHECK-GI-NEXT: fmov d1, d11
177 ; CHECK-GI-NEXT: bl pow
178 ; CHECK-GI-NEXT: fmov d1, d8
179 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
180 ; CHECK-GI-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload
181 ; CHECK-GI-NEXT: fmov d2, d0
182 ; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
183 ; CHECK-GI-NEXT: fmov d0, d12
184 ; CHECK-GI-NEXT: ldr d12, [sp], #48 // 8-byte Folded Reload
187 %c = call <3 x double> @llvm.pow.v3f64(<3 x double> %a, <3 x double> %b)
; <4 x double> llvm.pow, expected to become four `bl pow` calls. SelectionDAG
; uses a 96-byte frame and keeps the operands and partial results in q-sized
; stack slots. GlobalISel uses a 112-byte frame, holds lane 1 of each operand
; register in d8-d11 (saved in the prologue), and spills q1 and q3 so they can
; be reloaded for the third call.
191 define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) {
192 ; CHECK-SD-LABEL: pow_v4f64:
193 ; CHECK-SD: // %bb.0: // %entry
194 ; CHECK-SD-NEXT: sub sp, sp, #96
195 ; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
196 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
197 ; CHECK-SD-NEXT: .cfi_offset w30, -16
198 ; CHECK-SD-NEXT: stp q0, q2, [sp] // 32-byte Folded Spill
199 ; CHECK-SD-NEXT: mov d0, v0.d[1]
200 ; CHECK-SD-NEXT: stp q1, q3, [sp, #48] // 32-byte Folded Spill
201 ; CHECK-SD-NEXT: mov d1, v2.d[1]
202 ; CHECK-SD-NEXT: bl pow
203 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
204 ; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
205 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
206 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
207 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
208 ; CHECK-SD-NEXT: bl pow
209 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
210 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
211 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
212 ; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
213 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload
214 ; CHECK-SD-NEXT: mov d0, v0.d[1]
215 ; CHECK-SD-NEXT: mov d1, v1.d[1]
216 ; CHECK-SD-NEXT: bl pow
217 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
218 ; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
219 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload
220 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
221 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
222 ; CHECK-SD-NEXT: bl pow
223 ; CHECK-SD-NEXT: fmov d1, d0
224 ; CHECK-SD-NEXT: ldp q2, q0, [sp, #16] // 32-byte Folded Reload
225 ; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
226 ; CHECK-SD-NEXT: mov v1.d[1], v2.d[0]
227 ; CHECK-SD-NEXT: add sp, sp, #96
230 ; CHECK-GI-LABEL: pow_v4f64:
231 ; CHECK-GI: // %bb.0: // %entry
232 ; CHECK-GI-NEXT: sub sp, sp, #112
233 ; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
234 ; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
235 ; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
236 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 112
237 ; CHECK-GI-NEXT: .cfi_offset w30, -16
238 ; CHECK-GI-NEXT: .cfi_offset b8, -24
239 ; CHECK-GI-NEXT: .cfi_offset b9, -32
240 ; CHECK-GI-NEXT: .cfi_offset b10, -40
241 ; CHECK-GI-NEXT: .cfi_offset b11, -48
242 ; CHECK-GI-NEXT: mov v4.16b, v1.16b
243 ; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
244 ; CHECK-GI-NEXT: mov v1.16b, v2.16b
245 ; CHECK-GI-NEXT: str q3, [sp] // 16-byte Folded Spill
246 ; CHECK-GI-NEXT: mov d8, v0.d[1]
247 ; CHECK-GI-NEXT: mov d10, v2.d[1]
248 ; CHECK-GI-NEXT: mov d11, v3.d[1]
249 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
250 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
251 ; CHECK-GI-NEXT: mov d9, v4.d[1]
252 ; CHECK-GI-NEXT: bl pow
253 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
254 ; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
255 ; CHECK-GI-NEXT: fmov d1, d10
256 ; CHECK-GI-NEXT: fmov d0, d8
257 ; CHECK-GI-NEXT: bl pow
258 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
259 ; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
260 ; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
261 ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
262 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
263 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
264 ; CHECK-GI-NEXT: bl pow
265 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
266 ; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
267 ; CHECK-GI-NEXT: fmov d1, d11
268 ; CHECK-GI-NEXT: fmov d0, d9
269 ; CHECK-GI-NEXT: bl pow
270 ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
271 ; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
272 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
273 ; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
274 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
275 ; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
276 ; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
277 ; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
278 ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
279 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
280 ; CHECK-GI-NEXT: add sp, sp, #112
283 %c = call <4 x double> @llvm.pow.v4f64(<4 x double> %a, <4 x double> %b)
; <2 x float> llvm.pow, expected to become two `bl powf` calls. SelectionDAG
; processes lane 1 first and inserts its result into lane 1 of v0. GlobalISel
; keeps lane 1 of each operand in s8/s9, processes lane 0 first, and assembles
; the result in v1 before moving it to d0.
287 define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) {
288 ; CHECK-SD-LABEL: pow_v2f32:
289 ; CHECK-SD: // %bb.0: // %entry
290 ; CHECK-SD-NEXT: sub sp, sp, #64
291 ; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
292 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
293 ; CHECK-SD-NEXT: .cfi_offset w30, -16
294 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
295 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
296 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
297 ; CHECK-SD-NEXT: mov s0, v0.s[1]
298 ; CHECK-SD-NEXT: mov s1, v1.s[1]
299 ; CHECK-SD-NEXT: bl powf
300 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
301 ; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
302 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
303 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
304 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1
305 ; CHECK-SD-NEXT: bl powf
306 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
307 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
308 ; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
309 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
310 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
311 ; CHECK-SD-NEXT: add sp, sp, #64
314 ; CHECK-GI-LABEL: pow_v2f32:
315 ; CHECK-GI: // %bb.0: // %entry
316 ; CHECK-GI-NEXT: sub sp, sp, #48
317 ; CHECK-GI-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
318 ; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
319 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
320 ; CHECK-GI-NEXT: .cfi_offset w30, -16
321 ; CHECK-GI-NEXT: .cfi_offset b8, -24
322 ; CHECK-GI-NEXT: .cfi_offset b9, -32
323 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
324 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
325 ; CHECK-GI-NEXT: mov s8, v0.s[1]
326 ; CHECK-GI-NEXT: mov s9, v1.s[1]
327 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
328 ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1
329 ; CHECK-GI-NEXT: bl powf
330 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
331 ; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
332 ; CHECK-GI-NEXT: fmov s1, s9
333 ; CHECK-GI-NEXT: fmov s0, s8
334 ; CHECK-GI-NEXT: bl powf
335 ; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
336 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
337 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
338 ; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
339 ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0]
340 ; CHECK-GI-NEXT: fmov d0, d1
341 ; CHECK-GI-NEXT: add sp, sp, #48
344 %c = call <2 x float> @llvm.pow.v2f32(<2 x float> %a, <2 x float> %b)
; <3 x float> llvm.pow, expected to become three `bl powf` calls covering
; lanes 0-2. The GlobalISel checks additionally expect the third result to be
; written into lane 3 of the return vector (`mov v1.s[3], v0.s[0]`); the
; SelectionDAG checks only insert lanes 1 and 2.
348 define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) {
349 ; CHECK-SD-LABEL: pow_v3f32:
350 ; CHECK-SD: // %bb.0: // %entry
351 ; CHECK-SD-NEXT: sub sp, sp, #64
352 ; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
353 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
354 ; CHECK-SD-NEXT: .cfi_offset w30, -16
355 ; CHECK-SD-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill
356 ; CHECK-SD-NEXT: mov s0, v0.s[1]
357 ; CHECK-SD-NEXT: mov s1, v1.s[1]
358 ; CHECK-SD-NEXT: bl powf
359 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
360 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
361 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
362 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
363 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1
364 ; CHECK-SD-NEXT: bl powf
365 ; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
366 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
367 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
368 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
369 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
370 ; CHECK-SD-NEXT: mov s0, v0.s[2]
371 ; CHECK-SD-NEXT: mov s1, v1.s[2]
372 ; CHECK-SD-NEXT: bl powf
373 ; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
374 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
375 ; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
376 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
377 ; CHECK-SD-NEXT: mov v0.16b, v1.16b
378 ; CHECK-SD-NEXT: add sp, sp, #64
381 ; CHECK-GI-LABEL: pow_v3f32:
382 ; CHECK-GI: // %bb.0: // %entry
383 ; CHECK-GI-NEXT: sub sp, sp, #80
384 ; CHECK-GI-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
385 ; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
386 ; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
387 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
388 ; CHECK-GI-NEXT: .cfi_offset w30, -16
389 ; CHECK-GI-NEXT: .cfi_offset b8, -24
390 ; CHECK-GI-NEXT: .cfi_offset b9, -32
391 ; CHECK-GI-NEXT: .cfi_offset b10, -40
392 ; CHECK-GI-NEXT: .cfi_offset b11, -48
393 ; CHECK-GI-NEXT: mov s8, v0.s[1]
394 ; CHECK-GI-NEXT: mov s9, v0.s[2]
395 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
396 ; CHECK-GI-NEXT: mov s10, v1.s[1]
397 ; CHECK-GI-NEXT: mov s11, v1.s[2]
398 ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1
399 ; CHECK-GI-NEXT: bl powf
400 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
401 ; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
402 ; CHECK-GI-NEXT: fmov s1, s10
403 ; CHECK-GI-NEXT: fmov s0, s8
404 ; CHECK-GI-NEXT: bl powf
405 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
406 ; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
407 ; CHECK-GI-NEXT: fmov s1, s11
408 ; CHECK-GI-NEXT: fmov s0, s9
409 ; CHECK-GI-NEXT: bl powf
410 ; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload
411 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
412 ; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
413 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
414 ; CHECK-GI-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
415 ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
416 ; CHECK-GI-NEXT: mov v1.s[2], v0.s[0]
417 ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
418 ; CHECK-GI-NEXT: mov v0.16b, v1.16b
419 ; CHECK-GI-NEXT: add sp, sp, #80
422 %c = call <3 x float> @llvm.pow.v3f32(<3 x float> %a, <3 x float> %b)
; <4 x float> llvm.pow, expected to become four `bl powf` calls. SelectionDAG
; re-extracts each lane from the spilled q operands and accumulates the result
; vector through the stack slot at [sp]. GlobalISel extracts lanes 1-3 of both
; operands into s8-s13 up front and spills the first three scalar results until
; the final lane inserts.
426 define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) {
427 ; CHECK-SD-LABEL: pow_v4f32:
428 ; CHECK-SD: // %bb.0: // %entry
429 ; CHECK-SD-NEXT: sub sp, sp, #64
430 ; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
431 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
432 ; CHECK-SD-NEXT: .cfi_offset w30, -16
433 ; CHECK-SD-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill
434 ; CHECK-SD-NEXT: mov s0, v0.s[1]
435 ; CHECK-SD-NEXT: mov s1, v1.s[1]
436 ; CHECK-SD-NEXT: bl powf
437 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
438 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
439 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
440 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
441 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1
442 ; CHECK-SD-NEXT: bl powf
443 ; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
444 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
445 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
446 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
447 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
448 ; CHECK-SD-NEXT: mov s0, v0.s[2]
449 ; CHECK-SD-NEXT: mov s1, v1.s[2]
450 ; CHECK-SD-NEXT: bl powf
451 ; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
452 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
453 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
454 ; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
455 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
456 ; CHECK-SD-NEXT: mov s0, v0.s[3]
457 ; CHECK-SD-NEXT: mov s1, v1.s[3]
458 ; CHECK-SD-NEXT: bl powf
459 ; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
460 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
461 ; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
462 ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0]
463 ; CHECK-SD-NEXT: mov v0.16b, v1.16b
464 ; CHECK-SD-NEXT: add sp, sp, #64
467 ; CHECK-GI-LABEL: pow_v4f32:
468 ; CHECK-GI: // %bb.0: // %entry
469 ; CHECK-GI-NEXT: sub sp, sp, #112
470 ; CHECK-GI-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
471 ; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
472 ; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
473 ; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
474 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 112
475 ; CHECK-GI-NEXT: .cfi_offset w30, -16
476 ; CHECK-GI-NEXT: .cfi_offset b8, -24
477 ; CHECK-GI-NEXT: .cfi_offset b9, -32
478 ; CHECK-GI-NEXT: .cfi_offset b10, -40
479 ; CHECK-GI-NEXT: .cfi_offset b11, -48
480 ; CHECK-GI-NEXT: .cfi_offset b12, -56
481 ; CHECK-GI-NEXT: .cfi_offset b13, -64
482 ; CHECK-GI-NEXT: mov s8, v0.s[1]
483 ; CHECK-GI-NEXT: mov s9, v0.s[2]
484 ; CHECK-GI-NEXT: mov s10, v0.s[3]
485 ; CHECK-GI-NEXT: mov s11, v1.s[1]
486 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
487 ; CHECK-GI-NEXT: mov s12, v1.s[2]
488 ; CHECK-GI-NEXT: mov s13, v1.s[3]
489 ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1
490 ; CHECK-GI-NEXT: bl powf
491 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
492 ; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
493 ; CHECK-GI-NEXT: fmov s1, s11
494 ; CHECK-GI-NEXT: fmov s0, s8
495 ; CHECK-GI-NEXT: bl powf
496 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
497 ; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
498 ; CHECK-GI-NEXT: fmov s1, s12
499 ; CHECK-GI-NEXT: fmov s0, s9
500 ; CHECK-GI-NEXT: bl powf
501 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
502 ; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
503 ; CHECK-GI-NEXT: fmov s1, s13
504 ; CHECK-GI-NEXT: fmov s0, s10
505 ; CHECK-GI-NEXT: bl powf
506 ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload
507 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
508 ; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
509 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
510 ; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
511 ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
512 ; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
513 ; CHECK-GI-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
514 ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0]
515 ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
516 ; CHECK-GI-NEXT: mov v0.16b, v1.16b
517 ; CHECK-GI-NEXT: add sp, sp, #112
520 %c = call <4 x float> @llvm.pow.v4f32(<4 x float> %a, <4 x float> %b)
; <8 x float> llvm.pow, expected to become eight `bl powf` calls. SelectionDAG
; works through the q0/q2 pair and then the q1/q3 pair lane by lane in a
; 96-byte frame. GlobalISel uses a 208-byte frame, saves d8-d15 in the
; prologue, and additionally spills individual s-register lanes to the stack
; before the first call.
524 define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
525 ; CHECK-SD-LABEL: pow_v8f32:
526 ; CHECK-SD: // %bb.0: // %entry
527 ; CHECK-SD-NEXT: sub sp, sp, #96
528 ; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
529 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
530 ; CHECK-SD-NEXT: .cfi_offset w30, -16
531 ; CHECK-SD-NEXT: stp q0, q2, [sp] // 32-byte Folded Spill
532 ; CHECK-SD-NEXT: mov s0, v0.s[1]
533 ; CHECK-SD-NEXT: stp q1, q3, [sp, #32] // 32-byte Folded Spill
534 ; CHECK-SD-NEXT: mov s1, v2.s[1]
535 ; CHECK-SD-NEXT: bl powf
536 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
537 ; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
538 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
539 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
540 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1
541 ; CHECK-SD-NEXT: bl powf
542 ; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
543 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
544 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
545 ; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
546 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
547 ; CHECK-SD-NEXT: mov s0, v0.s[2]
548 ; CHECK-SD-NEXT: mov s1, v1.s[2]
549 ; CHECK-SD-NEXT: bl powf
550 ; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
551 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
552 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
553 ; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
554 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
555 ; CHECK-SD-NEXT: mov s0, v0.s[3]
556 ; CHECK-SD-NEXT: mov s1, v1.s[3]
557 ; CHECK-SD-NEXT: bl powf
558 ; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
559 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
560 ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0]
561 ; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
562 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
563 ; CHECK-SD-NEXT: mov s0, v0.s[1]
564 ; CHECK-SD-NEXT: mov s1, v1.s[1]
565 ; CHECK-SD-NEXT: bl powf
566 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
567 ; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
568 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
569 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
570 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1
571 ; CHECK-SD-NEXT: bl powf
572 ; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
573 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
574 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
575 ; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
576 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
577 ; CHECK-SD-NEXT: mov s0, v0.s[2]
578 ; CHECK-SD-NEXT: mov s1, v1.s[2]
579 ; CHECK-SD-NEXT: bl powf
580 ; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
581 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
582 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
583 ; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
584 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
585 ; CHECK-SD-NEXT: mov s0, v0.s[3]
586 ; CHECK-SD-NEXT: mov s1, v1.s[3]
587 ; CHECK-SD-NEXT: bl powf
588 ; CHECK-SD-NEXT: fmov s2, s0
589 ; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
590 ; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
591 ; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
592 ; CHECK-SD-NEXT: mov v1.s[3], v2.s[0]
593 ; CHECK-SD-NEXT: add sp, sp, #96
596 ; CHECK-GI-LABEL: pow_v8f32:
597 ; CHECK-GI: // %bb.0: // %entry
598 ; CHECK-GI-NEXT: sub sp, sp, #208
599 ; CHECK-GI-NEXT: stp d15, d14, [sp, #128] // 16-byte Folded Spill
600 ; CHECK-GI-NEXT: stp d13, d12, [sp, #144] // 16-byte Folded Spill
601 ; CHECK-GI-NEXT: stp d11, d10, [sp, #160] // 16-byte Folded Spill
602 ; CHECK-GI-NEXT: stp d9, d8, [sp, #176] // 16-byte Folded Spill
603 ; CHECK-GI-NEXT: str x30, [sp, #192] // 8-byte Folded Spill
604 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 208
605 ; CHECK-GI-NEXT: .cfi_offset w30, -16
606 ; CHECK-GI-NEXT: .cfi_offset b8, -24
607 ; CHECK-GI-NEXT: .cfi_offset b9, -32
608 ; CHECK-GI-NEXT: .cfi_offset b10, -40
609 ; CHECK-GI-NEXT: .cfi_offset b11, -48
610 ; CHECK-GI-NEXT: .cfi_offset b12, -56
611 ; CHECK-GI-NEXT: .cfi_offset b13, -64
612 ; CHECK-GI-NEXT: .cfi_offset b14, -72
613 ; CHECK-GI-NEXT: .cfi_offset b15, -80
614 ; CHECK-GI-NEXT: mov v4.16b, v1.16b
615 ; CHECK-GI-NEXT: mov v1.16b, v2.16b
616 ; CHECK-GI-NEXT: mov s8, v0.s[1]
617 ; CHECK-GI-NEXT: mov s9, v0.s[2]
618 ; CHECK-GI-NEXT: mov s10, v0.s[3]
619 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
620 ; CHECK-GI-NEXT: mov s12, v3.s[1]
621 ; CHECK-GI-NEXT: mov s11, v3.s[2]
622 ; CHECK-GI-NEXT: mov s2, v4.s[1]
623 ; CHECK-GI-NEXT: stp q3, q4, [sp] // 32-byte Folded Spill
624 ; CHECK-GI-NEXT: mov s5, v4.s[3]
625 ; CHECK-GI-NEXT: mov s14, v1.s[1]
626 ; CHECK-GI-NEXT: mov s15, v1.s[2]
627 ; CHECK-GI-NEXT: mov s13, v1.s[3]
628 ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1
629 ; CHECK-GI-NEXT: str s2, [sp, #48] // 4-byte Folded Spill
630 ; CHECK-GI-NEXT: mov s2, v4.s[2]
631 ; CHECK-GI-NEXT: str s2, [sp, #112] // 4-byte Folded Spill
632 ; CHECK-GI-NEXT: mov s2, v3.s[3]
633 ; CHECK-GI-NEXT: stp s2, s5, [sp, #200] // 8-byte Folded Spill
634 ; CHECK-GI-NEXT: bl powf
635 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
636 ; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
637 ; CHECK-GI-NEXT: fmov s1, s14
638 ; CHECK-GI-NEXT: fmov s0, s8
639 ; CHECK-GI-NEXT: bl powf
640 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
641 ; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
642 ; CHECK-GI-NEXT: fmov s1, s15
643 ; CHECK-GI-NEXT: fmov s0, s9
644 ; CHECK-GI-NEXT: bl powf
645 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
646 ; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
647 ; CHECK-GI-NEXT: fmov s1, s13
648 ; CHECK-GI-NEXT: fmov s0, s10
649 ; CHECK-GI-NEXT: bl powf
650 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
651 ; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
652 ; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
653 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
654 ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1
655 ; CHECK-GI-NEXT: bl powf
656 ; CHECK-GI-NEXT: fmov s1, s12
657 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
658 ; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
659 ; CHECK-GI-NEXT: ldr s0, [sp, #48] // 4-byte Folded Reload
660 ; CHECK-GI-NEXT: bl powf
661 ; CHECK-GI-NEXT: fmov s1, s11
662 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
663 ; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
664 ; CHECK-GI-NEXT: ldr s0, [sp, #112] // 4-byte Folded Reload
665 ; CHECK-GI-NEXT: bl powf
666 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
667 ; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
668 ; CHECK-GI-NEXT: ldp s1, s0, [sp, #200] // 8-byte Folded Reload
669 ; CHECK-GI-NEXT: bl powf
670 ; CHECK-GI-NEXT: ldp q3, q2, [sp, #16] // 32-byte Folded Reload
671 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
672 ; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload
673 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #176] // 16-byte Folded Reload
674 ; CHECK-GI-NEXT: ldr x30, [sp, #192] // 8-byte Folded Reload
675 ; CHECK-GI-NEXT: ldp d11, d10, [sp, #160] // 16-byte Folded Reload
676 ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
677 ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
678 ; CHECK-GI-NEXT: ldp d13, d12, [sp, #144] // 16-byte Folded Reload
679 ; CHECK-GI-NEXT: mov v3.s[1], v2.s[0]
680 ; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload
681 ; CHECK-GI-NEXT: ldp d15, d14, [sp, #128] // 16-byte Folded Reload
682 ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0]
683 ; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload
684 ; CHECK-GI-NEXT: mov v3.s[2], v2.s[0]
685 ; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
686 ; CHECK-GI-NEXT: mov v1.s[3], v2.s[0]
687 ; CHECK-GI-NEXT: mov v3.s[3], v0.s[0]
688 ; CHECK-GI-NEXT: mov v2.16b, v1.16b
689 ; CHECK-GI-NEXT: mov v1.16b, v3.16b
690 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
691 ; CHECK-GI-NEXT: add sp, sp, #208
694 %c = call <8 x float> @llvm.pow.v8f32(<8 x float> %a, <8 x float> %b)
; pow_v7f16: llvm.pow on <7 x half>, verified for both instruction selectors
; named in the RUN lines (default SelectionDAG and -global-isel).
; In both expectations every powf call takes operands widened with
; fcvt half->single, and its result is narrowed back with fcvt single->half.
; SelectionDAG expectation: 64-byte frame, eight powf calls (lanes 0-7).
; GlobalISel expectation: 176-byte frame with d8-d15 saved, seven powf calls
; (lanes 0-6); the final result is inserted into both lane 6 and lane 7.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script rather than editing by hand.
698 define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) {
699 ; CHECK-SD-LABEL: pow_v7f16:
700 ; CHECK-SD: // %bb.0: // %entry
701 ; CHECK-SD-NEXT: sub sp, sp, #64
702 ; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
703 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
704 ; CHECK-SD-NEXT: .cfi_offset w30, -16
705 ; CHECK-SD-NEXT: mov h3, v0.h[1]
706 ; CHECK-SD-NEXT: mov h2, v1.h[1]
707 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
708 ; CHECK-SD-NEXT: fcvt s0, h3
709 ; CHECK-SD-NEXT: fcvt s1, h2
710 ; CHECK-SD-NEXT: bl powf
711 ; CHECK-SD-NEXT: fcvt h0, s0
712 ; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
713 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
714 ; CHECK-SD-NEXT: fcvt s0, h0
715 ; CHECK-SD-NEXT: fcvt s1, h1
716 ; CHECK-SD-NEXT: bl powf
717 ; CHECK-SD-NEXT: fcvt h0, s0
718 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
719 ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
720 ; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
721 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
722 ; CHECK-SD-NEXT: mov h0, v0.h[2]
723 ; CHECK-SD-NEXT: mov h1, v1.h[2]
724 ; CHECK-SD-NEXT: fcvt s0, h0
725 ; CHECK-SD-NEXT: fcvt s1, h1
726 ; CHECK-SD-NEXT: bl powf
727 ; CHECK-SD-NEXT: fcvt h0, s0
728 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
729 ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0]
730 ; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
731 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
732 ; CHECK-SD-NEXT: mov h0, v0.h[3]
733 ; CHECK-SD-NEXT: mov h1, v1.h[3]
734 ; CHECK-SD-NEXT: fcvt s0, h0
735 ; CHECK-SD-NEXT: fcvt s1, h1
736 ; CHECK-SD-NEXT: bl powf
737 ; CHECK-SD-NEXT: fcvt h0, s0
738 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
739 ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0]
740 ; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
741 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
742 ; CHECK-SD-NEXT: mov h0, v0.h[4]
743 ; CHECK-SD-NEXT: mov h1, v1.h[4]
744 ; CHECK-SD-NEXT: fcvt s0, h0
745 ; CHECK-SD-NEXT: fcvt s1, h1
746 ; CHECK-SD-NEXT: bl powf
747 ; CHECK-SD-NEXT: fcvt h0, s0
748 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
749 ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0]
750 ; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
751 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
752 ; CHECK-SD-NEXT: mov h0, v0.h[5]
753 ; CHECK-SD-NEXT: mov h1, v1.h[5]
754 ; CHECK-SD-NEXT: fcvt s0, h0
755 ; CHECK-SD-NEXT: fcvt s1, h1
756 ; CHECK-SD-NEXT: bl powf
757 ; CHECK-SD-NEXT: fcvt h0, s0
758 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
759 ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0]
760 ; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
761 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
762 ; CHECK-SD-NEXT: mov h0, v0.h[6]
763 ; CHECK-SD-NEXT: mov h1, v1.h[6]
764 ; CHECK-SD-NEXT: fcvt s0, h0
765 ; CHECK-SD-NEXT: fcvt s1, h1
766 ; CHECK-SD-NEXT: bl powf
767 ; CHECK-SD-NEXT: fcvt h0, s0
768 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
769 ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0]
770 ; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
771 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
772 ; CHECK-SD-NEXT: mov h0, v0.h[7]
773 ; CHECK-SD-NEXT: mov h1, v1.h[7]
774 ; CHECK-SD-NEXT: fcvt s0, h0
775 ; CHECK-SD-NEXT: fcvt s1, h1
776 ; CHECK-SD-NEXT: bl powf
777 ; CHECK-SD-NEXT: fcvt h1, s0
778 ; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
779 ; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
780 ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0]
781 ; CHECK-SD-NEXT: add sp, sp, #64
784 ; CHECK-GI-LABEL: pow_v7f16:
785 ; CHECK-GI: // %bb.0: // %entry
786 ; CHECK-GI-NEXT: sub sp, sp, #176
787 ; CHECK-GI-NEXT: stp d15, d14, [sp, #96] // 16-byte Folded Spill
788 ; CHECK-GI-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill
789 ; CHECK-GI-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill
790 ; CHECK-GI-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill
791 ; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill
792 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176
793 ; CHECK-GI-NEXT: .cfi_offset w30, -16
794 ; CHECK-GI-NEXT: .cfi_offset b8, -24
795 ; CHECK-GI-NEXT: .cfi_offset b9, -32
796 ; CHECK-GI-NEXT: .cfi_offset b10, -40
797 ; CHECK-GI-NEXT: .cfi_offset b11, -48
798 ; CHECK-GI-NEXT: .cfi_offset b12, -56
799 ; CHECK-GI-NEXT: .cfi_offset b13, -64
800 ; CHECK-GI-NEXT: .cfi_offset b14, -72
801 ; CHECK-GI-NEXT: .cfi_offset b15, -80
802 ; CHECK-GI-NEXT: mov h2, v0.h[5]
803 ; CHECK-GI-NEXT: mov h9, v0.h[1]
804 ; CHECK-GI-NEXT: mov h10, v0.h[2]
805 ; CHECK-GI-NEXT: mov h11, v0.h[3]
806 ; CHECK-GI-NEXT: mov h12, v0.h[4]
807 ; CHECK-GI-NEXT: mov h14, v1.h[1]
808 ; CHECK-GI-NEXT: mov h15, v1.h[2]
809 ; CHECK-GI-NEXT: mov h8, v1.h[3]
810 ; CHECK-GI-NEXT: mov h13, v1.h[4]
811 ; CHECK-GI-NEXT: str h2, [sp, #48] // 2-byte Folded Spill
812 ; CHECK-GI-NEXT: mov h2, v0.h[6]
813 ; CHECK-GI-NEXT: fcvt s0, h0
814 ; CHECK-GI-NEXT: str h2, [sp, #80] // 2-byte Folded Spill
815 ; CHECK-GI-NEXT: mov h2, v1.h[5]
816 ; CHECK-GI-NEXT: str h2, [sp, #172] // 2-byte Folded Spill
817 ; CHECK-GI-NEXT: mov h2, v1.h[6]
818 ; CHECK-GI-NEXT: fcvt s1, h1
819 ; CHECK-GI-NEXT: str h2, [sp, #174] // 2-byte Folded Spill
820 ; CHECK-GI-NEXT: bl powf
821 ; CHECK-GI-NEXT: fcvt s2, h9
822 ; CHECK-GI-NEXT: fcvt h0, s0
823 ; CHECK-GI-NEXT: fcvt s1, h14
824 ; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
825 ; CHECK-GI-NEXT: fmov s0, s2
826 ; CHECK-GI-NEXT: bl powf
827 ; CHECK-GI-NEXT: fcvt s2, h10
828 ; CHECK-GI-NEXT: fcvt h0, s0
829 ; CHECK-GI-NEXT: fcvt s1, h15
830 ; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
831 ; CHECK-GI-NEXT: fmov s0, s2
832 ; CHECK-GI-NEXT: bl powf
833 ; CHECK-GI-NEXT: fcvt s2, h11
834 ; CHECK-GI-NEXT: fcvt h0, s0
835 ; CHECK-GI-NEXT: fcvt s1, h8
836 ; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
837 ; CHECK-GI-NEXT: fmov s0, s2
838 ; CHECK-GI-NEXT: bl powf
839 ; CHECK-GI-NEXT: fcvt s2, h12
840 ; CHECK-GI-NEXT: fcvt h0, s0
841 ; CHECK-GI-NEXT: fcvt s1, h13
842 ; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
843 ; CHECK-GI-NEXT: fmov s0, s2
844 ; CHECK-GI-NEXT: bl powf
845 ; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload
846 ; CHECK-GI-NEXT: fcvt h0, s0
847 ; CHECK-GI-NEXT: fcvt s2, h1
848 ; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
849 ; CHECK-GI-NEXT: ldr h0, [sp, #172] // 2-byte Folded Reload
850 ; CHECK-GI-NEXT: fcvt s1, h0
851 ; CHECK-GI-NEXT: fmov s0, s2
852 ; CHECK-GI-NEXT: bl powf
853 ; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Folded Reload
854 ; CHECK-GI-NEXT: fcvt h0, s0
855 ; CHECK-GI-NEXT: fcvt s2, h1
856 ; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
857 ; CHECK-GI-NEXT: ldr h0, [sp, #174] // 2-byte Folded Reload
858 ; CHECK-GI-NEXT: fcvt s1, h0
859 ; CHECK-GI-NEXT: fmov s0, s2
860 ; CHECK-GI-NEXT: bl powf
861 ; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
862 ; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
863 ; CHECK-GI-NEXT: fcvt h0, s0
864 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload
865 ; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload
866 ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
867 ; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
868 ; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload
869 ; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload
870 ; CHECK-GI-NEXT: ldp d15, d14, [sp, #96] // 16-byte Folded Reload
871 ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
872 ; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
873 ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
874 ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
875 ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
876 ; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload
877 ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
878 ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0]
879 ; CHECK-GI-NEXT: mov v1.h[7], v0.h[0]
880 ; CHECK-GI-NEXT: mov v0.16b, v1.16b
881 ; CHECK-GI-NEXT: add sp, sp, #176
884 %c = call <7 x half> @llvm.pow.v7f16(<7 x half> %a, <7 x half> %b)
; pow_v4f16: llvm.pow on <4 x half>, verified for both instruction selectors
; named in the RUN lines (default SelectionDAG and -global-isel).
; Both expectations contain four powf calls, each with fcvt half->single on
; the operands and fcvt single->half on the result.
; SelectionDAG expectation: 64-byte frame; the two inputs are spilled as
; q0/q1 and reloaded to extract each lane.
; GlobalISel expectation: 112-byte frame; lanes 1-3 of both operands are
; extracted up front into h8-h13, with d8-d13 saved and restored around them.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script rather than editing by hand.
888 define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) {
889 ; CHECK-SD-LABEL: pow_v4f16:
890 ; CHECK-SD: // %bb.0: // %entry
891 ; CHECK-SD-NEXT: sub sp, sp, #64
892 ; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
893 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
894 ; CHECK-SD-NEXT: .cfi_offset w30, -16
895 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
896 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
897 ; CHECK-SD-NEXT: mov h3, v0.h[1]
898 ; CHECK-SD-NEXT: mov h2, v1.h[1]
899 ; CHECK-SD-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill
900 ; CHECK-SD-NEXT: fcvt s0, h3
901 ; CHECK-SD-NEXT: fcvt s1, h2
902 ; CHECK-SD-NEXT: bl powf
903 ; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
904 ; CHECK-SD-NEXT: fcvt h0, s0
905 ; CHECK-SD-NEXT: fcvt s2, h1
906 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
907 ; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
908 ; CHECK-SD-NEXT: fcvt s1, h0
909 ; CHECK-SD-NEXT: fmov s0, s2
910 ; CHECK-SD-NEXT: bl powf
911 ; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
912 ; CHECK-SD-NEXT: fcvt h3, s0
913 ; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
914 ; CHECK-SD-NEXT: mov h1, v1.h[2]
915 ; CHECK-SD-NEXT: mov h2, v0.h[2]
916 ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
917 ; CHECK-SD-NEXT: mov v3.h[1], v0.h[0]
918 ; CHECK-SD-NEXT: fcvt s0, h1
919 ; CHECK-SD-NEXT: fcvt s1, h2
920 ; CHECK-SD-NEXT: str q3, [sp] // 16-byte Folded Spill
921 ; CHECK-SD-NEXT: bl powf
922 ; CHECK-SD-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload
923 ; CHECK-SD-NEXT: fcvt h0, s0
924 ; CHECK-SD-NEXT: ldr q3, [sp] // 16-byte Folded Reload
925 ; CHECK-SD-NEXT: mov h1, v1.h[3]
926 ; CHECK-SD-NEXT: mov h2, v2.h[3]
927 ; CHECK-SD-NEXT: mov v3.h[2], v0.h[0]
928 ; CHECK-SD-NEXT: fcvt s0, h1
929 ; CHECK-SD-NEXT: fcvt s1, h2
930 ; CHECK-SD-NEXT: str q3, [sp] // 16-byte Folded Spill
931 ; CHECK-SD-NEXT: bl powf
932 ; CHECK-SD-NEXT: fcvt h1, s0
933 ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
934 ; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
935 ; CHECK-SD-NEXT: mov v0.h[3], v1.h[0]
936 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
937 ; CHECK-SD-NEXT: add sp, sp, #64
940 ; CHECK-GI-LABEL: pow_v4f16:
941 ; CHECK-GI: // %bb.0: // %entry
942 ; CHECK-GI-NEXT: sub sp, sp, #112
943 ; CHECK-GI-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
944 ; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
945 ; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
946 ; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
947 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 112
948 ; CHECK-GI-NEXT: .cfi_offset w30, -16
949 ; CHECK-GI-NEXT: .cfi_offset b8, -24
950 ; CHECK-GI-NEXT: .cfi_offset b9, -32
951 ; CHECK-GI-NEXT: .cfi_offset b10, -40
952 ; CHECK-GI-NEXT: .cfi_offset b11, -48
953 ; CHECK-GI-NEXT: .cfi_offset b12, -56
954 ; CHECK-GI-NEXT: .cfi_offset b13, -64
955 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
956 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
957 ; CHECK-GI-NEXT: mov h8, v0.h[1]
958 ; CHECK-GI-NEXT: mov h9, v0.h[2]
959 ; CHECK-GI-NEXT: mov h10, v0.h[3]
960 ; CHECK-GI-NEXT: mov h11, v1.h[1]
961 ; CHECK-GI-NEXT: mov h12, v1.h[2]
962 ; CHECK-GI-NEXT: mov h13, v1.h[3]
963 ; CHECK-GI-NEXT: fcvt s0, h0
964 ; CHECK-GI-NEXT: fcvt s1, h1
965 ; CHECK-GI-NEXT: bl powf
966 ; CHECK-GI-NEXT: fcvt s2, h8
967 ; CHECK-GI-NEXT: fcvt h0, s0
968 ; CHECK-GI-NEXT: fcvt s1, h11
969 ; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
970 ; CHECK-GI-NEXT: fmov s0, s2
971 ; CHECK-GI-NEXT: bl powf
972 ; CHECK-GI-NEXT: fcvt s2, h9
973 ; CHECK-GI-NEXT: fcvt h0, s0
974 ; CHECK-GI-NEXT: fcvt s1, h12
975 ; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
976 ; CHECK-GI-NEXT: fmov s0, s2
977 ; CHECK-GI-NEXT: bl powf
978 ; CHECK-GI-NEXT: fcvt s2, h10
979 ; CHECK-GI-NEXT: fcvt h0, s0
980 ; CHECK-GI-NEXT: fcvt s1, h13
981 ; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
982 ; CHECK-GI-NEXT: fmov s0, s2
983 ; CHECK-GI-NEXT: bl powf
984 ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload
985 ; CHECK-GI-NEXT: fcvt h0, s0
986 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
987 ; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
988 ; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
989 ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
990 ; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
991 ; CHECK-GI-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
992 ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
993 ; CHECK-GI-NEXT: mov v1.h[3], v0.h[0]
994 ; CHECK-GI-NEXT: mov v0.16b, v1.16b
995 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
996 ; CHECK-GI-NEXT: add sp, sp, #112
999 %c = call <4 x half> @llvm.pow.v4f16(<4 x half> %a, <4 x half> %b)
; pow_v8f16: llvm.pow on <8 x half>, verified for both instruction selectors
; named in the RUN lines (default SelectionDAG and -global-isel).
; Both expectations contain eight powf calls (one per lane), each with
; fcvt half->single on the operands and fcvt single->half on the result.
; SelectionDAG expectation: 64-byte frame; inputs spilled as q0/q1 and the
; partially built result kept at [sp, #32] between calls.
; GlobalISel expectation: 192-byte frame with d8-d15 saved; lanes are
; extracted before the first call and the result vector is assembled at the
; end from the per-lane spill slots.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script rather than editing by hand.
1003 define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) {
1004 ; CHECK-SD-LABEL: pow_v8f16:
1005 ; CHECK-SD: // %bb.0: // %entry
1006 ; CHECK-SD-NEXT: sub sp, sp, #64
1007 ; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
1008 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
1009 ; CHECK-SD-NEXT: .cfi_offset w30, -16
1010 ; CHECK-SD-NEXT: mov h3, v0.h[1]
1011 ; CHECK-SD-NEXT: mov h2, v1.h[1]
1012 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
1013 ; CHECK-SD-NEXT: fcvt s0, h3
1014 ; CHECK-SD-NEXT: fcvt s1, h2
1015 ; CHECK-SD-NEXT: bl powf
1016 ; CHECK-SD-NEXT: fcvt h0, s0
1017 ; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
1018 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
1019 ; CHECK-SD-NEXT: fcvt s0, h0
1020 ; CHECK-SD-NEXT: fcvt s1, h1
1021 ; CHECK-SD-NEXT: bl powf
1022 ; CHECK-SD-NEXT: fcvt h0, s0
1023 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
1024 ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
1025 ; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
1026 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
1027 ; CHECK-SD-NEXT: mov h0, v0.h[2]
1028 ; CHECK-SD-NEXT: mov h1, v1.h[2]
1029 ; CHECK-SD-NEXT: fcvt s0, h0
1030 ; CHECK-SD-NEXT: fcvt s1, h1
1031 ; CHECK-SD-NEXT: bl powf
1032 ; CHECK-SD-NEXT: fcvt h0, s0
1033 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
1034 ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0]
1035 ; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
1036 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
1037 ; CHECK-SD-NEXT: mov h0, v0.h[3]
1038 ; CHECK-SD-NEXT: mov h1, v1.h[3]
1039 ; CHECK-SD-NEXT: fcvt s0, h0
1040 ; CHECK-SD-NEXT: fcvt s1, h1
1041 ; CHECK-SD-NEXT: bl powf
1042 ; CHECK-SD-NEXT: fcvt h0, s0
1043 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
1044 ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0]
1045 ; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
1046 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
1047 ; CHECK-SD-NEXT: mov h0, v0.h[4]
1048 ; CHECK-SD-NEXT: mov h1, v1.h[4]
1049 ; CHECK-SD-NEXT: fcvt s0, h0
1050 ; CHECK-SD-NEXT: fcvt s1, h1
1051 ; CHECK-SD-NEXT: bl powf
1052 ; CHECK-SD-NEXT: fcvt h0, s0
1053 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
1054 ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0]
1055 ; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
1056 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
1057 ; CHECK-SD-NEXT: mov h0, v0.h[5]
1058 ; CHECK-SD-NEXT: mov h1, v1.h[5]
1059 ; CHECK-SD-NEXT: fcvt s0, h0
1060 ; CHECK-SD-NEXT: fcvt s1, h1
1061 ; CHECK-SD-NEXT: bl powf
1062 ; CHECK-SD-NEXT: fcvt h0, s0
1063 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
1064 ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0]
1065 ; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
1066 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
1067 ; CHECK-SD-NEXT: mov h0, v0.h[6]
1068 ; CHECK-SD-NEXT: mov h1, v1.h[6]
1069 ; CHECK-SD-NEXT: fcvt s0, h0
1070 ; CHECK-SD-NEXT: fcvt s1, h1
1071 ; CHECK-SD-NEXT: bl powf
1072 ; CHECK-SD-NEXT: fcvt h0, s0
1073 ; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
1074 ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0]
1075 ; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
1076 ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
1077 ; CHECK-SD-NEXT: mov h0, v0.h[7]
1078 ; CHECK-SD-NEXT: mov h1, v1.h[7]
1079 ; CHECK-SD-NEXT: fcvt s0, h0
1080 ; CHECK-SD-NEXT: fcvt s1, h1
1081 ; CHECK-SD-NEXT: bl powf
1082 ; CHECK-SD-NEXT: fcvt h1, s0
1083 ; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
1084 ; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
1085 ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0]
1086 ; CHECK-SD-NEXT: add sp, sp, #64
1087 ; CHECK-SD-NEXT: ret
1089 ; CHECK-GI-LABEL: pow_v8f16:
1090 ; CHECK-GI: // %bb.0: // %entry
1091 ; CHECK-GI-NEXT: sub sp, sp, #192
1092 ; CHECK-GI-NEXT: stp d15, d14, [sp, #112] // 16-byte Folded Spill
1093 ; CHECK-GI-NEXT: stp d13, d12, [sp, #128] // 16-byte Folded Spill
1094 ; CHECK-GI-NEXT: stp d11, d10, [sp, #144] // 16-byte Folded Spill
1095 ; CHECK-GI-NEXT: stp d9, d8, [sp, #160] // 16-byte Folded Spill
1096 ; CHECK-GI-NEXT: str x30, [sp, #176] // 8-byte Folded Spill
1097 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 192
1098 ; CHECK-GI-NEXT: .cfi_offset w30, -16
1099 ; CHECK-GI-NEXT: .cfi_offset b8, -24
1100 ; CHECK-GI-NEXT: .cfi_offset b9, -32
1101 ; CHECK-GI-NEXT: .cfi_offset b10, -40
1102 ; CHECK-GI-NEXT: .cfi_offset b11, -48
1103 ; CHECK-GI-NEXT: .cfi_offset b12, -56
1104 ; CHECK-GI-NEXT: .cfi_offset b13, -64
1105 ; CHECK-GI-NEXT: .cfi_offset b14, -72
1106 ; CHECK-GI-NEXT: .cfi_offset b15, -80
1107 ; CHECK-GI-NEXT: mov h2, v0.h[5]
1108 ; CHECK-GI-NEXT: mov h11, v0.h[1]
1109 ; CHECK-GI-NEXT: mov h12, v0.h[2]
1110 ; CHECK-GI-NEXT: mov h13, v0.h[3]
1111 ; CHECK-GI-NEXT: mov h14, v0.h[4]
1112 ; CHECK-GI-NEXT: mov h8, v1.h[1]
1113 ; CHECK-GI-NEXT: mov h9, v1.h[2]
1114 ; CHECK-GI-NEXT: mov h10, v1.h[3]
1115 ; CHECK-GI-NEXT: mov h15, v1.h[4]
1116 ; CHECK-GI-NEXT: str h2, [sp, #48] // 2-byte Folded Spill
1117 ; CHECK-GI-NEXT: mov h2, v0.h[6]
1118 ; CHECK-GI-NEXT: str h2, [sp, #64] // 2-byte Folded Spill
1119 ; CHECK-GI-NEXT: mov h2, v0.h[7]
1120 ; CHECK-GI-NEXT: fcvt s0, h0
1121 ; CHECK-GI-NEXT: str h2, [sp, #96] // 2-byte Folded Spill
1122 ; CHECK-GI-NEXT: mov h2, v1.h[5]
1123 ; CHECK-GI-NEXT: str h2, [sp, #186] // 2-byte Folded Spill
1124 ; CHECK-GI-NEXT: mov h2, v1.h[6]
1125 ; CHECK-GI-NEXT: str h2, [sp, #188] // 2-byte Folded Spill
1126 ; CHECK-GI-NEXT: mov h2, v1.h[7]
1127 ; CHECK-GI-NEXT: fcvt s1, h1
1128 ; CHECK-GI-NEXT: str h2, [sp, #190] // 2-byte Folded Spill
1129 ; CHECK-GI-NEXT: bl powf
1130 ; CHECK-GI-NEXT: fcvt s2, h11
1131 ; CHECK-GI-NEXT: fcvt h0, s0
1132 ; CHECK-GI-NEXT: fcvt s1, h8
1133 ; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
1134 ; CHECK-GI-NEXT: fmov s0, s2
1135 ; CHECK-GI-NEXT: bl powf
1136 ; CHECK-GI-NEXT: fcvt s2, h12
1137 ; CHECK-GI-NEXT: fcvt h0, s0
1138 ; CHECK-GI-NEXT: fcvt s1, h9
1139 ; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
1140 ; CHECK-GI-NEXT: fmov s0, s2
1141 ; CHECK-GI-NEXT: bl powf
1142 ; CHECK-GI-NEXT: fcvt s2, h13
1143 ; CHECK-GI-NEXT: fcvt h0, s0
1144 ; CHECK-GI-NEXT: fcvt s1, h10
1145 ; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
1146 ; CHECK-GI-NEXT: fmov s0, s2
1147 ; CHECK-GI-NEXT: bl powf
1148 ; CHECK-GI-NEXT: fcvt s2, h14
1149 ; CHECK-GI-NEXT: fcvt h0, s0
1150 ; CHECK-GI-NEXT: fcvt s1, h15
1151 ; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
1152 ; CHECK-GI-NEXT: fmov s0, s2
1153 ; CHECK-GI-NEXT: bl powf
1154 ; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload
1155 ; CHECK-GI-NEXT: fcvt h0, s0
1156 ; CHECK-GI-NEXT: fcvt s2, h1
1157 ; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
1158 ; CHECK-GI-NEXT: ldr h0, [sp, #186] // 2-byte Folded Reload
1159 ; CHECK-GI-NEXT: fcvt s1, h0
1160 ; CHECK-GI-NEXT: fmov s0, s2
1161 ; CHECK-GI-NEXT: bl powf
1162 ; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload
1163 ; CHECK-GI-NEXT: fcvt h0, s0
1164 ; CHECK-GI-NEXT: fcvt s2, h1
1165 ; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
1166 ; CHECK-GI-NEXT: ldr h0, [sp, #188] // 2-byte Folded Reload
1167 ; CHECK-GI-NEXT: fcvt s1, h0
1168 ; CHECK-GI-NEXT: fmov s0, s2
1169 ; CHECK-GI-NEXT: bl powf
1170 ; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload
1171 ; CHECK-GI-NEXT: fcvt h0, s0
1172 ; CHECK-GI-NEXT: fcvt s2, h1
1173 ; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
1174 ; CHECK-GI-NEXT: ldr h0, [sp, #190] // 2-byte Folded Reload
1175 ; CHECK-GI-NEXT: fcvt s1, h0
1176 ; CHECK-GI-NEXT: fmov s0, s2
1177 ; CHECK-GI-NEXT: bl powf
1178 ; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
1179 ; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
1180 ; CHECK-GI-NEXT: fcvt h0, s0
1181 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #160] // 16-byte Folded Reload
1182 ; CHECK-GI-NEXT: ldr x30, [sp, #176] // 8-byte Folded Reload
1183 ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
1184 ; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
1185 ; CHECK-GI-NEXT: ldp d11, d10, [sp, #144] // 16-byte Folded Reload
1186 ; CHECK-GI-NEXT: ldp d13, d12, [sp, #128] // 16-byte Folded Reload
1187 ; CHECK-GI-NEXT: ldp d15, d14, [sp, #112] // 16-byte Folded Reload
1188 ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
1189 ; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
1190 ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
1191 ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
1192 ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
1193 ; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
1194 ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
1195 ; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload
1196 ; CHECK-GI-NEXT: mov v1.h[6], v2.h[0]
1197 ; CHECK-GI-NEXT: mov v1.h[7], v0.h[0]
1198 ; CHECK-GI-NEXT: mov v0.16b, v1.16b
1199 ; CHECK-GI-NEXT: add sp, sp, #192
1200 ; CHECK-GI-NEXT: ret
1202 %c = call <8 x half> @llvm.pow.v8f16(<8 x half> %a, <8 x half> %b)
1206 define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) {
1207 ; CHECK-SD-LABEL: pow_v16f16:
1208 ; CHECK-SD: // %bb.0: // %entry
1209 ; CHECK-SD-NEXT: sub sp, sp, #96
1210 ; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
1211 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
1212 ; CHECK-SD-NEXT: .cfi_offset w30, -16
1213 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
1214 ; CHECK-SD-NEXT: mov h1, v2.h[1]
1215 ; CHECK-SD-NEXT: stp q3, q2, [sp, #32] // 32-byte Folded Spill
1216 ; CHECK-SD-NEXT: mov h3, v0.h[1]
1217 ; CHECK-SD-NEXT: fcvt s1, h1
1218 ; CHECK-SD-NEXT: fcvt s0, h3
1219 ; CHECK-SD-NEXT: bl powf
1220 ; CHECK-SD-NEXT: fcvt h0, s0
1221 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1222 ; CHECK-SD-NEXT: fcvt s1, h1
1223 ; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
1224 ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1225 ; CHECK-SD-NEXT: fcvt s0, h0
1226 ; CHECK-SD-NEXT: bl powf
1227 ; CHECK-SD-NEXT: fcvt h0, s0
1228 ; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
1229 ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
1230 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1231 ; CHECK-SD-NEXT: mov h1, v1.h[2]
1232 ; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
1233 ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1234 ; CHECK-SD-NEXT: mov h0, v0.h[2]
1235 ; CHECK-SD-NEXT: fcvt s1, h1
1236 ; CHECK-SD-NEXT: fcvt s0, h0
1237 ; CHECK-SD-NEXT: bl powf
1238 ; CHECK-SD-NEXT: fcvt h0, s0
1239 ; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
1240 ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0]
1241 ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1242 ; CHECK-SD-NEXT: mov h0, v0.h[3]
1243 ; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
1244 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1245 ; CHECK-SD-NEXT: mov h1, v1.h[3]
1246 ; CHECK-SD-NEXT: fcvt s0, h0
1247 ; CHECK-SD-NEXT: fcvt s1, h1
1248 ; CHECK-SD-NEXT: bl powf
1249 ; CHECK-SD-NEXT: fcvt h0, s0
1250 ; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
1251 ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0]
1252 ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1253 ; CHECK-SD-NEXT: mov h0, v0.h[4]
1254 ; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
1255 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1256 ; CHECK-SD-NEXT: mov h1, v1.h[4]
1257 ; CHECK-SD-NEXT: fcvt s0, h0
1258 ; CHECK-SD-NEXT: fcvt s1, h1
1259 ; CHECK-SD-NEXT: bl powf
1260 ; CHECK-SD-NEXT: fcvt h0, s0
1261 ; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
1262 ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0]
1263 ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1264 ; CHECK-SD-NEXT: mov h0, v0.h[5]
1265 ; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
1266 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1267 ; CHECK-SD-NEXT: mov h1, v1.h[5]
1268 ; CHECK-SD-NEXT: fcvt s0, h0
1269 ; CHECK-SD-NEXT: fcvt s1, h1
1270 ; CHECK-SD-NEXT: bl powf
1271 ; CHECK-SD-NEXT: fcvt h0, s0
1272 ; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
1273 ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0]
1274 ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1275 ; CHECK-SD-NEXT: mov h0, v0.h[6]
1276 ; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
1277 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1278 ; CHECK-SD-NEXT: mov h1, v1.h[6]
1279 ; CHECK-SD-NEXT: fcvt s0, h0
1280 ; CHECK-SD-NEXT: fcvt s1, h1
1281 ; CHECK-SD-NEXT: bl powf
1282 ; CHECK-SD-NEXT: fcvt h0, s0
1283 ; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
1284 ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0]
1285 ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1286 ; CHECK-SD-NEXT: mov h0, v0.h[7]
1287 ; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
1288 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1289 ; CHECK-SD-NEXT: mov h1, v1.h[7]
1290 ; CHECK-SD-NEXT: fcvt s0, h0
1291 ; CHECK-SD-NEXT: fcvt s1, h1
1292 ; CHECK-SD-NEXT: bl powf
1293 ; CHECK-SD-NEXT: fcvt h0, s0
1294 ; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
1295 ; CHECK-SD-NEXT: mov v1.h[7], v0.h[0]
1296 ; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
1297 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
1298 ; CHECK-SD-NEXT: mov h0, v0.h[1]
1299 ; CHECK-SD-NEXT: mov h1, v1.h[1]
1300 ; CHECK-SD-NEXT: fcvt s0, h0
1301 ; CHECK-SD-NEXT: fcvt s1, h1
1302 ; CHECK-SD-NEXT: bl powf
1303 ; CHECK-SD-NEXT: fcvt h0, s0
1304 ; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
1305 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
1306 ; CHECK-SD-NEXT: fcvt s0, h0
1307 ; CHECK-SD-NEXT: fcvt s1, h1
1308 ; CHECK-SD-NEXT: bl powf
1309 ; CHECK-SD-NEXT: fcvt h0, s0
1310 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1311 ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
1312 ; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
1313 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
1314 ; CHECK-SD-NEXT: mov h0, v0.h[2]
1315 ; CHECK-SD-NEXT: mov h1, v1.h[2]
1316 ; CHECK-SD-NEXT: fcvt s0, h0
1317 ; CHECK-SD-NEXT: fcvt s1, h1
1318 ; CHECK-SD-NEXT: bl powf
1319 ; CHECK-SD-NEXT: fcvt h0, s0
1320 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1321 ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0]
1322 ; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
1323 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
1324 ; CHECK-SD-NEXT: mov h0, v0.h[3]
1325 ; CHECK-SD-NEXT: mov h1, v1.h[3]
1326 ; CHECK-SD-NEXT: fcvt s0, h0
1327 ; CHECK-SD-NEXT: fcvt s1, h1
1328 ; CHECK-SD-NEXT: bl powf
1329 ; CHECK-SD-NEXT: fcvt h0, s0
1330 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1331 ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0]
1332 ; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
1333 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
1334 ; CHECK-SD-NEXT: mov h0, v0.h[4]
1335 ; CHECK-SD-NEXT: mov h1, v1.h[4]
1336 ; CHECK-SD-NEXT: fcvt s0, h0
1337 ; CHECK-SD-NEXT: fcvt s1, h1
1338 ; CHECK-SD-NEXT: bl powf
1339 ; CHECK-SD-NEXT: fcvt h0, s0
1340 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1341 ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0]
1342 ; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
1343 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
1344 ; CHECK-SD-NEXT: mov h0, v0.h[5]
1345 ; CHECK-SD-NEXT: mov h1, v1.h[5]
1346 ; CHECK-SD-NEXT: fcvt s0, h0
1347 ; CHECK-SD-NEXT: fcvt s1, h1
1348 ; CHECK-SD-NEXT: bl powf
1349 ; CHECK-SD-NEXT: fcvt h0, s0
1350 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1351 ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0]
1352 ; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
1353 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
1354 ; CHECK-SD-NEXT: mov h0, v0.h[6]
1355 ; CHECK-SD-NEXT: mov h1, v1.h[6]
1356 ; CHECK-SD-NEXT: fcvt s0, h0
1357 ; CHECK-SD-NEXT: fcvt s1, h1
1358 ; CHECK-SD-NEXT: bl powf
1359 ; CHECK-SD-NEXT: fcvt h0, s0
1360 ; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1361 ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0]
1362 ; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
1363 ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
1364 ; CHECK-SD-NEXT: mov h0, v0.h[7]
1365 ; CHECK-SD-NEXT: mov h1, v1.h[7]
1366 ; CHECK-SD-NEXT: fcvt s0, h0
1367 ; CHECK-SD-NEXT: fcvt s1, h1
1368 ; CHECK-SD-NEXT: bl powf
1369 ; CHECK-SD-NEXT: fmov s1, s0
1370 ; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1371 ; CHECK-SD-NEXT: fcvt h2, s1
1372 ; CHECK-SD-NEXT: ldp q1, q0, [sp, #48] // 32-byte Folded Reload
1373 ; CHECK-SD-NEXT: mov v1.h[7], v2.h[0]
1374 ; CHECK-SD-NEXT: add sp, sp, #96
1375 ; CHECK-SD-NEXT: ret
1377 ; CHECK-GI-LABEL: pow_v16f16:
1378 ; CHECK-GI: // %bb.0: // %entry
1379 ; CHECK-GI-NEXT: sub sp, sp, #448
1380 ; CHECK-GI-NEXT: stp d15, d14, [sp, #368] // 16-byte Folded Spill
1381 ; CHECK-GI-NEXT: stp d13, d12, [sp, #384] // 16-byte Folded Spill
1382 ; CHECK-GI-NEXT: stp d11, d10, [sp, #400] // 16-byte Folded Spill
1383 ; CHECK-GI-NEXT: stp d9, d8, [sp, #416] // 16-byte Folded Spill
1384 ; CHECK-GI-NEXT: stp x29, x30, [sp, #432] // 16-byte Folded Spill
1385 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 448
1386 ; CHECK-GI-NEXT: .cfi_offset w30, -8
1387 ; CHECK-GI-NEXT: .cfi_offset w29, -16
1388 ; CHECK-GI-NEXT: .cfi_offset b8, -24
1389 ; CHECK-GI-NEXT: .cfi_offset b9, -32
1390 ; CHECK-GI-NEXT: .cfi_offset b10, -40
1391 ; CHECK-GI-NEXT: .cfi_offset b11, -48
1392 ; CHECK-GI-NEXT: .cfi_offset b12, -56
1393 ; CHECK-GI-NEXT: .cfi_offset b13, -64
1394 ; CHECK-GI-NEXT: .cfi_offset b14, -72
1395 ; CHECK-GI-NEXT: .cfi_offset b15, -80
1396 ; CHECK-GI-NEXT: mov v4.16b, v1.16b
1397 ; CHECK-GI-NEXT: str q1, [sp, #112] // 16-byte Folded Spill
1398 ; CHECK-GI-NEXT: mov h1, v0.h[4]
1399 ; CHECK-GI-NEXT: mov h12, v0.h[1]
1400 ; CHECK-GI-NEXT: mov h13, v0.h[2]
1401 ; CHECK-GI-NEXT: str q3, [sp, #16] // 16-byte Folded Spill
1402 ; CHECK-GI-NEXT: mov h14, v0.h[3]
1403 ; CHECK-GI-NEXT: mov h15, v2.h[1]
1404 ; CHECK-GI-NEXT: mov h8, v2.h[2]
1405 ; CHECK-GI-NEXT: mov h9, v2.h[3]
1406 ; CHECK-GI-NEXT: mov h10, v2.h[4]
1407 ; CHECK-GI-NEXT: mov h11, v2.h[5]
1408 ; CHECK-GI-NEXT: str h1, [sp, #272] // 2-byte Folded Spill
1409 ; CHECK-GI-NEXT: mov h1, v0.h[5]
1410 ; CHECK-GI-NEXT: str h1, [sp, #240] // 2-byte Folded Spill
1411 ; CHECK-GI-NEXT: mov h1, v0.h[6]
1412 ; CHECK-GI-NEXT: str h1, [sp, #176] // 2-byte Folded Spill
1413 ; CHECK-GI-NEXT: mov h1, v0.h[7]
1414 ; CHECK-GI-NEXT: fcvt s0, h0
1415 ; CHECK-GI-NEXT: str h1, [sp, #144] // 2-byte Folded Spill
1416 ; CHECK-GI-NEXT: mov h1, v4.h[1]
1417 ; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill
1418 ; CHECK-GI-NEXT: mov h1, v4.h[2]
1419 ; CHECK-GI-NEXT: str h1, [sp, #80] // 2-byte Folded Spill
1420 ; CHECK-GI-NEXT: mov h1, v4.h[3]
1421 ; CHECK-GI-NEXT: str h1, [sp, #128] // 2-byte Folded Spill
1422 ; CHECK-GI-NEXT: mov h1, v4.h[4]
1423 ; CHECK-GI-NEXT: str h1, [sp, #192] // 2-byte Folded Spill
1424 ; CHECK-GI-NEXT: mov h1, v4.h[5]
1425 ; CHECK-GI-NEXT: str h1, [sp, #256] // 2-byte Folded Spill
1426 ; CHECK-GI-NEXT: mov h1, v4.h[6]
1427 ; CHECK-GI-NEXT: str h1, [sp, #336] // 2-byte Folded Spill
1428 ; CHECK-GI-NEXT: mov h1, v4.h[7]
1429 ; CHECK-GI-NEXT: str h1, [sp, #352] // 2-byte Folded Spill
1430 ; CHECK-GI-NEXT: mov h1, v2.h[6]
1431 ; CHECK-GI-NEXT: str h1, [sp, #12] // 2-byte Folded Spill
1432 ; CHECK-GI-NEXT: mov h1, v2.h[7]
1433 ; CHECK-GI-NEXT: str h1, [sp, #14] // 2-byte Folded Spill
1434 ; CHECK-GI-NEXT: mov h1, v3.h[1]
1435 ; CHECK-GI-NEXT: str h1, [sp, #44] // 2-byte Folded Spill
1436 ; CHECK-GI-NEXT: mov h1, v3.h[2]
1437 ; CHECK-GI-NEXT: str h1, [sp, #46] // 2-byte Folded Spill
1438 ; CHECK-GI-NEXT: mov h1, v3.h[3]
1439 ; CHECK-GI-NEXT: str h1, [sp, #78] // 2-byte Folded Spill
1440 ; CHECK-GI-NEXT: mov h1, v3.h[4]
1441 ; CHECK-GI-NEXT: str h1, [sp, #110] // 2-byte Folded Spill
1442 ; CHECK-GI-NEXT: mov h1, v3.h[5]
1443 ; CHECK-GI-NEXT: str h1, [sp, #174] // 2-byte Folded Spill
1444 ; CHECK-GI-NEXT: mov h1, v3.h[6]
1445 ; CHECK-GI-NEXT: str h1, [sp, #238] // 2-byte Folded Spill
1446 ; CHECK-GI-NEXT: mov h1, v3.h[7]
1447 ; CHECK-GI-NEXT: str h1, [sp, #302] // 2-byte Folded Spill
1448 ; CHECK-GI-NEXT: fcvt s1, h2
1449 ; CHECK-GI-NEXT: bl powf
1450 ; CHECK-GI-NEXT: fcvt s2, h12
1451 ; CHECK-GI-NEXT: fcvt h0, s0
1452 ; CHECK-GI-NEXT: fcvt s1, h15
1453 ; CHECK-GI-NEXT: str q0, [sp, #304] // 16-byte Folded Spill
1454 ; CHECK-GI-NEXT: fmov s0, s2
1455 ; CHECK-GI-NEXT: bl powf
1456 ; CHECK-GI-NEXT: fcvt s2, h13
1457 ; CHECK-GI-NEXT: fcvt h0, s0
1458 ; CHECK-GI-NEXT: fcvt s1, h8
1459 ; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill
1460 ; CHECK-GI-NEXT: fmov s0, s2
1461 ; CHECK-GI-NEXT: bl powf
1462 ; CHECK-GI-NEXT: fcvt s2, h14
1463 ; CHECK-GI-NEXT: fcvt h0, s0
1464 ; CHECK-GI-NEXT: fcvt s1, h9
1465 ; CHECK-GI-NEXT: str q0, [sp, #320] // 16-byte Folded Spill
1466 ; CHECK-GI-NEXT: fmov s0, s2
1467 ; CHECK-GI-NEXT: bl powf
1468 ; CHECK-GI-NEXT: ldr h1, [sp, #272] // 2-byte Folded Reload
1469 ; CHECK-GI-NEXT: fcvt h0, s0
1470 ; CHECK-GI-NEXT: fcvt s2, h1
1471 ; CHECK-GI-NEXT: fcvt s1, h10
1472 ; CHECK-GI-NEXT: str q0, [sp, #272] // 16-byte Folded Spill
1473 ; CHECK-GI-NEXT: fmov s0, s2
1474 ; CHECK-GI-NEXT: bl powf
1475 ; CHECK-GI-NEXT: ldr h1, [sp, #240] // 2-byte Folded Reload
1476 ; CHECK-GI-NEXT: fcvt h0, s0
1477 ; CHECK-GI-NEXT: fcvt s2, h1
1478 ; CHECK-GI-NEXT: fcvt s1, h11
1479 ; CHECK-GI-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
1480 ; CHECK-GI-NEXT: fmov s0, s2
1481 ; CHECK-GI-NEXT: bl powf
1482 ; CHECK-GI-NEXT: ldr h1, [sp, #176] // 2-byte Folded Reload
1483 ; CHECK-GI-NEXT: fcvt h0, s0
1484 ; CHECK-GI-NEXT: fcvt s2, h1
1485 ; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill
1486 ; CHECK-GI-NEXT: ldr h0, [sp, #12] // 2-byte Folded Reload
1487 ; CHECK-GI-NEXT: fcvt s1, h0
1488 ; CHECK-GI-NEXT: fmov s0, s2
1489 ; CHECK-GI-NEXT: bl powf
1490 ; CHECK-GI-NEXT: ldr h1, [sp, #144] // 2-byte Folded Reload
1491 ; CHECK-GI-NEXT: fcvt h0, s0
1492 ; CHECK-GI-NEXT: fcvt s2, h1
1493 ; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill
1494 ; CHECK-GI-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
1495 ; CHECK-GI-NEXT: fcvt s1, h0
1496 ; CHECK-GI-NEXT: fmov s0, s2
1497 ; CHECK-GI-NEXT: bl powf
1498 ; CHECK-GI-NEXT: ldr q1, [sp, #112] // 16-byte Folded Reload
1499 ; CHECK-GI-NEXT: fcvt h0, s0
1500 ; CHECK-GI-NEXT: fcvt s2, h1
1501 ; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
1502 ; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
1503 ; CHECK-GI-NEXT: fcvt s1, h0
1504 ; CHECK-GI-NEXT: fmov s0, s2
1505 ; CHECK-GI-NEXT: bl powf
1506 ; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload
1507 ; CHECK-GI-NEXT: fcvt h0, s0
1508 ; CHECK-GI-NEXT: fcvt s2, h1
1509 ; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
1510 ; CHECK-GI-NEXT: ldr h0, [sp, #44] // 2-byte Folded Reload
1511 ; CHECK-GI-NEXT: fcvt s1, h0
1512 ; CHECK-GI-NEXT: fmov s0, s2
1513 ; CHECK-GI-NEXT: bl powf
1514 ; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Folded Reload
1515 ; CHECK-GI-NEXT: fcvt h0, s0
1516 ; CHECK-GI-NEXT: fcvt s2, h1
1517 ; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
1518 ; CHECK-GI-NEXT: ldr h0, [sp, #46] // 2-byte Folded Reload
1519 ; CHECK-GI-NEXT: fcvt s1, h0
1520 ; CHECK-GI-NEXT: fmov s0, s2
1521 ; CHECK-GI-NEXT: bl powf
1522 ; CHECK-GI-NEXT: ldr h1, [sp, #128] // 2-byte Folded Reload
1523 ; CHECK-GI-NEXT: fcvt h0, s0
1524 ; CHECK-GI-NEXT: fcvt s2, h1
1525 ; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill
1526 ; CHECK-GI-NEXT: ldr h0, [sp, #78] // 2-byte Folded Reload
1527 ; CHECK-GI-NEXT: fcvt s1, h0
1528 ; CHECK-GI-NEXT: fmov s0, s2
1529 ; CHECK-GI-NEXT: bl powf
1530 ; CHECK-GI-NEXT: ldr h1, [sp, #192] // 2-byte Folded Reload
1531 ; CHECK-GI-NEXT: fcvt h0, s0
1532 ; CHECK-GI-NEXT: fcvt s2, h1
1533 ; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
1534 ; CHECK-GI-NEXT: ldr h0, [sp, #110] // 2-byte Folded Reload
1535 ; CHECK-GI-NEXT: fcvt s1, h0
1536 ; CHECK-GI-NEXT: fmov s0, s2
1537 ; CHECK-GI-NEXT: bl powf
1538 ; CHECK-GI-NEXT: ldr h1, [sp, #256] // 2-byte Folded Reload
1539 ; CHECK-GI-NEXT: fcvt h0, s0
1540 ; CHECK-GI-NEXT: fcvt s2, h1
1541 ; CHECK-GI-NEXT: str q0, [sp, #256] // 16-byte Folded Spill
1542 ; CHECK-GI-NEXT: ldr h0, [sp, #174] // 2-byte Folded Reload
1543 ; CHECK-GI-NEXT: fcvt s1, h0
1544 ; CHECK-GI-NEXT: fmov s0, s2
1545 ; CHECK-GI-NEXT: bl powf
1546 ; CHECK-GI-NEXT: ldr h1, [sp, #336] // 2-byte Folded Reload
1547 ; CHECK-GI-NEXT: fcvt h0, s0
1548 ; CHECK-GI-NEXT: fcvt s2, h1
1549 ; CHECK-GI-NEXT: str q0, [sp, #336] // 16-byte Folded Spill
1550 ; CHECK-GI-NEXT: ldr h0, [sp, #238] // 2-byte Folded Reload
1551 ; CHECK-GI-NEXT: fcvt s1, h0
1552 ; CHECK-GI-NEXT: fmov s0, s2
1553 ; CHECK-GI-NEXT: bl powf
1554 ; CHECK-GI-NEXT: ldr h1, [sp, #352] // 2-byte Folded Reload
1555 ; CHECK-GI-NEXT: fcvt h0, s0
1556 ; CHECK-GI-NEXT: fcvt s2, h1
1557 ; CHECK-GI-NEXT: str q0, [sp, #352] // 16-byte Folded Spill
1558 ; CHECK-GI-NEXT: ldr h0, [sp, #302] // 2-byte Folded Reload
1559 ; CHECK-GI-NEXT: fcvt s1, h0
1560 ; CHECK-GI-NEXT: fmov s0, s2
1561 ; CHECK-GI-NEXT: bl powf
1562 ; CHECK-GI-NEXT: ldr q3, [sp, #304] // 16-byte Folded Reload
1563 ; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload
1564 ; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
1565 ; CHECK-GI-NEXT: ldp x29, x30, [sp, #432] // 16-byte Folded Reload
1566 ; CHECK-GI-NEXT: mov v3.h[1], v2.h[0]
1567 ; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload
1568 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #416] // 16-byte Folded Reload
1569 ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
1570 ; CHECK-GI-NEXT: ldr q2, [sp, #320] // 16-byte Folded Reload
1571 ; CHECK-GI-NEXT: ldp d11, d10, [sp, #400] // 16-byte Folded Reload
1572 ; CHECK-GI-NEXT: mov v3.h[2], v2.h[0]
1573 ; CHECK-GI-NEXT: ldr q2, [sp, #128] // 16-byte Folded Reload
1574 ; CHECK-GI-NEXT: ldp d13, d12, [sp, #384] // 16-byte Folded Reload
1575 ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
1576 ; CHECK-GI-NEXT: ldr q2, [sp, #272] // 16-byte Folded Reload
1577 ; CHECK-GI-NEXT: ldp d15, d14, [sp, #368] // 16-byte Folded Reload
1578 ; CHECK-GI-NEXT: mov v3.h[3], v2.h[0]
1579 ; CHECK-GI-NEXT: ldr q2, [sp, #192] // 16-byte Folded Reload
1580 ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
1581 ; CHECK-GI-NEXT: ldr q2, [sp, #240] // 16-byte Folded Reload
1582 ; CHECK-GI-NEXT: mov v3.h[4], v2.h[0]
1583 ; CHECK-GI-NEXT: ldr q2, [sp, #256] // 16-byte Folded Reload
1584 ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
1585 ; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload
1586 ; CHECK-GI-NEXT: mov v3.h[5], v2.h[0]
1587 ; CHECK-GI-NEXT: ldr q2, [sp, #336] // 16-byte Folded Reload
1588 ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
1589 ; CHECK-GI-NEXT: fcvt h2, s0
1590 ; CHECK-GI-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
1591 ; CHECK-GI-NEXT: mov v3.h[6], v0.h[0]
1592 ; CHECK-GI-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload
1593 ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0]
1594 ; CHECK-GI-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
1595 ; CHECK-GI-NEXT: mov v3.h[7], v0.h[0]
1596 ; CHECK-GI-NEXT: mov v1.h[7], v2.h[0]
1597 ; CHECK-GI-NEXT: mov v0.16b, v3.16b
1598 ; CHECK-GI-NEXT: add sp, sp, #448
1599 ; CHECK-GI-NEXT: ret
1601 %c = call <16 x half> @llvm.pow.v16f16(<16 x half> %a, <16 x half> %b)
; Declarations of the llvm.pow intrinsic overloads: the scalar half, float and
; double forms plus fixed-width vector forms of 2 to 16 elements. Every
; overload takes two operands of the same type and returns that type.
; The scalar, <2 x double> and <16 x half> overloads belong to test functions
; visible in this file; the remaining vector overloads are presumably used by
; the other test functions (confirm against the full file).
1605 declare <16 x half> @llvm.pow.v16f16(<16 x half>, <16 x half>)
1606 declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
1607 declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
1608 declare <3 x double> @llvm.pow.v3f64(<3 x double>, <3 x double>)
1609 declare <3 x float> @llvm.pow.v3f32(<3 x float>, <3 x float>)
1610 declare <4 x double> @llvm.pow.v4f64(<4 x double>, <4 x double>)
1611 declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
1612 declare <4 x half> @llvm.pow.v4f16(<4 x half>, <4 x half>)
1613 declare <7 x half> @llvm.pow.v7f16(<7 x half>, <7 x half>)
1614 declare <8 x float> @llvm.pow.v8f32(<8 x float>, <8 x float>)
1615 declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>)
1616 declare double @llvm.pow.f64(double, double)
1617 declare float @llvm.pow.f32(float, float)
1618 declare half @llvm.pow.f16(half, half)