1 ; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck -enable-var-scope %s
2 ; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cortex-a57 | FileCheck -enable-var-scope --check-prefix=CHECK-A57 %s
5 define float @t1(i32* nocapture %src) nounwind ssp {
10 %tmp1 = load i32, i32* %src, align 4
11 %tmp2 = sitofp i32 %tmp1 to float
15 define float @t2(i32* nocapture %src) nounwind ssp {
20 %tmp1 = load i32, i32* %src, align 4
21 %tmp2 = uitofp i32 %tmp1 to float
25 define double @t3(i64* nocapture %src) nounwind ssp {
30 %tmp1 = load i64, i64* %src, align 4
31 %tmp2 = sitofp i64 %tmp1 to double
35 define double @t4(i64* nocapture %src) nounwind ssp {
40 %tmp1 = load i64, i64* %src, align 4
41 %tmp2 = uitofp i64 %tmp1 to double
46 define double @t5(i32* nocapture %src) nounwind ssp optsize {
49 ; CHECK: ldr [[REG:w[0-9]+]], [x0]
50 ; CHECK: scvtf d0, [[REG]]
51 %tmp1 = load i32, i32* %src, align 4
52 %tmp2 = sitofp i32 %tmp1 to double
56 ; Check that we load in FP register when we want to convert into
57 ; floating point value.
58 ; This is much faster than loading on GPR and making the conversion
60 ; <rdar://problem/14599607>
62 ; Check the following patterns for signed/unsigned:
63 ; 1. load with scaled imm to float.
64 ; 2. load with scaled register to float.
65 ; 3. load with scaled imm to double.
66 ; 4. load with scaled register to double.
67 ; 5. load with unscaled imm to float.
68 ; 6. load with unscaled imm to double.
69 ; With loading size: 8, 16, 32, and 64-bits.
71 ; ********* 1. load with scaled imm to float. *********
72 define float @fct1(i8* nocapture %sp0) {
74 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
75 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
76 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
78 %addr = getelementptr i8, i8* %sp0, i64 1
79 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
80 %val = uitofp i8 %pix_sp0.0.copyload to float
81 %vmull.i = fmul float %val, %val
85 define float @fct2(i16* nocapture %sp0) {
87 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
88 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
89 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
91 %addr = getelementptr i16, i16* %sp0, i64 1
92 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
93 %val = uitofp i16 %pix_sp0.0.copyload to float
94 %vmull.i = fmul float %val, %val
98 define float @fct3(i32* nocapture %sp0) {
100 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
101 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
102 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
104 %addr = getelementptr i32, i32* %sp0, i64 1
105 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
106 %val = uitofp i32 %pix_sp0.0.copyload to float
107 %vmull.i = fmul float %val, %val
111 ; i64 -> f32 is not supported on floating point unit.
112 define float @fct4(i64* nocapture %sp0) {
114 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
115 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
116 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
118 %addr = getelementptr i64, i64* %sp0, i64 1
119 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
120 %val = uitofp i64 %pix_sp0.0.copyload to float
121 %vmull.i = fmul float %val, %val
125 ; ********* 2. load with scaled register to float. *********
126 define float @fct5(i8* nocapture %sp0, i64 %offset) {
128 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
129 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
130 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
132 %addr = getelementptr i8, i8* %sp0, i64 %offset
133 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
134 %val = uitofp i8 %pix_sp0.0.copyload to float
135 %vmull.i = fmul float %val, %val
139 define float @fct6(i16* nocapture %sp0, i64 %offset) {
141 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
142 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
143 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
145 %addr = getelementptr i16, i16* %sp0, i64 %offset
146 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
147 %val = uitofp i16 %pix_sp0.0.copyload to float
148 %vmull.i = fmul float %val, %val
152 define float @fct7(i32* nocapture %sp0, i64 %offset) {
154 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
155 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
156 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
158 %addr = getelementptr i32, i32* %sp0, i64 %offset
159 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
160 %val = uitofp i32 %pix_sp0.0.copyload to float
161 %vmull.i = fmul float %val, %val
165 ; i64 -> f32 is not supported on floating point unit.
166 define float @fct8(i64* nocapture %sp0, i64 %offset) {
168 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
169 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
170 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
172 %addr = getelementptr i64, i64* %sp0, i64 %offset
173 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
174 %val = uitofp i64 %pix_sp0.0.copyload to float
175 %vmull.i = fmul float %val, %val
180 ; ********* 3. load with scaled imm to double. *********
181 define double @fct9(i8* nocapture %sp0) {
183 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
184 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
185 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
187 %addr = getelementptr i8, i8* %sp0, i64 1
188 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
189 %val = uitofp i8 %pix_sp0.0.copyload to double
190 %vmull.i = fmul double %val, %val
194 define double @fct10(i16* nocapture %sp0) {
195 ; CHECK-LABEL: fct10:
196 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
197 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
198 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
200 %addr = getelementptr i16, i16* %sp0, i64 1
201 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
202 %val = uitofp i16 %pix_sp0.0.copyload to double
203 %vmull.i = fmul double %val, %val
207 define double @fct11(i32* nocapture %sp0) {
208 ; CHECK-LABEL: fct11:
209 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
210 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
211 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
213 %addr = getelementptr i32, i32* %sp0, i64 1
214 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
215 %val = uitofp i32 %pix_sp0.0.copyload to double
216 %vmull.i = fmul double %val, %val
220 define double @fct12(i64* nocapture %sp0) {
221 ; CHECK-LABEL: fct12:
222 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
223 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
224 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
226 %addr = getelementptr i64, i64* %sp0, i64 1
227 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
228 %val = uitofp i64 %pix_sp0.0.copyload to double
229 %vmull.i = fmul double %val, %val
233 ; ********* 4. load with scaled register to double. *********
234 define double @fct13(i8* nocapture %sp0, i64 %offset) {
235 ; CHECK-LABEL: fct13:
236 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
237 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
238 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
240 %addr = getelementptr i8, i8* %sp0, i64 %offset
241 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
242 %val = uitofp i8 %pix_sp0.0.copyload to double
243 %vmull.i = fmul double %val, %val
247 define double @fct14(i16* nocapture %sp0, i64 %offset) {
248 ; CHECK-LABEL: fct14:
249 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
250 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
251 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
253 %addr = getelementptr i16, i16* %sp0, i64 %offset
254 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
255 %val = uitofp i16 %pix_sp0.0.copyload to double
256 %vmull.i = fmul double %val, %val
260 define double @fct15(i32* nocapture %sp0, i64 %offset) {
261 ; CHECK-LABEL: fct15:
262 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
263 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
264 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
266 %addr = getelementptr i32, i32* %sp0, i64 %offset
267 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
268 %val = uitofp i32 %pix_sp0.0.copyload to double
269 %vmull.i = fmul double %val, %val
273 define double @fct16(i64* nocapture %sp0, i64 %offset) {
274 ; CHECK-LABEL: fct16:
275 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
276 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
277 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
279 %addr = getelementptr i64, i64* %sp0, i64 %offset
280 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
281 %val = uitofp i64 %pix_sp0.0.copyload to double
282 %vmull.i = fmul double %val, %val
286 ; ********* 5. load with unscaled imm to float. *********
287 define float @fct17(i8* nocapture %sp0) {
289 ; CHECK-LABEL: fct17:
290 ; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
291 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
292 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
293 %bitcast = ptrtoint i8* %sp0 to i64
294 %add = add i64 %bitcast, -1
295 %addr = inttoptr i64 %add to i8*
296 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
297 %val = uitofp i8 %pix_sp0.0.copyload to float
298 %vmull.i = fmul float %val, %val
302 define float @fct18(i16* nocapture %sp0) {
303 ; CHECK-LABEL: fct18:
304 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
305 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
306 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
307 %bitcast = ptrtoint i16* %sp0 to i64
308 %add = add i64 %bitcast, 1
309 %addr = inttoptr i64 %add to i16*
310 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
311 %val = uitofp i16 %pix_sp0.0.copyload to float
312 %vmull.i = fmul float %val, %val
316 define float @fct19(i32* nocapture %sp0) {
317 ; CHECK-LABEL: fct19:
318 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
319 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
320 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
321 %bitcast = ptrtoint i32* %sp0 to i64
322 %add = add i64 %bitcast, 1
323 %addr = inttoptr i64 %add to i32*
324 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
325 %val = uitofp i32 %pix_sp0.0.copyload to float
326 %vmull.i = fmul float %val, %val
330 ; i64 -> f32 is not supported on floating point unit.
331 define float @fct20(i64* nocapture %sp0) {
332 ; CHECK-LABEL: fct20:
333 ; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
334 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
335 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
336 %bitcast = ptrtoint i64* %sp0 to i64
337 %add = add i64 %bitcast, 1
338 %addr = inttoptr i64 %add to i64*
339 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
340 %val = uitofp i64 %pix_sp0.0.copyload to float
341 %vmull.i = fmul float %val, %val
346 ; ********* 6. load with unscaled imm to double. *********
347 define double @fct21(i8* nocapture %sp0) {
349 ; CHECK-LABEL: fct21:
350 ; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
351 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
352 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
353 %bitcast = ptrtoint i8* %sp0 to i64
354 %add = add i64 %bitcast, -1
355 %addr = inttoptr i64 %add to i8*
356 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
357 %val = uitofp i8 %pix_sp0.0.copyload to double
358 %vmull.i = fmul double %val, %val
362 define double @fct22(i16* nocapture %sp0) {
363 ; CHECK-LABEL: fct22:
364 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
365 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
366 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
367 %bitcast = ptrtoint i16* %sp0 to i64
368 %add = add i64 %bitcast, 1
369 %addr = inttoptr i64 %add to i16*
370 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
371 %val = uitofp i16 %pix_sp0.0.copyload to double
372 %vmull.i = fmul double %val, %val
376 define double @fct23(i32* nocapture %sp0) {
377 ; CHECK-LABEL: fct23:
378 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
379 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
380 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
381 %bitcast = ptrtoint i32* %sp0 to i64
382 %add = add i64 %bitcast, 1
383 %addr = inttoptr i64 %add to i32*
384 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
385 %val = uitofp i32 %pix_sp0.0.copyload to double
386 %vmull.i = fmul double %val, %val
390 define double @fct24(i64* nocapture %sp0) {
391 ; CHECK-LABEL: fct24:
392 ; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
393 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
394 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
395 %bitcast = ptrtoint i64* %sp0 to i64
396 %add = add i64 %bitcast, 1
397 %addr = inttoptr i64 %add to i64*
398 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
399 %val = uitofp i64 %pix_sp0.0.copyload to double
400 %vmull.i = fmul double %val, %val
405 ; ********* 1s. load with scaled imm to float. *********
406 define float @sfct1(i8* nocapture %sp0) {
407 ; CHECK-LABEL: sfct1:
408 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
409 ; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
410 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
411 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
412 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
413 ; CHECK-A57-LABEL: sfct1:
414 ; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
415 ; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
416 ; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
418 %addr = getelementptr i8, i8* %sp0, i64 1
419 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
420 %val = sitofp i8 %pix_sp0.0.copyload to float
421 %vmull.i = fmul float %val, %val
425 define float @sfct2(i16* nocapture %sp0) {
426 ; CHECK-LABEL: sfct2:
427 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
428 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
429 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
430 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
432 %addr = getelementptr i16, i16* %sp0, i64 1
433 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
434 %val = sitofp i16 %pix_sp0.0.copyload to float
435 %vmull.i = fmul float %val, %val
439 define float @sfct3(i32* nocapture %sp0) {
440 ; CHECK-LABEL: sfct3:
441 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
442 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
443 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
445 %addr = getelementptr i32, i32* %sp0, i64 1
446 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
447 %val = sitofp i32 %pix_sp0.0.copyload to float
448 %vmull.i = fmul float %val, %val
452 ; i64 -> f32 is not supported on floating point unit.
453 define float @sfct4(i64* nocapture %sp0) {
454 ; CHECK-LABEL: sfct4:
455 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
456 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
457 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
459 %addr = getelementptr i64, i64* %sp0, i64 1
460 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
461 %val = sitofp i64 %pix_sp0.0.copyload to float
462 %vmull.i = fmul float %val, %val
466 ; ********* 2s. load with scaled register to float. *********
467 define float @sfct5(i8* nocapture %sp0, i64 %offset) {
468 ; CHECK-LABEL: sfct5:
469 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
470 ; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
471 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
472 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
473 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
474 ; CHECK-A57-LABEL: sfct5:
475 ; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
476 ; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
477 ; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
479 %addr = getelementptr i8, i8* %sp0, i64 %offset
480 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
481 %val = sitofp i8 %pix_sp0.0.copyload to float
482 %vmull.i = fmul float %val, %val
486 define float @sfct6(i16* nocapture %sp0, i64 %offset) {
487 ; CHECK-LABEL: sfct6:
488 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
489 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
490 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
491 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
493 %addr = getelementptr i16, i16* %sp0, i64 %offset
494 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
495 %val = sitofp i16 %pix_sp0.0.copyload to float
496 %vmull.i = fmul float %val, %val
500 define float @sfct7(i32* nocapture %sp0, i64 %offset) {
501 ; CHECK-LABEL: sfct7:
502 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
503 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
504 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
506 %addr = getelementptr i32, i32* %sp0, i64 %offset
507 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
508 %val = sitofp i32 %pix_sp0.0.copyload to float
509 %vmull.i = fmul float %val, %val
513 ; i64 -> f32 is not supported on floating point unit.
514 define float @sfct8(i64* nocapture %sp0, i64 %offset) {
515 ; CHECK-LABEL: sfct8:
516 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
517 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
518 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
520 %addr = getelementptr i64, i64* %sp0, i64 %offset
521 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
522 %val = sitofp i64 %pix_sp0.0.copyload to float
523 %vmull.i = fmul float %val, %val
527 ; ********* 3s. load with scaled imm to double. *********
528 define double @sfct9(i8* nocapture %sp0) {
529 ; CHECK-LABEL: sfct9:
530 ; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
531 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
532 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
534 %addr = getelementptr i8, i8* %sp0, i64 1
535 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
536 %val = sitofp i8 %pix_sp0.0.copyload to double
537 %vmull.i = fmul double %val, %val
541 define double @sfct10(i16* nocapture %sp0) {
542 ; CHECK-LABEL: sfct10:
543 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
544 ; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
545 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
546 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
547 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
548 ; CHECK-A57-LABEL: sfct10:
549 ; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, #2]
550 ; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
551 ; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
553 %addr = getelementptr i16, i16* %sp0, i64 1
554 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
555 %val = sitofp i16 %pix_sp0.0.copyload to double
556 %vmull.i = fmul double %val, %val
560 define double @sfct11(i32* nocapture %sp0) {
561 ; CHECK-LABEL: sfct11:
562 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
563 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
564 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
565 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
567 %addr = getelementptr i32, i32* %sp0, i64 1
568 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
569 %val = sitofp i32 %pix_sp0.0.copyload to double
570 %vmull.i = fmul double %val, %val
574 define double @sfct12(i64* nocapture %sp0) {
575 ; CHECK-LABEL: sfct12:
576 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
577 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
578 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
580 %addr = getelementptr i64, i64* %sp0, i64 1
581 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
582 %val = sitofp i64 %pix_sp0.0.copyload to double
583 %vmull.i = fmul double %val, %val
587 ; ********* 4s. load with scaled register to double. *********
588 define double @sfct13(i8* nocapture %sp0, i64 %offset) {
589 ; CHECK-LABEL: sfct13:
590 ; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
591 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
592 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
594 %addr = getelementptr i8, i8* %sp0, i64 %offset
595 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
596 %val = sitofp i8 %pix_sp0.0.copyload to double
597 %vmull.i = fmul double %val, %val
601 define double @sfct14(i16* nocapture %sp0, i64 %offset) {
602 ; CHECK-LABEL: sfct14:
603 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
604 ; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
605 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
606 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
607 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
608 ; CHECK-A57-LABEL: sfct14:
609 ; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
610 ; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
611 ; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
613 %addr = getelementptr i16, i16* %sp0, i64 %offset
614 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
615 %val = sitofp i16 %pix_sp0.0.copyload to double
616 %vmull.i = fmul double %val, %val
620 define double @sfct15(i32* nocapture %sp0, i64 %offset) {
621 ; CHECK-LABEL: sfct15:
622 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
623 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
624 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
625 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
627 %addr = getelementptr i32, i32* %sp0, i64 %offset
628 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
629 %val = sitofp i32 %pix_sp0.0.copyload to double
630 %vmull.i = fmul double %val, %val
634 define double @sfct16(i64* nocapture %sp0, i64 %offset) {
635 ; CHECK-LABEL: sfct16:
636 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
637 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
638 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
640 %addr = getelementptr i64, i64* %sp0, i64 %offset
641 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
642 %val = sitofp i64 %pix_sp0.0.copyload to double
643 %vmull.i = fmul double %val, %val
647 ; ********* 5s. load with unscaled imm to float. *********
648 define float @sfct17(i8* nocapture %sp0) {
650 ; CHECK-LABEL: sfct17:
651 ; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
652 ; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
653 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
654 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
655 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
656 ; CHECK-A57-LABEL: sfct17:
657 ; CHECK-A57: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
658 ; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
659 ; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
660 %bitcast = ptrtoint i8* %sp0 to i64
661 %add = add i64 %bitcast, -1
662 %addr = inttoptr i64 %add to i8*
663 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
664 %val = sitofp i8 %pix_sp0.0.copyload to float
665 %vmull.i = fmul float %val, %val
669 define float @sfct18(i16* nocapture %sp0) {
670 ; CHECK-LABEL: sfct18:
671 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
672 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
673 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
674 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
675 %bitcast = ptrtoint i16* %sp0 to i64
676 %add = add i64 %bitcast, 1
677 %addr = inttoptr i64 %add to i16*
678 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
679 %val = sitofp i16 %pix_sp0.0.copyload to float
680 %vmull.i = fmul float %val, %val
684 define float @sfct19(i32* nocapture %sp0) {
685 ; CHECK-LABEL: sfct19:
686 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
687 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
688 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
689 %bitcast = ptrtoint i32* %sp0 to i64
690 %add = add i64 %bitcast, 1
691 %addr = inttoptr i64 %add to i32*
692 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
693 %val = sitofp i32 %pix_sp0.0.copyload to float
694 %vmull.i = fmul float %val, %val
698 ; i64 -> f32 is not supported on floating point unit.
699 define float @sfct20(i64* nocapture %sp0) {
700 ; CHECK-LABEL: sfct20:
701 ; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
702 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
703 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
704 %bitcast = ptrtoint i64* %sp0 to i64
705 %add = add i64 %bitcast, 1
706 %addr = inttoptr i64 %add to i64*
707 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
708 %val = sitofp i64 %pix_sp0.0.copyload to float
709 %vmull.i = fmul float %val, %val
714 ; ********* 6s. load with unscaled imm to double. *********
715 define double @sfct21(i8* nocapture %sp0) {
717 ; CHECK-LABEL: sfct21:
718 ; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
719 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
720 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
721 %bitcast = ptrtoint i8* %sp0 to i64
722 %add = add i64 %bitcast, -1
723 %addr = inttoptr i64 %add to i8*
724 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
725 %val = sitofp i8 %pix_sp0.0.copyload to double
726 %vmull.i = fmul double %val, %val
730 define double @sfct22(i16* nocapture %sp0) {
731 ; CHECK-LABEL: sfct22:
732 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
733 ; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
734 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
735 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
736 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
737 ; CHECK-A57-LABEL: sfct22:
738 ; CHECK-A57: ldursh w[[REGNUM:[0-9]+]], [x0, #1]
739 ; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
740 ; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
741 %bitcast = ptrtoint i16* %sp0 to i64
742 %add = add i64 %bitcast, 1
743 %addr = inttoptr i64 %add to i16*
744 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
745 %val = sitofp i16 %pix_sp0.0.copyload to double
746 %vmull.i = fmul double %val, %val
750 define double @sfct23(i32* nocapture %sp0) {
751 ; CHECK-LABEL: sfct23:
752 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
753 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
754 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
755 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
756 %bitcast = ptrtoint i32* %sp0 to i64
757 %add = add i64 %bitcast, 1
758 %addr = inttoptr i64 %add to i32*
759 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
760 %val = sitofp i32 %pix_sp0.0.copyload to double
761 %vmull.i = fmul double %val, %val
765 define double @sfct24(i64* nocapture %sp0) {
766 ; CHECK-LABEL: sfct24:
767 ; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
768 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
769 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
770 %bitcast = ptrtoint i64* %sp0 to i64
771 %add = add i64 %bitcast, 1
772 %addr = inttoptr i64 %add to i64*
773 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
774 %val = sitofp i64 %pix_sp0.0.copyload to double
775 %vmull.i = fmul double %val, %val
780 ; Check that we do not use SSHLL code sequence when code size is a concern.
781 define float @codesize_sfct17(i8* nocapture %sp0) optsize {
783 ; CHECK-LABEL: codesize_sfct17:
784 ; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
785 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
786 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
787 %bitcast = ptrtoint i8* %sp0 to i64
788 %add = add i64 %bitcast, -1
789 %addr = inttoptr i64 %add to i8*
790 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
791 %val = sitofp i8 %pix_sp0.0.copyload to float
792 %vmull.i = fmul float %val, %val
796 define double @codesize_sfct11(i32* nocapture %sp0) minsize {
797 ; CHECK-LABEL: codesize_sfct11:
798 ; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
799 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
800 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
802 %addr = getelementptr i32, i32* %sp0, i64 1
803 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
804 %val = sitofp i32 %pix_sp0.0.copyload to double
805 %vmull.i = fmul double %val, %val
809 ; Adding fp128 custom lowering makes these a little fragile since we have to
810 ; return the correct mix of Legal/Expand from the custom method.
812 ; rdar://problem/14991489
814 define float @float_from_i128(i128 %in) {
815 ; CHECK-LABEL: float_from_i128:
816 ; CHECK: bl {{_?__floatuntisf}}
817 %conv = uitofp i128 %in to float
821 define double @double_from_i128(i128 %in) {
822 ; CHECK-LABEL: double_from_i128:
823 ; CHECK: bl {{_?__floattidf}}
824 %conv = sitofp i128 %in to double
828 define fp128 @fp128_from_i128(i128 %in) {
829 ; CHECK-LABEL: fp128_from_i128:
830 ; CHECK: bl {{_?__floatuntitf}}
831 %conv = uitofp i128 %in to fp128
835 define i128 @i128_from_float(float %in) {
836 ; CHECK-LABEL: i128_from_float:
837 ; CHECK: bl {{_?__fixsfti}}
838 %conv = fptosi float %in to i128
842 define i128 @i128_from_double(double %in) {
843 ; CHECK-LABEL: i128_from_double:
844 ; CHECK: bl {{_?__fixunsdfti}}
845 %conv = fptoui double %in to i128
849 define i128 @i128_from_fp128(fp128 %in) {
850 ; CHECK-LABEL: i128_from_fp128:
851 ; CHECK: bl {{_?__fixtfti}}
852 %conv = fptosi fp128 %in to i128