1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
3 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=-neon | FileCheck %s --check-prefixes=CHECK,CHECK-NO-NEON
; Intrinsics called by the tests in this file: unsigned max (i16), unsigned
; min (i64) and the <4 x float> / <4 x i32> form of ldexp.
5 declare i16 @llvm.umax.i16(i16, i16)
6 declare i64 @llvm.umin.i64(i64, i64)
8 declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)
; Splat 9.0 multiplied by uitofp(1 << %i) on <4 x i32>. The expected code
; keeps the shift, the unsigned int-to-fp convert and the fmul, both with
; NEON (vector ops) and without it (one lsl/ucvtf/fmul per lane).
10 define <4 x float> @fmul_pow2_4xfloat(<4 x i32> %i) {
11 ; CHECK-NEON-LABEL: fmul_pow2_4xfloat:
12 ; CHECK-NEON: // %bb.0:
13 ; CHECK-NEON-NEXT: movi v1.4s, #1
14 ; CHECK-NEON-NEXT: ushl v0.4s, v1.4s, v0.4s
15 ; CHECK-NEON-NEXT: fmov v1.4s, #9.00000000
16 ; CHECK-NEON-NEXT: ucvtf v0.4s, v0.4s
17 ; CHECK-NEON-NEXT: fmul v0.4s, v0.4s, v1.4s
18 ; CHECK-NEON-NEXT: ret
20 ; CHECK-NO-NEON-LABEL: fmul_pow2_4xfloat:
21 ; CHECK-NO-NEON: // %bb.0:
22 ; CHECK-NO-NEON-NEXT: mov w8, #1 // =0x1
23 ; CHECK-NO-NEON-NEXT: fmov s3, #9.00000000
24 ; CHECK-NO-NEON-NEXT: lsl w9, w8, w0
25 ; CHECK-NO-NEON-NEXT: lsl w10, w8, w1
26 ; CHECK-NO-NEON-NEXT: lsl w11, w8, w2
27 ; CHECK-NO-NEON-NEXT: lsl w8, w8, w3
28 ; CHECK-NO-NEON-NEXT: ucvtf s1, w10
29 ; CHECK-NO-NEON-NEXT: ucvtf s0, w9
30 ; CHECK-NO-NEON-NEXT: ucvtf s2, w11
31 ; CHECK-NO-NEON-NEXT: ucvtf s4, w8
32 ; CHECK-NO-NEON-NEXT: fmul s0, s0, s3
33 ; CHECK-NO-NEON-NEXT: fmul s1, s1, s3
34 ; CHECK-NO-NEON-NEXT: fmul s2, s2, s3
35 ; CHECK-NO-NEON-NEXT: fmul s3, s4, s3
36 ; CHECK-NO-NEON-NEXT: ret
37 %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
38 %p2_f = uitofp <4 x i32> %p2 to <4 x float>
39 %r = fmul <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
; Vector llvm.ldexp of splat 9.0 by %i. In both configurations the expected
; code makes four separate calls to ldexpf, one per lane, with 9.0 re-
; materialised in s0 before each call and the lane results reassembled
; afterwards.
43 define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
44 ; CHECK-NEON-LABEL: fmul_pow2_ldexp_4xfloat:
45 ; CHECK-NEON: // %bb.0:
46 ; CHECK-NEON-NEXT: sub sp, sp, #48
47 ; CHECK-NEON-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
48 ; CHECK-NEON-NEXT: .cfi_def_cfa_offset 48
49 ; CHECK-NEON-NEXT: .cfi_offset w30, -16
50 ; CHECK-NEON-NEXT: mov w0, v0.s[1]
51 ; CHECK-NEON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
52 ; CHECK-NEON-NEXT: fmov s0, #9.00000000
53 ; CHECK-NEON-NEXT: bl ldexpf
54 ; CHECK-NEON-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
55 ; CHECK-NEON-NEXT: // kill: def $s0 killed $s0 def $q0
56 ; CHECK-NEON-NEXT: str q0, [sp] // 16-byte Folded Spill
57 ; CHECK-NEON-NEXT: fmov s0, #9.00000000
58 ; CHECK-NEON-NEXT: fmov w0, s1
59 ; CHECK-NEON-NEXT: bl ldexpf
60 ; CHECK-NEON-NEXT: ldr q1, [sp] // 16-byte Folded Reload
61 ; CHECK-NEON-NEXT: // kill: def $s0 killed $s0 def $q0
62 ; CHECK-NEON-NEXT: mov v0.s[1], v1.s[0]
63 ; CHECK-NEON-NEXT: str q0, [sp] // 16-byte Folded Spill
64 ; CHECK-NEON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
65 ; CHECK-NEON-NEXT: mov w0, v0.s[2]
66 ; CHECK-NEON-NEXT: fmov s0, #9.00000000
67 ; CHECK-NEON-NEXT: bl ldexpf
68 ; CHECK-NEON-NEXT: ldr q1, [sp] // 16-byte Folded Reload
69 ; CHECK-NEON-NEXT: // kill: def $s0 killed $s0 def $q0
70 ; CHECK-NEON-NEXT: mov v1.s[2], v0.s[0]
71 ; CHECK-NEON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
72 ; CHECK-NEON-NEXT: mov w0, v0.s[3]
73 ; CHECK-NEON-NEXT: fmov s0, #9.00000000
74 ; CHECK-NEON-NEXT: str q1, [sp] // 16-byte Folded Spill
75 ; CHECK-NEON-NEXT: bl ldexpf
76 ; CHECK-NEON-NEXT: ldr q1, [sp] // 16-byte Folded Reload
77 ; CHECK-NEON-NEXT: // kill: def $s0 killed $s0 def $q0
78 ; CHECK-NEON-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
79 ; CHECK-NEON-NEXT: mov v1.s[3], v0.s[0]
80 ; CHECK-NEON-NEXT: mov v0.16b, v1.16b
81 ; CHECK-NEON-NEXT: add sp, sp, #48
82 ; CHECK-NEON-NEXT: ret
84 ; CHECK-NO-NEON-LABEL: fmul_pow2_ldexp_4xfloat:
85 ; CHECK-NO-NEON: // %bb.0:
86 ; CHECK-NO-NEON-NEXT: str d10, [sp, #-64]! // 8-byte Folded Spill
87 ; CHECK-NO-NEON-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
88 ; CHECK-NO-NEON-NEXT: stp x30, x21, [sp, #32] // 16-byte Folded Spill
89 ; CHECK-NO-NEON-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
90 ; CHECK-NO-NEON-NEXT: .cfi_def_cfa_offset 64
91 ; CHECK-NO-NEON-NEXT: .cfi_offset w19, -8
92 ; CHECK-NO-NEON-NEXT: .cfi_offset w20, -16
93 ; CHECK-NO-NEON-NEXT: .cfi_offset w21, -24
94 ; CHECK-NO-NEON-NEXT: .cfi_offset w30, -32
95 ; CHECK-NO-NEON-NEXT: .cfi_offset b8, -40
96 ; CHECK-NO-NEON-NEXT: .cfi_offset b9, -48
97 ; CHECK-NO-NEON-NEXT: .cfi_offset b10, -64
98 ; CHECK-NO-NEON-NEXT: fmov s0, #9.00000000
99 ; CHECK-NO-NEON-NEXT: mov w19, w3
100 ; CHECK-NO-NEON-NEXT: mov w20, w2
101 ; CHECK-NO-NEON-NEXT: mov w21, w1
102 ; CHECK-NO-NEON-NEXT: bl ldexpf
103 ; CHECK-NO-NEON-NEXT: fmov s8, s0
104 ; CHECK-NO-NEON-NEXT: fmov s0, #9.00000000
105 ; CHECK-NO-NEON-NEXT: mov w0, w21
106 ; CHECK-NO-NEON-NEXT: bl ldexpf
107 ; CHECK-NO-NEON-NEXT: fmov s9, s0
108 ; CHECK-NO-NEON-NEXT: fmov s0, #9.00000000
109 ; CHECK-NO-NEON-NEXT: mov w0, w20
110 ; CHECK-NO-NEON-NEXT: bl ldexpf
111 ; CHECK-NO-NEON-NEXT: fmov s10, s0
112 ; CHECK-NO-NEON-NEXT: fmov s0, #9.00000000
113 ; CHECK-NO-NEON-NEXT: mov w0, w19
114 ; CHECK-NO-NEON-NEXT: bl ldexpf
115 ; CHECK-NO-NEON-NEXT: fmov s3, s0
116 ; CHECK-NO-NEON-NEXT: fmov s0, s8
117 ; CHECK-NO-NEON-NEXT: fmov s1, s9
118 ; CHECK-NO-NEON-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
119 ; CHECK-NO-NEON-NEXT: fmov s2, s10
120 ; CHECK-NO-NEON-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
121 ; CHECK-NO-NEON-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
122 ; CHECK-NO-NEON-NEXT: ldr d10, [sp], #64 // 8-byte Folded Reload
123 ; CHECK-NO-NEON-NEXT: ret
124 %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
; Splat 9.0 divided by uitofp(1 << %i) on <4 x i32>. The expected code
; contains no shift-of-one, convert or fdiv: %i is shifted left by 23 and
; subtracted, as an integer, from the bit pattern of 9.0 (0x41100000 in the
; no-NEON output).
128 define <4 x float> @fdiv_pow2_4xfloat(<4 x i32> %i) {
129 ; CHECK-NEON-LABEL: fdiv_pow2_4xfloat:
130 ; CHECK-NEON: // %bb.0:
131 ; CHECK-NEON-NEXT: fmov v1.4s, #9.00000000
132 ; CHECK-NEON-NEXT: shl v0.4s, v0.4s, #23
133 ; CHECK-NEON-NEXT: sub v0.4s, v1.4s, v0.4s
134 ; CHECK-NEON-NEXT: ret
136 ; CHECK-NO-NEON-LABEL: fdiv_pow2_4xfloat:
137 ; CHECK-NO-NEON: // %bb.0:
138 ; CHECK-NO-NEON-NEXT: mov w8, #1091567616 // =0x41100000
139 ; CHECK-NO-NEON-NEXT: sub w9, w8, w0, lsl #23
140 ; CHECK-NO-NEON-NEXT: sub w10, w8, w1, lsl #23
141 ; CHECK-NO-NEON-NEXT: sub w11, w8, w2, lsl #23
142 ; CHECK-NO-NEON-NEXT: sub w8, w8, w3, lsl #23
143 ; CHECK-NO-NEON-NEXT: fmov s0, w9
144 ; CHECK-NO-NEON-NEXT: fmov s1, w10
145 ; CHECK-NO-NEON-NEXT: fmov s2, w11
146 ; CHECK-NO-NEON-NEXT: fmov s3, w8
147 ; CHECK-NO-NEON-NEXT: ret
148 %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
149 %p2_f = uitofp <4 x i32> %p2 to <4 x float>
150 %r = fdiv <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
; Scalar double case: 9.0 * uitofp(1 << %cnt) with a nuw shift of an i64.
; The expected code is the straightforward lsl, ucvtf, fmul sequence.
154 define double @fmul_pow_shl_cnt(i64 %cnt) nounwind {
155 ; CHECK-LABEL: fmul_pow_shl_cnt:
157 ; CHECK-NEXT: mov w8, #1 // =0x1
158 ; CHECK-NEXT: fmov d1, #9.00000000
159 ; CHECK-NEXT: lsl x8, x8, x0
160 ; CHECK-NEXT: ucvtf d0, x8
161 ; CHECK-NEXT: fmul d0, d0, d1
163 %shl = shl nuw i64 1, %cnt
164 %conv = uitofp i64 %shl to double
165 %mul = fmul double 9.000000e+00, %conv
; Scalar double case with shift base 2 and a negative multiplier:
; -9.0 * uitofp(2 << %cnt). The expected code is lsl, ucvtf, fmul.
169 define double @fmul_pow_shl_cnt2(i64 %cnt) nounwind {
170 ; CHECK-LABEL: fmul_pow_shl_cnt2:
172 ; CHECK-NEXT: mov w8, #2 // =0x2
173 ; CHECK-NEXT: fmov d1, #-9.00000000
174 ; CHECK-NEXT: lsl x8, x8, x0
175 ; CHECK-NEXT: ucvtf d0, x8
176 ; CHECK-NEXT: fmul d0, d0, d1
178 %shl = shl nuw i64 2, %cnt
179 %conv = uitofp i64 %shl to double
180 %mul = fmul double -9.000000e+00, %conv
; The integer operand is a select between (1 << %cnt) when %c is true and
; (2 << %cnt) otherwise. The expected code picks the base first (1, bumped
; to 2 by cinc when bit 0 of w1 is clear), performs a single shift, then
; ucvtf and fmul by 9.0.
184 define float @fmul_pow_select(i32 %cnt, i1 %c) nounwind {
185 ; CHECK-LABEL: fmul_pow_select:
187 ; CHECK-NEXT: mov w8, #1 // =0x1
188 ; CHECK-NEXT: tst w1, #0x1
189 ; CHECK-NEXT: fmov s1, #9.00000000
190 ; CHECK-NEXT: cinc w8, w8, eq
191 ; CHECK-NEXT: lsl w8, w8, w0
192 ; CHECK-NEXT: ucvtf s0, w8
193 ; CHECK-NEXT: fmul s0, s0, s1
195 %shl2 = shl nuw i32 2, %cnt
196 %shl1 = shl nuw i32 1, %cnt
197 %shl = select i1 %c, i32 %shl1, i32 %shl2
198 %conv = uitofp i32 %shl to float
199 %mul = fmul float 9.000000e+00, %conv
; The integer operand is umin(8 << %cnt, 8192), converted from i64 to float
; and multiplied by 9.0. The expected code clamps with cmp/csel (lo) against
; 8192 and then does ucvtf and fmul.
203 define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind {
204 ; CHECK-LABEL: fmul_fly_pow_mul_min_pow2:
206 ; CHECK-NEXT: mov w8, #8 // =0x8
207 ; CHECK-NEXT: mov w9, #8192 // =0x2000
208 ; CHECK-NEXT: fmov s1, #9.00000000
209 ; CHECK-NEXT: lsl x8, x8, x0
210 ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192
211 ; CHECK-NEXT: csel x8, x8, x9, lo
212 ; CHECK-NEXT: ucvtf s0, x8
213 ; CHECK-NEXT: fmul s0, s0, s1
215 %shl8 = shl nuw i64 8, %cnt
216 %shl = call i64 @llvm.umin.i64(i64 %shl8, i64 8192)
217 %conv = uitofp i64 %shl to float
218 %mul = fmul float 9.000000e+00, %conv
; i16 case: umax(1 << %cnt, 2 << %cnt) converted to double and multiplied by
; 3.0. The expected code masks both shifted values to 16 bits, selects the
; larger one with cmp/csel (hi), then does ucvtf and fmul.
222 define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind {
223 ; CHECK-LABEL: fmul_pow_mul_max_pow2:
225 ; CHECK-NEXT: mov w8, #2 // =0x2
226 ; CHECK-NEXT: mov w9, #1 // =0x1
227 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
228 ; CHECK-NEXT: fmov d1, #3.00000000
229 ; CHECK-NEXT: lsl w8, w8, w0
230 ; CHECK-NEXT: lsl w9, w9, w0
231 ; CHECK-NEXT: and w8, w8, #0xfffe
232 ; CHECK-NEXT: and w9, w9, #0xffff
233 ; CHECK-NEXT: cmp w9, w8
234 ; CHECK-NEXT: csel w8, w9, w8, hi
235 ; CHECK-NEXT: ucvtf d0, w8
236 ; CHECK-NEXT: fmul d0, d0, d1
238 %shl2 = shl nuw i16 2, %cnt
239 %shl1 = shl nuw i16 1, %cnt
240 %shl = call i16 @llvm.umax.i16(i16 %shl1, i16 %shl2)
241 %conv = uitofp i16 %shl to double
242 %mul = fmul double 3.000000e+00, %conv
; The shifted value is the argument %v rather than a constant, so (as the
; test name says) the integer operand may not be a power of two. The
; expected code is lsl, ucvtf, fmul by 9.0.
246 define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
247 ; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
249 ; CHECK-NEXT: lsl x8, x0, x1
250 ; CHECK-NEXT: fmov d1, #9.00000000
251 ; CHECK-NEXT: ucvtf d0, x8
252 ; CHECK-NEXT: fmul d0, d0, d1
254 %shl = shl nuw i64 %v, %cnt
255 %conv = uitofp i64 %shl to double
256 %mul = fmul double 9.000000e+00, %conv
; <2 x i64> (2 << %cnt) converted to <2 x float> and multiplied by splat
; 15.0. With NEON the expected code converts to 2d doubles with ucvtf and
; narrows with fcvtn before the fmul; without NEON each lane is lsl, ucvtf,
; fmul.
260 define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nounwind {
261 ; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
262 ; CHECK-NEON: // %bb.0:
263 ; CHECK-NEON-NEXT: mov w8, #2 // =0x2
264 ; CHECK-NEON-NEXT: dup v1.2d, x8
265 ; CHECK-NEON-NEXT: ushl v0.2d, v1.2d, v0.2d
266 ; CHECK-NEON-NEXT: fmov v1.2s, #15.00000000
267 ; CHECK-NEON-NEXT: ucvtf v0.2d, v0.2d
268 ; CHECK-NEON-NEXT: fcvtn v0.2s, v0.2d
269 ; CHECK-NEON-NEXT: fmul v0.2s, v0.2s, v1.2s
270 ; CHECK-NEON-NEXT: ret
272 ; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
273 ; CHECK-NO-NEON: // %bb.0:
274 ; CHECK-NO-NEON-NEXT: mov w8, #2 // =0x2
275 ; CHECK-NO-NEON-NEXT: fmov s2, #15.00000000
276 ; CHECK-NO-NEON-NEXT: lsl x9, x8, x0
277 ; CHECK-NO-NEON-NEXT: lsl x8, x8, x1
278 ; CHECK-NO-NEON-NEXT: ucvtf s1, x8
279 ; CHECK-NO-NEON-NEXT: ucvtf s0, x9
280 ; CHECK-NO-NEON-NEXT: fmul s0, s0, s2
281 ; CHECK-NO-NEON-NEXT: fmul s1, s1, s2
282 ; CHECK-NO-NEON-NEXT: ret
283 %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
284 %conv = uitofp <2 x i64> %shl to <2 x float>
285 %mul = fmul <2 x float> <float 15.000000e+00, float 15.000000e+00>, %conv
; <2 x i64> (2 << %cnt) converted to <2 x double> and multiplied by splat
; 15.0. The expected code is ushl, ucvtf, fmul with NEON and the per-lane
; scalar equivalent without it.
289 define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
290 ; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec:
291 ; CHECK-NEON: // %bb.0:
292 ; CHECK-NEON-NEXT: mov w8, #2 // =0x2
293 ; CHECK-NEON-NEXT: dup v1.2d, x8
294 ; CHECK-NEON-NEXT: ushl v0.2d, v1.2d, v0.2d
295 ; CHECK-NEON-NEXT: fmov v1.2d, #15.00000000
296 ; CHECK-NEON-NEXT: ucvtf v0.2d, v0.2d
297 ; CHECK-NEON-NEXT: fmul v0.2d, v0.2d, v1.2d
298 ; CHECK-NEON-NEXT: ret
300 ; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec:
301 ; CHECK-NO-NEON: // %bb.0:
302 ; CHECK-NO-NEON-NEXT: mov w8, #2 // =0x2
303 ; CHECK-NO-NEON-NEXT: fmov d2, #15.00000000
304 ; CHECK-NO-NEON-NEXT: lsl x9, x8, x0
305 ; CHECK-NO-NEON-NEXT: lsl x8, x8, x1
306 ; CHECK-NO-NEON-NEXT: ucvtf d1, x8
307 ; CHECK-NO-NEON-NEXT: ucvtf d0, x9
308 ; CHECK-NO-NEON-NEXT: fmul d0, d0, d2
309 ; CHECK-NO-NEON-NEXT: fmul d1, d1, d2
310 ; CHECK-NO-NEON-NEXT: ret
311 %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
312 %conv = uitofp <2 x i64> %shl to <2 x double>
313 %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
314 ret <2 x double> %mul
; fmul of splat 5.0 with uitofp(2 << %cnt) whose result feeds an fadd with
; %add. The expected code keeps shift, ucvtf and a separate fmul followed
; by fadd in both configurations.
317 define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float> %add) nounwind {
318 ; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
319 ; CHECK-NEON: // %bb.0:
320 ; CHECK-NEON-NEXT: movi v2.4s, #2
321 ; CHECK-NEON-NEXT: ushl v0.4s, v2.4s, v0.4s
322 ; CHECK-NEON-NEXT: fmov v2.4s, #5.00000000
323 ; CHECK-NEON-NEXT: ucvtf v0.4s, v0.4s
324 ; CHECK-NEON-NEXT: fmul v0.4s, v0.4s, v2.4s
325 ; CHECK-NEON-NEXT: fadd v0.4s, v0.4s, v1.4s
326 ; CHECK-NEON-NEXT: ret
328 ; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
329 ; CHECK-NO-NEON: // %bb.0:
330 ; CHECK-NO-NEON-NEXT: mov w8, #2 // =0x2
331 ; CHECK-NO-NEON-NEXT: fmov s16, #5.00000000
332 ; CHECK-NO-NEON-NEXT: lsl w9, w8, w3
333 ; CHECK-NO-NEON-NEXT: lsl w10, w8, w0
334 ; CHECK-NO-NEON-NEXT: lsl w11, w8, w2
335 ; CHECK-NO-NEON-NEXT: lsl w8, w8, w1
336 ; CHECK-NO-NEON-NEXT: ucvtf s4, w10
337 ; CHECK-NO-NEON-NEXT: ucvtf s5, w9
338 ; CHECK-NO-NEON-NEXT: ucvtf s7, w11
339 ; CHECK-NO-NEON-NEXT: ucvtf s6, w8
340 ; CHECK-NO-NEON-NEXT: fmul s5, s5, s16
341 ; CHECK-NO-NEON-NEXT: fmul s4, s4, s16
342 ; CHECK-NO-NEON-NEXT: fmul s7, s7, s16
343 ; CHECK-NO-NEON-NEXT: fmul s6, s6, s16
344 ; CHECK-NO-NEON-NEXT: fadd s0, s4, s0
345 ; CHECK-NO-NEON-NEXT: fadd s3, s5, s3
346 ; CHECK-NO-NEON-NEXT: fadd s1, s6, s1
347 ; CHECK-NO-NEON-NEXT: fadd s2, s7, s2
348 ; CHECK-NO-NEON-NEXT: ret
349 %shl = shl nsw nuw <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %cnt
350 %conv = uitofp <4 x i32> %shl to <4 x float>
351 %mul = fmul <4 x float> <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>, %conv
352 %res = fadd <4 x float> %mul, %add
; Splat shift base 2 with the non-splat multiplier <15.0, 14.0>. With NEON
; the expected code loads the multiplier from the constant pool; in both
; configurations the shift, ucvtf and fmul remain.
356 define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwind {
357 ; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo:
358 ; CHECK-NEON: // %bb.0:
359 ; CHECK-NEON-NEXT: mov w8, #2 // =0x2
360 ; CHECK-NEON-NEXT: dup v1.2d, x8
361 ; CHECK-NEON-NEXT: adrp x8, .LCPI12_0
362 ; CHECK-NEON-NEXT: ushl v0.2d, v1.2d, v0.2d
363 ; CHECK-NEON-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
364 ; CHECK-NEON-NEXT: ucvtf v0.2d, v0.2d
365 ; CHECK-NEON-NEXT: fmul v0.2d, v0.2d, v1.2d
366 ; CHECK-NEON-NEXT: ret
368 ; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo:
369 ; CHECK-NO-NEON: // %bb.0:
370 ; CHECK-NO-NEON-NEXT: mov w8, #2 // =0x2
371 ; CHECK-NO-NEON-NEXT: fmov d2, #15.00000000
372 ; CHECK-NO-NEON-NEXT: fmov d3, #14.00000000
373 ; CHECK-NO-NEON-NEXT: lsl x9, x8, x0
374 ; CHECK-NO-NEON-NEXT: lsl x8, x8, x1
375 ; CHECK-NO-NEON-NEXT: ucvtf d1, x8
376 ; CHECK-NO-NEON-NEXT: ucvtf d0, x9
377 ; CHECK-NO-NEON-NEXT: fmul d0, d0, d2
378 ; CHECK-NO-NEON-NEXT: fmul d1, d1, d3
379 ; CHECK-NO-NEON-NEXT: ret
380 %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
381 %conv = uitofp <2 x i64> %shl to <2 x double>
382 %mul = fmul <2 x double> <double 15.000000e+00, double 14.000000e+00>, %conv
383 ret <2 x double> %mul
; Non-splat shift base <2, 1> with the splat multiplier 15.0. With NEON the
; expected code loads the shift base from the constant pool; in both
; configurations the shift, ucvtf and fmul remain.
386 define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) nounwind {
387 ; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo:
388 ; CHECK-NEON: // %bb.0:
389 ; CHECK-NEON-NEXT: adrp x8, .LCPI13_0
390 ; CHECK-NEON-NEXT: ldr q1, [x8, :lo12:.LCPI13_0]
391 ; CHECK-NEON-NEXT: ushl v0.2d, v1.2d, v0.2d
392 ; CHECK-NEON-NEXT: fmov v1.2d, #15.00000000
393 ; CHECK-NEON-NEXT: ucvtf v0.2d, v0.2d
394 ; CHECK-NEON-NEXT: fmul v0.2d, v0.2d, v1.2d
395 ; CHECK-NEON-NEXT: ret
397 ; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo:
398 ; CHECK-NO-NEON: // %bb.0:
399 ; CHECK-NO-NEON-NEXT: mov w8, #2 // =0x2
400 ; CHECK-NO-NEON-NEXT: mov w9, #1 // =0x1
401 ; CHECK-NO-NEON-NEXT: fmov d2, #15.00000000
402 ; CHECK-NO-NEON-NEXT: lsl x8, x8, x0
403 ; CHECK-NO-NEON-NEXT: lsl x9, x9, x1
404 ; CHECK-NO-NEON-NEXT: ucvtf d1, x9
405 ; CHECK-NO-NEON-NEXT: ucvtf d0, x8
406 ; CHECK-NO-NEON-NEXT: fmul d0, d0, d2
407 ; CHECK-NO-NEON-NEXT: fmul d1, d1, d2
408 ; CHECK-NO-NEON-NEXT: ret
409 %shl = shl nsw nuw <2 x i64> <i64 2, i64 1>, %cnt
410 %conv = uitofp <2 x i64> %shl to <2 x double>
411 %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
412 ret <2 x double> %mul
; Large multiplier 9.745314e+288 with an i64 shift count. The expected code
; keeps lsl and ucvtf and multiplies by the constant loaded from the
; constant pool.
416 define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) nounwind {
417 ; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
419 ; CHECK-NEXT: mov w8, #1 // =0x1
420 ; CHECK-NEXT: lsl x8, x8, x0
421 ; CHECK-NEXT: ucvtf d0, x8
422 ; CHECK-NEXT: adrp x8, .LCPI14_0
423 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0]
424 ; CHECK-NEXT: fmul d0, d0, d1
426 %shl = shl nuw i64 1, %cnt
427 %conv = uitofp i64 %shl to double
428 %mul = fmul double 9.745314e+288, %conv
; Large multiplier 9.745314e+288 with an i16 shift. The expected code is
; lsl, a mask to 16 bits, ucvtf, and an fmul by the constant loaded from the
; constant pool.
432 define double @fmul_pow_shl_cnt_safe(i16 %cnt) nounwind {
433 ; CHECK-LABEL: fmul_pow_shl_cnt_safe:
435 ; CHECK-NEXT: mov w8, #1 // =0x1
436 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
437 ; CHECK-NEXT: lsl w8, w8, w0
438 ; CHECK-NEXT: and w8, w8, #0xffff
439 ; CHECK-NEXT: ucvtf d0, w8
440 ; CHECK-NEXT: adrp x8, .LCPI15_0
441 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI15_0]
442 ; CHECK-NEXT: fmul d0, d0, d1
444 %shl = shl nuw i16 1, %cnt
445 %conv = uitofp i16 %shl to double
446 %mul = fmul double 9.745314e+288, %conv
; Splat 1.0 divided by uitofp(1 << %cnt), <2 x i64> to <2 x double>. The
; expected code has no fdiv: %cnt is shifted left by 52 and subtracted, as
; an integer, from the bit pattern of 1.0 (0x3ff0000000000000 in the
; no-NEON output).
450 define <2 x double> @fdiv_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
451 ; CHECK-NEON-LABEL: fdiv_pow_shl_cnt_vec:
452 ; CHECK-NEON: // %bb.0:
453 ; CHECK-NEON-NEXT: fmov v1.2d, #1.00000000
454 ; CHECK-NEON-NEXT: shl v0.2d, v0.2d, #52
455 ; CHECK-NEON-NEXT: sub v0.2d, v1.2d, v0.2d
456 ; CHECK-NEON-NEXT: ret
458 ; CHECK-NO-NEON-LABEL: fdiv_pow_shl_cnt_vec:
459 ; CHECK-NO-NEON: // %bb.0:
460 ; CHECK-NO-NEON-NEXT: mov x8, #4607182418800017408 // =0x3ff0000000000000
461 ; CHECK-NO-NEON-NEXT: sub x9, x8, x0, lsl #52
462 ; CHECK-NO-NEON-NEXT: sub x8, x8, x1, lsl #52
463 ; CHECK-NO-NEON-NEXT: fmov d0, x9
464 ; CHECK-NO-NEON-NEXT: fmov d1, x8
465 ; CHECK-NO-NEON-NEXT: ret
466 %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
467 %conv = uitofp <2 x i64> %shl to <2 x double>
468 %mul = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %conv
469 ret <2 x double> %mul
; Splat 1.0 divided by uitofp(1 << %cnt), <2 x i64> to <2 x float>. With
; NEON the expected code narrows %cnt with xtn, shifts it left by 23 and
; subtracts it from 1.0 as an integer; no convert or fdiv remains. Without
; NEON the same subtraction is done per lane from 0x3f800000.
472 define <2 x float> @fdiv_pow_shl_cnt_vec_with_expensive_cast(<2 x i64> %cnt) nounwind {
473 ; CHECK-NEON-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
474 ; CHECK-NEON: // %bb.0:
475 ; CHECK-NEON-NEXT: xtn v0.2s, v0.2d
476 ; CHECK-NEON-NEXT: fmov v1.2s, #1.00000000
477 ; CHECK-NEON-NEXT: shl v0.2s, v0.2s, #23
478 ; CHECK-NEON-NEXT: sub v0.2s, v1.2s, v0.2s
479 ; CHECK-NEON-NEXT: ret
481 ; CHECK-NO-NEON-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
482 ; CHECK-NO-NEON: // %bb.0:
483 ; CHECK-NO-NEON-NEXT: mov w8, #1065353216 // =0x3f800000
484 ; CHECK-NO-NEON-NEXT: sub w9, w8, w0, lsl #23
485 ; CHECK-NO-NEON-NEXT: sub w8, w8, w1, lsl #23
486 ; CHECK-NO-NEON-NEXT: fmov s0, w9
487 ; CHECK-NO-NEON-NEXT: fmov s1, w8
488 ; CHECK-NO-NEON-NEXT: ret
489 %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
490 %conv = uitofp <2 x i64> %shl to <2 x float>
491 %mul = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %conv
; -9.0 divided by uitofp(8 << %cnt); the shl carries no nuw/nsw flags here.
; The expected code keeps lsl, ucvtf and fdiv.
495 define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
496 ; CHECK-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
498 ; CHECK-NEXT: mov w8, #8 // =0x8
499 ; CHECK-NEXT: fmov s1, #-9.00000000
500 ; CHECK-NEXT: lsl x8, x8, x0
501 ; CHECK-NEXT: ucvtf s0, x8
502 ; CHECK-NEXT: fdiv s0, s1, s0
504 %shl = shl i64 8, %cnt
505 %conv = uitofp i64 %shl to float
506 %mul = fdiv float -9.000000e+00, %conv
; -9.0 divided by sitofp(8 << %cnt): a signed conversion of an unflagged
; shl. The expected code keeps lsl, scvtf and fdiv.
510 define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
511 ; CHECK-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
513 ; CHECK-NEXT: mov w8, #8 // =0x8
514 ; CHECK-NEXT: fmov s1, #-9.00000000
515 ; CHECK-NEXT: lsl x8, x8, x0
516 ; CHECK-NEXT: scvtf s0, x8
517 ; CHECK-NEXT: fdiv s0, s1, s0
519 %shl = shl i64 8, %cnt
520 %conv = sitofp i64 %shl to float
521 %mul = fdiv float -9.000000e+00, %conv
; The shift count is masked to the range 0..31 before computing
; -0.5 / sitofp(8 << cnt). The expected code keeps and, lsl, scvtf and fdiv.
525 define float @fdiv_pow_shl_cnt(i64 %cnt_in) nounwind {
526 ; CHECK-LABEL: fdiv_pow_shl_cnt:
528 ; CHECK-NEXT: mov w8, #8 // =0x8
529 ; CHECK-NEXT: and x9, x0, #0x1f
530 ; CHECK-NEXT: fmov s1, #-0.50000000
531 ; CHECK-NEXT: lsl x8, x8, x9
532 ; CHECK-NEXT: scvtf s0, x8
533 ; CHECK-NEXT: fdiv s0, s1, s0
535 %cnt = and i64 %cnt_in, 31
536 %shl = shl i64 8, %cnt
537 %conv = sitofp i64 %shl to float
538 %mul = fdiv float -0.500000e+00, %conv
; Double dividend with bit pattern 0x36A0000000000000 divided by
; uitofp(1 << %cnt) for an i32 count. The expected code has no fdiv: the
; count shifted left by 52 is subtracted from the dividend's bit pattern.
542 define double @fdiv_pow_shl_cnt32_to_dbl_okay(i32 %cnt) nounwind {
543 ; CHECK-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
545 ; CHECK-NEXT: mov x8, #3936146074321813504 // =0x36a0000000000000
546 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
547 ; CHECK-NEXT: sub x8, x8, x0, lsl #52
548 ; CHECK-NEXT: fmov d0, x8
550 %shl = shl nuw i32 1, %cnt
551 %conv = uitofp i32 %shl to double
552 %mul = fdiv double 0x36A0000000000000, %conv
; Float dividend written as 0x3a1fffff00000000 divided by uitofp(1 << %cnt).
; The expected code keeps lsl, ucvtf and fdiv; the dividend is materialised
; with mov/movk as 0x10fffff8.
556 define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
557 ; CHECK-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
559 ; CHECK-NEXT: mov w8, #1 // =0x1
560 ; CHECK-NEXT: lsl w8, w8, w0
561 ; CHECK-NEXT: ucvtf s0, w8
562 ; CHECK-NEXT: mov w8, #65528 // =0xfff8
563 ; CHECK-NEXT: movk w8, #4351, lsl #16
564 ; CHECK-NEXT: fmov s1, w8
565 ; CHECK-NEXT: fdiv s0, s1, s0
567 %shl = shl nuw i32 1, %cnt
568 %conv = uitofp i32 %shl to float
569 %mul = fdiv float 0x3a1fffff00000000, %conv
; Float dividend written as 0x3a20000000000000 divided by uitofp(1 << %cnt).
; The expected code has no fdiv: the count shifted left by 23 is subtracted
; from the integer 0x11000000 and the result is moved to s0.
573 define float @fdiv_pow_shl_cnt32_okay(i32 %cnt) nounwind {
574 ; CHECK-LABEL: fdiv_pow_shl_cnt32_okay:
576 ; CHECK-NEXT: mov w8, #285212672 // =0x11000000
577 ; CHECK-NEXT: sub w8, w8, w0, lsl #23
578 ; CHECK-NEXT: fmov s0, w8
580 %shl = shl nuw i32 1, %cnt
581 %conv = uitofp i32 %shl to float
582 %mul = fdiv float 0x3a20000000000000, %conv
; 1.0 divided by a uitofp'd i64, truncated to float and compared with 0.0
; (fcmp olt 0.0, x). The expected code forms the quotient as
; 0x3ff0000000000000 - (x0 << 52) with no fdiv, then fcvt, fcmp and cset gt.
586 define fastcc i1 @quantum_hadamard(i32 %0) {
587 ; CHECK-LABEL: quantum_hadamard:
589 ; CHECK-NEXT: mov x8, #4607182418800017408 // =0x3ff0000000000000
590 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
591 ; CHECK-NEXT: sub x8, x8, x0, lsl #52
592 ; CHECK-NEXT: fmov d0, x8
593 ; CHECK-NEXT: fcvt s0, d0
594 ; CHECK-NEXT: fcmp s0, #0.0
595 ; CHECK-NEXT: cset w0, gt
597 %2 = zext i32 %0 to i64
599 %4 = uitofp i64 %3 to double
600 %5 = fdiv double 1.000000e+00, %4
601 %6 = fptrunc double %5 to float
602 %7 = fcmp olt float 0.000000e+00, %6
; Scalable-vector (+sve) form of splat 9.0 / uitofp(1 << %i). The expected
; code keeps the predicated shift (lslr), ucvtf and fdivr.
606 define <vscale x 4 x float> @fdiv_pow2_nx4xfloat(<vscale x 4 x i32> %i) "target-features"="+sve" {
607 ; CHECK-LABEL: fdiv_pow2_nx4xfloat:
609 ; CHECK-NEXT: ptrue p0.s
610 ; CHECK-NEXT: mov z1.s, #1 // =0x1
611 ; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z1.s
612 ; CHECK-NEXT: fmov z1.s, #9.00000000
613 ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
614 ; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z1.s
616 %p2 = shl <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %i
617 %p2_f = uitofp <vscale x 4 x i32> %p2 to <vscale x 4 x float>
618 %r = fdiv <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 9.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), %p2_f
619 ret <vscale x 4 x float> %r
; Scalable-vector (+sve) splat 1.0 divided by uitofp of the argument %0
; itself (no shift in the IR). The expected code is ucvtf followed by fdivr.
622 define <vscale x 2 x double> @scalable2(<vscale x 2 x i64> %0) "target-features"="+sve" {
623 ; CHECK-LABEL: scalable2:
625 ; CHECK-NEXT: ptrue p0.d
626 ; CHECK-NEXT: fmov z1.d, #1.00000000
627 ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
628 ; CHECK-NEXT: fdivr z0.d, p0/m, z0.d, z1.d
630 %2 = uitofp <vscale x 2 x i64> %0 to <vscale x 2 x double>
631 %3 = fdiv <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), %2
632 ret <vscale x 2 x double> %3