1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc --mtriple=loongarch32 --mattr=+f,-d,-frecipe < %s | FileCheck %s --check-prefix=LA32F
3 ; RUN: llc --mtriple=loongarch32 --mattr=+f,-d,+frecipe < %s | FileCheck %s --check-prefix=LA32F-FRECIPE
4 ; RUN: llc --mtriple=loongarch64 --mattr=+d,-frecipe < %s | FileCheck %s --check-prefix=LA64D
5 ; RUN: llc --mtriple=loongarch64 --mattr=+d,+frecipe < %s | FileCheck %s --check-prefix=LA64D-FRECIPE
; Declarations of the f32 and f64 forms of the llvm.sqrt intrinsic that the
; test functions in this file call.
8 declare float @llvm.sqrt.f32(float)
9 declare double @llvm.sqrt.f64(double)
; frsqrt_f32: computes 1.0 / sqrt(%a) on float, with the `fast` flag on both
; the sqrt call and the fdiv.
; Expected code per RUN configuration:
;   - both runs without frecipe: a single frsqrt.s.
;   - both runs with frecipe: frsqrte.s followed by an fmul.s/fmadd.s
;     sequence. The LA32 run loads two values from the constant pool
;     (.LCPI0_0 and .LCPI0_1); the LA64 run uses vldi instead of
;     constant-pool loads.
11 define float @frsqrt_f32(float %a) nounwind {
12 ; LA32F-LABEL: frsqrt_f32:
14 ; LA32F-NEXT: frsqrt.s $fa0, $fa0
17 ; LA32F-FRECIPE-LABEL: frsqrt_f32:
18 ; LA32F-FRECIPE: # %bb.0:
19 ; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
20 ; LA32F-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
21 ; LA32F-FRECIPE-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI0_0)
22 ; LA32F-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1)
23 ; LA32F-FRECIPE-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI0_1)
24 ; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
25 ; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
26 ; LA32F-FRECIPE-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
27 ; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
28 ; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa1, $fa0
29 ; LA32F-FRECIPE-NEXT: ret
31 ; LA64D-LABEL: frsqrt_f32:
33 ; LA64D-NEXT: frsqrt.s $fa0, $fa0
36 ; LA64D-FRECIPE-LABEL: frsqrt_f32:
37 ; LA64D-FRECIPE: # %bb.0:
38 ; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
39 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
40 ; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
41 ; LA64D-FRECIPE-NEXT: vldi $vr2, -1144
42 ; LA64D-FRECIPE-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
43 ; LA64D-FRECIPE-NEXT: vldi $vr2, -1056
44 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
45 ; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa1, $fa0
46 ; LA64D-FRECIPE-NEXT: ret
48 %1 = call fast float @llvm.sqrt.f32(float %a)
49 %2 = fdiv fast float 1.0, %1
; frsqrt_f64: computes 1.0 / sqrt(%a) on double, with the `fast` flag on both
; the sqrt call and the fdiv.
; Expected code per RUN configuration:
;   - both LA32 runs (+f,-d): no inline FP instructions; the code calls sqrt
;     and then __divdf3, and the same call sequence is expected with and
;     without frecipe.
;   - LA64 without frecipe: a single frsqrt.d.
;   - LA64 with frecipe: frsqrte.d followed by an fmul.d/fmadd.d sequence that
;     contains two fmadd.d steps and uses two vldi-loaded values.
53 define double @frsqrt_f64(double %a) nounwind {
54 ; LA32F-LABEL: frsqrt_f64:
56 ; LA32F-NEXT: addi.w $sp, $sp, -16
57 ; LA32F-NEXT: st.w $ra, $sp, 12
58 ; LA32F-NEXT: bl %plt(sqrt)
59 ; LA32F-NEXT: move $a2, $a0
60 ; LA32F-NEXT: move $a3, $a1
61 ; LA32F-NEXT: lu12i.w $a1, 261888
62 ; LA32F-NEXT: move $a0, $zero
63 ; LA32F-NEXT: bl %plt(__divdf3)
64 ; LA32F-NEXT: ld.w $ra, $sp, 12
65 ; LA32F-NEXT: addi.w $sp, $sp, 16
68 ; LA32F-FRECIPE-LABEL: frsqrt_f64:
69 ; LA32F-FRECIPE: # %bb.0:
70 ; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -16
71 ; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
72 ; LA32F-FRECIPE-NEXT: bl %plt(sqrt)
73 ; LA32F-FRECIPE-NEXT: move $a2, $a0
74 ; LA32F-FRECIPE-NEXT: move $a3, $a1
75 ; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888
76 ; LA32F-FRECIPE-NEXT: move $a0, $zero
77 ; LA32F-FRECIPE-NEXT: bl %plt(__divdf3)
78 ; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
79 ; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 16
80 ; LA32F-FRECIPE-NEXT: ret
82 ; LA64D-LABEL: frsqrt_f64:
84 ; LA64D-NEXT: frsqrt.d $fa0, $fa0
87 ; LA64D-FRECIPE-LABEL: frsqrt_f64:
88 ; LA64D-FRECIPE: # %bb.0:
89 ; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0
90 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1
91 ; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
92 ; LA64D-FRECIPE-NEXT: vldi $vr3, -888
93 ; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
94 ; LA64D-FRECIPE-NEXT: vldi $vr4, -800
95 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
96 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
97 ; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1
98 ; LA64D-FRECIPE-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa3
99 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
100 ; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa1, $fa0
101 ; LA64D-FRECIPE-NEXT: ret
102 %1 = call fast double @llvm.sqrt.f64(double %a)
103 %2 = fdiv fast double 1.0, %1
; sqrt_simplify_before_recip_3_uses_f64: one `fast` sqrt(%x) feeds three `fast`
; fdivs: 1.0/sqrt (stored to %p1), 42.0/sqrt (stored to %p2) and %x/sqrt
; (returned).
; Expected code per RUN configuration:
;   - both LA32 runs (+f,-d): calls only -- sqrt once, then __divdf3 twice.
;     The saved sqrt result ($s1/$s2) is copied back into $a0/$a1 at the end
;     instead of performing a third division. Output is the same with and
;     without frecipe.
;   - LA64 without frecipe: fsqrt.d and frsqrt.d plus one fdiv.d; the fsqrt.d
;     result is moved into $fa0 at the end.
;   - LA64 with frecipe: no fsqrt.d/fdiv.d; a single frsqrte.d-based
;     fmul.d/fmadd.d sequence produces $fa1, which is stored directly and also
;     multiplied by a constant-pool value (.LCPI2_0, presumably 42.0) and by
;     the original input in $fa0.
107 define double @sqrt_simplify_before_recip_3_uses_f64(double %x, ptr %p1, ptr %p2) nounwind {
108 ; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_f64:
110 ; LA32F-NEXT: addi.w $sp, $sp, -32
111 ; LA32F-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
112 ; LA32F-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
113 ; LA32F-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
114 ; LA32F-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
115 ; LA32F-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
116 ; LA32F-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill
117 ; LA32F-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill
118 ; LA32F-NEXT: move $fp, $a3
119 ; LA32F-NEXT: move $s0, $a2
120 ; LA32F-NEXT: bl %plt(sqrt)
121 ; LA32F-NEXT: move $s1, $a0
122 ; LA32F-NEXT: move $s2, $a1
123 ; LA32F-NEXT: lu12i.w $a1, 261888
124 ; LA32F-NEXT: move $a0, $zero
125 ; LA32F-NEXT: move $a2, $s1
126 ; LA32F-NEXT: move $a3, $s2
127 ; LA32F-NEXT: bl %plt(__divdf3)
128 ; LA32F-NEXT: move $s3, $a0
129 ; LA32F-NEXT: move $s4, $a1
130 ; LA32F-NEXT: lu12i.w $a1, 263248
131 ; LA32F-NEXT: move $a0, $zero
132 ; LA32F-NEXT: move $a2, $s1
133 ; LA32F-NEXT: move $a3, $s2
134 ; LA32F-NEXT: bl %plt(__divdf3)
135 ; LA32F-NEXT: st.w $s3, $s0, 0
136 ; LA32F-NEXT: st.w $s4, $s0, 4
137 ; LA32F-NEXT: st.w $a0, $fp, 0
138 ; LA32F-NEXT: st.w $a1, $fp, 4
139 ; LA32F-NEXT: move $a0, $s1
140 ; LA32F-NEXT: move $a1, $s2
141 ; LA32F-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload
142 ; LA32F-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload
143 ; LA32F-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload
144 ; LA32F-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload
145 ; LA32F-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
146 ; LA32F-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
147 ; LA32F-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
148 ; LA32F-NEXT: addi.w $sp, $sp, 32
151 ; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f64:
152 ; LA32F-FRECIPE: # %bb.0:
153 ; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -32
154 ; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
155 ; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
156 ; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
157 ; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
158 ; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
159 ; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill
160 ; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill
161 ; LA32F-FRECIPE-NEXT: move $fp, $a3
162 ; LA32F-FRECIPE-NEXT: move $s0, $a2
163 ; LA32F-FRECIPE-NEXT: bl %plt(sqrt)
164 ; LA32F-FRECIPE-NEXT: move $s1, $a0
165 ; LA32F-FRECIPE-NEXT: move $s2, $a1
166 ; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888
167 ; LA32F-FRECIPE-NEXT: move $a0, $zero
168 ; LA32F-FRECIPE-NEXT: move $a2, $s1
169 ; LA32F-FRECIPE-NEXT: move $a3, $s2
170 ; LA32F-FRECIPE-NEXT: bl %plt(__divdf3)
171 ; LA32F-FRECIPE-NEXT: move $s3, $a0
172 ; LA32F-FRECIPE-NEXT: move $s4, $a1
173 ; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248
174 ; LA32F-FRECIPE-NEXT: move $a0, $zero
175 ; LA32F-FRECIPE-NEXT: move $a2, $s1
176 ; LA32F-FRECIPE-NEXT: move $a3, $s2
177 ; LA32F-FRECIPE-NEXT: bl %plt(__divdf3)
178 ; LA32F-FRECIPE-NEXT: st.w $s3, $s0, 0
179 ; LA32F-FRECIPE-NEXT: st.w $s4, $s0, 4
180 ; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0
181 ; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4
182 ; LA32F-FRECIPE-NEXT: move $a0, $s1
183 ; LA32F-FRECIPE-NEXT: move $a1, $s2
184 ; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload
185 ; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload
186 ; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload
187 ; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload
188 ; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
189 ; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
190 ; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
191 ; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 32
192 ; LA32F-FRECIPE-NEXT: ret
194 ; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_f64:
196 ; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0)
197 ; LA64D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI2_0)
198 ; LA64D-NEXT: fsqrt.d $fa1, $fa0
199 ; LA64D-NEXT: frsqrt.d $fa0, $fa0
200 ; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa1
201 ; LA64D-NEXT: fst.d $fa0, $a0, 0
202 ; LA64D-NEXT: fst.d $fa2, $a1, 0
203 ; LA64D-NEXT: fmov.d $fa0, $fa1
206 ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f64:
207 ; LA64D-FRECIPE: # %bb.0:
208 ; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0
209 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1
210 ; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
211 ; LA64D-FRECIPE-NEXT: vldi $vr3, -888
212 ; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
213 ; LA64D-FRECIPE-NEXT: vldi $vr4, -800
214 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
215 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
216 ; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
217 ; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0)
218 ; LA64D-FRECIPE-NEXT: fld.d $fa5, $a2, %pc_lo12(.LCPI2_0)
219 ; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
220 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
221 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
222 ; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa5
223 ; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1
224 ; LA64D-FRECIPE-NEXT: fst.d $fa1, $a0, 0
225 ; LA64D-FRECIPE-NEXT: fst.d $fa2, $a1, 0
226 ; LA64D-FRECIPE-NEXT: ret
227 %sqrt = tail call fast double @llvm.sqrt.f64(double %x)
228 %rsqrt = fdiv fast double 1.0, %sqrt
229 %r = fdiv fast double 42.0, %sqrt
230 %sqrt_fast = fdiv fast double %x, %sqrt
231 store double %rsqrt, ptr %p1, align 8
232 store double %r, ptr %p2, align 8
233 ret double %sqrt_fast
; sqrt_simplify_before_recip_3_uses_order_f64: one `fast` sqrt(%x) feeds three
; `fast` fdivs, with %x/sqrt (returned) appearing first in the IR, followed by
; 42.0/sqrt (stored to %p1) and 43.0/sqrt (stored to %p2). There is no
; 1.0/sqrt user in this function.
; Expected code per RUN configuration:
;   - both LA32 runs (+f,-d): calls only -- sqrt once, then __divdf3 twice.
;     The saved sqrt result ($s1/$s2) is copied back into $a0/$a1 at the end.
;     Output is the same with and without frecipe.
;   - LA64 without frecipe: fsqrt.d plus two fdiv.d; no frsqrt.d is expected.
;   - LA64 with frecipe: no fsqrt.d/fdiv.d; a single frsqrte.d-based
;     fmul.d/fmadd.d sequence produces $fa1, which is then multiplied by the
;     original input in $fa0 and by two constant-pool values (.LCPI3_0 and
;     .LCPI3_1, presumably 42.0 and 43.0).
237 define double @sqrt_simplify_before_recip_3_uses_order_f64(double %x, ptr %p1, ptr %p2) nounwind {
238 ; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_order_f64:
240 ; LA32F-NEXT: addi.w $sp, $sp, -32
241 ; LA32F-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
242 ; LA32F-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
243 ; LA32F-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
244 ; LA32F-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
245 ; LA32F-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
246 ; LA32F-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill
247 ; LA32F-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill
248 ; LA32F-NEXT: move $fp, $a3
249 ; LA32F-NEXT: move $s0, $a2
250 ; LA32F-NEXT: bl %plt(sqrt)
251 ; LA32F-NEXT: move $s1, $a0
252 ; LA32F-NEXT: move $s2, $a1
253 ; LA32F-NEXT: lu12i.w $a1, 263248
254 ; LA32F-NEXT: move $a0, $zero
255 ; LA32F-NEXT: move $a2, $s1
256 ; LA32F-NEXT: move $a3, $s2
257 ; LA32F-NEXT: bl %plt(__divdf3)
258 ; LA32F-NEXT: move $s3, $a0
259 ; LA32F-NEXT: move $s4, $a1
260 ; LA32F-NEXT: lu12i.w $a1, 263256
261 ; LA32F-NEXT: move $a0, $zero
262 ; LA32F-NEXT: move $a2, $s1
263 ; LA32F-NEXT: move $a3, $s2
264 ; LA32F-NEXT: bl %plt(__divdf3)
265 ; LA32F-NEXT: st.w $s3, $s0, 0
266 ; LA32F-NEXT: st.w $s4, $s0, 4
267 ; LA32F-NEXT: st.w $a0, $fp, 0
268 ; LA32F-NEXT: st.w $a1, $fp, 4
269 ; LA32F-NEXT: move $a0, $s1
270 ; LA32F-NEXT: move $a1, $s2
271 ; LA32F-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload
272 ; LA32F-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload
273 ; LA32F-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload
274 ; LA32F-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload
275 ; LA32F-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
276 ; LA32F-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
277 ; LA32F-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
278 ; LA32F-NEXT: addi.w $sp, $sp, 32
281 ; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f64:
282 ; LA32F-FRECIPE: # %bb.0:
283 ; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -32
284 ; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
285 ; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
286 ; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
287 ; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
288 ; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
289 ; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill
290 ; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill
291 ; LA32F-FRECIPE-NEXT: move $fp, $a3
292 ; LA32F-FRECIPE-NEXT: move $s0, $a2
293 ; LA32F-FRECIPE-NEXT: bl %plt(sqrt)
294 ; LA32F-FRECIPE-NEXT: move $s1, $a0
295 ; LA32F-FRECIPE-NEXT: move $s2, $a1
296 ; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248
297 ; LA32F-FRECIPE-NEXT: move $a0, $zero
298 ; LA32F-FRECIPE-NEXT: move $a2, $s1
299 ; LA32F-FRECIPE-NEXT: move $a3, $s2
300 ; LA32F-FRECIPE-NEXT: bl %plt(__divdf3)
301 ; LA32F-FRECIPE-NEXT: move $s3, $a0
302 ; LA32F-FRECIPE-NEXT: move $s4, $a1
303 ; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263256
304 ; LA32F-FRECIPE-NEXT: move $a0, $zero
305 ; LA32F-FRECIPE-NEXT: move $a2, $s1
306 ; LA32F-FRECIPE-NEXT: move $a3, $s2
307 ; LA32F-FRECIPE-NEXT: bl %plt(__divdf3)
308 ; LA32F-FRECIPE-NEXT: st.w $s3, $s0, 0
309 ; LA32F-FRECIPE-NEXT: st.w $s4, $s0, 4
310 ; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0
311 ; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4
312 ; LA32F-FRECIPE-NEXT: move $a0, $s1
313 ; LA32F-FRECIPE-NEXT: move $a1, $s2
314 ; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload
315 ; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload
316 ; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload
317 ; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload
318 ; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
319 ; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
320 ; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
321 ; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 32
322 ; LA32F-FRECIPE-NEXT: ret
324 ; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_order_f64:
326 ; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0)
327 ; LA64D-NEXT: fld.d $fa1, $a2, %pc_lo12(.LCPI3_0)
328 ; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1)
329 ; LA64D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI3_1)
330 ; LA64D-NEXT: fsqrt.d $fa0, $fa0
331 ; LA64D-NEXT: fdiv.d $fa1, $fa1, $fa0
332 ; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa0
333 ; LA64D-NEXT: fst.d $fa1, $a0, 0
334 ; LA64D-NEXT: fst.d $fa2, $a1, 0
337 ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f64:
338 ; LA64D-FRECIPE: # %bb.0:
339 ; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0
340 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1
341 ; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
342 ; LA64D-FRECIPE-NEXT: vldi $vr3, -888
343 ; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
344 ; LA64D-FRECIPE-NEXT: vldi $vr4, -800
345 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
346 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
347 ; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
348 ; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
349 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
350 ; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0)
351 ; LA64D-FRECIPE-NEXT: fld.d $fa3, $a2, %pc_lo12(.LCPI3_0)
352 ; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1)
353 ; LA64D-FRECIPE-NEXT: fld.d $fa4, $a2, %pc_lo12(.LCPI3_1)
354 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
355 ; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1
356 ; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa3
357 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
358 ; LA64D-FRECIPE-NEXT: fst.d $fa2, $a0, 0
359 ; LA64D-FRECIPE-NEXT: fst.d $fa1, $a1, 0
360 ; LA64D-FRECIPE-NEXT: ret
361 %sqrt = tail call fast double @llvm.sqrt.f64(double %x)
362 %sqrt_fast = fdiv fast double %x, %sqrt
363 %r1 = fdiv fast double 42.0, %sqrt
364 %r2 = fdiv fast double 43.0, %sqrt
365 store double %r1, ptr %p1, align 8
366 store double %r2, ptr %p2, align 8
367 ret double %sqrt_fast
; sqrt_simplify_before_recip_4_uses_f64: one `fast` sqrt(%x) feeds four `fast`
; fdivs: 1.0/sqrt (stored to %p1), 42.0/sqrt (stored to %p2), 43.0/sqrt
; (stored to %p3) and %x/sqrt (returned).
; Expected code per RUN configuration:
;   - both LA32 runs (+f,-d): calls only -- sqrt once, then __divdf3 three
;     times. The saved sqrt result ($s2/$s3) is copied back into $a0/$a1 at
;     the end. Output is the same with and without frecipe.
;   - LA64 without frecipe: fsqrt.d and frsqrt.d plus two fdiv.d; the fsqrt.d
;     result is moved into $fa0 at the end.
;   - LA64 with frecipe: no fsqrt.d/fdiv.d; a single frsqrte.d-based
;     fmul.d/fmadd.d sequence produces $fa1, which is stored directly and also
;     multiplied by two constant-pool values (.LCPI4_0 and .LCPI4_1,
;     presumably 42.0 and 43.0) and by the original input in $fa0.
370 define double @sqrt_simplify_before_recip_4_uses_f64(double %x, ptr %p1, ptr %p2, ptr %p3) nounwind {
371 ; LA32F-LABEL: sqrt_simplify_before_recip_4_uses_f64:
373 ; LA32F-NEXT: addi.w $sp, $sp, -48
374 ; LA32F-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
375 ; LA32F-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
376 ; LA32F-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
377 ; LA32F-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
378 ; LA32F-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
379 ; LA32F-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
380 ; LA32F-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
381 ; LA32F-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill
382 ; LA32F-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill
383 ; LA32F-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill
384 ; LA32F-NEXT: move $fp, $a4
385 ; LA32F-NEXT: move $s0, $a3
386 ; LA32F-NEXT: move $s1, $a2
387 ; LA32F-NEXT: bl %plt(sqrt)
388 ; LA32F-NEXT: move $s2, $a0
389 ; LA32F-NEXT: move $s3, $a1
390 ; LA32F-NEXT: lu12i.w $a1, 261888
391 ; LA32F-NEXT: move $a0, $zero
392 ; LA32F-NEXT: move $a2, $s2
393 ; LA32F-NEXT: move $a3, $s3
394 ; LA32F-NEXT: bl %plt(__divdf3)
395 ; LA32F-NEXT: move $s4, $a0
396 ; LA32F-NEXT: move $s5, $a1
397 ; LA32F-NEXT: lu12i.w $a1, 263248
398 ; LA32F-NEXT: move $a0, $zero
399 ; LA32F-NEXT: move $a2, $s2
400 ; LA32F-NEXT: move $a3, $s3
401 ; LA32F-NEXT: bl %plt(__divdf3)
402 ; LA32F-NEXT: move $s6, $a0
403 ; LA32F-NEXT: move $s7, $a1
404 ; LA32F-NEXT: lu12i.w $a1, 263256
405 ; LA32F-NEXT: move $a0, $zero
406 ; LA32F-NEXT: move $a2, $s2
407 ; LA32F-NEXT: move $a3, $s3
408 ; LA32F-NEXT: bl %plt(__divdf3)
409 ; LA32F-NEXT: st.w $s4, $s1, 0
410 ; LA32F-NEXT: st.w $s5, $s1, 4
411 ; LA32F-NEXT: st.w $s6, $s0, 0
412 ; LA32F-NEXT: st.w $s7, $s0, 4
413 ; LA32F-NEXT: st.w $a0, $fp, 0
414 ; LA32F-NEXT: st.w $a1, $fp, 4
415 ; LA32F-NEXT: move $a0, $s2
416 ; LA32F-NEXT: move $a1, $s3
417 ; LA32F-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload
418 ; LA32F-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
419 ; LA32F-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload
420 ; LA32F-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
421 ; LA32F-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
422 ; LA32F-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
423 ; LA32F-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
424 ; LA32F-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
425 ; LA32F-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
426 ; LA32F-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
427 ; LA32F-NEXT: addi.w $sp, $sp, 48
430 ; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f64:
431 ; LA32F-FRECIPE: # %bb.0:
432 ; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -48
433 ; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
434 ; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
435 ; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
436 ; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
437 ; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
438 ; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
439 ; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
440 ; LA32F-FRECIPE-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill
441 ; LA32F-FRECIPE-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill
442 ; LA32F-FRECIPE-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill
443 ; LA32F-FRECIPE-NEXT: move $fp, $a4
444 ; LA32F-FRECIPE-NEXT: move $s0, $a3
445 ; LA32F-FRECIPE-NEXT: move $s1, $a2
446 ; LA32F-FRECIPE-NEXT: bl %plt(sqrt)
447 ; LA32F-FRECIPE-NEXT: move $s2, $a0
448 ; LA32F-FRECIPE-NEXT: move $s3, $a1
449 ; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888
450 ; LA32F-FRECIPE-NEXT: move $a0, $zero
451 ; LA32F-FRECIPE-NEXT: move $a2, $s2
452 ; LA32F-FRECIPE-NEXT: move $a3, $s3
453 ; LA32F-FRECIPE-NEXT: bl %plt(__divdf3)
454 ; LA32F-FRECIPE-NEXT: move $s4, $a0
455 ; LA32F-FRECIPE-NEXT: move $s5, $a1
456 ; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248
457 ; LA32F-FRECIPE-NEXT: move $a0, $zero
458 ; LA32F-FRECIPE-NEXT: move $a2, $s2
459 ; LA32F-FRECIPE-NEXT: move $a3, $s3
460 ; LA32F-FRECIPE-NEXT: bl %plt(__divdf3)
461 ; LA32F-FRECIPE-NEXT: move $s6, $a0
462 ; LA32F-FRECIPE-NEXT: move $s7, $a1
463 ; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263256
464 ; LA32F-FRECIPE-NEXT: move $a0, $zero
465 ; LA32F-FRECIPE-NEXT: move $a2, $s2
466 ; LA32F-FRECIPE-NEXT: move $a3, $s3
467 ; LA32F-FRECIPE-NEXT: bl %plt(__divdf3)
468 ; LA32F-FRECIPE-NEXT: st.w $s4, $s1, 0
469 ; LA32F-FRECIPE-NEXT: st.w $s5, $s1, 4
470 ; LA32F-FRECIPE-NEXT: st.w $s6, $s0, 0
471 ; LA32F-FRECIPE-NEXT: st.w $s7, $s0, 4
472 ; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0
473 ; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4
474 ; LA32F-FRECIPE-NEXT: move $a0, $s2
475 ; LA32F-FRECIPE-NEXT: move $a1, $s3
476 ; LA32F-FRECIPE-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload
477 ; LA32F-FRECIPE-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
478 ; LA32F-FRECIPE-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload
479 ; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
480 ; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
481 ; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
482 ; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
483 ; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
484 ; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
485 ; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
486 ; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 48
487 ; LA32F-FRECIPE-NEXT: ret
489 ; LA64D-LABEL: sqrt_simplify_before_recip_4_uses_f64:
491 ; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0)
492 ; LA64D-NEXT: fld.d $fa2, $a3, %pc_lo12(.LCPI4_0)
493 ; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_1)
494 ; LA64D-NEXT: fld.d $fa3, $a3, %pc_lo12(.LCPI4_1)
495 ; LA64D-NEXT: fsqrt.d $fa1, $fa0
496 ; LA64D-NEXT: frsqrt.d $fa0, $fa0
497 ; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa1
498 ; LA64D-NEXT: fdiv.d $fa3, $fa3, $fa1
499 ; LA64D-NEXT: fst.d $fa0, $a0, 0
500 ; LA64D-NEXT: fst.d $fa2, $a1, 0
501 ; LA64D-NEXT: fst.d $fa3, $a2, 0
502 ; LA64D-NEXT: fmov.d $fa0, $fa1
505 ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f64:
506 ; LA64D-FRECIPE: # %bb.0:
507 ; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0
508 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1
509 ; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
510 ; LA64D-FRECIPE-NEXT: vldi $vr3, -888
511 ; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
512 ; LA64D-FRECIPE-NEXT: vldi $vr4, -800
513 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
514 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
515 ; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
516 ; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
517 ; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0)
518 ; LA64D-FRECIPE-NEXT: fld.d $fa3, $a3, %pc_lo12(.LCPI4_0)
519 ; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_1)
520 ; LA64D-FRECIPE-NEXT: fld.d $fa5, $a3, %pc_lo12(.LCPI4_1)
521 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
522 ; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
523 ; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa3
524 ; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa1, $fa5
525 ; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1
526 ; LA64D-FRECIPE-NEXT: fst.d $fa1, $a0, 0
527 ; LA64D-FRECIPE-NEXT: fst.d $fa2, $a1, 0
528 ; LA64D-FRECIPE-NEXT: fst.d $fa3, $a2, 0
529 ; LA64D-FRECIPE-NEXT: ret
530 %sqrt = tail call fast double @llvm.sqrt.f64(double %x)
531 %rsqrt = fdiv fast double 1.0, %sqrt
532 %r1 = fdiv fast double 42.0, %sqrt
533 %r2 = fdiv fast double 43.0, %sqrt
534 %sqrt_fast = fdiv fast double %x, %sqrt
535 store double %rsqrt, ptr %p1, align 8
536 store double %r1, ptr %p2, align 8
537 store double %r2, ptr %p3, align 8
538 ret double %sqrt_fast
; sqrt_simplify_before_recip_3_uses_f32: one `fast` sqrt(%x) on float feeds
; three `fast` fdivs: 1.0/sqrt (stored to %p1), 42.0/sqrt (stored to %p2) and
; %x/sqrt (%sqrt_fast).
; Expected code per RUN configuration:
;   - both runs without frecipe (identical instruction sequence): fsqrt.s and
;     frsqrt.s plus one fdiv.s; the fsqrt.s result is moved into $fa0 at the
;     end.
;   - both runs with frecipe: no fsqrt.s/fdiv.s; a single frsqrte.s-based
;     fmul.s/fmadd.s sequence produces $fa1, which is stored directly and also
;     multiplied by a loaded constant and by the original input in $fa0.
;     The LA32 run loads three values from the constant pool (.LCPI5_0 to
;     .LCPI5_2); the LA64 run uses two vldi and one constant-pool load.
541 define float @sqrt_simplify_before_recip_3_uses_f32(float %x, ptr %p1, ptr %p2) nounwind {
542 ; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_f32:
544 ; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0)
545 ; LA32F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI5_0)
546 ; LA32F-NEXT: fsqrt.s $fa1, $fa0
547 ; LA32F-NEXT: frsqrt.s $fa0, $fa0
548 ; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa1
549 ; LA32F-NEXT: fst.s $fa0, $a0, 0
550 ; LA32F-NEXT: fst.s $fa2, $a1, 0
551 ; LA32F-NEXT: fmov.s $fa0, $fa1
554 ; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f32:
555 ; LA32F-FRECIPE: # %bb.0:
556 ; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
557 ; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
558 ; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
559 ; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0)
560 ; LA32F-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI5_0)
561 ; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_1)
562 ; LA32F-FRECIPE-NEXT: fld.s $fa4, $a2, %pc_lo12(.LCPI5_1)
563 ; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_2)
564 ; LA32F-FRECIPE-NEXT: fld.s $fa5, $a2, %pc_lo12(.LCPI5_2)
565 ; LA32F-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
566 ; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4
567 ; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
568 ; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa5
569 ; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
570 ; LA32F-FRECIPE-NEXT: fst.s $fa1, $a0, 0
571 ; LA32F-FRECIPE-NEXT: fst.s $fa2, $a1, 0
572 ; LA32F-FRECIPE-NEXT: ret
574 ; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_f32:
576 ; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0)
577 ; LA64D-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI5_0)
578 ; LA64D-NEXT: fsqrt.s $fa1, $fa0
579 ; LA64D-NEXT: frsqrt.s $fa0, $fa0
580 ; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa1
581 ; LA64D-NEXT: fst.s $fa0, $a0, 0
582 ; LA64D-NEXT: fst.s $fa2, $a1, 0
583 ; LA64D-NEXT: fmov.s $fa0, $fa1
586 ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f32:
587 ; LA64D-FRECIPE: # %bb.0:
588 ; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
589 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
590 ; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
591 ; LA64D-FRECIPE-NEXT: vldi $vr3, -1144
592 ; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
593 ; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0)
594 ; LA64D-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI5_0)
595 ; LA64D-FRECIPE-NEXT: vldi $vr4, -1056
596 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4
597 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
598 ; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa3
599 ; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
600 ; LA64D-FRECIPE-NEXT: fst.s $fa1, $a0, 0
601 ; LA64D-FRECIPE-NEXT: fst.s $fa2, $a1, 0
602 ; LA64D-FRECIPE-NEXT: ret
604 %sqrt = tail call fast float @llvm.sqrt.f32(float %x)
605 %rsqrt = fdiv fast float 1.0, %sqrt
606 %r = fdiv fast float 42.0, %sqrt
607 %sqrt_fast = fdiv fast float %x, %sqrt
608 store float %rsqrt, ptr %p1, align 8
609 store float %r, ptr %p2, align 8
; sqrt_simplify_before_recip_4_uses_f32: one `fast` sqrt(%x) on float feeds
; four `fast` fdivs: 1.0/sqrt (stored to %p1), 42.0/sqrt (stored to %p2),
; 43.0/sqrt (stored to %p3) and %x/sqrt (%sqrt_fast).
; Expected code per RUN configuration:
;   - both runs without frecipe (identical instruction sequence): fsqrt.s and
;     frsqrt.s plus two fdiv.s; the fsqrt.s result is moved into $fa0 at the
;     end.
;   - both runs with frecipe: no fsqrt.s/fdiv.s; a single frsqrte.s-based
;     fmul.s/fmadd.s sequence produces $fa1, which is stored directly and also
;     multiplied by two loaded constants and by the original input in $fa0.
;     The LA32 run loads four values from the constant pool (.LCPI6_0 to
;     .LCPI6_3); the LA64 run uses two vldi and two constant-pool loads.
613 define float @sqrt_simplify_before_recip_4_uses_f32(float %x, ptr %p1, ptr %p2, ptr %p3) nounwind {
614 ; LA32F-LABEL: sqrt_simplify_before_recip_4_uses_f32:
616 ; LA32F-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
617 ; LA32F-NEXT: fld.s $fa2, $a3, %pc_lo12(.LCPI6_0)
618 ; LA32F-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1)
619 ; LA32F-NEXT: fld.s $fa3, $a3, %pc_lo12(.LCPI6_1)
620 ; LA32F-NEXT: fsqrt.s $fa1, $fa0
621 ; LA32F-NEXT: frsqrt.s $fa0, $fa0
622 ; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa1
623 ; LA32F-NEXT: fdiv.s $fa3, $fa3, $fa1
624 ; LA32F-NEXT: fst.s $fa0, $a0, 0
625 ; LA32F-NEXT: fst.s $fa2, $a1, 0
626 ; LA32F-NEXT: fst.s $fa3, $a2, 0
627 ; LA32F-NEXT: fmov.s $fa0, $fa1
630 ; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f32:
631 ; LA32F-FRECIPE: # %bb.0:
632 ; LA32F-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
633 ; LA32F-FRECIPE-NEXT: fld.s $fa1, $a3, %pc_lo12(.LCPI6_0)
634 ; LA32F-FRECIPE-NEXT: frsqrte.s $fa2, $fa0
635 ; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa2
636 ; LA32F-FRECIPE-NEXT: fmul.s $fa3, $fa0, $fa2
637 ; LA32F-FRECIPE-NEXT: fmadd.s $fa1, $fa3, $fa2, $fa1
638 ; LA32F-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1)
639 ; LA32F-FRECIPE-NEXT: fld.s $fa3, $a3, %pc_lo12(.LCPI6_1)
640 ; LA32F-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_2)
641 ; LA32F-FRECIPE-NEXT: fld.s $fa4, $a3, %pc_lo12(.LCPI6_2)
642 ; LA32F-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_3)
643 ; LA32F-FRECIPE-NEXT: fld.s $fa5, $a3, %pc_lo12(.LCPI6_3)
644 ; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa2, $fa3
645 ; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa2, $fa1
646 ; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa4
647 ; LA32F-FRECIPE-NEXT: fmul.s $fa3, $fa1, $fa5
648 ; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
649 ; LA32F-FRECIPE-NEXT: fst.s $fa1, $a0, 0
650 ; LA32F-FRECIPE-NEXT: fst.s $fa2, $a1, 0
651 ; LA32F-FRECIPE-NEXT: fst.s $fa3, $a2, 0
652 ; LA32F-FRECIPE-NEXT: ret
654 ; LA64D-LABEL: sqrt_simplify_before_recip_4_uses_f32:
656 ; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
657 ; LA64D-NEXT: fld.s $fa2, $a3, %pc_lo12(.LCPI6_0)
658 ; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1)
659 ; LA64D-NEXT: fld.s $fa3, $a3, %pc_lo12(.LCPI6_1)
660 ; LA64D-NEXT: fsqrt.s $fa1, $fa0
661 ; LA64D-NEXT: frsqrt.s $fa0, $fa0
662 ; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa1
663 ; LA64D-NEXT: fdiv.s $fa3, $fa3, $fa1
664 ; LA64D-NEXT: fst.s $fa0, $a0, 0
665 ; LA64D-NEXT: fst.s $fa2, $a1, 0
666 ; LA64D-NEXT: fst.s $fa3, $a2, 0
667 ; LA64D-NEXT: fmov.s $fa0, $fa1
670 ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f32:
671 ; LA64D-FRECIPE: # %bb.0:
672 ; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
673 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
674 ; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
675 ; LA64D-FRECIPE-NEXT: vldi $vr3, -1144
676 ; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
677 ; LA64D-FRECIPE-NEXT: vldi $vr3, -1056
678 ; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
679 ; LA64D-FRECIPE-NEXT: fld.s $fa4, $a3, %pc_lo12(.LCPI6_0)
680 ; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1)
681 ; LA64D-FRECIPE-NEXT: fld.s $fa5, $a3, %pc_lo12(.LCPI6_1)
682 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
683 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
684 ; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa4
685 ; LA64D-FRECIPE-NEXT: fmul.s $fa3, $fa1, $fa5
686 ; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
687 ; LA64D-FRECIPE-NEXT: fst.s $fa1, $a0, 0
688 ; LA64D-FRECIPE-NEXT: fst.s $fa2, $a1, 0
689 ; LA64D-FRECIPE-NEXT: fst.s $fa3, $a2, 0
690 ; LA64D-FRECIPE-NEXT: ret
692 %sqrt = tail call fast float @llvm.sqrt.f32(float %x)
693 %rsqrt = fdiv fast float 1.0, %sqrt
694 %r1 = fdiv fast float 42.0, %sqrt
695 %r2 = fdiv fast float 43.0, %sqrt
696 %sqrt_fast = fdiv fast float %x, %sqrt
697 store float %rsqrt, ptr %p1, align 8
698 store float %r1, ptr %p2, align 8
699 store float %r2, ptr %p3, align 8
703 define float @sqrt_simplify_before_recip_3_uses_order_f32(float %x, ptr %p1, ptr %p2) nounwind {
704 ; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_order_f32:
706 ; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
707 ; LA32F-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI7_0)
708 ; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1)
709 ; LA32F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI7_1)
710 ; LA32F-NEXT: fsqrt.s $fa0, $fa0
711 ; LA32F-NEXT: fdiv.s $fa1, $fa1, $fa0
712 ; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa0
713 ; LA32F-NEXT: fst.s $fa1, $a0, 0
714 ; LA32F-NEXT: fst.s $fa2, $a1, 0
717 ; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f32:
718 ; LA32F-FRECIPE: # %bb.0:
719 ; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
720 ; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
721 ; LA32F-FRECIPE-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI7_0)
722 ; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1)
723 ; LA32F-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI7_1)
724 ; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
725 ; LA32F-FRECIPE-NEXT: fmul.s $fa4, $fa0, $fa1
726 ; LA32F-FRECIPE-NEXT: fmadd.s $fa2, $fa4, $fa1, $fa2
727 ; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
728 ; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_2)
729 ; LA32F-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI7_2)
730 ; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_3)
731 ; LA32F-FRECIPE-NEXT: fld.s $fa4, $a2, %pc_lo12(.LCPI7_3)
732 ; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
733 ; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
734 ; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa3
735 ; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4
736 ; LA32F-FRECIPE-NEXT: fst.s $fa2, $a0, 0
737 ; LA32F-FRECIPE-NEXT: fst.s $fa1, $a1, 0
738 ; LA32F-FRECIPE-NEXT: ret
740 ; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_order_f32:
742 ; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
743 ; LA64D-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI7_0)
744 ; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1)
745 ; LA64D-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI7_1)
746 ; LA64D-NEXT: fsqrt.s $fa0, $fa0
747 ; LA64D-NEXT: fdiv.s $fa1, $fa1, $fa0
748 ; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa0
749 ; LA64D-NEXT: fst.s $fa1, $a0, 0
750 ; LA64D-NEXT: fst.s $fa2, $a1, 0
753 ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f32:
754 ; LA64D-FRECIPE: # %bb.0:
755 ; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
756 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
757 ; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
758 ; LA64D-FRECIPE-NEXT: vldi $vr3, -1144
759 ; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
760 ; LA64D-FRECIPE-NEXT: vldi $vr3, -1056
761 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
762 ; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
763 ; LA64D-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI7_0)
764 ; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1)
765 ; LA64D-FRECIPE-NEXT: fld.s $fa4, $a2, %pc_lo12(.LCPI7_1)
766 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
767 ; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
768 ; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa3
769 ; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4
770 ; LA64D-FRECIPE-NEXT: fst.s $fa2, $a0, 0
771 ; LA64D-FRECIPE-NEXT: fst.s $fa1, $a1, 0
772 ; LA64D-FRECIPE-NEXT: ret
774 %sqrt = tail call fast float @llvm.sqrt.f32(float %x)
775 %sqrt_fast = fdiv fast float %x, %sqrt
776 %r1 = fdiv fast float 42.0, %sqrt
777 %r2 = fdiv fast float 43.0, %sqrt
778 store float %r1, ptr %p1, align 8
779 store float %r2, ptr %p2, align 8