1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
3 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
4 target triple = "powerpc64-unknown-linux-gnu"
; Vector square-root intrinsics called by the test functions in this file:
; one overload for <4 x double> and one for <4 x float>.
6 declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
7 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
; foo_fmf: %a / sqrt(%b) on <4 x double>, with the 'fast' flag on both the
; sqrt call and the fdiv. The checks expect a sequence that starts from
; qvfrsqrte and continues with qvfmul/qvfmsub/qvfnmsub, ending in a single
; qvfmul by %a; no fsqrt or fdiv instruction is expected.
9 define <4 x double> @foo_fmf(<4 x double> %a, <4 x double> %b) nounwind {
10 ; CHECK-LABEL: foo_fmf:
11 ; CHECK: # %bb.0: # %entry
12 ; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
13 ; CHECK-NEXT: qvfrsqrte 3, 2
14 ; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
15 ; CHECK-NEXT: qvlfdx 0, 0, 3
16 ; CHECK-NEXT: qvfmul 4, 3, 3
17 ; CHECK-NEXT: qvfmsub 2, 2, 0, 2
18 ; CHECK-NEXT: qvfnmsub 4, 2, 4, 0
19 ; CHECK-NEXT: qvfmul 3, 3, 4
20 ; CHECK-NEXT: qvfmul 4, 3, 3
21 ; CHECK-NEXT: qvfnmsub 0, 2, 4, 0
22 ; CHECK-NEXT: qvfmul 0, 3, 0
23 ; CHECK-NEXT: qvfmul 1, 1, 0
26 %x = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
27 %r = fdiv fast <4 x double> %a, %x
; foo_safe: same computation as foo_fmf (%a / sqrt(%b) on <4 x double>) but
; without any fast-math flags. The checks expect four scalar fsqrt and four
; scalar fdiv instructions, surrounded by qvesplati/qvgpci/qvfperm
; (presumably per-element extraction and re-packing -- confirm against the
; ISA documentation); no estimate instruction is expected.
31 define <4 x double> @foo_safe(<4 x double> %a, <4 x double> %b) nounwind {
32 ; CHECK-LABEL: foo_safe:
33 ; CHECK: # %bb.0: # %entry
34 ; CHECK-NEXT: qvesplati 5, 2, 3
35 ; CHECK-NEXT: qvesplati 3, 2, 1
36 ; CHECK-NEXT: qvesplati 4, 2, 2
37 ; CHECK-NEXT: fsqrt 2, 2
38 ; CHECK-NEXT: fsqrt 5, 5
39 ; CHECK-NEXT: fsqrt 4, 4
40 ; CHECK-NEXT: fsqrt 3, 3
41 ; CHECK-NEXT: qvesplati 6, 1, 3
42 ; CHECK-NEXT: qvgpci 0, 275
43 ; CHECK-NEXT: fdiv 2, 1, 2
44 ; CHECK-NEXT: fdiv 5, 6, 5
45 ; CHECK-NEXT: qvesplati 6, 1, 2
46 ; CHECK-NEXT: qvesplati 1, 1, 1
47 ; CHECK-NEXT: fdiv 4, 6, 4
48 ; CHECK-NEXT: fdiv 1, 1, 3
49 ; CHECK-NEXT: qvfperm 3, 4, 5, 0
50 ; CHECK-NEXT: qvfperm 0, 2, 1, 0
51 ; CHECK-NEXT: qvgpci 1, 101
52 ; CHECK-NEXT: qvfperm 1, 0, 3, 1
55 %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
56 %r = fdiv <4 x double> %a, %x
; foof_fmf: mixed-precision variant. sqrt is taken on <4 x float> %b with the
; 'fast' flag, the result is extended to <4 x double> (fpext), and %a is
; divided by it with 'fast'. The checks expect the single-precision estimate
; qvfrsqrtes followed by qvfmuls/qvfnmsubs/qvfmadds, then a double-precision
; qvfmul by %a; no fsqrts or fdiv instruction is expected.
60 define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind {
61 ; CHECK-LABEL: foof_fmf:
62 ; CHECK: # %bb.0: # %entry
63 ; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha
64 ; CHECK-NEXT: qvfrsqrtes 3, 2
65 ; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l
66 ; CHECK-NEXT: qvlfsx 0, 0, 3
67 ; CHECK-NEXT: qvfmuls 4, 3, 3
68 ; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2
69 ; CHECK-NEXT: qvfmadds 0, 2, 4, 0
70 ; CHECK-NEXT: qvfmuls 0, 3, 0
71 ; CHECK-NEXT: qvfmul 1, 1, 0
74 %x = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
75 %y = fpext <4 x float> %x to <4 x double>
76 %r = fdiv fast <4 x double> %a, %y
; foof_safe: same computation as foof_fmf (<4 x float> sqrt, fpext to
; <4 x double>, then fdiv) but without fast-math flags. The checks expect
; four scalar fsqrts and four scalar fdiv instructions interleaved with
; qvesplati/qvgpci/qvfperm; no estimate instruction is expected.
80 define <4 x double> @foof_safe(<4 x double> %a, <4 x float> %b) nounwind {
81 ; CHECK-LABEL: foof_safe:
82 ; CHECK: # %bb.0: # %entry
83 ; CHECK-NEXT: qvesplati 0, 2, 3
84 ; CHECK-NEXT: qvesplati 3, 2, 2
85 ; CHECK-NEXT: fsqrts 4, 2
86 ; CHECK-NEXT: qvesplati 2, 2, 1
87 ; CHECK-NEXT: fsqrts 0, 0
88 ; CHECK-NEXT: fsqrts 3, 3
89 ; CHECK-NEXT: fsqrts 2, 2
90 ; CHECK-NEXT: qvgpci 5, 275
91 ; CHECK-NEXT: qvgpci 6, 101
92 ; CHECK-NEXT: qvfperm 0, 3, 0, 5
93 ; CHECK-NEXT: qvesplati 3, 1, 2
94 ; CHECK-NEXT: qvfperm 2, 4, 2, 5
95 ; CHECK-NEXT: qvfperm 0, 2, 0, 6
96 ; CHECK-NEXT: qvesplati 2, 1, 3
97 ; CHECK-NEXT: qvesplati 4, 0, 3
98 ; CHECK-NEXT: fdiv 2, 2, 4
99 ; CHECK-NEXT: qvesplati 4, 0, 2
100 ; CHECK-NEXT: fdiv 3, 3, 4
101 ; CHECK-NEXT: qvesplati 4, 1, 1
102 ; CHECK-NEXT: fdiv 1, 1, 0
103 ; CHECK-NEXT: qvesplati 0, 0, 1
104 ; CHECK-NEXT: fdiv 0, 4, 0
105 ; CHECK-NEXT: qvfperm 2, 3, 2, 5
106 ; CHECK-NEXT: qvfperm 0, 1, 0, 5
107 ; CHECK-NEXT: qvfperm 1, 0, 2, 6
110 %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
111 %y = fpext <4 x float> %x to <4 x double>
112 %r = fdiv <4 x double> %a, %y
; food_fmf: mixed-precision variant in the other direction. sqrt is taken on
; <4 x double> %b with the 'fast' flag, the result is truncated to
; <4 x float> (fptrunc), and %a is divided by it with 'fast'. The checks
; expect the double-precision qvfrsqrte sequence, then qvfrsp, then a
; single-precision qvfmuls by %a; no fsqrt or fdivs instruction is expected.
116 define <4 x float> @food_fmf(<4 x float> %a, <4 x double> %b) nounwind {
117 ; CHECK-LABEL: food_fmf:
118 ; CHECK: # %bb.0: # %entry
119 ; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha
120 ; CHECK-NEXT: qvfrsqrte 3, 2
121 ; CHECK-NEXT: addi 3, 3, .LCPI4_0@toc@l
122 ; CHECK-NEXT: qvlfdx 0, 0, 3
123 ; CHECK-NEXT: qvfmul 4, 3, 3
124 ; CHECK-NEXT: qvfmsub 2, 2, 0, 2
125 ; CHECK-NEXT: qvfnmsub 4, 2, 4, 0
126 ; CHECK-NEXT: qvfmul 3, 3, 4
127 ; CHECK-NEXT: qvfmul 4, 3, 3
128 ; CHECK-NEXT: qvfnmsub 0, 2, 4, 0
129 ; CHECK-NEXT: qvfmul 0, 3, 0
130 ; CHECK-NEXT: qvfrsp 0, 0
131 ; CHECK-NEXT: qvfmuls 1, 1, 0
134 %x = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
135 %y = fptrunc <4 x double> %x to <4 x float>
136 %r = fdiv fast <4 x float> %a, %y
; food_safe: same computation as food_fmf (<4 x double> sqrt, fptrunc to
; <4 x float>, then fdiv) but without fast-math flags. The checks expect
; four scalar fsqrt instructions, a qvfrsp, and four scalar fdivs
; instructions interleaved with qvesplati/qvgpci/qvfperm; no estimate
; instruction is expected.
140 define <4 x float> @food_safe(<4 x float> %a, <4 x double> %b) nounwind {
141 ; CHECK-LABEL: food_safe:
142 ; CHECK: # %bb.0: # %entry
143 ; CHECK-NEXT: qvesplati 0, 2, 3
144 ; CHECK-NEXT: qvesplati 3, 2, 2
145 ; CHECK-NEXT: fsqrt 4, 2
146 ; CHECK-NEXT: qvesplati 2, 2, 1
147 ; CHECK-NEXT: fsqrt 0, 0
148 ; CHECK-NEXT: fsqrt 3, 3
149 ; CHECK-NEXT: fsqrt 2, 2
150 ; CHECK-NEXT: qvgpci 5, 275
151 ; CHECK-NEXT: qvgpci 6, 101
152 ; CHECK-NEXT: qvfperm 0, 3, 0, 5
153 ; CHECK-NEXT: qvesplati 3, 1, 2
154 ; CHECK-NEXT: qvfperm 2, 4, 2, 5
155 ; CHECK-NEXT: qvfperm 0, 2, 0, 6
156 ; CHECK-NEXT: qvesplati 2, 1, 3
157 ; CHECK-NEXT: qvfrsp 0, 0
158 ; CHECK-NEXT: qvesplati 4, 0, 3
159 ; CHECK-NEXT: fdivs 2, 2, 4
160 ; CHECK-NEXT: qvesplati 4, 0, 2
161 ; CHECK-NEXT: fdivs 3, 3, 4
162 ; CHECK-NEXT: qvesplati 4, 1, 1
163 ; CHECK-NEXT: fdivs 1, 1, 0
164 ; CHECK-NEXT: qvesplati 0, 0, 1
165 ; CHECK-NEXT: fdivs 0, 4, 0
166 ; CHECK-NEXT: qvfperm 2, 3, 2, 5
167 ; CHECK-NEXT: qvfperm 0, 1, 0, 5
168 ; CHECK-NEXT: qvfperm 1, 0, 2, 6
171 %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
172 %y = fptrunc <4 x double> %x to <4 x float>
173 %r = fdiv <4 x float> %a, %y
; goo_fmf: %a / sqrt(%b) on <4 x float>, with the 'fast' flag on both the
; sqrt call and the fdiv. The checks expect the single-precision estimate
; qvfrsqrtes followed by qvfmuls/qvfnmsubs/qvfmadds and a final qvfmuls by
; %a; no fsqrts or fdivs instruction is expected.
177 define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
178 ; CHECK-LABEL: goo_fmf:
179 ; CHECK: # %bb.0: # %entry
180 ; CHECK-NEXT: addis 3, 2, .LCPI6_0@toc@ha
181 ; CHECK-NEXT: qvfrsqrtes 3, 2
182 ; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l
183 ; CHECK-NEXT: qvlfsx 0, 0, 3
184 ; CHECK-NEXT: qvfmuls 4, 3, 3
185 ; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2
186 ; CHECK-NEXT: qvfmadds 0, 2, 4, 0
187 ; CHECK-NEXT: qvfmuls 0, 3, 0
188 ; CHECK-NEXT: qvfmuls 1, 1, 0
191 %x = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
192 %r = fdiv fast <4 x float> %a, %x
; goo_safe: same computation as goo_fmf (%a / sqrt(%b) on <4 x float>) but
; without fast-math flags. The checks expect four scalar fsqrts and four
; scalar fdivs instructions, surrounded by qvesplati/qvgpci/qvfperm; no
; estimate instruction is expected.
196 define <4 x float> @goo_safe(<4 x float> %a, <4 x float> %b) nounwind {
197 ; CHECK-LABEL: goo_safe:
198 ; CHECK: # %bb.0: # %entry
199 ; CHECK-NEXT: qvesplati 5, 2, 3
200 ; CHECK-NEXT: qvesplati 3, 2, 1
201 ; CHECK-NEXT: qvesplati 4, 2, 2
202 ; CHECK-NEXT: fsqrts 2, 2
203 ; CHECK-NEXT: fsqrts 5, 5
204 ; CHECK-NEXT: fsqrts 4, 4
205 ; CHECK-NEXT: fsqrts 3, 3
206 ; CHECK-NEXT: qvesplati 6, 1, 3
207 ; CHECK-NEXT: qvgpci 0, 275
208 ; CHECK-NEXT: fdivs 2, 1, 2
209 ; CHECK-NEXT: fdivs 5, 6, 5
210 ; CHECK-NEXT: qvesplati 6, 1, 2
211 ; CHECK-NEXT: qvesplati 1, 1, 1
212 ; CHECK-NEXT: fdivs 4, 6, 4
213 ; CHECK-NEXT: fdivs 1, 1, 3
214 ; CHECK-NEXT: qvfperm 3, 4, 5, 0
215 ; CHECK-NEXT: qvfperm 0, 2, 1, 0
216 ; CHECK-NEXT: qvgpci 1, 101
217 ; CHECK-NEXT: qvfperm 1, 0, 3, 1
220 %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
221 %r = fdiv <4 x float> %a, %x
; foo2_fmf: plain division %a / %b on <4 x double> with the 'fast' flag. The
; checks expect a sequence that starts from qvfre and continues with
; qvfnmsub/qvfmadd pairs, ending in a qvfmul by %a; no fdiv instruction is
; expected.
225 define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind {
226 ; CHECK-LABEL: foo2_fmf:
227 ; CHECK: # %bb.0: # %entry
228 ; CHECK-NEXT: addis 3, 2, .LCPI8_0@toc@ha
229 ; CHECK-NEXT: qvfre 3, 2
230 ; CHECK-NEXT: addi 3, 3, .LCPI8_0@toc@l
231 ; CHECK-NEXT: qvlfdx 0, 0, 3
232 ; CHECK-NEXT: qvfnmsub 4, 2, 3, 0
233 ; CHECK-NEXT: qvfmadd 3, 3, 4, 3
234 ; CHECK-NEXT: qvfnmsub 0, 2, 3, 0
235 ; CHECK-NEXT: qvfmadd 0, 3, 0, 3
236 ; CHECK-NEXT: qvfmul 1, 1, 0
239 %r = fdiv fast <4 x double> %a, %b
; foo2_safe: plain division %a / %b on <4 x double> without fast-math flags.
; The checks expect four scalar fdiv instructions, surrounded by
; qvesplati/qvgpci/qvfperm; no qvfre estimate is expected.
243 define <4 x double> @foo2_safe(<4 x double> %a, <4 x double> %b) nounwind {
244 ; CHECK-LABEL: foo2_safe:
246 ; CHECK-NEXT: qvesplati 3, 2, 3
247 ; CHECK-NEXT: qvesplati 4, 1, 3
248 ; CHECK-NEXT: qvesplati 5, 2, 2
249 ; CHECK-NEXT: qvgpci 0, 275
250 ; CHECK-NEXT: fdiv 3, 4, 3
251 ; CHECK-NEXT: qvesplati 4, 1, 2
252 ; CHECK-NEXT: fdiv 4, 4, 5
253 ; CHECK-NEXT: fdiv 5, 1, 2
254 ; CHECK-NEXT: qvesplati 2, 2, 1
255 ; CHECK-NEXT: qvesplati 1, 1, 1
256 ; CHECK-NEXT: fdiv 1, 1, 2
257 ; CHECK-NEXT: qvfperm 2, 4, 3, 0
258 ; CHECK-NEXT: qvfperm 0, 5, 1, 0
259 ; CHECK-NEXT: qvgpci 1, 101
260 ; CHECK-NEXT: qvfperm 1, 0, 2, 1
262 %r = fdiv <4 x double> %a, %b
; goo2_fmf: plain division %a / %b on <4 x float> with the 'fast' flag. The
; checks expect the single-precision estimate qvfres followed by one
; qvfnmsubs/qvfmadds pair and a final qvfmuls by %a; no fdivs instruction is
; expected.
266 define <4 x float> @goo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
267 ; CHECK-LABEL: goo2_fmf:
268 ; CHECK: # %bb.0: # %entry
269 ; CHECK-NEXT: addis 3, 2, .LCPI10_0@toc@ha
270 ; CHECK-NEXT: qvfres 3, 2
271 ; CHECK-NEXT: addi 3, 3, .LCPI10_0@toc@l
272 ; CHECK-NEXT: qvlfsx 0, 0, 3
273 ; CHECK-NEXT: qvfnmsubs 0, 2, 3, 0
274 ; CHECK-NEXT: qvfmadds 0, 3, 0, 3
275 ; CHECK-NEXT: qvfmuls 1, 1, 0
278 %r = fdiv fast <4 x float> %a, %b
; goo2_safe: plain division %a / %b on <4 x float> without fast-math flags.
; The checks expect four scalar fdivs instructions, surrounded by
; qvesplati/qvgpci/qvfperm; no qvfres estimate is expected.
282 define <4 x float> @goo2_safe(<4 x float> %a, <4 x float> %b) nounwind {
283 ; CHECK-LABEL: goo2_safe:
284 ; CHECK: # %bb.0: # %entry
285 ; CHECK-NEXT: qvesplati 3, 2, 3
286 ; CHECK-NEXT: qvesplati 4, 1, 3
287 ; CHECK-NEXT: qvesplati 5, 2, 2
288 ; CHECK-NEXT: qvgpci 0, 275
289 ; CHECK-NEXT: fdivs 3, 4, 3
290 ; CHECK-NEXT: qvesplati 4, 1, 2
291 ; CHECK-NEXT: fdivs 4, 4, 5
292 ; CHECK-NEXT: fdivs 5, 1, 2
293 ; CHECK-NEXT: qvesplati 2, 2, 1
294 ; CHECK-NEXT: qvesplati 1, 1, 1
295 ; CHECK-NEXT: fdivs 1, 1, 2
296 ; CHECK-NEXT: qvfperm 2, 4, 3, 0
297 ; CHECK-NEXT: qvfperm 0, 5, 1, 0
298 ; CHECK-NEXT: qvgpci 1, 101
299 ; CHECK-NEXT: qvfperm 1, 0, 2, 1
302 %r = fdiv <4 x float> %a, %b
; foo3_fmf: sqrt(%a) alone on <4 x double> with the 'fast' flag. The checks
; expect a qvfrsqrte-based sequence, a multiply by the input, and then a
; qvfcmpeq/qvfsel pair against a second constant-pool value (.LCPI12_1)
; (presumably special-casing one input value such as zero -- confirm); no
; fsqrt instruction is expected.
306 define <4 x double> @foo3_fmf(<4 x double> %a) nounwind {
307 ; CHECK-LABEL: foo3_fmf:
308 ; CHECK: # %bb.0: # %entry
309 ; CHECK-NEXT: addis 3, 2, .LCPI12_0@toc@ha
310 ; CHECK-NEXT: qvfrsqrte 0, 1
311 ; CHECK-NEXT: addi 3, 3, .LCPI12_0@toc@l
312 ; CHECK-NEXT: qvlfdx 2, 0, 3
313 ; CHECK-NEXT: addis 3, 2, .LCPI12_1@toc@ha
314 ; CHECK-NEXT: addi 3, 3, .LCPI12_1@toc@l
315 ; CHECK-NEXT: qvfmul 3, 0, 0
316 ; CHECK-NEXT: qvfmsub 4, 1, 2, 1
317 ; CHECK-NEXT: qvfnmsub 3, 4, 3, 2
318 ; CHECK-NEXT: qvfmul 0, 0, 3
319 ; CHECK-NEXT: qvfmul 3, 0, 0
320 ; CHECK-NEXT: qvfnmsub 2, 4, 3, 2
321 ; CHECK-NEXT: qvfmul 0, 0, 2
322 ; CHECK-NEXT: qvlfdx 2, 0, 3
323 ; CHECK-NEXT: qvfmul 0, 0, 1
324 ; CHECK-NEXT: qvfcmpeq 1, 1, 2
325 ; CHECK-NEXT: qvfsel 1, 1, 2, 0
328 %r = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
; foo3_safe: sqrt(%a) alone on <4 x double> without fast-math flags. The
; checks expect four scalar fsqrt instructions, surrounded by
; qvesplati/qvgpci/qvfperm; no qvfrsqrte estimate is expected.
332 define <4 x double> @foo3_safe(<4 x double> %a) nounwind {
333 ; CHECK-LABEL: foo3_safe:
334 ; CHECK: # %bb.0: # %entry
335 ; CHECK-NEXT: qvesplati 2, 1, 3
336 ; CHECK-NEXT: qvesplati 3, 1, 2
337 ; CHECK-NEXT: fsqrt 4, 1
338 ; CHECK-NEXT: qvesplati 1, 1, 1
339 ; CHECK-NEXT: fsqrt 2, 2
340 ; CHECK-NEXT: fsqrt 3, 3
341 ; CHECK-NEXT: fsqrt 1, 1
342 ; CHECK-NEXT: qvgpci 0, 275
343 ; CHECK-NEXT: qvfperm 2, 3, 2, 0
344 ; CHECK-NEXT: qvfperm 0, 4, 1, 0
345 ; CHECK-NEXT: qvgpci 1, 101
346 ; CHECK-NEXT: qvfperm 1, 0, 2, 1
349 %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
; goo3_fmf: sqrt(%a) alone on <4 x float> with the 'fast' flag. The checks
; expect a qvfrsqrtes-based sequence, a multiply by the input, and then a
; qvfcmpeq/qvfsel pair against a second constant-pool value (.LCPI14_0)
; (presumably special-casing one input value such as zero -- confirm); no
; fsqrts instruction is expected.
353 define <4 x float> @goo3_fmf(<4 x float> %a) nounwind {
354 ; CHECK-LABEL: goo3_fmf:
355 ; CHECK: # %bb.0: # %entry
356 ; CHECK-NEXT: addis 3, 2, .LCPI14_1@toc@ha
357 ; CHECK-NEXT: qvfrsqrtes 2, 1
358 ; CHECK-NEXT: addi 3, 3, .LCPI14_1@toc@l
359 ; CHECK-NEXT: qvlfsx 0, 0, 3
360 ; CHECK-NEXT: addis 3, 2, .LCPI14_0@toc@ha
361 ; CHECK-NEXT: addi 3, 3, .LCPI14_0@toc@l
362 ; CHECK-NEXT: qvfmuls 4, 2, 2
363 ; CHECK-NEXT: qvfnmsubs 3, 1, 0, 1
364 ; CHECK-NEXT: qvfmadds 0, 3, 4, 0
365 ; CHECK-NEXT: qvlfsx 3, 0, 3
366 ; CHECK-NEXT: qvfmuls 0, 2, 0
367 ; CHECK-NEXT: qvfmuls 0, 0, 1
368 ; CHECK-NEXT: qvfcmpeq 1, 1, 3
369 ; CHECK-NEXT: qvfsel 1, 1, 3, 0
372 %r = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
; goo3_safe: sqrt(%a) alone on <4 x float> without fast-math flags. The
; checks expect four scalar fsqrts instructions, surrounded by
; qvesplati/qvgpci/qvfperm; no qvfrsqrtes estimate is expected.
376 define <4 x float> @goo3_safe(<4 x float> %a) nounwind {
377 ; CHECK-LABEL: goo3_safe:
378 ; CHECK: # %bb.0: # %entry
379 ; CHECK-NEXT: qvesplati 2, 1, 3
380 ; CHECK-NEXT: qvesplati 3, 1, 2
381 ; CHECK-NEXT: fsqrts 4, 1
382 ; CHECK-NEXT: qvesplati 1, 1, 1
383 ; CHECK-NEXT: fsqrts 2, 2
384 ; CHECK-NEXT: fsqrts 3, 3
385 ; CHECK-NEXT: fsqrts 1, 1
386 ; CHECK-NEXT: qvgpci 0, 275
387 ; CHECK-NEXT: qvfperm 2, 3, 2, 0
388 ; CHECK-NEXT: qvfperm 0, 4, 1, 0
389 ; CHECK-NEXT: qvgpci 1, 101
390 ; CHECK-NEXT: qvfperm 1, 0, 2, 1
393 %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)