1 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr10 | FileCheck -check-prefixes=CHECK-PWR9 %s
2 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK-PWR9 %s
3 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s
4 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr10 | FileCheck -check-prefixes=CHECK-PWR10 %s
5 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 | FileCheck -check-prefixes=CHECK-PWR9 %s
6 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s
7 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr7 | FileCheck -check-prefixes=CHECK-PWR7 %s
9 ; Exponent is a variable
10 define void @vspow_var(ptr nocapture %z, ptr nocapture readonly %y, ptr nocapture readonly %x) {
; Both @__powf4 operands are runtime values: the first is loaded through %x,
; the second through %y, and the result is stored through %z.
; Each per-CPU prefix below expects the matching CPU-suffixed __powf4 symbol.
11 ; CHECK-LABEL: @vspow_var
12 ; CHECK-PWR10: __powf4_P10
13 ; CHECK-PWR9: __powf4_P9
14 ; CHECK-PWR8: __powf4_P8
15 ; CHECK-PWR7: __powf4_P7
; Loop counter: enters as 0 from %entry and is fed back from %vector.body.
21 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
22 %next.gep = getelementptr float, ptr %z, i64 %index
23 %next.gep31 = getelementptr float, ptr %y, i64 %index
24 %next.gep32 = getelementptr float, ptr %x, i64 %index
25 %wide.load = load <4 x float>, ptr %next.gep32, align 4
26 %wide.load33 = load <4 x float>, ptr %next.gep31, align 4
; The call carries the ninf, afn and nsz fast-math flags.
27 %0 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> %wide.load33)
28 store <4 x float> %0, ptr %next.gep, align 4
; Advance by one 4-element vector; branch to %for.end once the counter equals 1024.
29 %index.next = add i64 %index, 4
30 %1 = icmp eq i64 %index.next, 1024
31 br i1 %1, label %for.end, label %vector.body
37 ; Exponent is a constant != 0.75 and !=0.25
38 define void @vspow_const(ptr nocapture %y, ptr nocapture readonly %x) {
; The second @__powf4 operand is a constant vector whose four lanes all hold
; 0x3FE851EB80000000; the first operand is loaded through %x and the result
; is stored through %y.
; Each per-CPU prefix below expects the matching CPU-suffixed __powf4 symbol.
39 ; CHECK-LABEL: @vspow_const
40 ; CHECK-PWR10: __powf4_P10
41 ; CHECK-PWR9: __powf4_P9
42 ; CHECK-PWR8: __powf4_P8
43 ; CHECK-PWR7: __powf4_P7
; Loop counter: enters as 0 from %entry and is fed back from %vector.body.
49 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
50 %next.gep = getelementptr float, ptr %y, i64 %index
51 %next.gep19 = getelementptr float, ptr %x, i64 %index
52 %wide.load = load <4 x float>, ptr %next.gep19, align 4
; The call carries the ninf, afn and nsz fast-math flags.
53 %0 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
54 store <4 x float> %0, ptr %next.gep, align 4
; Advance by one 4-element vector; branch to %for.end once the counter equals 1024.
55 %index.next = add i64 %index, 4
56 %1 = icmp eq i64 %index.next, 1024
57 br i1 %1, label %for.end, label %vector.body
63 ; Exponent is a constant vector whose lanes are not all equal; no lane is 0.75 or 0.25
64 define void @vspow_neq_const(ptr nocapture %y, ptr nocapture readonly %x) {
; The second @__powf4 operand is a constant vector with non-uniform lanes
; (0x3FE861EB80000000, 0x3FE871EB80000000, then 0x3FE851EB80000000 twice);
; the first operand is loaded through %x and the result is stored through %y.
; Each per-CPU prefix below expects the matching CPU-suffixed __powf4 symbol.
65 ; CHECK-LABEL: @vspow_neq_const
66 ; CHECK-PWR10: __powf4_P10
67 ; CHECK-PWR9: __powf4_P9
68 ; CHECK-PWR8: __powf4_P8
69 ; CHECK-PWR7: __powf4_P7
; Loop counter: enters as 0 from %entry and is fed back from %vector.body.
75 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
76 %next.gep = getelementptr float, ptr %y, i64 %index
77 %next.gep19 = getelementptr float, ptr %x, i64 %index
78 %wide.load = load <4 x float>, ptr %next.gep19, align 4
; The call carries the ninf, afn and nsz fast-math flags.
79 %0 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
80 store <4 x float> %0, ptr %next.gep, align 4
; Advance by one 4-element vector; branch to %for.end once the counter equals 1024.
81 %index.next = add i64 %index, 4
82 %1 = icmp eq i64 %index.next, 1024
83 br i1 %1, label %for.end, label %vector.body
89 ; Exponent is a constant vector that is not uniformly 0.75: three lanes are 0.75, one lane differs
90 define void @vspow_neq075_const(ptr nocapture %y, ptr nocapture readonly %x) {
; The second @__powf4 operand is a constant vector whose first three lanes are
; 0.75 and whose last lane is 0x3FE851EB80000000, so it is not a uniform 0.75.
; The first operand is loaded through %x and the result is stored through %y.
; Each per-CPU prefix below expects the matching CPU-suffixed __powf4 symbol.
91 ; CHECK-LABEL: @vspow_neq075_const
92 ; CHECK-PWR10: __powf4_P10
93 ; CHECK-PWR9: __powf4_P9
94 ; CHECK-PWR8: __powf4_P8
95 ; CHECK-PWR7: __powf4_P7
; Loop counter: enters as 0 from %entry and is fed back from %vector.body.
101 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
102 %next.gep = getelementptr float, ptr %y, i64 %index
103 %next.gep19 = getelementptr float, ptr %x, i64 %index
104 %wide.load = load <4 x float>, ptr %next.gep19, align 4
; The call carries the ninf, afn and nsz fast-math flags.
105 %0 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
106 store <4 x float> %0, ptr %next.gep, align 4
; Advance by one 4-element vector; branch to %for.end once the counter equals 1024.
107 %index.next = add i64 %index, 4
108 %1 = icmp eq i64 %index.next, 1024
109 br i1 %1, label %for.end, label %vector.body
115 ; Exponent is a constant vector that is not uniformly 0.25: two lanes are 0.25, two lanes differ
116 define void @vspow_neq025_const(ptr nocapture %y, ptr nocapture readonly %x) {
; The second @__powf4 operand is a constant vector that alternates
; 0x3FE851EB80000000 and 0.25, so it is not a uniform 0.25.
; The first operand is loaded through %x and the result is stored through %y.
; Each per-CPU prefix below expects the matching CPU-suffixed __powf4 symbol.
117 ; CHECK-LABEL: @vspow_neq025_const
118 ; CHECK-PWR10: __powf4_P10
119 ; CHECK-PWR9: __powf4_P9
120 ; CHECK-PWR8: __powf4_P8
121 ; CHECK-PWR7: __powf4_P7
124 br label %vector.body
; Loop counter: enters as 0 from %entry and is fed back from %vector.body.
127 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
128 %next.gep = getelementptr float, ptr %y, i64 %index
129 %next.gep19 = getelementptr float, ptr %x, i64 %index
130 %wide.load = load <4 x float>, ptr %next.gep19, align 4
; The call carries the ninf, afn and nsz fast-math flags.
131 %0 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
132 store <4 x float> %0, ptr %next.gep, align 4
; Advance by one 4-element vector; branch to %for.end once the counter equals 1024.
133 %index.next = add i64 %index, 4
134 %1 = icmp eq i64 %index.next, 1024
135 br i1 %1, label %for.end, label %vector.body
142 define void @vspow_075(ptr nocapture %y, ptr nocapture readonly %x) {
; The second @__powf4 operand is a constant vector with 0.75 in every lane.
; The directive below forbids any CPU-suffixed __powf4 symbol in the output.
; NOTE(review): the RUN lines at the top of this file pass only CHECK-PWR<N>
; prefixes, so the LABEL and NOT directives here, which use the bare CHECK
; prefix, look inactive. Confirm, and add the bare prefix to the RUN lines if
; these directives are meant to be enforced.
; NOTE(review): the bracket expression 7,8,9,10 is a character class (one of
; 7, 8, 9, 1, 0 or a comma), not an alternation; it still matches the start
; of every __powf4_P<N> name, P10 included.
143 ; CHECK-LABEL: @vspow_075
144 ; CHECK-NOT: __powf4_P{{[7,8,9,10]}}
148 br label %vector.body
; Loop counter: enters as 0 from %entry and is fed back from %vector.body.
151 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
152 %next.gep = getelementptr float, ptr %y, i64 %index
153 %next.gep19 = getelementptr float, ptr %x, i64 %index
154 %wide.load = load <4 x float>, ptr %next.gep19, align 4
; This call carries only ninf and afn; unlike the 0.25 case it has no nsz flag.
155 %0 = call ninf afn <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
156 store <4 x float> %0, ptr %next.gep, align 4
; Advance by one 4-element vector; branch to %for.end once the counter equals 1024.
157 %index.next = add i64 %index, 4
158 %1 = icmp eq i64 %index.next, 1024
159 br i1 %1, label %for.end, label %vector.body
166 define void @vspow_025(ptr nocapture %y, ptr nocapture readonly %x) {
; The second @__powf4 operand is a constant vector with 0.25 in every lane.
; The directive below forbids any CPU-suffixed __powf4 symbol in the output.
; NOTE(review): the RUN lines at the top of this file pass only CHECK-PWR<N>
; prefixes, so the LABEL and NOT directives here, which use the bare CHECK
; prefix, look inactive. Confirm, and add the bare prefix to the RUN lines if
; these directives are meant to be enforced.
; NOTE(review): the bracket expression 7,8,9,10 is a character class (one of
; 7, 8, 9, 1, 0 or a comma), not an alternation; it still matches the start
; of every __powf4_P<N> name, P10 included.
167 ; CHECK-LABEL: @vspow_025
168 ; CHECK-NOT: __powf4_P{{[7,8,9,10]}}
172 br label %vector.body
; Loop counter: enters as 0 from %entry and is fed back from %vector.body.
175 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
176 %next.gep = getelementptr float, ptr %y, i64 %index
177 %next.gep19 = getelementptr float, ptr %x, i64 %index
178 %wide.load = load <4 x float>, ptr %next.gep19, align 4
; The call carries the ninf, afn and nsz fast-math flags.
179 %0 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
180 store <4 x float> %0, ptr %next.gep, align 4
; Advance by one 4-element vector; branch to %for.end once the counter equals 1024.
181 %index.next = add i64 %index, 4
182 %1 = icmp eq i64 %index.next, 1024
183 br i1 %1, label %for.end, label %vector.body
189 ; Exponent is 0.75 but no proper fast-math flags
190 define void @vspow_075_nofast(ptr nocapture %y, ptr nocapture readonly %x) {
; Same uniform 0.75 second operand as @vspow_075, but the call has no
; fast-math flags. The per-CPU prefixes expect the CPU-suffixed __powf4
; symbol, and the last directive forbids xvrsqrtesp in the output.
; NOTE(review): the RUN lines at the top of this file pass only CHECK-PWR<N>
; prefixes, so the LABEL and NOT directives here, which use the bare CHECK
; prefix, look inactive. Confirm before relying on the xvrsqrtesp exclusion.
191 ; CHECK-LABEL: @vspow_075_nofast
192 ; CHECK-PWR10: __powf4_P10
193 ; CHECK-PWR9: __powf4_P9
194 ; CHECK-PWR8: __powf4_P8
195 ; CHECK-PWR7: __powf4_P7
196 ; CHECK-NOT: xvrsqrtesp
199 br label %vector.body
; Loop counter: enters as 0 from %entry and is fed back from %vector.body.
202 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
203 %next.gep = getelementptr float, ptr %y, i64 %index
204 %next.gep19 = getelementptr float, ptr %x, i64 %index
205 %wide.load = load <4 x float>, ptr %next.gep19, align 4
; No fast-math flags on this call: that is the point of the test case.
206 %0 = call <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
207 store <4 x float> %0, ptr %next.gep, align 4
; Advance by one 4-element vector; branch to %for.end once the counter equals 1024.
208 %index.next = add i64 %index, 4
209 %1 = icmp eq i64 %index.next, 1024
210 br i1 %1, label %for.end, label %vector.body
216 ; Exponent is 0.25 but no proper fast-math flags
217 define void @vspow_025_nofast(ptr nocapture %y, ptr nocapture readonly %x) {
; Same uniform 0.25 second operand as @vspow_025, but the call has no
; fast-math flags. The per-CPU prefixes expect the CPU-suffixed __powf4
; symbol, and the last directive forbids xvrsqrtesp in the output.
; NOTE(review): the RUN lines at the top of this file pass only CHECK-PWR<N>
; prefixes, so the LABEL and NOT directives here, which use the bare CHECK
; prefix, look inactive. Confirm before relying on the xvrsqrtesp exclusion.
218 ; CHECK-LABEL: @vspow_025_nofast
219 ; CHECK-PWR10: __powf4_P10
220 ; CHECK-PWR9: __powf4_P9
221 ; CHECK-PWR8: __powf4_P8
222 ; CHECK-PWR7: __powf4_P7
223 ; CHECK-NOT: xvrsqrtesp
226 br label %vector.body
; Loop counter: enters as 0 from %entry and is fed back from %vector.body.
229 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
230 %next.gep = getelementptr float, ptr %y, i64 %index
231 %next.gep19 = getelementptr float, ptr %x, i64 %index
232 %wide.load = load <4 x float>, ptr %next.gep19, align 4
; No fast-math flags on this call: that is the point of the test case.
233 %0 = call <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
234 store <4 x float> %0, ptr %next.gep, align 4
; Advance by one 4-element vector; branch to %for.end once the counter equals 1024.
235 %index.next = add i64 %index, 4
236 %1 = icmp eq i64 %index.next, 1024
237 br i1 %1, label %for.end, label %vector.body
243 ; Function Attrs: nounwind readnone speculatable willreturn
; Declaration of the generic 4 x float pow entry point called by every test
; function in this file.
; NOTE(review): the declaration below references no attribute group, so the
; attributes named in the comment above are not attached to it as written.
; Confirm whether they were dropped intentionally.
244 declare <4 x float> @__powf4(<4 x float>, <4 x float>)