; Runs llc with -vector-library=MASSV for several -mcpu levels on two triples
; (powerpc64le-unknown-unknown and powerpc-ibm-aix-xcoff) and pipes the output
; to FileCheck. Each run selects the CHECK-PWR<N> prefix that matches its own
; -mcpu value, because the expected entry-point name (__powf4_P<N>) carries
; the same number.
1 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr10 | FileCheck -check-prefixes=CHECK-PWR10 %s
2 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK-PWR9 %s
3 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s
4 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr10 | FileCheck -check-prefixes=CHECK-PWR10 %s
5 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 | FileCheck -check-prefixes=CHECK-PWR9 %s
6 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s
7 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr7 | FileCheck -check-prefixes=CHECK-PWR7 %s
9 ; Exponent is a variable
; Both @__powf4 operands are <4 x float> vectors loaded at run time (base from
; %x, exponent from %y); the result is stored to %z. Every CHECK-PWR<N> prefix
; expects the call to appear as the matching __powf4_P<N> entry.
10 define void @vspow_var(float* nocapture %z, float* nocapture readonly %y, float* nocapture readonly %x) {
11 ; CHECK-LABEL: @vspow_var
12 ; CHECK-PWR10: __powf4_P10
13 ; CHECK-PWR9: __powf4_P9
14 ; CHECK-PWR8: __powf4_P8
15 ; CHECK-PWR7: __powf4_P7
; %index starts at 0 and is advanced by 4 (one <4 x float> vector) per pass.
21 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
22 %next.gep = getelementptr float, float* %z, i64 %index
23 %next.gep31 = getelementptr float, float* %y, i64 %index
24 %next.gep32 = getelementptr float, float* %x, i64 %index
25 %0 = bitcast float* %next.gep32 to <4 x float>*
26 %wide.load = load <4 x float>, <4 x float>* %0, align 4
27 %1 = bitcast float* %next.gep31 to <4 x float>*
28 %wide.load33 = load <4 x float>, <4 x float>* %1, align 4
; The exponent operand (%wide.load33) is not a constant.
29 %2 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> %wide.load33)
30 %3 = bitcast float* %next.gep to <4 x float>*
31 store <4 x float> %2, <4 x float>* %3, align 4
32 %index.next = add i64 %index, 4
; Leave the loop once %index.next reaches 1024.
33 %4 = icmp eq i64 %index.next, 1024
34 br i1 %4, label %for.end, label %vector.body
40 ; Exponent is a constant != 0.75 and !=0.25
; All four exponent lanes hold the same constant 0x3FE851EB80000000 (0.76 as a
; float). Every CHECK-PWR<N> prefix expects the call to appear as the matching
; __powf4_P<N> entry.
41 define void @vspow_const(float* nocapture %y, float* nocapture readonly %x) {
42 ; CHECK-LABEL: @vspow_const
43 ; CHECK-PWR10: __powf4_P10
44 ; CHECK-PWR9: __powf4_P9
45 ; CHECK-PWR8: __powf4_P8
46 ; CHECK-PWR7: __powf4_P7
; %index starts at 0 and is advanced by 4 (one <4 x float> vector) per pass.
52 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
53 %next.gep = getelementptr float, float* %y, i64 %index
54 %next.gep19 = getelementptr float, float* %x, i64 %index
55 %0 = bitcast float* %next.gep19 to <4 x float>*
56 %wide.load = load <4 x float>, <4 x float>* %0, align 4
; Splat constant exponent that is neither 0.75 nor 0.25.
57 %1 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
58 %2 = bitcast float* %next.gep to <4 x float>*
59 store <4 x float> %1, <4 x float>* %2, align 4
60 %index.next = add i64 %index, 4
; Leave the loop once %index.next reaches 1024.
61 %3 = icmp eq i64 %index.next, 1024
62 br i1 %3, label %for.end, label %vector.body
68 ; Exponent lanes are constants != 0.75 and != 0.25, and they are not all equal
; The exponent vector is a constant but not a splat: lanes 0 and 1 differ from
; each other and from lanes 2 and 3. Every CHECK-PWR<N> prefix expects the call
; to appear as the matching __powf4_P<N> entry.
69 define void @vspow_neq_const(float* nocapture %y, float* nocapture readonly %x) {
70 ; CHECK-LABEL: @vspow_neq_const
71 ; CHECK-PWR10: __powf4_P10
72 ; CHECK-PWR9: __powf4_P9
73 ; CHECK-PWR8: __powf4_P8
74 ; CHECK-PWR7: __powf4_P7
; %index starts at 0 and is advanced by 4 (one <4 x float> vector) per pass.
80 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
81 %next.gep = getelementptr float, float* %y, i64 %index
82 %next.gep19 = getelementptr float, float* %x, i64 %index
83 %0 = bitcast float* %next.gep19 to <4 x float>*
84 %wide.load = load <4 x float>, <4 x float>* %0, align 4
; Non-splat constant exponent; no lane is 0.75 or 0.25.
85 %1 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
86 %2 = bitcast float* %next.gep to <4 x float>*
87 store <4 x float> %1, <4 x float>* %2, align 4
88 %index.next = add i64 %index, 4
; Leave the loop once %index.next reaches 1024.
89 %3 = icmp eq i64 %index.next, 1024
90 br i1 %3, label %for.end, label %vector.body
96 ; Exponent is a non-splat constant: three lanes are 0.75, the last lane is not
; Because one lane differs, the exponent is not a uniform 0.75. Every
; CHECK-PWR<N> prefix expects the call to appear as the matching __powf4_P<N>
; entry.
97 define void @vspow_neq075_const(float* nocapture %y, float* nocapture readonly %x) {
98 ; CHECK-LABEL: @vspow_neq075_const
99 ; CHECK-PWR10: __powf4_P10
100 ; CHECK-PWR9: __powf4_P9
101 ; CHECK-PWR8: __powf4_P8
102 ; CHECK-PWR7: __powf4_P7
105 br label %vector.body
; %index starts at 0 and is advanced by 4 (one <4 x float> vector) per pass.
108 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
109 %next.gep = getelementptr float, float* %y, i64 %index
110 %next.gep19 = getelementptr float, float* %x, i64 %index
111 %0 = bitcast float* %next.gep19 to <4 x float>*
112 %wide.load = load <4 x float>, <4 x float>* %0, align 4
; Lanes 0-2 are 0.75; lane 3 is 0x3FE851EB80000000 (0.76 as a float).
113 %1 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
114 %2 = bitcast float* %next.gep to <4 x float>*
115 store <4 x float> %1, <4 x float>* %2, align 4
116 %index.next = add i64 %index, 4
; Leave the loop once %index.next reaches 1024.
117 %3 = icmp eq i64 %index.next, 1024
118 br i1 %3, label %for.end, label %vector.body
124 ; Exponent is a non-splat constant: lanes 1 and 3 are 0.25, lanes 0 and 2 are not
; Because the lanes differ, the exponent is not a uniform 0.25. Every
; CHECK-PWR<N> prefix expects the call to appear as the matching __powf4_P<N>
; entry.
125 define void @vspow_neq025_const(float* nocapture %y, float* nocapture readonly %x) {
126 ; CHECK-LABEL: @vspow_neq025_const
127 ; CHECK-PWR10: __powf4_P10
128 ; CHECK-PWR9: __powf4_P9
129 ; CHECK-PWR8: __powf4_P8
130 ; CHECK-PWR7: __powf4_P7
133 br label %vector.body
; %index starts at 0 and is advanced by 4 (one <4 x float> vector) per pass.
136 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
137 %next.gep = getelementptr float, float* %y, i64 %index
138 %next.gep19 = getelementptr float, float* %x, i64 %index
139 %0 = bitcast float* %next.gep19 to <4 x float>*
140 %wide.load = load <4 x float>, <4 x float>* %0, align 4
; Lanes alternate between 0x3FE851EB80000000 (0.76 as a float) and 0.25.
141 %1 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
142 %2 = bitcast float* %next.gep to <4 x float>*
143 store <4 x float> %1, <4 x float>* %2, align 4
144 %index.next = add i64 %index, 4
; Leave the loop once %index.next reaches 1024.
145 %3 = icmp eq i64 %index.next, 1024
146 br i1 %3, label %for.end, label %vector.body
; Exponent is 0.75 in all four lanes and the call carries the same
; ninf/afn/nsz fast-math flags as the other flagged calls in this file. The
; test expects that no __powf4_P<N> entry is emitted for this function.
; NOTE(review): the RUN lines visible in this file select only CHECK-PWR<N>
; prefixes, so the directives below that use the plain CHECK prefix may not be
; evaluated at all - confirm.
153 define void @vspow_075(float* nocapture %y, float* nocapture readonly %x) {
154 ; CHECK-LABEL: @vspow_075
155 ; CHECK-NOT: __powf4_P{{[7,8,9,10]}}
159 br label %vector.body
; %index starts at 0 and is advanced by 4 (one <4 x float> vector) per pass.
162 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
163 %next.gep = getelementptr float, float* %y, i64 %index
164 %next.gep19 = getelementptr float, float* %x, i64 %index
165 %0 = bitcast float* %next.gep19 to <4 x float>*
166 %wide.load = load <4 x float>, <4 x float>* %0, align 4
; Splat 0.75 exponent with ninf, afn and nsz set.
167 %1 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
168 %2 = bitcast float* %next.gep to <4 x float>*
169 store <4 x float> %1, <4 x float>* %2, align 4
170 %index.next = add i64 %index, 4
; Leave the loop once %index.next reaches 1024.
171 %3 = icmp eq i64 %index.next, 1024
172 br i1 %3, label %for.end, label %vector.body
; Exponent is 0.25 in all four lanes and the call carries ninf/afn/nsz. The
; test expects that no __powf4_P<N> entry is emitted for this function.
; NOTE(review): the RUN lines visible in this file select only CHECK-PWR<N>
; prefixes, so the directives below that use the plain CHECK prefix may not be
; evaluated at all - confirm.
179 define void @vspow_025(float* nocapture %y, float* nocapture readonly %x) {
180 ; CHECK-LABEL: @vspow_025
181 ; CHECK-NOT: __powf4_P{{[7,8,9,10]}}
185 br label %vector.body
; %index starts at 0 and is advanced by 4 (one <4 x float> vector) per pass.
188 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
189 %next.gep = getelementptr float, float* %y, i64 %index
190 %next.gep19 = getelementptr float, float* %x, i64 %index
191 %0 = bitcast float* %next.gep19 to <4 x float>*
192 %wide.load = load <4 x float>, <4 x float>* %0, align 4
; Splat 0.25 exponent with ninf, afn and nsz set.
193 %1 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
194 %2 = bitcast float* %next.gep to <4 x float>*
195 store <4 x float> %1, <4 x float>* %2, align 4
196 %index.next = add i64 %index, 4
; Leave the loop once %index.next reaches 1024.
197 %3 = icmp eq i64 %index.next, 1024
198 br i1 %3, label %for.end, label %vector.body
204 ; Exponent is 0.75 but no proper fast-math flags
; The call below carries no fast-math flags at all, and every CHECK-PWR<N>
; prefix expects it to appear as the matching __powf4_P<N> entry.
; NOTE(review): the xvrsqrtesp exclusion below uses the plain CHECK prefix,
; which none of the RUN lines visible in this file select, so it may not be
; evaluated - confirm.
205 define void @vspow_075_nofast(float* nocapture %y, float* nocapture readonly %x) {
206 ; CHECK-LABEL: @vspow_075_nofast
207 ; CHECK-PWR10: __powf4_P10
208 ; CHECK-PWR9: __powf4_P9
209 ; CHECK-PWR8: __powf4_P8
210 ; CHECK-PWR7: __powf4_P7
211 ; CHECK-NOT: xvrsqrtesp
214 br label %vector.body
; %index starts at 0 and is advanced by 4 (one <4 x float> vector) per pass.
217 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
218 %next.gep = getelementptr float, float* %y, i64 %index
219 %next.gep19 = getelementptr float, float* %x, i64 %index
220 %0 = bitcast float* %next.gep19 to <4 x float>*
221 %wide.load = load <4 x float>, <4 x float>* %0, align 4
; Splat 0.75 exponent, but the call has no ninf/afn/nsz flags.
222 %1 = call <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
223 %2 = bitcast float* %next.gep to <4 x float>*
224 store <4 x float> %1, <4 x float>* %2, align 4
225 %index.next = add i64 %index, 4
; Leave the loop once %index.next reaches 1024.
226 %3 = icmp eq i64 %index.next, 1024
227 br i1 %3, label %for.end, label %vector.body
233 ; Exponent is 0.25 but no proper fast-math flags
; The call below carries no fast-math flags at all, and every CHECK-PWR<N>
; prefix expects it to appear as the matching __powf4_P<N> entry.
; NOTE(review): the xvrsqrtesp exclusion below uses the plain CHECK prefix,
; which none of the RUN lines visible in this file select, so it may not be
; evaluated - confirm.
234 define void @vspow_025_nofast(float* nocapture %y, float* nocapture readonly %x) {
235 ; CHECK-LABEL: @vspow_025_nofast
236 ; CHECK-PWR10: __powf4_P10
237 ; CHECK-PWR9: __powf4_P9
238 ; CHECK-PWR8: __powf4_P8
239 ; CHECK-PWR7: __powf4_P7
240 ; CHECK-NOT: xvrsqrtesp
243 br label %vector.body
; %index starts at 0 and is advanced by 4 (one <4 x float> vector) per pass.
246 %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
247 %next.gep = getelementptr float, float* %y, i64 %index
248 %next.gep19 = getelementptr float, float* %x, i64 %index
249 %0 = bitcast float* %next.gep19 to <4 x float>*
250 %wide.load = load <4 x float>, <4 x float>* %0, align 4
; Splat 0.25 exponent, but the call has no ninf/afn/nsz flags.
251 %1 = call <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
252 %2 = bitcast float* %next.gep to <4 x float>*
253 store <4 x float> %1, <4 x float>* %2, align 4
254 %index.next = add i64 %index, 4
; Leave the loop once %index.next reaches 1024.
255 %3 = icmp eq i64 %index.next, 1024
256 br i1 %3, label %for.end, label %vector.body
262 ; Function Attrs: nounwind readnone speculatable willreturn
; External vector pow routine taking two <4 x float> operands and returning a
; <4 x float>; it is the callee of every call in the test functions of this
; file.
; NOTE(review): the attributes listed in the comment above are not attached to
; the declaration as written - confirm whether an attribute group was intended.
263 declare <4 x float> @__powf4(<4 x float>, <4 x float>)