1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
3 ; RUN: -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
5 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
6 ; RUN: | FileCheck %s --check-prefix=DEFAULT
8 declare float @fabsf(float) readonly nounwind willreturn
; fabs through the libm call: one scalar @fabsf call per lane of the loaded
; <4 x float>. Both sets of assertions below expect the four calls to be
; replaced by a single @llvm.fabs.v4f32 call on the loaded vector.
10 define <4 x float> @fabs_4x(ptr %a) {
11 ; CHECK-LABEL: define <4 x float> @fabs_4x
12 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
14 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
15 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
16 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
18 ; DEFAULT-LABEL: define <4 x float> @fabs_4x
19 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
20 ; DEFAULT-NEXT: entry:
21 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
22 ; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
23 ; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
26 %0 = load <4 x float>, ptr %a, align 16
27 %vecext = extractelement <4 x float> %0, i32 0
28 %1 = tail call fast float @fabsf(float %vecext)
29 %vecins = insertelement <4 x float> undef, float %1, i32 0
30 %vecext.1 = extractelement <4 x float> %0, i32 1
31 %2 = tail call fast float @fabsf(float %vecext.1)
32 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
33 %vecext.2 = extractelement <4 x float> %0, i32 2
34 %3 = tail call fast float @fabsf(float %vecext.2)
35 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
36 %vecext.3 = extractelement <4 x float> %0, i32 3
37 %4 = tail call fast float @fabsf(float %vecext.3)
38 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
39 ret <4 x float> %vecins.3
42 declare float @llvm.fabs.f32(float)
; fabs through the scalar intrinsic: one @llvm.fabs.f32 call per lane of the
; loaded <4 x float>. Both sets of assertions below expect a single
; @llvm.fabs.v4f32 call on the loaded vector.
44 define <4 x float> @int_fabs_4x(ptr %a) {
45 ; CHECK-LABEL: define <4 x float> @int_fabs_4x
46 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
48 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
49 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
50 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
52 ; DEFAULT-LABEL: define <4 x float> @int_fabs_4x
53 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
54 ; DEFAULT-NEXT: entry:
55 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
56 ; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
57 ; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
60 %0 = load <4 x float>, ptr %a, align 16
61 %vecext = extractelement <4 x float> %0, i32 0
62 %1 = tail call fast float @llvm.fabs.f32(float %vecext)
63 %vecins = insertelement <4 x float> undef, float %1, i32 0
64 %vecext.1 = extractelement <4 x float> %0, i32 1
65 %2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
66 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
67 %vecext.2 = extractelement <4 x float> %0, i32 2
68 %3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
69 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
70 %vecext.3 = extractelement <4 x float> %0, i32 3
71 %4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
72 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
73 ret <4 x float> %vecins.3
76 declare float @sqrtf(float) readonly nounwind willreturn
; sqrt through the libm call: one scalar @sqrtf call per lane of the loaded
; <4 x float>. Both sets of assertions below expect the four calls to be
; replaced by a single @llvm.sqrt.v4f32 call on the loaded vector.
78 define <4 x float> @sqrt_4x(ptr %a) {
79 ; CHECK-LABEL: define <4 x float> @sqrt_4x
80 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
82 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
83 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
84 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
86 ; DEFAULT-LABEL: define <4 x float> @sqrt_4x
87 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
88 ; DEFAULT-NEXT: entry:
89 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
90 ; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
91 ; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
94 %0 = load <4 x float>, ptr %a, align 16
95 %vecext = extractelement <4 x float> %0, i32 0
96 %1 = tail call fast float @sqrtf(float %vecext)
97 %vecins = insertelement <4 x float> undef, float %1, i32 0
98 %vecext.1 = extractelement <4 x float> %0, i32 1
99 %2 = tail call fast float @sqrtf(float %vecext.1)
100 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
101 %vecext.2 = extractelement <4 x float> %0, i32 2
102 %3 = tail call fast float @sqrtf(float %vecext.2)
103 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
104 %vecext.3 = extractelement <4 x float> %0, i32 3
105 %4 = tail call fast float @sqrtf(float %vecext.3)
106 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
107 ret <4 x float> %vecins.3
110 declare float @llvm.sqrt.f32(float)
; sqrt through the scalar intrinsic: one @llvm.sqrt.f32 call per lane of the
; loaded <4 x float>. Both sets of assertions below expect a single
; @llvm.sqrt.v4f32 call on the loaded vector.
112 define <4 x float> @int_sqrt_4x(ptr %a) {
113 ; CHECK-LABEL: define <4 x float> @int_sqrt_4x
114 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
116 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
117 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
118 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
120 ; DEFAULT-LABEL: define <4 x float> @int_sqrt_4x
121 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
122 ; DEFAULT-NEXT: entry:
123 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
124 ; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
125 ; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
128 %0 = load <4 x float>, ptr %a, align 16
129 %vecext = extractelement <4 x float> %0, i32 0
130 %1 = tail call fast float @llvm.sqrt.f32(float %vecext)
131 %vecins = insertelement <4 x float> undef, float %1, i32 0
132 %vecext.1 = extractelement <4 x float> %0, i32 1
133 %2 = tail call fast float @llvm.sqrt.f32(float %vecext.1)
134 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
135 %vecext.2 = extractelement <4 x float> %0, i32 2
136 %3 = tail call fast float @llvm.sqrt.f32(float %vecext.2)
137 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
138 %vecext.3 = extractelement <4 x float> %0, i32 3
139 %4 = tail call fast float @llvm.sqrt.f32(float %vecext.3)
140 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
141 ret <4 x float> %vecins.3
144 declare float @expf(float) readonly nounwind willreturn
146 ; exp cannot be vectorized since RISC-V has no such instruction; the four scalar @expf calls are expected to remain unchanged.
147 define <4 x float> @exp_4x(ptr %a) {
148 ; CHECK-LABEL: define <4 x float> @exp_4x
149 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
151 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
152 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
153 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
154 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
155 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
156 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
157 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
158 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
159 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
160 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
161 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
162 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
163 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
164 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
166 ; DEFAULT-LABEL: define <4 x float> @exp_4x
167 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
168 ; DEFAULT-NEXT: entry:
169 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
170 ; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
171 ; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
172 ; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
173 ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
174 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
175 ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
176 ; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
177 ; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
178 ; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
179 ; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
180 ; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
181 ; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
182 ; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
185 %0 = load <4 x float>, ptr %a, align 16
186 %vecext = extractelement <4 x float> %0, i32 0
187 %1 = tail call fast float @expf(float %vecext)
188 %vecins = insertelement <4 x float> undef, float %1, i32 0
189 %vecext.1 = extractelement <4 x float> %0, i32 1
190 %2 = tail call fast float @expf(float %vecext.1)
191 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
192 %vecext.2 = extractelement <4 x float> %0, i32 2
193 %3 = tail call fast float @expf(float %vecext.2)
194 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
195 %vecext.3 = extractelement <4 x float> %0, i32 3
196 %4 = tail call fast float @expf(float %vecext.3)
197 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
198 ret <4 x float> %vecins.3
201 declare float @llvm.exp.f32(float)
203 ; exp cannot be vectorized since RISC-V has no such instruction; the four scalar @llvm.exp.f32 calls are expected to remain unchanged.
204 define <4 x float> @int_exp_4x(ptr %a) {
205 ; CHECK-LABEL: define <4 x float> @int_exp_4x
206 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
208 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
209 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
210 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
211 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
212 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
213 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
214 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
215 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
216 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
217 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
218 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
219 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
220 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
221 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
223 ; DEFAULT-LABEL: define <4 x float> @int_exp_4x
224 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
225 ; DEFAULT-NEXT: entry:
226 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
227 ; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
228 ; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
229 ; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
230 ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
231 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
232 ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
233 ; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
234 ; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
235 ; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
236 ; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
237 ; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
238 ; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
239 ; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
242 %0 = load <4 x float>, ptr %a, align 16
243 %vecext = extractelement <4 x float> %0, i32 0
244 %1 = tail call fast float @llvm.exp.f32(float %vecext)
245 %vecins = insertelement <4 x float> undef, float %1, i32 0
246 %vecext.1 = extractelement <4 x float> %0, i32 1
247 %2 = tail call fast float @llvm.exp.f32(float %vecext.1)
248 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
249 %vecext.2 = extractelement <4 x float> %0, i32 2
250 %3 = tail call fast float @llvm.exp.f32(float %vecext.2)
251 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
252 %vecext.3 = extractelement <4 x float> %0, i32 3
253 %4 = tail call fast float @llvm.exp.f32(float %vecext.3)
254 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
255 ret <4 x float> %vecins.3
258 declare float @logf(float) readonly nounwind willreturn
260 ; log cannot be vectorized since RISC-V has no such instruction; the four scalar @logf calls are expected to remain unchanged.
261 define <4 x float> @log_4x(ptr %a) {
262 ; CHECK-LABEL: define <4 x float> @log_4x
263 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
265 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
266 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
267 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
268 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
269 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
270 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
271 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
272 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
273 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
274 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
275 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
276 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
277 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
278 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
280 ; DEFAULT-LABEL: define <4 x float> @log_4x
281 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
282 ; DEFAULT-NEXT: entry:
283 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
284 ; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
285 ; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
286 ; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
287 ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
288 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
289 ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
290 ; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
291 ; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
292 ; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
293 ; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
294 ; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
295 ; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
296 ; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
299 %0 = load <4 x float>, ptr %a, align 16
300 %vecext = extractelement <4 x float> %0, i32 0
301 %1 = tail call fast float @logf(float %vecext)
302 %vecins = insertelement <4 x float> undef, float %1, i32 0
303 %vecext.1 = extractelement <4 x float> %0, i32 1
304 %2 = tail call fast float @logf(float %vecext.1)
305 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
306 %vecext.2 = extractelement <4 x float> %0, i32 2
307 %3 = tail call fast float @logf(float %vecext.2)
308 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
309 %vecext.3 = extractelement <4 x float> %0, i32 3
310 %4 = tail call fast float @logf(float %vecext.3)
311 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
312 ret <4 x float> %vecins.3
315 declare float @llvm.log.f32(float)
317 ; log cannot be vectorized since RISC-V has no such instruction; the four scalar @llvm.log.f32 calls are expected to remain unchanged.
318 define <4 x float> @int_log_4x(ptr %a) {
319 ; CHECK-LABEL: define <4 x float> @int_log_4x
320 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
322 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
323 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
324 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
325 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
326 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
327 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
328 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
329 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
330 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
331 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
332 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
333 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
334 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
335 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
337 ; DEFAULT-LABEL: define <4 x float> @int_log_4x
338 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
339 ; DEFAULT-NEXT: entry:
340 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
341 ; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
342 ; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
343 ; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
344 ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
345 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
346 ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
347 ; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
348 ; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
349 ; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
350 ; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
351 ; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
352 ; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
353 ; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
356 %0 = load <4 x float>, ptr %a, align 16
357 %vecext = extractelement <4 x float> %0, i32 0
358 %1 = tail call fast float @llvm.log.f32(float %vecext)
359 %vecins = insertelement <4 x float> undef, float %1, i32 0
360 %vecext.1 = extractelement <4 x float> %0, i32 1
361 %2 = tail call fast float @llvm.log.f32(float %vecext.1)
362 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
363 %vecext.2 = extractelement <4 x float> %0, i32 2
364 %3 = tail call fast float @llvm.log.f32(float %vecext.2)
365 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
366 %vecext.3 = extractelement <4 x float> %0, i32 3
367 %4 = tail call fast float @llvm.log.f32(float %vecext.3)
368 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
369 ret <4 x float> %vecins.3
372 declare float @sinf(float) readonly nounwind willreturn
374 ; sin cannot be vectorized since RISC-V has no such instruction; the four scalar @sinf calls are expected to remain unchanged.
375 define <4 x float> @sin_4x(ptr %a) {
376 ; CHECK-LABEL: define <4 x float> @sin_4x
377 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
379 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
380 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
381 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
382 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
383 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
384 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
385 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
386 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
387 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
388 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
389 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
390 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
391 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
392 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
394 ; DEFAULT-LABEL: define <4 x float> @sin_4x
395 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
396 ; DEFAULT-NEXT: entry:
397 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
398 ; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
399 ; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
400 ; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
401 ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
402 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
403 ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
404 ; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
405 ; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
406 ; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
407 ; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
408 ; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
409 ; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
410 ; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
413 %0 = load <4 x float>, ptr %a, align 16
414 %vecext = extractelement <4 x float> %0, i32 0
415 %1 = tail call fast float @sinf(float %vecext)
416 %vecins = insertelement <4 x float> undef, float %1, i32 0
417 %vecext.1 = extractelement <4 x float> %0, i32 1
418 %2 = tail call fast float @sinf(float %vecext.1)
419 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
420 %vecext.2 = extractelement <4 x float> %0, i32 2
421 %3 = tail call fast float @sinf(float %vecext.2)
422 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
423 %vecext.3 = extractelement <4 x float> %0, i32 3
424 %4 = tail call fast float @sinf(float %vecext.3)
425 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
426 ret <4 x float> %vecins.3
429 declare float @llvm.sin.f32(float)
431 ; sin cannot be vectorized since RISC-V has no such instruction; the four scalar @llvm.sin.f32 calls are expected to remain unchanged.
432 define <4 x float> @int_sin_4x(ptr %a) {
433 ; CHECK-LABEL: define <4 x float> @int_sin_4x
434 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
436 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
437 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
438 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
439 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
440 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
441 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
442 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
443 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
444 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
445 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
446 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
447 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
448 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
449 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
451 ; DEFAULT-LABEL: define <4 x float> @int_sin_4x
452 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
453 ; DEFAULT-NEXT: entry:
454 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
455 ; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
456 ; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
457 ; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
458 ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
459 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
460 ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
461 ; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
462 ; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
463 ; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
464 ; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
465 ; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
466 ; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
467 ; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
470 %0 = load <4 x float>, ptr %a, align 16
471 %vecext = extractelement <4 x float> %0, i32 0
472 %1 = tail call fast float @llvm.sin.f32(float %vecext)
473 %vecins = insertelement <4 x float> undef, float %1, i32 0
474 %vecext.1 = extractelement <4 x float> %0, i32 1
475 %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
476 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
477 %vecext.2 = extractelement <4 x float> %0, i32 2
478 %3 = tail call fast float @llvm.sin.f32(float %vecext.2)
479 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
480 %vecext.3 = extractelement <4 x float> %0, i32 3
481 %4 = tail call fast float @llvm.sin.f32(float %vecext.3)
482 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
483 ret <4 x float> %vecins.3
486 declare float @asinf(float) readonly nounwind willreturn
488 ; asin cannot be vectorized since RISC-V has no such instruction; the four scalar @asinf calls are expected to remain unchanged.
489 define <4 x float> @asin_4x(ptr %a) {
490 ; CHECK-LABEL: define <4 x float> @asin_4x
491 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
493 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
494 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
495 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
496 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
497 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
498 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
499 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
500 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
501 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
502 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
503 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
504 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
505 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
506 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
508 ; DEFAULT-LABEL: define <4 x float> @asin_4x
509 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
510 ; DEFAULT-NEXT: entry:
511 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
512 ; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
513 ; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
514 ; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
515 ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
516 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
517 ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
518 ; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
519 ; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
520 ; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
521 ; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
522 ; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
523 ; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
524 ; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
527 %0 = load <4 x float>, ptr %a, align 16
528 %vecext = extractelement <4 x float> %0, i32 0
529 %1 = tail call fast float @asinf(float %vecext)
530 %vecins = insertelement <4 x float> undef, float %1, i32 0
531 %vecext.1 = extractelement <4 x float> %0, i32 1
532 %2 = tail call fast float @asinf(float %vecext.1)
533 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
534 %vecext.2 = extractelement <4 x float> %0, i32 2
535 %3 = tail call fast float @asinf(float %vecext.2)
536 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
537 %vecext.3 = extractelement <4 x float> %0, i32 3
538 %4 = tail call fast float @asinf(float %vecext.3)
539 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
540 ret <4 x float> %vecins.3
543 declare float @llvm.asin.f32(float)
545 ; We cannot vectorize asin since RISC-V has no such instruction.
546 define <4 x float> @int_asin_4x(ptr %a) {
547 ; CHECK-LABEL: define <4 x float> @int_asin_4x
548 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
550 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
551 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
552 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
553 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
554 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
555 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
556 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
557 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
558 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
559 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
560 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
561 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
562 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
563 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
565 ; DEFAULT-LABEL: define <4 x float> @int_asin_4x
566 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
567 ; DEFAULT-NEXT: entry:
568 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
569 ; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
570 ; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
571 ; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
572 ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
573 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
574 ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
575 ; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
576 ; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
577 ; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
578 ; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
579 ; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
580 ; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
581 ; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
584 %0 = load <4 x float>, ptr %a, align 16
585 %vecext = extractelement <4 x float> %0, i32 0
586 %1 = tail call fast float @llvm.asin.f32(float %vecext)
587 %vecins = insertelement <4 x float> undef, float %1, i32 0
588 %vecext.1 = extractelement <4 x float> %0, i32 1
589 %2 = tail call fast float @llvm.asin.f32(float %vecext.1)
590 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
591 %vecext.2 = extractelement <4 x float> %0, i32 2
592 %3 = tail call fast float @llvm.asin.f32(float %vecext.2)
593 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
594 %vecext.3 = extractelement <4 x float> %0, i32 3
595 %4 = tail call fast float @llvm.asin.f32(float %vecext.3)
596 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
597 ret <4 x float> %vecins.3
600 declare float @coshf(float) readonly nounwind willreturn
602 ; cosh cannot be vectorized since RISC-V has no such instruction; both the CHECK and DEFAULT runs expect the four scalar @coshf calls to remain.
603 define <4 x float> @cosh_4x(ptr %a) {
604 ; CHECK-LABEL: define <4 x float> @cosh_4x
605 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
607 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
608 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
609 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
610 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
611 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
612 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
613 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
614 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
615 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
616 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
617 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
618 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
619 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
620 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
622 ; DEFAULT-LABEL: define <4 x float> @cosh_4x
623 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
624 ; DEFAULT-NEXT: entry:
625 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
626 ; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
627 ; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
628 ; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
629 ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
630 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
631 ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
632 ; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
633 ; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
634 ; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
635 ; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
636 ; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
637 ; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
638 ; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
641 %0 = load <4 x float>, ptr %a, align 16
642 %vecext = extractelement <4 x float> %0, i32 0
643 %1 = tail call fast float @coshf(float %vecext)
644 %vecins = insertelement <4 x float> undef, float %1, i32 0
645 %vecext.1 = extractelement <4 x float> %0, i32 1
646 %2 = tail call fast float @coshf(float %vecext.1)
647 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
648 %vecext.2 = extractelement <4 x float> %0, i32 2
649 %3 = tail call fast float @coshf(float %vecext.2)
650 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
651 %vecext.3 = extractelement <4 x float> %0, i32 3
652 %4 = tail call fast float @coshf(float %vecext.3)
653 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
654 ret <4 x float> %vecins.3
657 declare float @llvm.cosh.f32(float)
659 ; cosh cannot be vectorized since RISC-V has no such instruction; both the CHECK and DEFAULT runs expect the four scalar @llvm.cosh.f32 calls to remain.
660 define <4 x float> @int_cosh_4x(ptr %a) {
661 ; CHECK-LABEL: define <4 x float> @int_cosh_4x
662 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
664 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
665 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
666 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
667 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
668 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
669 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
670 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
671 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
672 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
673 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
674 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
675 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
676 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
677 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
679 ; DEFAULT-LABEL: define <4 x float> @int_cosh_4x
680 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
681 ; DEFAULT-NEXT: entry:
682 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
683 ; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
684 ; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
685 ; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
686 ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
687 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
688 ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
689 ; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
690 ; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
691 ; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
692 ; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
693 ; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
694 ; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
695 ; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
698 %0 = load <4 x float>, ptr %a, align 16
699 %vecext = extractelement <4 x float> %0, i32 0
700 %1 = tail call fast float @llvm.cosh.f32(float %vecext)
701 %vecins = insertelement <4 x float> undef, float %1, i32 0
702 %vecext.1 = extractelement <4 x float> %0, i32 1
703 %2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
704 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
705 %vecext.2 = extractelement <4 x float> %0, i32 2
706 %3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
707 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
708 %vecext.3 = extractelement <4 x float> %0, i32 3
709 %4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
710 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
711 ret <4 x float> %vecins.3
714 declare float @atanhf(float) readonly nounwind willreturn
716 ; atanh cannot be vectorized since RISC-V has no such instruction; both the CHECK and DEFAULT runs expect the four scalar @atanhf calls to remain.
717 define <4 x float> @atanh_4x(ptr %a) {
718 ; CHECK-LABEL: define <4 x float> @atanh_4x
719 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
721 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
722 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
723 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
724 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
725 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
726 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
727 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
728 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
729 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
730 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
731 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
732 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
733 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
734 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
736 ; DEFAULT-LABEL: define <4 x float> @atanh_4x
737 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
738 ; DEFAULT-NEXT: entry:
739 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
740 ; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
741 ; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
742 ; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
743 ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
744 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
745 ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
746 ; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
747 ; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
748 ; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
749 ; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
750 ; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
751 ; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
752 ; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
755 %0 = load <4 x float>, ptr %a, align 16
756 %vecext = extractelement <4 x float> %0, i32 0
757 %1 = tail call fast float @atanhf(float %vecext)
758 %vecins = insertelement <4 x float> undef, float %1, i32 0
759 %vecext.1 = extractelement <4 x float> %0, i32 1
760 %2 = tail call fast float @atanhf(float %vecext.1)
761 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
762 %vecext.2 = extractelement <4 x float> %0, i32 2
763 %3 = tail call fast float @atanhf(float %vecext.2)
764 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
765 %vecext.3 = extractelement <4 x float> %0, i32 3
766 %4 = tail call fast float @atanhf(float %vecext.3)
767 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
768 ret <4 x float> %vecins.3
771 declare float @llvm.atanh.f32(float)
773 ; atanh cannot be vectorized since RISC-V has no such instruction; both the CHECK and DEFAULT runs expect the four scalar @llvm.atanh.f32 calls to remain.
774 define <4 x float> @int_atanh_4x(ptr %a) {
775 ; CHECK-LABEL: define <4 x float> @int_atanh_4x
776 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
778 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
779 ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
780 ; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
781 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
782 ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
783 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
784 ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
785 ; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
786 ; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
787 ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
788 ; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
789 ; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
790 ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
791 ; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
793 ; DEFAULT-LABEL: define <4 x float> @int_atanh_4x
794 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
795 ; DEFAULT-NEXT: entry:
796 ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
797 ; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
798 ; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
799 ; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
800 ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
801 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
802 ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
803 ; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
804 ; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
805 ; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
806 ; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
807 ; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
808 ; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
809 ; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
812 %0 = load <4 x float>, ptr %a, align 16
813 %vecext = extractelement <4 x float> %0, i32 0
814 %1 = tail call fast float @llvm.atanh.f32(float %vecext)
815 %vecins = insertelement <4 x float> undef, float %1, i32 0
816 %vecext.1 = extractelement <4 x float> %0, i32 1
817 %2 = tail call fast float @llvm.atanh.f32(float %vecext.1)
818 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
819 %vecext.2 = extractelement <4 x float> %0, i32 2
820 %3 = tail call fast float @llvm.atanh.f32(float %vecext.2)
821 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
822 %vecext.3 = extractelement <4 x float> %0, i32 3
823 %4 = tail call fast float @llvm.atanh.f32(float %vecext.3)
824 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
825 ret <4 x float> %vecins.3