; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN: -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
; RUN: | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN: | FileCheck %s --check-prefix=DEFAULT

; fp_add: loads four consecutive floats from %p and from %q, adds them
; lane by lane, and stores the four sums to consecutive slots of %dst.
; Under both the CHECK and DEFAULT prefixes the scalar chain is expected to
; collapse into two <4 x float> loads, one vector fadd and one vector store.
define void @fp_add(ptr %dst, ptr %p, ptr %q) {
; CHECK-LABEL: define void @fp_add
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_add
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; DEFAULT-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
; DEFAULT-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; First operand: p[0..3].
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; Second operand: q[0..3].
  %f0 = load float, ptr %q, align 4
  %pf1 = getelementptr inbounds float, ptr %q, i64 1
  %f1 = load float, ptr %pf1, align 4
  %pf2 = getelementptr inbounds float, ptr %q, i64 2
  %f2 = load float, ptr %pf2, align 4
  %pf3 = getelementptr inbounds float, ptr %q, i64 3
  %f3 = load float, ptr %pf3, align 4

  ; Lane-wise scalar adds that form the vectorizable bundle.
  %a0 = fadd float %e0, %f0
  %a1 = fadd float %e1, %f1
  %a2 = fadd float %e2, %f2
  %a3 = fadd float %e3, %f3

  ; Consecutive stores to dst[0..3] seed the SLP tree.
  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4
  ret void
}
; fp_sub: loads four consecutive floats from %p, subtracts the constant 3.0
; from each, and stores the results to consecutive slots of %dst.
; Under both the CHECK and DEFAULT prefixes the expected output is one
; <4 x float> load, one vector fsub against a splat of 3.0, and one vector
; store.
define void @fp_sub(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_sub
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_sub
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; Operand: p[0..3].
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; Same constant in every lane, so the vector form uses a splat operand.
  %a0 = fsub float %e0, 3.0
  %a1 = fsub float %e1, 3.0
  %a2 = fsub float %e2, 3.0
  %a3 = fsub float %e3, 3.0

  ; Consecutive stores to dst[0..3].
  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4
  ret void
}
; fp_mul: loads four consecutive floats from %p and from %q, multiplies them
; lane by lane, and stores the four products to consecutive slots of %dst.
; Under both the CHECK and DEFAULT prefixes the expected output is two
; <4 x float> loads, one vector fmul and one vector store.
define void @fp_mul(ptr %dst, ptr %p, ptr %q) {
; CHECK-LABEL: define void @fp_mul
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_mul
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; DEFAULT-NEXT:    [[TMP2:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
; DEFAULT-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; First operand: p[0..3].
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; Second operand: q[0..3].
  %f0 = load float, ptr %q, align 4
  %pf1 = getelementptr inbounds float, ptr %q, i64 1
  %f1 = load float, ptr %pf1, align 4
  %pf2 = getelementptr inbounds float, ptr %q, i64 2
  %f2 = load float, ptr %pf2, align 4
  %pf3 = getelementptr inbounds float, ptr %q, i64 3
  %f3 = load float, ptr %pf3, align 4

  ; Lane-wise scalar multiplies that form the vectorizable bundle.
  %a0 = fmul float %e0, %f0
  %a1 = fmul float %e1, %f1
  %a2 = fmul float %e2, %f2
  %a3 = fmul float %e3, %f3

  ; Consecutive stores to dst[0..3].
  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4
  ret void
}
; fp_div: loads four consecutive floats from %p, divides each by the
; constant 10.5, and stores the quotients to consecutive slots of %dst.
; Under both the CHECK and DEFAULT prefixes the expected output is one
; <4 x float> load, one vector fdiv by a splat of 10.5, and one vector store.
define void @fp_div(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_div
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], <float 1.050000e+01, float 1.050000e+01, float 1.050000e+01, float 1.050000e+01>
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_div
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], <float 1.050000e+01, float 1.050000e+01, float 1.050000e+01, float 1.050000e+01>
; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; Dividend: p[0..3].
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; Same constant divisor in every lane.
  %a0 = fdiv float %e0, 10.5
  %a1 = fdiv float %e1, 10.5
  %a2 = fdiv float %e2, 10.5
  %a3 = fdiv float %e3, 10.5

  ; Consecutive stores to dst[0..3].
  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4
  ret void
}
declare float @llvm.maxnum.f32(float, float)

; fp_max: loads four consecutive floats from %p and from %q, applies
; llvm.maxnum.f32 lane by lane, and stores the results to consecutive slots
; of %dst. Under both the CHECK and DEFAULT prefixes the four scalar
; intrinsic calls are expected to become a single llvm.maxnum.v4f32 call fed
; by two <4 x float> loads and followed by one vector store.
define void @fp_max(ptr %dst, ptr %p, ptr %q) {
; CHECK-LABEL: define void @fp_max
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_max
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; DEFAULT-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
; DEFAULT-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; First operand: p[0..3].
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; Second operand: q[0..3].
  %f0 = load float, ptr %q, align 4
  %pf1 = getelementptr inbounds float, ptr %q, i64 1
  %f1 = load float, ptr %pf1, align 4
  %pf2 = getelementptr inbounds float, ptr %q, i64 2
  %f2 = load float, ptr %pf2, align 4
  %pf3 = getelementptr inbounds float, ptr %q, i64 3
  %f3 = load float, ptr %pf3, align 4

  ; Lane-wise scalar intrinsic calls that form the vectorizable bundle.
  %a0 = tail call float @llvm.maxnum.f32(float %e0, float %f0)
  %a1 = tail call float @llvm.maxnum.f32(float %e1, float %f1)
  %a2 = tail call float @llvm.maxnum.f32(float %e2, float %f2)
  %a3 = tail call float @llvm.maxnum.f32(float %e3, float %f3)

  ; Consecutive stores to dst[0..3].
  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4
  ret void
}
declare float @llvm.minnum.f32(float, float)

; fp_min: loads four consecutive floats from %p, applies llvm.minnum.f32
; against the constant 1.25 in each lane, and stores the results to
; consecutive slots of %dst. Under both the CHECK and DEFAULT prefixes the
; expected output is one <4 x float> load, one llvm.minnum.v4f32 call with a
; splat of 1.25, and one vector store.
define void @fp_min(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_min
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> <float 1.250000e+00, float 1.250000e+00, float 1.250000e+00, float 1.250000e+00>)
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_min
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> <float 1.250000e+00, float 1.250000e+00, float 1.250000e+00, float 1.250000e+00>)
; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; Operand: p[0..3].
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; Same constant second argument in every lane.
  %a0 = tail call float @llvm.minnum.f32(float %e0, float 1.25)
  %a1 = tail call float @llvm.minnum.f32(float %e1, float 1.25)
  %a2 = tail call float @llvm.minnum.f32(float %e2, float 1.25)
  %a3 = tail call float @llvm.minnum.f32(float %e3, float 1.25)

  ; Consecutive stores to dst[0..3].
  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4
  ret void
}
declare i32 @llvm.fptosi.sat.i32.f32(float)

; fp_convert: loads four consecutive floats from %p, converts each to i32
; with llvm.fptosi.sat.i32.f32, and stores the four integers to consecutive
; i32 slots of %dst. Under both the CHECK and DEFAULT prefixes the expected
; output is one <4 x float> load, one llvm.fptosi.sat.v4i32.v4f32 call, and
; one <4 x i32> store.
define void @fp_convert(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_convert
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_convert
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; Source values: p[0..3] as float.
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; Lane-wise saturating float -> i32 conversions.
  %a0 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e0)
  %a1 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e1)
  %a2 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e2)
  %a3 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e3)

  ; Consecutive i32 stores to dst[0..3]; the element type changes from the
  ; loads, so the destination GEPs index by i32.
  store i32 %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %a3, ptr %pa3, align 4
  ret void
}