; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
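
; Check that a build_vector of binops, each with a constant RHS, is lowered
; to a single vector binop against a vector loaded from the constant pool.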
define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslide1down.vx v8, v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI0_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

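; Same pattern with eight elements, which needs a register group (LMUL=2).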
define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
; CHECK-LABEL: add_constant_rhs_8xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslide1down.vx v8, v8, a0
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vslide1down.vx v8, v8, a4
; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI1_0)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a5
; CHECK-NEXT:    vslide1down.vx v8, v8, a6
; CHECK-NEXT:    vslide1down.vx v8, v8, a7
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %e4 = add i32 %e, 23
  %e5 = add i32 %f, 25
  %e6 = add i32 %g, 1
  %e7 = add i32 %h, 2355
  %v0 = insertelement <8 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %e4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %e5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %e6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %e7, i32 7
  ret <8 x i32> %v7
}

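; Subtraction with constant RHS lanes becomes a single vsub.vv.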
define <4 x i32> @sub_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: sub_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslide1down.vx v8, v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI2_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = sub i32 %a, 23
  %e1 = sub i32 %b, 25
  %e2 = sub i32 %c, 1
  %e3 = sub i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

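; Multiplication with constant RHS lanes becomes a single vmul.vv.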
define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: mul_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslide1down.vx v8, v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI3_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = mul i32 %a, 23
  %e1 = mul i32 %b, 25
  %e2 = mul i32 %c, 27
  %e3 = mul i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

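; Unsigned division by constants is expanded into the usual multiply-by-magic
; sequence (vmulhu/vsub/vsrl); the lane whose divisor is 1 is passed through
; unchanged via vmerge.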
define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: udiv_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslide1down.vx v8, v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vmulhu.vv v9, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v9
; CHECK-NEXT:    vmv.v.i v11, 0
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    vslide1down.vx v11, v11, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI4_1)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_1)
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vmulhu.vv v10, v10, v11
; CHECK-NEXT:    vadd.vv v9, v10, v9
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vsrl.vv v9, v9, v12
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %e0 = udiv i32 %a, 23
  %e1 = udiv i32 %b, 25
  %e2 = udiv i32 %c, 1
  %e3 = udiv i32 %d, 235
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

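; Floating point lanes are collected with vfslide1down.vf and use a single
; vfadd.vv against a constant pool vector.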
define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fadd_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = fadd float %a, 23.0
  %e1 = fadd float %b, 25.0
  %e2 = fadd float %c, 2.0
  %e3 = fadd float %d, 23.0
  %v0 = insertelement <4 x float> poison, float %e0, i32 0
  %v1 = insertelement <4 x float> %v0, float %e1, i32 1
  %v2 = insertelement <4 x float> %v1, float %e2, i32 2
  %v3 = insertelement <4 x float> %v2, float %e3, i32 3
  ret <4 x float> %v3
}

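; Same for floating point division: a single vfdiv.vv.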
define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fdiv_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
; CHECK-NEXT:    vfdiv.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = fdiv float %a, 23.0
  %e1 = fdiv float %b, 25.0
  %e2 = fdiv float %c, 10.0
  %e3 = fdiv float %d, 23.0
  %v0 = insertelement <4 x float> poison, float %e0, i32 0
  %v1 = insertelement <4 x float> %v0, float %e1, i32 1
  %v2 = insertelement <4 x float> %v1, float %e2, i32 2
  %v3 = insertelement <4 x float> %v2, float %e3, i32 3
  ret <4 x float> %v3
}

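; When every lane adds the same constant, a vadd.vx with a scalar register
; is used instead of a constant pool load.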
define <4 x i32> @add_constant_rhs_splat(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslide1down.vx v8, v8, a0
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    li a0, 23
; CHECK-NEXT:    vadd.vx v8, v8, a0
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 23
  %e2 = add i32 %c, 23
  %e3 = add i32 %d, 23
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

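; If one lane is an identity operation, the constant adds are folded into the
; scalars before the build_vector, so no vector add is emitted at all.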
define <4 x i32> @add_constant_rhs_with_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV32-LABEL: add_constant_rhs_with_identity:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a1, a1, 25
; RV32-NEXT:    addi a2, a2, 1
; RV32-NEXT:    addi a3, a3, 2047
; RV32-NEXT:    addi a3, a3, 308
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: add_constant_rhs_with_identity:
; RV64:       # %bb.0:
; RV64-NEXT:    addiw a1, a1, 25
; RV64-NEXT:    addiw a2, a2, 1
; RV64-NEXT:    addi a3, a3, 2047
; RV64-NEXT:    addiw a3, a3, 308
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    ret
  %e0 = add i32 %a, 0
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

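; Likewise when lane 0 needs no adjustment at all.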
define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV32-LABEL: add_constant_rhs_identity:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a1, a1, 25
; RV32-NEXT:    addi a2, a2, 1
; RV32-NEXT:    addi a3, a3, 2047
; RV32-NEXT:    addi a3, a3, 308
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: add_constant_rhs_identity:
; RV64:       # %bb.0:
; RV64-NEXT:    addiw a1, a1, 25
; RV64-NEXT:    addiw a2, a2, 1
; RV64-NEXT:    addi a3, a3, 2047
; RV64-NEXT:    addiw a3, a3, 308
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    ret
  %e0 = or i32 %a, 0
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

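; Only lane 0 carries a binop; it is applied on the scalar side before the
; vector is built.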
define <4 x i32> @add_constant_rhs_identity2(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV32-LABEL: add_constant_rhs_identity2:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a0, a0, 23
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: add_constant_rhs_identity2:
; RV64:       # %bb.0:
; RV64-NEXT:    addiw a0, a0, 23
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    ret
  %e0 = add i32 %a, 23
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %c, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %d, i32 3
  ret <4 x i32> %v3
}

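; A sub of a negative constant is treated as the equivalent constant add.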
define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_inverse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslide1down.vx v8, v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI11_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = sub i32 %a, -23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

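; Commuted operands (constant on the LHS) are matched as well.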
define <4 x i32> @add_constant_rhs_commute(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslide1down.vx v8, v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI12_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = add i32 23, %a
  %e1 = add i32 25, %b
  %e2 = add i32 1, %c
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

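; With non-constant operands the adds stay scalar and feed the
; build_vector directly.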
define <4 x i32> @add_general_rhs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
; RV32-LABEL: add_general_rhs:
; RV32:       # %bb.0:
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a2, a2, a6
; RV32-NEXT:    add a3, a3, a7
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: add_general_rhs:
; RV64:       # %bb.0:
; RV64-NEXT:    addw a0, a0, a4
; RV64-NEXT:    addw a1, a1, a5
; RV64-NEXT:    addw a2, a2, a6
; RV64-NEXT:    addw a3, a3, a7
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    ret
  %e0 = add i32 %a, %e
  %e1 = add i32 %b, %f
  %e2 = add i32 %c, %g
  %e3 = add i32 %d, %h
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

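; Same when every lane adds the same non-constant scalar.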
define <4 x i32> @add_general_splat(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; RV32-LABEL: add_general_splat:
; RV32:       # %bb.0:
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a1, a1, a4
; RV32-NEXT:    add a2, a2, a4
; RV32-NEXT:    add a3, a3, a4
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: add_general_splat:
; RV64:       # %bb.0:
; RV64-NEXT:    addw a0, a0, a4
; RV64-NEXT:    addw a1, a1, a4
; RV64-NEXT:    addw a2, a2, a4
; RV64-NEXT:    addw a3, a3, a4
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    ret
  %e0 = add i32 %a, %e
  %e1 = add i32 %b, %e
  %e2 = add i32 %c, %e
  %e3 = add i32 %d, %e
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

; This test previously failed with an assertion failure because constant shift
; amounts are type legalized early.
define void @buggy(i32 %0) #0 {
entry:
  %mul.us.us.i.3 = shl i32 %0, 1
  %1 = insertelement <4 x i32> zeroinitializer, i32 %mul.us.us.i.3, i64 0
  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = shufflevector <4 x i32> %2, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
  store <4 x i32> %3, ptr null, align 16
  ret void
}

attributes #0 = { "target-features"="+v" }