; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck --check-prefix=RV32 %s
; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck --check-prefix=RV64 %s

; FIXME: We can rematerialize "addi s0, a1, 32" (ideally along the edge
; %do_call -> %exit), and shrink wrap this routine
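;
; A hypothetical sketch of what that could look like (RV32 shown; the use of
; a2/a3, the save/restore placement, and the omission of CFI directives are
; illustrative assumptions, not current llc output). The frame is only set up
; on the call path, and the address is recomputed from the preserved base
; after the call:
;
;   vecaddr_straightline:
;   # %bb.0:
;           addi    a2, a1, 32
;           vsetivli zero, 4, e32, m1, ta, ma
;           vle32.v v8, (a2)
;           vadd.vi v8, v8, 1
;           li      a3, 57
;           vse32.v v8, (a2)
;           beq     a0, a3, .LBB0_2
;   # %bb.1: # %do_call
;           addi    sp, sp, -16
;           sw      ra, 12(sp)
;           sw      s0, 8(sp)
;           mv      s0, a1
;           call    foo
;           addi    a2, s0, 32
;           lw      ra, 12(sp)
;           lw      s0, 8(sp)
;           addi    sp, sp, 16
;           vsetivli zero, 4, e32, m1, ta, ma
;   .LBB0_2: # %exit
;           vle32.v v8, (a2)
;           vadd.vi v8, v8, 1
;           vse32.v v8, (a2)
;           ret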
define void @vecaddr_straightline(i32 zeroext %a, ptr %p) {
; RV32-LABEL: vecaddr_straightline:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, a1, 32
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v8, (s0)
; RV32-NEXT: vadd.vi v8, v8, 1
; RV32-NEXT: li a1, 57
; RV32-NEXT: vse32.v v8, (s0)
; RV32-NEXT: beq a0, a1, .LBB0_2
; RV32-NEXT: # %bb.1: # %do_call
; RV32-NEXT: call foo
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: .LBB0_2: # %exit
; RV32-NEXT: vle32.v v8, (s0)
; RV32-NEXT: vadd.vi v8, v8, 1
; RV32-NEXT: vse32.v v8, (s0)
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: .cfi_restore s0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vecaddr_straightline:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, a1, 32
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vle32.v v8, (s0)
; RV64-NEXT: vadd.vi v8, v8, 1
; RV64-NEXT: li a1, 57
; RV64-NEXT: vse32.v v8, (s0)
; RV64-NEXT: beq a0, a1, .LBB0_2
; RV64-NEXT: # %bb.1: # %do_call
; RV64-NEXT: call foo
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: .LBB0_2: # %exit
; RV64-NEXT: vle32.v v8, (s0)
; RV64-NEXT: vadd.vi v8, v8, 1
; RV64-NEXT: vse32.v v8, (s0)
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: .cfi_restore s0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %gep = getelementptr i8, ptr %p, i32 32
  %v1 = load <4 x i32>, ptr %gep
  %v2 = add <4 x i32> %v1, splat (i32 1)
  store <4 x i32> %v2, ptr %gep
  %cmp0 = icmp eq i32 %a, 57
  br i1 %cmp0, label %exit, label %do_call
do_call:
  call i32 @foo()
  br label %exit
exit:
  %v3 = load <4 x i32>, ptr %gep
  %v4 = add <4 x i32> %v3, splat (i32 1)
  store <4 x i32> %v4, ptr %gep
  ret void
}

; In this case, the second use is in a loop, so using a callee
; saved register to avoid a remat is the profitable choice.
; FIXME: We can shrink wrap the frame setup around the loop
; and avoid it along the %bb.0 -> %exit edge
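;
; A hypothetical shrink-wrapped shape (RV32 shown; the block layout, the use
; of a2/a3, and the omission of CFI directives are illustrative assumptions,
; not current llc output): the ra/s0 spills and the sp adjustment run only
; when the loop is actually entered, so the %bb.0 -> %exit path stays
; frame-free:
;
;   vecaddr_loop:
;   # %bb.0:
;           addi    a2, a1, 32
;           vsetivli zero, 4, e32, m1, ta, ma
;           vle32.v v8, (a2)
;           vadd.vi v8, v8, 1
;           li      a3, 57
;           vse32.v v8, (a2)
;           beq     a0, a3, .LBB1_3
;   # %bb.1:                              # loop preheader: set up the frame
;           addi    sp, sp, -16
;           sw      ra, 12(sp)
;           sw      s0, 8(sp)
;           addi    s0, a1, 32
;   .LBB1_2: # %do_call
;           call    foo
;           vsetivli zero, 4, e32, m1, ta, ma
;           vle32.v v8, (s0)
;           vadd.vi v8, v8, 1
;           vse32.v v8, (s0)
;           bnez    a0, .LBB1_2
;   # %bb.2:                              # loop exit: tear the frame down
;           lw      ra, 12(sp)
;           lw      s0, 8(sp)
;           addi    sp, sp, 16
;   .LBB1_3: # %exit
;           ret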
define void @vecaddr_loop(i32 zeroext %a, ptr %p) {
; RV32-LABEL: vecaddr_loop:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, a1, 32
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v8, (s0)
; RV32-NEXT: vadd.vi v8, v8, 1
; RV32-NEXT: li a1, 57
; RV32-NEXT: vse32.v v8, (s0)
; RV32-NEXT: beq a0, a1, .LBB1_2
; RV32-NEXT: .LBB1_1: # %do_call
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: call foo
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v8, (s0)
; RV32-NEXT: vadd.vi v8, v8, 1
; RV32-NEXT: vse32.v v8, (s0)
; RV32-NEXT: bnez a0, .LBB1_1
; RV32-NEXT: .LBB1_2: # %exit
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: .cfi_restore s0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vecaddr_loop:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, a1, 32
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vle32.v v8, (s0)
; RV64-NEXT: vadd.vi v8, v8, 1
; RV64-NEXT: li a1, 57
; RV64-NEXT: vse32.v v8, (s0)
; RV64-NEXT: beq a0, a1, .LBB1_2
; RV64-NEXT: .LBB1_1: # %do_call
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NEXT: call foo
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vle32.v v8, (s0)
; RV64-NEXT: vadd.vi v8, v8, 1
; RV64-NEXT: vse32.v v8, (s0)
; RV64-NEXT: bnez a0, .LBB1_1
; RV64-NEXT: .LBB1_2: # %exit
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: .cfi_restore s0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %gep = getelementptr i8, ptr %p, i32 32
  %v1 = load <4 x i32>, ptr %gep
  %v2 = add <4 x i32> %v1, splat (i32 1)
  store <4 x i32> %v2, ptr %gep
  %cmp0 = icmp eq i32 %a, 57
  br i1 %cmp0, label %exit, label %do_call
do_call:
  %b = call i32 @foo()
  %v3 = load <4 x i32>, ptr %gep
  %v4 = add <4 x i32> %v3, splat (i32 1)
  store <4 x i32> %v4, ptr %gep
  %cmp1 = icmp eq i32 %b, 0
  br i1 %cmp1, label %exit, label %do_call
exit:
  ret void
}

declare zeroext i32 @foo()