; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
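
; Tests lowering of fixed-length vector sign extension, zero extension, and
; truncation when the maximum LMUL is capped at 8, 2, or 1.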
define void @sext_v4i8_v4i32(ptr %x, ptr %z) {
; CHECK-LABEL: sext_v4i8_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsext.vf4 v9, v8
; CHECK-NEXT:    vse32.v v9, (a1)
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = sext <4 x i8> %a to <4 x i32>
  store <4 x i32> %b, ptr %z
  ret void
}

define void @zext_v4i8_v4i32(ptr %x, ptr %z) {
; CHECK-LABEL: zext_v4i8_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vse32.v v9, (a1)
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = zext <4 x i8> %a to <4 x i32>
  store <4 x i32> %b, ptr %z
  ret void
}
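
; An <8 x i32> result needs an LMUL=2 register group, so the LMULMAX1
; lowering sign-extends the source in two m1 halves, using vslidedown to
; reach the upper four elements.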
define void @sext_v8i8_v8i32(ptr %x, ptr %z) {
; LMULMAX8-LABEL: sext_v8i8_v8i32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX8-NEXT:    vle8.v v8, (a0)
; LMULMAX8-NEXT:    vsext.vf4 v10, v8
; LMULMAX8-NEXT:    vse32.v v10, (a1)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: sext_v8i8_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vsext.vf4 v10, v8
; LMULMAX2-NEXT:    vse32.v v10, (a1)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: sext_v8i8_v8i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vle8.v v8, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vsext.vf4 v9, v8
; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 4
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vsext.vf4 v10, v8
; LMULMAX1-NEXT:    addi a0, a1, 16
; LMULMAX1-NEXT:    vse32.v v10, (a0)
; LMULMAX1-NEXT:    vse32.v v9, (a1)
; LMULMAX1-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = sext <8 x i8> %a to <8 x i32>
  store <8 x i32> %b, ptr %z
  ret void
}
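
; A <32 x i32> result fills a full m8 group. LMULMAX8 extends it with a
; single vsext.vf4; LMULMAX2 splits the source into four 8-element pieces
; and LMULMAX1 into eight 4-element pieces, extending and storing each one.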
define void @sext_v32i8_v32i32(ptr %x, ptr %z) {
; LMULMAX8-LABEL: sext_v32i8_v32i32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    li a2, 32
; LMULMAX8-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; LMULMAX8-NEXT:    vle8.v v8, (a0)
; LMULMAX8-NEXT:    vsext.vf4 v16, v8
; LMULMAX8-NEXT:    vse32.v v16, (a1)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: sext_v32i8_v32i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; LMULMAX2-NEXT:    vslidedown.vi v10, v8, 8
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vsext.vf4 v12, v10
; LMULMAX2-NEXT:    vsext.vf4 v10, v8
; LMULMAX2-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
; LMULMAX2-NEXT:    vslidedown.vi v8, v8, 16
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vsext.vf4 v14, v8
; LMULMAX2-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; LMULMAX2-NEXT:    vslidedown.vi v8, v8, 8
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vsext.vf4 v16, v8
; LMULMAX2-NEXT:    addi a0, a1, 96
; LMULMAX2-NEXT:    vse32.v v16, (a0)
; LMULMAX2-NEXT:    addi a0, a1, 64
; LMULMAX2-NEXT:    vse32.v v14, (a0)
; LMULMAX2-NEXT:    vse32.v v10, (a1)
; LMULMAX2-NEXT:    addi a0, a1, 32
; LMULMAX2-NEXT:    vse32.v v12, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: sext_v32i8_v32i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT:    addi a2, a0, 16
; LMULMAX1-NEXT:    vle8.v v8, (a2)
; LMULMAX1-NEXT:    vle8.v v9, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslidedown.vi v10, v8, 4
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vsext.vf4 v11, v10
; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslidedown.vi v10, v9, 4
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vsext.vf4 v12, v10
; LMULMAX1-NEXT:    vsext.vf4 v10, v8
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 8
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vsext.vf4 v13, v8
; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 4
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vsext.vf4 v14, v8
; LMULMAX1-NEXT:    vsext.vf4 v8, v9
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; LMULMAX1-NEXT:    vslidedown.vi v9, v9, 8
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vsext.vf4 v15, v9
; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslidedown.vi v9, v9, 4
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT:    vsext.vf4 v16, v9
; LMULMAX1-NEXT:    addi a0, a1, 48
; LMULMAX1-NEXT:    vse32.v v16, (a0)
; LMULMAX1-NEXT:    addi a0, a1, 32
; LMULMAX1-NEXT:    vse32.v v15, (a0)
; LMULMAX1-NEXT:    vse32.v v8, (a1)
; LMULMAX1-NEXT:    addi a0, a1, 112
; LMULMAX1-NEXT:    vse32.v v14, (a0)
; LMULMAX1-NEXT:    addi a0, a1, 96
; LMULMAX1-NEXT:    vse32.v v13, (a0)
; LMULMAX1-NEXT:    addi a0, a1, 64
; LMULMAX1-NEXT:    vse32.v v10, (a0)
; LMULMAX1-NEXT:    addi a0, a1, 16
; LMULMAX1-NEXT:    vse32.v v12, (a0)
; LMULMAX1-NEXT:    addi a0, a1, 80
; LMULMAX1-NEXT:    vse32.v v11, (a0)
; LMULMAX1-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = sext <32 x i8> %a to <32 x i32>
  store <32 x i32> %b, ptr %z
  ret void
}
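
; vnsrl.wi narrows from 2*SEW to SEW, so an i32-to-i8 truncation takes two
; rounds: e32 -> e16, then e16 -> e8.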
define void @trunc_v4i8_v4i32(ptr %x, ptr %z) {
; CHECK-LABEL: trunc_v4i8_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = trunc <4 x i32> %a to <4 x i8>
  store <4 x i8> %b, ptr %z
  ret void
}
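
; With LMUL capped at 1, the <8 x i32> source is loaded as two m1 halves;
; each half is narrowed i32 -> i16 -> i8, then the results are concatenated
; with vslideup.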
define void @trunc_v8i8_v8i32(ptr %x, ptr %z) {
; LMULMAX8-LABEL: trunc_v8i8_v8i32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX8-NEXT:    vle32.v v8, (a0)
; LMULMAX8-NEXT:    vnsrl.wi v10, v8, 0
; LMULMAX8-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; LMULMAX8-NEXT:    vnsrl.wi v8, v10, 0
; LMULMAX8-NEXT:    vse8.v v8, (a1)
; LMULMAX8-NEXT:    ret
;
; LMULMAX2-LABEL: trunc_v8i8_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vnsrl.wi v10, v8, 0
; LMULMAX2-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; LMULMAX2-NEXT:    vnsrl.wi v8, v10, 0
; LMULMAX2-NEXT:    vse8.v v8, (a1)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: trunc_v8i8_v8i32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
; LMULMAX1-NEXT:    vse8.v v8, (a1)
; LMULMAX1-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = trunc <8 x i32> %a to <8 x i8>
  store <8 x i8> %b, ptr %z
  ret void
}