; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
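
; These tests check that a clamp (smax/smin in either order, or a plain umin)
; feeding a truncate is selected as a narrowing clip (vnclip/vnclipu), one per
; halving of the element width, when the clamp bounds match the destination
; type's range, and is kept as separate min/max instructions plus vnsrl when
; they do not.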

declare <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
declare <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
declare <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)

declare <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)

define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 -128))
  %3 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 127))
  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 127))
  %3 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 -128))
  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}
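
; The clamp bounds [-127, 128] do not match the signed i8 range [-128, 127],
; so the clamp stays as vmax.vx/vmin.vx and the truncate uses a plain vnsrl.wi.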
define void @trunc_sat_i8i16_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    li a0, -127
; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 128
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 -127))
  %3 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 128))
  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
  %3 = trunc <vscale x 4 x i16> %2 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %3, ptr %y, align 8
  ret void
}
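
; umin with 127 rather than 255 does not cover the full unsigned i8 range, so
; no vnclipu is formed; the truncate uses vnsrl.wi after vminu.vx.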
define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    li a0, 127
; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 127))
  %3 = trunc <vscale x 4 x i16> %2 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %3, ptr %y, align 8
  ret void
}
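
; The signed smax clamp to zero is kept as a vmax.vx against x0; the smin to
; 255 and the truncate are folded into vnclipu.wi.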
define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 0))
  %3 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 255))
  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl1re16.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
  %3 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 0))
  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i16i32_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    lui a0, 1048568
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 -32767))
  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 32768))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 -32768))
  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 32767))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 32767))
  %3 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 -32768))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 32767))
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 1))
  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 65535))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    li a0, 50
; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
  %3 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 50))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i32i64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addiw a0, a0, 1
; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    slli a0, a0, 31
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnsrl.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 64
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 -2147483647))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 2147483648))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_i32i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclip.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 64
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 -2147483648))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 2147483647))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_i32i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclip.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 64
  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 2147483647))
  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 -2147483648))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_u32u64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnsrl.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 64
  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 2147483647))
  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %3, ptr %y, align 32
  ret void
}

define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 64
  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %3, ptr %y, align 32
  ret void
}

define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 0))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 4294967295))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vs2r.v v12, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 0))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
  store <vscale x 4 x i32> %4, ptr %y, align 8
  ret void
}
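
; Narrowing by more than one power of two is done with a chain of
; vnclip/vnclipu steps, one per halving of the element width.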
define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 -128))
  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 127))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 127))
  %3 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 -128))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 255))
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 0))
  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 255))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl2re32.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i32>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 255))
  %3 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 0))
  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclip.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 -128))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 127))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclip.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 127))
  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 -128))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 255))
  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 0))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 255))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 255))
  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 0))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclip.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v12, 0
; CHECK-NEXT:    vs1r.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 -32768))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 32767))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclip.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v12, 0
; CHECK-NEXT:    vs1r.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 32767))
  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 -32768))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v12, 0
; CHECK-NEXT:    vs1r.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 32
  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 65535))
  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v12, 0
; CHECK-NEXT:    vs1r.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 1))
  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 65535))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    li a0, 50
; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnclipu.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v12, 0
; CHECK-NEXT:    vs1r.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <vscale x 4 x i64>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 65535))
  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 50))
  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i16>
  store <vscale x 4 x i16> %4, ptr %y, align 8
  ret void
}