; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
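; Check that clamping a value to the range of a narrower type (via smax/smin
; or umin) followed by a truncate is lowered to the narrowing clip
; instructions vnclip.wi (signed) / vnclipu.wi (unsigned) instead of a plain
; truncating vnsrl.wi.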
declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)

declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)

define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>)
  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %1, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
  %3 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %2, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

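; The clamp range here is [-127, 128], not the full i8 range [-128, 127], so
; the clamp must stay as explicit vmax/vmin followed by a truncating vnsrl.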
define void @trunc_sat_i8i16_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a0, -127
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 128
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> <i16 -127, i16 -127, i16 -127, i16 -127>)
  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 128, i16 128, i16 128, i16 128>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

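; A umin with 255 (the full u8 range) plus a truncate folds to a single
; vnclipu.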
define void @trunc_sat_u8u16_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %3 = trunc <4 x i16> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a0, 127
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
  %3 = trunc <4 x i16> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

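; A signed clamp to [0, 255] also folds to vnclipu; the non-negative lower
; bound only costs a vmax.vx with the zero register.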
define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> zeroinitializer)
  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %3 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %2, <4 x i16> zeroinitializer)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

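; The same patterns for i32 -> i16 / u32 -> u16. The first clamp range,
; [-32767, 32768], is off by one from the i16 range, so no vnclip is formed.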
define void @trunc_sat_i16i32_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 1048568
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -32767, i32 -32767, i32 -32767, i32 -32767>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 32768, i32 32768, i32 32768, i32 32768>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

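; umin with 32767 rather than 65535 does not cover the full u16 range, so
; only vminu + vnsrl can be used.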
define void @trunc_sat_u16u32_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
  %3 = trunc <4 x i32> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
  %3 = trunc <4 x i32> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

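; A lower bound that keeps the value non-negative (1 and 50 below) still
; allows vnclipu for the upper clamp; only the vmax.vx remains.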
define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a0, 50
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 50, i32 50, i32 50, i32 50>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}

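; The same patterns for i64 -> i32 / u64 -> u32, operating at LMUL=2.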
define void @trunc_sat_i32i64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addiw a0, a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    slli a0, a0, 31
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -2147483647, i64 -2147483647, i64 -2147483647, i64 -2147483647>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 2147483648, i64 2147483648, i64 2147483648, i64 2147483648>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_i32i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_i32i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}

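; umin with 2147483647 does not cover the full u32 range, so no vnclipu is
; formed for the first test below.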
define void @trunc_sat_u32u64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %3 = trunc <4 x i64> %2 to <4 x i32>
  store <4 x i32> %3, ptr %y, align 32
  ret void
}

define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
  %3 = trunc <4 x i64> %2 to <4 x i32>
  store <4 x i32> %3, ptr %y, align 32
  ret void
}

define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> zeroinitializer)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> zeroinitializer)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 8
  ret void
}

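; Saturating i32 -> i8 narrows in two steps, i32 -> i16 -> i8, using two
; vnclip instructions (vnclipu for the unsigned cases).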
define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
  %3 = trunc <4 x i32> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> zeroinitializer)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

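; Saturating i64 -> i8 narrows in three steps, i64 -> i32 -> i16 -> i8.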
define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 127, i64 127, i64 127, i64 127>)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 127, i64 127, i64 127, i64 127>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
  %3 = trunc <4 x i64> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> zeroinitializer)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> zeroinitializer)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

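; Saturating i64 -> i16 narrows in two steps, i64 -> i32 -> i16.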
define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclip.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
  %3 = trunc <4 x i64> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
  %3 = trunc <4 x i64> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 1, i64 1, i64 1, i64 1>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a0, 50
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnclipu.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 50, i64 50, i64 50, i64 50>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}