; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s

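; Tests for combining a clamp of a wide vector (smax/smin for signed values,
; umin or umax+umin for unsigned ones) with a truncate to the next narrower
; element type into a single saturating narrowing shift (vnclip/vnclipu).
; The *_notopt tests use clamp bounds that do not match the destination
; type's value range and must not be combined.
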
declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)

declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)

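; The clamp bounds match the signed i8 range [-128, 127], so smax+smin+trunc
; folds to a single vnclip (in either clamp order; see the minmax variant below).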
define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>)
  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %1, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
  %3 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %2, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

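; The bounds [-127, 128] are each off by one from the signed i8 range
; [-128, 127], so this must stay as vmax+vmin with a plain vnsrl truncate.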
define void @trunc_sat_i8i16_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i16_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a0, -127
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 128
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> <i16 -127, i16 -127, i16 -127, i16 -127>)
  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 128, i16 128, i16 128, i16 128>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

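; For unsigned values a single umin with 255 (UINT8_MAX) is a complete clamp,
; so it folds to vnclipu.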
define void @trunc_sat_u8u16_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %3 = trunc <4 x i16> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

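; The umin bound 127 is not UINT8_MAX, so only a vminu plus a plain vnsrl
; truncate may be emitted.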
define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a0, 127
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
  %3 = trunc <4 x i16> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

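; A umax with 0 is a no-op on unsigned values, so umax+umin (in either order)
; still folds to vnclipu.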
define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.umax.v4i16(<4 x i16> %1, <4 x i16> <i16 0, i16 0, i16 0, i16 0>)
  %3 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %2, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i16>, ptr %x, align 16
  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %3 = tail call <4 x i16> @llvm.umax.v4i16(<4 x i16> %2, <4 x i16> <i16 0, i16 0, i16 0, i16 0>)
  %4 = trunc <4 x i16> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

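; The bounds [-32767, 32768] do not match the signed i16 range
; [-32768, 32767], so no vnclip is formed.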
define void @trunc_sat_i16i32_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 1048568
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -32767, i32 -32767, i32 -32767, i32 -32767>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 32768, i32 32768, i32 32768, i32 32768>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

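; The clamp bounds match the signed i16 range, so both clamp orders fold to
; vnclip.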
define void @trunc_sat_i16i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

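; The umin bound 32767 is not UINT16_MAX, so the clamp and truncate stay
; separate.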
define void @trunc_sat_u16u32_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
  %3 = trunc <4 x i32> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

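; umin with 65535 (UINT16_MAX), with or without a redundant umax with 0,
; folds to vnclipu.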
define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
  %3 = trunc <4 x i32> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
  %3 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %2, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v8, v8, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 32
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
  %3 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
  %4 = trunc <4 x i32> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

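; The bounds [-2147483647, 2147483648] are each off by one from the signed
; i32 range, so the clamp is kept as vmax+vmin followed by vnsrl.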
define void @trunc_sat_i32i64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addiw a0, a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    slli a0, a0, 31
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -2147483647, i64 -2147483647, i64 -2147483647, i64 -2147483647>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 2147483648, i64 2147483648, i64 2147483648, i64 2147483648>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}

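; The clamp bounds match the signed i32 range, so both clamp orders fold to
; vnclip.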
define void @trunc_sat_i32i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_i32i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i32i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclip.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}

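; The umin bound 2147483647 is not UINT32_MAX, so no vnclipu is formed.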
define void @trunc_sat_u32u64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 524288
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %3 = trunc <4 x i64> %2 to <4 x i32>
  store <4 x i32> %3, ptr %y, align 32
  ret void
}

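; umin with 4294967295 (UINT32_MAX), with or without a redundant umax with 0,
; folds to vnclipu.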
define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
  %3 = trunc <4 x i64> %2 to <4 x i32>
  store <4 x i32> %3, ptr %y, align 32
  ret void
}

define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.umax.v4i64(<4 x i64> %1, <4 x i64> <i64 0, i64 0, i64 0, i64 0>)
  %3 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %2, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}

define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnclipu.wi v10, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 64
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
  %3 = tail call <4 x i64> @llvm.umax.v4i64(<4 x i64> %2, <4 x i64> <i64 0, i64 0, i64 0, i64 0>)
  %4 = trunc <4 x i64> %3 to <4 x i32>
  store <4 x i32> %4, ptr %y, align 32
  ret void
}