; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
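
; llvm.get.active.lane.mask(%index, %tc) sets lane i of the result to the
; unsigned compare (%index + i) < %tc. On RVV this lowers to a step vector
; (vid.v), a saturating unsigned add of the index (vsaddu.vx, so %index + i
; cannot wrap back below %tc), and an unsigned compare (vmsltu.vx).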
define <vscale x 1 x i1> @get_lane_mask(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: get_lane_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 %index, i64 %tc)
  ret <vscale x 1 x i1> %mask
}
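
; With a constant zero %index the saturating add folds away entirely.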
define <vscale x 1 x i1> @constant_zero_index(ptr %p, i64 %tc) {
; CHECK-LABEL: constant_zero_index:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vmsltu.vx v0, v8, a1
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 0, i64 %tc)
  ret <vscale x 1 x i1> %mask
}
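
; A non-zero constant %index is materialized into a scalar register with li
; and still goes through the saturating add.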
define <vscale x 1 x i1> @constant_nonzero_index(ptr %p, i64 %tc) {
; CHECK-LABEL: constant_nonzero_index:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 24
; CHECK-NEXT:    vsaddu.vx v8, v8, a0
; CHECK-NEXT:    vmsltu.vx v0, v8, a1
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 24, i64 %tc)
  ret <vscale x 1 x i1> %mask
}
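
; A constant trip count is likewise materialized for the compare.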
define <vscale x 1 x i1> @constant_tripcount(ptr %p, i64 %index) {
; CHECK-LABEL: constant_tripcount:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    li a0, 1024
; CHECK-NEXT:    vmsltu.vx v0, v8, a0
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 %index, i64 1024)
  ret <vscale x 1 x i1> %mask
}
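
; With index and trip count both constant, only the step vector and the
; compare remain.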
define <vscale x 1 x i1> @constant_both(ptr %p) {
; CHECK-LABEL: constant_both:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 1024
; CHECK-NEXT:    vmsltu.vx v0, v8, a0
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 0, i64 1024)
  ret <vscale x 1 x i1> %mask
}

; Architectural max VLEN=64k, so <vscale x 1 x i1> has at most 1024 lanes and
; the result is "as-if" TC=1024.
define <vscale x 1 x i1> @above_maxvl(ptr %p) {
; CHECK-LABEL: above_maxvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    slli a0, a0, 11
; CHECK-NEXT:    vmsltu.vx v0, v8, a0
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 0, i64 2048)
  ret <vscale x 1 x i1> %mask
}
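
; The fixed-length cases follow the same pattern; vsetivli supplies the exact
; element count as an immediate AVL, and 2 x i64 fits one register at m1.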
define <2 x i1> @fv2(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    ret
  %mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 %index, i64 %tc)
  ret <2 x i1> %mask
}
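
; 8 x i64 indices are 512 bits, which takes LMUL=4 at the guaranteed minimum
; VLEN of 128.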
define <8 x i1> @fv8(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    ret
  %mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %index, i64 %tc)
  ret <8 x i1> %mask
}
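
; Beyond 16 e64 elements (the m8 limit at VLEN=128) the mask is built in
; 16-lane chunks: each later chunk loads its lane offsets as i8 from the
; constant pool, sign-extends them with vsext.vf8, adds %index and compares,
; then vslideup.vi stitches the 16-bit partial masks into v0.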
define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI8_0)
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vsaddu.vx v16, v16, a1
; CHECK-NEXT:    vmsltu.vx v0, v16, a2
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 2
; CHECK-NEXT:    ret
  %mask = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 %index, i64 %tc)
  ret <32 x i1> %mask
}
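
; Four chunks for 64 lanes; all but the last vslideup.vi run tail-undisturbed
; (tu) so earlier partial masks survive.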
define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_0)
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vsaddu.vx v16, v16, a1
; CHECK-NEXT:    vmsltu.vx v0, v16, a2
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 2
; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_1)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 4
; CHECK-NEXT:    lui a0, %hi(.LCPI9_2)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_2)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 6
; CHECK-NEXT:    ret
  %mask = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64 %index, i64 %tc)
  ret <64 x i1> %mask
}
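
; Eight 16-lane chunks for 128 lanes; the result mask is 16 bytes, so the
; vslideup.vi sequence runs at e8, m1 with offsets 2 through 14.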
define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_0)
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vsaddu.vx v16, v16, a1
; CHECK-NEXT:    vmsltu.vx v0, v16, a2
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 2
; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_1)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 6, e8, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 4
; CHECK-NEXT:    lui a0, %hi(.LCPI10_2)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_2)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 6
; CHECK-NEXT:    lui a0, %hi(.LCPI10_3)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_3)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 10, e8, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 8
; CHECK-NEXT:    lui a0, %hi(.LCPI10_4)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_4)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 12, e8, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 10
; CHECK-NEXT:    lui a0, %hi(.LCPI10_5)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_5)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 14, e8, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 12
; CHECK-NEXT:    lui a0, %hi(.LCPI10_6)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_6)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 14
; CHECK-NEXT:    ret
  %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc)
  ret <128 x i1> %mask
}

declare <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64, i64)
declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64, i64)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64, i64)
declare <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64, i64)
declare <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64, i64)
declare <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64, i64)