1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
4 ; At the moment, BuildUREMEqFold does not handle nonsplat vectors.
7 define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
8 ; CHECK-LABEL: test_urem_odd_even:
10 ; CHECK-NEXT: adrp x8, .LCPI0_0
11 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
12 ; CHECK-NEXT: adrp x8, .LCPI0_1
13 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_1]
14 ; CHECK-NEXT: adrp x8, .LCPI0_2
15 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI0_2]
16 ; CHECK-NEXT: neg v1.4s, v1.4s
17 ; CHECK-NEXT: adrp x8, .LCPI0_3
18 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
19 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
20 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
21 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_3]
22 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
23 ; CHECK-NEXT: neg v3.4s, v3.4s
24 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
25 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
26 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
27 ; CHECK-NEXT: movi v1.4s, #1
28 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
30 %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 25, i32 100>
31 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
32 %ret = zext <4 x i1> %cmp to <4 x i32>
36 ;==============================================================================;
38 ; One all-ones divisor in odd divisor
39 define <4 x i32> @test_urem_odd_allones_eq(<4 x i32> %X) nounwind {
40 ; CHECK-LABEL: test_urem_odd_allones_eq:
42 ; CHECK-NEXT: adrp x8, .LCPI1_0
43 ; CHECK-NEXT: adrp x9, .LCPI1_1
44 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
45 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI1_1]
46 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
47 ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
48 ; CHECK-NEXT: movi v1.4s, #1
49 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
51 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
52 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
53 %ret = zext <4 x i1> %cmp to <4 x i32>
56 define <4 x i32> @test_urem_odd_allones_ne(<4 x i32> %X) nounwind {
57 ; CHECK-LABEL: test_urem_odd_allones_ne:
59 ; CHECK-NEXT: adrp x8, .LCPI2_0
60 ; CHECK-NEXT: adrp x9, .LCPI2_1
61 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
62 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_1]
63 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
64 ; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s
65 ; CHECK-NEXT: movi v1.4s, #1
66 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
68 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
69 %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
70 %ret = zext <4 x i1> %cmp to <4 x i32>
74 ; One all-ones divisor in even divisor
75 define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
76 ; CHECK-LABEL: test_urem_even_allones_eq:
78 ; CHECK-NEXT: adrp x8, .LCPI3_0
79 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
80 ; CHECK-NEXT: adrp x8, .LCPI3_1
81 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
82 ; CHECK-NEXT: adrp x8, .LCPI3_2
83 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI3_2]
84 ; CHECK-NEXT: neg v1.4s, v1.4s
85 ; CHECK-NEXT: adrp x8, .LCPI3_3
86 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
87 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
88 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
89 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
90 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
91 ; CHECK-NEXT: neg v3.4s, v3.4s
92 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
93 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
94 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
95 ; CHECK-NEXT: movi v1.4s, #1
96 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
98 %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
99 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
100 %ret = zext <4 x i1> %cmp to <4 x i32>
103 define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
104 ; CHECK-LABEL: test_urem_even_allones_ne:
106 ; CHECK-NEXT: adrp x8, .LCPI4_0
107 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
108 ; CHECK-NEXT: adrp x8, .LCPI4_1
109 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_1]
110 ; CHECK-NEXT: adrp x8, .LCPI4_2
111 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI4_2]
112 ; CHECK-NEXT: neg v1.4s, v1.4s
113 ; CHECK-NEXT: adrp x8, .LCPI4_3
114 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
115 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
116 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
117 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_3]
118 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
119 ; CHECK-NEXT: neg v3.4s, v3.4s
120 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
121 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
122 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
123 ; CHECK-NEXT: mvn v0.16b, v0.16b
124 ; CHECK-NEXT: movi v1.4s, #1
125 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
127 %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
128 %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
129 %ret = zext <4 x i1> %cmp to <4 x i32>
133 ; One all-ones divisor in odd+even divisor
134 define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
135 ; CHECK-LABEL: test_urem_odd_even_allones_eq:
137 ; CHECK-NEXT: adrp x8, .LCPI5_0
138 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
139 ; CHECK-NEXT: adrp x8, .LCPI5_1
140 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_1]
141 ; CHECK-NEXT: adrp x8, .LCPI5_2
142 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
143 ; CHECK-NEXT: neg v1.4s, v1.4s
144 ; CHECK-NEXT: adrp x8, .LCPI5_3
145 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
146 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
147 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
148 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_3]
149 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
150 ; CHECK-NEXT: neg v3.4s, v3.4s
151 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
152 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
153 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
154 ; CHECK-NEXT: movi v1.4s, #1
155 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
157 %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
158 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
159 %ret = zext <4 x i1> %cmp to <4 x i32>
162 define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
163 ; CHECK-LABEL: test_urem_odd_even_allones_ne:
165 ; CHECK-NEXT: adrp x8, .LCPI6_0
166 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0]
167 ; CHECK-NEXT: adrp x8, .LCPI6_1
168 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_1]
169 ; CHECK-NEXT: adrp x8, .LCPI6_2
170 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI6_2]
171 ; CHECK-NEXT: neg v1.4s, v1.4s
172 ; CHECK-NEXT: adrp x8, .LCPI6_3
173 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
174 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
175 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
176 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_3]
177 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
178 ; CHECK-NEXT: neg v3.4s, v3.4s
179 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
180 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
181 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
182 ; CHECK-NEXT: mvn v0.16b, v0.16b
183 ; CHECK-NEXT: movi v1.4s, #1
184 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
186 %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
187 %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
188 %ret = zext <4 x i1> %cmp to <4 x i32>
192 ;------------------------------------------------------------------------------;
194 ; One power-of-two divisor in odd divisor
195 define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
196 ; CHECK-LABEL: test_urem_odd_poweroftwo:
198 ; CHECK-NEXT: adrp x8, .LCPI7_0
199 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0]
200 ; CHECK-NEXT: adrp x8, .LCPI7_1
201 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_1]
202 ; CHECK-NEXT: adrp x8, .LCPI7_2
203 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI7_2]
204 ; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
205 ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
206 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
207 ; CHECK-NEXT: neg v2.4s, v2.4s
208 ; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
209 ; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
210 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
211 ; CHECK-NEXT: movi v1.4s, #1
212 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
214 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 16, i32 5>
215 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
216 %ret = zext <4 x i1> %cmp to <4 x i32>
220 ; One power-of-two divisor in even divisor
221 define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
222 ; CHECK-LABEL: test_urem_even_poweroftwo:
224 ; CHECK-NEXT: adrp x8, .LCPI8_0
225 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
226 ; CHECK-NEXT: adrp x8, .LCPI8_1
227 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_1]
228 ; CHECK-NEXT: adrp x8, .LCPI8_2
229 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI8_2]
230 ; CHECK-NEXT: neg v1.4s, v1.4s
231 ; CHECK-NEXT: adrp x8, .LCPI8_3
232 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
233 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
234 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
235 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_3]
236 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
237 ; CHECK-NEXT: neg v3.4s, v3.4s
238 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
239 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
240 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
241 ; CHECK-NEXT: movi v1.4s, #1
242 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
244 %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 16, i32 14>
245 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
246 %ret = zext <4 x i1> %cmp to <4 x i32>
250 ; One power-of-two divisor in odd+even divisor
251 define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
252 ; CHECK-LABEL: test_urem_odd_even_poweroftwo:
254 ; CHECK-NEXT: adrp x8, .LCPI9_0
255 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0]
256 ; CHECK-NEXT: adrp x8, .LCPI9_1
257 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_1]
258 ; CHECK-NEXT: adrp x8, .LCPI9_2
259 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI9_2]
260 ; CHECK-NEXT: neg v1.4s, v1.4s
261 ; CHECK-NEXT: adrp x8, .LCPI9_3
262 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
263 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
264 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
265 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_3]
266 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
267 ; CHECK-NEXT: neg v3.4s, v3.4s
268 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
269 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
270 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
271 ; CHECK-NEXT: movi v1.4s, #1
272 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
274 %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 16, i32 100>
275 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
276 %ret = zext <4 x i1> %cmp to <4 x i32>
280 ;------------------------------------------------------------------------------;
282 ; One one divisor in odd divisor
283 define <4 x i32> @test_urem_odd_one(<4 x i32> %X) nounwind {
284 ; CHECK-LABEL: test_urem_odd_one:
286 ; CHECK-NEXT: adrp x8, .LCPI10_0
287 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0]
288 ; CHECK-NEXT: mov w8, #52429
289 ; CHECK-NEXT: movk w8, #52428, lsl #16
290 ; CHECK-NEXT: dup v2.4s, w8
291 ; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s
292 ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
293 ; CHECK-NEXT: movi v1.4s, #1
294 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
296 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 1, i32 5>
297 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
298 %ret = zext <4 x i1> %cmp to <4 x i32>
302 ; One one divisor in even divisor
303 define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind {
304 ; CHECK-LABEL: test_urem_even_one:
306 ; CHECK-NEXT: adrp x8, .LCPI11_0
307 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0]
308 ; CHECK-NEXT: adrp x8, .LCPI11_1
309 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_1]
310 ; CHECK-NEXT: adrp x8, .LCPI11_2
311 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_2]
312 ; CHECK-NEXT: neg v1.4s, v1.4s
313 ; CHECK-NEXT: adrp x8, .LCPI11_3
314 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
315 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
316 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
317 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_3]
318 ; CHECK-NEXT: adrp x8, .LCPI11_4
319 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
320 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI11_4]
321 ; CHECK-NEXT: neg v3.4s, v3.4s
322 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
323 ; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
324 ; CHECK-NEXT: mls v0.4s, v2.4s, v4.4s
325 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
326 ; CHECK-NEXT: movi v1.4s, #1
327 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
329 %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14>
330 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
331 %ret = zext <4 x i1> %cmp to <4 x i32>
335 ; One one divisor in odd+even divisor
336 define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
337 ; CHECK-LABEL: test_urem_odd_even_one:
339 ; CHECK-NEXT: adrp x8, .LCPI12_0
340 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
341 ; CHECK-NEXT: adrp x8, .LCPI12_1
342 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_1]
343 ; CHECK-NEXT: adrp x8, .LCPI12_2
344 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI12_2]
345 ; CHECK-NEXT: neg v1.4s, v1.4s
346 ; CHECK-NEXT: adrp x8, .LCPI12_3
347 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
348 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
349 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
350 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_3]
351 ; CHECK-NEXT: adrp x8, .LCPI12_4
352 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
353 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI12_4]
354 ; CHECK-NEXT: neg v3.4s, v3.4s
355 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
356 ; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
357 ; CHECK-NEXT: mls v0.4s, v2.4s, v4.4s
358 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
359 ; CHECK-NEXT: movi v1.4s, #1
360 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
362 %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 1, i32 100>
363 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
364 %ret = zext <4 x i1> %cmp to <4 x i32>
368 ;==============================================================================;
370 ; One all-ones divisor and one power-of-two divisor in odd divisor
371 define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
372 ; CHECK-LABEL: test_urem_odd_allones_and_poweroftwo:
374 ; CHECK-NEXT: adrp x8, .LCPI13_0
375 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0]
376 ; CHECK-NEXT: adrp x8, .LCPI13_1
377 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_1]
378 ; CHECK-NEXT: adrp x8, .LCPI13_2
379 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI13_2]
380 ; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
381 ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
382 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
383 ; CHECK-NEXT: neg v2.4s, v2.4s
384 ; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
385 ; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
386 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
387 ; CHECK-NEXT: movi v1.4s, #1
388 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
390 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5>
391 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
392 %ret = zext <4 x i1> %cmp to <4 x i32>
396 ; One all-ones divisor and one power-of-two divisor in even divisor
397 define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
398 ; CHECK-LABEL: test_urem_even_allones_and_poweroftwo:
400 ; CHECK-NEXT: adrp x8, .LCPI14_0
401 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
402 ; CHECK-NEXT: adrp x8, .LCPI14_1
403 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_1]
404 ; CHECK-NEXT: adrp x8, .LCPI14_2
405 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI14_2]
406 ; CHECK-NEXT: neg v1.4s, v1.4s
407 ; CHECK-NEXT: adrp x8, .LCPI14_3
408 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
409 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
410 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
411 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_3]
412 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
413 ; CHECK-NEXT: neg v3.4s, v3.4s
414 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
415 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
416 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
417 ; CHECK-NEXT: movi v1.4s, #1
418 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
420 %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14>
421 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
422 %ret = zext <4 x i1> %cmp to <4 x i32>
426 ; One all-ones divisor and one power-of-two divisor in odd+even divisor
427 define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
428 ; CHECK-LABEL: test_urem_odd_even_allones_and_poweroftwo:
430 ; CHECK-NEXT: adrp x8, .LCPI15_0
431 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
432 ; CHECK-NEXT: adrp x8, .LCPI15_1
433 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_1]
434 ; CHECK-NEXT: adrp x8, .LCPI15_2
435 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI15_2]
436 ; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
437 ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
438 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
439 ; CHECK-NEXT: neg v2.4s, v2.4s
440 ; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
441 ; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
442 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
443 ; CHECK-NEXT: movi v1.4s, #1
444 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
446 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100>
447 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
448 %ret = zext <4 x i1> %cmp to <4 x i32>
452 ;------------------------------------------------------------------------------;
454 ; One all-ones divisor and one one divisor in odd divisor
455 define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind {
456 ; CHECK-LABEL: test_urem_odd_allones_and_one:
458 ; CHECK-NEXT: adrp x8, .LCPI16_0
459 ; CHECK-NEXT: adrp x9, .LCPI16_1
460 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
461 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI16_1]
462 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
463 ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
464 ; CHECK-NEXT: movi v1.4s, #1
465 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
467 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5>
468 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
469 %ret = zext <4 x i1> %cmp to <4 x i32>
473 ; One all-ones divisor and one one divisor in even divisor
474 define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind {
475 ; CHECK-LABEL: test_urem_even_allones_and_one:
477 ; CHECK-NEXT: adrp x8, .LCPI17_0
478 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
479 ; CHECK-NEXT: adrp x8, .LCPI17_1
480 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_1]
481 ; CHECK-NEXT: adrp x8, .LCPI17_2
482 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI17_2]
483 ; CHECK-NEXT: neg v1.4s, v1.4s
484 ; CHECK-NEXT: adrp x8, .LCPI17_3
485 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
486 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
487 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
488 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_3]
489 ; CHECK-NEXT: adrp x8, .LCPI17_4
490 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
491 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI17_4]
492 ; CHECK-NEXT: neg v3.4s, v3.4s
493 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
494 ; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
495 ; CHECK-NEXT: mls v0.4s, v2.4s, v4.4s
496 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
497 ; CHECK-NEXT: movi v1.4s, #1
498 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
500 %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14>
501 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
502 %ret = zext <4 x i1> %cmp to <4 x i32>
506 ; One all-ones divisor and one one divisor in odd+even divisor
507 define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
508 ; CHECK-LABEL: test_urem_odd_even_allones_and_one:
510 ; CHECK-NEXT: adrp x8, .LCPI18_0
511 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0]
512 ; CHECK-NEXT: adrp x8, .LCPI18_1
513 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_1]
514 ; CHECK-NEXT: adrp x8, .LCPI18_2
515 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_2]
516 ; CHECK-NEXT: adrp x8, .LCPI18_3
517 ; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
518 ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
519 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
520 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI18_3]
521 ; CHECK-NEXT: neg v2.4s, v2.4s
522 ; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
523 ; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
524 ; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s
525 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
526 ; CHECK-NEXT: movi v1.4s, #1
527 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
529 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100>
530 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
531 %ret = zext <4 x i1> %cmp to <4 x i32>
535 ;------------------------------------------------------------------------------;
537 ; One power-of-two divisor and one one divisor in odd divisor
538 define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
539 ; CHECK-LABEL: test_urem_odd_poweroftwo_and_one:
541 ; CHECK-NEXT: adrp x8, .LCPI19_0
542 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0]
543 ; CHECK-NEXT: adrp x8, .LCPI19_1
544 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI19_1]
545 ; CHECK-NEXT: adrp x8, .LCPI19_2
546 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI19_2]
547 ; CHECK-NEXT: adrp x8, .LCPI19_3
548 ; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
549 ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
550 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
551 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI19_3]
552 ; CHECK-NEXT: neg v2.4s, v2.4s
553 ; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
554 ; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
555 ; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s
556 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
557 ; CHECK-NEXT: movi v1.4s, #1
558 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
560 %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5>
561 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
562 %ret = zext <4 x i1> %cmp to <4 x i32>
566 ; One power-of-two divisor and one one divisor in even divisor
567 define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
568 ; CHECK-LABEL: test_urem_even_poweroftwo_and_one:
570 ; CHECK-NEXT: adrp x8, .LCPI20_0
571 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0]
572 ; CHECK-NEXT: adrp x8, .LCPI20_1
573 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_1]
574 ; CHECK-NEXT: adrp x8, .LCPI20_2
575 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI20_2]
576 ; CHECK-NEXT: neg v1.4s, v1.4s
577 ; CHECK-NEXT: adrp x8, .LCPI20_3
578 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
579 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
580 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
581 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_3]
582 ; CHECK-NEXT: adrp x8, .LCPI20_4
583 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
584 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI20_4]
585 ; CHECK-NEXT: neg v3.4s, v3.4s
586 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
587 ; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
588 ; CHECK-NEXT: mls v0.4s, v2.4s, v4.4s
589 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
590 ; CHECK-NEXT: movi v1.4s, #1
591 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
593 %urem = urem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14>
594 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
595 %ret = zext <4 x i1> %cmp to <4 x i32>
599 ; One power-of-two divisor and one one divisor in odd+even divisor
600 define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
601 ; CHECK-LABEL: test_urem_odd_even_poweroftwo_and_one:
603 ; CHECK-NEXT: adrp x8, .LCPI21_0
604 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0]
605 ; CHECK-NEXT: adrp x8, .LCPI21_1
606 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_1]
607 ; CHECK-NEXT: adrp x8, .LCPI21_2
608 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI21_2]
609 ; CHECK-NEXT: adrp x8, .LCPI21_3
610 ; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
611 ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
612 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
613 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI21_3]
614 ; CHECK-NEXT: neg v2.4s, v2.4s
615 ; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
616 ; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
617 ; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s
618 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
619 ; CHECK-NEXT: movi v1.4s, #1
620 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
622 %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100>
623 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
624 %ret = zext <4 x i1> %cmp to <4 x i32>
628 ;------------------------------------------------------------------------------;
630 define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
631 ; CHECK-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
633 ; CHECK-NEXT: adrp x8, .LCPI22_0
634 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0]
635 ; CHECK-NEXT: adrp x8, .LCPI22_1
636 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_1]
637 ; CHECK-NEXT: adrp x8, .LCPI22_2
638 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI22_2]
639 ; CHECK-NEXT: adrp x8, .LCPI22_3
640 ; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
641 ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
642 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
643 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI22_3]
644 ; CHECK-NEXT: neg v2.4s, v2.4s
645 ; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
646 ; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
647 ; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s
648 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
649 ; CHECK-NEXT: movi v1.4s, #1
650 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
652 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 1>
653 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
654 %ret = zext <4 x i1> %cmp to <4 x i32>
658 define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
659 ; CHECK-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
661 ; CHECK-NEXT: adrp x8, .LCPI23_0
662 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
663 ; CHECK-NEXT: adrp x8, .LCPI23_1
664 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_1]
665 ; CHECK-NEXT: adrp x8, .LCPI23_2
666 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI23_2]
667 ; CHECK-NEXT: neg v1.4s, v1.4s
668 ; CHECK-NEXT: adrp x8, .LCPI23_3
669 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
670 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
671 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
672 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_3]
673 ; CHECK-NEXT: adrp x8, .LCPI23_4
674 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
675 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI23_4]
676 ; CHECK-NEXT: neg v3.4s, v3.4s
677 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
678 ; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
679 ; CHECK-NEXT: mls v0.4s, v2.4s, v4.4s
680 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
681 ; CHECK-NEXT: movi v1.4s, #1
682 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
684 %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 1>
685 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
686 %ret = zext <4 x i1> %cmp to <4 x i32>