1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
4 ;------------------------------------------------------------------------------;
6 ;------------------------------------------------------------------------------;
; Divisibility check with an odd divisor: takes `srem i32 %X, 5`, compares the
; remainder with 0 using `eq`, and zero-extends the i1 result to i32.
; The expected AArch64 lines below use smull with a constant, shifts, and
; `add w8, w8, w8, lsl #2` (w8 + w8*4) before comparing the result against w0.
8 define i32 @test_srem_odd(i32 %X) nounwind {
9 ; CHECK-LABEL: test_srem_odd:
11 ; CHECK-NEXT: mov w8, #26215
12 ; CHECK-NEXT: movk w8, #26214, lsl #16
13 ; CHECK-NEXT: smull x8, w0, w8
14 ; CHECK-NEXT: lsr x9, x8, #63
15 ; CHECK-NEXT: asr x8, x8, #33
16 ; CHECK-NEXT: add w8, w8, w9
17 ; CHECK-NEXT: add w8, w8, w8, lsl #2
18 ; CHECK-NEXT: cmp w0, w8
19 ; CHECK-NEXT: cset w0, eq
21 %srem = srem i32 %X, 5
22 %cmp = icmp eq i32 %srem, 0
23 %ret = zext i1 %cmp to i32
; Same shape as the divisor-5 case but with the odd divisor 25: takes
; `srem i32 %X, 25`, compares the remainder with 0 using `eq`, and
; zero-extends the i1 result to i32.
; The expected lines load 25 into w9 and use `msub w8, w8, w9, w0`, then
; compare the msub result against zero.
27 define i32 @test_srem_odd_25(i32 %X) nounwind {
28 ; CHECK-LABEL: test_srem_odd_25:
30 ; CHECK-NEXT: mov w8, #34079
31 ; CHECK-NEXT: movk w8, #20971, lsl #16
32 ; CHECK-NEXT: smull x8, w0, w8
33 ; CHECK-NEXT: lsr x9, x8, #63
34 ; CHECK-NEXT: asr x8, x8, #35
35 ; CHECK-NEXT: add w8, w8, w9
36 ; CHECK-NEXT: mov w9, #25
37 ; CHECK-NEXT: msub w8, w8, w9, w0
38 ; CHECK-NEXT: cmp w8, #0 // =0
39 ; CHECK-NEXT: cset w0, eq
41 %srem = srem i32 %X, 25
42 %cmp = icmp eq i32 %srem, 0
43 %ret = zext i1 %cmp to i32
47 ; This is like test_srem_odd, except the divisor has bit 30 set.
; Odd divisor 1073741827 (2^30 + 3): takes `srem i32 %X, 1073741827`, compares
; the remainder with 0 using `eq`, and zero-extends the i1 result to i32.
; In the expected lines the divisor is materialized in w9 as
; `mov #3` + `movk #16384, lsl #16` and consumed by msub.
48 define i32 @test_srem_odd_bit30(i32 %X) nounwind {
49 ; CHECK-LABEL: test_srem_odd_bit30:
51 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
52 ; CHECK-NEXT: sxtw x8, w0
53 ; CHECK-NEXT: sbfiz x9, x0, #29, #32
54 ; CHECK-NEXT: sub x8, x9, x8
55 ; CHECK-NEXT: lsr x9, x8, #63
56 ; CHECK-NEXT: asr x8, x8, #59
57 ; CHECK-NEXT: add w8, w8, w9
58 ; CHECK-NEXT: mov w9, #3
59 ; CHECK-NEXT: movk w9, #16384, lsl #16
60 ; CHECK-NEXT: msub w8, w8, w9, w0
61 ; CHECK-NEXT: cmp w8, #0 // =0
62 ; CHECK-NEXT: cset w0, eq
64 %srem = srem i32 %X, 1073741827
65 %cmp = icmp eq i32 %srem, 0
66 %ret = zext i1 %cmp to i32
70 ; This is like test_srem_odd, except the divisor has bit 31 set.
; Odd divisor written as 2147483651 (2^31 + 3): takes
; `srem i32 %X, 2147483651`, compares the remainder with 0 using `eq`, and
; zero-extends the i1 result to i32.
; The expected lines load the same 32-bit pattern into w9 as the signed
; immediate #-2147483645 and feed it to msub.
71 define i32 @test_srem_odd_bit31(i32 %X) nounwind {
72 ; CHECK-LABEL: test_srem_odd_bit31:
74 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
75 ; CHECK-NEXT: sxtw x8, w0
76 ; CHECK-NEXT: add x8, x8, x8, lsl #29
77 ; CHECK-NEXT: neg x8, x8
78 ; CHECK-NEXT: lsr x9, x8, #63
79 ; CHECK-NEXT: asr x8, x8, #60
80 ; CHECK-NEXT: add w8, w8, w9
81 ; CHECK-NEXT: mov w9, #-2147483645
82 ; CHECK-NEXT: msub w8, w8, w9, w0
83 ; CHECK-NEXT: cmp w8, #0 // =0
84 ; CHECK-NEXT: cset w0, eq
86 %srem = srem i32 %X, 2147483651
87 %cmp = icmp eq i32 %srem, 0
88 %ret = zext i1 %cmp to i32
92 ;------------------------------------------------------------------------------;
94 ;------------------------------------------------------------------------------;
; Even divisor on a narrower type: takes `srem i16 %X, 14`, compares the
; remainder with 0 using `ne` (not `eq`), and zero-extends the i1 result to
; i16.
; The expected lines sign-extend the argument with sxth, and the final test
; looks only at the low 16 bits (`tst w8, #0xffff`) before `cset w0, ne`.
96 define i16 @test_srem_even(i16 %X) nounwind {
97 ; CHECK-LABEL: test_srem_even:
99 ; CHECK-NEXT: mov w9, #9363
100 ; CHECK-NEXT: sxth w8, w0
101 ; CHECK-NEXT: movk w9, #37449, lsl #16
102 ; CHECK-NEXT: smull x9, w8, w9
103 ; CHECK-NEXT: lsr x9, x9, #32
104 ; CHECK-NEXT: add w8, w9, w8
105 ; CHECK-NEXT: asr w9, w8, #3
106 ; CHECK-NEXT: add w8, w9, w8, lsr #31
107 ; CHECK-NEXT: mov w9, #14
108 ; CHECK-NEXT: msub w8, w8, w9, w0
109 ; CHECK-NEXT: tst w8, #0xffff
110 ; CHECK-NEXT: cset w0, ne
112 %srem = srem i16 %X, 14
113 %cmp = icmp ne i16 %srem, 0
114 %ret = zext i1 %cmp to i16
; Even divisor 100: takes `srem i32 %X, 100`, compares the remainder with 0
; using `eq`, and zero-extends the i1 result to i32.
; The expected lines load 100 into w9, use `msub w8, w8, w9, w0`, and compare
; the msub result against zero.
118 define i32 @test_srem_even_100(i32 %X) nounwind {
119 ; CHECK-LABEL: test_srem_even_100:
121 ; CHECK-NEXT: mov w8, #34079
122 ; CHECK-NEXT: movk w8, #20971, lsl #16
123 ; CHECK-NEXT: smull x8, w0, w8
124 ; CHECK-NEXT: lsr x9, x8, #63
125 ; CHECK-NEXT: asr x8, x8, #37
126 ; CHECK-NEXT: add w8, w8, w9
127 ; CHECK-NEXT: mov w9, #100
128 ; CHECK-NEXT: msub w8, w8, w9, w0
129 ; CHECK-NEXT: cmp w8, #0 // =0
130 ; CHECK-NEXT: cset w0, eq
132 %srem = srem i32 %X, 100
133 %cmp = icmp eq i32 %srem, 0
134 %ret = zext i1 %cmp to i32
138 ; This is like test_srem_even, except the divisor has bit 30 set.
; Even divisor 1073741928 (2^30 + 104): takes `srem i32 %X, 1073741928`,
; compares the remainder with 0 using `eq`, and zero-extends the i1 result to
; i32.
; In the expected lines the divisor is materialized in w9 as
; `mov #104` + `movk #16384, lsl #16` and consumed by msub.
139 define i32 @test_srem_even_bit30(i32 %X) nounwind {
140 ; CHECK-LABEL: test_srem_even_bit30:
142 ; CHECK-NEXT: mov w8, #65433
143 ; CHECK-NEXT: movk w8, #16383, lsl #16
144 ; CHECK-NEXT: smull x8, w0, w8
145 ; CHECK-NEXT: lsr x9, x8, #63
146 ; CHECK-NEXT: asr x8, x8, #60
147 ; CHECK-NEXT: add w8, w8, w9
148 ; CHECK-NEXT: mov w9, #104
149 ; CHECK-NEXT: movk w9, #16384, lsl #16
150 ; CHECK-NEXT: msub w8, w8, w9, w0
151 ; CHECK-NEXT: cmp w8, #0 // =0
152 ; CHECK-NEXT: cset w0, eq
154 %srem = srem i32 %X, 1073741928
155 %cmp = icmp eq i32 %srem, 0
156 %ret = zext i1 %cmp to i32
160 ; This is like test_srem_even, except the divisor has bit 31 set.
; Even divisor written as 2147483750 (2^31 + 102): takes
; `srem i32 %X, 2147483750`, compares the remainder with 0 using `eq`, and
; zero-extends the i1 result to i32.
; In the expected lines the divisor is materialized in w9 as
; `mov #102` + `movk #32768, lsl #16` and consumed by msub.
161 define i32 @test_srem_even_bit31(i32 %X) nounwind {
162 ; CHECK-LABEL: test_srem_even_bit31:
164 ; CHECK-NEXT: mov w8, #65433
165 ; CHECK-NEXT: movk w8, #32767, lsl #16
166 ; CHECK-NEXT: smull x8, w0, w8
167 ; CHECK-NEXT: lsr x8, x8, #32
168 ; CHECK-NEXT: sub w8, w8, w0
169 ; CHECK-NEXT: asr w9, w8, #30
170 ; CHECK-NEXT: add w8, w9, w8, lsr #31
171 ; CHECK-NEXT: mov w9, #102
172 ; CHECK-NEXT: movk w9, #32768, lsl #16
173 ; CHECK-NEXT: msub w8, w8, w9, w0
174 ; CHECK-NEXT: cmp w8, #0 // =0
175 ; CHECK-NEXT: cset w0, eq
177 %srem = srem i32 %X, 2147483750
178 %cmp = icmp eq i32 %srem, 0
179 %ret = zext i1 %cmp to i32
183 ;------------------------------------------------------------------------------;
185 ;------------------------------------------------------------------------------;
187 ; 'NE' predicate is fine too.
; Divisor 5 with the inverted predicate: takes `srem i32 %X, 5`, compares the
; remainder with 0 using `ne`, and zero-extends the i1 result to i32.
; The expected lines match the `eq` variant of this divisor except for the
; final `cset w0, ne`.
188 define i32 @test_srem_odd_setne(i32 %X) nounwind {
189 ; CHECK-LABEL: test_srem_odd_setne:
191 ; CHECK-NEXT: mov w8, #26215
192 ; CHECK-NEXT: movk w8, #26214, lsl #16
193 ; CHECK-NEXT: smull x8, w0, w8
194 ; CHECK-NEXT: lsr x9, x8, #63
195 ; CHECK-NEXT: asr x8, x8, #33
196 ; CHECK-NEXT: add w8, w8, w9
197 ; CHECK-NEXT: add w8, w8, w8, lsl #2
198 ; CHECK-NEXT: cmp w0, w8
199 ; CHECK-NEXT: cset w0, ne
201 %srem = srem i32 %X, 5
202 %cmp = icmp ne i32 %srem, 0
203 %ret = zext i1 %cmp to i32
207 ; The fold is only valid for positive divisors; negative ones should be negated.
; Negative odd divisor: takes `srem i32 %X, -5`, compares the remainder with 0
; using `ne`, and zero-extends the i1 result to i32.
; The expected lines form w8 + (w8 << 2) and then use `cmn w0, w8`
; (compare-negative) instead of `cmp` before `cset w0, ne`.
208 define i32 @test_srem_negative_odd(i32 %X) nounwind {
209 ; CHECK-LABEL: test_srem_negative_odd:
211 ; CHECK-NEXT: mov w8, #-1717986919
212 ; CHECK-NEXT: smull x8, w0, w8
213 ; CHECK-NEXT: lsr x9, x8, #63
214 ; CHECK-NEXT: asr x8, x8, #33
215 ; CHECK-NEXT: add w8, w8, w9
216 ; CHECK-NEXT: add w8, w8, w8, lsl #2
217 ; CHECK-NEXT: cmn w0, w8
218 ; CHECK-NEXT: cset w0, ne
220 %srem = srem i32 %X, -5
221 %cmp = icmp ne i32 %srem, 0
222 %ret = zext i1 %cmp to i32
; Negative even divisor: takes `srem i32 %X, -14`, compares the remainder with
; 0 using `ne`, and zero-extends the i1 result to i32.
; The expected lines load #-14 into w9, use `msub w8, w8, w9, w0`, and compare
; the msub result against zero.
225 define i32 @test_srem_negative_even(i32 %X) nounwind {
226 ; CHECK-LABEL: test_srem_negative_even:
228 ; CHECK-NEXT: mov w8, #56173
229 ; CHECK-NEXT: movk w8, #28086, lsl #16
230 ; CHECK-NEXT: smull x8, w0, w8
231 ; CHECK-NEXT: lsr x8, x8, #32
232 ; CHECK-NEXT: sub w8, w8, w0
233 ; CHECK-NEXT: asr w9, w8, #3
234 ; CHECK-NEXT: add w8, w9, w8, lsr #31
235 ; CHECK-NEXT: mov w9, #-14
236 ; CHECK-NEXT: msub w8, w8, w9, w0
237 ; CHECK-NEXT: cmp w8, #0 // =0
238 ; CHECK-NEXT: cset w0, ne
240 %srem = srem i32 %X, -14
241 %cmp = icmp ne i32 %srem, 0
242 %ret = zext i1 %cmp to i32
246 ;------------------------------------------------------------------------------;
248 ;------------------------------------------------------------------------------;
250 ; We can lower remainder of division by one much better elsewhere.
; Divisor 1: takes `srem i32 %X, 1`, compares the remainder with 0 using `eq`,
; and zero-extends the i1 result to i32.
; The only expected instruction shown is `mov w0, #1`, i.e. the result does
; not depend on %X.
251 define i32 @test_srem_one(i32 %X) nounwind {
252 ; CHECK-LABEL: test_srem_one:
254 ; CHECK-NEXT: mov w0, #1
256 %srem = srem i32 %X, 1
257 %cmp = icmp eq i32 %srem, 0
258 %ret = zext i1 %cmp to i32
262 ; We can lower remainder of division by powers of two much better elsewhere.
; Power-of-two divisor: takes `srem i32 %X, 16`, compares the remainder with 0
; using `eq`, and zero-extends the i1 result to i32.
; The expected lines contain no multiply: they add 15 to the input, select the
; biased value when the input is negative (`csel ... lt`), mask with
; 0xfffffff0, and compare the masked value against w0.
263 define i32 @test_srem_pow2(i32 %X) nounwind {
264 ; CHECK-LABEL: test_srem_pow2:
266 ; CHECK-NEXT: add w8, w0, #15 // =15
267 ; CHECK-NEXT: cmp w0, #0 // =0
268 ; CHECK-NEXT: csel w8, w8, w0, lt
269 ; CHECK-NEXT: and w8, w8, #0xfffffff0
270 ; CHECK-NEXT: cmp w0, w8
271 ; CHECK-NEXT: cset w0, eq
273 %srem = srem i32 %X, 16
274 %cmp = icmp eq i32 %srem, 0
275 %ret = zext i1 %cmp to i32
279 ; The fold is only valid for positive divisors, and we can't negate INT_MIN.
; Divisor written as 2147483648 (bit pattern 0x80000000 in i32): takes
; `srem i32 %X, 2147483648`, compares the remainder with 0 using `eq`, and
; zero-extends the i1 result to i32.
; The expected lines add 2147483647 to the input, select the biased value when
; the input is negative, mask with 0x80000000, and finish with `cmn w0, w8`.
280 define i32 @test_srem_int_min(i32 %X) nounwind {
281 ; CHECK-LABEL: test_srem_int_min:
283 ; CHECK-NEXT: mov w8, #2147483647
284 ; CHECK-NEXT: add w8, w0, w8
285 ; CHECK-NEXT: cmp w0, #0 // =0
286 ; CHECK-NEXT: csel w8, w8, w0, lt
287 ; CHECK-NEXT: and w8, w8, #0x80000000
288 ; CHECK-NEXT: cmn w0, w8
289 ; CHECK-NEXT: cset w0, eq
291 %srem = srem i32 %X, 2147483648
292 %cmp = icmp eq i32 %srem, 0
293 %ret = zext i1 %cmp to i32
297 ; We can lower remainder of division by all-ones much better elsewhere.
; Divisor written as 4294967295 (all 32 bits set): takes
; `srem i32 %X, 4294967295`, compares the remainder with 0 using `eq`, and
; zero-extends the i1 result to i32.
; The expected lines contain no multiply; note that the csel picks between w0
; and w0, so the following `cmp w0, w8` compares the input with itself.
298 define i32 @test_srem_allones(i32 %X) nounwind {
299 ; CHECK-LABEL: test_srem_allones:
301 ; CHECK-NEXT: cmp w0, #0 // =0
302 ; CHECK-NEXT: csel w8, w0, w0, lt
303 ; CHECK-NEXT: cmp w0, w8
304 ; CHECK-NEXT: cset w0, eq
306 %srem = srem i32 %X, 4294967295
307 %cmp = icmp eq i32 %srem, 0
308 %ret = zext i1 %cmp to i32