1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
; Test: srem of a <4 x i16> vector by four different constant divisors
; (95, -124, 98, -1003), none of which is a power of two.
;
; What the visible assertions pin down:
;   * every lane is moved to a general-purpose register with smov;
;   * each lane is multiplied (smull) by its own constant, which is
;     materialized with a mov/movk pair, then adjusted with shifts/adds;
;   * the remainder for each lane is produced by msub against the divisor,
;     and the msub immediates #95, #-124, #98 and #-1003 are paired with
;     lanes 0, 1, 2 and 3 respectively;
;   * the four results are re-inserted into v0 lane by lane.
; No division instruction appears in the visible expected output.
4 define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
5 ; CHECK-LABEL: fold_srem_vec_1:
7 ; CHECK-NEXT: mov w9, #63421
8 ; CHECK-NEXT: mov w12, #33437
9 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
10 ; CHECK-NEXT: smov w8, v0.h[1]
11 ; CHECK-NEXT: movk w9, #31710, lsl #16
12 ; CHECK-NEXT: smov w11, v0.h[2]
13 ; CHECK-NEXT: movk w12, #21399, lsl #16
14 ; CHECK-NEXT: smull x12, w11, w12
15 ; CHECK-NEXT: smull x9, w8, w9
16 ; CHECK-NEXT: lsr x13, x12, #63
17 ; CHECK-NEXT: asr x12, x12, #37
18 ; CHECK-NEXT: lsr x9, x9, #32
19 ; CHECK-NEXT: add w12, w12, w13
20 ; CHECK-NEXT: mov w13, #98
21 ; CHECK-NEXT: sub w9, w9, w8
22 ; CHECK-NEXT: msub w11, w12, w13, w11
23 ; CHECK-NEXT: asr w13, w9, #6
24 ; CHECK-NEXT: add w9, w13, w9, lsr #31
25 ; CHECK-NEXT: mov w13, #37253
26 ; CHECK-NEXT: mov w10, #-124
27 ; CHECK-NEXT: smov w12, v0.h[0]
28 ; CHECK-NEXT: movk w13, #44150, lsl #16
29 ; CHECK-NEXT: msub w8, w9, w10, w8
30 ; CHECK-NEXT: smull x10, w12, w13
31 ; CHECK-NEXT: lsr x10, x10, #32
32 ; CHECK-NEXT: add w10, w10, w12
33 ; CHECK-NEXT: asr w13, w10, #6
34 ; CHECK-NEXT: mov w9, #95
35 ; CHECK-NEXT: add w10, w13, w10, lsr #31
36 ; CHECK-NEXT: msub w9, w10, w9, w12
37 ; CHECK-NEXT: mov w10, #63249
38 ; CHECK-NEXT: smov w13, v0.h[3]
39 ; CHECK-NEXT: movk w10, #48808, lsl #16
40 ; CHECK-NEXT: smull x10, w13, w10
41 ; CHECK-NEXT: lsr x12, x10, #63
42 ; CHECK-NEXT: asr x10, x10, #40
43 ; CHECK-NEXT: fmov s0, w9
44 ; CHECK-NEXT: add w10, w10, w12
45 ; CHECK-NEXT: mov v0.h[1], w8
46 ; CHECK-NEXT: mov w8, #-1003
47 ; CHECK-NEXT: mov v0.h[2], w11
48 ; CHECK-NEXT: msub w8, w10, w8, w13
49 ; CHECK-NEXT: mov v0.h[3], w8
50 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
52 %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
; Test: srem of a <4 x i16> vector by the splat constant 95.
;
; What the visible assertions pin down:
;   * the multiplier constant (mov #37253 / movk #44150, lsl #16) is
;     materialized once in w9 and reused by all four smull instructions;
;   * the divisor is materialized once (mov w16, #95) and reused by all four
;     msub instructions that produce the per-lane remainders;
;   * lanes are still processed one at a time in general-purpose registers
;     (smov out, mov v0.h[N] back in).
; No division instruction appears in the visible expected output.
56 define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
57 ; CHECK-LABEL: fold_srem_vec_2:
59 ; CHECK-NEXT: mov w9, #37253
60 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
61 ; CHECK-NEXT: smov w8, v0.h[1]
62 ; CHECK-NEXT: movk w9, #44150, lsl #16
63 ; CHECK-NEXT: smov w10, v0.h[0]
64 ; CHECK-NEXT: smull x13, w8, w9
65 ; CHECK-NEXT: smov w11, v0.h[2]
66 ; CHECK-NEXT: smull x14, w10, w9
67 ; CHECK-NEXT: lsr x13, x13, #32
68 ; CHECK-NEXT: smov w12, v0.h[3]
69 ; CHECK-NEXT: smull x15, w11, w9
70 ; CHECK-NEXT: lsr x14, x14, #32
71 ; CHECK-NEXT: add w13, w13, w8
72 ; CHECK-NEXT: smull x9, w12, w9
73 ; CHECK-NEXT: lsr x15, x15, #32
74 ; CHECK-NEXT: add w14, w14, w10
75 ; CHECK-NEXT: asr w16, w13, #6
76 ; CHECK-NEXT: lsr x9, x9, #32
77 ; CHECK-NEXT: add w15, w15, w11
78 ; CHECK-NEXT: add w13, w16, w13, lsr #31
79 ; CHECK-NEXT: asr w16, w14, #6
80 ; CHECK-NEXT: add w9, w9, w12
81 ; CHECK-NEXT: add w14, w16, w14, lsr #31
82 ; CHECK-NEXT: asr w16, w15, #6
83 ; CHECK-NEXT: add w15, w16, w15, lsr #31
84 ; CHECK-NEXT: asr w16, w9, #6
85 ; CHECK-NEXT: add w9, w16, w9, lsr #31
86 ; CHECK-NEXT: mov w16, #95
87 ; CHECK-NEXT: msub w10, w14, w16, w10
88 ; CHECK-NEXT: msub w8, w13, w16, w8
89 ; CHECK-NEXT: fmov s0, w10
90 ; CHECK-NEXT: msub w11, w15, w16, w11
91 ; CHECK-NEXT: mov v0.h[1], w8
92 ; CHECK-NEXT: mov v0.h[2], w11
93 ; CHECK-NEXT: msub w8, w9, w16, w12
94 ; CHECK-NEXT: mov v0.h[3], w8
95 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
97 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
102 ; Don't fold if we can combine srem with sdiv.
; Test: srem and sdiv of the same <4 x i16> vector by the splat constant 95,
; with the two results added together.
;
; What the visible assertions pin down:
;   * each lane's quotient is computed once (smull by the shared constant in
;     w8, then shifts/adds) and ends up in w14, w13, w15, w8 for lanes 0..3;
;   * each lane's remainder is derived from that same quotient with a single
;     msub against the shared divisor register (mov w16, #95);
;   * quotients are packed into v0, remainders into v1, and the final result
;     is one vector add (add v0.4h, v1.4h, v0.4h).
; In other words the quotient computation is shared between the sdiv and the
; srem rather than being done twice.
103 define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
104 ; CHECK-LABEL: combine_srem_sdiv:
106 ; CHECK-NEXT: mov w8, #37253
107 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
108 ; CHECK-NEXT: movk w8, #44150, lsl #16
109 ; CHECK-NEXT: smov w9, v0.h[1]
110 ; CHECK-NEXT: smov w10, v0.h[0]
111 ; CHECK-NEXT: smull x13, w9, w8
112 ; CHECK-NEXT: smov w11, v0.h[2]
113 ; CHECK-NEXT: smull x14, w10, w8
114 ; CHECK-NEXT: lsr x13, x13, #32
115 ; CHECK-NEXT: smov w12, v0.h[3]
116 ; CHECK-NEXT: smull x15, w11, w8
117 ; CHECK-NEXT: lsr x14, x14, #32
118 ; CHECK-NEXT: add w13, w13, w9
119 ; CHECK-NEXT: smull x8, w12, w8
120 ; CHECK-NEXT: lsr x15, x15, #32
121 ; CHECK-NEXT: add w14, w14, w10
122 ; CHECK-NEXT: asr w16, w13, #6
123 ; CHECK-NEXT: lsr x8, x8, #32
124 ; CHECK-NEXT: add w15, w15, w11
125 ; CHECK-NEXT: add w13, w16, w13, lsr #31
126 ; CHECK-NEXT: asr w16, w14, #6
127 ; CHECK-NEXT: add w8, w8, w12
128 ; CHECK-NEXT: add w14, w16, w14, lsr #31
129 ; CHECK-NEXT: asr w16, w15, #6
130 ; CHECK-NEXT: add w15, w16, w15, lsr #31
131 ; CHECK-NEXT: asr w16, w8, #6
132 ; CHECK-NEXT: add w8, w16, w8, lsr #31
133 ; CHECK-NEXT: mov w16, #95
134 ; CHECK-NEXT: msub w10, w14, w16, w10
135 ; CHECK-NEXT: msub w9, w13, w16, w9
136 ; CHECK-NEXT: fmov s0, w14
137 ; CHECK-NEXT: fmov s1, w10
138 ; CHECK-NEXT: msub w11, w15, w16, w11
139 ; CHECK-NEXT: mov v0.h[1], w13
140 ; CHECK-NEXT: mov v1.h[1], w9
141 ; CHECK-NEXT: msub w12, w8, w16, w12
142 ; CHECK-NEXT: mov v0.h[2], w15
143 ; CHECK-NEXT: mov v1.h[2], w11
144 ; CHECK-NEXT: mov v1.h[3], w12
145 ; CHECK-NEXT: mov v0.h[3], w8
146 ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
148 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
149 %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
150 %3 = add <4 x i16> %1, %2
154 ; Don't fold for divisors that are a power of two.
; Test: srem of a <4 x i16> vector by <64, 32, 8, 95>, i.e. three
; power-of-two divisors and one that is not.
;
; What the visible assertions pin down:
;   * lanes 0, 1 and 2 (divisors 64, 32, 8) use a multiply-free sequence:
;     add (divisor - 1), cmp against 0, csel on "lt", and with a mask that
;     clears the low bits (0xffffffc0, 0xffffffe0, 0xfffffff8 respectively),
;     then sub from the original lane value;
;   * lane 3 (divisor 95) uses the smull-by-constant sequence followed by
;     msub against #95.
155 define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
156 ; CHECK-LABEL: dont_fold_srem_power_of_two:
158 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
159 ; CHECK-NEXT: smov w8, v0.h[1]
160 ; CHECK-NEXT: add w12, w8, #31 // =31
161 ; CHECK-NEXT: cmp w8, #0 // =0
162 ; CHECK-NEXT: mov w11, #37253
163 ; CHECK-NEXT: csel w12, w12, w8, lt
164 ; CHECK-NEXT: smov w9, v0.h[0]
165 ; CHECK-NEXT: smov w10, v0.h[3]
166 ; CHECK-NEXT: movk w11, #44150, lsl #16
167 ; CHECK-NEXT: and w12, w12, #0xffffffe0
168 ; CHECK-NEXT: sub w8, w8, w12
169 ; CHECK-NEXT: add w12, w9, #63 // =63
170 ; CHECK-NEXT: smull x11, w10, w11
171 ; CHECK-NEXT: cmp w9, #0 // =0
172 ; CHECK-NEXT: lsr x11, x11, #32
173 ; CHECK-NEXT: csel w12, w12, w9, lt
174 ; CHECK-NEXT: add w11, w11, w10
175 ; CHECK-NEXT: and w12, w12, #0xffffffc0
176 ; CHECK-NEXT: sub w9, w9, w12
177 ; CHECK-NEXT: asr w12, w11, #6
178 ; CHECK-NEXT: add w11, w12, w11, lsr #31
179 ; CHECK-NEXT: smov w12, v0.h[2]
180 ; CHECK-NEXT: fmov s0, w9
181 ; CHECK-NEXT: add w9, w12, #7 // =7
182 ; CHECK-NEXT: cmp w12, #0 // =0
183 ; CHECK-NEXT: csel w9, w9, w12, lt
184 ; CHECK-NEXT: and w9, w9, #0xfffffff8
185 ; CHECK-NEXT: sub w9, w12, w9
186 ; CHECK-NEXT: mov v0.h[1], w8
187 ; CHECK-NEXT: mov w8, #95
188 ; CHECK-NEXT: mov v0.h[2], w9
189 ; CHECK-NEXT: msub w8, w11, w8, w10
190 ; CHECK-NEXT: mov v0.h[3], w8
191 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
193 %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
197 ; Don't fold if the divisor is one.
; Test: srem of a <4 x i16> vector by <1, 654, 23, 5423>.
;
; What the visible assertions pin down:
;   * lane 0 (divisor 1) is never extracted from the input; the result
;     register is zeroed with movi d0, #0 and only lanes 1..3 are inserted
;     afterwards, so lane 0 of the result is the constant 0;
;   * lanes 1, 2 and 3 use the smull-by-constant sequence followed by msub
;     against #654, #23 and #5423 respectively.
198 define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
199 ; CHECK-LABEL: dont_fold_srem_one:
201 ; CHECK-NEXT: mov w9, #17097
202 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
203 ; CHECK-NEXT: smov w8, v0.h[2]
204 ; CHECK-NEXT: movk w9, #45590, lsl #16
205 ; CHECK-NEXT: smull x9, w8, w9
206 ; CHECK-NEXT: lsr x9, x9, #32
207 ; CHECK-NEXT: add w9, w9, w8
208 ; CHECK-NEXT: asr w12, w9, #4
209 ; CHECK-NEXT: add w9, w12, w9, lsr #31
210 ; CHECK-NEXT: mov w12, #30865
211 ; CHECK-NEXT: mov w10, #23
212 ; CHECK-NEXT: smov w11, v0.h[1]
213 ; CHECK-NEXT: movk w12, #51306, lsl #16
214 ; CHECK-NEXT: msub w8, w9, w10, w8
215 ; CHECK-NEXT: smull x10, w11, w12
216 ; CHECK-NEXT: lsr x10, x10, #32
217 ; CHECK-NEXT: add w10, w10, w11
218 ; CHECK-NEXT: asr w12, w10, #9
219 ; CHECK-NEXT: mov w9, #654
220 ; CHECK-NEXT: add w10, w12, w10, lsr #31
221 ; CHECK-NEXT: msub w9, w10, w9, w11
222 ; CHECK-NEXT: mov w10, #47143
223 ; CHECK-NEXT: smov w12, v0.h[3]
224 ; CHECK-NEXT: movk w10, #24749, lsl #16
225 ; CHECK-NEXT: smull x10, w12, w10
226 ; CHECK-NEXT: lsr x11, x10, #63
227 ; CHECK-NEXT: asr x10, x10, #43
228 ; CHECK-NEXT: movi d0, #0000000000000000
229 ; CHECK-NEXT: add w10, w10, w11
230 ; CHECK-NEXT: mov v0.h[1], w9
231 ; CHECK-NEXT: mov w9, #5423
232 ; CHECK-NEXT: mov v0.h[2], w8
233 ; CHECK-NEXT: msub w8, w10, w9, w12
234 ; CHECK-NEXT: mov v0.h[3], w8
235 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
237 %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
241 ; Don't fold if the divisor is 2^15 (i16 32768 is the bit pattern 0x8000, i.e. -32768 as a signed i16).
; Test: srem of a <4 x i16> vector by <1, 32768, 23, 5423>.
;
; What the visible assertions pin down:
;   * lane 0 (divisor 1) is not computed; the result register is zeroed with
;     movi d0, #0 before lanes 1..3 are inserted;
;   * lane 1 (divisor written as i16 32768) uses the multiply-free sequence
;     add #32767 / cmp against 0 / csel on "lt" / and #0xffff8000 / sub;
;   * lanes 2 and 3 use the smull-by-constant sequence followed by msub
;     against #23 and #5423 respectively.
;
; NOTE(review): 32768 does not fit in a signed i16; presumably the constant
; is taken as the 16-bit pattern 0x8000 (-32768), which is the signed
; minimum rather than the maximum the function name suggests -- confirm.
242 define <4 x i16> @dont_fold_srem_i16_smax(<4 x i16> %x) {
243 ; CHECK-LABEL: dont_fold_srem_i16_smax:
245 ; CHECK-NEXT: mov w10, #17097
246 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
247 ; CHECK-NEXT: smov w9, v0.h[2]
248 ; CHECK-NEXT: movk w10, #45590, lsl #16
249 ; CHECK-NEXT: smull x10, w9, w10
250 ; CHECK-NEXT: lsr x10, x10, #32
251 ; CHECK-NEXT: add w10, w10, w9
252 ; CHECK-NEXT: asr w12, w10, #4
253 ; CHECK-NEXT: mov w11, #23
254 ; CHECK-NEXT: add w10, w12, w10, lsr #31
255 ; CHECK-NEXT: msub w9, w10, w11, w9
256 ; CHECK-NEXT: mov w10, #47143
257 ; CHECK-NEXT: smov w12, v0.h[3]
258 ; CHECK-NEXT: movk w10, #24749, lsl #16
259 ; CHECK-NEXT: smull x10, w12, w10
260 ; CHECK-NEXT: lsr x11, x10, #63
261 ; CHECK-NEXT: asr x10, x10, #43
262 ; CHECK-NEXT: smov w8, v0.h[1]
263 ; CHECK-NEXT: add w10, w10, w11
264 ; CHECK-NEXT: mov w11, #32767
265 ; CHECK-NEXT: add w11, w8, w11
266 ; CHECK-NEXT: cmp w8, #0 // =0
267 ; CHECK-NEXT: csel w11, w11, w8, lt
268 ; CHECK-NEXT: and w11, w11, #0xffff8000
269 ; CHECK-NEXT: sub w8, w8, w11
270 ; CHECK-NEXT: movi d0, #0000000000000000
271 ; CHECK-NEXT: mov v0.h[1], w8
272 ; CHECK-NEXT: mov w8, #5423
273 ; CHECK-NEXT: mov v0.h[2], w9
274 ; CHECK-NEXT: msub w8, w10, w8, w12
275 ; CHECK-NEXT: mov v0.h[3], w8
276 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
278 %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
282 ; Don't fold i64 srem.
; Test: srem of a <4 x i64> vector by <1, 654, 23, 5423>.
;
; What the visible assertions pin down:
;   * the input is read from two vector registers: element 1 from v0.d[1],
;     element 2 from d1 (fmov), element 3 from v1.d[1];
;   * elements 1, 2 and 3 each use a 64-bit constant built with mov plus
;     three movk, a high-half multiply (smulh), shift/add adjustment, and
;     msub against #654, #23 and #5423 respectively;
;   * element 0 (divisor 1) is not computed; v0 is zeroed with
;     movi v0.2d, #0 and only v0.d[1] is inserted afterwards;
;   * the result is returned in v0 (elements 0, 1) and v1 (elements 2, 3).
283 define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) {
284 ; CHECK-LABEL: dont_fold_srem_i64:
286 ; CHECK-NEXT: mov x9, #6055
287 ; CHECK-NEXT: movk x9, #58853, lsl #16
288 ; CHECK-NEXT: movk x9, #47142, lsl #32
289 ; CHECK-NEXT: mov x8, v1.d[1]
290 ; CHECK-NEXT: movk x9, #24749, lsl #48
291 ; CHECK-NEXT: smulh x9, x8, x9
292 ; CHECK-NEXT: asr x12, x9, #11
293 ; CHECK-NEXT: mov w10, #5423
294 ; CHECK-NEXT: add x9, x12, x9, lsr #63
295 ; CHECK-NEXT: msub x8, x9, x10, x8
296 ; CHECK-NEXT: mov x9, #21445
297 ; CHECK-NEXT: movk x9, #1603, lsl #16
298 ; CHECK-NEXT: movk x9, #15432, lsl #32
299 ; CHECK-NEXT: mov x12, v0.d[1]
300 ; CHECK-NEXT: movk x9, #25653, lsl #48
301 ; CHECK-NEXT: smulh x9, x12, x9
302 ; CHECK-NEXT: asr x10, x9, #8
303 ; CHECK-NEXT: add x9, x10, x9, lsr #63
304 ; CHECK-NEXT: mov w10, #654
305 ; CHECK-NEXT: msub x9, x9, x10, x12
306 ; CHECK-NEXT: mov x10, #8549
307 ; CHECK-NEXT: movk x10, #22795, lsl #16
308 ; CHECK-NEXT: movk x10, #17096, lsl #32
309 ; CHECK-NEXT: fmov x11, d1
310 ; CHECK-NEXT: movk x10, #45590, lsl #48
311 ; CHECK-NEXT: smulh x10, x11, x10
312 ; CHECK-NEXT: add x10, x10, x11
313 ; CHECK-NEXT: asr x12, x10, #4
314 ; CHECK-NEXT: add x10, x12, x10, lsr #63
315 ; CHECK-NEXT: mov w12, #23
316 ; CHECK-NEXT: msub x10, x10, x12, x11
317 ; CHECK-NEXT: movi v0.2d, #0000000000000000
318 ; CHECK-NEXT: fmov d1, x10
319 ; CHECK-NEXT: mov v1.d[1], x8
320 ; CHECK-NEXT: mov v0.d[1], x9
322 %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>