1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=SDAG
3 ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=GISEL
5 ; These tests are taken from combine-udiv.ll in the X86 tests.
; Uniform case: every lane of the <8 x i16> input is unsigned-divided by the
; same constant, 23. In the check lines below, neither selector's expected
; output contains a divide instruction: the quotient is assembled from widening
; multiplies (umull/umull2), uzp2, sub, usra #1 and a final ushr #4. The two
; expected sequences differ only in how the multiplier reaches v1: a mov/dup of
; the immediate 25645 (0x642d) versus a load from a constant-pool entry.
6 define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
7 ; SDAG-LABEL: combine_vec_udiv_uniform:
9 ; SDAG-NEXT: mov w8, #25645 // =0x642d
10 ; SDAG-NEXT: dup v1.8h, w8
11 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
12 ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
13 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
14 ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
15 ; SDAG-NEXT: usra v1.8h, v0.8h, #1
16 ; SDAG-NEXT: ushr v0.8h, v1.8h, #4
19 ; GISEL-LABEL: combine_vec_udiv_uniform:
21 ; GISEL-NEXT: adrp x8, .LCPI0_0
22 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
23 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
24 ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
25 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
26 ; GISEL-NEXT: sub v0.8h, v0.8h, v1.8h
27 ; GISEL-NEXT: usra v1.8h, v0.8h, #1
28 ; GISEL-NEXT: ushr v0.8h, v1.8h, #4
30 %1 = udiv <8 x i16> %x, <i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23>
; Non-uniform case: each lane of the <8 x i16> input has its own constant
; divisor. udiv treats its operands as unsigned, so the negative i16 literals
; stand for their 16-bit unsigned bit patterns (-23 is 65513, -1 is 65535,
; -256 is 65280, -32768 is 32768).
; Both expected sequences load four per-lane constant vectors (.LCPI1_0 to
; .LCPI1_3) and use them for: a ushl of the input, a umull/umull2 + uzp2
; multiply, a sub from the original input, a second umull/umull2 + uzp2
; multiply, an add, and a final ushl. The GlobalISel sequence additionally
; applies neg to each loaded vector before using it as the ushl shift operand,
; and references the constant-pool entries in the opposite order.
34 define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
35 ; SDAG-LABEL: combine_vec_udiv_nonuniform:
37 ; SDAG-NEXT: adrp x8, .LCPI1_0
38 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
39 ; SDAG-NEXT: adrp x8, .LCPI1_1
40 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_1]
41 ; SDAG-NEXT: adrp x8, .LCPI1_2
42 ; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h
43 ; SDAG-NEXT: umull2 v3.4s, v1.8h, v2.8h
44 ; SDAG-NEXT: umull v1.4s, v1.4h, v2.4h
45 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_2]
46 ; SDAG-NEXT: adrp x8, .LCPI1_3
47 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v3.8h
48 ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
49 ; SDAG-NEXT: umull2 v3.4s, v0.8h, v2.8h
50 ; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h
51 ; SDAG-NEXT: uzp2 v0.8h, v0.8h, v3.8h
52 ; SDAG-NEXT: add v0.8h, v0.8h, v1.8h
53 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_3]
54 ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
57 ; GISEL-LABEL: combine_vec_udiv_nonuniform:
59 ; GISEL-NEXT: adrp x8, .LCPI1_3
60 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_3]
61 ; GISEL-NEXT: adrp x8, .LCPI1_2
62 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2]
63 ; GISEL-NEXT: adrp x8, .LCPI1_1
64 ; GISEL-NEXT: neg v1.8h, v1.8h
65 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
66 ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
67 ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
68 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_1]
69 ; GISEL-NEXT: adrp x8, .LCPI1_0
70 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
71 ; GISEL-NEXT: sub v0.8h, v0.8h, v1.8h
72 ; GISEL-NEXT: umull2 v3.4s, v0.8h, v2.8h
73 ; GISEL-NEXT: umull v0.4s, v0.4h, v2.4h
74 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
75 ; GISEL-NEXT: uzp2 v0.8h, v0.8h, v3.8h
76 ; GISEL-NEXT: add v0.8h, v0.8h, v1.8h
77 ; GISEL-NEXT: neg v1.8h, v2.8h
78 ; GISEL-NEXT: ushl v0.8h, v0.8h, v1.8h
80 %1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
; Non-uniform case with divisors whose magnitudes run from 34 to 41 and whose
; signs are mixed as literals. udiv treats them as unsigned, so e.g. i16 -34
; is the bit pattern 65502.
; The expected sequences here are shorter than in combine_vec_udiv_nonuniform:
; a ushl of the input, one umull/umull2 + uzp2 multiply, and a final ushl, with
; no sub/add step in the check lines below. Three constant-pool vectors
; (.LCPI2_0 to .LCPI2_2) supply the operands. The GlobalISel sequence applies
; neg to the loaded vectors before using them as ushl shift operands.
84 define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
85 ; SDAG-LABEL: combine_vec_udiv_nonuniform2:
87 ; SDAG-NEXT: adrp x8, .LCPI2_0
88 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
89 ; SDAG-NEXT: adrp x8, .LCPI2_1
90 ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
91 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_1]
92 ; SDAG-NEXT: adrp x8, .LCPI2_2
93 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
94 ; SDAG-NEXT: umull v0.4s, v0.4h, v1.4h
95 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_2]
96 ; SDAG-NEXT: uzp2 v0.8h, v0.8h, v2.8h
97 ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
100 ; GISEL-LABEL: combine_vec_udiv_nonuniform2:
102 ; GISEL-NEXT: adrp x8, .LCPI2_2
103 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_2]
104 ; GISEL-NEXT: adrp x8, .LCPI2_1
105 ; GISEL-NEXT: neg v1.8h, v1.8h
106 ; GISEL-NEXT: ushl v0.8h, v0.8h, v1.8h
107 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_1]
108 ; GISEL-NEXT: adrp x8, .LCPI2_0
109 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
110 ; GISEL-NEXT: umull v0.4s, v0.4h, v1.4h
111 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
112 ; GISEL-NEXT: neg v1.8h, v1.8h
113 ; GISEL-NEXT: uzp2 v0.8h, v0.8h, v2.8h
114 ; GISEL-NEXT: ushl v0.8h, v0.8h, v1.8h
116 %1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>
; Non-uniform case with the positive divisors 7, 23, 25, 27, 31, 47, 63, 127.
; Both expected sequences multiply by a constant-pool vector (umull/umull2 +
; uzp2), then apply sub and usra #1, and finish with a ushl whose shift operand
; is a second constant-pool vector. Unlike combine_vec_udiv_nonuniform, there
; is no ushl of the input before the multiply in the check lines below. The
; GlobalISel sequence applies neg to the loaded vector before the final ushl.
120 define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
121 ; SDAG-LABEL: combine_vec_udiv_nonuniform3:
123 ; SDAG-NEXT: adrp x8, .LCPI3_0
124 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
125 ; SDAG-NEXT: adrp x8, .LCPI3_1
126 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
127 ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
128 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
129 ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
130 ; SDAG-NEXT: usra v1.8h, v0.8h, #1
131 ; SDAG-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
132 ; SDAG-NEXT: ushl v0.8h, v1.8h, v0.8h
135 ; GISEL-LABEL: combine_vec_udiv_nonuniform3:
137 ; GISEL-NEXT: adrp x8, .LCPI3_1
138 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_1]
139 ; GISEL-NEXT: adrp x8, .LCPI3_0
140 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
141 ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
142 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
143 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
144 ; GISEL-NEXT: sub v0.8h, v0.8h, v1.8h
145 ; GISEL-NEXT: usra v1.8h, v0.8h, #1
146 ; GISEL-NEXT: neg v0.8h, v2.8h
147 ; GISEL-NEXT: ushl v0.8h, v1.8h, v0.8h
149 %1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
; <16 x i8> case in which only lane 0 has a non-trivial divisor: i8 -64, which
; udiv treats as the unsigned value 192. Every other lane is divided by 1.
; Expected SelectionDAG sequence: multiply all lanes by the splat immediate 171
; (movi + umull/umull2 + uzp2), ushr #7, then combine that result with the
; original input using two constant-pool masks (and/and/orr).
; Expected GlobalISel sequence: multiplier and shift operand come from
; constant-pool vectors, and the combine with the original input is done by
; bif with a mask built by shl #7 / sshr #7 on a third constant-pool vector.
153 define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
154 ; SDAG-LABEL: combine_vec_udiv_nonuniform4:
156 ; SDAG-NEXT: movi v1.16b, #171
157 ; SDAG-NEXT: adrp x8, .LCPI4_0
158 ; SDAG-NEXT: adrp x9, .LCPI4_1
159 ; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_1]
160 ; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b
161 ; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b
162 ; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
163 ; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b
164 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
165 ; SDAG-NEXT: ushr v1.16b, v1.16b, #7
166 ; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
167 ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
170 ; GISEL-LABEL: combine_vec_udiv_nonuniform4:
172 ; GISEL-NEXT: adrp x8, .LCPI4_2
173 ; GISEL-NEXT: adrp x9, .LCPI4_0
174 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2]
175 ; GISEL-NEXT: adrp x8, .LCPI4_1
176 ; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0]
177 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_1]
178 ; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
179 ; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
180 ; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
181 ; GISEL-NEXT: neg v2.16b, v3.16b
182 ; GISEL-NEXT: shl v3.16b, v4.16b, #7
183 ; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
184 ; GISEL-NEXT: sshr v2.16b, v3.16b, #7
185 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
187 %div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; NOTE(review): the name suggests a regression test for bug report 38477;
; that is inferred from the name only, confirm against the tracker.
; <8 x i16> udiv by a non-uniform constant vector whose lane 0 divisor is 1 and
; whose remaining lanes mix positive and negative literals (udiv treats the
; negative ones as unsigned bit patterns, e.g. i16 -111 is 65425).
; Both expected sequences run two umull/umull2 + uzp2 multiplies with a sub
; and an add around them, then a per-lane ushl, and finally merge the result
; with the original input: and/and/orr with constant-pool masks for
; SelectionDAG, bif with a mask built by ushll / shl #15 / sshr #15 for
; GlobalISel.
191 define <8 x i16> @pr38477(<8 x i16> %a0) {
192 ; SDAG-LABEL: pr38477:
194 ; SDAG-NEXT: adrp x8, .LCPI5_0
195 ; SDAG-NEXT: adrp x9, .LCPI5_4
196 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
197 ; SDAG-NEXT: adrp x8, .LCPI5_1
198 ; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
199 ; SDAG-NEXT: adrp x8, .LCPI5_2
200 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
201 ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
202 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
203 ; SDAG-NEXT: sub v2.8h, v0.8h, v1.8h
204 ; SDAG-NEXT: umull2 v4.4s, v2.8h, v3.8h
205 ; SDAG-NEXT: umull v2.4s, v2.4h, v3.4h
206 ; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI5_4]
207 ; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
208 ; SDAG-NEXT: uzp2 v2.8h, v2.8h, v4.8h
209 ; SDAG-NEXT: add v1.8h, v2.8h, v1.8h
210 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
211 ; SDAG-NEXT: adrp x8, .LCPI5_3
212 ; SDAG-NEXT: ushl v1.8h, v1.8h, v2.8h
213 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_3]
214 ; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
215 ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
218 ; GISEL-LABEL: pr38477:
220 ; GISEL-NEXT: adrp x8, .LCPI5_3
221 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3]
222 ; GISEL-NEXT: adrp x8, .LCPI5_2
223 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
224 ; GISEL-NEXT: adrp x8, .LCPI5_0
225 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
226 ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
227 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
228 ; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h
229 ; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h
230 ; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h
231 ; GISEL-NEXT: ldr d3, [x8, :lo12:.LCPI5_0]
232 ; GISEL-NEXT: adrp x8, .LCPI5_1
233 ; GISEL-NEXT: ushll v3.8h, v3.8b, #0
234 ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
235 ; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_1]
236 ; GISEL-NEXT: shl v3.8h, v3.8h, #15
237 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
238 ; GISEL-NEXT: neg v2.8h, v4.8h
239 ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
240 ; GISEL-NEXT: sshr v2.8h, v3.8h, #15
241 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
243 %1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>
; Scalar case: %x is first masked to its low 8 bits (and with 255) and the
; masked value is then unsigned-divided by 180.
; Both expected sequences build a 32-bit constant with mov #5826 followed by
; movk #364, lsl #16, multiply the masked input by it with umull, and take the
; upper 32 bits of the 64-bit product with lsr #32. They differ in the masking
; instruction (and with #0xff versus uxtb) and in register assignment.
247 define i32 @udiv_div_by_180(i32 %x)
248 ; SDAG-LABEL: udiv_div_by_180:
250 ; SDAG-NEXT: mov w8, #5826 // =0x16c2
251 ; SDAG-NEXT: and w9, w0, #0xff
252 ; SDAG-NEXT: movk w8, #364, lsl #16
253 ; SDAG-NEXT: umull x8, w9, w8
254 ; SDAG-NEXT: lsr x0, x8, #32
255 ; SDAG-NEXT: // kill: def $w0 killed $w0 killed $x0
258 ; GISEL-LABEL: udiv_div_by_180:
260 ; GISEL-NEXT: uxtb w8, w0
261 ; GISEL-NEXT: mov w9, #5826 // =0x16c2
262 ; GISEL-NEXT: movk w9, #364, lsl #16
263 ; GISEL-NEXT: umull x8, w8, w9
264 ; GISEL-NEXT: lsr x0, x8, #32
265 ; GISEL-NEXT: // kill: def $w0 killed $w0 killed $x0
268 %truncate = and i32 %x, 255
269 %udiv = udiv i32 %truncate, 180
; Scalar udiv carrying the `exact` flag, with divisor 180 (= 4 * 45).
; Both selectors are expected to emit the same sequence: lsr #2 on the input,
; then a 32-bit mul by the constant built with mov #20389 / movk #42234,
; lsl #16, i.e. 0xA4FA4FA5. That constant is the multiplicative inverse of 45
; modulo 2^32 (45 * 0xA4FA4FA5 == 1 mod 2^32).
273 define i32 @udiv_div_by_180_exact(i32 %x)
274 ; SDAG-LABEL: udiv_div_by_180_exact:
276 ; SDAG-NEXT: lsr w8, w0, #2
277 ; SDAG-NEXT: mov w9, #20389 // =0x4fa5
278 ; SDAG-NEXT: movk w9, #42234, lsl #16
279 ; SDAG-NEXT: mul w0, w8, w9
282 ; GISEL-LABEL: udiv_div_by_180_exact:
284 ; GISEL-NEXT: lsr w8, w0, #2
285 ; GISEL-NEXT: mov w9, #20389 // =0x4fa5
286 ; GISEL-NEXT: movk w9, #42234, lsl #16
287 ; GISEL-NEXT: mul w0, w8, w9
290 %udiv = udiv exact i32 %x, 180
; <4 x i32> udiv carrying the `exact` flag. Despite the function name, the
; divisor lanes alternate between 104 (= 8 * 13) and 72 (= 8 * 9).
; Both selectors are expected to emit the same sequence: ushr #3 on every
; lane, then a vector mul by the constant-pool vector .LCPI8_0.
294 define <4 x i32> @udiv_div_by_104_exact(<4 x i32> %x)
295 ; SDAG-LABEL: udiv_div_by_104_exact:
297 ; SDAG-NEXT: adrp x8, .LCPI8_0
298 ; SDAG-NEXT: ushr v0.4s, v0.4s, #3
299 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
300 ; SDAG-NEXT: mul v0.4s, v0.4s, v1.4s
303 ; GISEL-LABEL: udiv_div_by_104_exact:
305 ; GISEL-NEXT: adrp x8, .LCPI8_0
306 ; GISEL-NEXT: ushr v0.4s, v0.4s, #3
307 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
308 ; GISEL-NEXT: mul v0.4s, v0.4s, v1.4s
311 %udiv = udiv exact <4 x i32> %x, <i32 104, i32 72, i32 104, i32 72>