1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=SDAG
3 ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=GISEL
5 ; These tests are taken from the combine-udiv.ll in X86.
; Unsigned divide of every i16 lane by the same constant (23).
; Neither expected sequence contains a divide instruction: the input is
; widen-multiplied by a constant (umull/umull2), the upper 16 bits of each
; 32-bit product are kept (uzp2), and the result is finished with
; sub, usra #1 (accumulate the difference shifted right by one) and ushr #4.
; SDAG splats the multiplier from an immediate (mov #25645 + dup), whereas
; GISEL loads it from the constant pool (.LCPI0_0).
6 define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
7 ; SDAG-LABEL: combine_vec_udiv_uniform:
9 ; SDAG-NEXT: mov w8, #25645 // =0x642d
10 ; SDAG-NEXT: dup v1.8h, w8
11 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
12 ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
13 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
14 ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
15 ; SDAG-NEXT: usra v1.8h, v0.8h, #1
16 ; SDAG-NEXT: ushr v0.8h, v1.8h, #4
19 ; GISEL-LABEL: combine_vec_udiv_uniform:
21 ; GISEL-NEXT: adrp x8, .LCPI0_0
22 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
23 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
24 ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
25 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
26 ; GISEL-NEXT: sub v0.8h, v0.8h, v1.8h
27 ; GISEL-NEXT: usra v1.8h, v0.8h, #1
28 ; GISEL-NEXT: ushr v0.8h, v1.8h, #4
30 %1 = udiv <8 x i16> %x, <i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23>
; Unsigned divide of each i16 lane by a different constant. udiv treats its
; operands as unsigned, so the negative literals below denote large divisors
; (e.g. i16 -1 is 65535 and i16 -32768 is 32768).
; Expected SDAG shape: a per-lane ushl by a constant-pool vector, a
; multiply-high (umull/umull2 + uzp2), a sub / second multiply-high / add
; fix-up, and a final per-lane ushl by another constant-pool vector.
; Expected GISEL shape: the same arithmetic chain, but each shift vector is
; negated (neg) before ushl, and the result is blended lane-wise with the
; original input via cmeq + bif on two constant-pool vectors.
34 define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
35 ; SDAG-LABEL: combine_vec_udiv_nonuniform:
37 ; SDAG-NEXT: adrp x8, .LCPI1_0
38 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
39 ; SDAG-NEXT: adrp x8, .LCPI1_1
40 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_1]
41 ; SDAG-NEXT: adrp x8, .LCPI1_2
42 ; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h
43 ; SDAG-NEXT: umull2 v3.4s, v1.8h, v2.8h
44 ; SDAG-NEXT: umull v1.4s, v1.4h, v2.4h
45 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_2]
46 ; SDAG-NEXT: adrp x8, .LCPI1_3
47 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v3.8h
48 ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
49 ; SDAG-NEXT: umull2 v3.4s, v0.8h, v2.8h
50 ; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h
51 ; SDAG-NEXT: uzp2 v0.8h, v0.8h, v3.8h
52 ; SDAG-NEXT: add v0.8h, v0.8h, v1.8h
53 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_3]
54 ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
57 ; GISEL-LABEL: combine_vec_udiv_nonuniform:
59 ; GISEL-NEXT: adrp x8, .LCPI1_4
60 ; GISEL-NEXT: adrp x9, .LCPI1_0
61 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4]
62 ; GISEL-NEXT: adrp x8, .LCPI1_3
63 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3]
64 ; GISEL-NEXT: adrp x8, .LCPI1_2
65 ; GISEL-NEXT: neg v1.8h, v1.8h
66 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
67 ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
68 ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
69 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
70 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_2]
71 ; GISEL-NEXT: adrp x8, .LCPI1_1
72 ; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h
73 ; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h
74 ; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h
75 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_1]
76 ; GISEL-NEXT: adrp x8, .LCPI1_5
77 ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
78 ; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI1_0]
79 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
80 ; GISEL-NEXT: neg v2.8h, v3.8h
81 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5]
82 ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
83 ; GISEL-NEXT: cmeq v2.8h, v3.8h, v4.8h
84 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
86 %1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
; Unsigned divide of each i16 lane by a different constant; the negative
; literals are interpreted as unsigned by udiv (e.g. i16 -34 is 65502).
; Unlike combine_vec_udiv_nonuniform, the expected sequences here have no
; sub/add fix-up step: SDAG is ushl, multiply-high (umull/umull2 + uzp2),
; ushl, with all three operands loaded from the constant pool.
; GISEL negates each shift vector before ushl and finishes with a cmeq + bif
; lane-wise blend against the original input.
90 define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
91 ; SDAG-LABEL: combine_vec_udiv_nonuniform2:
93 ; SDAG-NEXT: adrp x8, .LCPI2_0
94 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
95 ; SDAG-NEXT: adrp x8, .LCPI2_1
96 ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
97 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_1]
98 ; SDAG-NEXT: adrp x8, .LCPI2_2
99 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
100 ; SDAG-NEXT: umull v0.4s, v0.4h, v1.4h
101 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_2]
102 ; SDAG-NEXT: uzp2 v0.8h, v0.8h, v2.8h
103 ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
106 ; GISEL-LABEL: combine_vec_udiv_nonuniform2:
108 ; GISEL-NEXT: adrp x8, .LCPI2_3
109 ; GISEL-NEXT: adrp x9, .LCPI2_0
110 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3]
111 ; GISEL-NEXT: adrp x8, .LCPI2_2
112 ; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI2_0]
113 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2]
114 ; GISEL-NEXT: adrp x8, .LCPI2_1
115 ; GISEL-NEXT: neg v1.8h, v1.8h
116 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
117 ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
118 ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
119 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_1]
120 ; GISEL-NEXT: adrp x8, .LCPI2_4
121 ; GISEL-NEXT: neg v2.8h, v2.8h
122 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
123 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_4]
124 ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
125 ; GISEL-NEXT: cmeq v2.8h, v3.8h, v4.8h
126 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
128 %1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>
; Unsigned divide of each i16 lane by a different positive constant
; (7, 23, 25, 27, 31, 47, 63, 127).
; Expected SDAG shape: multiply-high by a constant-pool vector
; (umull/umull2 + uzp2), then sub and usra #1 as in the uniform case, and a
; final per-lane ushl by a second constant-pool vector (the uniform test
; uses an immediate ushr here instead).
; GISEL follows the same chain, negates the shift vector before ushl, and
; ends with a cmeq + bif lane-wise blend against the original input.
132 define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
133 ; SDAG-LABEL: combine_vec_udiv_nonuniform3:
135 ; SDAG-NEXT: adrp x8, .LCPI3_0
136 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
137 ; SDAG-NEXT: adrp x8, .LCPI3_1
138 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
139 ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
140 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
141 ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
142 ; SDAG-NEXT: usra v1.8h, v0.8h, #1
143 ; SDAG-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
144 ; SDAG-NEXT: ushl v0.8h, v1.8h, v0.8h
147 ; GISEL-LABEL: combine_vec_udiv_nonuniform3:
149 ; GISEL-NEXT: adrp x8, .LCPI3_2
150 ; GISEL-NEXT: adrp x9, .LCPI3_0
151 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2]
152 ; GISEL-NEXT: adrp x8, .LCPI3_1
153 ; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_0]
154 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
155 ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
156 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
157 ; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h
158 ; GISEL-NEXT: usra v1.8h, v2.8h, #1
159 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
160 ; GISEL-NEXT: adrp x8, .LCPI3_3
161 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_3]
162 ; GISEL-NEXT: neg v2.8h, v2.8h
163 ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
164 ; GISEL-NEXT: cmeq v2.8h, v3.8h, v4.8h
165 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
167 %1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
; <16 x i8> variant: lane 0 is divided by i8 -64 (192 when read as unsigned
; by udiv) and every other lane is divided by 1.
; Expected SDAG shape: a byte-wise multiply-high by the splat immediate 171
; (movi, umull/umull2 + uzp2), ushr #7, then the shifted result and the
; original input are each masked with a constant-pool vector (and) and merged
; with orr.
; Expected GISEL shape: multiply-high by a constant-pool vector, ushl by a
; negated constant-pool shift vector, and a cmeq + bif lane-wise blend with
; the original input.
171 define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
172 ; SDAG-LABEL: combine_vec_udiv_nonuniform4:
174 ; SDAG-NEXT: movi v1.16b, #171
175 ; SDAG-NEXT: adrp x8, .LCPI4_0
176 ; SDAG-NEXT: adrp x9, .LCPI4_1
177 ; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_1]
178 ; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b
179 ; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b
180 ; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
181 ; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b
182 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
183 ; SDAG-NEXT: ushr v1.16b, v1.16b, #7
184 ; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
185 ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
188 ; GISEL-LABEL: combine_vec_udiv_nonuniform4:
190 ; GISEL-NEXT: adrp x8, .LCPI4_2
191 ; GISEL-NEXT: adrp x9, .LCPI4_0
192 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2]
193 ; GISEL-NEXT: adrp x8, .LCPI4_1
194 ; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0]
195 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_1]
196 ; GISEL-NEXT: adrp x8, .LCPI4_3
197 ; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
198 ; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
199 ; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
200 ; GISEL-NEXT: neg v2.16b, v3.16b
201 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_3]
202 ; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
203 ; GISEL-NEXT: cmeq v2.16b, v3.16b, v4.16b
204 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
206 %div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; NOTE(review): the name suggests a regression test for bug report PR38477;
; the report itself is not visible from this file - confirm before relying
; on that.
; Unsigned divide of each i16 lane by a mixed set of constants that includes
; a divide-by-one lane (lane 0), a power of two (32) and negative literals,
; which udiv reads as unsigned (e.g. i16 -3 is 65533).
; Expected SDAG shape: multiply-high (umull/umull2 + uzp2), sub, a second
; multiply-high, add, a per-lane ushl by a constant-pool vector, then the
; result and the original input are each masked (and) and merged with orr.
; Expected GISEL shape: the same arithmetic chain with the shift vector
; negated before ushl, ending in a cmeq + bif lane-wise blend with the
; original input.
210 define <8 x i16> @pr38477(<8 x i16> %a0) {
211 ; SDAG-LABEL: pr38477:
213 ; SDAG-NEXT: adrp x8, .LCPI5_0
214 ; SDAG-NEXT: adrp x9, .LCPI5_4
215 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
216 ; SDAG-NEXT: adrp x8, .LCPI5_1
217 ; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
218 ; SDAG-NEXT: adrp x8, .LCPI5_2
219 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
220 ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
221 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
222 ; SDAG-NEXT: sub v2.8h, v0.8h, v1.8h
223 ; SDAG-NEXT: umull2 v4.4s, v2.8h, v3.8h
224 ; SDAG-NEXT: umull v2.4s, v2.4h, v3.4h
225 ; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI5_4]
226 ; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
227 ; SDAG-NEXT: uzp2 v2.8h, v2.8h, v4.8h
228 ; SDAG-NEXT: add v1.8h, v2.8h, v1.8h
229 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
230 ; SDAG-NEXT: adrp x8, .LCPI5_3
231 ; SDAG-NEXT: ushl v1.8h, v1.8h, v2.8h
232 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_3]
233 ; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
234 ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
237 ; GISEL-LABEL: pr38477:
239 ; GISEL-NEXT: adrp x8, .LCPI5_3
240 ; GISEL-NEXT: adrp x9, .LCPI5_0
241 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3]
242 ; GISEL-NEXT: adrp x8, .LCPI5_2
243 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
244 ; GISEL-NEXT: adrp x8, .LCPI5_1
245 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
246 ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
247 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
248 ; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h
249 ; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h
250 ; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h
251 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
252 ; GISEL-NEXT: adrp x8, .LCPI5_4
253 ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
254 ; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI5_0]
255 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
256 ; GISEL-NEXT: neg v2.8h, v3.8h
257 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4]
258 ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
259 ; GISEL-NEXT: cmeq v2.8h, v3.8h, v4.8h
260 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
262 %1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>