1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=SDAG
3 ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=GISEL
5 ; These tests are taken from X86's combine-udiv.ll.
; Divides each of the eight i16 lanes of %x by the same constant, 23.
; The check lines visible here expect a multiply-based expansion instead of a
; divide: umull/umull2 widen-multiply by a constant, uzp2 keeps the high 16 bits
; of each 32-bit product, and sub / usra #1 / ushr #4 finish the quotient.
; The two selectors differ only in how the multiplier is produced: SDAG uses
; mov #25645 + dup, GISEL loads a vector from the constant pool (.LCPI0_0).
6 define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
7 ; SDAG-LABEL: combine_vec_udiv_uniform:
9 ; SDAG-NEXT: mov w8, #25645 // =0x642d
10 ; SDAG-NEXT: dup v1.8h, w8
11 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
12 ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
13 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
14 ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
15 ; SDAG-NEXT: usra v1.8h, v0.8h, #1
16 ; SDAG-NEXT: ushr v0.8h, v1.8h, #4
19 ; GISEL-LABEL: combine_vec_udiv_uniform:
21 ; GISEL-NEXT: adrp x8, .LCPI0_0
22 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
23 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
24 ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
25 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
26 ; GISEL-NEXT: sub v0.8h, v0.8h, v1.8h
27 ; GISEL-NEXT: usra v1.8h, v0.8h, #1
28 ; GISEL-NEXT: ushr v0.8h, v1.8h, #4
30 %1 = udiv <8 x i16> %x, <i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23>
; Divides each i16 lane of %x by a different constant. udiv is unsigned, so the
; negative literals act as large unsigned divisors (e.g. i16 -1 is 65535).
; Both expected sequences take their per-lane constants from the constant pool
; and have the same shape: a per-lane shift (ushl), a multiply-high
; (umull/umull2 + uzp2), a sub / second multiply-high / add fixup, and a final
; per-lane shift (ushl).
; GISEL additionally negates its shift-amount vectors with neg before ushl
; (USHL shifts right when the per-lane amount is negative) and finishes with
; cmeq against a splat of 1 followed by bif, which keeps the original %x lanes
; wherever the .LCPI1_4 vector equals 1.
; NOTE(review): .LCPI1_4 is presumably the divisor vector; the pool contents
; are not part of the checks, so confirm against the llc output.
34 define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
35 ; SDAG-LABEL: combine_vec_udiv_nonuniform:
37 ; SDAG-NEXT: adrp x8, .LCPI1_0
38 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
39 ; SDAG-NEXT: adrp x8, .LCPI1_1
40 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_1]
41 ; SDAG-NEXT: adrp x8, .LCPI1_2
42 ; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h
43 ; SDAG-NEXT: umull2 v3.4s, v1.8h, v2.8h
44 ; SDAG-NEXT: umull v1.4s, v1.4h, v2.4h
45 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_2]
46 ; SDAG-NEXT: adrp x8, .LCPI1_3
47 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v3.8h
48 ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
49 ; SDAG-NEXT: umull2 v3.4s, v0.8h, v2.8h
50 ; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h
51 ; SDAG-NEXT: uzp2 v0.8h, v0.8h, v3.8h
52 ; SDAG-NEXT: add v0.8h, v0.8h, v1.8h
53 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_3]
54 ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
57 ; GISEL-LABEL: combine_vec_udiv_nonuniform:
59 ; GISEL-NEXT: adrp x8, .LCPI1_3
60 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_3]
61 ; GISEL-NEXT: adrp x8, .LCPI1_2
62 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2]
63 ; GISEL-NEXT: adrp x8, .LCPI1_1
64 ; GISEL-NEXT: neg v1.8h, v1.8h
65 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
66 ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
67 ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
68 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
69 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_1]
70 ; GISEL-NEXT: adrp x8, .LCPI1_0
71 ; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h
72 ; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h
73 ; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h
74 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_0]
75 ; GISEL-NEXT: adrp x8, .LCPI1_4
76 ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
77 ; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI1_4]
78 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
79 ; GISEL-NEXT: neg v2.8h, v3.8h
80 ; GISEL-NEXT: movi v3.8h, #1
81 ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
82 ; GISEL-NEXT: cmeq v2.8h, v4.8h, v3.8h
83 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
85 %1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
; Per-lane unsigned divide of %x by <-34, 35, 36, -37, 38, -39, 40, -41>; the
; negative i16 literals are large unsigned divisors under udiv.
; The visible checks expect a per-lane shift (ushl), a single multiply-high
; (umull/umull2 + uzp2) and a second per-lane shift (ushl), with all constants
; loaded from the constant pool. Unlike combine_vec_udiv_nonuniform, no
; sub/add fixup appears in either sequence.
; GISEL negates each shift-amount vector (neg) before ushl and ends with
; cmeq against a splat of 1 plus bif, which keeps the original %x lanes where
; the .LCPI2_3 vector equals 1.
89 define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
90 ; SDAG-LABEL: combine_vec_udiv_nonuniform2:
92 ; SDAG-NEXT: adrp x8, .LCPI2_0
93 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
94 ; SDAG-NEXT: adrp x8, .LCPI2_1
95 ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
96 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_1]
97 ; SDAG-NEXT: adrp x8, .LCPI2_2
98 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
99 ; SDAG-NEXT: umull v0.4s, v0.4h, v1.4h
100 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_2]
101 ; SDAG-NEXT: uzp2 v0.8h, v0.8h, v2.8h
102 ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
105 ; GISEL-LABEL: combine_vec_udiv_nonuniform2:
107 ; GISEL-NEXT: adrp x8, .LCPI2_2
108 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_2]
109 ; GISEL-NEXT: adrp x8, .LCPI2_1
110 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_1]
111 ; GISEL-NEXT: adrp x8, .LCPI2_0
112 ; GISEL-NEXT: neg v1.8h, v1.8h
113 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
114 ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
115 ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
116 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_0]
117 ; GISEL-NEXT: adrp x8, .LCPI2_3
118 ; GISEL-NEXT: neg v2.8h, v2.8h
119 ; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI2_3]
120 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
121 ; GISEL-NEXT: movi v3.8h, #1
122 ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
123 ; GISEL-NEXT: cmeq v2.8h, v4.8h, v3.8h
124 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
126 %1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>
; Per-lane unsigned divide of %x by <7, 23, 25, 27, 31, 47, 63, 127>.
; The visible checks expect a multiply-high by a constant-pool vector
; (umull/umull2 + uzp2) with no shift before it, then the same sub / usra #1
; fixup that combine_vec_udiv_uniform uses, and finally a per-lane shift
; (ushl) whose amounts come from the constant pool.
; GISEL negates the shift amounts (neg) before ushl and ends with cmeq against
; a splat of 1 plus bif, which keeps the original %x lanes where the .LCPI3_2
; vector equals 1.
130 define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
131 ; SDAG-LABEL: combine_vec_udiv_nonuniform3:
133 ; SDAG-NEXT: adrp x8, .LCPI3_0
134 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
135 ; SDAG-NEXT: adrp x8, .LCPI3_1
136 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
137 ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
138 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
139 ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
140 ; SDAG-NEXT: usra v1.8h, v0.8h, #1
141 ; SDAG-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
142 ; SDAG-NEXT: ushl v0.8h, v1.8h, v0.8h
145 ; GISEL-LABEL: combine_vec_udiv_nonuniform3:
147 ; GISEL-NEXT: adrp x8, .LCPI3_1
148 ; GISEL-NEXT: movi v3.8h, #1
149 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_1]
150 ; GISEL-NEXT: adrp x8, .LCPI3_0
151 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
152 ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
153 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
154 ; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h
155 ; GISEL-NEXT: usra v1.8h, v2.8h, #1
156 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
157 ; GISEL-NEXT: adrp x8, .LCPI3_2
158 ; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI3_2]
159 ; GISEL-NEXT: neg v2.8h, v2.8h
160 ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
161 ; GISEL-NEXT: cmeq v2.8h, v4.8h, v3.8h
162 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
164 %1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
; <16 x i8> case where only lane 0 has a real divisor: i8 -64 (192 as an
; unsigned value). The other fifteen lanes divide by 1.
; SDAG: multiply-high on bytes by a movi #171 splat (umull/umull2 + uzp2),
; ushr #7, then the original %x and the computed value are each masked with a
; constant-pool vector (and) and merged with orr.
; GISEL: multiplier and shift amounts come from the constant pool, the shift
; amounts are negated (neg) before ushl, and the merge is done with cmeq against
; a splat of 1 plus bif, which keeps the original %x lanes where the .LCPI4_2
; vector equals 1.
168 define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
169 ; SDAG-LABEL: combine_vec_udiv_nonuniform4:
171 ; SDAG-NEXT: movi v1.16b, #171
172 ; SDAG-NEXT: adrp x8, .LCPI4_0
173 ; SDAG-NEXT: adrp x9, .LCPI4_1
174 ; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_1]
175 ; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b
176 ; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b
177 ; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
178 ; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b
179 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
180 ; SDAG-NEXT: ushr v1.16b, v1.16b, #7
181 ; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
182 ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
185 ; GISEL-LABEL: combine_vec_udiv_nonuniform4:
187 ; GISEL-NEXT: adrp x8, .LCPI4_1
188 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_1]
189 ; GISEL-NEXT: adrp x8, .LCPI4_0
190 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_0]
191 ; GISEL-NEXT: adrp x8, .LCPI4_2
192 ; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
193 ; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
194 ; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI4_2]
195 ; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
196 ; GISEL-NEXT: neg v2.16b, v3.16b
197 ; GISEL-NEXT: movi v3.16b, #1
198 ; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
199 ; GISEL-NEXT: cmeq v2.16b, v4.16b, v3.16b
200 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
202 %div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; Per-lane unsigned divide of %a0 by <1, 119, 73, -111, -3, 118, 32, 31>: the
; vector mixes a divide-by-one lane, a power of two (32), ordinary constants
; and negative literals (large unsigned divisors under udiv).
; NOTE(review): the name suggests a regression test for bug report PR38477;
; the file itself does not say so - confirm before relying on it.
; Both expected sequences run two multiply-highs (umull/umull2 + uzp2) joined
; by a sub/add fixup, followed by a per-lane shift (ushl), with all constants
; loaded from the constant pool.
; SDAG then masks the original %a0 and the computed value with constant-pool
; vectors (and) and merges them with orr. GISEL negates the shift amounts (neg)
; before ushl and merges with cmeq against a splat of 1 plus bif, which keeps
; the original %a0 lanes where the .LCPI5_3 vector equals 1.
206 define <8 x i16> @pr38477(<8 x i16> %a0) {
207 ; SDAG-LABEL: pr38477:
209 ; SDAG-NEXT: adrp x8, .LCPI5_0
210 ; SDAG-NEXT: adrp x9, .LCPI5_4
211 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
212 ; SDAG-NEXT: adrp x8, .LCPI5_1
213 ; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
214 ; SDAG-NEXT: adrp x8, .LCPI5_2
215 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
216 ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
217 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
218 ; SDAG-NEXT: sub v2.8h, v0.8h, v1.8h
219 ; SDAG-NEXT: umull2 v4.4s, v2.8h, v3.8h
220 ; SDAG-NEXT: umull v2.4s, v2.4h, v3.4h
221 ; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI5_4]
222 ; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
223 ; SDAG-NEXT: uzp2 v2.8h, v2.8h, v4.8h
224 ; SDAG-NEXT: add v1.8h, v2.8h, v1.8h
225 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
226 ; SDAG-NEXT: adrp x8, .LCPI5_3
227 ; SDAG-NEXT: ushl v1.8h, v1.8h, v2.8h
228 ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_3]
229 ; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
230 ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
233 ; GISEL-LABEL: pr38477:
235 ; GISEL-NEXT: adrp x8, .LCPI5_2
236 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_2]
237 ; GISEL-NEXT: adrp x8, .LCPI5_1
238 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
239 ; GISEL-NEXT: adrp x8, .LCPI5_0
240 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
241 ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
242 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
243 ; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h
244 ; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h
245 ; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h
246 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_0]
247 ; GISEL-NEXT: adrp x8, .LCPI5_3
248 ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
249 ; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_3]
250 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
251 ; GISEL-NEXT: neg v2.8h, v3.8h
252 ; GISEL-NEXT: movi v3.8h, #1
253 ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
254 ; GISEL-NEXT: cmeq v2.8h, v4.8h, v3.8h
255 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
257 %1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>