1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
4 ; At the moment, BuildUREMEqFold does not handle nonsplat vectors.
; Non-splat divisor, every lane odd: urem <4 x i32> %X by <3, 5, 7, 9>, then
; icmp eq against an all-zero vector, then zext of the <4 x i1> result to
; <4 x i32>.
; The assertions below expect the remainder to be materialized explicitly:
; multiply-high (umull/umull2/uzp2), a sub/add fix-up, a per-lane shift (ushl
; by a negated shift vector), then mls, before the final cmeq against #0.
6 define <4 x i32> @test_urem_odd_div(<4 x i32> %X) nounwind readnone {
7 ; CHECK-LABEL: test_urem_odd_div:
9 ; CHECK-NEXT: adrp x8, .LCPI0_0
10 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
11 ; CHECK-NEXT: adrp x8, .LCPI0_1
12 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_1]
13 ; CHECK-NEXT: adrp x8, .LCPI0_2
14 ; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
15 ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
16 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
17 ; CHECK-NEXT: sub v3.4s, v0.4s, v1.4s
18 ; CHECK-NEXT: umull2 v4.2d, v3.4s, v2.4s
19 ; CHECK-NEXT: umull v2.2d, v3.2s, v2.2s
20 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI0_2]
21 ; CHECK-NEXT: adrp x8, .LCPI0_3
22 ; CHECK-NEXT: uzp2 v2.4s, v2.4s, v4.4s
23 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI0_3]
24 ; CHECK-NEXT: neg v3.4s, v3.4s
25 ; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
26 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
27 ; CHECK-NEXT: mls v0.4s, v1.4s, v4.4s
28 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
29 ; CHECK-NEXT: movi v1.4s, #1
30 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
32 %urem = urem <4 x i32> %X, <i32 3, i32 5, i32 7, i32 9>
33 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
34 %ret = zext <4 x i1> %cmp to <4 x i32>
; Non-splat divisor, every lane even: urem <4 x i32> %X by <6, 10, 12, 14>,
; then icmp eq against an all-zero vector, then zext to <4 x i32>.
; The assertions below expect a per-lane pre-shift of the input (ushl by a
; negated shift vector), a multiply-high (umull/umull2/uzp2), a per-lane
; post-shift, then mls to form the remainder, and finally cmeq against #0.
38 define <4 x i32> @test_urem_even_div(<4 x i32> %X) nounwind readnone {
39 ; CHECK-LABEL: test_urem_even_div:
41 ; CHECK-NEXT: adrp x8, .LCPI1_0
42 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
43 ; CHECK-NEXT: adrp x8, .LCPI1_1
44 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_1]
45 ; CHECK-NEXT: adrp x8, .LCPI1_2
46 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI1_2]
47 ; CHECK-NEXT: neg v1.4s, v1.4s
48 ; CHECK-NEXT: adrp x8, .LCPI1_3
49 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
50 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
51 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
52 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_3]
53 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
54 ; CHECK-NEXT: neg v3.4s, v3.4s
55 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
56 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
57 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
58 ; CHECK-NEXT: movi v1.4s, #1
59 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
61 %urem = urem <4 x i32> %X, <i32 6, i32 10, i32 12, i32 14>
62 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
63 %ret = zext <4 x i1> %cmp to <4 x i32>
; Non-splat divisor where one lane is a power of two: urem <4 x i32> %X by
; <6, 10, 12, 16>, then icmp eq against an all-zero vector, then zext to
; <4 x i32>.
; The assertions below expect all four lanes to go through the same
; vector sequence (multiply-high, per-lane shift, mls) before the cmeq
; against #0; the power-of-two lane gets no separate instruction sequence.
67 define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind readnone {
68 ; CHECK-LABEL: test_urem_pow2:
70 ; CHECK-NEXT: adrp x8, .LCPI2_0
71 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
72 ; CHECK-NEXT: adrp x8, .LCPI2_1
73 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_1]
74 ; CHECK-NEXT: adrp x8, .LCPI2_2
75 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI2_2]
76 ; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
77 ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
78 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
79 ; CHECK-NEXT: neg v2.4s, v2.4s
80 ; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
81 ; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
82 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
83 ; CHECK-NEXT: movi v1.4s, #1
84 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
86 %urem = urem <4 x i32> %X, <i32 6, i32 10, i32 12, i32 16>
87 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
88 %ret = zext <4 x i1> %cmp to <4 x i32>
; Non-splat divisor where one lane is 1: urem <4 x i32> %X by <6, 1, 12, 14>,
; then icmp eq against an all-zero vector, then zext to <4 x i32>.
; Compared with a plain shift/multiply-high/shift/mls sequence, the
; assertions below additionally expect a fifth constant-pool load and a bsl
; that selects between the original input (v0) and the computed quotient (v1)
; under a loaded mask (v2) before the mls.
; NOTE(review): the bsl presumably passes the input through unchanged for the
; divide-by-one lane -- the mask contents are not visible here; confirm.
92 define <4 x i32> @test_urem_one(<4 x i32> %X) nounwind readnone {
93 ; CHECK-LABEL: test_urem_one:
95 ; CHECK-NEXT: adrp x8, .LCPI3_0
96 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
97 ; CHECK-NEXT: adrp x8, .LCPI3_1
98 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
99 ; CHECK-NEXT: adrp x8, .LCPI3_2
100 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI3_2]
101 ; CHECK-NEXT: neg v1.4s, v1.4s
102 ; CHECK-NEXT: adrp x8, .LCPI3_3
103 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
104 ; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
105 ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
106 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
107 ; CHECK-NEXT: adrp x8, .LCPI3_4
108 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
109 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI3_4]
110 ; CHECK-NEXT: neg v3.4s, v3.4s
111 ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
112 ; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
113 ; CHECK-NEXT: mls v0.4s, v2.4s, v4.4s
114 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
115 ; CHECK-NEXT: movi v1.4s, #1
116 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
118 %urem = urem <4 x i32> %X, <i32 6, i32 1, i32 12, i32 14>
119 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
120 %ret = zext <4 x i1> %cmp to <4 x i32>
; Splat divisor, non-splat comparison constant: urem <4 x i32> %X by
; <5, 5, 5, 5>, then icmp eq against <0, 0, 1, 0>, then zext to <4 x i32>.
; The assertions below expect the splat multiplier to be built from an
; immediate (mov/movk + dup) and the divisor from movi #5, while the
; comparison vector is loaded from the constant pool and compared with a
; register-register cmeq rather than the compare-with-zero form.
124 define <4 x i32> @test_urem_comp(<4 x i32> %X) nounwind readnone {
125 ; CHECK-LABEL: test_urem_comp:
127 ; CHECK-NEXT: mov w8, #52429
128 ; CHECK-NEXT: movk w8, #52428, lsl #16
129 ; CHECK-NEXT: adrp x9, .LCPI4_0
130 ; CHECK-NEXT: dup v2.4s, w8
131 ; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI4_0]
132 ; CHECK-NEXT: umull2 v4.2d, v0.4s, v2.4s
133 ; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s
134 ; CHECK-NEXT: uzp2 v2.4s, v2.4s, v4.4s
135 ; CHECK-NEXT: movi v1.4s, #5
136 ; CHECK-NEXT: ushr v2.4s, v2.4s, #2
137 ; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s
138 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v3.4s
139 ; CHECK-NEXT: movi v1.4s, #1
140 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
142 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>
143 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 1, i32 0>
144 %ret = zext <4 x i1> %cmp to <4 x i32>
; Non-splat divisor and non-splat comparison constant: urem <4 x i32> %X by
; <6, 5, 6, 5>, then icmp eq against <1, 0, 1, 0>, then zext to <4 x i32>.
; The assertions below expect three constant-pool loads (multiplier, divisor
; and comparison vectors), a multiply-high followed by a uniform ushr #2, an
; mls to form the remainder, and a register-register cmeq.
148 define <4 x i32> @test_urem_both(<4 x i32> %X) nounwind readnone {
149 ; CHECK-LABEL: test_urem_both:
151 ; CHECK-NEXT: adrp x8, .LCPI5_0
152 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
153 ; CHECK-NEXT: adrp x8, .LCPI5_1
154 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_1]
155 ; CHECK-NEXT: adrp x8, .LCPI5_2
156 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
157 ; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
158 ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
159 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
160 ; CHECK-NEXT: ushr v1.4s, v1.4s, #2
161 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
162 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v3.4s
163 ; CHECK-NEXT: movi v1.4s, #1
164 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
166 %urem = urem <4 x i32> %X, <i32 6, i32 5, i32 6, i32 5>
167 %cmp = icmp eq <4 x i32> %urem, <i32 1, i32 0, i32 1, i32 0>
168 %ret = zext <4 x i1> %cmp to <4 x i32>
; Divisor with an undef lane: urem <4 x i32> %X by <5, 5, undef, 5>, then
; icmp eq against an all-zero vector, then zext to <4 x i32>.
; The visible assertions contain no multiply, shift or mls at all: only a
; cmeq of v0 against #0 followed by the mask with 1, i.e. no remainder
; computation is expected in the output for this input.
172 define <4 x i32> @test_urem_div_undef(<4 x i32> %X) nounwind readnone {
173 ; CHECK-LABEL: test_urem_div_undef:
175 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
176 ; CHECK-NEXT: movi v1.4s, #1
177 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
179 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 undef, i32 5>
180 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
181 %ret = zext <4 x i1> %cmp to <4 x i32>
; Splat divisor, comparison constant with an undef lane: urem <4 x i32> %X by
; <5, 5, 5, 5>, then icmp eq against <0, undef, 0, 0>, then zext to
; <4 x i32>.
; The assertions below expect the full remainder sequence for the splat
; divisor (immediate multiplier via mov/movk + dup, multiply-high, ushr #2,
; mls with movi #5) and a compare-with-zero cmeq; no comparison vector is
; loaded from the constant pool.
185 define <4 x i32> @test_urem_comp_undef(<4 x i32> %X) nounwind readnone {
186 ; CHECK-LABEL: test_urem_comp_undef:
188 ; CHECK-NEXT: mov w8, #52429
189 ; CHECK-NEXT: movk w8, #52428, lsl #16
190 ; CHECK-NEXT: dup v2.4s, w8
191 ; CHECK-NEXT: umull2 v3.2d, v0.4s, v2.4s
192 ; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s
193 ; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s
194 ; CHECK-NEXT: movi v1.4s, #5
195 ; CHECK-NEXT: ushr v2.4s, v2.4s, #2
196 ; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s
197 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
198 ; CHECK-NEXT: movi v1.4s, #1
199 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
201 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>
202 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 undef, i32 0, i32 0>
203 %ret = zext <4 x i1> %cmp to <4 x i32>
; Undef lanes in both constants: urem <4 x i32> %X by <5, 5, undef, 5>, then
; icmp eq against <0, undef, 0, 0>, then zext to <4 x i32>.
; The visible assertions contain no multiply, shift or mls: only a cmeq of
; v0 against #0 followed by the mask with 1, i.e. no remainder computation
; is expected in the output for this input.
207 define <4 x i32> @test_urem_both_undef(<4 x i32> %X) nounwind readnone {
208 ; CHECK-LABEL: test_urem_both_undef:
210 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
211 ; CHECK-NEXT: movi v1.4s, #1
212 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
214 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 undef, i32 5>
215 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 undef, i32 0, i32 0>
216 %ret = zext <4 x i1> %cmp to <4 x i32>
; Non-splat divisor mixing odd and even lanes: urem <4 x i32> %X by
; <5, 5, 6, 6>, then icmp eq against an all-zero vector, then zext to
; <4 x i32>.
; The assertions below expect two constant-pool loads (multiplier and divisor
; vectors), a multiply-high followed by a uniform ushr #2 shared by all
; lanes, an mls to form the remainder, and a compare-with-zero cmeq.
220 define <4 x i32> @test_urem_div_even_odd(<4 x i32> %X) nounwind readnone {
221 ; CHECK-LABEL: test_urem_div_even_odd:
223 ; CHECK-NEXT: adrp x8, .LCPI9_0
224 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0]
225 ; CHECK-NEXT: adrp x8, .LCPI9_1
226 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_1]
227 ; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
228 ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
229 ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
230 ; CHECK-NEXT: ushr v1.4s, v1.4s, #2
231 ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
232 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
233 ; CHECK-NEXT: movi v1.4s, #1
234 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
236 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 6, i32 6>
237 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
238 %ret = zext <4 x i1> %cmp to <4 x i32>