1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
; Tests `zext((X srem splat(25)) == 0)` on <4 x i32> with an odd splat divisor.
; The expected code computes the remainder explicitly:
;   - w8 = 34079 | (20971 << 16) = 0x51EB851F is the multiplier, splatted to v2;
;   - smull/smull2 + uzp2 keep the high 32 bits of each 64-bit product;
;   - sshr #3 plus usra #31 (adds the sign bit) form the quotient in v3;
;   - mls subtracts quotient * 25 from X, leaving the remainder in v0;
;   - cmeq #0 / and with splat(1) produce the zero-extended i1 result.
5 define <4 x i32> @test_srem_odd_25(<4 x i32> %X) nounwind {
6 ; CHECK-LABEL: test_srem_odd_25:
8 ; CHECK-NEXT: mov w8, #34079
9 ; CHECK-NEXT: movk w8, #20971, lsl #16
10 ; CHECK-NEXT: dup v2.4s, w8
11 ; CHECK-NEXT: smull2 v3.2d, v0.4s, v2.4s
12 ; CHECK-NEXT: smull v2.2d, v0.2s, v2.2s
13 ; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s
14 ; CHECK-NEXT: sshr v3.4s, v2.4s, #3
15 ; CHECK-NEXT: movi v1.4s, #25
16 ; CHECK-NEXT: usra v3.4s, v2.4s, #31
17 ; CHECK-NEXT: mls v0.4s, v3.4s, v1.4s
18 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
19 ; CHECK-NEXT: movi v1.4s, #1
20 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
22 %srem = srem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
23 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
24 %ret = zext <4 x i1> %cmp to <4 x i32>
; Tests `zext((X srem splat(100)) == 0)` on <4 x i32> with an even splat divisor.
; The expected code has the same shape as the divisor-25 case: the same
; 0x51EB851F multiplier (34079 | 20971 << 16), but the high half of the product
; is shifted right by 5 instead of 3, and mls multiplies the quotient by 100.
29 define <4 x i32> @test_srem_even_100(<4 x i32> %X) nounwind {
30 ; CHECK-LABEL: test_srem_even_100:
32 ; CHECK-NEXT: mov w8, #34079
33 ; CHECK-NEXT: movk w8, #20971, lsl #16
34 ; CHECK-NEXT: dup v2.4s, w8
35 ; CHECK-NEXT: smull2 v3.2d, v0.4s, v2.4s
36 ; CHECK-NEXT: smull v2.2d, v0.2s, v2.2s
37 ; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s
38 ; CHECK-NEXT: sshr v3.4s, v2.4s, #5
39 ; CHECK-NEXT: movi v1.4s, #100
40 ; CHECK-NEXT: usra v3.4s, v2.4s, #31
41 ; CHECK-NEXT: mls v0.4s, v3.4s, v1.4s
42 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
43 ; CHECK-NEXT: movi v1.4s, #1
44 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
46 %srem = srem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
47 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
48 %ret = zext <4 x i1> %cmp to <4 x i32>
52 ;------------------------------------------------------------------------------;
53 ; Comparison constant has undef elements.
54 ;------------------------------------------------------------------------------;
; Same computation as the splat-25 test, except that lane 2 of the constant
; compared against is `undef` rather than 0. The expected instruction sequence
; is identical to the all-zero comparison: cmeq still compares every lane
; against #0.
56 define <4 x i32> @test_srem_odd_undef1(<4 x i32> %X) nounwind {
57 ; CHECK-LABEL: test_srem_odd_undef1:
59 ; CHECK-NEXT: mov w8, #34079
60 ; CHECK-NEXT: movk w8, #20971, lsl #16
61 ; CHECK-NEXT: dup v2.4s, w8
62 ; CHECK-NEXT: smull2 v3.2d, v0.4s, v2.4s
63 ; CHECK-NEXT: smull v2.2d, v0.2s, v2.2s
64 ; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s
65 ; CHECK-NEXT: sshr v3.4s, v2.4s, #3
66 ; CHECK-NEXT: movi v1.4s, #25
67 ; CHECK-NEXT: usra v3.4s, v2.4s, #31
68 ; CHECK-NEXT: mls v0.4s, v3.4s, v1.4s
69 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
70 ; CHECK-NEXT: movi v1.4s, #1
71 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
73 %srem = srem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
74 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 undef, i32 0>
75 %ret = zext <4 x i1> %cmp to <4 x i32>
; Same computation as the splat-100 test, except that lane 2 of the constant
; compared against is `undef` rather than 0. The expected instruction sequence
; is identical to the all-zero comparison (shift by 5, multiply-subtract by 100,
; cmeq against #0 in every lane).
79 define <4 x i32> @test_srem_even_undef1(<4 x i32> %X) nounwind {
80 ; CHECK-LABEL: test_srem_even_undef1:
82 ; CHECK-NEXT: mov w8, #34079
83 ; CHECK-NEXT: movk w8, #20971, lsl #16
84 ; CHECK-NEXT: dup v2.4s, w8
85 ; CHECK-NEXT: smull2 v3.2d, v0.4s, v2.4s
86 ; CHECK-NEXT: smull v2.2d, v0.2s, v2.2s
87 ; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s
88 ; CHECK-NEXT: sshr v3.4s, v2.4s, #5
89 ; CHECK-NEXT: movi v1.4s, #100
90 ; CHECK-NEXT: usra v3.4s, v2.4s, #31
91 ; CHECK-NEXT: mls v0.4s, v3.4s, v1.4s
92 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
93 ; CHECK-NEXT: movi v1.4s, #1
94 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
96 %srem = srem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
97 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 undef, i32 0>
98 %ret = zext <4 x i1> %cmp to <4 x i32>
102 ;------------------------------------------------------------------------------;
104 ;------------------------------------------------------------------------------;
106 ; We can lower remainder of division by powers of two much better elsewhere.
; Tests `zext((X srem splat(16)) == 0)`. No multiply is expected here:
;   - sshr #31 builds a per-lane sign mask;
;   - usra #28 adds (mask >> 28), i.e. a bias of 15 for negative lanes, to a
;     copy of X;
;   - bic #15 clears the low four bits, and sub removes that from X, leaving
;     the signed remainder, which is then compared against zero.
107 define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind {
108 ; CHECK-LABEL: test_srem_pow2:
110 ; CHECK-NEXT: sshr v1.4s, v0.4s, #31
111 ; CHECK-NEXT: mov v2.16b, v0.16b
112 ; CHECK-NEXT: usra v2.4s, v1.4s, #28
113 ; CHECK-NEXT: bic v2.4s, #15
114 ; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
115 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
116 ; CHECK-NEXT: movi v1.4s, #1
117 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
119 %srem = srem <4 x i32> %X, <i32 16, i32 16, i32 16, i32 16>
120 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
121 %ret = zext <4 x i1> %cmp to <4 x i32>
125 ; Remainder of division by all-ones is folded to a constant here.
; The divisor 4294967295 is the i32 bit pattern of -1. The expected output is a
; single `movi v0.4s, #1`: the srem/icmp/zext chain collapses to a splat of 1,
; with no division or multiply sequence emitted.
126 define <4 x i32> @test_srem_allones(<4 x i32> %X) nounwind {
127 ; CHECK-LABEL: test_srem_allones:
129 ; CHECK-NEXT: movi v0.4s, #1
131 %srem = srem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
132 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
133 %ret = zext <4 x i1> %cmp to <4 x i32>
137 ; If all divisors are ones, this is constant-folded.
; `X srem 1` is 0 in every lane, so the `icmp eq ..., 0` is always true and the
; zero-extended result is expected to be materialized as a splat of 1.
138 define <4 x i32> @test_srem_one_eq(<4 x i32> %X) nounwind {
139 ; CHECK-LABEL: test_srem_one_eq:
141 ; CHECK-NEXT: movi v0.4s, #1
143 %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
144 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
145 %ret = zext <4 x i1> %cmp to <4 x i32>
; Counterpart of the all-ones-divisor `eq` test using `icmp ne`: `X srem 1` is
; 0 in every lane, so the comparison is always false and the zero-extended
; result is expected to be materialized as an all-zero vector.
148 define <4 x i32> @test_srem_one_ne(<4 x i32> %X) nounwind {
149 ; CHECK-LABEL: test_srem_one_ne:
151 ; CHECK-NEXT: movi v0.2d, #0000000000000000
153 %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
154 %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
155 %ret = zext <4 x i1> %cmp to <4 x i32>