; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
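
;; Lowering of the signed fixed-point saturating multiply intrinsics
;; llvm.smul.fix.sat.*: the double-width product of the two operands is
;; shifted right by the scale operand and clamped to the range of the
;; return type.
;;
;; func, func2 and func3 use a scale of 2 (i32, i64 and i4): the wide product
;; is shifted with extr, and its high half is range-checked to select the
;; type's minimum or maximum value on overflow.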
define i32 @func(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: func:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smull x9, w0, w1
; CHECK-NEXT:    mov w8, #2147483647 // =0x7fffffff
; CHECK-NEXT:    lsr x10, x9, #32
; CHECK-NEXT:    extr w9, w10, w9, #2
; CHECK-NEXT:    cmp w10, #1
; CHECK-NEXT:    csel w8, w8, w9, gt
; CHECK-NEXT:    cmn w10, #2
; CHECK-NEXT:    mov w9, #-2147483648 // =0x80000000
; CHECK-NEXT:    csel w0, w9, w8, lt
; CHECK-NEXT:    ret
  %tmp = call i32 @llvm.smul.fix.sat.i32(i32 %x, i32 %y, i32 2)
  ret i32 %tmp
}

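;; Same scale-2 lowering at i64: mul and smulh form the 128-bit product, which
;; is shifted right by 2 with extr and saturated on the value of the high half.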
define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: func2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x9, x0, x1
; CHECK-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-NEXT:    smulh x10, x0, x1
; CHECK-NEXT:    extr x9, x10, x9, #2
; CHECK-NEXT:    cmp x10, #1
; CHECK-NEXT:    csel x8, x8, x9, gt
; CHECK-NEXT:    cmn x10, #2
; CHECK-NEXT:    mov x9, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT:    csel x0, x9, x8, lt
; CHECK-NEXT:    ret
  %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 2)
  ret i64 %tmp
}

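;; For i4 the operands are widened first: y is sign-extended with sbfx and x is
;; shifted into the top bits with lsl #28, so the i32 sequence above can be
;; reused; asr #28 moves the saturated result back into the low 4 bits.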
define i4 @func3(i4 %x, i4 %y) nounwind {
; CHECK-LABEL: func3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sbfx w9, w1, #0, #4
; CHECK-NEXT:    lsl w10, w0, #28
; CHECK-NEXT:    mov w8, #2147483647 // =0x7fffffff
; CHECK-NEXT:    smull x9, w10, w9
; CHECK-NEXT:    lsr x10, x9, #32
; CHECK-NEXT:    extr w9, w10, w9, #2
; CHECK-NEXT:    cmp w10, #1
; CHECK-NEXT:    csel w8, w8, w9, gt
; CHECK-NEXT:    cmn w10, #2
; CHECK-NEXT:    mov w9, #-2147483648 // =0x80000000
; CHECK-NEXT:    csel w8, w9, w8, lt
; CHECK-NEXT:    asr w0, w8, #28
; CHECK-NEXT:    ret
  %tmp = call i4 @llvm.smul.fix.sat.i4(i4 %x, i4 %y, i32 2)
  ret i4 %tmp
}

;; These result in regular integer multiplication with a saturation check.
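;; With a scale of 0, smul.fix.sat is a plain saturating multiply: the sign of
;; x^y selects the saturation value (minimum or maximum of the type), and the
;; multiplication result is kept only when its high half is just the
;; sign-extension of its low half, i.e. when no overflow occurred.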
define i32 @func4(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: func4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smull x9, w0, w1
; CHECK-NEXT:    eor w10, w0, w1
; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
; CHECK-NEXT:    cmp w10, #0
; CHECK-NEXT:    cinv w8, w8, ge
; CHECK-NEXT:    cmp x9, w9, sxtw
; CHECK-NEXT:    csel w0, w8, w9, ne
; CHECK-NEXT:    ret
  %tmp = call i32 @llvm.smul.fix.sat.i32(i32 %x, i32 %y, i32 0)
  ret i32 %tmp
}

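;; The i64 version needs smulh for the high half of the product; overflow is
;; detected by comparing it against the low half shifted right arithmetically
;; by 63.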
define i64 @func5(i64 %x, i64 %y) {
; CHECK-LABEL: func5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x9, x0, x1
; CHECK-NEXT:    eor x11, x0, x1
; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT:    cmp x11, #0
; CHECK-NEXT:    smulh x10, x0, x1
; CHECK-NEXT:    cinv x8, x8, ge
; CHECK-NEXT:    cmp x10, x9, asr #63
; CHECK-NEXT:    csel x0, x8, x9, ne
; CHECK-NEXT:    ret
  %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 0)
  ret i64 %tmp
}

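;; i4 again goes through the widened i32 sequence, with the saturated result
;; shifted back down with asr #28.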
define i4 @func6(i4 %x, i4 %y) nounwind {
; CHECK-LABEL: func6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sbfx w9, w1, #0, #4
; CHECK-NEXT:    lsl w10, w0, #28
; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
; CHECK-NEXT:    smull x11, w10, w9
; CHECK-NEXT:    eor w9, w10, w9
; CHECK-NEXT:    cmp w9, #0
; CHECK-NEXT:    cinv w8, w8, ge
; CHECK-NEXT:    cmp x11, w11, sxtw
; CHECK-NEXT:    csel w8, w8, w11, ne
; CHECK-NEXT:    asr w0, w8, #28
; CHECK-NEXT:    ret
  %tmp = call i4 @llvm.smul.fix.sat.i4(i4 %x, i4 %y, i32 0)
  ret i4 %tmp
}

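;; A scale of 32 on i64 shifts the 128-bit product right by 32, so the result
;; overflows i64 exactly when the high half of the product is outside the
;; signed 32-bit range; the bounds checked below are 0x7fffffff and
;; -0x80000000.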
define i64 @func7(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: func7:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x9, x0, x1
; CHECK-NEXT:    mov w8, #2147483647 // =0x7fffffff
; CHECK-NEXT:    mov x11, #-2147483648 // =0xffffffff80000000
; CHECK-NEXT:    smulh x10, x0, x1
; CHECK-NEXT:    extr x9, x10, x9, #32
; CHECK-NEXT:    cmp x10, x8
; CHECK-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-NEXT:    csel x8, x8, x9, gt
; CHECK-NEXT:    cmp x10, x11
; CHECK-NEXT:    mov x9, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT:    csel x0, x9, x8, lt
; CHECK-NEXT:    ret
  %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 32)
  ret i64 %tmp
}

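;; With a scale of 63 only one bit of headroom remains, so the high half of the
;; product is checked against 2^62-1 and -2^62.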
define i64 @func8(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: func8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x9, x0, x1
; CHECK-NEXT:    mov x8, #4611686018427387903 // =0x3fffffffffffffff
; CHECK-NEXT:    mov x11, #-4611686018427387904 // =0xc000000000000000
; CHECK-NEXT:    smulh x10, x0, x1
; CHECK-NEXT:    extr x9, x10, x9, #63
; CHECK-NEXT:    cmp x10, x8
; CHECK-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-NEXT:    csel x8, x8, x9, gt
; CHECK-NEXT:    cmp x10, x11
; CHECK-NEXT:    mov x9, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT:    csel x0, x9, x8, lt
; CHECK-NEXT:    ret
  %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 63)
  ret i64 %tmp
}

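;; The vector cases are scalarized: each lane is moved to a general-purpose
;; register, run through the same scalar sequence as func4, and inserted back.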
define <2 x i32> @vec(<2 x i32> %x, <2 x i32> %y) nounwind {
; CHECK-LABEL: vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov w9, v1.s[1]
; CHECK-NEXT:    mov w10, v0.s[1]
; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
; CHECK-NEXT:    fmov w12, s0
; CHECK-NEXT:    smull x11, w10, w9
; CHECK-NEXT:    eor w9, w10, w9
; CHECK-NEXT:    fmov w10, s1
; CHECK-NEXT:    cmp w9, #0
; CHECK-NEXT:    smull x9, w12, w10
; CHECK-NEXT:    eor w10, w12, w10
; CHECK-NEXT:    cinv w12, w8, ge
; CHECK-NEXT:    cmp x11, w11, sxtw
; CHECK-NEXT:    csel w11, w12, w11, ne
; CHECK-NEXT:    cmp w10, #0
; CHECK-NEXT:    cinv w8, w8, ge
; CHECK-NEXT:    cmp x9, w9, sxtw
; CHECK-NEXT:    csel w8, w8, w9, ne
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    mov v0.s[1], w11
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp = call <2 x i32> @llvm.smul.fix.sat.v2i32(<2 x i32> %x, <2 x i32> %y, i32 0)
  ret <2 x i32> %tmp
}

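;; v4i32 follows the same per-lane pattern, rebuilding the result with four
;; lane inserts.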
define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: vec2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w9, v1.s[1]
; CHECK-NEXT:    mov w10, v0.s[1]
; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
; CHECK-NEXT:    fmov w12, s1
; CHECK-NEXT:    fmov w13, s0
; CHECK-NEXT:    mov w14, v0.s[2]
; CHECK-NEXT:    eor w11, w10, w9
; CHECK-NEXT:    smull x9, w10, w9
; CHECK-NEXT:    mov w10, v1.s[2]
; CHECK-NEXT:    cmp w11, #0
; CHECK-NEXT:    smull x11, w13, w12
; CHECK-NEXT:    eor w12, w13, w12
; CHECK-NEXT:    cinv w13, w8, ge
; CHECK-NEXT:    cmp x9, w9, sxtw
; CHECK-NEXT:    csel w9, w13, w9, ne
; CHECK-NEXT:    cmp w12, #0
; CHECK-NEXT:    mov w13, v1.s[3]
; CHECK-NEXT:    cinv w12, w8, ge
; CHECK-NEXT:    cmp x11, w11, sxtw
; CHECK-NEXT:    csel w11, w12, w11, ne
; CHECK-NEXT:    mov w12, v0.s[3]
; CHECK-NEXT:    fmov s0, w11
; CHECK-NEXT:    smull x11, w14, w10
; CHECK-NEXT:    mov v0.s[1], w9
; CHECK-NEXT:    eor w9, w14, w10
; CHECK-NEXT:    smull x10, w12, w13
; CHECK-NEXT:    cmp w9, #0
; CHECK-NEXT:    cinv w9, w8, ge
; CHECK-NEXT:    cmp x11, w11, sxtw
; CHECK-NEXT:    csel w9, w9, w11, ne
; CHECK-NEXT:    mov v0.s[2], w9
; CHECK-NEXT:    eor w9, w12, w13
; CHECK-NEXT:    cmp w9, #0
; CHECK-NEXT:    cinv w8, w8, ge
; CHECK-NEXT:    cmp x10, w10, sxtw
; CHECK-NEXT:    csel w8, w8, w10, ne
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    ret
  %tmp = call <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0)
  ret <4 x i32> %tmp
}

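;; v4i64 with a scale of 32 is also scalarized, each lane using the mul/smulh
;; and extr #32 sequence from func7 with the same 32-bit bounds on the high
;; half.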
define <4 x i64> @vec3(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK-LABEL: vec3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, v2.d[1]
; CHECK-NEXT:    mov x9, v0.d[1]
; CHECK-NEXT:    mov w16, #2147483647 // =0x7fffffff
; CHECK-NEXT:    fmov x10, d2
; CHECK-NEXT:    fmov x11, d0
; CHECK-NEXT:    mov x18, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-NEXT:    mov x14, v3.d[1]
; CHECK-NEXT:    mov x15, v1.d[1]
; CHECK-NEXT:    mul x13, x9, x8
; CHECK-NEXT:    smulh x8, x9, x8
; CHECK-NEXT:    mul x12, x11, x10
; CHECK-NEXT:    smulh x9, x11, x10
; CHECK-NEXT:    extr x13, x8, x13, #32
; CHECK-NEXT:    cmp x8, x16
; CHECK-NEXT:    mul x10, x15, x14
; CHECK-NEXT:    csel x13, x18, x13, gt
; CHECK-NEXT:    smulh x11, x15, x14
; CHECK-NEXT:    fmov x14, d3
; CHECK-NEXT:    fmov x15, d1
; CHECK-NEXT:    extr x12, x9, x12, #32
; CHECK-NEXT:    mul x17, x15, x14
; CHECK-NEXT:    smulh x14, x15, x14
; CHECK-NEXT:    mov x15, #-2147483648 // =0xffffffff80000000
; CHECK-NEXT:    cmp x8, x15
; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT:    csel x13, x8, x13, lt
; CHECK-NEXT:    cmp x9, x16
; CHECK-NEXT:    csel x12, x18, x12, gt
; CHECK-NEXT:    cmp x9, x15
; CHECK-NEXT:    extr x9, x11, x10, #32
; CHECK-NEXT:    csel x10, x8, x12, lt
; CHECK-NEXT:    cmp x11, x16
; CHECK-NEXT:    csel x9, x18, x9, gt
; CHECK-NEXT:    cmp x11, x15
; CHECK-NEXT:    extr x11, x14, x17, #32
; CHECK-NEXT:    csel x9, x8, x9, lt
; CHECK-NEXT:    cmp x14, x16
; CHECK-NEXT:    fmov d0, x10
; CHECK-NEXT:    csel x11, x18, x11, gt
; CHECK-NEXT:    cmp x14, x15
; CHECK-NEXT:    csel x8, x8, x11, lt
; CHECK-NEXT:    fmov d1, x8
; CHECK-NEXT:    mov v0.d[1], x13
; CHECK-NEXT:    mov v1.d[1], x9
; CHECK-NEXT:    ret
  %tmp = call <4 x i64> @llvm.smul.fix.sat.v4i64(<4 x i64> %x, <4 x i64> %y, i32 32)
  ret <4 x i64> %tmp
}