1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
; i32 llvm.umul.fix.sat with scale 2.
; The checked sequence forms the 64-bit product with umull, takes bits [33:2]
; of it via extr (w9 holds the high 32 bits), and selects all-ones (~wzr)
; through csinv when the high half is above 3, i.e. when the shifted product
; would not fit in 32 bits.
4 define i32 @func(i32 %x, i32 %y) nounwind {
7 ; CHECK-NEXT: umull x8, w0, w1
8 ; CHECK-NEXT: lsr x9, x8, #32
9 ; CHECK-NEXT: extr w8, w9, w8, #2
10 ; CHECK-NEXT: cmp w9, #3
11 ; CHECK-NEXT: csinv w0, w8, wzr, ls
13 %tmp = call i32 @llvm.umul.fix.sat.i32(i32 %x, i32 %y, i32 2)
; i64 llvm.umul.fix.sat with scale 2.
; The 128-bit product is built from mul (low half) and umulh (high half);
; extr #2 extracts the 64 result bits starting at bit 2, and csinv yields
; all-ones (~xzr) when the high half is above 3.
17 define i64 @func2(i64 %x, i64 %y) nounwind {
20 ; CHECK-NEXT: mul x8, x0, x1
21 ; CHECK-NEXT: umulh x9, x0, x1
22 ; CHECK-NEXT: extr x8, x9, x8, #2
23 ; CHECK-NEXT: cmp x9, #3
24 ; CHECK-NEXT: csinv x0, x8, wzr, ls
26 %tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 2)
; i4 llvm.umul.fix.sat with scale 2.
; The checked code works in 32-bit registers: the first operand is shifted
; left by 28, the second is masked to its low 4 bits, the same
; umull/extr/cmp/csinv sequence as the i32 case is applied, and the result is
; shifted right by 28 before being returned in w0.
30 define i4 @func3(i4 %x, i4 %y) nounwind {
33 ; CHECK-NEXT: lsl w8, w0, #28
34 ; CHECK-NEXT: and w9, w1, #0xf
35 ; CHECK-NEXT: umull x8, w8, w9
36 ; CHECK-NEXT: lsr x9, x8, #32
37 ; CHECK-NEXT: extr w8, w9, w8, #2
38 ; CHECK-NEXT: cmp w9, #3
39 ; CHECK-NEXT: csinv w8, w8, wzr, ls
40 ; CHECK-NEXT: lsr w0, w8, #28
42 %tmp = call i4 @llvm.umul.fix.sat.i4(i4 %x, i4 %y, i32 2)
46 ;; These result in regular integer multiplication with a saturation check.
; i32 llvm.umul.fix.sat with scale 0.
; No shift is needed: umull forms the 64-bit product, tst checks whether any
; of the upper 32 bits are set, and csinv returns the low 32 bits when they
; are all zero or all-ones (~wzr) otherwise.
47 define i32 @func4(i32 %x, i32 %y) nounwind {
50 ; CHECK-NEXT: umull x8, w0, w1
51 ; CHECK-NEXT: tst x8, #0xffffffff00000000
52 ; CHECK-NEXT: csinv w0, w8, wzr, eq
54 %tmp = call i32 @llvm.umul.fix.sat.i32(i32 %x, i32 %y, i32 0)
; i64 llvm.umul.fix.sat with scale 0.
; umulh produces the high 64 bits of the product and mul the low 64 bits;
; the high half is compared against zero and csinv returns the low half when
; it is zero, all-ones (~xzr) otherwise.
; NOTE(review): this is the only define in the visible tests without the
; nounwind attribute -- confirm whether that is intentional.
58 define i64 @func5(i64 %x, i64 %y) {
61 ; CHECK-NEXT: umulh x8, x0, x1
62 ; CHECK-NEXT: mul x9, x0, x1
63 ; CHECK-NEXT: cmp xzr, x8
64 ; CHECK-NEXT: csinv x0, x9, xzr, eq
66 %tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 0)
; i4 llvm.umul.fix.sat with scale 0.
; As in the scale-2 i4 case the first operand is shifted left by 28 and the
; second is masked to 4 bits; the product's upper 32 bits are then tested and
; csinv selects all-ones (~wzr) when any are set. The result is shifted right
; by 28 before being returned in w0.
70 define i4 @func6(i4 %x, i4 %y) nounwind {
73 ; CHECK-NEXT: lsl w8, w0, #28
74 ; CHECK-NEXT: and w9, w1, #0xf
75 ; CHECK-NEXT: umull x8, w8, w9
76 ; CHECK-NEXT: tst x8, #0xffffffff00000000
77 ; CHECK-NEXT: csinv w8, w8, wzr, eq
78 ; CHECK-NEXT: lsr w0, w8, #28
80 %tmp = call i4 @llvm.umul.fix.sat.i4(i4 %x, i4 %y, i32 0)
; <2 x i32> llvm.umul.fix.sat with scale 0.
; The checked output handles each lane in general-purpose registers: lanes
; are moved out of v0/v1 with mov/fmov, each pair goes through the
; umull + tst + csinv sequence, and the two results are inserted back into v0.
84 define <2 x i32> @vec(<2 x i32> %x, <2 x i32> %y) nounwind {
87 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
88 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
89 ; CHECK-NEXT: mov w8, v1.s[1]
90 ; CHECK-NEXT: mov w9, v0.s[1]
91 ; CHECK-NEXT: fmov w10, s0
92 ; CHECK-NEXT: umull x8, w9, w8
93 ; CHECK-NEXT: fmov w9, s1
94 ; CHECK-NEXT: umull x9, w10, w9
95 ; CHECK-NEXT: tst x8, #0xffffffff00000000
96 ; CHECK-NEXT: csinv w8, w8, wzr, eq
97 ; CHECK-NEXT: tst x9, #0xffffffff00000000
98 ; CHECK-NEXT: csinv w9, w9, wzr, eq
99 ; CHECK-NEXT: fmov s0, w9
100 ; CHECK-NEXT: mov v0.s[1], w8
101 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
103 %tmp = call <2 x i32> @llvm.umul.fix.sat.v2i32(<2 x i32> %x, <2 x i32> %y, i32 0)
; <4 x i32> llvm.umul.fix.sat with scale 0.
; All four lanes are processed in general-purpose registers: each lane pair
; is moved out of v0/v1, multiplied with umull, checked with tst on the upper
; 32 bits, saturated to all-ones (~wzr) via csinv when any are set, and
; inserted back into the corresponding lane of v0.
107 define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
110 ; CHECK-NEXT: mov w8, v1.s[1]
111 ; CHECK-NEXT: mov w9, v0.s[1]
112 ; CHECK-NEXT: fmov w10, s0
113 ; CHECK-NEXT: mov w11, v0.s[2]
114 ; CHECK-NEXT: mov w13, v0.s[3]
115 ; CHECK-NEXT: mov w12, v1.s[3]
116 ; CHECK-NEXT: umull x8, w9, w8
117 ; CHECK-NEXT: fmov w9, s1
118 ; CHECK-NEXT: umull x9, w10, w9
119 ; CHECK-NEXT: tst x8, #0xffffffff00000000
120 ; CHECK-NEXT: mov w10, v1.s[2]
121 ; CHECK-NEXT: csinv w8, w8, wzr, eq
122 ; CHECK-NEXT: tst x9, #0xffffffff00000000
123 ; CHECK-NEXT: csinv w9, w9, wzr, eq
124 ; CHECK-NEXT: fmov s0, w9
125 ; CHECK-NEXT: umull x9, w11, w10
126 ; CHECK-NEXT: mov v0.s[1], w8
127 ; CHECK-NEXT: tst x9, #0xffffffff00000000
128 ; CHECK-NEXT: csinv w8, w9, wzr, eq
129 ; CHECK-NEXT: umull x9, w13, w12
130 ; CHECK-NEXT: mov v0.s[2], w8
131 ; CHECK-NEXT: tst x9, #0xffffffff00000000
132 ; CHECK-NEXT: csinv w8, w9, wzr, eq
133 ; CHECK-NEXT: mov v0.s[3], w8
135 %tmp = call <4 x i32> @llvm.umul.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0)
; <4 x i64> llvm.umul.fix.sat with scale 32.
; The operands are read from v0/v1 and v2/v3 and each 64-bit lane is handled
; in general-purpose registers: mul + umulh form the 128-bit product,
; extr #32 extracts the 64 result bits starting at bit 32, and csinv selects
; all-ones (~xzr) when the high half is above 0xffffffff (held in x15).
; The four results are written back into v0 and v1.
139 define <4 x i64> @vec3(<4 x i64> %x, <4 x i64> %y) nounwind {
142 ; CHECK-NEXT: mov x8, v2.d[1]
143 ; CHECK-NEXT: mov x9, v0.d[1]
144 ; CHECK-NEXT: mov x14, v3.d[1]
145 ; CHECK-NEXT: mov x15, v1.d[1]
146 ; CHECK-NEXT: fmov x10, d2
147 ; CHECK-NEXT: fmov x11, d0
148 ; CHECK-NEXT: mul x12, x11, x10
149 ; CHECK-NEXT: mul x13, x9, x8
150 ; CHECK-NEXT: umulh x8, x9, x8
151 ; CHECK-NEXT: umulh x9, x11, x10
152 ; CHECK-NEXT: mul x10, x15, x14
153 ; CHECK-NEXT: extr x13, x8, x13, #32
154 ; CHECK-NEXT: umulh x11, x15, x14
155 ; CHECK-NEXT: fmov x14, d3
156 ; CHECK-NEXT: fmov x15, d1
157 ; CHECK-NEXT: mul x16, x15, x14
158 ; CHECK-NEXT: umulh x14, x15, x14
159 ; CHECK-NEXT: mov w15, #-1 // =0xffffffff
160 ; CHECK-NEXT: cmp x8, x15
161 ; CHECK-NEXT: extr x8, x9, x12, #32
162 ; CHECK-NEXT: csinv x12, x13, xzr, ls
163 ; CHECK-NEXT: cmp x9, x15
164 ; CHECK-NEXT: extr x9, x11, x10, #32
165 ; CHECK-NEXT: csinv x8, x8, xzr, ls
166 ; CHECK-NEXT: cmp x11, x15
167 ; CHECK-NEXT: csinv x9, x9, xzr, ls
168 ; CHECK-NEXT: fmov d0, x8
169 ; CHECK-NEXT: extr x10, x14, x16, #32
170 ; CHECK-NEXT: cmp x14, x15
171 ; CHECK-NEXT: csinv x10, x10, xzr, ls
172 ; CHECK-NEXT: mov v0.d[1], x12
173 ; CHECK-NEXT: fmov d1, x10
174 ; CHECK-NEXT: mov v1.d[1], x9
176 %tmp = call <4 x i64> @llvm.umul.fix.sat.v4i64(<4 x i64> %x, <4 x i64> %y, i32 32)
; i64 llvm.umul.fix.sat with scale 32.
; mul + umulh form the 128-bit product, extr #32 extracts the 64 result bits
; starting at bit 32, and csinv returns all-ones (~xzr) when the high half
; is above 0xffffffff.
180 define i64 @func7(i64 %x, i64 %y) nounwind {
181 ; CHECK-LABEL: func7:
183 ; CHECK-NEXT: mul x9, x0, x1
184 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff
185 ; CHECK-NEXT: umulh x10, x0, x1
186 ; CHECK-NEXT: extr x9, x10, x9, #32
187 ; CHECK-NEXT: cmp x10, x8
188 ; CHECK-NEXT: csinv x0, x9, xzr, ls
190 %tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 32)
; i64 llvm.umul.fix.sat with scale 63.
; Same shape as the scale-32 case: mul + umulh form the 128-bit product,
; extr #63 extracts the 64 result bits starting at bit 63, and csinv returns
; all-ones (~xzr) when the high half is above 0x7fffffffffffffff.
194 define i64 @func8(i64 %x, i64 %y) nounwind {
195 ; CHECK-LABEL: func8:
197 ; CHECK-NEXT: mul x9, x0, x1
198 ; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
199 ; CHECK-NEXT: umulh x10, x0, x1
200 ; CHECK-NEXT: extr x9, x10, x9, #63
201 ; CHECK-NEXT: cmp x10, x8
202 ; CHECK-NEXT: csinv x0, x9, xzr, ls
204 %tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 63)