1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=hexagon < %s | FileCheck %s
; f0: high half of a signed <4 x i8> multiply: sext to i16, mul, lshr by 8, trunc.
; The expected codegen uses the unsigned vmpybu and then applies sign-correction
; terms (vcmpb.gt/vmux/vaddub/vsubub) — presumably because there is no signed
; byte-multiply instruction; confirm against the Hexagon manual.
; NOTE(review): this excerpt elides lines (CHECK-LABEL, entry, ret, closing brace).
4 define <4 x i8> @f0(<4 x i8> %a0, <4 x i8> %a1) #0 {
11 ; CHECK-NEXT: r7:6 = combine(#0,#0)
14 ; CHECK-NEXT: r5:4 = vmpybu(r0,r1)
17 ; CHECK-NEXT: p1 = vcmpb.gt(r1:0,#-1)
20 ; CHECK-NEXT: p0 = vcmpb.gt(r3:2,#-1)
23 ; CHECK-NEXT: r3:2 = vmux(p1,r7:6,r3:2)
26 ; CHECK-NEXT: r1:0 = vmux(p0,r7:6,r1:0)
29 ; CHECK-NEXT: r4 = vtrunohb(r5:4)
32 ; CHECK-NEXT: r7:6 = vmpybu(r0,r0)
35 ; CHECK-NEXT: r1:0 = vaddub(r3:2,r1:0)
38 ; CHECK-NEXT: r5 = vtrunohb(r7:6)
41 ; CHECK-NEXT: r1:0 = vsubub(r5:4,r1:0)
44 ; CHECK-NEXT: jumpr r31
46 %v0 = sext <4 x i8> %a0 to <4 x i16>
47 %v1 = sext <4 x i8> %a1 to <4 x i16>
48 %v2 = mul <4 x i16> %v0, %v1
49 %v3 = lshr <4 x i16> %v2, <i16 8, i16 8, i16 8, i16 8>
50 %v4 = trunc <4 x i16> %v3 to <4 x i8>
; f1: high half of an unsigned <4 x i8> multiply (zext/mul/lshr 8/trunc).
; Maps directly to one vmpybu followed by vtrunohb (take odd/high bytes).
54 define <4 x i8> @f1(<4 x i8> %a0, <4 x i8> %a1) #0 {
58 ; CHECK-NEXT: r1:0 = vmpybu(r0,r1)
61 ; CHECK-NEXT: r0 = vtrunohb(r1:0)
64 ; CHECK-NEXT: jumpr r31
66 %v0 = zext <4 x i8> %a0 to <4 x i16>
67 %v1 = zext <4 x i8> %a1 to <4 x i16>
68 %v2 = mul <4 x i16> %v0, %v1
69 %v3 = lshr <4 x i16> %v2, <i16 8, i16 8, i16 8, i16 8>
70 %v4 = trunc <4 x i16> %v3 to <4 x i8>
; f2: high half of a signed <8 x i8> multiply — the 8-lane analogue of f0,
; split across two vmpybu register-pair operations plus the same
; sign-correction pattern (vcmpb.gt/vmux/vaddub/vsubub).
74 define <8 x i8> @f2(<8 x i8> %a0, <8 x i8> %a1) #0 {
78 ; CHECK-NEXT: r7:6 = combine(#0,#0)
81 ; CHECK-NEXT: p0 = vcmpb.gt(r3:2,#-1)
84 ; CHECK-NEXT: r5:4 = vmpybu(r0,r2)
87 ; CHECK-NEXT: r9:8 = vmux(p0,r7:6,r1:0)
90 ; CHECK-NEXT: p0 = vcmpb.gt(r1:0,#-1)
93 ; CHECK-NEXT: r1:0 = vmpybu(r1,r3)
96 ; CHECK-NEXT: r7:6 = vmux(p0,r7:6,r3:2)
99 ; CHECK-NEXT: r4 = vtrunohb(r5:4)
102 ; CHECK-NEXT: r3:2 = vaddub(r7:6,r9:8)
105 ; CHECK-NEXT: r5 = vtrunohb(r1:0)
108 ; CHECK-NEXT: r1:0 = vsubub(r5:4,r3:2)
111 ; CHECK-NEXT: jumpr r31
113 %v0 = sext <8 x i8> %a0 to <8 x i16>
114 %v1 = sext <8 x i8> %a1 to <8 x i16>
115 %v2 = mul <8 x i16> %v0, %v1
116 %v3 = lshr <8 x i16> %v2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
117 %v4 = trunc <8 x i16> %v3 to <8 x i8>
; f3: high half of an unsigned <8 x i8> multiply — two vmpybu + vtrunohb pairs,
; one per 32-bit half of the 64-bit vector (no sign correction needed).
121 define <8 x i8> @f3(<8 x i8> %a0, <8 x i8> %a1) #0 {
125 ; CHECK-NEXT: r5:4 = vmpybu(r0,r2)
128 ; CHECK-NEXT: r7:6 = vmpybu(r1,r3)
131 ; CHECK-NEXT: r0 = vtrunohb(r5:4)
134 ; CHECK-NEXT: r1 = vtrunohb(r7:6)
137 ; CHECK-NEXT: jumpr r31
139 %v0 = zext <8 x i8> %a0 to <8 x i16>
140 %v1 = zext <8 x i8> %a1 to <8 x i16>
141 %v2 = mul <8 x i16> %v0, %v1
142 %v3 = lshr <8 x i16> %v2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
143 %v4 = trunc <8 x i16> %v3 to <8 x i8>
; f4: high half of a signed <2 x i16> multiply — one vmpyh(...):sat, then
; combine(r1.h,r0.h) to gather the high halfword of each 32-bit product.
147 define <2 x i16> @f4(<2 x i16> %a0, <2 x i16> %a1) #0 {
151 ; CHECK-NEXT: r1:0 = vmpyh(r0,r1):sat
154 ; CHECK-NEXT: r0 = combine(r1.h,r0.h)
157 ; CHECK-NEXT: jumpr r31
159 %v0 = sext <2 x i16> %a0 to <2 x i32>
160 %v1 = sext <2 x i16> %a1 to <2 x i32>
161 %v2 = mul <2 x i32> %v0, %v1
162 %v3 = lshr <2 x i32> %v2, <i32 16, i32 16>
163 %v4 = trunc <2 x i32> %v3 to <2 x i16>
; f5: high half of an unsigned <2 x i16> multiply. The signed vmpyh result is
; corrected for the unsigned interpretation with mask-and-add terms built from
; the operands' sign bits (vasrh #15 + and + vaddh); the reverse of f0's
; signed-from-unsigned correction.
167 define <2 x i16> @f5(<2 x i16> %a0, <2 x i16> %a1) #0 {
171 ; CHECK-NEXT: r3:2 = combine(r0,r1)
174 ; CHECK-NEXT: r1:0 = vasrh(r3:2,#15)
177 ; CHECK-NEXT: r5:4 = vmpyh(r3,r2):sat
180 ; CHECK-NEXT: r0 = and(r3,r0)
183 ; CHECK-NEXT: r1 = and(r2,r1)
186 ; CHECK-NEXT: r4 = combine(r5.h,r4.h)
189 ; CHECK-NEXT: r0 = vaddh(r0,r1)
192 ; CHECK-NEXT: r0 = vaddh(r4,r0)
195 ; CHECK-NEXT: jumpr r31
197 %v0 = zext <2 x i16> %a0 to <2 x i32>
198 %v1 = zext <2 x i16> %a1 to <2 x i32>
199 %v2 = mul <2 x i32> %v0, %v1
200 %v3 = lshr <2 x i32> %v2, <i32 16, i32 16>
201 %v4 = trunc <2 x i32> %v3 to <2 x i16>
; f6: high half of a signed <4 x i16> multiply — the 4-lane analogue of f4,
; one vmpyh:sat + combine per 32-bit register half.
205 define <4 x i16> @f6(<4 x i16> %a0, <4 x i16> %a1) #0 {
209 ; CHECK-NEXT: r5:4 = vmpyh(r0,r2):sat
212 ; CHECK-NEXT: r7:6 = vmpyh(r1,r3):sat
215 ; CHECK-NEXT: r0 = combine(r5.h,r4.h)
218 ; CHECK-NEXT: r1 = combine(r7.h,r6.h)
221 ; CHECK-NEXT: jumpr r31
223 %v0 = sext <4 x i16> %a0 to <4 x i32>
224 %v1 = sext <4 x i16> %a1 to <4 x i32>
225 %v2 = mul <4 x i32> %v0, %v1
226 %v3 = lshr <4 x i32> %v2, <i32 16, i32 16, i32 16, i32 16>
227 %v4 = trunc <4 x i32> %v3 to <4 x i16>
; f7: high half of an unsigned <4 x i16> multiply — the 4-lane analogue of f5:
; signed vmpyh products corrected via vasrh(#15)/and/vaddh terms.
231 define <4 x i16> @f7(<4 x i16> %a0, <4 x i16> %a1) #0 {
235 ; CHECK-NEXT: r7:6 = vasrh(r1:0,#15)
238 ; CHECK-NEXT: r9:8 = vasrh(r3:2,#15)
241 ; CHECK-NEXT: r5:4 = vmpyh(r0,r2):sat
244 ; CHECK-NEXT: r7:6 = and(r3:2,r7:6)
247 ; CHECK-NEXT: r3:2 = vmpyh(r1,r3):sat
250 ; CHECK-NEXT: r1:0 = and(r1:0,r9:8)
253 ; CHECK-NEXT: r4 = combine(r5.h,r4.h)
256 ; CHECK-NEXT: r5 = combine(r3.h,r2.h)
259 ; CHECK-NEXT: r1:0 = vaddh(r1:0,r7:6)
262 ; CHECK-NEXT: r1:0 = vaddh(r5:4,r1:0)
265 ; CHECK-NEXT: jumpr r31
267 %v0 = zext <4 x i16> %a0 to <4 x i32>
268 %v1 = zext <4 x i16> %a1 to <4 x i32>
269 %v2 = mul <4 x i32> %v0, %v1
270 %v3 = lshr <4 x i32> %v2, <i32 16, i32 16, i32 16, i32 16>
271 %v4 = trunc <4 x i32> %v3 to <4 x i16>
; f8: high half of a signed <2 x i32> multiply — one scalar mpy (32x32
; returning the upper word) per lane.
275 define <2 x i32> @f8(<2 x i32> %a0, <2 x i32> %a1) #0 {
279 ; CHECK-NEXT: r0 = mpy(r0,r2)
282 ; CHECK-NEXT: r1 = mpy(r1,r3)
285 ; CHECK-NEXT: jumpr r31
287 %v0 = sext <2 x i32> %a0 to <2 x i64>
288 %v1 = sext <2 x i32> %a1 to <2 x i64>
289 %v2 = mul <2 x i64> %v0, %v1
290 %v3 = lshr <2 x i64> %v2, <i64 32, i64 32>
291 %v4 = trunc <2 x i64> %v3 to <2 x i32>
; f9: high half of an unsigned <2 x i32> multiply — per-lane scalar mpyu,
; the unsigned counterpart of f8.
295 define <2 x i32> @f9(<2 x i32> %a0, <2 x i32> %a1) #0 {
299 ; CHECK-NEXT: r0 = mpyu(r0,r2)
302 ; CHECK-NEXT: r1 = mpyu(r1,r3)
305 ; CHECK-NEXT: jumpr r31
307 %v0 = zext <2 x i32> %a0 to <2 x i64>
308 %v1 = zext <2 x i32> %a1 to <2 x i64>
309 %v2 = mul <2 x i64> %v0, %v1
310 %v3 = lshr <2 x i64> %v2, <i64 32, i64 32>
311 %v4 = trunc <2 x i64> %v3 to <2 x i32>
315 attributes #0 = { nounwind memory(none) "target-features"="-packets" }