; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon < %s | FileCheck %s
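
; Q31 multiply-high without rounding: (sext(a) * sext(b)) >> 31 on 32-bit
; lanes, truncated back to i32. This selects the vmpye/vmpyo:<<1:sat:shift pair.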
define void @f0(ptr %a0, ptr %a1, ptr %a2) #0 {
; CHECK-LABEL: f0:
; CHECK:       // %bb.0: // %b0
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmem(r1+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.w = vmpye(v0.w,v1.uh)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.w += vmpyo(v0.w,v1.h):<<1:sat:shift
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     vmem(r2+#0) = v2
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:    }
b0:
  %v0 = load <32 x i32>, ptr %a0, align 128
  %v1 = load <32 x i32>, ptr %a1, align 128
  %v2 = sext <32 x i32> %v0 to <32 x i64>
  %v3 = sext <32 x i32> %v1 to <32 x i64>
  %v4 = mul nsw <32 x i64> %v2, %v3
  %v5 = lshr <32 x i64> %v4, <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>
  %v6 = trunc <32 x i64> %v5 to <32 x i32>
  store <32 x i32> %v6, ptr %a2, align 128
  ret void
}
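
; Same as @f0, but with a rounding bias of 1073741824 (1 << 30) added before
; the shift; this selects the :rnd variant of vmpyo.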
define void @f1(ptr %a0, ptr %a1, ptr %a2) #0 {
; CHECK-LABEL: f1:
; CHECK:       // %bb.0: // %b0
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmem(r1+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.w = vmpye(v0.w,v1.uh)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.w += vmpyo(v0.w,v1.h):<<1:rnd:sat:shift
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     vmem(r2+#0) = v2
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:    }
b0:
  %v0 = load <32 x i32>, ptr %a0, align 128
  %v1 = load <32 x i32>, ptr %a1, align 128
  %v2 = sext <32 x i32> %v0 to <32 x i64>
  %v3 = sext <32 x i32> %v1 to <32 x i64>
  %v4 = mul nsw <32 x i64> %v2, %v3
  %v5 = add nsw <32 x i64> %v4, <i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824, i64 1073741824>
  %v6 = lshr <32 x i64> %v5, <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>
  %v7 = trunc <32 x i64> %v6 to <32 x i32>
  store <32 x i32> %v7, ptr %a2, align 128
  ret void
}
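
; Unrounded 16-bit multiply-high: (sext(a) * sext(b)) >> 15 on halfword lanes.
; No single instruction matches this form, so it expands into a widening vmpy,
; a shuffle, logical right shifts by 15, and a pack of the even halfwords.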
define void @f2(ptr %a0, ptr %a1, ptr %a2) #0 {
; CHECK-LABEL: f2:
; CHECK:       // %bb.0: // %b0
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmem(r1+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = #124
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r3 = #15
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1:0.w = vmpy(v0.h,v1.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1:0 = vshuff(v1,v0,r7)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.uw = vlsr(v0.uw,r3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.uw = vlsr(v1.uw,r3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.h = vpacke(v1.w,v0.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     vmem(r2+#0) = v0
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:    }
b0:
  %v0 = load <64 x i16>, ptr %a0, align 128
  %v1 = load <64 x i16>, ptr %a1, align 128
  %v2 = sext <64 x i16> %v0 to <64 x i32>
  %v3 = sext <64 x i16> %v1 to <64 x i32>
  %v4 = mul nsw <64 x i32> %v2, %v3
  %v5 = lshr <64 x i32> %v4, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %v6 = trunc <64 x i32> %v5 to <64 x i16>
  store <64 x i16> %v6, ptr %a2, align 128
  ret void
}
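
; Rounded 16-bit multiply-high: the bias of 16384 (1 << 14) plus the shift by
; 15 matches the single vmpy(v0.h,v1.h):<<1:rnd:sat instruction.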
define void @f3(ptr %a0, ptr %a1, ptr %a2) #0 {
; CHECK-LABEL: f3:
; CHECK:       // %bb.0: // %b0
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmem(r1+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.h = vmpy(v0.h,v1.h):<<1:rnd:sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     vmem(r2+#0) = v0
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:    }
b0:
  %v0 = load <64 x i16>, ptr %a0, align 128
  %v1 = load <64 x i16>, ptr %a1, align 128
  %v2 = sext <64 x i16> %v0 to <64 x i32>
  %v3 = sext <64 x i16> %v1 to <64 x i32>
  %v4 = mul nsw <64 x i32> %v2, %v3
  %v5 = add nsw <64 x i32> %v4, <i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384, i32 16384>
  %v6 = lshr <64 x i32> %v5, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %v7 = trunc <64 x i32> %v6 to <64 x i16>
  store <64 x i16> %v7, ptr %a2, align 128
  ret void
}
attributes #0 = { nounwind "target-features"="+v68,+hvxv68,+hvx-length128b,-packets" }