1 ; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
3 ;;; Test vector merge intrinsic instructions
6 ;;; We test VMRG*vvml, VMRG*vvml_v, VMRG*rvml, VMRG*rvml_v, VMRG*ivml, and
7 ;;; VMRG*ivml_v instructions.
; vmrg.vvvml: vector-vector merge under mask %2 with explicit VL=256.
; Expect the VL to be set up (lea 256 into %s0) and a single VMRG*vvml
; instruction operating in-place on %v0.
9 ; Function Attrs: nounwind readnone
10 define fastcc <256 x double> @vmrg_vvvml(<256 x double> %0, <256 x double> %1, <256 x i1> %2) {
11 ; CHECK-LABEL: vmrg_vvvml:
13 ; CHECK-NEXT: lea %s0, 256
15 ; CHECK-NEXT: vmrg %v0, %v0, %v1, %vm1
16 ; CHECK-NEXT: b.l.t (, %s10)
17 %4 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vvvml(<256 x double> %0, <256 x double> %1, <256 x i1> %2, i32 256)
21 ; Function Attrs: nounwind readnone
22 declare <256 x double> @llvm.ve.vl.vmrg.vvvml(<256 x double>, <256 x double>, <256 x i1>, i32)
; vmrg.vvvmvl: same merge but with a pass-through operand (%3) and VL=128.
; The merge is emitted into a scratch register (%v2) at VL=128, then a
; VL=256 "vor %v0, (0)1, %v2" copies the result into the return register,
; preserving the pass-through lanes handling chosen by isel.
24 ; Function Attrs: nounwind readnone
25 define fastcc <256 x double> @vmrg_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
26 ; CHECK-LABEL: vmrg_vvvmvl:
28 ; CHECK-NEXT: lea %s0, 128
30 ; CHECK-NEXT: vmrg %v2, %v0, %v1, %vm1
31 ; CHECK-NEXT: lea %s16, 256
32 ; CHECK-NEXT: lvl %s16
33 ; CHECK-NEXT: vor %v0, (0)1, %v2
34 ; CHECK-NEXT: b.l.t (, %s10)
35 %5 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
39 ; Function Attrs: nounwind readnone
40 declare <256 x double> @llvm.ve.vl.vmrg.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
; vmrg.vsvml: scalar(i64)-vector merge under mask at VL=256. The i64
; scalar arrives in %s0 and is used directly as the first VMRG operand
; (the VMRG*rvml register form).
42 ; Function Attrs: nounwind readnone
43 define fastcc <256 x double> @vmrg_vsvml(i64 %0, <256 x double> %1, <256 x i1> %2) {
44 ; CHECK-LABEL: vmrg_vsvml:
46 ; CHECK-NEXT: lea %s1, 256
48 ; CHECK-NEXT: vmrg %v0, %s0, %v0, %vm1
49 ; CHECK-NEXT: b.l.t (, %s10)
50 %4 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvml(i64 %0, <256 x double> %1, <256 x i1> %2, i32 256)
54 ; Function Attrs: nounwind readnone
55 declare <256 x double> @llvm.ve.vl.vmrg.vsvml(i64, <256 x double>, <256 x i1>, i32)
; vmrg.vsvmvl: scalar-vector merge with pass-through (%3), VL=128.
; As in the vvvmvl case, the merge lands in a scratch register (%v1)
; and a VL=256 vor moves it into the return register %v0.
57 ; Function Attrs: nounwind readnone
58 define fastcc <256 x double> @vmrg_vsvmvl(i64 %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
59 ; CHECK-LABEL: vmrg_vsvmvl:
61 ; CHECK-NEXT: lea %s1, 128
63 ; CHECK-NEXT: vmrg %v1, %s0, %v0, %vm1
64 ; CHECK-NEXT: lea %s16, 256
65 ; CHECK-NEXT: lvl %s16
66 ; CHECK-NEXT: vor %v0, (0)1, %v1
67 ; CHECK-NEXT: b.l.t (, %s10)
68 %5 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvmvl(i64 %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
72 ; Function Attrs: nounwind readnone
73 declare <256 x double> @llvm.ve.vl.vmrg.vsvmvl(i64, <256 x double>, <256 x i1>, <256 x double>, i32)
; vmrg.vsvml with a constant scalar: the i64 8 fits the instruction's
; immediate field, so isel should fold it into the VMRG*ivml immediate
; form ("vmrg %v0, 8, ...") instead of materializing it in a register.
75 ; Function Attrs: nounwind readnone
76 define fastcc <256 x double> @vmrg_vsvml_imm(<256 x double> %0, <256 x i1> %1) {
77 ; CHECK-LABEL: vmrg_vsvml_imm:
79 ; CHECK-NEXT: lea %s0, 256
81 ; CHECK-NEXT: vmrg %v0, 8, %v0, %vm1
82 ; CHECK-NEXT: b.l.t (, %s10)
83 %3 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvml(i64 8, <256 x double> %0, <256 x i1> %1, i32 256)
; Immediate-scalar variant with pass-through (%2), VL=128: immediate 8 is
; folded into the VMRG, result goes through scratch %v1 and a VL=256 vor
; into the return register.
87 ; Function Attrs: nounwind readnone
88 define fastcc <256 x double> @vmrg_vsvmvl_imm(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
89 ; CHECK-LABEL: vmrg_vsvmvl_imm:
91 ; CHECK-NEXT: lea %s0, 128
93 ; CHECK-NEXT: vmrg %v1, 8, %v0, %vm1
94 ; CHECK-NEXT: lea %s16, 256
95 ; CHECK-NEXT: lvl %s16
96 ; CHECK-NEXT: vor %v0, (0)1, %v1
97 ; CHECK-NEXT: b.l.t (, %s10)
98 %4 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvmvl(i64 8, <256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
; vmrgw.vvvMl: 32-bit-element merge taking a <512 x i1> packed mask
; (capital M in the suffix) and VL=256. Expect the vmrg.w opcode with the
; packed mask register pair (%vm2 here).
102 ; Function Attrs: nounwind readnone
103 define fastcc <256 x double> @vmrgw_vvvMl(<256 x double> %0, <256 x double> %1, <512 x i1> %2) {
104 ; CHECK-LABEL: vmrgw_vvvMl:
106 ; CHECK-NEXT: lea %s0, 256
107 ; CHECK-NEXT: lvl %s0
108 ; CHECK-NEXT: vmrg.w %v0, %v0, %v1, %vm2
109 ; CHECK-NEXT: b.l.t (, %s10)
110 %4 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vvvMl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, i32 256)
111 ret <256 x double> %4
114 ; Function Attrs: nounwind readnone
115 declare <256 x double> @llvm.ve.vl.vmrgw.vvvMl(<256 x double>, <256 x double>, <512 x i1>, i32)
; vmrgw.vvvMvl: packed-mask vmrg.w with pass-through (%3), VL=128.
; Scratch register %v2 holds the merge result; the VL=256 vor copies it
; into %v0 for the return.
117 ; Function Attrs: nounwind readnone
118 define fastcc <256 x double> @vmrgw_vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
119 ; CHECK-LABEL: vmrgw_vvvMvl:
121 ; CHECK-NEXT: lea %s0, 128
122 ; CHECK-NEXT: lvl %s0
123 ; CHECK-NEXT: vmrg.w %v2, %v0, %v1, %vm2
124 ; CHECK-NEXT: lea %s16, 256
125 ; CHECK-NEXT: lvl %s16
126 ; CHECK-NEXT: vor %v0, (0)1, %v2
127 ; CHECK-NEXT: b.l.t (, %s10)
128 %5 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
129 ret <256 x double> %5
132 ; Function Attrs: nounwind readnone
133 declare <256 x double> @llvm.ve.vl.vmrgw.vvvMvl(<256 x double>, <256 x double>, <512 x i1>, <256 x double>, i32)
; vmrgw.vsvMl: scalar(i32)-vector vmrg.w with packed mask, VL=256.
; The i32 scalar is zero-extended first ("and %s0, %s0, (32)0" clears the
; upper 32 bits) before feeding the 32-bit merge.
135 ; Function Attrs: nounwind readnone
136 define fastcc <256 x double> @vmrgw_vsvMl(i32 signext %0, <256 x double> %1, <512 x i1> %2) {
137 ; CHECK-LABEL: vmrgw_vsvMl:
139 ; CHECK-NEXT: and %s0, %s0, (32)0
140 ; CHECK-NEXT: lea %s1, 256
141 ; CHECK-NEXT: lvl %s1
142 ; CHECK-NEXT: vmrg.w %v0, %s0, %v0, %vm2
143 ; CHECK-NEXT: b.l.t (, %s10)
144 %4 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vsvMl(i32 %0, <256 x double> %1, <512 x i1> %2, i32 256)
145 ret <256 x double> %4
148 ; Function Attrs: nounwind readnone
149 declare <256 x double> @llvm.ve.vl.vmrgw.vsvMl(i32, <256 x double>, <512 x i1>, i32)
; vmrgw.vsvMvl: scalar(i32)-vector vmrg.w with packed mask and
; pass-through (%3), VL=128. Combines the zero-extension of the scalar
; with the scratch-register + VL=256 vor copy pattern seen above.
151 ; Function Attrs: nounwind readnone
152 define fastcc <256 x double> @vmrgw_vsvMvl(i32 signext %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
153 ; CHECK-LABEL: vmrgw_vsvMvl:
155 ; CHECK-NEXT: and %s0, %s0, (32)0
156 ; CHECK-NEXT: lea %s1, 128
157 ; CHECK-NEXT: lvl %s1
158 ; CHECK-NEXT: vmrg.w %v1, %s0, %v0, %vm2
159 ; CHECK-NEXT: lea %s16, 256
160 ; CHECK-NEXT: lvl %s16
161 ; CHECK-NEXT: vor %v0, (0)1, %v1
162 ; CHECK-NEXT: b.l.t (, %s10)
163 %5 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vsvMvl(i32 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
164 ret <256 x double> %5
167 ; Function Attrs: nounwind readnone
168 declare <256 x double> @llvm.ve.vl.vmrgw.vsvMvl(i32, <256 x double>, <512 x i1>, <256 x double>, i32)