; InstCombine test for chains of llvm.aarch64.sve.convert.{to,from}.svbool calls.
; The pass is requested with the -passes= pipeline syntax because the legacy
; "-instcombine" flag form is no longer accepted by recent opt releases.
1 ; RUN: opt -S -passes=instcombine < %s | FileCheck %s
3 target triple = "aarch64"
; Round trip of an nxv8i1 predicate through svbool (<vscale x 16 x i1>) and back.
; The checks expect the conversions to fold away so that %a is returned directly.
5 define <vscale x 8 x i1> @reinterpret_test_h(<vscale x 8 x i1> %a) {
6 ; CHECK-LABEL: @reinterpret_test_h(
8 ; CHECK: ret <vscale x 8 x i1> %a
; Convert %a to svbool, then convert the result straight back to nxv8i1.
9 %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
10 %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
11 ret <vscale x 8 x i1> %2
; Reverse order of reinterpret_test_h: svbool -> nxv8i1 -> svbool.
14 ; Reinterprets are not redundant because the second reinterpret zeros the
15 ; lanes that don't exist within its input.
; The checks therefore expect both calls and the return to be left unchanged.
16 define <vscale x 16 x i1> @reinterpret_test_h_rev(<vscale x 16 x i1> %a) {
17 ; CHECK-LABEL: @reinterpret_test_h_rev(
18 ; CHECK: %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
19 ; CHECK-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
20 ; CHECK-NEXT: ret <vscale x 16 x i1> %2
; Input IR: from.svbool first, then to.svbool on its result.
21 %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
22 %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
23 ret <vscale x 16 x i1> %2
; Round trip of an nxv4i1 predicate through svbool (<vscale x 16 x i1>) and back.
; The checks expect the conversions to fold away so that %a is returned directly.
26 define <vscale x 4 x i1> @reinterpret_test_w(<vscale x 4 x i1> %a) {
27 ; CHECK-LABEL: @reinterpret_test_w(
29 ; CHECK: ret <vscale x 4 x i1> %a
; Convert %a to svbool, then convert the result straight back to nxv4i1.
30 %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
31 %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
32 ret <vscale x 4 x i1> %2
; Reverse order of reinterpret_test_w: svbool -> nxv4i1 -> svbool.
35 ; Reinterprets are not redundant because the second reinterpret zeros the
36 ; lanes that don't exist within its input.
; The checks therefore expect both calls and the return to be left unchanged.
37 define <vscale x 16 x i1> @reinterpret_test_w_rev(<vscale x 16 x i1> %a) {
38 ; CHECK-LABEL: @reinterpret_test_w_rev(
39 ; CHECK: %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
40 ; CHECK-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
41 ; CHECK-NEXT: ret <vscale x 16 x i1> %2
; Input IR: from.svbool first, then to.svbool on its result.
42 %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
43 %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
44 ret <vscale x 16 x i1> %2
; Round trip of an nxv2i1 predicate through svbool (<vscale x 16 x i1>) and back.
; The checks expect the conversions to fold away so that %a is returned directly.
47 define <vscale x 2 x i1> @reinterpret_test_d(<vscale x 2 x i1> %a) {
48 ; CHECK-LABEL: @reinterpret_test_d(
50 ; CHECK: ret <vscale x 2 x i1> %a
; Convert %a to svbool, then convert the result straight back to nxv2i1.
51 %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
52 %2 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %1)
53 ret <vscale x 2 x i1> %2
; Reverse order of reinterpret_test_d: svbool -> nxv2i1 -> svbool.
56 ; Reinterprets are not redundant because the second reinterpret zeros the
57 ; lanes that don't exist within its input.
; The checks therefore expect both calls and the return to be left unchanged.
58 define <vscale x 16 x i1> @reinterpret_test_d_rev(<vscale x 16 x i1> %a) {
59 ; CHECK-LABEL: @reinterpret_test_d_rev(
60 ; CHECK: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
61 ; CHECK-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
62 ; CHECK-NEXT: ret <vscale x 16 x i1> %2
; Input IR: from.svbool first, then to.svbool on its result.
63 %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
64 %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
65 ret <vscale x 16 x i1> %2
; Six-conversion chain:
;   nxv2i1 -> svbool -> nxv4i1 -> svbool -> nxv4i1 -> svbool -> nxv2i1.
; The checks expect the whole chain to collapse to a plain return of %a.
68 define <vscale x 2 x i1> @reinterpret_test_full_chain(<vscale x 2 x i1> %a) {
69 ; CHECK-LABEL: @reinterpret_test_full_chain(
70 ; CHECK: ret <vscale x 2 x i1> %a
; Start from the nxv2i1 argument ...
71 %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
; ... pass through nxv4i1 twice ...
72 %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
73 %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
74 %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
75 %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
; ... and end back at the original nxv2i1 type.
76 %6 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %5)
77 ret <vscale x 2 x i1> %6
; Chain nxv2i1 -> svbool -> nxv4i1 -> svbool -> nxv4i1.
80 ; The last two reinterprets are not necessary, since they are doing the same
81 ; work as the first two.
; The checks expect only %1 and %2 to remain, with %2 returned.
82 define <vscale x 4 x i1> @reinterpret_test_partial_chain(<vscale x 2 x i1> %a) {
83 ; CHECK-LABEL: @reinterpret_test_partial_chain(
84 ; CHECK: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
85 ; CHECK-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
86 ; CHECK-NEXT: ret <vscale x 4 x i1> %2
; First pair: nxv2i1 -> svbool -> nxv4i1 (expected to stay).
87 %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
88 %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
; Second pair: nxv4i1 -> svbool -> nxv4i1 (expected to be removed).
89 %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
90 %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
91 ret <vscale x 4 x i1> %4
94 ; The chain cannot be reduced because of the second reinterpret, which causes zeroing.
; Chain nxv8i1 -> svbool -> nxv4i1 -> svbool -> nxv8i1.
; The checks expect all four conversions and the return to be left in place.
96 define <vscale x 8 x i1> @reinterpret_test_irreducible_chain(<vscale x 8 x i1> %a) {
97 ; CHECK-LABEL: @reinterpret_test_irreducible_chain(
98 ; CHECK: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
99 ; CHECK-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
100 ; CHECK-NEXT: %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
101 ; CHECK-NEXT: %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
102 ; CHECK-NEXT: ret <vscale x 8 x i1> %4
; The value starts and ends as nxv8i1 but is routed through nxv4i1 in between.
103 %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
104 %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
105 %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
106 %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
107 ret <vscale x 8 x i1> %4
110 ; Here, the candidate list is larger than the number of instructions that we end up removing.
; Chain nxv8i1 -> svbool -> nxv4i1 -> svbool -> nxv4i1.
; The checks expect the first two conversions to remain and the trailing
; nxv4i1 -> svbool -> nxv4i1 pair to be removed, so %2 is returned.
112 define <vscale x 4 x i1> @reinterpret_test_keep_some_candidates(<vscale x 8 x i1> %a) {
113 ; CHECK-LABEL: @reinterpret_test_keep_some_candidates(
114 ; CHECK: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
115 ; CHECK-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
116 ; CHECK-NEXT: ret <vscale x 4 x i1> %2
; First pair changes the type from nxv8i1 to nxv4i1 (expected to stay).
117 %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
118 %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
; Second pair is a same-type nxv4i1 round trip (expected to be removed).
119 %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
120 %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
121 ret <vscale x 4 x i1> %4
; Phi of svbool values where every incoming value is a to.svbool conversion of an
; nxv2i1 argument, and the phi's only visible user is a from.svbool back to nxv2i1.
; The checks expect a phi over the original nxv2i1 arguments and no
; <vscale x 16 x i1> phi.
; The label below is anchored with '@' and '(' so that it cannot also match the
; longer names reinterpret_reductions_1, reinterpret_reductions_2 and
; reinterpret_reductions3.
124 define <vscale x 2 x i1> @reinterpret_reductions(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
125 ; CHECK-LABEL: @reinterpret_reductions(
127 ; CHECK-NOT: phi <vscale x 16 x i1>
128 ; CHECK: phi <vscale x 2 x i1> [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ]
; Dispatch on %cond: 43 -> br_phi_a, 45 -> br_phi_b, anything else -> br_phi_c.
133 switch i32 %cond, label %br_phi_c [
134 i32 43, label %br_phi_a
135 i32 45, label %br_phi_b
; One to.svbool conversion per incoming value of the phi.
139 %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
143 %b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
147 %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
; Merge the svbool values and convert the merged value back to nxv2i1.
151 %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
152 %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
153 ret <vscale x 2 x i1> %pg1
156 ; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1)
157 ; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets.
; The checks expect the svbool phi and the trailing from.svbool call to remain,
; with no nxv2i1 phi created. The label is anchored as "@name(" to match the
; convention used by the other tests in this file.
158 define <vscale x 2 x i1> @reinterpret_reductions_1(i32 %cond, <vscale x 2 x i1> %a, <vscale x 4 x i1> %b, <vscale x 2 x i1> %c) {
159 ; CHECK-LABEL: @reinterpret_reductions_1(
161 ; CHECK: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
162 ; CHECK-NOT: phi <vscale x 2 x i1>
163 ; CHECK: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
; Dispatch on %cond: 43 -> br_phi_a, 45 -> br_phi_b, anything else -> br_phi_c.
167 switch i32 %cond, label %br_phi_c [
168 i32 43, label %br_phi_a
169 i32 45, label %br_phi_b
; %a and %c are converted from nxv2i1, but %b is converted from nxv4i1.
173 %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
177 %b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %b)
181 %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
; Merge the svbool values and convert the merged value to nxv2i1.
185 %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
186 %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
187 ret <vscale x 2 x i1> %pg1
190 ; No transform. Similar to the test above, but here only two of the arguments need to
191 ; be converted to svbool.
; Phi of svbool values where %a and %c come from to.svbool conversions of nxv2i1
; arguments, while %b is already <vscale x 16 x i1> and feeds the phi directly.
; The checks expect the svbool phi and the trailing from.svbool call to remain,
; with no nxv2i1 phi created. The label is anchored as "@name(" to match the
; convention used by the other tests in this file.
192 define <vscale x 2 x i1> @reinterpret_reductions_2(i32 %cond, <vscale x 2 x i1> %a, <vscale x 16 x i1> %b, <vscale x 2 x i1> %c) {
193 ; CHECK-LABEL: @reinterpret_reductions_2(
195 ; CHECK: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
196 ; CHECK-NOT: phi <vscale x 2 x i1>
197 ; CHECK: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
; Dispatch on %cond: 43 -> br_phi_a, 45 -> br_phi_b, anything else -> br_phi_c.
201 switch i32 %cond, label %br_phi_c [
202 i32 43, label %br_phi_a
203 i32 45, label %br_phi_b
; Only %a and %c need a to.svbool conversion.
207 %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
214 %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
; Merge the svbool values (including the unconverted %b) and convert to nxv2i1.
218 %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
219 %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
220 ret <vscale x 2 x i1> %pg1
223 ; Similar to reinterpret_reductions but the reinterprets remain because the
224 ; original phi cannot be removed (i.e. prefer reinterprets over multiple phis).
; Here the function returns the svbool phi %pg itself, so the phi has a user
; other than the from.svbool call. The checks expect the svbool phi to remain,
; no nxv2i1 phi to be created, and %pg to be returned. The label is anchored as
; "@name(" to match the convention used by the other tests in this file.
225 define <vscale x 16 x i1> @reinterpret_reductions3(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
226 ; CHECK-LABEL: @reinterpret_reductions3(
227 ; CHECK: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
228 ; CHECK-NOT: phi <vscale x 2 x i1>
229 ; CHECK: ret <vscale x 16 x i1> %pg
; Dispatch on %cond: 43 -> br_phi_a, 45 -> br_phi_b, anything else -> br_phi_c.
232 switch i32 %cond, label %br_phi_c [
233 i32 43, label %br_phi_a
234 i32 45, label %br_phi_b
; One to.svbool conversion per incoming value of the phi.
238 %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
242 %b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
246 %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
250 %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
; %pg1 is computed but is not the returned value; the return uses %pg directly.
251 %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
252 ret <vscale x 16 x i1> %pg
; Declarations of the SVE predicate conversion intrinsics called in this file.
; to.svbool.nxvNi1 takes a <vscale x N x i1> and returns <vscale x 16 x i1>.
255 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
256 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
257 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
; from.svbool.nxvNi1 takes a <vscale x 16 x i1> and returns <vscale x N x i1>.
258 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
259 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
260 declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
; Attribute group enabling the SVE target feature.
; NOTE(review): no function visible in this file references #0 -- confirm
; whether the group is still needed or should be attached to the test functions.
262 attributes #0 = { "target-features"="+sve" }