; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"
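
; The positive tests below pair each fixed-width load with a vscale_range that
; pins vscale, so the loaded vector's size in bits (16, 32 or 64) matches the
; size of a <vscale x 16 x i1> predicate exactly; the CHECK lines expect the
; load/insert/bitcast sequence to fold into a single predicate load.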
define <vscale x 16 x i1> @pred_load_v2i8(<2 x i8>* %addr) #0 {
; CHECK-LABEL: @pred_load_v2i8(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i8>* %addr to <vscale x 16 x i1>*
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
  %load = load <2 x i8>, <2 x i8>* %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

define <vscale x 16 x i1> @pred_load_v4i8(<4 x i8>* %addr) #1 {
; CHECK-LABEL: @pred_load_v4i8(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8>* %addr to <vscale x 16 x i1>*
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
  %load = load <4 x i8>, <4 x i8>* %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

define <vscale x 16 x i1> @pred_load_v8i8(<8 x i8>* %addr) #2 {
; CHECK-LABEL: @pred_load_v8i8(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* %addr to <vscale x 16 x i1>*
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
  %load = load <8 x i8>, <8 x i8>* %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Ensure the insertion point is at the load
define <vscale x 16 x i1> @pred_load_insertion_point(<2 x i8>* %addr) #0 {
; CHECK-LABEL: @pred_load_insertion_point(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i8>* %addr to <vscale x 16 x i1>*
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
; CHECK-NEXT: br label %bb1
; CHECK: bb1:
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
entry:
  %load = load <2 x i8>, <2 x i8>* %addr, align 4
  br label %bb1

bb1:
  %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that too small a vscale prevents optimization
define <vscale x 16 x i1> @pred_load_neg1(<4 x i8>* %addr) #0 {
; CHECK-LABEL: @pred_load_neg1(
; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
  %load = load <4 x i8>, <4 x i8>* %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that too large a vscale prevents optimization
define <vscale x 16 x i1> @pred_load_neg2(<4 x i8>* %addr) #2 {
; CHECK-LABEL: @pred_load_neg2(
; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
  %load = load <4 x i8>, <4 x i8>* %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that a non-zero index prevents optimization
define <vscale x 16 x i1> @pred_load_neg3(<4 x i8>* %addr) #1 {
; CHECK-LABEL: @pred_load_neg3(
; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
  %load = load <4 x i8>, <4 x i8>* %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 4)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that differing vscale min/max prevents optimization
define <vscale x 16 x i1> @pred_load_neg4(<4 x i8>* %addr) #3 {
; CHECK-LABEL: @pred_load_neg4(
; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
  %load = load <4 x i8>, <4 x i8>* %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that insertion into a non-undef vector prevents optimization
define <vscale x 16 x i1> @pred_load_neg5(<4 x i8>* %addr, <vscale x 2 x i8> %passthru) #1 {
; CHECK-LABEL: @pred_load_neg5(
; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
  %load = load <4 x i8>, <4 x i8>* %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> %passthru, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

declare <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8>, <2 x i8>, i64)
declare <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8>, <4 x i8>, i64)
declare <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8>, <8 x i8>, i64)

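; #0, #1 and #2 pin vscale to exactly 1, 2 and 4 respectively; #3 only bounds
; it to the range [2,4], which is why @pred_load_neg4 is not folded.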
attributes #0 = { "target-features"="+sve" vscale_range(1,1) }
attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
attributes #2 = { "target-features"="+sve" vscale_range(4,4) }
attributes #3 = { "target-features"="+sve" vscale_range(2,4) }