; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"
; Fold (bitcast (vector.insert undef, (load <2 x i8>)) to <vscale x 16 x i1>)
; into a direct predicate load. With vscale_range(1,1) a predicate register is
; exactly 2 bytes, so the <2 x i8> load covers the whole register.
define <vscale x 16 x i1> @pred_load_v2i8(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_v2i8(
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
  %load = load <2 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Same fold at vscale_range(2,2): predicate register is 4 bytes, matching the
; <4 x i8> fixed-width load.
define <vscale x 16 x i1> @pred_load_v4i8(ptr %addr) #1 {
; CHECK-LABEL: @pred_load_v4i8(
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Same fold at vscale_range(4,4): predicate register is 8 bytes, matching the
; <8 x i8> fixed-width load.
define <vscale x 16 x i1> @pred_load_v8i8(ptr %addr) #2 {
; CHECK-LABEL: @pred_load_v8i8(
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
  %load = load <8 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Ensure the insertion point is at the load: the load and the vector.insert
; live in different basic blocks, and the replacement predicate load must be
; created next to the original load, not next to the insert.
; NOTE(review): the entry:/bb1: labels, the br, and two CHECK lines were lost
; in transit (gaps in the original line numbering) and are reconstructed here
; from the surviving "br label %bb1" CHECK line — confirm against upstream.
define <vscale x 16 x i1> @pred_load_insertion_point(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_insertion_point(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT: br label %bb1
; CHECK: bb1:
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
entry:
  %load = load <2 x i8>, ptr %addr, align 4
  br label %bb1

bb1:
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Check that too small of a vscale prevents optimization: at vscale_range(1,1)
; the predicate register is only 2 bytes, so a <4 x i8> load does not match.
define <vscale x 16 x i1> @pred_load_neg1(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_neg1(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Check that too large of a vscale prevents optimization: at vscale_range(4,4)
; the predicate register is 8 bytes, so a <4 x i8> load does not match.
define <vscale x 16 x i1> @pred_load_neg2(ptr %addr) #2 {
; CHECK-LABEL: @pred_load_neg2(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Check that a non-zero insertion index prevents optimization: the fold only
; applies when the fixed vector lands at element 0 of the scalable vector.
define <vscale x 16 x i1> @pred_load_neg3(ptr %addr) #1 {
; CHECK-LABEL: @pred_load_neg3(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 4)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Check that differing vscale min/max prevents optimization: with
; vscale_range(2,4) the predicate register size is not a compile-time constant.
define <vscale x 16 x i1> @pred_load_neg4(ptr %addr) #3 {
; CHECK-LABEL: @pred_load_neg4(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
; Check that insertion into a non-undef vector prevents optimization: the
; passthru lanes would survive the insert, so a plain load cannot replace it.
define <vscale x 16 x i1> @pred_load_neg5(ptr %addr, <vscale x 2 x i8> %passthru) #1 {
; CHECK-LABEL: @pred_load_neg5(
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> %passthru, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8>, <2 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8>, <4 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8>, <8 x i8>, i64)
; Attribute groups fix the vscale range per test: fixed sizes (#0-#2) permit
; the fold; the non-constant range (#3) must block it.
attributes #0 = { "target-features"="+sve" vscale_range(1,1) }
attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
attributes #2 = { "target-features"="+sve" vscale_range(4,4) }
attributes #3 = { "target-features"="+sve" vscale_range(2,4) }