1 ; RUN: opt -S -argpromotion < %s | FileCheck %s
2 ; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
3 ; Test that we only promote arguments when the caller/callee have compatible
6 target triple = "x86_64-unknown-linux-gnu"
; Case 1: caller and callee both carry attrs #0 (+avx512vl, min-legal-vector-width=512,
; prefer-vector-width=512). Widths agree, so argpromotion rewrites %arg1 from a
; readonly pointer into a by-value <8 x i64> (%arg1.val in the CHECK-LABEL below).
9 ; CHECK-LABEL: @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64> %arg1.val)
10 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
12 %tmp = load <8 x i64>, <8 x i64>* %arg1
13 store <8 x i64> %tmp, <8 x i64>* %arg
17 define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 {
19 %tmp = alloca <8 x i64>, align 32
20 %tmp2 = alloca <8 x i64>, align 32
21 %tmp3 = bitcast <8 x i64>* %tmp to i8*
22 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
23 call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
24 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
25 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
; Case 2: caller and callee both carry attrs #1 (+avx512vl, min-legal-vector-width=512,
; prefer-vector-width=256). Attributes match exactly, so %arg1 is promoted to a
; by-value <8 x i64> as the CHECK-LABEL signature shows.
30 ; CHECK-LABEL: @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val)
31 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
33 %tmp = load <8 x i64>, <8 x i64>* %arg1
34 store <8 x i64> %tmp, <8 x i64>* %arg
38 define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 {
40 %tmp = alloca <8 x i64>, align 32
41 %tmp2 = alloca <8 x i64>, align 32
42 %tmp3 = bitcast <8 x i64>* %tmp to i8*
43 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
44 call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
45 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
46 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
; Case 3: callee has attrs #1 (prefer-vector-width=256), caller has attrs #0
; (prefer-vector-width=512); both share min-legal-vector-width=512 and +avx512vl.
; Only the preference differs, and the CHECK-LABEL shows promotion still happens.
51 ; CHECK-LABEL: @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val)
52 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
54 %tmp = load <8 x i64>, <8 x i64>* %arg1
55 store <8 x i64> %tmp, <8 x i64>* %arg
59 define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 {
61 %tmp = alloca <8 x i64>, align 32
62 %tmp2 = alloca <8 x i64>, align 32
63 %tmp3 = bitcast <8 x i64>* %tmp to i8*
64 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
65 call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
66 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
67 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
; Case 4: mirror of case 3 — callee has attrs #0 (prefer 512), caller has attrs #1
; (prefer 256); min-legal-vector-width=512 on both sides. The CHECK-LABEL confirms
; %arg1 is still promoted to a by-value <8 x i64>.
72 ; CHECK-LABEL: @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64> %arg1.val)
73 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
75 %tmp = load <8 x i64>, <8 x i64>* %arg1
76 store <8 x i64> %tmp, <8 x i64>* %arg
80 define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 {
82 %tmp = alloca <8 x i64>, align 32
83 %tmp2 = alloca <8 x i64>, align 32
84 %tmp3 = bitcast <8 x i64>* %tmp to i8*
85 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
86 call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
87 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
88 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
; Case 5: callee has attrs #1 (min-legal-vector-width=512) but the caller has
; attrs #2 (min-legal-vector-width=256). The legal-width mismatch makes the
; <8 x i64> by-value argument unsafe, so the CHECK-LABEL keeps the pointer form.
92 ; This should not promote
93 ; CHECK-LABEL: @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1)
94 define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
96 %tmp = load <8 x i64>, <8 x i64>* %arg1
97 store <8 x i64> %tmp, <8 x i64>* %arg
101 define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 {
103 %tmp = alloca <8 x i64>, align 32
104 %tmp2 = alloca <8 x i64>, align 32
105 %tmp3 = bitcast <8 x i64>* %tmp to i8*
106 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
107 call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
108 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
109 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
; Case 6: mirror of case 5 — callee has attrs #2 (min-legal-vector-width=256),
; caller has attrs #1 (min-legal-vector-width=512). Legal widths differ, so the
; CHECK-LABEL expects the argument to remain an un-promoted readonly pointer.
113 ; This should not promote
114 ; CHECK-LABEL: @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1)
115 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 {
117 %tmp = load <8 x i64>, <8 x i64>* %arg1
118 store <8 x i64> %tmp, <8 x i64>* %arg
122 define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 {
124 %tmp = alloca <8 x i64>, align 32
125 %tmp2 = alloca <8 x i64>, align 32
126 %tmp3 = bitcast <8 x i64>* %tmp to i8*
127 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
128 call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
129 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
130 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
; Case 7: callee has attrs #3 (+avx2, min-legal 512), caller has attrs #4 (+avx2,
; min-legal 256). Despite the min-legal mismatch the CHECK-LABEL expects promotion —
; presumably because with only +avx2 (no avx512) the width attrs are not binding
; the same way as in cases 5/6; NOTE(review): rationale inferred, confirm with pass.
134 ; This should promote
135 ; CHECK-LABEL: @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val)
136 define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 {
138 %tmp = load <8 x i64>, <8 x i64>* %arg1
139 store <8 x i64> %tmp, <8 x i64>* %arg
143 define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 {
145 %tmp = alloca <8 x i64>, align 32
146 %tmp2 = alloca <8 x i64>, align 32
147 %tmp3 = bitcast <8 x i64>* %tmp to i8*
148 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
149 call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
150 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
151 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
; Case 8: mirror of case 7 — callee attrs #4 (+avx2, min-legal 256), caller attrs #3
; (+avx2, min-legal 512). CHECK-LABEL expects promotion to a by-value <8 x i64>
; even though the min-legal widths differ (both sides are avx2-only).
155 ; This should promote
156 ; CHECK-LABEL: @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val)
157 define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 {
159 %tmp = load <8 x i64>, <8 x i64>* %arg1
160 store <8 x i64> %tmp, <8 x i64>* %arg
164 define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 {
166 %tmp = alloca <8 x i64>, align 32
167 %tmp2 = alloca <8 x i64>, align 32
168 %tmp3 = bitcast <8 x i64>* %tmp to i8*
169 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
170 call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
171 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
172 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
176 ; Function Attrs: argmemonly nounwind
177 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5
; Attribute sets used to pair callers/callees above:
;   #0 avx512vl, legal 512, prefer 512   #1 avx512vl, legal 512, prefer 256
;   #2 avx512vl, legal 256, prefer 256   #3 avx2,     legal 512, prefer 256
;   #4 avx2,     legal 256, prefer 256   #5 memset intrinsic attrs
179 attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" }
180 attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
181 attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
182 attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
183 attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
184 attributes #5 = { argmemonly nounwind }