; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
; Test that we only promote arguments when the caller/callee have compatible
; function attributes.

target triple = "x86_64-unknown-linux-gnu"

; This should promote
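; Caller and callee both use attribute set #0 (avx512vl, 512-bit legal,
; 512-bit preferred), so an <8 x i64> passed by value has the same ABI on
; both sides and the readonly pointer argument can be promoted.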
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, ptr %arg1
  store <8 x i64> %tmp, ptr %arg
  ret void
}

define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
; CHECK-SAME: (ptr [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %tmp2, ptr %tmp)
  %tmp4 = load <8 x i64>, ptr %tmp2, align 32
  store <8 x i64> %tmp4, ptr %arg, align 2
  ret void
}

; This should promote
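; Both sides use #1: the preferred vector width drops to 256, but 512-bit
; vectors are still legal for caller and callee alike, so promotion is
; still ABI-safe.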
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, ptr %arg1
  store <8 x i64> %tmp, ptr %arg
  ret void
}

define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (ptr [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %tmp2, ptr %tmp)
  %tmp4 = load <8 x i64>, ptr %tmp2, align 32
  store <8 x i64> %tmp4, ptr %arg, align 2
  ret void
}

; This should promote
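; Only prefer-vector-width differs here (callee #1, caller #0);
; min-legal-vector-width is 512 on both sides, so the promoted <8 x i64>
; is passed identically and promotion is allowed.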
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, ptr %arg1
  store <8 x i64> %tmp, ptr %arg
  ret void
}

define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
; CHECK-SAME: (ptr [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %tmp2, ptr %tmp)
  %tmp4 = load <8 x i64>, ptr %tmp2, align 32
  store <8 x i64> %tmp4, ptr %arg, align 2
  ret void
}

; This should promote
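; The mirror of the previous case (callee #0, caller #1): again only the
; preference differs, so promotion goes ahead.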
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, ptr %arg1
  store <8 x i64> %tmp, ptr %arg
  ret void
}

define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
; CHECK-SAME: (ptr [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %tmp2, ptr %tmp)
  %tmp4 = load <8 x i64>, ptr %tmp2, align 32
  store <8 x i64> %tmp4, ptr %arg, align 2
  ret void
}

; This should not promote
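; min-legal-vector-width itself differs (512 in the callee's attributes,
; 256 in the caller's), so an <8 x i64> by-value argument could be lowered
; differently on each side; the argument must remain a pointer.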
define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, ptr %arg1
  store <8 x i64> %tmp, ptr %arg
  ret void
}

define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg) #2 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (ptr [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr [[TMP2]], ptr [[TMP]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %tmp2, ptr %tmp)
  %tmp4 = load <8 x i64>, ptr %tmp2, align 32
  store <8 x i64> %tmp4, ptr %arg, align 2
  ret void
}

; This should not promote
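; The same legality mismatch in the opposite direction (callee 256-bit,
; caller 512-bit), so this pair is not promoted either.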
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg, ptr readonly %arg1) #2 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, ptr %arg1
  store <8 x i64> %tmp, ptr %arg
  ret void
}

define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
; CHECK-SAME: (ptr [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr [[TMP2]], ptr [[TMP]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %tmp2, ptr %tmp)
  %tmp4 = load <8 x i64>, ptr %tmp2, align 32
  store <8 x i64> %tmp4, ptr %arg, align 2
  ret void
}

; This should promote
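; With only avx2, 512-bit vectors are not legal in registers for either
; function regardless of min-legal-vector-width, so both sides are expected
; to split an <8 x i64> the same way and promotion is safe despite the
; 512/256 attribute mismatch.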
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, ptr %arg1
  store <8 x i64> %tmp, ptr %arg
  ret void
}

define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
; CHECK-SAME: (ptr [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %tmp2, ptr %tmp)
  %tmp4 = load <8 x i64>, ptr %tmp2, align 32
  store <8 x i64> %tmp4, ptr %arg, align 2
  ret void
}

; This should promote
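; The swapped avx2 pairing promotes for the same reason: neither side can
; use 512-bit registers, so their vector ABIs agree.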
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, ptr %arg1
  store <8 x i64> %tmp, ptr %arg
  ret void
}

define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
; CHECK-SAME: (ptr [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %tmp2, ptr %tmp)
  %tmp4 = load <8 x i64>, ptr %tmp2, align 32
  store <8 x i64> %tmp4, ptr %arg, align 2
  ret void
}

; If the arguments are scalar, it's ok to promote.
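; The vector-width attributes only constrain vector arguments; an i32 is
; passed the same way under every attribute set in this file, so scalar
; promotion is unaffected.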
define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %X, ptr %Y) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = load i32, ptr %X
  %B = load i32, ptr %Y
  %C = add i32 %A, %B
  ret i32 %C
}

define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %B) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (ptr [[B:%.*]])
; CHECK-NEXT:    [[A:%.*]] = alloca i32
; CHECK-NEXT:    store i32 1, ptr [[A]]
; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]]
; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]]
; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = alloca i32
  store i32 1, ptr %A
  %C = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %A, ptr %B)
  ret i32 %C
}

; If the arguments are scalar, it's ok to promote.
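; The same scalar check with the legal512/legal256 roles swapped in the
; names; promotion again proceeds.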
define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %X, ptr %Y) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = load i32, ptr %X
  %B = load i32, ptr %Y
  %C = add i32 %A, %B
  ret i32 %C
}

define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %B) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
; CHECK-SAME: (ptr [[B:%.*]])
; CHECK-NEXT:    [[A:%.*]] = alloca i32
; CHECK-NEXT:    store i32 1, ptr [[A]]
; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]]
; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]]
; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = alloca i32
  store i32 1, ptr %A
  %C = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %A, ptr %B)
  ret i32 %C
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) #5

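; Attribute sets used above: #0-#2 are avx512vl with (min-legal, preferred)
; vector widths of (512,512), (512,256) and (256,256); #3 and #4 are avx2
; with min-legal widths of 512 and 256.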
attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" }
attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
attributes #5 = { argmemonly nounwind }