1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
2 ; RUN: opt -S -argpromotion < %s | FileCheck %s
3 ; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
4 ; Test that we only promote arguments when the caller/callee have compatible
5 ; function attributes (here: matching "+avx2" target features).
7 target triple = "x86_64-unknown-linux-gnu"
9 define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
10 ; CHECK-LABEL: define {{[^@]+}}@no_promote_avx2
11 ; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64>* readonly [[ARG1:%.*]])
13 ; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]]
14 ; CHECK-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]]
15 ; CHECK-NEXT: ret void
18 %tmp = load <4 x i64>, <4 x i64>* %arg1
19 store <4 x i64> %tmp, <4 x i64>* %arg
23 define void @no_promote(<4 x i64>* %arg) #1 {
24 ; CHECK-LABEL: define {{[^@]+}}@no_promote
25 ; CHECK-SAME: (<4 x i64>* [[ARG:%.*]])
27 ; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32
28 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
29 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
30 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
31 ; CHECK-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* [[TMP2]], <4 x i64>* [[TMP]])
32 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
33 ; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
34 ; CHECK-NEXT: ret void
37 %tmp = alloca <4 x i64>, align 32
38 %tmp2 = alloca <4 x i64>, align 32
39 %tmp3 = bitcast <4 x i64>* %tmp to i8*
40 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
41 call fastcc void @no_promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
42 %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
43 store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
47 define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
48 ; CHECK-LABEL: define {{[^@]+}}@promote_avx2
49 ; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]])
51 ; CHECK-NEXT: store <4 x i64> [[ARG1_VAL]], <4 x i64>* [[ARG]]
52 ; CHECK-NEXT: ret void
55 %tmp = load <4 x i64>, <4 x i64>* %arg1
56 store <4 x i64> %tmp, <4 x i64>* %arg
60 define void @promote(<4 x i64>* %arg) #0 {
61 ; CHECK-LABEL: define {{[^@]+}}@promote
62 ; CHECK-SAME: (<4 x i64>* [[ARG:%.*]])
64 ; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32
65 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
66 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
67 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
68 ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]]
69 ; CHECK-NEXT: call fastcc void @promote_avx2(<4 x i64>* [[TMP2]], <4 x i64> [[TMP_VAL]])
70 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
71 ; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
72 ; CHECK-NEXT: ret void
75 %tmp = alloca <4 x i64>, align 32
76 %tmp2 = alloca <4 x i64>, align 32
77 %tmp3 = bitcast <4 x i64>* %tmp to i8*
78 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
79 call fastcc void @promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
80 %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
81 store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #2

; #0 carries "target-features"="+avx2": @promote and @promote_avx2 both use it,
; so the <4 x i64>* readonly argument is promoted to a by-value <4 x i64>
; (see the [[ARG1_VAL]] CHECK lines above).
attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" }
; #1 lacks +avx2; @no_promote (#1) calling @no_promote_avx2 (#0) has mismatched
; features, so the argument stays a pointer (the no-promotion CHECK lines above).
attributes #1 = { nounwind uwtable }
; Attribute set for the llvm.memset intrinsic declaration.
attributes #2 = { argmemonly nounwind }