; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
; Test that we only promote arguments when the caller/callee have compatible
; function attributes.

target triple = "x86_64-unknown-linux-gnu"
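
; Negative case: the callee requires +avx2 (attribute set #0) but its caller
; @no_promote does not (#1). Promoting %arg1 would pass a 256-bit <4 x i64>
; by value, which the non-AVX caller cannot do ABI-compatibly, so
; argpromotion must leave the argument as a pointer.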
define internal fastcc void @no_promote_avx2(ptr %arg, ptr readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@no_promote_avx2
; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = load <4 x i64>, ptr [[ARG1]]
; CHECK-NEXT:    store <4 x i64> [[TMP]], ptr [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <4 x i64>, ptr %arg1
  store <4 x i64> %tmp, ptr %arg
  ret void
}

define void @no_promote(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@no_promote
; CHECK-SAME: (ptr [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <4 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    call fastcc void @no_promote_avx2(ptr [[TMP2]], ptr [[TMP]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT:    store <4 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <4 x i64>, align 32
  %tmp2 = alloca <4 x i64>, align 32
  call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
  call fastcc void @no_promote_avx2(ptr %tmp2, ptr %tmp)
  %tmp4 = load <4 x i64>, ptr %tmp2, align 32
  store <4 x i64> %tmp4, ptr %arg, align 2
  ret void
}
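
; Positive case: caller and callee share attribute set #0 (+avx2), so
; argpromotion can rewrite @promote_avx2 to take the 256-bit vector by value
; and hoist the load of %arg1 into the caller, as the checks below verify.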
define internal fastcc void @promote_avx2(ptr %arg, ptr readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@promote_avx2
; CHECK-SAME: (ptr [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <4 x i64> [[ARG1_VAL]], ptr [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <4 x i64>, ptr %arg1
  store <4 x i64> %tmp, ptr %arg
  ret void
}

define void @promote(ptr %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@promote
; CHECK-SAME: (ptr [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <4 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <4 x i64>, ptr [[TMP]]
; CHECK-NEXT:    call fastcc void @promote_avx2(ptr [[TMP2]], <4 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT:    store <4 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <4 x i64>, align 32
  %tmp2 = alloca <4 x i64>, align 32
  call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
  call fastcc void @promote_avx2(ptr %tmp2, ptr %tmp)
  %tmp4 = load <4 x i64>, ptr %tmp2, align 32
  store <4 x i64> %tmp4, ptr %arg, align 2
  ret void
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) #2
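
; #0 enables AVX2 (making 256-bit vector arguments legal); #1 does not,
; which is the attribute mismatch the no_promote pair exercises.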
attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" }
attributes #1 = { nounwind uwtable }
attributes #2 = { argmemonly nounwind }