1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -memcpyopt -S %s | FileCheck %s
4 ; memset -> memcpy forwarding, if memcpy is larger than memset, but trailing
5 ; bytes are known to be undef.
; Aggregate type used for the alloca'ed copy sources in this file: one i64
; followed by two i32 fields.
8 %T = type { i64, i32, i32 }
; Basic case: the source is an alloca of %T whose first 12 bytes are zeroed by
; the memset. The CHECK lines expect the 16-byte memcpy to be replaced by a
; 12-byte memset of %result.
10 define void @test_alloca(i8* %result) {
11 ; CHECK-LABEL: @test_alloca(
12 ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
13 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
14 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
15 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
16 ; CHECK-NEXT: ret void
18 %a = alloca %T, align 8
19 %b = bitcast %T* %a to i8*
20 call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
21 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
; Alloca source wrapped in 16-byte lifetime.start / lifetime.end markers, with
; only the first 12 bytes memset. The CHECK lines expect the 16-byte memcpy to
; become a 12-byte memset of %result, with the lifetime markers left in place.
25 define void @test_alloca_with_lifetimes(i8* %result) {
26 ; CHECK-LABEL: @test_alloca_with_lifetimes(
27 ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
28 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
29 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[B]])
30 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
31 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
32 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[B]])
33 ; CHECK-NEXT: ret void
35 %a = alloca %T, align 8
36 %b = bitcast %T* %a to i8*
37 call void @llvm.lifetime.start.p0i8(i64 16, i8* %b)
38 call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
39 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
40 call void @llvm.lifetime.end.p0i8(i64 16, i8* %b)
; The source is the result of malloc(16) with 16-byte lifetime markers; only the
; first 12 bytes are memset. The CHECK lines expect the 16-byte memcpy to be
; replaced by a 12-byte memset of %result, with the lifetime markers and the
; call to free left in place.
44 define void @test_malloc_with_lifetimes(i8* %result) {
45 ; CHECK-LABEL: @test_malloc_with_lifetimes(
46 ; CHECK-NEXT: [[A:%.*]] = call i8* @malloc(i64 16)
47 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[A]])
48 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
49 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
50 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[A]])
51 ; CHECK-NEXT: call void @free(i8* [[A]])
52 ; CHECK-NEXT: ret void
54 %a = call i8* @malloc(i64 16)
55 call void @llvm.lifetime.start.p0i8(i64 16, i8* %a)
56 call void @llvm.memset.p0i8.i64(i8* align 8 %a, i8 0, i64 12, i1 false)
57 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %a, i64 16, i1 false)
58 call void @llvm.lifetime.end.p0i8(i64 16, i8* %a)
59 call void @free(i8* %a)
63 ; memcpy size is larger than lifetime, don't optimize.
; Here the lifetime markers cover 12 bytes while the memcpy copies 16; the CHECK
; lines expect the memcpy to be kept unchanged.
64 define void @test_copy_larger_than_lifetime_size(i8* %result) {
65 ; CHECK-LABEL: @test_copy_larger_than_lifetime_size(
66 ; CHECK-NEXT: [[A:%.*]] = call i8* @malloc(i64 16)
67 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 12, i8* [[A]])
68 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
69 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[A]], i64 16, i1 false)
70 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 12, i8* [[A]])
71 ; CHECK-NEXT: call void @free(i8* [[A]])
72 ; CHECK-NEXT: ret void
74 %a = call i8* @malloc(i64 16)
75 call void @llvm.lifetime.start.p0i8(i64 12, i8* %a)
76 call void @llvm.memset.p0i8.i64(i8* align 8 %a, i8 0, i64 12, i1 false)
77 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %a, i64 16, i1 false)
78 call void @llvm.lifetime.end.p0i8(i64 12, i8* %a)
79 call void @free(i8* %a)
83 ; The trailing bytes are not known to be undef, we can't ignore them.
; The source is the %input argument rather than an allocation made in this
; function; the CHECK lines expect both the 12-byte memset and the 16-byte
; memcpy to be kept unchanged.
84 define void @test_not_undef_memory(i8* %result, i8* %input) {
85 ; CHECK-LABEL: @test_not_undef_memory(
86 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[INPUT:%.*]], i8 0, i64 12, i1 false)
87 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[INPUT]], i64 16, i1 false)
88 ; CHECK-NEXT: ret void
90 call void @llvm.memset.p0i8.i64(i8* align 8 %input, i8 0, i64 12, i1 false)
91 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %input, i64 16, i1 false)
95 ; Memset is volatile, memcpy is not. Can be optimized.
; The CHECK lines expect the volatile memset (i1 true) to stay as written and
; the memcpy to be replaced by a non-volatile 12-byte memset of %result.
96 define void @test_volatile_memset(i8* %result) {
97 ; CHECK-LABEL: @test_volatile_memset(
98 ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
99 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
100 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 true)
101 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
102 ; CHECK-NEXT: ret void
104 %a = alloca %T, align 8
105 %b = bitcast %T* %a to i8*
106 call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 true)
107 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
111 ; Memcpy is volatile, memset is not. Cannot be optimized.
; The CHECK lines expect the volatile 16-byte memcpy (i1 true) to be kept
; exactly as written after the 12-byte memset.
112 define void @test_volatile_memcpy(i8* %result) {
113 ; CHECK-LABEL: @test_volatile_memcpy(
114 ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
115 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
116 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
117 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 true)
118 ; CHECK-NEXT: ret void
120 %a = alloca %T, align 8
121 %b = bitcast %T* %a to i8*
122 call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
123 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 true)
127 ; Write between memset and memcpy, can't optimize.
; The CHECK lines expect a `store i8 -1` to the start of the buffer between the
; 12-byte memset and the memcpy, and the 16-byte memcpy to be kept unchanged.
128 define void @test_write_between(i8* %result) {
129 ; CHECK-LABEL: @test_write_between(
130 ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
131 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
132 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
133 ; CHECK-NEXT: store i8 -1, i8* [[B]]
134 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
135 ; CHECK-NEXT: ret void
137 %a = alloca %T, align 8
138 %b = bitcast %T* %a to i8*
139 call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
141 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
145 ; A write prior to the memset, which is part of the memset region.
146 ; We could optimize this, but currently don't, because the used memory location is imprecise.
; The CHECK lines expect a `store i8 -1` to the start of the buffer, then an
; 8-byte memset, and the 16-byte memcpy kept unchanged.
147 define void @test_write_before_memset_in_memset_region(i8* %result) {
148 ; CHECK-LABEL: @test_write_before_memset_in_memset_region(
149 ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
150 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
151 ; CHECK-NEXT: store i8 -1, i8* [[B]]
152 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 8, i1 false)
153 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
154 ; CHECK-NEXT: ret void
156 %a = alloca %T, align 8
157 %b = bitcast %T* %a to i8*
159 call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 8, i1 false)
160 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
164 ; A write prior to the memset, which is part of the memcpy (but not memset) region.
165 ; This cannot be optimized.
; The store writes the i32 field at index 2 of %T, while the memset covers only
; the first 8 bytes. The CHECK lines expect the 16-byte memcpy to be kept
; unchanged.
166 define void @test_write_before_memset_in_memcpy_region(i8* %result) {
167 ; CHECK-LABEL: @test_write_before_memset_in_memcpy_region(
168 ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
169 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
170 ; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[T]], %T* [[A]], i64 0, i32 2
171 ; CHECK-NEXT: store i32 -1, i32* [[C]]
172 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 8, i1 false)
173 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
174 ; CHECK-NEXT: ret void
176 %a = alloca %T, align 8
177 %b = bitcast %T* %a to i8*
178 %c = getelementptr inbounds %T, %T* %a, i64 0, i32 2
179 store i32 -1, i32* %c
180 call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 8, i1 false)
181 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
185 ; A write prior to the memset, which is part of both the memset and memcpy regions.
186 ; This cannot be optimized.
; The store writes the i32 field at index 1 of %T and the memset covers the
; first 10 bytes. The CHECK lines expect the 16-byte memcpy to be kept
; unchanged.
187 define void @test_write_before_memset_in_both_regions(i8* %result) {
188 ; CHECK-LABEL: @test_write_before_memset_in_both_regions(
189 ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
190 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
191 ; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[T]], %T* [[A]], i64 0, i32 1
192 ; CHECK-NEXT: store i32 -1, i32* [[C]]
193 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 10, i1 false)
194 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
195 ; CHECK-NEXT: ret void
197 %a = alloca %T, align 8
198 %b = bitcast %T* %a to i8*
199 %c = getelementptr inbounds %T, %T* %a, i64 0, i32 1
200 store i32 -1, i32* %c
201 call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 10, i1 false)
202 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
; External allocation routines called by the malloc-based tests.
206 declare i8* @malloc(i64)
207 declare void @free(i8*)
; Memory intrinsics whose interaction is being tested (the trailing i1 operand
; is the volatile flag).
209 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
210 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
; Lifetime marker intrinsics used by the *_with_lifetimes and
; copy-larger-than-lifetime tests.
212 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
213 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)