1 ; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
3 target triple = "x86_64-unknown-linux-gnu"
5 ;; memcpy.atomic formation (atomic load & store)
6 define void @test1(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
8 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
12 %Base = alloca i8, i32 10000 ; copy source: stack buffer of 10000 x i8
13 %Dest = alloca i8, i32 10000 ; copy destination: stack buffer of 10000 x i8
16 for.body: ; preds = %bb.nph, %for.body
17 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
18 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar ; &Base[indvar]
19 %DestI = getelementptr i8, i8* %Dest, i64 %indvar ; &Dest[indvar]
20 %V = load atomic i8, i8* %I.0.014 unordered, align 1 ; unordered-atomic load of one byte
21 store atomic i8 %V, i8* %DestI unordered, align 1 ; unordered-atomic store of that byte
22 %indvar.next = add i64 %indvar, 1 ; advance by one element
23 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
24 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
26 for.end: ; preds = %for.body, %entry
30 ;; memcpy.atomic formation (atomic store, normal load)
31 define void @test2(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
32 ; CHECK-LABEL: @test2(
33 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
37 %Base = alloca i8, i32 10000 ; copy source: stack buffer of 10000 x i8
38 %Dest = alloca i8, i32 10000 ; copy destination: stack buffer of 10000 x i8
41 for.body: ; preds = %bb.nph, %for.body
42 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
43 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar ; &Base[indvar]
44 %DestI = getelementptr i8, i8* %Dest, i64 %indvar ; &Dest[indvar]
45 %V = load i8, i8* %I.0.014, align 1 ; plain (non-atomic) load, explicitly align 1
46 store atomic i8 %V, i8* %DestI unordered, align 1 ; only the store side is unordered-atomic
47 %indvar.next = add i64 %indvar, 1 ; advance by one element
48 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
49 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
51 for.end: ; preds = %for.body, %entry
55 ;; memcpy.atomic formation (atomic store, normal load w/ no align)
56 define void @test2b(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
57 ; CHECK-LABEL: @test2b(
58 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
62 %Base = alloca i8, i32 10000 ; copy source: stack buffer of 10000 x i8
63 %Dest = alloca i8, i32 10000 ; copy destination: stack buffer of 10000 x i8
66 for.body: ; preds = %bb.nph, %for.body
67 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
68 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar ; &Base[indvar]
69 %DestI = getelementptr i8, i8* %Dest, i64 %indvar ; &Dest[indvar]
70 %V = load i8, i8* %I.0.014 ; plain load with no explicit align -- the variation this test adds over test2
71 store atomic i8 %V, i8* %DestI unordered, align 1 ; only the store side is unordered-atomic
72 %indvar.next = add i64 %indvar, 1 ; advance by one element
73 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
74 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
76 for.end: ; preds = %for.body, %entry
80 ;; memcpy.atomic formation rejection (atomic store, normal load w/ bad align)
81 define void @test2c(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
82 ; CHECK-LABEL: @test2c(
83 ; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
87 %Base = alloca i32, i32 10000 ; copy source: stack buffer of 10000 x i32
88 %Dest = alloca i32, i32 10000 ; copy destination: stack buffer of 10000 x i32
91 for.body: ; preds = %bb.nph, %for.body
92 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
93 %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar ; &Base[indvar]
94 %DestI = getelementptr i32, i32* %Dest, i64 %indvar ; &Dest[indvar]
95 %V = load i32, i32* %I.0.014, align 2 ; plain load; align 2 is below the 4-byte i32 size (the "bad align" of the header)
96 store atomic i32 %V, i32* %DestI unordered, align 4 ; unordered-atomic store, naturally aligned
97 %indvar.next = add i64 %indvar, 1 ; advance by one element
98 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
99 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
101 for.end: ; preds = %for.body, %entry
105 ;; memcpy.atomic formation rejection (atomic store w/ bad align, normal load)
106 define void @test2d(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
107 ; CHECK-LABEL: @test2d(
108 ; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
112 %Base = alloca i32, i32 10000 ; copy source: stack buffer of 10000 x i32
113 %Dest = alloca i32, i32 10000 ; copy destination: stack buffer of 10000 x i32
116 for.body: ; preds = %bb.nph, %for.body
117 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
118 %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar ; &Base[indvar]
119 %DestI = getelementptr i32, i32* %Dest, i64 %indvar ; &Dest[indvar]
120 %V = load i32, i32* %I.0.014, align 4 ; plain load, naturally aligned
121 store atomic i32 %V, i32* %DestI unordered, align 2 ; unordered-atomic store; align 2 is below the 4-byte i32 size (the "bad align" of the header)
122 %indvar.next = add i64 %indvar, 1 ; advance by one element
123 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
124 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
126 for.end: ; preds = %for.body, %entry
131 ;; memcpy.atomic formation (normal store, atomic load)
132 define void @test3(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
133 ; CHECK-LABEL: @test3(
134 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
138 %Base = alloca i8, i32 10000 ; copy source: stack buffer of 10000 x i8
139 %Dest = alloca i8, i32 10000 ; copy destination: stack buffer of 10000 x i8
142 for.body: ; preds = %bb.nph, %for.body
143 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
144 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar ; &Base[indvar]
145 %DestI = getelementptr i8, i8* %Dest, i64 %indvar ; &Dest[indvar]
146 %V = load atomic i8, i8* %I.0.014 unordered, align 1 ; only the load side is unordered-atomic
147 store i8 %V, i8* %DestI, align 1 ; plain (non-atomic) store, explicitly align 1
148 %indvar.next = add i64 %indvar, 1 ; advance by one element
149 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
150 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
152 for.end: ; preds = %for.body, %entry
156 ;; memcpy.atomic formation (normal store w/ no align, atomic load)
157 define void @test3b(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
158 ; CHECK-LABEL: @test3b(
159 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
163 %Base = alloca i8, i32 10000 ; copy source: stack buffer of 10000 x i8
164 %Dest = alloca i8, i32 10000 ; copy destination: stack buffer of 10000 x i8
167 for.body: ; preds = %bb.nph, %for.body
168 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
169 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar ; &Base[indvar]
170 %DestI = getelementptr i8, i8* %Dest, i64 %indvar ; &Dest[indvar]
171 %V = load atomic i8, i8* %I.0.014 unordered, align 1 ; only the load side is unordered-atomic
172 store i8 %V, i8* %DestI ; plain store with no explicit align -- the variation this test adds over test3
173 %indvar.next = add i64 %indvar, 1 ; advance by one element
174 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
175 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
177 for.end: ; preds = %for.body, %entry
181 ;; memcpy.atomic formation rejection (normal store, atomic load w/ bad align)
182 define void @test3c(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
183 ; CHECK-LABEL: @test3c(
184 ; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
188 %Base = alloca i32, i32 10000 ; copy source: stack buffer of 10000 x i32
189 %Dest = alloca i32, i32 10000 ; copy destination: stack buffer of 10000 x i32
192 for.body: ; preds = %bb.nph, %for.body
193 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
194 %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar ; &Base[indvar]
195 %DestI = getelementptr i32, i32* %Dest, i64 %indvar ; &Dest[indvar]
196 %V = load atomic i32, i32* %I.0.014 unordered, align 2 ; unordered-atomic load; align 2 is below the 4-byte i32 size (the "bad align" of the header)
197 store i32 %V, i32* %DestI, align 4 ; plain store, naturally aligned
198 %indvar.next = add i64 %indvar, 1 ; advance by one element
199 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
200 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
202 for.end: ; preds = %for.body, %entry
206 ;; memcpy.atomic formation rejection (normal store w/ bad align, atomic load)
207 define void @test3d(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
208 ; CHECK-LABEL: @test3d(
209 ; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
213 %Base = alloca i32, i32 10000 ; copy source: stack buffer of 10000 x i32
214 %Dest = alloca i32, i32 10000 ; copy destination: stack buffer of 10000 x i32
217 for.body: ; preds = %bb.nph, %for.body
218 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
219 %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar ; &Base[indvar]
220 %DestI = getelementptr i32, i32* %Dest, i64 %indvar ; &Dest[indvar]
221 %V = load atomic i32, i32* %I.0.014 unordered, align 4 ; unordered-atomic load, naturally aligned
222 store i32 %V, i32* %DestI, align 2 ; plain store; align 2 is below the 4-byte i32 size (the "bad align" of the header)
223 %indvar.next = add i64 %indvar, 1 ; advance by one element
224 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
225 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
227 for.end: ; preds = %for.body, %entry
232 ;; memcpy.atomic formation rejection (atomic load, ordered-atomic store)
233 define void @test4(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
234 ; CHECK-LABEL: @test4(
235 ; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
239 %Base = alloca i8, i32 10000 ; copy source: stack buffer of 10000 x i8
240 %Dest = alloca i8, i32 10000 ; copy destination: stack buffer of 10000 x i8
243 for.body: ; preds = %bb.nph, %for.body
244 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
245 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar ; &Base[indvar]
246 %DestI = getelementptr i8, i8* %Dest, i64 %indvar ; &Dest[indvar]
247 %V = load atomic i8, i8* %I.0.014 unordered, align 1 ; unordered-atomic load of one byte
248 store atomic i8 %V, i8* %DestI monotonic, align 1 ; monotonic rather than unordered -- the "ordered-atomic store" of the header
249 %indvar.next = add i64 %indvar, 1 ; advance by one element
250 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
251 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
253 for.end: ; preds = %for.body, %entry
257 ;; memcpy.atomic formation rejection (ordered-atomic load, unordered-atomic store)
258 define void @test5(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
259 ; CHECK-LABEL: @test5(
260 ; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
264 %Base = alloca i8, i32 10000 ; copy source: stack buffer of 10000 x i8
265 %Dest = alloca i8, i32 10000 ; copy destination: stack buffer of 10000 x i8
268 for.body: ; preds = %bb.nph, %for.body
269 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
270 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar ; &Base[indvar]
271 %DestI = getelementptr i8, i8* %Dest, i64 %indvar ; &Dest[indvar]
272 %V = load atomic i8, i8* %I.0.014 monotonic, align 1 ; monotonic rather than unordered -- the "ordered-atomic load" of the header
273 store atomic i8 %V, i8* %DestI unordered, align 1 ; unordered-atomic store of that byte
274 %indvar.next = add i64 %indvar, 1 ; advance by one element
275 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
276 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
278 for.end: ; preds = %for.body, %entry
282 ;; memcpy.atomic formation (atomic load & store) -- element size 2
283 define void @test6(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
284 ; CHECK-LABEL: @test6(
285 ; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 1
286 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 2 %Dest{{[0-9]*}}, i8* align 2 %Base{{[0-9]*}}, i64 [[Sz]], i32 2)
290 %Base = alloca i16, i32 10000 ; copy source: stack buffer of 10000 x i16
291 %Dest = alloca i16, i32 10000 ; copy destination: stack buffer of 10000 x i16
294 for.body: ; preds = %bb.nph, %for.body
295 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
296 %I.0.014 = getelementptr i16, i16* %Base, i64 %indvar ; &Base[indvar]
297 %DestI = getelementptr i16, i16* %Dest, i64 %indvar ; &Dest[indvar]
298 %V = load atomic i16, i16* %I.0.014 unordered, align 2 ; unordered-atomic load of one 2-byte element
299 store atomic i16 %V, i16* %DestI unordered, align 2 ; unordered-atomic store of that element
300 %indvar.next = add i64 %indvar, 1 ; advance by one element
301 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
302 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
304 for.end: ; preds = %for.body, %entry
308 ;; memcpy.atomic formation (atomic load & store) -- element size 4
309 define void @test7(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
310 ; CHECK-LABEL: @test7(
311 ; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 2
312 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %Dest{{[0-9]*}}, i8* align 4 %Base{{[0-9]*}}, i64 [[Sz]], i32 4)
316 %Base = alloca i32, i32 10000 ; copy source: stack buffer of 10000 x i32
317 %Dest = alloca i32, i32 10000 ; copy destination: stack buffer of 10000 x i32
320 for.body: ; preds = %bb.nph, %for.body
321 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
322 %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar ; &Base[indvar]
323 %DestI = getelementptr i32, i32* %Dest, i64 %indvar ; &Dest[indvar]
324 %V = load atomic i32, i32* %I.0.014 unordered, align 4 ; unordered-atomic load of one 4-byte element
325 store atomic i32 %V, i32* %DestI unordered, align 4 ; unordered-atomic store of that element
326 %indvar.next = add i64 %indvar, 1 ; advance by one element
327 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
328 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
330 for.end: ; preds = %for.body, %entry
334 ;; memcpy.atomic formation (atomic load & store) -- element size 8
335 define void @test8(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
336 ; CHECK-LABEL: @test8(
337 ; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 3
338 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 8 %Dest{{[0-9]*}}, i8* align 8 %Base{{[0-9]*}}, i64 [[Sz]], i32 8)
342 %Base = alloca i64, i32 10000 ; copy source: stack buffer of 10000 x i64
343 %Dest = alloca i64, i32 10000 ; copy destination: stack buffer of 10000 x i64
346 for.body: ; preds = %bb.nph, %for.body
347 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
348 %I.0.014 = getelementptr i64, i64* %Base, i64 %indvar ; &Base[indvar]
349 %DestI = getelementptr i64, i64* %Dest, i64 %indvar ; &Dest[indvar]
350 %V = load atomic i64, i64* %I.0.014 unordered, align 8 ; unordered-atomic load of one 8-byte element
351 store atomic i64 %V, i64* %DestI unordered, align 8 ; unordered-atomic store of that element
352 %indvar.next = add i64 %indvar, 1 ; advance by one element
353 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
354 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
356 for.end: ; preds = %for.body, %entry
360 ;; memcpy.atomic formation (atomic load & store) -- element size 16
361 define void @test9(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
362 ; CHECK-LABEL: @test9(
363 ; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 4
364 ; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %Dest{{[0-9]*}}, i8* align 16 %Base{{[0-9]*}}, i64 [[Sz]], i32 16)
368 %Base = alloca i128, i32 10000 ; copy source: stack buffer of 10000 x i128
369 %Dest = alloca i128, i32 10000 ; copy destination: stack buffer of 10000 x i128
372 for.body: ; preds = %bb.nph, %for.body
373 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
374 %I.0.014 = getelementptr i128, i128* %Base, i64 %indvar ; &Base[indvar]
375 %DestI = getelementptr i128, i128* %Dest, i64 %indvar ; &Dest[indvar]
376 %V = load atomic i128, i128* %I.0.014 unordered, align 16 ; unordered-atomic load of one 16-byte element
377 store atomic i128 %V, i128* %DestI unordered, align 16 ; unordered-atomic store of that element
378 %indvar.next = add i64 %indvar, 1 ; advance by one element
379 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
380 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
382 for.end: ; preds = %for.body, %entry
386 ;; memcpy.atomic formation rejection (atomic load & store) -- element size 32
387 define void @test10(i64 %Size) nounwind ssp { ; %Size is the bound the loop exit compare tests against
388 ; CHECK-LABEL: @test10(
389 ; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
393 %Base = alloca i256, i32 10000 ; copy source: stack buffer of 10000 x i256
394 %Dest = alloca i256, i32 10000 ; copy destination: stack buffer of 10000 x i256
397 for.body: ; preds = %bb.nph, %for.body
398 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
399 %I.0.014 = getelementptr i256, i256* %Base, i64 %indvar ; &Base[indvar]
400 %DestI = getelementptr i256, i256* %Dest, i64 %indvar ; &Dest[indvar]
401 %V = load atomic i256, i256* %I.0.014 unordered, align 32 ; unordered-atomic load of one 32-byte element (the size the header says is rejected)
402 store atomic i256 %V, i256* %DestI unordered, align 32 ; unordered-atomic store of that element
403 %indvar.next = add i64 %indvar, 1 ; advance by one element
404 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
405 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
407 for.end: ; preds = %for.body, %entry
413 ; Make sure that atomic memset doesn't get recognized by mistake
414 define void @test_nomemset(i8* %Base, i64 %Size) nounwind ssp { ; %Base is the buffer written; %Size is the loop exit bound
415 ; CHECK-LABEL: @test_nomemset(
416 ; CHECK-NOT: call void @llvm.memset
419 bb.nph: ; preds = %entry
422 for.body: ; preds = %bb.nph, %for.body
423 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; byte index, 0 on loop entry
424 %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar ; &Base[indvar]
425 store atomic i8 0, i8* %I.0.014 unordered, align 1 ; unordered-atomic store of constant 0; there is no load in this loop
426 %indvar.next = add i64 %indvar, 1 ; advance by one byte
427 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
428 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
430 for.end: ; preds = %for.body, %entry
434 ; Verify that unordered memset_pattern isn't recognized.
435 ; This is a replica of test11_pattern from basic.ll
436 define void @test_nomemset_pattern(i32* nocapture %P) nounwind ssp { ; %P is the i32 array the loop writes
437 ; CHECK-LABEL: @test_nomemset_pattern(
440 ; CHECK-NOT: memset_pattern
441 ; CHECK: store atomic
446 for.body: ; preds = %entry, %for.body
447 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] ; element index, 0 on loop entry
448 %arrayidx = getelementptr i32, i32* %P, i64 %indvar ; &P[indvar]
449 store atomic i32 1, i32* %arrayidx unordered, align 4 ; unordered-atomic store of the constant 1 to each element
450 %indvar.next = add i64 %indvar, 1 ; advance by one element
451 %exitcond = icmp eq i64 %indvar.next, 10000 ; constant bound: true once the next index equals 10000
452 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
454 for.end: ; preds = %for.body
458 ; Make sure that neither atomic memcpy nor atomic memmove gets recognized
459 ; by mistake when looping with positive stride
460 define void @test_no_memcpy_memmove1(i8* %Src, i64 %Size) { ; %Src is both read and written; %Size is the loop exit bound
461 ; CHECK-LABEL: @test_no_memcpy_memmove1(
462 ; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
463 ; CHECK-NOT: call void @llvm.memmove.element.unordered.atomic
469 for.body: ; preds = %bb.nph, %for.body
470 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; byte index, 0 on loop entry
471 %Step = add nuw nsw i64 %indvar, 1 ; index of the byte one past the one being written
472 %SrcI = getelementptr i8, i8* %Src, i64 %Step ; &Src[indvar + 1]
473 %DestI = getelementptr i8, i8* %Src, i64 %indvar ; &Src[indvar] -- same buffer as the load, one byte behind it
474 %V = load i8, i8* %SrcI, align 1 ; plain (non-atomic) load
475 store atomic i8 %V, i8* %DestI unordered, align 1 ; unordered-atomic store into the overlapping range
476 %indvar.next = add i64 %indvar, 1 ; advance by one byte
477 %exitcond = icmp eq i64 %indvar.next, %Size ; true once the next index equals %Size
478 br i1 %exitcond, label %for.end, label %for.body ; exit on %exitcond, otherwise iterate again
480 for.end: ; preds = %for.body, %entry
484 ; Make sure that neither atomic memcpy nor atomic memmove gets recognized
485 ; by mistake when looping with negative stride
486 define void @test_no_memcpy_memmove2(i8* %Src, i64 %Size) { ; %Src is both read and written; %Size is the starting index
487 ; CHECK-LABEL: @test_no_memcpy_memmove2(
488 ; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
489 ; CHECK-NOT: call void @llvm.memmove.element.unordered.atomic
493 %cmp1 = icmp sgt i64 %Size, 0 ; signed test: is %Size positive?
494 br i1 %cmp1, label %for.body, label %for.end ; skip the loop entirely when %Size <= 0
496 for.body: ; preds = %bb.nph, %for.body
497 %indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ] ; byte index, starts at %Size and counts down
498 %Step = add nsw i64 %indvar, -1 ; index one below the byte being written; also the next %indvar
499 %SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step ; &Src[indvar - 1]
500 %V = load i8, i8* %SrcI, align 1 ; plain (non-atomic) load
501 %DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar ; &Src[indvar] -- same buffer as the load, one byte above it
502 store atomic i8 %V, i8* %DestI unordered, align 1 ; unordered-atomic store into the overlapping range
503 %exitcond = icmp sgt i64 %indvar, 1 ; signed test: true while the current index is above 1
504 br i1 %exitcond, label %for.body, label %for.end ; here a true %exitcond continues the loop
506 for.end: ; preds = %for.body, %entry