1 ; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s
5 ; The load from %arg has an aliasing store inside the loop
7 ; CHECK: flat_load_dword
9 ; #####################################################################
11 ; The load from %arg1 has no aliasing store in the loop - arg1[i+1] never aliases arg1[i]
15 ; #####################################################################
19 ; The load from %arg in the loop body has an aliasing store
21 ; CHECK: flat_load_dword
25 ; CHECK: flat_store_dword
; Kernel @cfg: reads %arg[%arg2] up front and branches on whether it is > 0.
; The bb11/bb22 loop loads from both %arg and %arg1 and accumulates a sum;
; bb8 stores the final value (0 or the sum) to %arg1[%arg2].
; NOTE(review): the label lines for %bb, %bb6, %bb7 and %bb20, some
; terminators, and the closing brace are not visible in this excerpt.
27 define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
; %tmp is %arg2 sign-extended to i64; %tmp4 = %arg[%arg2] is also the loop
; bound compared against at the bottom of bb22.
29 %tmp = sext i32 %arg2 to i64
30 %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp
31 %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4, !tbaa !0
32 %tmp5 = icmp sgt i32 %tmp4, 0
33 br i1 %tmp5, label %bb6, label %bb8
; Exit block: %tmp9 is 0 when arriving from %bb, otherwise the sum %tmp30
; arriving from %bb7; it is stored to %arg1[%arg2].
41 bb8: ; preds = %bb7, %bb
42 %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
43 %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp
44 store i32 %tmp9, i32 addrspace(1)* %tmp10, align 4, !tbaa !0
; Loop header: %tmp12 carries the running sum, %tmp13 is the loop counter.
47 bb11: ; preds = %bb22, %bb6
48 %tmp12 = phi i32 [ %tmp30, %bb22 ], [ 0, %bb6 ]
49 %tmp13 = phi i32 [ %tmp25, %bb22 ], [ 0, %bb6 ]
; Load %arg[%tmp13 srem %arg2] and test whether it is greater than 100.
50 %tmp14 = srem i32 %tmp13, %arg2
51 %tmp15 = sext i32 %tmp14 to i64
52 %tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15
53 %tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0
54 %tmp18 = icmp sgt i32 %tmp17, 100
55 %tmp19 = sext i32 %tmp13 to i64
56 br i1 %tmp18, label %bb20, label %bb22
; Store 0 to %arg1[%tmp13].
; NOTE(review): presumably the body of %bb20 - its label is not visible here.
59 %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19
60 store i32 0, i32 addrspace(1)* %tmp21, align 4, !tbaa !0
; Load %arg[%tmp13] and %arg1[%tmp13 + 1], then add both to the running sum.
63 bb22: ; preds = %bb20, %bb11
64 %tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp19
65 %tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4, !tbaa !0
66 %tmp25 = add nuw nsw i32 %tmp13, 1
67 %tmp26 = sext i32 %tmp25 to i64
68 %tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp26
69 %tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4, !tbaa !0
70 %tmp29 = add i32 %tmp24, %tmp12
71 %tmp30 = add i32 %tmp29, %tmp28
; Leave the loop once the incremented counter equals %tmp4; otherwise iterate.
72 %tmp31 = icmp eq i32 %tmp25, %tmp4
73 br i1 %tmp31, label %bb7, label %bb11
76 ; One more test to ensure that an aliasing store after the load
77 ; is considered clobbering if the load's parent block is also
78 ; the loop header block.
82 ; The load from %arg has an aliasing store that comes after the load
83 ; but is still considered clobbering because of the loop.
85 ; CHECK: flat_load_dword
; Kernel @cfg_selfloop: each iteration loads %arg[%tmp13 srem %arg2] and then
; stores the loaded value to %arg1[%tmp13], with the store after the load.
; NOTE(review): the entry block, the loop block's label (presumably %bb1, given
; the phi and the back-edge branch), the exit block %bb2 and the closing brace
; are not visible in this excerpt.
87 define amdgpu_kernel void @cfg_selfloop(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
; %tmp13 is the loop counter: 0 on entry from %bb, %tmp25 on the back edge.
95 %tmp13 = phi i32 [ %tmp25, %bb1 ], [ 0, %bb ]
; Load from %arg at index (%tmp13 srem %arg2).
96 %tmp14 = srem i32 %tmp13, %arg2
97 %tmp15 = sext i32 %tmp14 to i64
98 %tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15
99 %tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0
; Store the value just loaded to %arg1[%tmp13].
100 %tmp19 = sext i32 %tmp13 to i64
101 %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19
102 store i32 %tmp17, i32 addrspace(1)* %tmp21, align 4, !tbaa !0
; Increment the counter; exit to %bb2 when it reaches 100, else branch back to %bb1.
103 %tmp25 = add nuw nsw i32 %tmp13, 1
104 %tmp31 = icmp eq i32 %tmp25, 100
105 br i1 %tmp31, label %bb2, label %bb1
; Attribute group #0, referenced by both kernel definitions: sets the
; "target-cpu" string attribute to "fiji".
109 attributes #0 = { "target-cpu"="fiji" }
; TBAA metadata referenced as !tbaa !0 by the loads and stores in this file:
; !0 is the access tag (base type !1, access type !1, offset 0),
; !1 is the "int" type node whose parent is !2 ("omnipotent char"),
; and !3 ("Simple C/C++ TBAA") is the root of the type tree.
111 !0 = !{!1, !1, i64 0}
112 !1 = !{!"int", !2, i64 0}
113 !2 = !{!"omnipotent char", !3, i64 0}
114 !3 = !{!"Simple C/C++ TBAA"}