; Regression test: applies the separate-const-offset-from-gep, slsr and gvn
; passes, in that order, to this module for the amdgcn triple and matches the
; printed IR with FileCheck. The two invocations below name the same three
; passes, once as individual flags and once as a -passes pipeline string.
1 ; RUN: opt -S -mtriple=amdgcn-- -separate-const-offset-from-gep -slsr -gvn < %s | FileCheck %s
2 ; RUN: opt -S -mtriple=amdgcn-- -passes="separate-const-offset-from-gep,slsr,gvn" < %s | FileCheck %s
; Per the layout string, pointers in address space 1 are 64 bits wide, while
; the default address space and address space 3 use 32-bit pointers. This
; matches the i64 / i32 getelementptr indices used by the functions in this file.
4 target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
; Two addrspace(1) loads from %arr at element indices (%i + 1023) and
; ((%i << 1) + 1023); each loaded value is stored to %out.
; The patterns below expect the output to contain two non-inbounds GEPs: the
; first is based on %arr, the second is based on the result of the first, and
; both use the same i64 index value. In other words the second address is
; derived from the first by adding the same stride again.
; NOTE(review): the function name suggests 1023 floats (4092 bytes) is the
; largest constant offset a MUBUF access can encode - confirm against the
; AMDGPU target before changing the constant.
7 ; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset(
8 ; CHECK: [[b1:%[0-9]+]] = getelementptr float, float addrspace(1)* %arr, i64 [[bump:%[0-9]+]]
9 ; CHECK: [[b2:%[0-9]+]] = getelementptr float, float addrspace(1)* [[b1]], i64 [[bump]]
10 define amdgpu_kernel void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
; %i2 = 2 * %i, used only by the second access below.
12 %i2 = shl nsw i32 %i, 1
; First access: element index %i + 1023, sign-extended to the 64-bit index type.
13 %j1 = add nsw i32 %i, 1023
14 %tmp = sext i32 %j1 to i64
15 %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
; The float pointer is reinterpreted as an i32 pointer, so the value is moved
; as a raw 32-bit integer.
16 %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
17 %v11 = load i32, i32 addrspace(1)* %tmp3, align 4
18 %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
19 store i32 %v11, i32 addrspace(1)* %tmp4, align 4
; Second access: element index 2 * %i + 1023, i.e. the first index plus %i.
21 %j2 = add nsw i32 %i2, 1023
22 %tmp5 = sext i32 %j2 to i64
23 %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
24 %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
25 %v22 = load i32, i32 addrspace(1)* %tmp6, align 4
; Same destination as the first store (%out cast to an i32 pointer).
26 %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
27 store i32 %v22, i32 addrspace(1)* %tmp7, align 4
; Same shape as the 1023-offset addrspace(1) test, but with the constant 1024.
; The patterns below expect the add of 1024 and its sext to survive unchanged,
; and both inbounds GEPs to stay based directly on %arr with the full indices
; %tmp and %tmp5. That is, the constant is not separated out of the index.
; NOTE(review): the function name suggests 1024 floats (4096 bytes) is one
; element past what a MUBUF immediate offset can encode - confirm against the
; AMDGPU target before changing the constant.
32 ; CHECK-LABEL: @slsr_after_reassociate_global_geps_over_mubuf_max_offset(
33 ; CHECK: %j1 = add nsw i32 %i, 1024
34 ; CHECK: %tmp = sext i32 %j1 to i64
35 ; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
36 ; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
37 define amdgpu_kernel void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
; %i2 = 2 * %i, used only by the second access below.
39 %i2 = shl nsw i32 %i, 1
; First access: element index %i + 1024. The names %j1 and %tmp are matched
; literally by the patterns above, so they must not be renamed.
40 %j1 = add nsw i32 %i, 1024
41 %tmp = sext i32 %j1 to i64
42 %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
43 %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
44 %v11 = load i32, i32 addrspace(1)* %tmp3, align 4
45 %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
46 store i32 %v11, i32 addrspace(1)* %tmp4, align 4
; Second access: element index 2 * %i + 1024. The name %tmp5 is matched
; literally by the last pattern above.
48 %j2 = add nsw i32 %i2, 1024
49 %tmp5 = sext i32 %j2 to i64
50 %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
51 %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
52 %v22 = load i32, i32 addrspace(1)* %tmp6, align 4
53 %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
54 store i32 %v22, i32 addrspace(1)* %tmp7, align 4
; Two addrspace(3) loads from %arr at element indices (%i + 16383) and
; ((%i << 1) + 16383); each loaded value is stored to the addrspace(1)
; pointer %out. Indices are i32 here and no sext is involved.
; The patterns below expect the constant to be split into a trailing inbounds
; GEP of 16383, applied on top of a variable-index base GEP. The second base
; is expected to be the first base advanced by %i again, not recomputed
; from %arr.
; NOTE(review): the function name suggests 16383 floats (65532 bytes) is the
; largest constant offset a DS access can encode - confirm against the AMDGPU
; target before changing the constant.
59 ; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
60 ; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
61 ; CHECK: getelementptr inbounds float, float addrspace(3)* [[B1]], i32 16383
63 ; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
64 ; CHECK: getelementptr inbounds float, float addrspace(3)* [[B2]], i32 16383
65 define amdgpu_kernel void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
; %i2 = 2 * %i, used only by the second access below.
67 %i2 = shl nsw i32 %i, 1
; First access: element index %i + 16383 into the addrspace(3) array.
68 %j1 = add nsw i32 %i, 16383
69 %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
; Load the element as a raw 32-bit integer via an i32 pointer cast.
70 %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
71 %v11 = load i32, i32 addrspace(3)* %tmp3, align 4
; The destination is in addrspace(1), unlike the source array.
72 %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
73 store i32 %v11, i32 addrspace(1)* %tmp4, align 4
; Second access: element index 2 * %i + 16383, i.e. the first index plus %i.
75 %j2 = add nsw i32 %i2, 16383
76 %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
77 %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
78 %v22 = load i32, i32 addrspace(3)* %tmp6, align 4
79 %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
80 store i32 %v22, i32 addrspace(1)* %tmp7, align 4
; Same shape as the 16383-offset addrspace(3) test, but with the constant
; 16384.
; The patterns below expect the constant to stay inside the index: %j1 keeps
; its add of 16384 and both inbounds GEPs remain based directly on %arr.
; The only expected rewrite is that %j2 becomes (%j1 + %i), without nsw,
; instead of the input form (%i2 + 16384).
; NOTE(review): the function name suggests 16384 floats (65536 bytes) is one
; element past what a DS immediate offset can encode - confirm against the
; AMDGPU target before changing the constant.
85 ; CHECK-LABEL: @slsr_after_reassociate_lds_geps_over_ds_max_offset(
86 ; CHECK: %j1 = add nsw i32 %i, 16384
87 ; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
88 ; CHECK: %j2 = add i32 %j1, %i
89 ; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
90 define amdgpu_kernel void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
; %i2 = 2 * %i, used only by the second access below.
92 %i2 = shl nsw i32 %i, 1
; First access: element index %i + 16384. The name %j1 is matched literally
; by the patterns above, so it must not be renamed.
93 %j1 = add nsw i32 %i, 16384
94 %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
95 %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
96 %v11 = load i32, i32 addrspace(3)* %tmp3, align 4
97 %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
98 store i32 %v11, i32 addrspace(1)* %tmp4, align 4
; Second access: element index 2 * %i + 16384. The name %j2 is matched
; literally by the patterns above, though with a different defining expression.
100 %j2 = add nsw i32 %i2, 16384
101 %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
102 %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
103 %v22 = load i32, i32 addrspace(3)* %tmp6, align 4
104 %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
105 store i32 %v22, i32 addrspace(1)* %tmp7, align 4