1 ; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s
; Statically sized arrays in addrspace(3). Their byte sizes determine the
; offsets expected for the dynamic arrays in the kernels of this file:
;   lds0: 512 x float  = 2048  bytes (0x800)
;   lds1: 256 x float  = 1024  bytes (0x400)
;   lds2: 4096 x float = 16384 bytes (0x4000)
;   lds3: 67 x i8      = 67    bytes (not a multiple of 4, so alignment
;                                     padding becomes visible in the offset)
3 @lds0 = addrspace(3) global [512 x float] undef
4 @lds1 = addrspace(3) global [256 x float] undef
5 @lds2 = addrspace(3) global [4096 x float] undef
6 @lds3 = addrspace(3) global [67 x i8] undef
; Zero-length external arrays in addrspace(3); the comments in this file call
; them dynamic shared memory arrays. They differ only in element type and
; explicit alignment:
;   dynamic_shared0: float,  no explicit alignment
;   dynamic_shared1: double, no explicit alignment
;   dynamic_shared2: double, align 4
;   dynamic_shared3: double, align 16
8 @dynamic_shared0 = external addrspace(3) global [0 x float]
9 @dynamic_shared1 = external addrspace(3) global [0 x double]
10 @dynamic_shared2 = external addrspace(3) global [0 x double], align 4
11 @dynamic_shared3 = external addrspace(3) global [0 x double], align 16
13 ; CHECK-LABEL: {{^}}dynamic_shared_array_0:
14 ; CHECK: v_add_u32_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}}
; Copies lds0[tid.x] into dynamic_shared0[tid.x].
; The only static array referenced is lds0 (512 x float = 2048 bytes), which
; matches the 0x800 immediate expected for the dynamic array's address.
; %out is not used by the instructions shown here.
15 define amdgpu_kernel void @dynamic_shared_array_0(ptr addrspace(1) %out) {
16 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
; Read one float from the static array.
17 %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %tid.x
18 %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
; Write it to the same index of the dynamic array.
19 %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
20 store float %val0, ptr addrspace(3) %arrayidx1, align 4
24 ; CHECK-LABEL: {{^}}dynamic_shared_array_1:
25 ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
26 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0xc00, [[IDX]]
; Loads a float from lds0 or lds1 depending on %cond, then stores it to
; dynamic_shared0[tid.x].
; Both static arrays are referenced by the kernel (on different paths), and
; the expected immediate 0xc00 equals their combined size:
; 2048 (lds0) + 1024 (lds1) = 3072 bytes.
; %out is not used by the instructions shown here.
27 define amdgpu_kernel void @dynamic_shared_array_1(ptr addrspace(1) %out, i32 %cond) {
29 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
; Index into the static arrays is tid.x + 64; the dynamic array uses tid.x.
30 %idx.0 = add nsw i32 %tid.x, 64
; %cond == 0 selects the %if path, anything else the %else path.
31 %tmp = icmp eq i32 %cond, 0
32 br i1 %tmp, label %if, label %else
; Value produced on the %if path (see the phi below): element of lds0.
35 %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
36 %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
39 else: ; preds = %entry
; Value produced on the %else path: element of lds1.
40 %arrayidx1 = getelementptr inbounds [256 x float], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
41 %val1 = load float, ptr addrspace(3) %arrayidx1, align 4
44 endif: ; preds = %else, %if
; Merge the two loaded values and store the result to the dynamic array.
45 %val = phi float [ %val0, %if ], [ %val1, %else ]
46 %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
47 store float %val, ptr addrspace(3) %arrayidx, align 4
51 ; CHECK-LABEL: {{^}}dynamic_shared_array_2:
52 ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
53 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x4000, [[IDX]]
; Copies lds2[tid.x + %idx] into dynamic_shared0[tid.x].
; The only static array referenced is lds2 (4096 x float = 16384 bytes), which
; matches the expected immediate 0x4000.
54 define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) {
55 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
; The static array is indexed by tid.x offset by the kernel argument.
56 %vidx = add i32 %tid.x, %idx
57 %arrayidx0 = getelementptr inbounds [4096 x float], ptr addrspace(3) @lds2, i32 0, i32 %vidx
58 %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
; The dynamic array is indexed by tid.x alone.
59 %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
60 store float %val0, ptr addrspace(3) %arrayidx1, align 4
64 ; The offset to the dynamic shared memory array should be aligned to the alignment of its element type: the 67 bytes of lds3 are rounded up to 0x44 for a float array.
66 ; CHECK-LABEL: {{^}}dynamic_shared_array_3:
67 ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
68 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x44, [[IDX]]
; Reads a byte from lds3[tid.x + %idx], converts it to float and stores it to
; dynamic_shared0[tid.x].
; lds3 occupies 67 bytes; the expected immediate 0x44 (68) is 67 rounded up to
; a multiple of 4, the size of the float elements of dynamic_shared0.
69 define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) {
70 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
71 %vidx = add i32 %tid.x, %idx
72 %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx
; NOTE(review): an i8 load at a variable byte index is annotated align 4 here;
; confirm this is intentional.
73 %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4
; Unsigned byte -> float so that it can be stored into the float array.
74 %val1 = uitofp i8 %val0 to float
75 %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
76 store float %val1, ptr addrspace(3) %arrayidx1, align 4
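80 ; The offset to the dynamic shared memory array should be aligned to the largest alignment among the dynamic arrays used: with a float and a double array, 67 is rounded up to 0x48.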
82 ; CHECK-LABEL: {{^}}dynamic_shared_array_4:
83 ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
84 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x48, [[IDX]]
; Reads a byte from lds3[tid.x + %idx] and stores it, converted, to two dynamic
; arrays: as float to dynamic_shared0 and as double to dynamic_shared1.
; Neither dynamic array has an explicit alignment. The expected immediate 0x48
; (72) is the 67 bytes of lds3 rounded up to a multiple of 8, i.e. the stricter
; of the two element types (double) decides the offset.
85 define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) {
86 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
87 %vidx = add i32 %tid.x, %idx
88 %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx
89 %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4
; The same byte is converted once per destination element type.
90 %val1 = uitofp i8 %val0 to float
91 %val2 = uitofp i8 %val0 to double
; Store through the float-typed dynamic array.
92 %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
93 store float %val1, ptr addrspace(3) %arrayidx1, align 4
; Store through the double-typed dynamic array (default alignment).
94 %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared1, i32 0, i32 %tid.x
95 store double %val2, ptr addrspace(3) %arrayidx2, align 4
99 ; Honor the explicit alignment from the specified variable.
100 ; CHECK-LABEL: {{^}}dynamic_shared_array_5:
101 ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
102 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x44, [[IDX]]
; Same shape as dynamic_shared_array_4, but the double array used is
; dynamic_shared2, which is declared with an explicit `align 4`.
; The expected immediate 0x44 (68) is the 67 bytes of lds3 rounded up to a
; multiple of 4: the explicit alignment is honored instead of a larger one
; derived from the double element type.
103 define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) {
104 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
105 %vidx = add i32 %tid.x, %idx
106 %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx
107 %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4
; The same byte is converted once per destination element type.
108 %val1 = uitofp i8 %val0 to float
109 %val2 = uitofp i8 %val0 to double
; Store through the float-typed dynamic array.
110 %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
111 store float %val1, ptr addrspace(3) %arrayidx1, align 4
; Store through the double-typed dynamic array that is declared align 4.
112 %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared2, i32 0, i32 %tid.x
113 store double %val2, ptr addrspace(3) %arrayidx2, align 4
117 ; Honor the explicit alignment from the specified variable.
118 ; CHECK-LABEL: {{^}}dynamic_shared_array_6:
119 ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
120 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x50, [[IDX]]
; Same shape as dynamic_shared_array_4, but the double array used is
; dynamic_shared3, which is declared with an explicit `align 16`.
; The expected immediate 0x50 (80) is the 67 bytes of lds3 rounded up to a
; multiple of 16, so the explicit over-alignment is honored as well.
121 define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) {
122 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
123 %vidx = add i32 %tid.x, %idx
124 %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx
125 %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4
; The same byte is converted once per destination element type.
126 %val1 = uitofp i8 %val0 to float
127 %val2 = uitofp i8 %val0 to double
; Store through the float-typed dynamic array.
128 %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x
129 store float %val1, ptr addrspace(3) %arrayidx1, align 4
; Store through the double-typed dynamic array that is declared align 16.
130 %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared3, i32 0, i32 %tid.x
131 store double %val2, ptr addrspace(3) %arrayidx2, align 4
; AMDGPU intrinsic returning an i32; every kernel in this file calls it to
; obtain %tid.x, which is used as the element index into the dynamic arrays.
135 declare i32 @llvm.amdgcn.workitem.id.x()