1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; gfx8 requires knowing that no overflow happened in order to fold the constant offset into the addressing mode
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; Baseline case: %gep0 indexes a [32 x i32] addrspace(5) alloca by %idx with no
; GEP flags, and %gep1 then steps a further 4 i32 elements (16 bytes) before a
; volatile store of %val.
; Codegen recorded by the checks below:
;   gfx803 - the 16 is added with its own v_add_u32; the store uses plain "offen".
;   gfx900 - the 16 is folded into the store as "offen offset:16".
6 define void @gep_noflags_alloca(i32 %idx, i32 %val) #0 {
7 ; GFX8-LABEL: gep_noflags_alloca:
9 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
11 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
12 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
13 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
14 ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
15 ; GFX8-NEXT: s_waitcnt vmcnt(0)
16 ; GFX8-NEXT: s_setpc_b64 s[30:31]
18 ; GFX9-LABEL: gep_noflags_alloca:
20 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
22 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
23 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
24 ; GFX9-NEXT: s_waitcnt vmcnt(0)
25 ; GFX9-NEXT: s_setpc_b64 s[30:31]
26 %alloca = alloca [32 x i32], addrspace(5)
; Variable index into the array; no flags on this GEP in the baseline.
27 %gep0 = getelementptr [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
; Constant step of 4 x i32; this is the 16 that appears in the checks above.
28 %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
29 store volatile i32 %val, ptr addrspace(5) %gep1
; Same access pattern as the no-flags variant, but %gep0 is marked "inbounds".
; The checks below record the same instruction sequences as the no-flags case:
; gfx803 keeps a separate v_add_u32 of 16, gfx900 folds it as "offset:16".
; NOTE(review): only %gep0 carries the flag; the constant-step %gep1 has none.
; Confirm that leaving %gep1 unflagged is intended.
33 define void @gep_inbounds_alloca(i32 %idx, i32 %val) #0 {
34 ; GFX8-LABEL: gep_inbounds_alloca:
36 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
38 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
39 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
40 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
41 ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
42 ; GFX8-NEXT: s_waitcnt vmcnt(0)
43 ; GFX8-NEXT: s_setpc_b64 s[30:31]
45 ; GFX9-LABEL: gep_inbounds_alloca:
47 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
49 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
50 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
51 ; GFX9-NEXT: s_waitcnt vmcnt(0)
52 ; GFX9-NEXT: s_setpc_b64 s[30:31]
53 %alloca = alloca [32 x i32], addrspace(5)
; Flag under test: inbounds on the variable-index GEP.
54 %gep0 = getelementptr inbounds [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
; Constant step of 4 x i32 (16 bytes), no flags.
55 %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
56 store volatile i32 %val, ptr addrspace(5) %gep1
; Same access pattern as the no-flags variant, but %gep0 is marked "nuw".
; The checks below still show gfx803 adding 16 with a separate v_add_u32,
; while gfx900 folds it into the store as "offset:16".
; NOTE(review): only %gep0 carries the flag; the constant-step %gep1 has none.
; Confirm that leaving %gep1 unflagged is intended.
60 define void @gep_nuw_alloca(i32 %idx, i32 %val) #0 {
61 ; GFX8-LABEL: gep_nuw_alloca:
63 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
65 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
66 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
67 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
68 ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
69 ; GFX8-NEXT: s_waitcnt vmcnt(0)
70 ; GFX8-NEXT: s_setpc_b64 s[30:31]
72 ; GFX9-LABEL: gep_nuw_alloca:
74 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
76 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
77 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
78 ; GFX9-NEXT: s_waitcnt vmcnt(0)
79 ; GFX9-NEXT: s_setpc_b64 s[30:31]
80 %alloca = alloca [32 x i32], addrspace(5)
; Flag under test: nuw on the variable-index GEP.
81 %gep0 = getelementptr nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
; Constant step of 4 x i32 (16 bytes), no flags.
82 %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
83 store volatile i32 %val, ptr addrspace(5) %gep1
; Same access pattern as the no-flags variant, but %gep0 is marked "nusw".
; The checks below record the same sequences as the no-flags case: gfx803 keeps
; a separate v_add_u32 of 16, gfx900 folds it into the store as "offset:16".
; NOTE(review): only %gep0 carries the flag; the constant-step %gep1 has none.
; Confirm that leaving %gep1 unflagged is intended.
87 define void @gep_nusw_alloca(i32 %idx, i32 %val) #0 {
88 ; GFX8-LABEL: gep_nusw_alloca:
90 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
92 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
93 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
94 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
95 ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
96 ; GFX8-NEXT: s_waitcnt vmcnt(0)
97 ; GFX8-NEXT: s_setpc_b64 s[30:31]
99 ; GFX9-LABEL: gep_nusw_alloca:
101 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
103 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
104 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
105 ; GFX9-NEXT: s_waitcnt vmcnt(0)
106 ; GFX9-NEXT: s_setpc_b64 s[30:31]
107 %alloca = alloca [32 x i32], addrspace(5)
; Flag under test: nusw on the variable-index GEP.
108 %gep0 = getelementptr nusw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
; Constant step of 4 x i32 (16 bytes), no flags.
109 %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
110 store volatile i32 %val, ptr addrspace(5) %gep1
; Same access pattern as the no-flags variant, but %gep0 is marked
; "inbounds nuw". The checks below still show gfx803 adding 16 with a separate
; v_add_u32, while gfx900 folds it into the store as "offset:16".
; NOTE(review): only %gep0 carries the flags; the constant-step %gep1 has none.
; Confirm that leaving %gep1 unflagged is intended.
114 define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 {
115 ; GFX8-LABEL: gep_inbounds_nuw_alloca:
117 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
119 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
120 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
121 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
122 ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
123 ; GFX8-NEXT: s_waitcnt vmcnt(0)
124 ; GFX8-NEXT: s_setpc_b64 s[30:31]
126 ; GFX9-LABEL: gep_inbounds_nuw_alloca:
128 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
130 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
131 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
132 ; GFX9-NEXT: s_waitcnt vmcnt(0)
133 ; GFX9-NEXT: s_setpc_b64 s[30:31]
134 %alloca = alloca [32 x i32], addrspace(5)
; Flags under test: inbounds + nuw on the variable-index GEP.
135 %gep0 = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
; Constant step of 4 x i32 (16 bytes), no flags.
136 %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
137 store volatile i32 %val, ptr addrspace(5) %gep1
; Same access pattern as the no-flags variant, but %gep0 is marked "nusw nuw".
; The checks below still show gfx803 adding 16 with a separate v_add_u32,
; while gfx900 folds it into the store as "offset:16".
; NOTE(review): only %gep0 carries the flags; the constant-step %gep1 has none.
; Confirm that leaving %gep1 unflagged is intended.
141 define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 {
142 ; GFX8-LABEL: gep_nusw_nuw_alloca:
144 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
146 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
147 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
148 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
149 ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
150 ; GFX8-NEXT: s_waitcnt vmcnt(0)
151 ; GFX8-NEXT: s_setpc_b64 s[30:31]
153 ; GFX9-LABEL: gep_nusw_nuw_alloca:
155 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
157 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
158 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
159 ; GFX9-NEXT: s_waitcnt vmcnt(0)
160 ; GFX9-NEXT: s_setpc_b64 s[30:31]
161 %alloca = alloca [32 x i32], addrspace(5)
; Flags under test: nusw + nuw on the variable-index GEP.
162 %gep0 = getelementptr nusw nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
; Constant step of 4 x i32 (16 bytes), no flags.
163 %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
164 store volatile i32 %val, ptr addrspace(5) %gep1
; Non-power-of-two stride variant: a single "inbounds nuw" GEP indexes a
; [5 x [33 x i32]] addrspace(5) alloca with (0, %idx, 4). Each [33 x i32] row is
; 33 * 4 = 132 = 0x84 bytes, so the checks below scale %idx with v_mul_lo_u32
; by 0x84 rather than a shift. The trailing index 4 supplies the constant 16.
; As recorded by the checks: gfx803 adds 16 with a separate v_add_u32, and
; gfx900 folds it into the store as "offset:16".
168 define void @gep_inbounds_nuw_alloca_nonpow2_scale(i32 %idx, i32 %val) #0 {
169 ; GFX8-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale:
171 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; GFX8-NEXT: s_movk_i32 s4, 0x84
173 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, s4
174 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
175 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
176 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
177 ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
178 ; GFX8-NEXT: s_waitcnt vmcnt(0)
179 ; GFX8-NEXT: s_setpc_b64 s[30:31]
181 ; GFX9-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale:
183 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184 ; GFX9-NEXT: s_movk_i32 s4, 0x84
185 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, s4
186 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
187 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v2
188 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
189 ; GFX9-NEXT: s_waitcnt vmcnt(0)
190 ; GFX9-NEXT: s_setpc_b64 s[30:31]
191 %alloca = alloca [5 x [33 x i32]], align 4, addrspace(5)
; One GEP carries both the variable row index and the constant element index.
192 %gep1 = getelementptr inbounds nuw [5 x [33 x i32]], ptr addrspace(5) %alloca, i32 0, i32 %idx, i32 4
193 store volatile i32 %val, ptr addrspace(5) %gep1, align 4
; Attribute group #0, referenced by the "#0" suffix on the function definitions
; in this file.
197 attributes #0 = { nounwind }