1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -stop-after=irtranslator -o - %s | FileCheck %s
; Callee declarations used by the tests below. They differ only in the
; attributes placed on the returned pointer (dereferenceable(N),
; dereferenceable_or_null(N), nonnull, or none beyond align(8)); each test
; loads an i64 through the pointer returned by one of these callees.
4 declare align(8) dereferenceable(8) ptr @declared_with_ret_deref() #0
5 declare align(8) ptr @unknown_decl() #0
6 declare align(8) dereferenceable(4) ptr @declared_with_ret_deref4() #0
7 declare align(8) dereferenceable_or_null(8) ptr @declared_with_ret_deref_or_null() #0
8 declare align(8) nonnull ptr @nonnull_decl() #0
9 declare align(8) dereferenceable_or_null(4) ptr @declared_with_ret_deref_or_null4() #0
11 ; Should have dereferenceable on mem operand
12 define i64 @load_deref_declaration_only() {
13 ; CHECK-LABEL: name: load_deref_declaration_only
14 ; CHECK: bb.1 (%ir-block.0):
15 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
16 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref
17 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
18 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
19 ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
20 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
21 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
22 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
23 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
24 ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
25 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call)
26 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
27 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
28 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
29 ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
30 %call = call ptr @declared_with_ret_deref()
31 %load = load i64, ptr %call, align 8
35 ; No dereferenceable on mem operand
36 define i64 @load_deref_unknown_decl() {
37 ; CHECK-LABEL: name: load_deref_unknown_decl
38 ; CHECK: bb.1 (%ir-block.0):
39 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
40 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @unknown_decl
41 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
42 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
43 ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
44 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
45 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
46 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
47 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
48 ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
49 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (load (s64) from %ir.call)
50 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
51 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
52 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
53 ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
54 %call = call ptr @unknown_decl()
55 %load = load i64, ptr %call, align 8
59 ; Should have dereferenceable on mem operand
60 define i64 @load_deref_callsite_only() {
61 ; CHECK-LABEL: name: load_deref_callsite_only
62 ; CHECK: bb.1 (%ir-block.0):
63 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
64 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @unknown_decl
65 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
66 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
67 ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
68 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
69 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
70 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
71 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
72 ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
73 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call)
74 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
75 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
76 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
77 ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
78 %call = call dereferenceable(8) ptr @unknown_decl()
79 %load = load i64, ptr %call, align 8
83 ; Both loads should have effective dereferenceable(8) since the
84 ; maximum should be used.
85 define i64 @load_deref_maxmimum_callsite_declaration_only() {
86 ; CHECK-LABEL: name: load_deref_maxmimum_callsite_declaration_only
87 ; CHECK: bb.1 (%ir-block.0):
88 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
89 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref
90 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
91 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
92 ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
93 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
94 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
95 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
96 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
97 ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
98 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call0)
99 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
100 ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref4
101 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
102 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
103 ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV1]](p0), @declared_with_ret_deref4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
104 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
105 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1
106 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
107 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
108 ; CHECK-NEXT: [[ASSERT_ALIGN1:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV1]], 8
109 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN1]](p0) :: (dereferenceable load (s64) from %ir.call1)
110 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]]
111 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
112 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
113 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
114 ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
115 %call0 = call dereferenceable(4) ptr @declared_with_ret_deref()
116 %load0 = load i64, ptr %call0, align 8
117 %call1 = call dereferenceable(8) ptr @declared_with_ret_deref4()
118 %load1 = load i64, ptr %call1, align 8
119 %add = add i64 %load0, %load1
123 ; Should have dereferenceable on mem operand (dereferenceable_or_null declaration + nonnull call site)
124 define i64 @load_deref_or_null_declaration_only() {
125 ; CHECK-LABEL: name: load_deref_or_null_declaration_only
126 ; CHECK: bb.1 (%ir-block.0):
127 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
128 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null
129 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
130 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
131 ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
132 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
133 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
134 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
135 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
136 ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
137 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call)
138 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
139 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
140 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
141 ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
142 %call = call nonnull ptr @declared_with_ret_deref_or_null()
143 %load = load i64, ptr %call, align 8
147 ; No dereferenceable on mem operand (nonnull declaration only, no dereferenceable_or_null size)
148 define i64 @load_deref_or_null_nonnull_decl() {
149 ; CHECK-LABEL: name: load_deref_or_null_nonnull_decl
150 ; CHECK: bb.1 (%ir-block.0):
151 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
152 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @nonnull_decl
153 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
154 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
155 ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
156 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
157 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
158 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
159 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
160 ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
161 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (load (s64) from %ir.call)
162 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
163 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
164 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
165 ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
166 %call = call ptr @nonnull_decl()
167 %load = load i64, ptr %call, align 8
171 ; Should have dereferenceable on mem operand (dereferenceable_or_null call site + nonnull declaration)
172 define i64 @load_deref_or_null_callsite_only() {
173 ; CHECK-LABEL: name: load_deref_or_null_callsite_only
174 ; CHECK: bb.1 (%ir-block.0):
175 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
176 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @nonnull_decl
177 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
178 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
179 ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
180 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
181 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
182 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
183 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
184 ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
185 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call)
186 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
187 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
188 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
189 ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
190 %call = call dereferenceable_or_null(8) ptr @nonnull_decl()
191 %load = load i64, ptr %call, align 8
195 ; Both loads should have effective dereferenceable(8) since the
196 ; maximum of the call-site and declaration sizes should be used.
197 define i64 @load_deref_or_null_maxmimum_callsite_declaration_only() {
198 ; CHECK-LABEL: name: load_deref_or_null_maxmimum_callsite_declaration_only
199 ; CHECK: bb.1 (%ir-block.0):
200 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
201 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null
202 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
203 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
204 ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
205 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
206 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
207 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
208 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
209 ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
210 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call0)
211 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
212 ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null4
213 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
214 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
215 ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV1]](p0), @declared_with_ret_deref_or_null4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
216 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
217 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1
218 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
219 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
220 ; CHECK-NEXT: [[ASSERT_ALIGN1:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV1]], 8
221 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN1]](p0) :: (dereferenceable load (s64) from %ir.call1)
222 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]]
223 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
224 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
225 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
226 ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
227 %call0 = call dereferenceable_or_null(4) nonnull ptr @declared_with_ret_deref_or_null()
228 %load0 = load i64, ptr %call0, align 8
229 %call1 = call dereferenceable_or_null(8) nonnull ptr @declared_with_ret_deref_or_null4()
230 %load1 = load i64, ptr %call1, align 8
231 %add = add i64 %load0, %load1
235 attributes #0 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }