1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
2 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -passes=amdgpu-lower-kernel-arguments %s | FileCheck -check-prefixes=GCN,HSA %s
3 ; RUN: opt -mtriple=amdgcn-- -S -o - -passes=amdgpu-lower-kernel-arguments %s | FileCheck -check-prefixes=GCN,MESA %s
5 target datalayout = "A5"
7 declare void @llvm.fake.use(...)
9 define amdgpu_kernel void @kern_noargs() {
10 ; GCN-LABEL: @kern_noargs(
16 define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
17 ; HSA-LABEL: @kern_i8(
18 ; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
19 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_KERNARG_SEGMENT]], i64 0
20 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1:![0-9]+]]
21 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
22 ; HSA-NEXT: store i8 [[TMP2]], ptr addrspace(1) undef, align 1
25 ; MESA-LABEL: @kern_i8(
26 ; MESA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
27 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_KERNARG_SEGMENT]], i64 36
28 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1:![0-9]+]]
29 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
30 ; MESA-NEXT: store i8 [[TMP2]], ptr addrspace(1) undef, align 1
33 store i8 %arg, ptr addrspace(1) undef, align 1
37 define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
38 ; HSA-LABEL: @kern_i16(
39 ; HSA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
40 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I16_KERNARG_SEGMENT]], i64 0
41 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
42 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
43 ; HSA-NEXT: store i16 [[TMP2]], ptr addrspace(1) undef, align 1
46 ; MESA-LABEL: @kern_i16(
47 ; MESA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
48 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I16_KERNARG_SEGMENT]], i64 36
49 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
50 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
51 ; MESA-NEXT: store i16 [[TMP2]], ptr addrspace(1) undef, align 1
54 store i16 %arg, ptr addrspace(1) undef, align 1
58 define amdgpu_kernel void @kern_f16(half %arg) #0 {
59 ; HSA-LABEL: @kern_f16(
60 ; HSA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
61 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F16_KERNARG_SEGMENT]], i64 0
62 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
63 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
64 ; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
65 ; HSA-NEXT: store half [[ARG_LOAD]], ptr addrspace(1) undef, align 1
68 ; MESA-LABEL: @kern_f16(
69 ; MESA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
70 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F16_KERNARG_SEGMENT]], i64 36
71 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
72 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
73 ; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
74 ; MESA-NEXT: store half [[ARG_LOAD]], ptr addrspace(1) undef, align 1
77 store half %arg, ptr addrspace(1) undef, align 1
81 define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
82 ; HSA-LABEL: @kern_zeroext_i8(
83 ; HSA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
84 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0
85 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
86 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
87 ; HSA-NEXT: store i8 [[TMP2]], ptr addrspace(1) undef, align 1
90 ; MESA-LABEL: @kern_zeroext_i8(
91 ; MESA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
92 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36
93 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
94 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
95 ; MESA-NEXT: store i8 [[TMP2]], ptr addrspace(1) undef, align 1
98 store i8 %arg, ptr addrspace(1) undef, align 1
102 define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
103 ; HSA-LABEL: @kern_zeroext_i16(
104 ; HSA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
105 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0
106 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
107 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
108 ; HSA-NEXT: store i16 [[TMP2]], ptr addrspace(1) undef, align 1
111 ; MESA-LABEL: @kern_zeroext_i16(
112 ; MESA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
113 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36
114 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
115 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
116 ; MESA-NEXT: store i16 [[TMP2]], ptr addrspace(1) undef, align 1
117 ; MESA-NEXT: ret void
119 store i16 %arg, ptr addrspace(1) undef, align 1
123 define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
124 ; HSA-LABEL: @kern_signext_i8(
125 ; HSA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
126 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0
127 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
128 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
129 ; HSA-NEXT: store i8 [[TMP2]], ptr addrspace(1) undef, align 1
132 ; MESA-LABEL: @kern_signext_i8(
133 ; MESA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
134 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36
135 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
136 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
137 ; MESA-NEXT: store i8 [[TMP2]], ptr addrspace(1) undef, align 1
138 ; MESA-NEXT: ret void
140 store i8 %arg, ptr addrspace(1) undef, align 1
144 define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
145 ; HSA-LABEL: @kern_signext_i16(
146 ; HSA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
147 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0
148 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
149 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
150 ; HSA-NEXT: store i16 [[TMP2]], ptr addrspace(1) undef, align 1
153 ; MESA-LABEL: @kern_signext_i16(
154 ; MESA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
155 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36
156 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
157 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
158 ; MESA-NEXT: store i16 [[TMP2]], ptr addrspace(1) undef, align 1
159 ; MESA-NEXT: ret void
161 store i16 %arg, ptr addrspace(1) undef, align 1
165 define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
166 ; HSA-LABEL: @kern_i8_i8(
167 ; HSA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
168 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
169 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
170 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
171 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
172 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
173 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
174 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
175 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
176 ; HSA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
179 ; MESA-LABEL: @kern_i8_i8(
180 ; MESA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
181 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
182 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
183 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
184 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
185 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
186 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
187 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
188 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
189 ; MESA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
190 ; MESA-NEXT: ret void
192 store volatile i8 %arg0, ptr addrspace(1) undef, align 1
193 store volatile i8 %arg1, ptr addrspace(1) undef, align 1
197 define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
198 ; HSA-LABEL: @kern_v3i8(
199 ; HSA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
200 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I8_KERNARG_SEGMENT]], i64 0
201 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
202 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
203 ; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
204 ; HSA-NEXT: store <3 x i8> [[ARG_LOAD]], ptr addrspace(1) undef, align 4
207 ; MESA-LABEL: @kern_v3i8(
208 ; MESA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
209 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I8_KERNARG_SEGMENT]], i64 36
210 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
211 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
212 ; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
213 ; MESA-NEXT: store <3 x i8> [[ARG_LOAD]], ptr addrspace(1) undef, align 4
214 ; MESA-NEXT: ret void
216 store <3 x i8> %arg, ptr addrspace(1) undef, align 4
220 define amdgpu_kernel void @kern_i24(i24 %arg0) {
221 ; HSA-LABEL: @kern_i24(
222 ; HSA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
223 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I24_KERNARG_SEGMENT]], i64 0
224 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
225 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
226 ; HSA-NEXT: store i24 [[TMP2]], ptr addrspace(1) undef, align 4
229 ; MESA-LABEL: @kern_i24(
230 ; MESA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
231 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I24_KERNARG_SEGMENT]], i64 36
232 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
233 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
234 ; MESA-NEXT: store i24 [[TMP2]], ptr addrspace(1) undef, align 4
235 ; MESA-NEXT: ret void
237 store i24 %arg0, ptr addrspace(1) undef
241 define amdgpu_kernel void @kern_i32(i32 %arg0) {
242 ; HSA-LABEL: @kern_i32(
243 ; HSA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
244 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_KERNARG_SEGMENT]], i64 0
245 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
246 ; HSA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
249 ; MESA-LABEL: @kern_i32(
250 ; MESA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
251 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_KERNARG_SEGMENT]], i64 36
252 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
253 ; MESA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
254 ; MESA-NEXT: ret void
256 store i32 %arg0, ptr addrspace(1) undef
260 define amdgpu_kernel void @kern_range_noundef_i32(i32 noundef range(i32 0, 8) %arg0) {
261 ; HSA-LABEL: @kern_range_noundef_i32(
262 ; HSA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
263 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 0
264 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
265 ; HSA-NEXT: call void (...) @llvm.fake.use(i32 [[ARG0_LOAD]])
268 ; MESA-LABEL: @kern_range_noundef_i32(
269 ; MESA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
270 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 36
271 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
272 ; MESA-NEXT: call void (...) @llvm.fake.use(i32 [[ARG0_LOAD]])
273 ; MESA-NEXT: ret void
275 call void (...) @llvm.fake.use(i32 %arg0)
279 define amdgpu_kernel void @kern_f32(float %arg0) {
280 ; HSA-LABEL: @kern_f32(
281 ; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
282 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F32_KERNARG_SEGMENT]], i64 0
283 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
284 ; HSA-NEXT: store float [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
287 ; MESA-LABEL: @kern_f32(
288 ; MESA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
289 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F32_KERNARG_SEGMENT]], i64 36
290 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
291 ; MESA-NEXT: store float [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
292 ; MESA-NEXT: ret void
294 store float %arg0, ptr addrspace(1) undef
298 define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
299 ; HSA-LABEL: @kern_v3i32(
300 ; HSA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
301 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I32_KERNARG_SEGMENT]], i64 0
302 ; HSA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
303 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
304 ; HSA-NEXT: store <3 x i32> [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
307 ; MESA-LABEL: @kern_v3i32(
308 ; MESA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
309 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I32_KERNARG_SEGMENT]], i64 36
310 ; MESA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
311 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
312 ; MESA-NEXT: store <3 x i32> [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
313 ; MESA-NEXT: ret void
315 store <3 x i32> %arg0, ptr addrspace(1) undef, align 4
319 define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
320 ; HSA-LABEL: @kern_v8i32(
321 ; HSA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
322 ; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I32_KERNARG_SEGMENT]], i64 0
323 ; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
324 ; HSA-NEXT: store <8 x i32> [[ARG_LOAD]], ptr addrspace(1) undef, align 32
327 ; MESA-LABEL: @kern_v8i32(
328 ; MESA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
329 ; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I32_KERNARG_SEGMENT]], i64 36
330 ; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
331 ; MESA-NEXT: store <8 x i32> [[ARG_LOAD]], ptr addrspace(1) undef, align 32
332 ; MESA-NEXT: ret void
334 store <8 x i32> %arg, ptr addrspace(1) undef
338 define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
339 ; HSA-LABEL: @kern_v8i64(
340 ; HSA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
341 ; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I64_KERNARG_SEGMENT]], i64 0
342 ; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
343 ; HSA-NEXT: store <8 x i64> [[ARG_LOAD]], ptr addrspace(1) undef, align 64
346 ; MESA-LABEL: @kern_v8i64(
347 ; MESA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
348 ; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I64_KERNARG_SEGMENT]], i64 36
349 ; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
350 ; MESA-NEXT: store <8 x i64> [[ARG_LOAD]], ptr addrspace(1) undef, align 64
351 ; MESA-NEXT: ret void
353 store <8 x i64> %arg, ptr addrspace(1) undef
357 define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
358 ; HSA-LABEL: @kern_v16i64(
359 ; HSA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
360 ; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V16I64_KERNARG_SEGMENT]], i64 0
361 ; HSA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
362 ; HSA-NEXT: store <16 x i64> [[ARG_LOAD]], ptr addrspace(1) undef, align 128
365 ; MESA-LABEL: @kern_v16i64(
366 ; MESA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
367 ; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V16I64_KERNARG_SEGMENT]], i64 36
368 ; MESA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
369 ; MESA-NEXT: store <16 x i64> [[ARG_LOAD]], ptr addrspace(1) undef, align 128
370 ; MESA-NEXT: ret void
372 store <16 x i64> %arg, ptr addrspace(1) undef
376 define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
377 ; HSA-LABEL: @kern_i32_v3i32(
378 ; HSA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
379 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0
380 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
381 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 16
382 ; HSA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
383 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
384 ; HSA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
385 ; HSA-NEXT: store <3 x i32> [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
388 ; MESA-LABEL: @kern_i32_v3i32(
389 ; MESA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
390 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36
391 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
392 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 52
393 ; MESA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
394 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
395 ; MESA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
396 ; MESA-NEXT: store <3 x i32> [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
397 ; MESA-NEXT: ret void
399 store i32 %arg0, ptr addrspace(1) undef
400 store <3 x i32> %arg1, ptr addrspace(1) undef, align 4
404 %struct.a = type { i32, i8, [4 x i8] }
405 %struct.b.packed = type { i8, i32, [3 x i16], <2 x double> }
407 define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
408 ; HSA-LABEL: @kern_struct_a(
409 ; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
410 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0
411 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
412 ; HSA-NEXT: store [[STRUCT_A]] [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
415 ; MESA-LABEL: @kern_struct_a(
416 ; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
417 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36
418 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
419 ; MESA-NEXT: store [[STRUCT_A]] [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
420 ; MESA-NEXT: ret void
422 store %struct.a %arg0, ptr addrspace(1) undef
426 define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
427 ; HSA-LABEL: @kern_struct_b_packed(
428 ; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
429 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0
430 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
431 ; HSA-NEXT: store [[STRUCT_B_PACKED]] [[ARG0_LOAD]], ptr addrspace(1) undef, align 16
434 ; MESA-LABEL: @kern_struct_b_packed(
435 ; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
436 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36
437 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
438 ; MESA-NEXT: store [[STRUCT_B_PACKED]] [[ARG0_LOAD]], ptr addrspace(1) undef, align 16
439 ; MESA-NEXT: ret void
441 store %struct.b.packed %arg0, ptr addrspace(1) undef
445 define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 {
446 ; HSA-LABEL: @kern_implicit_arg_num_bytes(
447 ; HSA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
448 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 0
449 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
450 ; HSA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
453 ; MESA-LABEL: @kern_implicit_arg_num_bytes(
454 ; MESA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
455 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 36
456 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
457 ; MESA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
458 ; MESA-NEXT: ret void
460 store i32 %arg0, ptr addrspace(1) undef
464 define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %arg1) #1 {
465 ; HSA-LABEL: @kernel_implicitarg_no_struct_align(
466 ; HSA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(112) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
467 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 64
468 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
469 ; HSA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
472 ; MESA-LABEL: @kernel_implicitarg_no_struct_align(
473 ; MESA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(108) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
474 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 100
475 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
476 ; MESA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
477 ; MESA-NEXT: ret void
479 store i32 %arg1, ptr addrspace(1) undef
483 define amdgpu_kernel void @kern_lds_ptr(ptr addrspace(3) %lds) #0 {
484 ; HSA-LABEL: @kern_lds_ptr(
485 ; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
486 ; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0
487 ; HSA-NEXT: [[LDS_LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[LDS_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
488 ; HSA-NEXT: store i32 0, ptr addrspace(3) [[LDS_LOAD]], align 4
491 ; MESA-LABEL: @kern_lds_ptr(
492 ; MESA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
493 ; MESA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36
494 ; MESA-NEXT: [[LDS_LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[LDS_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
495 ; MESA-NEXT: store i32 0, ptr addrspace(3) [[LDS_LOAD]], align 4
496 ; MESA-NEXT: ret void
498 store i32 0, ptr addrspace(3) %lds, align 4
502 define amdgpu_kernel void @kern_lds_ptr_si(ptr addrspace(3) %lds) #2 {
503 ; GCN-LABEL: @kern_lds_ptr_si(
504 ; GCN-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
505 ; GCN-NEXT: store i32 0, ptr addrspace(3) [[LDS:%.*]], align 4
508 store i32 0, ptr addrspace(3) %lds, align 4
; Realign test: two i8 args packed into one kernarg dword. Each arg is
; extracted from an aligned-down i32 load of that dword (arg1 via lshr 8)
; instead of a natural 1-byte-aligned load.
512 define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
513 ; HSA-LABEL: @kern_realign_i8_i8(
514 ; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
515 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
516 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
517 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
518 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
519 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
520 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
521 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
522 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
523 ; HSA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
526 ; MESA-LABEL: @kern_realign_i8_i8(
527 ; MESA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
528 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
529 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
530 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
531 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
532 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
533 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
534 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
535 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
536 ; MESA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
537 ; MESA-NEXT: ret void
539 store volatile i8 %arg0, ptr addrspace(1) undef
540 store volatile i8 %arg1, ptr addrspace(1) undef
; Realign test: three i8 args in one kernarg dword, extracted from repeated
; i32 loads of the same dword with lshr by 0/8/16 then trunc to i8.
544 define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 {
545 ; HSA-LABEL: @kern_realign_i8_i8_i8(
546 ; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
547 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
548 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
549 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
550 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
551 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
552 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
553 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
554 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
555 ; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
556 ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
557 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
558 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
559 ; HSA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
560 ; HSA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
563 ; MESA-LABEL: @kern_realign_i8_i8_i8(
564 ; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
565 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
566 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
567 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
568 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
569 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
570 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
571 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
572 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
573 ; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
574 ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
575 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
576 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
577 ; MESA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
578 ; MESA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
579 ; MESA-NEXT: ret void
581 store volatile i8 %arg0, ptr addrspace(1) undef
582 store volatile i8 %arg1, ptr addrspace(1) undef
583 store volatile i8 %arg2, ptr addrspace(1) undef
; Realign test: four i8 args filling one kernarg dword, extracted with
; lshr by 0/8/16/24 then trunc to i8.
587 define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 {
588 ; HSA-LABEL: @kern_realign_i8_i8_i8_i8(
589 ; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
590 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
591 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
592 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
593 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
594 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
595 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
596 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
597 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
598 ; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
599 ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
600 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
601 ; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
602 ; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
603 ; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
604 ; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
605 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
606 ; HSA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
607 ; HSA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
608 ; HSA-NEXT: store volatile i8 [[TMP11]], ptr addrspace(1) undef, align 1
611 ; MESA-LABEL: @kern_realign_i8_i8_i8_i8(
612 ; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
613 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
614 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
615 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
616 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
617 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
618 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
619 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
620 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
621 ; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
622 ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
623 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
624 ; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
625 ; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
626 ; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
627 ; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
628 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
629 ; MESA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
630 ; MESA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
631 ; MESA-NEXT: store volatile i8 [[TMP11]], ptr addrspace(1) undef, align 1
632 ; MESA-NEXT: ret void
634 store volatile i8 %arg0, ptr addrspace(1) undef
635 store volatile i8 %arg1, ptr addrspace(1) undef
636 store volatile i8 %arg2, ptr addrspace(1) undef
637 store volatile i8 %arg3, ptr addrspace(1) undef
; Realign test: i8 followed by <3 x i8>. The vector lands in the next dword
; (HSA offset 4, MESA offset 40) and is materialized via i32 load, trunc to
; i24, then bitcast to <3 x i8>.
641 define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
642 ; HSA-LABEL: @kern_realign_i8_v3i8(
643 ; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
644 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0
645 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
646 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
647 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 4
648 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
649 ; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
650 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
651 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
652 ; HSA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
655 ; MESA-LABEL: @kern_realign_i8_v3i8(
656 ; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
657 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36
658 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
659 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
660 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 40
661 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]]
662 ; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
663 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
664 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
665 ; MESA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
666 ; MESA-NEXT: ret void
668 store volatile i8 %arg0, ptr addrspace(1) undef
669 store volatile <3 x i8> %arg1, ptr addrspace(1) undef
; Realign test: i8 then i16. The i16 sits at byte offset 2 of the same dword
; and is extracted via lshr 16 + trunc to i16.
673 define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
674 ; HSA-LABEL: @kern_realign_i8_i16(
675 ; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
676 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
677 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
678 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
679 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
680 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
681 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
682 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
683 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
684 ; HSA-NEXT: store volatile i16 [[TMP5]], ptr addrspace(1) undef, align 2
687 ; MESA-LABEL: @kern_realign_i8_i16(
688 ; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
689 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
690 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
691 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
692 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
693 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
694 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
695 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
696 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
697 ; MESA-NEXT: store volatile i16 [[TMP5]], ptr addrspace(1) undef, align 2
698 ; MESA-NEXT: ret void
700 store volatile i8 %arg0, ptr addrspace(1) undef
701 store volatile i16 %arg1, ptr addrspace(1) undef
; Realign test: two i1 args. Like the i8 case, each occupies a byte slot of
; one dword and is extracted via lshr (0/8) + trunc to i1.
705 define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
706 ; HSA-LABEL: @kern_realign_i1_i1(
707 ; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
708 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
709 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
710 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
711 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
712 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
713 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
714 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
715 ; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
716 ; HSA-NEXT: store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1
719 ; MESA-LABEL: @kern_realign_i1_i1(
720 ; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
721 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
722 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
723 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
724 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
725 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
726 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
727 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
728 ; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
729 ; MESA-NEXT: store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1
730 ; MESA-NEXT: ret void
732 store volatile i1 %arg0, ptr addrspace(1) undef
733 store volatile i1 %arg1, ptr addrspace(1) undef
; Realign test: three i1 args sharing one dword, extracted via lshr 0/8/16
; + trunc to i1.
737 define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 {
738 ; HSA-LABEL: @kern_realign_i1_i1_i1(
739 ; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
740 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
741 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
742 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
743 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
744 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
745 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
746 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
747 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
748 ; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
749 ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
750 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
751 ; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
752 ; HSA-NEXT: store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1
753 ; HSA-NEXT: store volatile i1 [[TMP8]], ptr addrspace(1) undef, align 1
756 ; MESA-LABEL: @kern_realign_i1_i1_i1(
757 ; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
758 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
759 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
760 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
761 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
762 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
763 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
764 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
765 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
766 ; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
767 ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
768 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
769 ; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
770 ; MESA-NEXT: store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1
771 ; MESA-NEXT: store volatile i1 [[TMP8]], ptr addrspace(1) undef, align 1
772 ; MESA-NEXT: ret void
774 store volatile i1 %arg0, ptr addrspace(1) undef
775 store volatile i1 %arg1, ptr addrspace(1) undef
776 store volatile i1 %arg2, ptr addrspace(1) undef
; Realign test: four i1 args sharing one dword, extracted via lshr 0/8/16/24
; + trunc to i1.
780 define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 {
781 ; HSA-LABEL: @kern_realign_i1_i1_i1_i1(
782 ; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
783 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
784 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
785 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
786 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
787 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
788 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
789 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
790 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
791 ; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
792 ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
793 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
794 ; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
795 ; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
796 ; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
797 ; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
798 ; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
799 ; HSA-NEXT: store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1
800 ; HSA-NEXT: store volatile i1 [[TMP8]], ptr addrspace(1) undef, align 1
801 ; HSA-NEXT: store volatile i1 [[TMP11]], ptr addrspace(1) undef, align 1
804 ; MESA-LABEL: @kern_realign_i1_i1_i1_i1(
805 ; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
806 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
807 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
808 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
809 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
810 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
811 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
812 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
813 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
814 ; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
815 ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
816 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
817 ; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
818 ; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
819 ; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
820 ; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
821 ; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
822 ; MESA-NEXT: store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1
823 ; MESA-NEXT: store volatile i1 [[TMP8]], ptr addrspace(1) undef, align 1
824 ; MESA-NEXT: store volatile i1 [[TMP11]], ptr addrspace(1) undef, align 1
825 ; MESA-NEXT: ret void
827 store volatile i1 %arg0, ptr addrspace(1) undef
828 store volatile i1 %arg1, ptr addrspace(1) undef
829 store volatile i1 %arg2, ptr addrspace(1) undef
830 store volatile i1 %arg3, ptr addrspace(1) undef
; Realign test: i1 followed by <3 x i1>. The vector is extracted from the
; same dword via lshr 8, trunc to i3, then bitcast to <3 x i1>.
834 define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
835 ; HSA-LABEL: @kern_realign_i1_v3i1(
836 ; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
837 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0
838 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
839 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
840 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0
841 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
842 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
843 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i3
844 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP5]] to <3 x i1>
845 ; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
846 ; HSA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], ptr addrspace(1) undef, align 1
849 ; MESA-LABEL: @kern_realign_i1_v3i1(
850 ; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
851 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36
852 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
853 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
854 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36
855 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
856 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
857 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i3
858 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP5]] to <3 x i1>
859 ; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
860 ; MESA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], ptr addrspace(1) undef, align 1
861 ; MESA-NEXT: ret void
863 store volatile i1 %arg0, ptr addrspace(1) undef
864 store volatile <3 x i1> %arg1, ptr addrspace(1) undef
; Realign test: i1 then i16. The i16 is placed at byte offset 2 of the same
; dword and extracted via lshr 16 + trunc to i16.
868 define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
869 ; HSA-LABEL: @kern_realign_i1_i16(
870 ; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
871 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
872 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
873 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
874 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
875 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
876 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
877 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
878 ; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
879 ; HSA-NEXT: store volatile i16 [[TMP5]], ptr addrspace(1) undef, align 2
882 ; MESA-LABEL: @kern_realign_i1_i16(
883 ; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
884 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
885 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
886 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
887 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
888 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
889 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
890 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
891 ; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
892 ; MESA-NEXT: store volatile i16 [[TMP5]], ptr addrspace(1) undef, align 2
893 ; MESA-NEXT: ret void
895 store volatile i1 %arg0, ptr addrspace(1) undef
896 store volatile i16 %arg1, ptr addrspace(1) undef
900 define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 {
901 ; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
902 ; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
903 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
904 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
905 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
906 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
907 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
908 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
909 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
910 ; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
911 ; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
912 ; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
913 ; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
914 ; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
915 ; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
916 ; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
917 ; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
918 ; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
919 ; HSA-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(4) [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
920 ; HSA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
921 ; HSA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
922 ; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
923 ; HSA-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(4) [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
924 ; HSA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
925 ; HSA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
926 ; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
927 ; HSA-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(4) [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
928 ; HSA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
929 ; HSA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
930 ; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
931 ; HSA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
932 ; HSA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
933 ; HSA-NEXT: store volatile i8 [[TMP11]], ptr addrspace(1) undef, align 1
934 ; HSA-NEXT: store volatile i8 [[TMP14]], ptr addrspace(1) undef, align 1
935 ; HSA-NEXT: store volatile i8 [[TMP17]], ptr addrspace(1) undef, align 1
936 ; HSA-NEXT: store volatile i8 [[TMP20]], ptr addrspace(1) undef, align 1
939 ; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
940 ; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
941 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
942 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
943 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
944 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
945 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
946 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
947 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
948 ; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
949 ; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
950 ; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
951 ; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
952 ; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
953 ; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
954 ; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
955 ; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
956 ; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
957 ; MESA-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(4) [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]]
958 ; MESA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
959 ; MESA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
960 ; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
961 ; MESA-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(4) [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]]
962 ; MESA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
963 ; MESA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
964 ; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
965 ; MESA-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(4) [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]]
966 ; MESA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
967 ; MESA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
968 ; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
969 ; MESA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
970 ; MESA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
971 ; MESA-NEXT: store volatile i8 [[TMP11]], ptr addrspace(1) undef, align 1
972 ; MESA-NEXT: store volatile i8 [[TMP14]], ptr addrspace(1) undef, align 1
973 ; MESA-NEXT: store volatile i8 [[TMP17]], ptr addrspace(1) undef, align 1
974 ; MESA-NEXT: store volatile i8 [[TMP20]], ptr addrspace(1) undef, align 1
975 ; MESA-NEXT: ret void
977 store volatile i8 %arg0, ptr addrspace(1) undef
978 store volatile i8 %arg1, ptr addrspace(1) undef
979 store volatile i8 %arg2, ptr addrspace(1) undef
980 store volatile i8 %arg3, ptr addrspace(1) undef
981 store volatile i8 %arg5, ptr addrspace(1) undef
982 store volatile i8 %arg6, ptr addrspace(1) undef
983 store volatile i8 %arg7, ptr addrspace(1) undef
987 define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
988 ; HSA-LABEL: @kern_realign_f16_f16(
989 ; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
990 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
991 ; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
992 ; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
993 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
994 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
995 ; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
996 ; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
997 ; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
998 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
999 ; HSA-NEXT: store volatile half [[ARG0_LOAD]], ptr addrspace(1) undef, align 2
1000 ; HSA-NEXT: store volatile half [[ARG1_LOAD]], ptr addrspace(1) undef, align 2
1001 ; HSA-NEXT: ret void
1003 ; MESA-LABEL: @kern_realign_f16_f16(
1004 ; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1005 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
1006 ; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
1007 ; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
1008 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
1009 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
1010 ; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
1011 ; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
1012 ; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
1013 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
1014 ; MESA-NEXT: store volatile half [[ARG0_LOAD]], ptr addrspace(1) undef, align 2
1015 ; MESA-NEXT: store volatile half [[ARG1_LOAD]], ptr addrspace(1) undef, align 2
1016 ; MESA-NEXT: ret void
1018 store volatile half %arg0, ptr addrspace(1) undef
1019 store volatile half %arg1, ptr addrspace(1) undef
1023 define amdgpu_kernel void @kern_global_ptr(ptr addrspace(1) %ptr) #0 {
1024 ; HSA-LABEL: @kern_global_ptr(
1025 ; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1026 ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1027 ; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1028 ; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
1029 ; HSA-NEXT: ret void
1031 ; MESA-LABEL: @kern_global_ptr(
1032 ; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1033 ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1034 ; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1035 ; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
1036 ; MESA-NEXT: ret void
1038 store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
1042 define amdgpu_kernel void @kern_global_ptr_dereferencable(ptr addrspace(1) dereferenceable(42) %ptr) #0 {
1043 ; HSA-LABEL: @kern_global_ptr_dereferencable(
1044 ; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1045 ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
1046 ; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
1047 ; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
1048 ; HSA-NEXT: ret void
1050 ; MESA-LABEL: @kern_global_ptr_dereferencable(
1051 ; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1052 ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
1053 ; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
1054 ; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
1055 ; MESA-NEXT: ret void
1057 store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
1061 define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(ptr addrspace(1) dereferenceable_or_null(128) %ptr) #0 {
1062 ; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
1063 ; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1064 ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
1065 ; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
1066 ; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
1067 ; HSA-NEXT: ret void
1069 ; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
1070 ; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1071 ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
1072 ; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
1073 ; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
1074 ; MESA-NEXT: ret void
1076 store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
1080 define amdgpu_kernel void @kern_nonnull_global_ptr(ptr addrspace(1) nonnull %ptr) #0 {
1081 ; HSA-LABEL: @kern_nonnull_global_ptr(
1082 ; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1083 ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1084 ; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !nonnull [[META1]]
1085 ; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
1086 ; HSA-NEXT: ret void
1088 ; MESA-LABEL: @kern_nonnull_global_ptr(
1089 ; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1090 ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1091 ; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !nonnull [[META1]]
1092 ; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
1093 ; MESA-NEXT: ret void
1095 store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
1099 define amdgpu_kernel void @kern_align32_global_ptr(ptr addrspace(1) align 1024 %ptr) #0 {
1100 ; HSA-LABEL: @kern_align32_global_ptr(
1101 ; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1102 ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1103 ; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !align [[META5:![0-9]+]]
1104 ; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
1105 ; HSA-NEXT: ret void
1107 ; MESA-LABEL: @kern_align32_global_ptr(
1108 ; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1109 ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1110 ; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !align [[META5:![0-9]+]]
1111 ; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
1112 ; MESA-NEXT: ret void
1114 store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
1118 define amdgpu_kernel void @kern_noalias_global_ptr(ptr addrspace(1) noalias %ptr) #0 {
1119 ; GCN-LABEL: @kern_noalias_global_ptr(
1120 ; GCN-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1121 ; GCN-NEXT: store volatile ptr addrspace(1) [[PTR:%.*]], ptr addrspace(1) undef, align 8
1122 ; GCN-NEXT: ret void
1124 store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
1128 define amdgpu_kernel void @kern_noalias_global_ptr_x2(ptr addrspace(1) noalias %ptr0, ptr addrspace(1) noalias %ptr1) #0 {
1129 ; GCN-LABEL: @kern_noalias_global_ptr_x2(
1130 ; GCN-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1131 ; GCN-NEXT: store volatile ptr addrspace(1) [[PTR0:%.*]], ptr addrspace(1) undef, align 8
1132 ; GCN-NEXT: store volatile ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(1) undef, align 8
1133 ; GCN-NEXT: ret void
1135 store volatile ptr addrspace(1) %ptr0, ptr addrspace(1) undef
1136 store volatile ptr addrspace(1) %ptr1, ptr addrspace(1) undef
1140 define amdgpu_kernel void @kern_noundef_global_ptr(ptr addrspace(1) noundef %ptr) #0 {
1141 ; HSA-LABEL: @kern_noundef_global_ptr(
1142 ; HSA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1143 ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1144 ; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
1145 ; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
1146 ; HSA-NEXT: ret void
1148 ; MESA-LABEL: @kern_noundef_global_ptr(
1149 ; MESA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1150 ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1151 ; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
1152 ; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
1153 ; MESA-NEXT: ret void
1155 store volatile ptr addrspace(1) %ptr, ptr addrspace(1) null
1159 define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 {
1160 ; HSA-LABEL: @struct_i8_i8_arg(
1162 ; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1163 ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0
1164 ; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1165 ; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
1166 ; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
1167 ; HSA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
1168 ; HSA-NEXT: store volatile i8 [[ELT1]], ptr addrspace(1) null, align 4
1169 ; HSA-NEXT: ret void
1171 ; MESA-LABEL: @struct_i8_i8_arg(
1173 ; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1174 ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36
1175 ; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1176 ; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
1177 ; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
1178 ; MESA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
1179 ; MESA-NEXT: store volatile i8 [[ELT1]], ptr addrspace(1) null, align 4
1180 ; MESA-NEXT: ret void
1183 %elt0 = extractvalue {i8, i8} %in, 0
1184 %elt1 = extractvalue {i8, i8} %in, 1
1185 store volatile i8 %elt0, ptr addrspace(1) null, align 4
1186 store volatile i8 %elt1, ptr addrspace(1) null, align 4
1190 define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 {
1191 ; HSA-LABEL: @struct_i8_i16_arg(
1193 ; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1194 ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0
1195 ; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1196 ; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
1197 ; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
1198 ; HSA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
1199 ; HSA-NEXT: store volatile i16 [[ELT1]], ptr addrspace(1) null, align 4
1200 ; HSA-NEXT: ret void
1202 ; MESA-LABEL: @struct_i8_i16_arg(
1204 ; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1205 ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36
1206 ; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1207 ; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
1208 ; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
1209 ; MESA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
1210 ; MESA-NEXT: store volatile i16 [[ELT1]], ptr addrspace(1) null, align 4
1211 ; MESA-NEXT: ret void
1214 %elt0 = extractvalue {i8, i16} %in, 0
1215 %elt1 = extractvalue {i8, i16} %in, 1
1216 store volatile i8 %elt0, ptr addrspace(1) null, align 4
1217 store volatile i16 %elt1, ptr addrspace(1) null, align 4
1221 define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 {
1222 ; HSA-LABEL: @array_2xi8_arg(
1224 ; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1225 ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0
1226 ; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1227 ; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
1228 ; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
1229 ; HSA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
1230 ; HSA-NEXT: store volatile i8 [[ELT1]], ptr addrspace(1) null, align 4
1231 ; HSA-NEXT: ret void
1233 ; MESA-LABEL: @array_2xi8_arg(
1235 ; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1236 ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36
1237 ; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1238 ; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
1239 ; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
1240 ; MESA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
1241 ; MESA-NEXT: store volatile i8 [[ELT1]], ptr addrspace(1) null, align 4
1242 ; MESA-NEXT: ret void
1245 %elt0 = extractvalue [2 x i8] %in, 0
1246 %elt1 = extractvalue [2 x i8] %in, 1
1247 store volatile i8 %elt0, ptr addrspace(1) null, align 4
1248 store volatile i8 %elt1, ptr addrspace(1) null, align 4
1252 define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 {
1253 ; HSA-LABEL: @array_2xi1_arg(
1255 ; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1256 ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0
1257 ; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1258 ; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
1259 ; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
1260 ; HSA-NEXT: store volatile i1 [[ELT0]], ptr addrspace(1) null, align 4
1261 ; HSA-NEXT: store volatile i1 [[ELT1]], ptr addrspace(1) null, align 4
1262 ; HSA-NEXT: ret void
1264 ; MESA-LABEL: @array_2xi1_arg(
1266 ; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1267 ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36
1268 ; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1269 ; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
1270 ; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
1271 ; MESA-NEXT: store volatile i1 [[ELT0]], ptr addrspace(1) null, align 4
1272 ; MESA-NEXT: store volatile i1 [[ELT1]], ptr addrspace(1) null, align 4
1273 ; MESA-NEXT: ret void
1276 %elt0 = extractvalue [2 x i1] %in, 0
1277 %elt1 = extractvalue [2 x i1] %in, 1
1278 store volatile i1 %elt0, ptr addrspace(1) null, align 4
1279 store volatile i1 %elt1, ptr addrspace(1) null, align 4
1283 define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
1284 ; GCN-LABEL: @only_empty_struct(
1285 ; GCN-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1286 ; GCN-NEXT: ret void
1291 define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
1292 ; HSA-LABEL: @empty_struct_with_other(
1293 ; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1294 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0
1295 ; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1296 ; HSA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
1297 ; HSA-NEXT: ret void
1299 ; MESA-LABEL: @empty_struct_with_other(
1300 ; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1301 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36
1302 ; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1303 ; MESA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
1304 ; MESA-NEXT: ret void
1306 store i32 %arg1, ptr addrspace(1) undef
1310 ; The lowered kernarg-segment call and argument loads should be inserted after the static allocas.
1311 define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) {
1312 ; HSA-LABEL: @static_alloca_kern_i32(
1313 ; HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
1314 ; HSA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1315 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 0
1316 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1317 ; HSA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(5) [[ALLOCA]], align 4
1318 ; HSA-NEXT: ret void
1320 ; MESA-LABEL: @static_alloca_kern_i32(
1321 ; MESA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
1322 ; MESA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1323 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 36
1324 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1325 ; MESA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(5) [[ALLOCA]], align 4
1326 ; MESA-NEXT: ret void
1328 %alloca = alloca i32, addrspace(5)
1329 store volatile i32 %arg0, ptr addrspace(5) %alloca
1333 ; Make sure we don't break the IR if an alloca depends on the lowered kernarg load.
1335 define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) {
1336 ; HSA-LABEL: @dyn_alloca_kernarg_i32(
1337 ; HSA-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
1338 ; HSA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1339 ; HSA-NEXT: [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 0
1340 ; HSA-NEXT: [[N_LOAD:%.*]] = load i32, ptr addrspace(4) [[N_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1341 ; HSA-NEXT: [[ALLOCA1:%.*]] = alloca i32, i32 [[N_LOAD]], align 4, addrspace(5)
1342 ; HSA-NEXT: store volatile i32 0, ptr addrspace(5) [[ALLOCA0]], align 4
1343 ; HSA-NEXT: store volatile i32 1, ptr addrspace(5) [[ALLOCA1]], align 4
1344 ; HSA-NEXT: ret void
1346 ; MESA-LABEL: @dyn_alloca_kernarg_i32(
1347 ; MESA-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
1348 ; MESA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1349 ; MESA-NEXT: [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 36
1350 ; MESA-NEXT: [[N_LOAD:%.*]] = load i32, ptr addrspace(4) [[N_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1351 ; MESA-NEXT: [[ALLOCA1:%.*]] = alloca i32, i32 [[N_LOAD]], align 4, addrspace(5)
1352 ; MESA-NEXT: store volatile i32 0, ptr addrspace(5) [[ALLOCA0]], align 4
1353 ; MESA-NEXT: store volatile i32 1, ptr addrspace(5) [[ALLOCA1]], align 4
1354 ; MESA-NEXT: ret void
1356 %alloca0 = alloca i32, addrspace(5)
1357 %alloca1 = alloca i32, i32 %n, addrspace(5)
1358 store volatile i32 0, ptr addrspace(5) %alloca0
1359 store volatile i32 1, ptr addrspace(5) %alloca1
1363 ; Byref pointers should only be treated as offsets from the kernarg segment pointer; the value is loaded directly from that offset rather than through a separately loaded pointer argument.
1364 define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i8) %in.byref) {
1365 ; HSA-LABEL: @byref_constant_i8_arg(
1366 ; HSA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1367 ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 0
1368 ; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1369 ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 8
1370 ; HSA-NEXT: [[IN:%.*]] = load i8, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 1
1371 ; HSA-NEXT: [[EXT:%.*]] = zext i8 [[IN]] to i32
1372 ; HSA-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT_LOAD]], align 4
1373 ; HSA-NEXT: ret void
1375 ; MESA-LABEL: @byref_constant_i8_arg(
1376 ; MESA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1377 ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 36
1378 ; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1379 ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 44
1380 ; MESA-NEXT: [[IN:%.*]] = load i8, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 1
1381 ; MESA-NEXT: [[EXT:%.*]] = zext i8 [[IN]] to i32
1382 ; MESA-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT_LOAD]], align 4
1383 ; MESA-NEXT: ret void
1385 %in = load i8, ptr addrspace(4) %in.byref
1386 %ext = zext i8 %in to i32
1387 store i32 %ext, ptr addrspace(1) %out, align 4
1391 define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i16) %in.byref) {
1392 ; HSA-LABEL: @byref_constant_i16_arg(
1393 ; HSA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1394 ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 0
1395 ; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1396 ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 8
1397 ; HSA-NEXT: [[IN:%.*]] = load i16, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 2
1398 ; HSA-NEXT: [[EXT:%.*]] = zext i16 [[IN]] to i32
1399 ; HSA-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT_LOAD]], align 4
1400 ; HSA-NEXT: ret void
1402 ; MESA-LABEL: @byref_constant_i16_arg(
1403 ; MESA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1404 ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 36
1405 ; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1406 ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 44
1407 ; MESA-NEXT: [[IN:%.*]] = load i16, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 2
1408 ; MESA-NEXT: [[EXT:%.*]] = zext i16 [[IN]] to i32
1409 ; MESA-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT_LOAD]], align 4
1410 ; MESA-NEXT: ret void
1412 %in = load i16, ptr addrspace(4) %in.byref
1413 %ext = zext i16 %in to i32
1414 store i32 %ext, ptr addrspace(1) %out, align 4
1418 define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in.byref, i32 %after.offset) {
1419 ; HSA-LABEL: @byref_constant_i32_arg(
1420 ; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1421 ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0
1422 ; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1423 ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 8
1424 ; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 12
1425 ; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1426 ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4
1427 ; HSA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1428 ; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
1429 ; HSA-NEXT: ret void
1431 ; MESA-LABEL: @byref_constant_i32_arg(
1432 ; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1433 ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36
1434 ; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1435 ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 44
1436 ; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 48
1437 ; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1438 ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4
1439 ; MESA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1440 ; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
1441 ; MESA-NEXT: ret void
1443 %in = load i32, ptr addrspace(4) %in.byref
1444 store volatile i32 %in, ptr addrspace(1) %out, align 4
1445 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
1449 define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(<4 x i32>) %in.byref, i32 %after.offset) {
1450 ; HSA-LABEL: @byref_constant_v4i32_arg(
1451 ; HSA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(296) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1452 ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 0
1453 ; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1454 ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 16
1455 ; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 32
1456 ; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1457 ; HSA-NEXT: [[IN:%.*]] = load <4 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 16
1458 ; HSA-NEXT: store volatile <4 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1459 ; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
1460 ; HSA-NEXT: ret void
1462 ; MESA-LABEL: @byref_constant_v4i32_arg(
1463 ; MESA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(292) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1464 ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 36
1465 ; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1466 ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 52
1467 ; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 68
1468 ; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1469 ; MESA-NEXT: [[IN:%.*]] = load <4 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 16
1470 ; MESA-NEXT: store volatile <4 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1471 ; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
1472 ; MESA-NEXT: ret void
1474 %in = load <4 x i32>, ptr addrspace(4) %in.byref
1475 store volatile <4 x i32> %in, ptr addrspace(1) %out, align 4
1476 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
1480 define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) {
1481 ; HSA-LABEL: @byref_align_constant_i32_arg(
1482 ; HSA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(520) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1483 ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0
1484 ; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1485 ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 256
1486 ; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 260
1487 ; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1488 ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4
1489 ; HSA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1490 ; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
1491 ; HSA-NEXT: ret void
1493 ; MESA-LABEL: @byref_align_constant_i32_arg(
1494 ; MESA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(520) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1495 ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36
1496 ; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1497 ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 292
1498 ; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 296
1499 ; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 8, !invariant.load [[META1]]
1500 ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4
1501 ; MESA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1502 ; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
1503 ; MESA-NEXT: ret void
1505 %in = load i32, ptr addrspace(4) %in.byref
1506 store volatile i32 %in, ptr addrspace(1) %out, align 4
1507 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
1511 define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) %in.byref, i32 %after.offset) {
1512 ; HSA-LABEL: @byref_natural_align_constant_v16i32_arg(
1513 ; HSA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(392) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1514 ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 0
1515 ; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1516 ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 64
1517 ; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 128
1518 ; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1519 ; HSA-NEXT: [[IN:%.*]] = load <16 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 64
1520 ; HSA-NEXT: store volatile <16 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1521 ; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
1522 ; HSA-NEXT: ret void
1524 ; MESA-LABEL: @byref_natural_align_constant_v16i32_arg(
1525 ; MESA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(388) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1526 ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 36
1527 ; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1528 ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 100
1529 ; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 164
1530 ; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1531 ; MESA-NEXT: [[IN:%.*]] = load <16 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 64
1532 ; MESA-NEXT: store volatile <16 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1533 ; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
1534 ; MESA-NEXT: ret void
1536 %in = load <16 x i32>, ptr addrspace(4) %in.byref
1537 store volatile <16 x i32> %in, ptr addrspace(1) %out, align 4
1538 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
1542 ; Also accept byref kernel arguments with other global address spaces.
1543 define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(1) byref(i32) %in.byref) {
1544 ; HSA-LABEL: @byref_global_i32_arg(
1545 ; HSA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1546 ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 0
1547 ; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1548 ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 8
1549 ; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(1)
1550 ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4
1551 ; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1552 ; HSA-NEXT: ret void
1554 ; MESA-LABEL: @byref_global_i32_arg(
1555 ; MESA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1556 ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 36
1557 ; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1558 ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 44
1559 ; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(1)
1560 ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4
1561 ; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1562 ; MESA-NEXT: ret void
1564 %in = load i32, ptr addrspace(1) %in.byref
1565 store i32 %in, ptr addrspace(1) %out, align 4
1569 define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, ptr byref(i32) %in.byref) {
1570 ; HSA-LABEL: @byref_flat_i32_arg(
1571 ; HSA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1572 ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 0
1573 ; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1574 ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 8
1575 ; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr
1576 ; HSA-NEXT: [[IN:%.*]] = load i32, ptr [[TMP1]], align 4
1577 ; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1578 ; HSA-NEXT: ret void
1580 ; MESA-LABEL: @byref_flat_i32_arg(
1581 ; MESA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1582 ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 36
1583 ; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1584 ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 44
1585 ; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr
1586 ; MESA-NEXT: [[IN:%.*]] = load i32, ptr [[TMP1]], align 4
1587 ; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1588 ; MESA-NEXT: ret void
1590 %in = load i32, ptr %in.byref
1591 store i32 %in, ptr addrspace(1) %out, align 4
1595 define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(6) byref(i32) %in.byref) {
1596 ; HSA-LABEL: @byref_constant_32bit_i32_arg(
1597 ; HSA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1598 ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 0
1599 ; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1600 ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 8
1601 ; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(6)
1602 ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(6) [[TMP1]], align 4
1603 ; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1604 ; HSA-NEXT: ret void
1606 ; MESA-LABEL: @byref_constant_32bit_i32_arg(
1607 ; MESA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1608 ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 36
1609 ; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1610 ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 44
1611 ; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(6)
1612 ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(6) [[TMP1]], align 4
1613 ; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1614 ; MESA-NEXT: ret void
1616 %in = load i32, ptr addrspace(6) %in.byref
1617 store i32 %in, ptr addrspace(1) %out, align 4
1621 define amdgpu_kernel void @byref_unknown_as_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(999) byref(i32) %in.byref) {
1622 ; HSA-LABEL: @byref_unknown_as_i32_arg(
1623 ; HSA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1624 ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 0
1625 ; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1626 ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 8
1627 ; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(999)
1628 ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(999) [[TMP1]], align 4
1629 ; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1630 ; HSA-NEXT: ret void
1632 ; MESA-LABEL: @byref_unknown_as_i32_arg(
1633 ; MESA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1634 ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 36
1635 ; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1636 ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 44
1637 ; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(999)
1638 ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(999) [[TMP1]], align 4
1639 ; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1640 ; MESA-NEXT: ret void
1642 %in = load i32, ptr addrspace(999) %in.byref
1643 store i32 %in, ptr addrspace(1) %out, align 4
1647 ; Invalid, but should not crash.
1648 define amdgpu_kernel void @byref_local_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(3) byref(i32) %in.byref) {
1649 ; HSA-LABEL: @byref_local_i32_arg(
1650 ; HSA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1651 ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 0
1652 ; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1653 ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 8
1654 ; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(3)
1655 ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
1656 ; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1657 ; HSA-NEXT: ret void
1659 ; MESA-LABEL: @byref_local_i32_arg(
1660 ; MESA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1661 ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 36
1662 ; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1663 ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 44
1664 ; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(3)
1665 ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
1666 ; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
1667 ; MESA-NEXT: ret void
1669 %in = load i32, ptr addrspace(3) %in.byref
1670 store i32 %in, ptr addrspace(1) %out, align 4
1674 define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in0.byref, ptr addrspace(4) byref(i32) %in1.byref, i32 %after.offset) {
1675 ; HSA-LABEL: @multi_byref_constant_i32_arg(
1676 ; HSA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(280) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1677 ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0
1678 ; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1679 ; HSA-NEXT: [[IN0_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 8
1680 ; HSA-NEXT: [[IN1_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 12
1681 ; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 16
1682 ; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1683 ; HSA-NEXT: [[IN0:%.*]] = load i32, ptr addrspace(4) [[IN0_BYREF_BYVAL_KERNARG_OFFSET]], align 4
1684 ; HSA-NEXT: [[IN1:%.*]] = load i32, ptr addrspace(4) [[IN1_BYREF_BYVAL_KERNARG_OFFSET]], align 4
1685 ; HSA-NEXT: store volatile i32 [[IN0]], ptr addrspace(1) [[OUT_LOAD]], align 4
1686 ; HSA-NEXT: store volatile i32 [[IN1]], ptr addrspace(1) [[OUT_LOAD]], align 4
1687 ; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
1688 ; HSA-NEXT: ret void
1690 ; MESA-LABEL: @multi_byref_constant_i32_arg(
1691 ; MESA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(276) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1692 ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36
1693 ; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1694 ; MESA-NEXT: [[IN0_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 44
1695 ; MESA-NEXT: [[IN1_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 48
1696 ; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 52
1697 ; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1698 ; MESA-NEXT: [[IN0:%.*]] = load i32, ptr addrspace(4) [[IN0_BYREF_BYVAL_KERNARG_OFFSET]], align 4
1699 ; MESA-NEXT: [[IN1:%.*]] = load i32, ptr addrspace(4) [[IN1_BYREF_BYVAL_KERNARG_OFFSET]], align 4
1700 ; MESA-NEXT: store volatile i32 [[IN0]], ptr addrspace(1) [[OUT_LOAD]], align 4
1701 ; MESA-NEXT: store volatile i32 [[IN1]], ptr addrspace(1) [[OUT_LOAD]], align 4
1702 ; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
1703 ; MESA-NEXT: ret void
1705 %in0 = load i32, ptr addrspace(4) %in0.byref
1706 %in1 = load i32, ptr addrspace(4) %in1.byref
1707 store volatile i32 %in0, ptr addrspace(1) %out, align 4
1708 store volatile i32 %in1, ptr addrspace(1) %out, align 4
1709 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
1713 define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref(i32) %in.byref) {
1714 ; HSA-LABEL: @byref_constant_i32_arg_offset0(
1715 ; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1716 ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT]], i64 0
1717 ; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4
1718 ; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) undef, align 4
1719 ; HSA-NEXT: ret void
1721 ; MESA-LABEL: @byref_constant_i32_arg_offset0(
1722 ; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1723 ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT]], i64 36
1724 ; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4
1725 ; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) undef, align 4
1726 ; MESA-NEXT: ret void
1728 %in = load i32, ptr addrspace(4) %in.byref
1729 store i32 %in, ptr addrspace(1) undef, align 4
1733 define amdgpu_kernel void @noundef_f32(float noundef %arg0) {
1734 ; HSA-LABEL: @noundef_f32(
1735 ; HSA-NEXT: [[NOUNDEF_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1736 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F32_KERNARG_SEGMENT]], i64 0
1737 ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
1738 ; HSA-NEXT: call void (...) @llvm.fake.use(float [[ARG0_LOAD]])
1739 ; HSA-NEXT: ret void
1741 ; MESA-LABEL: @noundef_f32(
1742 ; MESA-NEXT: [[NOUNDEF_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1743 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F32_KERNARG_SEGMENT]], i64 36
1744 ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
1745 ; MESA-NEXT: call void (...) @llvm.fake.use(float [[ARG0_LOAD]])
1746 ; MESA-NEXT: ret void
1748 call void (...) @llvm.fake.use(float %arg0)
; Sub-dword (half) kernarg with noundef: the argument is loaded as a widened
; i32 (carrying both !invariant.load and !noundef metadata), truncated to i16,
; then bitcast back to half before reaching the use.
define amdgpu_kernel void @noundef_f16(half noundef %arg0) {
; HSA-LABEL: @noundef_f16(
; HSA-NEXT: [[NOUNDEF_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; HSA-NEXT: call void (...) @llvm.fake.use(half [[ARG0_LOAD]])
; HSA-NEXT: ret void
; MESA-LABEL: @noundef_f16(
; MESA-NEXT: [[NOUNDEF_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; MESA-NEXT: call void (...) @llvm.fake.use(half [[ARG0_LOAD]])
; MESA-NEXT: ret void
call void (...) @llvm.fake.use(half %arg0)
; Naturally-sized vector kernarg (<2 x i32>) with noundef: loaded directly
; (no widen/trunc), and the !noundef annotation is attached to the load.
define amdgpu_kernel void @noundef_v2i32(<2 x i32> noundef %arg0) {
; HSA-LABEL: @noundef_v2i32(
; HSA-NEXT: [[NOUNDEF_V2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: call void (...) @llvm.fake.use(<2 x i32> [[ARG0_LOAD]])
; HSA-NEXT: ret void
; MESA-LABEL: @noundef_v2i32(
; MESA-NEXT: [[NOUNDEF_V2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: call void (...) @llvm.fake.use(<2 x i32> [[ARG0_LOAD]])
; MESA-NEXT: ret void
call void (...) @llvm.fake.use(<2 x i32> %arg0)
; Flat pointer kernarg with noundef: a plain ptr load from the kernarg
; segment, annotated with both !invariant.load and !noundef.
define amdgpu_kernel void @noundef_p0(ptr noundef %arg0) {
; HSA-LABEL: @noundef_p0(
; HSA-NEXT: [[NOUNDEF_P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_P0_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: call void (...) @llvm.fake.use(ptr [[ARG0_LOAD]])
; HSA-NEXT: ret void
; MESA-LABEL: @noundef_p0(
; MESA-NEXT: [[NOUNDEF_P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_P0_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: call void (...) @llvm.fake.use(ptr [[ARG0_LOAD]])
; MESA-NEXT: ret void
call void (...) @llvm.fake.use(ptr %arg0)
; 16-byte vector-of-pointers kernarg with noundef: note the larger
; dereferenceable(272) on the kernarg segment, and !noundef on the load.
define amdgpu_kernel void @noundef_v2p0(<2 x ptr> noundef %arg0) {
; HSA-LABEL: @noundef_v2p0(
; HSA-NEXT: [[NOUNDEF_V2P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2P0_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: call void (...) @llvm.fake.use(<2 x ptr> [[ARG0_LOAD]])
; HSA-NEXT: ret void
; MESA-LABEL: @noundef_v2p0(
; MESA-NEXT: [[NOUNDEF_V2P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2P0_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: call void (...) @llvm.fake.use(<2 x ptr> [[ARG0_LOAD]])
; MESA-NEXT: ret void
call void (...) @llvm.fake.use(<2 x ptr> %arg0)
; Attribute sets referenced by the kernel definitions earlier in this file.
attributes #0 = { nounwind "target-cpu"="kaveri" }
attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
attributes #2 = { nounwind "target-cpu"="tahiti" }
; Module flag selecting AMDHSA code object version 5 (value 500).
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
; Autogenerated checks for output attributes and metadata (--check-globals).
; Do not edit by hand; regenerate with utils/update_test_checks.py.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind }
; HSA: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
; HSA: attributes #[[ATTR2:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
; HSA: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
; HSA: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; MESA: attributes #[[ATTR0:[0-9]+]] = { nounwind }
; MESA: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
; MESA: attributes #[[ATTR2:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
; MESA: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
; MESA: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
; HSA: [[META1]] = !{}
; HSA: [[RNG2]] = !{i32 0, i32 8}
; HSA: [[META3]] = !{i64 42}
; HSA: [[META4]] = !{i64 128}
; HSA: [[META5]] = !{i64 1024}
; MESA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
; MESA: [[META1]] = !{}
; MESA: [[RNG2]] = !{i32 0, i32 8}
; MESA: [[META3]] = !{i64 42}
; MESA: [[META4]] = !{i64 128}
; MESA: [[META5]] = !{i64 1024}