1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare -amdgpu-codegenprepare-widen-constant-loads < %s | FileCheck -check-prefix=OPT %s
4 declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
; i1 constant load at natural (1-byte) alignment: checks the load is NOT widened to i32.
6 define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
7 ; OPT-LABEL: @constant_load_i1(
8 ; OPT-NEXT: [[VAL:%.*]] = load i1, ptr addrspace(4) [[IN:%.*]], align 1
9 ; OPT-NEXT: store i1 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 1
12 %val = load i1, ptr addrspace(4) %in
13 store i1 %val, ptr addrspace(1) %out
; i1 constant load with explicit align 2 (still below 4): checks it is NOT widened.
17 define amdgpu_kernel void @constant_load_i1_align2(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
18 ; OPT-LABEL: @constant_load_i1_align2(
19 ; OPT-NEXT: [[VAL:%.*]] = load i1, ptr addrspace(4) [[IN:%.*]], align 2
20 ; OPT-NEXT: store i1 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 2
23 %val = load i1, ptr addrspace(4) %in, align 2
24 store i1 %val, ptr addrspace(1) %out, align 2
; i1 constant load with align 4: checks it IS widened to an i32 load followed by a trunc to i1.
28 define amdgpu_kernel void @constant_load_i1_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
29 ; OPT-LABEL: @constant_load_i1_align4(
30 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
31 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i1
32 ; OPT-NEXT: store i1 [[TMP3]], ptr addrspace(1) [[OUT:%.*]], align 4
35 %val = load i1, ptr addrspace(4) %in, align 4
36 store i1 %val, ptr addrspace(1) %out, align 4
; i8 constant load at natural (1-byte) alignment: checks it is NOT widened.
40 define amdgpu_kernel void @constant_load_i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
41 ; OPT-LABEL: @constant_load_i8(
42 ; OPT-NEXT: [[VAL:%.*]] = load i8, ptr addrspace(4) [[IN:%.*]], align 1
43 ; OPT-NEXT: store i8 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 1
46 %val = load i8, ptr addrspace(4) %in
47 store i8 %val, ptr addrspace(1) %out
; i8 constant load with align 2 (below 4): checks it is NOT widened.
51 define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
52 ; OPT-LABEL: @constant_load_i8_align2(
53 ; OPT-NEXT: [[VAL:%.*]] = load i8, ptr addrspace(4) [[IN:%.*]], align 2
54 ; OPT-NEXT: store i8 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 2
57 %val = load i8, ptr addrspace(4) %in, align 2
58 store i8 %val, ptr addrspace(1) %out, align 2
; i8 constant load with align 4: checks it IS widened to an i32 load + trunc to i8.
62 define amdgpu_kernel void @constant_load_i8align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
63 ; OPT-LABEL: @constant_load_i8align4(
64 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
65 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
66 ; OPT-NEXT: store i8 [[TMP3]], ptr addrspace(1) [[OUT:%.*]], align 4
69 %val = load i8, ptr addrspace(4) %in, align 4
70 store i8 %val, ptr addrspace(1) %out, align 4
; <2 x i8> constant load at natural (2-byte) alignment: checks it is NOT widened.
74 define amdgpu_kernel void @constant_load_v2i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
75 ; OPT-LABEL: @constant_load_v2i8(
76 ; OPT-NEXT: [[LD:%.*]] = load <2 x i8>, ptr addrspace(4) [[IN:%.*]], align 2
77 ; OPT-NEXT: store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
80 %ld = load <2 x i8>, ptr addrspace(4) %in
81 store <2 x i8> %ld, ptr addrspace(1) %out
; <2 x i8> constant load with align 4: checks widening to i32 load, trunc to i16, then bitcast back to <2 x i8>.
85 define amdgpu_kernel void @constant_load_v2i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
86 ; OPT-LABEL: @constant_load_v2i8_align4(
87 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
88 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
89 ; OPT-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP3]] to <2 x i8>
90 ; OPT-NEXT: store <2 x i8> [[TMP4]], ptr addrspace(1) [[OUT:%.*]], align 4
93 %ld = load <2 x i8>, ptr addrspace(4) %in, align 4
94 store <2 x i8> %ld, ptr addrspace(1) %out, align 4
; <3 x i8> constant load with no explicit align: checks it is widened (i32 load, trunc to i24,
; bitcast to <3 x i8>) — per the expected output its alignment is treated as 4 here.
98 define amdgpu_kernel void @constant_load_v3i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
99 ; OPT-LABEL: @constant_load_v3i8(
100 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
101 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i24
102 ; OPT-NEXT: [[TMP4:%.*]] = bitcast i24 [[TMP3]] to <3 x i8>
103 ; OPT-NEXT: store <3 x i8> [[TMP4]], ptr addrspace(1) [[OUT:%.*]], align 4
106 %ld = load <3 x i8>, ptr addrspace(4) %in
107 store <3 x i8> %ld, ptr addrspace(1) %out
; Same as @constant_load_v3i8 but with explicit align 4: widened to i32 load + trunc + bitcast.
111 define amdgpu_kernel void @constant_load_v3i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
112 ; OPT-LABEL: @constant_load_v3i8_align4(
113 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
114 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i24
115 ; OPT-NEXT: [[TMP4:%.*]] = bitcast i24 [[TMP3]] to <3 x i8>
116 ; OPT-NEXT: store <3 x i8> [[TMP4]], ptr addrspace(1) [[OUT:%.*]], align 4
119 %ld = load <3 x i8>, ptr addrspace(4) %in, align 4
120 store <3 x i8> %ld, ptr addrspace(1) %out, align 4
; i16 constant load at natural (2-byte) alignment: checks it is NOT widened; the sext user is kept.
124 define amdgpu_kernel void @constant_load_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
125 ; OPT-LABEL: @constant_load_i16(
126 ; OPT-NEXT: [[LD:%.*]] = load i16, ptr addrspace(4) [[IN:%.*]], align 2
127 ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[LD]] to i32
128 ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
131 %ld = load i16, ptr addrspace(4) %in
132 %ext = sext i16 %ld to i32
133 store i32 %ext, ptr addrspace(1) %out
; i16 constant load with align 4: checks widening to i32 load + trunc to i16, feeding the original sext.
137 define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
138 ; OPT-LABEL: @constant_load_i16_align4(
139 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
140 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
141 ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32
142 ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
145 %ld = load i16, ptr addrspace(4) %in, align 4
146 %ext = sext i16 %ld to i32
147 store i32 %ext, ptr addrspace(1) %out, align 4
; half constant load at natural (2-byte) alignment: checks it is NOT widened.
151 define amdgpu_kernel void @constant_load_f16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
152 ; OPT-LABEL: @constant_load_f16(
153 ; OPT-NEXT: [[LD:%.*]] = load half, ptr addrspace(4) [[IN:%.*]], align 2
154 ; OPT-NEXT: store half [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
157 %ld = load half, ptr addrspace(4) %in
158 store half %ld, ptr addrspace(1) %out
; <2 x half> constant load: already 32 bits wide, so the checks expect it unchanged.
162 define amdgpu_kernel void @constant_load_v2f16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
163 ; OPT-LABEL: @constant_load_v2f16(
164 ; OPT-NEXT: [[LD:%.*]] = load <2 x half>, ptr addrspace(4) [[IN:%.*]], align 4
165 ; OPT-NEXT: store <2 x half> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
168 %ld = load <2 x half>, ptr addrspace(4) %in
169 store <2 x half> %ld, ptr addrspace(1) %out
; Volatile constant load: checks the volatile i16 load is left completely untouched.
173 define amdgpu_kernel void @load_volatile(ptr addrspace(1) %out, ptr addrspace(4) %in) {
174 ; OPT-LABEL: @load_volatile(
175 ; OPT-NEXT: [[A:%.*]] = load volatile i16, ptr addrspace(4) [[IN:%.*]], align 2
176 ; OPT-NEXT: store i16 [[A]], ptr addrspace(1) [[OUT:%.*]], align 2
179 %a = load volatile i16, ptr addrspace(4) %in
180 store i16 %a, ptr addrspace(1) %out
; Volatile <2 x i8> constant load: checks the volatile load is preserved as-is.
184 define amdgpu_kernel void @constant_load_v2i8_volatile(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
185 ; OPT-LABEL: @constant_load_v2i8_volatile(
186 ; OPT-NEXT: [[LD:%.*]] = load volatile <2 x i8>, ptr addrspace(4) [[IN:%.*]], align 2
187 ; OPT-NEXT: store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
190 %ld = load volatile <2 x i8>, ptr addrspace(4) %in
191 store <2 x i8> %ld, ptr addrspace(1) %out
; Load from global memory (addrspace(1)) instead of constant (addrspace(4)): checks it is NOT widened.
195 define amdgpu_kernel void @constant_load_v2i8_addrspace1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
196 ; OPT-LABEL: @constant_load_v2i8_addrspace1(
197 ; OPT-NEXT: [[LD:%.*]] = load <2 x i8>, ptr addrspace(1) [[IN:%.*]], align 2
198 ; OPT-NEXT: store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
201 %ld = load <2 x i8>, ptr addrspace(1) %in
202 store <2 x i8> %ld, ptr addrspace(1) %out
; align-4 i8 load from the llvm.amdgcn.dispatch.ptr() result: checks it is widened to an i32
; load + trunc, with the existing zext user retained.
206 define amdgpu_kernel void @use_dispatch_ptr(ptr addrspace(1) %ptr) #1 {
207 ; OPT-LABEL: @use_dispatch_ptr(
208 ; OPT-NEXT: [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
209 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DISPATCH_PTR]], align 4
210 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
211 ; OPT-NEXT: [[LD:%.*]] = zext i8 [[TMP3]] to i32
212 ; OPT-NEXT: store i32 [[LD]], ptr addrspace(1) [[PTR:%.*]], align 4
215 %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
216 %val = load i8, ptr addrspace(4) %dispatch.ptr, align 4
217 %ld = zext i8 %val to i32
218 store i32 %ld, ptr addrspace(1) %ptr
; Widened load with !range metadata (!0 = [5, 500) on i16): checks the range is rewritten for
; the widened i32 type (captured as RNG0; the expected i32 form is listed near end of file).
222 define amdgpu_kernel void @constant_load_i16_align4_range(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
223 ; OPT-LABEL: @constant_load_i16_align4_range(
224 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG0:![0-9]+]]
225 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
226 ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32
227 ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
230 %ld = load i16, ptr addrspace(4) %in, align 4, !range !0
231 %ext = sext i16 %ld to i32
232 store i32 %ext, ptr addrspace(1) %out
; !range !1 = [5, -1): checks the widened load reuses the same rewritten range node RNG0
; as the previous test.
236 define amdgpu_kernel void @constant_load_i16_align4_range_max(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
237 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG0]]
238 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
239 ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32
240 ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
244 %ld = load i16, ptr addrspace(4) %in, align 4, !range !1
245 %ext = sext i16 %ld to i32
246 store i32 %ext, ptr addrspace(1) %out
; Multi-segment !range !2 = [8,12) u [42,99): checks it becomes a distinct rewritten node RNG1
; on the widened i32 load.
250 define amdgpu_kernel void @constant_load_i16_align4_complex_range(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
251 ; OPT-LABEL: @constant_load_i16_align4_complex_range(
252 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG1:![0-9]+]]
253 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
254 ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32
255 ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
258 %ld = load i16, ptr addrspace(4) %in, align 4, !range !2
259 %ext = sext i16 %ld to i32
260 store i32 %ext, ptr addrspace(1) %out
; !range !3 = [0, 255): checks the range metadata is DROPPED on the widened load
; (no !range in the expected output line).
264 define amdgpu_kernel void @constant_load_i16_align4_range_from_0(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
265 ; OPT-LABEL: @constant_load_i16_align4_range_from_0(
266 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
267 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
268 ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32
269 ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
272 %ld = load i16, ptr addrspace(4) %in, align 4, !range !3
273 %ext = sext i16 %ld to i32
274 store i32 %ext, ptr addrspace(1) %out
; !range !4 = [-16, 16): checks it is rewritten to a new node RNG2 on the widened i32 load.
278 define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
279 ; OPT-LABEL: @constant_load_i16_align4_range_from_neg(
280 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG2:![0-9]+]]
281 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
282 ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32
283 ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
286 %ld = load i16, ptr addrspace(4) %in, align 4, !range !4
287 %ext = sext i16 %ld to i32
288 store i32 %ext, ptr addrspace(1) %out
; !range !5 = [-16, 0): checks the widened load reuses the same rewritten node RNG2
; as the previous test.
292 define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
293 ; OPT-LABEL: @constant_load_i16_align4_range_from_neg_to_0(
294 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG2]]
295 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
296 ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32
297 ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
300 %ld = load i16, ptr addrspace(4) %in, align 4, !range !5
301 %ext = sext i16 %ld to i32
302 store i32 %ext, ptr addrspace(1) %out
; Load carrying !invariant.load: checks the metadata is carried over onto the widened i32 load.
306 define amdgpu_kernel void @constant_load_i16_align4_invariant(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
307 ; OPT-LABEL: @constant_load_i16_align4_invariant(
308 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !invariant.load !3
309 ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
310 ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32
311 ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
314 %ld = load i16, ptr addrspace(4) %in, align 4, !invariant.load !6
315 %ext = sext i16 %ld to i32
316 store i32 %ext, ptr addrspace(1) %out
320 attributes #0 = { nounwind }
322 ; OPT: !0 = !{i32 5, i32 0}
323 ; OPT: !1 = !{i32 8, i32 0}
324 ; OPT: !2 = !{i32 65520, i32 0}
327 !0 = !{i16 5, i16 500}
328 !1 = !{i16 5, i16 -1}
329 !2 = !{i16 8, i16 12, i16 42, i16 99}
330 !3 = !{i16 0, i16 255}
331 !4 = !{i16 -16, i16 16}
332 !5 = !{i16 -16, i16 0}