1 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare -amdgpu-codegenprepare-widen-constant-loads < %s | FileCheck -check-prefix=OPT %s
; The command above runs opt for the amdgcn triple with the
; -amdgpu-codegenprepare pass and the
; -amdgpu-codegenprepare-widen-constant-loads option, then pipes the textual
; IR into FileCheck using the check prefix named on that command line.
;
; Intrinsic declaration; @use_dispatch_ptr calls it to obtain an i8 pointer in
; addrspace(4).
3 declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
5 ; OPT-LABEL: @constant_load_i1
; Input: an i1 load from addrspace(4) with no explicit alignment; the loaded
; value is stored unchanged to addrspace(1).
8 define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
9 %val = load i1, i1 addrspace(4)* %in
10 store i1 %val, i1 addrspace(1)* %out
14 ; OPT-LABEL: @constant_load_i1_align2
; Input: an i1 load from addrspace(4) and a store to addrspace(1), both
; explicitly marked align 2.
17 define amdgpu_kernel void @constant_load_i1_align2(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
18 %val = load i1, i1 addrspace(4)* %in, align 2
19 store i1 %val, i1 addrspace(1)* %out, align 2
23 ; OPT-LABEL: @constant_load_i1_align4
; Input: an i1 load from addrspace(4) and a store to addrspace(1), both
; explicitly marked align 4.
28 define amdgpu_kernel void @constant_load_i1_align4(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
29 %val = load i1, i1 addrspace(4)* %in, align 4
30 store i1 %val, i1 addrspace(1)* %out, align 4
34 ; OPT-LABEL: @constant_load_i8
; Input: an i8 load from addrspace(4) with no explicit alignment; the loaded
; byte is stored unchanged to addrspace(1).
37 define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
38 %val = load i8, i8 addrspace(4)* %in
39 store i8 %val, i8 addrspace(1)* %out
43 ; OPT-LABEL: @constant_load_i8_align2
; Input: an i8 load from addrspace(4) and a store to addrspace(1), both
; explicitly marked align 2.
46 define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
47 %val = load i8, i8 addrspace(4)* %in, align 2
48 store i8 %val, i8 addrspace(1)* %out, align 2
52 ; OPT-LABEL: @constant_load_i8align4
; Input: an i8 load from addrspace(4) and a store to addrspace(1), both
; explicitly marked align 4.
; NOTE(review): the function name has no underscore before "align4", unlike
; the other *_align4 tests in this file; the label line above uses the same
; spelling, so the two agree with each other.
57 define amdgpu_kernel void @constant_load_i8align4(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
58 %val = load i8, i8 addrspace(4)* %in, align 4
59 store i8 %val, i8 addrspace(1)* %out, align 4
64 ; OPT-LABEL: @constant_load_v2i8
; Input: a <2 x i8> vector load from addrspace(4) with no explicit alignment;
; the vector is stored unchanged to addrspace(1).
67 define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
68 %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
69 store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
73 ; OPT-LABEL: @constant_load_v2i8_align4
; Input: a <2 x i8> vector load from addrspace(4) and a store to addrspace(1),
; both explicitly marked align 4.
79 define amdgpu_kernel void @constant_load_v2i8_align4(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
80 %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in, align 4
81 store <2 x i8> %ld, <2 x i8> addrspace(1)* %out, align 4
85 ; OPT-LABEL: @constant_load_v3i8
; Input: a <3 x i8> vector load from addrspace(4) with no explicit alignment,
; stored to addrspace(1). The checks below look, on consecutive output lines,
; for a bitcast mentioning <3 x i8>, an i32 load from addrspace(4), a bitcast
; mentioning i24, and finally a <3 x i8> store.
; NOTE(review): the align-4 variant of this test also checks for a
; "trunc i32" between the load and the i24 bitcast -- confirm whether this
; check sequence should contain one too.
86 ; OPT: bitcast <3 x i8>
87 ; OPT-NEXT: load i32, i32 addrspace(4)
89 ; OPT-NEXT: bitcast i24
90 ; OPT-NEXT: store <3 x i8>
91 define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
92 %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
93 store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
97 ; OPT-LABEL: @constant_load_v3i8_align4
; Input: a <3 x i8> vector load from addrspace(4) and a store to addrspace(1),
; both explicitly marked align 4. The checks below expect, on consecutive
; output lines: a bitcast mentioning <3 x i8>, an i32 load from addrspace(4),
; a trunc of that i32, a bitcast mentioning i24, and a <3 x i8> store.
98 ; OPT: bitcast <3 x i8>
99 ; OPT-NEXT: load i32, i32 addrspace(4)
100 ; OPT-NEXT: trunc i32
101 ; OPT-NEXT: bitcast i24
102 ; OPT-NEXT: store <3 x i8>
103 define amdgpu_kernel void @constant_load_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
104 %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in, align 4
105 store <3 x i8> %ld, <3 x i8> addrspace(1)* %out, align 4
109 ; OPT-LABEL: @constant_load_i16
; Input: an i16 load from addrspace(4) with no explicit alignment; the value
; is sign-extended to i32 and that i32 is stored to addrspace(1).
113 define amdgpu_kernel void @constant_load_i16(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
114 %ld = load i16, i16 addrspace(4)* %in
115 %ext = sext i16 %ld to i32
116 store i32 %ext, i32 addrspace(1)* %out
120 ; OPT-LABEL: @constant_load_i16_align4
; Input: an i16 load from addrspace(4) marked align 4; the value is
; sign-extended to i32 and stored to addrspace(1) with align 4.
126 define amdgpu_kernel void @constant_load_i16_align4(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
127 %ld = load i16, i16 addrspace(4)* %in, align 4
128 %ext = sext i16 %ld to i32
129 store i32 %ext, i32 addrspace(1)* %out, align 4
133 ; OPT-LABEL: @constant_load_f16
; Input: a half-precision float load from addrspace(4) with no explicit
; alignment; the value is stored unchanged to addrspace(1).
136 define amdgpu_kernel void @constant_load_f16(half addrspace(1)* %out, half addrspace(4)* %in) #0 {
137 %ld = load half, half addrspace(4)* %in
138 store half %ld, half addrspace(1)* %out
142 ; OPT-LABEL: @constant_load_v2f16
; Input: a <2 x half> vector load from addrspace(4) with no explicit
; alignment, stored to addrspace(1). The check below expects a
; "load <2 x half>" to be present in the output.
143 ; OPT: load <2 x half>
145 define amdgpu_kernel void @constant_load_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(4)* %in) #0 {
146 %ld = load <2 x half>, <2 x half> addrspace(4)* %in
147 store <2 x half> %ld, <2 x half> addrspace(1)* %out
151 ; OPT-LABEL: @load_volatile
; Input: a volatile i16 load from addrspace(4), stored to addrspace(1). The
; check below expects the output to still contain "load volatile i16", i.e.
; the load keeps its i16 type and volatile marker. Unlike most functions in
; this file, this one references no attribute group.
152 ; OPT: load volatile i16
154 define amdgpu_kernel void @load_volatile(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
155 %a = load volatile i16, i16 addrspace(4)* %in
156 store i16 %a, i16 addrspace(1)* %out
160 ; OPT-LABEL: @constant_load_v2i8_volatile
; Input: a volatile <2 x i8> vector load from addrspace(4), stored to
; addrspace(1). The check below expects the output to still contain
; "load volatile <2 x i8>", i.e. the load keeps its vector type and volatile
; marker.
161 ; OPT: load volatile <2 x i8>
163 define amdgpu_kernel void @constant_load_v2i8_volatile(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
164 %ld = load volatile <2 x i8>, <2 x i8> addrspace(4)* %in
165 store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
169 ; OPT-LABEL: @constant_load_v2i8_addrspace1
; Input: a <2 x i8> vector load whose source pointer is in addrspace(1) rather
; than addrspace(4); the vector is stored to addrspace(1).
172 define amdgpu_kernel void @constant_load_v2i8_addrspace1(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
173 %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
174 store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
178 ; OPT-LABEL: @use_dispatch_ptr
; Input: calls @llvm.amdgcn.dispatch.ptr to get an addrspace(4) pointer, loads
; an i8 from it with align 4, zero-extends the byte to i32, and stores the i32
; to addrspace(1).
; NOTE(review): this function references attribute group #1 -- confirm that
; the group is defined somewhere in the file.
184 define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
185 %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
186 %val = load i8, i8 addrspace(4)* %dispatch.ptr, align 4
187 %ld = zext i8 %val to i32
188 store i32 %ld, i32 addrspace(1)* %ptr
192 ; OPT-LABEL: @constant_load_i16_align4_range(
; Input: an align-4 i16 load from addrspace(4) carrying !range !0, which is
; the i16 pair (5, 500). The check below expects an i32 load through %1 with
; align 4 and a !range attachment referring to output node !0.
193 ; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !0
194 define amdgpu_kernel void @constant_load_i16_align4_range(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
195 %ld = load i16, i16 addrspace(4)* %in, align 4, !range !0
196 %ext = sext i16 %ld to i32
197 store i32 %ext, i32 addrspace(1)* %out
201 ; OPT-LABEL: @constant_load_i16_align4_range_max(
; Input: an align-4 i16 load from addrspace(4) carrying !range !1, which is
; the i16 pair (5, -1). The check below expects an i32 load through %1 with
; align 4 and a !range attachment referring to output node !0 -- the same
; output node number expected for the (5, 500) input range.
202 ; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !0
203 define amdgpu_kernel void @constant_load_i16_align4_range_max(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
204 %ld = load i16, i16 addrspace(4)* %in, align 4, !range !1
205 %ext = sext i16 %ld to i32
206 store i32 %ext, i32 addrspace(1)* %out
210 ; OPT-LABEL: @constant_load_i16_align4_complex_range(
; Input: an align-4 i16 load from addrspace(4) carrying !range !2, a
; four-element i16 node (8, 12, 42, 99), i.e. two sub-ranges. The check below
; expects an i32 load through %1 with align 4 and a !range attachment
; referring to output node !1.
211 ; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !1
212 define amdgpu_kernel void @constant_load_i16_align4_complex_range(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
213 %ld = load i16, i16 addrspace(4)* %in, align 4, !range !2
214 %ext = sext i16 %ld to i32
215 store i32 %ext, i32 addrspace(1)* %out
219 ; OPT-LABEL: @constant_load_i16_align4_range_from_0(
; Input: an align-4 i16 load from addrspace(4) carrying !range !3, which is
; the i16 pair (0, 255). The check below expects an i32 load through %1 whose
; line ends right after "align 4" (the pattern is anchored at end of line), so
; no metadata attachment may follow on the output load.
220 ; OPT: load i32, i32 addrspace(4)* %1, align 4{{$}}
221 define amdgpu_kernel void @constant_load_i16_align4_range_from_0(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
222 %ld = load i16, i16 addrspace(4)* %in, align 4, !range !3
223 %ext = sext i16 %ld to i32
224 store i32 %ext, i32 addrspace(1)* %out
228 ; OPT-LABEL: @constant_load_i16_align4_range_from_neg(
; Input: an align-4 i16 load from addrspace(4) carrying !range !4, which is
; the i16 pair (-16, 16). The check below expects an i32 load through %1 with
; align 4 and a !range attachment referring to output node !2.
229 ; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !2
230 define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
231 %ld = load i16, i16 addrspace(4)* %in, align 4, !range !4
232 %ext = sext i16 %ld to i32
233 store i32 %ext, i32 addrspace(1)* %out
237 ; OPT-LABEL: @constant_load_i16_align4_range_from_neg_to_0(
; Input: an align-4 i16 load from addrspace(4) carrying !range !5, which is
; the i16 pair (-16, 0). The check below expects an i32 load through %1 with
; align 4 and a !range attachment referring to output node !2 -- the same
; output node number expected for the (-16, 16) input range.
238 ; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !2
239 define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
240 %ld = load i16, i16 addrspace(4)* %in, align 4, !range !5
241 %ext = sext i16 %ld to i32
242 store i32 %ext, i32 addrspace(1)* %out
246 ; OPT-LABEL: @constant_load_i16_align4_invariant
; Input: an align-4 i16 load from addrspace(4) tagged !invariant.load !6. The
; check below expects an i32 load through %1 with align 4 that still carries
; an !invariant.load attachment, referring to output node !3.
; NOTE(review): this test depends on input node !6 and on the expected output
; node !3 -- confirm both are defined/checked elsewhere in the file.
247 ; OPT: load i32, i32 addrspace(4)* %1, align 4, !invariant.load !3
248 define amdgpu_kernel void @constant_load_i16_align4_invariant(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
249 %ld = load i16, i16 addrspace(4)* %in, align 4, !invariant.load !6
250 %ext = sext i16 %ld to i32
251 store i32 %ext, i32 addrspace(1)* %out
255 attributes #0 = { nounwind }
; Attribute group #0 above applies nounwind to every function and declaration
; that references it.
;
; Expected metadata nodes in opt's output. Each is an i32 pair whose second
; element is 0. Note that 65520 equals 65536 - 16, which is the 16-bit pattern
; of the i16 lower bound -16 used by input nodes !4 and !5 when read as an
; unsigned value.
257 ; OPT: !0 = !{i32 5, i32 0}
258 ; OPT: !1 = !{i32 8, i32 0}
259 ; OPT: !2 = !{i32 65520, i32 0}
; Input metadata nodes, all i16-typed, attached via !range to the i16 loads in
; the *_range* tests: !0 and !1 start at 5, !2 lists two sub-ranges, !3 starts
; at 0, and !4 / !5 start at -16.
262 !0 = !{i16 5, i16 500}
263 !1 = !{i16 5, i16 -1}
264 !2 = !{i16 8, i16 12, i16 42, i16 99}
265 !3 = !{i16 0, i16 255}
266 !4 = !{i16 -16, i16 16}
267 !5 = !{i16 -16, i16 0}