; This test runs the pipeline amdgpu-promote-alloca, sroa, instcombine twice
; on the same input: once with default options and once with
; -amdgpu-promote-alloca-to-vector-limit=32. Each invocation is verified by
; FileCheck with its own check prefix.
1 ; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s
2 ; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' -amdgpu-promote-alloca-to-vector-limit=32 < %s | FileCheck -check-prefix=LIMIT32 %s
; The "A5" datalayout component selects address space 5 for allocas (see the
; LLVM LangRef data layout section); every alloca below is in addrspace(5).
4 target datalayout = "A5"
; Kernel with an [8 x i64] alloca in addrspace(5), tagged with attribute
; group #0 ("amdgpu-flat-work-group-size"="1,1024"). It stores 0 through the
; alloca pointer, loads the i64 element selected by the runtime %index, and
; writes the loaded value to %out.
; Visible expectation for the run that passes the vector-limit option:
; `<8 x i64>` must not appear.
6 ; OPT-LABEL: @alloca_8xi64_max1024(
10 ; LIMIT32-NOT: <8 x i64>
11 define amdgpu_kernel void @alloca_8xi64_max1024(ptr addrspace(1) %out, i32 %index) #0 {
13 %tmp = alloca [8 x i64], addrspace(5)
14 store i64 0, ptr addrspace(5) %tmp
15 %tmp1 = getelementptr [8 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
16 %tmp2 = load i64, ptr addrspace(5) %tmp1
17 store i64 %tmp2, ptr addrspace(1) %out
; Kernel with a [9 x i64] alloca in addrspace(5), tagged with attribute
; group #0 ("amdgpu-flat-work-group-size"="1,1024"). It stores 0 through the
; alloca pointer, loads the i64 element selected by the runtime %index, and
; writes the loaded value to %out.
; Visible expectations: the default run must still contain
; `alloca [9 x i64]`; the run that passes the vector-limit option must not
; contain `<9 x i64>`.
21 ; OPT-LABEL: @alloca_9xi64_max1024(
22 ; OPT: alloca [9 x i64]
25 ; LIMIT32-NOT: <9 x i64>
26 define amdgpu_kernel void @alloca_9xi64_max1024(ptr addrspace(1) %out, i32 %index) #0 {
28 %tmp = alloca [9 x i64], addrspace(5)
29 store i64 0, ptr addrspace(5) %tmp
30 %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
31 %tmp2 = load i64, ptr addrspace(5) %tmp1
32 store i64 %tmp2, ptr addrspace(1) %out
; Kernel with a [16 x i64] alloca in addrspace(5), tagged with attribute
; group #1 ("amdgpu-flat-work-group-size"="1,512"). It stores 0 through the
; alloca pointer, loads the i64 element selected by the runtime %index, and
; writes the loaded value to %out.
; Visible expectation for the run that passes the vector-limit option:
; `<16 x i64>` must not appear.
36 ; OPT-LABEL: @alloca_16xi64_max512(
40 ; LIMIT32-NOT: <16 x i64>
41 define amdgpu_kernel void @alloca_16xi64_max512(ptr addrspace(1) %out, i32 %index) #1 {
43 %tmp = alloca [16 x i64], addrspace(5)
44 store i64 0, ptr addrspace(5) %tmp
45 %tmp1 = getelementptr [16 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
46 %tmp2 = load i64, ptr addrspace(5) %tmp1
47 store i64 %tmp2, ptr addrspace(1) %out
; Kernel with a [17 x i64] alloca in addrspace(5), tagged with attribute
; group #1 ("amdgpu-flat-work-group-size"="1,512"). It stores 0 through the
; alloca pointer, loads the i64 element selected by the runtime %index, and
; writes the loaded value to %out.
; Visible expectations: the default run must still contain
; `alloca [17 x i64]`; the run that passes the vector-limit option must not
; contain `<17 x i64>`.
51 ; OPT-LABEL: @alloca_17xi64_max512(
52 ; OPT: alloca [17 x i64]
55 ; LIMIT32-NOT: <17 x i64>
56 define amdgpu_kernel void @alloca_17xi64_max512(ptr addrspace(1) %out, i32 %index) #1 {
58 %tmp = alloca [17 x i64], addrspace(5)
59 store i64 0, ptr addrspace(5) %tmp
60 %tmp1 = getelementptr [17 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
61 %tmp2 = load i64, ptr addrspace(5) %tmp1
62 store i64 %tmp2, ptr addrspace(1) %out
; Kernel with a [9 x i128] alloca in addrspace(5), tagged with attribute
; group #1 ("amdgpu-flat-work-group-size"="1,512"). It stores an i128 zero
; through the alloca pointer, loads the i128 element selected by the runtime
; %index, and writes the loaded value to %out.
; Visible expectations: the default run must still contain
; `alloca [9 x i128]`; the run that passes the vector-limit option must not
; contain `<9 x i128>`.
66 ; OPT-LABEL: @alloca_9xi128_max512(
67 ; OPT: alloca [9 x i128]
70 ; LIMIT32-NOT: <9 x i128>
71 define amdgpu_kernel void @alloca_9xi128_max512(ptr addrspace(1) %out, i32 %index) #1 {
73 %tmp = alloca [9 x i128], addrspace(5)
74 store i128 0, ptr addrspace(5) %tmp
75 %tmp1 = getelementptr [9 x i128], ptr addrspace(5) %tmp, i32 0, i32 %index
76 %tmp2 = load i128, ptr addrspace(5) %tmp1
77 store i128 %tmp2, ptr addrspace(1) %out
; Same [9 x i128] alloca pattern as @alloca_9xi128_max512, but tagged with
; attribute group #2 ("amdgpu-flat-work-group-size"="1,256"). It stores an
; i128 zero through the alloca pointer, loads the i128 element selected by
; the runtime %index, and writes the loaded value to %out.
; Visible expectation for the run that passes the vector-limit option:
; `<9 x i128>` must not appear.
81 ; OPT-LABEL: @alloca_9xi128_max256(
85 ; LIMIT32-NOT: <9 x i128>
86 define amdgpu_kernel void @alloca_9xi128_max256(ptr addrspace(1) %out, i32 %index) #2 {
88 %tmp = alloca [9 x i128], addrspace(5)
89 store i128 0, ptr addrspace(5) %tmp
90 %tmp1 = getelementptr [9 x i128], ptr addrspace(5) %tmp, i32 0, i32 %index
91 %tmp2 = load i128, ptr addrspace(5) %tmp1
92 store i128 %tmp2, ptr addrspace(1) %out
; Kernel with a [16 x i128] alloca in addrspace(5), tagged with attribute
; group #2 ("amdgpu-flat-work-group-size"="1,256"). It stores an i128 zero
; through the alloca pointer, loads the i128 element selected by the runtime
; %index, and writes the loaded value to %out.
; Visible expectation for the run that passes the vector-limit option:
; `<16 x i128>` must not appear.
96 ; OPT-LABEL: @alloca_16xi128_max256(
100 ; LIMIT32-NOT: <16 x i128>
101 define amdgpu_kernel void @alloca_16xi128_max256(ptr addrspace(1) %out, i32 %index) #2 {
103 %tmp = alloca [16 x i128], addrspace(5)
104 store i128 0, ptr addrspace(5) %tmp
105 %tmp1 = getelementptr [16 x i128], ptr addrspace(5) %tmp, i32 0, i32 %index
106 %tmp2 = load i128, ptr addrspace(5) %tmp1
107 store i128 %tmp2, ptr addrspace(1) %out
; Kernel with a [9 x i256] alloca in addrspace(5), tagged with attribute
; group #2 ("amdgpu-flat-work-group-size"="1,256"). It stores an i256 zero
; through the alloca pointer, loads the i256 element selected by the runtime
; %index, and writes the loaded value to %out.
; Visible expectations: the default run must still contain
; `alloca [9 x i256]` and must not contain `<9 x i256>` after it; the run
; that passes the vector-limit option must not contain `<9 x i256>` either.
111 ; OPT-LABEL: @alloca_9xi256_max256(
112 ; OPT: alloca [9 x i256]
113 ; OPT-NOT: <9 x i256>
115 ; LIMIT32-NOT: <9 x i256>
116 define amdgpu_kernel void @alloca_9xi256_max256(ptr addrspace(1) %out, i32 %index) #2 {
118 %tmp = alloca [9 x i256], addrspace(5)
119 store i256 0, ptr addrspace(5) %tmp
120 %tmp1 = getelementptr [9 x i256], ptr addrspace(5) %tmp, i32 0, i32 %index
121 %tmp2 = load i256, ptr addrspace(5) %tmp1
122 store i256 %tmp2, ptr addrspace(1) %out
; Same [9 x i64] alloca pattern as @alloca_9xi64_max1024, but tagged with
; attribute group #2 ("amdgpu-flat-work-group-size"="1,256"). It stores 0
; through the alloca pointer, loads the i64 element selected by the runtime
; %index, and writes the loaded value to %out.
; Visible expectation for the run that passes the vector-limit option:
; `<9 x i64>` must not appear.
126 ; OPT-LABEL: @alloca_9xi64_max256(
130 ; LIMIT32-NOT: <9 x i64>
131 define amdgpu_kernel void @alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {
133 %tmp = alloca [9 x i64], addrspace(5)
134 store i64 0, ptr addrspace(5) %tmp
135 %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
136 %tmp2 = load i64, ptr addrspace(5) %tmp1
137 store i64 %tmp2, ptr addrspace(1) %out
; Same body as @alloca_9xi64_max256, but declared as a plain `define void`
; function rather than with the amdgpu_kernel calling convention. It uses
; attribute group #2 ("amdgpu-flat-work-group-size"="1,256"), stores 0
; through a [9 x i64] addrspace(5) alloca, loads the element selected by the
; runtime %index, and writes it to %out.
; Visible expectation for the run that passes the vector-limit option:
; `<9 x i64>` must not appear.
141 ; OPT-LABEL: @func_alloca_9xi64_max256(
145 ; LIMIT32-NOT: <9 x i64>
146 define void @func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {
148 %tmp = alloca [9 x i64], addrspace(5)
149 store i64 0, ptr addrspace(5) %tmp
150 %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
151 %tmp2 = load i64, ptr addrspace(5) %tmp1
152 store i64 %tmp2, ptr addrspace(1) %out
; Same body as @func_alloca_9xi64_max256 (plain `define void`, [9 x i64]
; alloca in addrspace(5)), but tagged with attribute group #3, which adds
; `alwaysinline` to "amdgpu-flat-work-group-size"="1,256". It stores 0
; through the alloca pointer, loads the element selected by the runtime
; %index, and writes it to %out.
; Visible expectation for the run that passes the vector-limit option:
; `<9 x i64>` must not appear.
156 ; OPT-LABEL: @alwaysinlined_func_alloca_9xi64_max256(
160 ; LIMIT32-NOT: <9 x i64>
161 define void @alwaysinlined_func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #3 {
163 %tmp = alloca [9 x i64], addrspace(5)
164 store i64 0, ptr addrspace(5) %tmp
165 %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
166 %tmp2 = load i64, ptr addrspace(5) %tmp1
167 store i64 %tmp2, ptr addrspace(1) %out
; Attribute groups referenced by the functions above. Each sets the
; "amdgpu-flat-work-group-size" string attribute to a "1,N" pair; the N
; matches the _maxN suffix in the names of the functions that use the group.
; #0: used by the *_max1024 kernels.
171 attributes #0 = { "amdgpu-flat-work-group-size"="1,1024" }
; #1: used by the *_max512 kernels.
172 attributes #1 = { "amdgpu-flat-work-group-size"="1,512" }
; #2: used by the *_max256 kernels and by @func_alloca_9xi64_max256.
173 attributes #2 = { "amdgpu-flat-work-group-size"="1,256" }
; #3: same size pair as #2 plus alwaysinline; used only by
; @alwaysinlined_func_alloca_9xi64_max256.
174 attributes #3 = { alwaysinline "amdgpu-flat-work-group-size"="1,256" }