; Runs amdgpu-promote-alloca followed by sroa and instcombine in two
; configurations: once with default settings (checked with the OPT prefix) and
; once with -amdgpu-promote-alloca-to-vector-limit=32 (checked with the LIMIT32
; prefix).
1 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
2 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine -amdgpu-promote-alloca-to-vector-limit=32 < %s | FileCheck -check-prefix=LIMIT32 %s
; "A5" selects address space 5 as the alloca address space, which is why the
; allocas in this file are written with addrspace(5).
4 target datalayout = "A5"
; Kernel using attribute group #0 (flat work-group size 1,1024) with an
; [8 x i64] stack array (8 * 8 = 64 bytes of payload). Element 0 is set to zero,
; then the element selected by the runtime %index is loaded and written to %out.
; The LIMIT32 run must not show an <8 x i64> vector type.
6 ; OPT-LABEL: @alloca_8xi64_max1024(
10 ; LIMIT32-NOT: <8 x i64>
11 define amdgpu_kernel void @alloca_8xi64_max1024(i64 addrspace(1)* %out, i32 %index) #0 {
; Stack array whose handling by the promote-alloca pass is under test.
13 %tmp = alloca [8 x i64], addrspace(5)
; Constant-index access: write zero into element 0.
14 %x = getelementptr [8 x i64], [8 x i64] addrspace(5)* %tmp, i32 0, i32 0
15 store i64 0, i64 addrspace(5)* %x
; Dynamically indexed access: the element is chosen by the %index argument.
16 %tmp1 = getelementptr [8 x i64], [8 x i64] addrspace(5)* %tmp, i32 0, i32 %index
17 %tmp2 = load i64, i64 addrspace(5)* %tmp1
; Publish the loaded value through the addrspace(1) output pointer.
18 store i64 %tmp2, i64 addrspace(1)* %out
; Kernel using attribute group #0 (flat work-group size 1,1024) with a
; [9 x i64] stack array (9 * 8 = 72 bytes of payload). The default run expects
; the alloca to remain in the output, and the LIMIT32 run must not show a
; <9 x i64> vector type.
; NOTE(review): presumably one element past the size that is still promoted
; at this work-group size; confirm against the pass.
22 ; OPT-LABEL: @alloca_9xi64_max1024(
23 ; OPT: alloca [9 x i64]
26 ; LIMIT32-NOT: <9 x i64>
27 define amdgpu_kernel void @alloca_9xi64_max1024(i64 addrspace(1)* %out, i32 %index) #0 {
; Stack array whose handling by the promote-alloca pass is under test.
29 %tmp = alloca [9 x i64], addrspace(5)
; Constant-index access: write zero into element 0.
30 %x = getelementptr [9 x i64], [9 x i64] addrspace(5)* %tmp, i32 0, i32 0
31 store i64 0, i64 addrspace(5)* %x
; Dynamically indexed access: the element is chosen by the %index argument.
32 %tmp1 = getelementptr [9 x i64], [9 x i64] addrspace(5)* %tmp, i32 0, i32 %index
33 %tmp2 = load i64, i64 addrspace(5)* %tmp1
; Publish the loaded value through the addrspace(1) output pointer.
34 store i64 %tmp2, i64 addrspace(1)* %out
; Kernel using attribute group #1 (flat work-group size 1,512) with a
; [16 x i64] stack array (16 * 8 = 128 bytes of payload). Element 0 is set to
; zero, then the element selected by the runtime %index is loaded and written to
; %out. The LIMIT32 run must not show a <16 x i64> vector type.
38 ; OPT-LABEL: @alloca_16xi64_max512(
42 ; LIMIT32-NOT: <16 x i64>
43 define amdgpu_kernel void @alloca_16xi64_max512(i64 addrspace(1)* %out, i32 %index) #1 {
; Stack array whose handling by the promote-alloca pass is under test.
45 %tmp = alloca [16 x i64], addrspace(5)
; Constant-index access: write zero into element 0.
46 %x = getelementptr [16 x i64], [16 x i64] addrspace(5)* %tmp, i32 0, i32 0
47 store i64 0, i64 addrspace(5)* %x
; Dynamically indexed access: the element is chosen by the %index argument.
48 %tmp1 = getelementptr [16 x i64], [16 x i64] addrspace(5)* %tmp, i32 0, i32 %index
49 %tmp2 = load i64, i64 addrspace(5)* %tmp1
; Publish the loaded value through the addrspace(1) output pointer.
50 store i64 %tmp2, i64 addrspace(1)* %out
; Kernel using attribute group #1 (flat work-group size 1,512) with a
; [17 x i64] stack array (17 * 8 = 136 bytes of payload). The default run
; expects the alloca to remain in the output, and the LIMIT32 run must not show
; a <17 x i64> vector type.
; NOTE(review): presumably one element past the size that is still promoted
; at this work-group size; confirm against the pass.
54 ; OPT-LABEL: @alloca_17xi64_max512(
55 ; OPT: alloca [17 x i64]
58 ; LIMIT32-NOT: <17 x i64>
59 define amdgpu_kernel void @alloca_17xi64_max512(i64 addrspace(1)* %out, i32 %index) #1 {
; Stack array whose handling by the promote-alloca pass is under test.
61 %tmp = alloca [17 x i64], addrspace(5)
; Constant-index access: write zero into element 0.
62 %x = getelementptr [17 x i64], [17 x i64] addrspace(5)* %tmp, i32 0, i32 0
63 store i64 0, i64 addrspace(5)* %x
; Dynamically indexed access: the element is chosen by the %index argument.
64 %tmp1 = getelementptr [17 x i64], [17 x i64] addrspace(5)* %tmp, i32 0, i32 %index
65 %tmp2 = load i64, i64 addrspace(5)* %tmp1
; Publish the loaded value through the addrspace(1) output pointer.
66 store i64 %tmp2, i64 addrspace(1)* %out
; Kernel using attribute group #1 (flat work-group size 1,512) with a
; [9 x i128] stack array (9 * 16 = 144 bytes of payload). The default run
; expects the alloca to remain in the output, and the LIMIT32 run must not show
; a <9 x i128> vector type.
70 ; OPT-LABEL: @alloca_9xi128_max512(
71 ; OPT: alloca [9 x i128]
74 ; LIMIT32-NOT: <9 x i128>
75 define amdgpu_kernel void @alloca_9xi128_max512(i128 addrspace(1)* %out, i32 %index) #1 {
; Stack array whose handling by the promote-alloca pass is under test.
77 %tmp = alloca [9 x i128], addrspace(5)
; Constant-index access: write zero into element 0.
78 %x = getelementptr [9 x i128], [9 x i128] addrspace(5)* %tmp, i32 0, i32 0
79 store i128 0, i128 addrspace(5)* %x
; Dynamically indexed access: the element is chosen by the %index argument.
80 %tmp1 = getelementptr [9 x i128], [9 x i128] addrspace(5)* %tmp, i32 0, i32 %index
81 %tmp2 = load i128, i128 addrspace(5)* %tmp1
; Publish the loaded value through the addrspace(1) output pointer.
82 store i128 %tmp2, i128 addrspace(1)* %out
; Kernel using attribute group #2 (flat work-group size 1,256) with a
; [9 x i128] stack array (9 * 16 = 144 bytes of payload). Element 0 is set to
; zero, then the element selected by the runtime %index is loaded and written to
; %out. The LIMIT32 run must not show a <9 x i128> vector type.
86 ; OPT-LABEL: @alloca_9xi128_max256(
90 ; LIMIT32-NOT: <9 x i128>
91 define amdgpu_kernel void @alloca_9xi128_max256(i128 addrspace(1)* %out, i32 %index) #2 {
; Stack array whose handling by the promote-alloca pass is under test.
93 %tmp = alloca [9 x i128], addrspace(5)
; Constant-index access: write zero into element 0.
94 %x = getelementptr [9 x i128], [9 x i128] addrspace(5)* %tmp, i32 0, i32 0
95 store i128 0, i128 addrspace(5)* %x
; Dynamically indexed access: the element is chosen by the %index argument.
96 %tmp1 = getelementptr [9 x i128], [9 x i128] addrspace(5)* %tmp, i32 0, i32 %index
97 %tmp2 = load i128, i128 addrspace(5)* %tmp1
; Publish the loaded value through the addrspace(1) output pointer.
98 store i128 %tmp2, i128 addrspace(1)* %out
; Kernel using attribute group #2 (flat work-group size 1,256) with a
; [16 x i128] stack array (16 * 16 = 256 bytes of payload). Element 0 is set to
; zero, then the element selected by the runtime %index is loaded and written to
; %out. The LIMIT32 run must not show a <16 x i128> vector type.
102 ; OPT-LABEL: @alloca_16xi128_max256(
106 ; LIMIT32-NOT: <16 x i128>
107 define amdgpu_kernel void @alloca_16xi128_max256(i128 addrspace(1)* %out, i32 %index) #2 {
; Stack array whose handling by the promote-alloca pass is under test.
109 %tmp = alloca [16 x i128], addrspace(5)
; Constant-index access: write zero into element 0.
110 %x = getelementptr [16 x i128], [16 x i128] addrspace(5)* %tmp, i32 0, i32 0
111 store i128 0, i128 addrspace(5)* %x
; Dynamically indexed access: the element is chosen by the %index argument.
112 %tmp1 = getelementptr [16 x i128], [16 x i128] addrspace(5)* %tmp, i32 0, i32 %index
113 %tmp2 = load i128, i128 addrspace(5)* %tmp1
; Publish the loaded value through the addrspace(1) output pointer.
114 store i128 %tmp2, i128 addrspace(1)* %out
; Kernel using attribute group #2 (flat work-group size 1,256) with a
; [9 x i256] stack array (9 * 32 = 288 bytes of payload). The default run
; expects the alloca to remain and no <9 x i256> vector to appear; the LIMIT32
; run likewise must not show a <9 x i256> vector type.
118 ; OPT-LABEL: @alloca_9xi256_max256(
119 ; OPT: alloca [9 x i256]
120 ; OPT-NOT: <9 x i256>
122 ; LIMIT32-NOT: <9 x i256>
123 define amdgpu_kernel void @alloca_9xi256_max256(i256 addrspace(1)* %out, i32 %index) #2 {
; Stack array whose handling by the promote-alloca pass is under test.
125 %tmp = alloca [9 x i256], addrspace(5)
; Constant-index access: write zero into element 0.
126 %x = getelementptr [9 x i256], [9 x i256] addrspace(5)* %tmp, i32 0, i32 0
127 store i256 0, i256 addrspace(5)* %x
; Dynamically indexed access: the element is chosen by the %index argument.
128 %tmp1 = getelementptr [9 x i256], [9 x i256] addrspace(5)* %tmp, i32 0, i32 %index
129 %tmp2 = load i256, i256 addrspace(5)* %tmp1
; Publish the loaded value through the addrspace(1) output pointer.
130 store i256 %tmp2, i256 addrspace(1)* %out
; Kernel using attribute group #2 (flat work-group size 1,256) with a
; [9 x i64] stack array (9 * 8 = 72 bytes of payload). Element 0 is set to zero,
; then the element selected by the runtime %index is loaded and written to %out.
; The LIMIT32 run must not show a <9 x i64> vector type.
134 ; OPT-LABEL: @alloca_9xi64_max256(
138 ; LIMIT32-NOT: <9 x i64>
139 define amdgpu_kernel void @alloca_9xi64_max256(i64 addrspace(1)* %out, i32 %index) #2 {
; Stack array whose handling by the promote-alloca pass is under test.
141 %tmp = alloca [9 x i64], addrspace(5)
; Constant-index access: write zero into element 0.
142 %x = getelementptr [9 x i64], [9 x i64] addrspace(5)* %tmp, i32 0, i32 0
143 store i64 0, i64 addrspace(5)* %x
; Dynamically indexed access: the element is chosen by the %index argument.
144 %tmp1 = getelementptr [9 x i64], [9 x i64] addrspace(5)* %tmp, i32 0, i32 %index
145 %tmp2 = load i64, i64 addrspace(5)* %tmp1
; Publish the loaded value through the addrspace(1) output pointer.
146 store i64 %tmp2, i64 addrspace(1)* %out
; Same body as the [9 x i64] kernel case with attribute group #2 (flat
; work-group size 1,256), but declared as a plain function (no amdgpu_kernel
; calling convention). The LIMIT32 run must not show a <9 x i64> vector type.
150 ; OPT-LABEL: @func_alloca_9xi64_max256(
154 ; LIMIT32-NOT: <9 x i64>
155 define void @func_alloca_9xi64_max256(i64 addrspace(1)* %out, i32 %index) #2 {
; Stack array whose handling by the promote-alloca pass is under test.
157 %tmp = alloca [9 x i64], addrspace(5)
; Constant-index access: write zero into element 0.
158 %x = getelementptr [9 x i64], [9 x i64] addrspace(5)* %tmp, i32 0, i32 0
159 store i64 0, i64 addrspace(5)* %x
; Dynamically indexed access: the element is chosen by the %index argument.
160 %tmp1 = getelementptr [9 x i64], [9 x i64] addrspace(5)* %tmp, i32 0, i32 %index
161 %tmp2 = load i64, i64 addrspace(5)* %tmp1
; Publish the loaded value through the addrspace(1) output pointer.
162 store i64 %tmp2, i64 addrspace(1)* %out
; Flat work-group size ranges ("min,max") attached to the test functions. The
; max1024 / max512 / max256 suffix in each function name matches the upper bound
; of the attribute group that function uses.
166 attributes #0 = { "amdgpu-flat-work-group-size"="1,1024" }
167 attributes #1 = { "amdgpu-flat-work-group-size"="1,512" }
168 attributes #2 = { "amdgpu-flat-work-group-size"="1,256" }