llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll

   1 ; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s
   2 ; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' -amdgpu-promote-alloca-to-vector-limit=32 < %s | FileCheck -check-prefix=LIMIT32 %s
   3
     ; Each function below allocates an addrspace(5) array of a different size
     ; and reads it back at a dynamic index. The checks record whether the
     ; array is expected to be rewritten as a vector value or left as an alloca.
     ; The first invocation above uses the default settings; the second one adds
     ; -amdgpu-promote-alloca-to-vector-limit=32 and is checked separately.
     ;
     ; "A5" selects address space 5 as the alloca address space, which is why
     ; every alloca in this file is written with addrspace(5).
   4 target datalayout = "A5"
   5
   6 ; OPT-LABEL: @alloca_8xi64_max1024(
   7 ; OPT-NOT: alloca
   8 ; OPT: <8 x i64>
     ; LIMIT32-LABEL: @alloca_8xi64_max1024(
   9 ; LIMIT32: alloca
  10 ; LIMIT32-NOT: <8 x i64>
     ;
     ; Kernel using attribute group #0 with a 64-byte [8 x i64] array in
     ; addrspace(5). Element 0 is zeroed, then the element at %index is loaded
     ; and written to %out.
     ; Default invocation: no alloca may remain and an <8 x i64> value is
     ; expected in its place.
     ; Invocation with the vector limit set to 32: the alloca is expected to
     ; stay and no <8 x i64> may appear.
     ; The label directive for the second invocation pins its two checks to
     ; this function; a bare "alloca" pattern would otherwise also match the
     ; word inside the function name on the define line.
  11 define amdgpu_kernel void @alloca_8xi64_max1024(ptr addrspace(1) %out, i32 %index) #0 {
  12 entry:
  13   %tmp = alloca [8 x i64], addrspace(5)
  14   store i64 0, ptr addrspace(5) %tmp ; write through the base pointer, i.e. element 0
  15   %tmp1 = getelementptr [8 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index ; dynamically indexed element
  16   %tmp2 = load i64, ptr addrspace(5) %tmp1
  17   store i64 %tmp2, ptr addrspace(1) %out
  18   ret void
  19 }
  20
  21 ; OPT-LABEL: @alloca_9xi64_max1024(
  22 ; OPT: alloca [9 x i64]
  23 ; OPT-NOT: <9 x i64>
     ; LIMIT32-LABEL: @alloca_9xi64_max1024(
  24 ; LIMIT32: alloca
  25 ; LIMIT32-NOT: <9 x i64>
     ;
     ; Same shape as the 8-element kernel in attribute group #0, but the array
     ; is one element larger: [9 x i64], 72 bytes.
     ; Default invocation: the alloca [9 x i64] is expected to survive and no
     ; <9 x i64> value may be produced.
     ; Invocation with the vector limit set to 32: same expectation.
     ; The added label directive restricts the second invocation's checks to
     ; this function's output, so its "alloca" pattern has to match the
     ; instruction rather than a function name elsewhere in the output.
  26 define amdgpu_kernel void @alloca_9xi64_max1024(ptr addrspace(1) %out, i32 %index) #0 {
  27 entry:
  28   %tmp = alloca [9 x i64], addrspace(5)
  29   store i64 0, ptr addrspace(5) %tmp ; zero element 0
  30   %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index ; address of element %index
  31   %tmp2 = load i64, ptr addrspace(5) %tmp1
  32   store i64 %tmp2, ptr addrspace(1) %out
  33   ret void
  34 }
  35
  36 ; OPT-LABEL: @alloca_16xi64_max512(
  37 ; OPT-NOT: alloca
  38 ; OPT: <16 x i64>
     ; LIMIT32-LABEL: @alloca_16xi64_max512(
  39 ; LIMIT32: alloca
  40 ; LIMIT32-NOT: <16 x i64>
     ;
     ; Kernel using attribute group #1 with a 128-byte [16 x i64] array in
     ; addrspace(5): store 0 to element 0, load element %index, write it to
     ; %out.
     ; Default invocation: the alloca must be gone and a <16 x i64> value is
     ; expected.
     ; Invocation with the vector limit set to 32: the alloca is expected to
     ; remain and no <16 x i64> may appear.
     ; The label directive for the second invocation keeps those checks from
     ; being satisfied by text belonging to a different function.
  41 define amdgpu_kernel void @alloca_16xi64_max512(ptr addrspace(1) %out, i32 %index) #1 {
  42 entry:
  43   %tmp = alloca [16 x i64], addrspace(5)
  44   store i64 0, ptr addrspace(5) %tmp ; element 0 := 0
  45   %tmp1 = getelementptr [16 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index ; index is a kernel argument
  46   %tmp2 = load i64, ptr addrspace(5) %tmp1
  47   store i64 %tmp2, ptr addrspace(1) %out
  48   ret void
  49 }
  50
  51 ; OPT-LABEL: @alloca_17xi64_max512(
  52 ; OPT: alloca [17 x i64]
  53 ; OPT-NOT: <17 x i64>
     ; LIMIT32-LABEL: @alloca_17xi64_max512(
  54 ; LIMIT32: alloca
  55 ; LIMIT32-NOT: <17 x i64>
     ;
     ; Companion of the 16-element kernel in attribute group #1 with one more
     ; element: [17 x i64], 136 bytes.
     ; Default invocation: the alloca [17 x i64] is expected to be kept and no
     ; <17 x i64> value may appear.
     ; Invocation with the vector limit set to 32: the alloca is likewise
     ; expected to be kept.
     ; A label directive is added for the second invocation so that its checks
     ; are evaluated only against this function's output.
  56 define amdgpu_kernel void @alloca_17xi64_max512(ptr addrspace(1) %out, i32 %index) #1 {
  57 entry:
  58   %tmp = alloca [17 x i64], addrspace(5)
  59   store i64 0, ptr addrspace(5) %tmp ; only element 0 is initialised
  60   %tmp1 = getelementptr [17 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index ; dynamically indexed element
  61   %tmp2 = load i64, ptr addrspace(5) %tmp1
  62   store i64 %tmp2, ptr addrspace(1) %out
  63   ret void
  64 }
  65
  66 ; OPT-LABEL: @alloca_9xi128_max512(
  67 ; OPT: alloca [9 x i128]
  68 ; OPT-NOT: <9 x i128>
     ; LIMIT32-LABEL: @alloca_9xi128_max512(
  69 ; LIMIT32: alloca
  70 ; LIMIT32-NOT: <9 x i128>
     ;
     ; Kernel using attribute group #1 with wider elements: a [9 x i128]
     ; array (144 bytes) in addrspace(5), written at element 0 and read at
     ; %index.
     ; Default invocation: the alloca [9 x i128] is expected to stay, with no
     ; <9 x i128> value in the output.
     ; Invocation with the vector limit set to 32: same expectation.
     ; The second invocation gets its own label directive so its "alloca"
     ; pattern cannot be consumed by another function's name or instruction.
  71 define amdgpu_kernel void @alloca_9xi128_max512(ptr addrspace(1) %out, i32 %index) #1 {
  72 entry:
  73   %tmp = alloca [9 x i128], addrspace(5)
  74   store i128 0, ptr addrspace(5) %tmp ; zero element 0
  75   %tmp1 = getelementptr [9 x i128], ptr addrspace(5) %tmp, i32 0, i32 %index ; address of element %index
  76   %tmp2 = load i128, ptr addrspace(5) %tmp1
  77   store i128 %tmp2, ptr addrspace(1) %out
  78   ret void
  79 }
  80
  81 ; OPT-LABEL: @alloca_9xi128_max256(
  82 ; OPT-NOT: alloca
  83 ; OPT: <9 x i128>
     ; LIMIT32-LABEL: @alloca_9xi128_max256(
  84 ; LIMIT32: alloca
  85 ; LIMIT32-NOT: <9 x i128>
     ;
     ; Same [9 x i128] array (144 bytes) as the max512 variant, but this
     ; kernel uses attribute group #2.
     ; Default invocation: here the alloca must disappear and a <9 x i128>
     ; value is expected instead.
     ; Invocation with the vector limit set to 32: the alloca is expected to
     ; remain and no <9 x i128> may appear.
     ; The label directive added for the second invocation ties those checks
     ; to this function only.
  86 define amdgpu_kernel void @alloca_9xi128_max256(ptr addrspace(1) %out, i32 %index) #2 {
  87 entry:
  88   %tmp = alloca [9 x i128], addrspace(5)
  89   store i128 0, ptr addrspace(5) %tmp ; element 0 := 0
  90   %tmp1 = getelementptr [9 x i128], ptr addrspace(5) %tmp, i32 0, i32 %index ; index is a kernel argument
  91   %tmp2 = load i128, ptr addrspace(5) %tmp1
  92   store i128 %tmp2, ptr addrspace(1) %out
  93   ret void
  94 }
  95
  96 ; OPT-LABEL: @alloca_16xi128_max256(
  97 ; OPT-NOT: alloca
  98 ; OPT: <16 x i128>
     ; LIMIT32-LABEL: @alloca_16xi128_max256(
  99 ; LIMIT32: alloca
 100 ; LIMIT32-NOT: <16 x i128>
     ;
     ; Kernel using attribute group #2 with a 256-byte [16 x i128] array in
     ; addrspace(5): store 0 to element 0, load element %index, write it to
     ; %out.
     ; Default invocation: no alloca may be left and a <16 x i128> value is
     ; expected.
     ; Invocation with the vector limit set to 32: the alloca is expected to
     ; stay and no <16 x i128> may appear.
     ; The second invocation's checks are anchored with their own label
     ; directive so they are matched inside this function's output.
 101 define amdgpu_kernel void @alloca_16xi128_max256(ptr addrspace(1) %out, i32 %index) #2 {
 102 entry:
 103   %tmp = alloca [16 x i128], addrspace(5)
 104   store i128 0, ptr addrspace(5) %tmp ; write through the base pointer, i.e. element 0
 105   %tmp1 = getelementptr [16 x i128], ptr addrspace(5) %tmp, i32 0, i32 %index ; dynamically indexed element
 106   %tmp2 = load i128, ptr addrspace(5) %tmp1
 107   store i128 %tmp2, ptr addrspace(1) %out
 108   ret void
 109 }
 110
 111 ; OPT-LABEL: @alloca_9xi256_max256(
 112 ; OPT: alloca [9 x i256]
 113 ; OPT-NOT: <9 x i256>
     ; LIMIT32-LABEL: @alloca_9xi256_max256(
 114 ; LIMIT32: alloca
 115 ; LIMIT32-NOT: <9 x i256>
     ;
     ; Kernel using attribute group #2 with a [9 x i256] array (288 bytes) in
     ; addrspace(5), written at element 0 and read at %index.
     ; Default invocation: the alloca [9 x i256] is expected to be kept and
     ; no <9 x i256> value may appear.
     ; Invocation with the vector limit set to 32: the alloca is likewise
     ; expected to be kept.
     ; The label directive for the second invocation makes its "alloca"
     ; pattern match the instruction in this function, not text elsewhere.
 116 define amdgpu_kernel void @alloca_9xi256_max256(ptr addrspace(1) %out, i32 %index) #2 {
 117 entry:
 118   %tmp = alloca [9 x i256], addrspace(5)
 119   store i256 0, ptr addrspace(5) %tmp ; only element 0 is initialised
 120   %tmp1 = getelementptr [9 x i256], ptr addrspace(5) %tmp, i32 0, i32 %index ; address of element %index
 121   %tmp2 = load i256, ptr addrspace(5) %tmp1
 122   store i256 %tmp2, ptr addrspace(1) %out
 123   ret void
 124 }
 125
 126 ; OPT-LABEL: @alloca_9xi64_max256(
 127 ; OPT-NOT: alloca
 128 ; OPT: <9 x i64>
     ; LIMIT32-LABEL: @alloca_9xi64_max256(
 129 ; LIMIT32: alloca
 130 ; LIMIT32-NOT: <9 x i64>
     ;
     ; Kernel using attribute group #2 with a [9 x i64] array (72 bytes) in
     ; addrspace(5). This is the kernel baseline for the two non-kernel
     ; variants that follow it in the file.
     ; Default invocation: the alloca must be gone and a <9 x i64> value is
     ; expected.
     ; Invocation with the vector limit set to 32: the alloca is expected to
     ; remain and no <9 x i64> may appear.
     ; The label pattern begins with "@", so it cannot match inside the longer
     ; names of the two following functions, which end in the same text.
 131 define amdgpu_kernel void @alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {
 132 entry:
 133   %tmp = alloca [9 x i64], addrspace(5)
 134   store i64 0, ptr addrspace(5) %tmp ; zero element 0
 135   %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index ; index is a kernel argument
 136   %tmp2 = load i64, ptr addrspace(5) %tmp1
 137   store i64 %tmp2, ptr addrspace(1) %out
 138   ret void
 139 }
 140
 141 ; OPT-LABEL: @func_alloca_9xi64_max256(
 142 ; OPT: alloca
 143 ; OPT-NOT: <9 x i64>
     ; LIMIT32-LABEL: @func_alloca_9xi64_max256(
 144 ; LIMIT32: alloca
 145 ; LIMIT32-NOT: <9 x i64>
     ;
     ; Same body and attribute group (#2) as @alloca_9xi64_max256, but defined
     ; as an ordinary function rather than an amdgpu_kernel.
     ; Default invocation: unlike the kernel variant, the alloca is expected
     ; to stay and no <9 x i64> value may appear.
     ; Invocation with the vector limit set to 32: same expectation.
     ; The label directive for the second invocation matters most here: this
     ; function's name itself contains "alloca", which an unanchored pattern
     ; could match instead of the instruction.
 146 define void @func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {
 147 entry:
 148   %tmp = alloca [9 x i64], addrspace(5)
 149   store i64 0, ptr addrspace(5) %tmp ; element 0 := 0
 150   %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index ; dynamically indexed element
 151   %tmp2 = load i64, ptr addrspace(5) %tmp1
 152   store i64 %tmp2, ptr addrspace(1) %out
 153   ret void
 154 }
 155
 156 ; OPT-LABEL: @alwaysinlined_func_alloca_9xi64_max256(
 157 ; OPT-NOT: alloca
 158 ; OPT: <9 x i64>
     ; LIMIT32-LABEL: @alwaysinlined_func_alloca_9xi64_max256(
 159 ; LIMIT32: alloca
 160 ; LIMIT32-NOT: <9 x i64>
     ;
     ; Non-kernel function with the same body as @func_alloca_9xi64_max256,
     ; but using attribute group #3, which adds alwaysinline to the work-group
     ; size attribute.
     ; Default invocation: in contrast to the plain function, no alloca may
     ; remain and a <9 x i64> value is expected.
     ; Invocation with the vector limit set to 32: the alloca is expected to
     ; stay and no <9 x i64> may appear.
     ; The label directive for the second invocation confines its checks to
     ; this last function, so the trailing negative check covers exactly it.
 161 define void @alwaysinlined_func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #3 {
 162 entry:
 163   %tmp = alloca [9 x i64], addrspace(5)
 164   store i64 0, ptr addrspace(5) %tmp ; write through the base pointer, i.e. element 0
 165   %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index ; address of element %index
 166   %tmp2 = load i64, ptr addrspace(5) %tmp1
 167   store i64 %tmp2, ptr addrspace(1) %out
 168   ret void
 169 }
 170
 171 attributes #0 = { "amdgpu-flat-work-group-size"="1,1024" } ; used by the two *_max1024 kernels
 172 attributes #1 = { "amdgpu-flat-work-group-size"="1,512" } ; used by the three *_max512 kernels
 173 attributes #2 = { "amdgpu-flat-work-group-size"="1,256" } ; used by the *_max256 kernels and by @func_alloca_9xi64_max256
 174 attributes #3 = { alwaysinline "amdgpu-flat-work-group-size"="1,256" } ; same size range as #2 plus alwaysinline; used by the alwaysinlined function