1 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -debug-only=amdgpu-promote-alloca -amdgpu-promote-alloca-to-vector-limit=512 -passes=amdgpu-promote-alloca %s -o - 2>&1 | FileCheck %s
4 ; CHECK: Scoring: %simpleuser = alloca [4 x i64], align 4, addrspace(5)
5 ; CHECK-NEXT: [+1]: store i32 42, ptr addrspace(5) %simpleuser, align 4
6 ; CHECK-NEXT: => Final Score:1
7 ; CHECK-NEXT: Scoring: %manyusers = alloca [4 x i64], align 4, addrspace(5)
8 ; CHECK-NEXT: [+1]: store i32 %v0.ext, ptr addrspace(5) %manyusers.1, align 4
9 ; CHECK-NEXT: [+1]: %v0 = load i8, ptr addrspace(5) %manyusers.1, align 1
10 ; CHECK-NEXT: [+1]: store i32 %v1.ext, ptr addrspace(5) %manyusers.2, align 4
11 ; CHECK-NEXT: [+1]: %v1 = load i8, ptr addrspace(5) %manyusers.2, align 1
12 ; CHECK-NEXT: => Final Score:4
13 ; CHECK-NEXT: Sorted Worklist:
14 ; CHECK-NEXT: %manyusers = alloca [4 x i64], align 4, addrspace(5)
15 ; CHECK-NEXT: %simpleuser = alloca [4 x i64], align 4, addrspace(5)
; Scoring test with two private (addrspace(5)) allocas of identical type but
; different user counts; the expected debug output above lists %manyusers
; ahead of %simpleuser in the sorted worklist.
16 define amdgpu_kernel void @simple_users_scores() #0 {
18 ; should get a score of 1: its only user is the single store below
19 %simpleuser = alloca [4 x i64], align 4, addrspace(5)
20 ; should get a score of 4: two loads and two stores reach it through GEPs, +1 each
21 %manyusers = alloca [4 x i64], align 4, addrspace(5)
23 store i32 42, ptr addrspace(5) %simpleuser
25 %manyusers.1 = getelementptr i8, ptr addrspace(5) %manyusers, i64 2
26 %v0 = load i8, ptr addrspace(5) %manyusers.1
27 %v0.ext = zext i8 %v0 to i32
28 store i32 %v0.ext, ptr addrspace(5) %manyusers.1
30 %manyusers.2 = getelementptr i8, ptr addrspace(5) %manyusers, i64 1
31 %v1 = load i8, ptr addrspace(5) %manyusers.2
32 %v1.ext = zext i8 %v1 to i32 ; extend the byte just loaded from %manyusers.2 (was %v0, leaving %v1 unused)
33 store i32 %v1.ext, ptr addrspace(5) %manyusers.2
38 ; CHECK: Scoring: %stack = alloca [4 x i64], align 4, addrspace(5)
39 ; CHECK-NEXT: [+5]: store i32 32, ptr addrspace(5) %stack, align 4
40 ; CHECK-NEXT: [+1]: store i32 42, ptr addrspace(5) %stack, align 4
41 ; CHECK-NEXT: [+9]: store i32 32, ptr addrspace(5) %stack.1, align 4
42 ; CHECK-NEXT: [+5]: %outer.cmp = load i1, ptr addrspace(5) %stack.1, align 1
43 ; CHECK-NEXT: [+1]: store i32 64, ptr addrspace(5) %stack.2, align 4
44 ; CHECK-NEXT: [+9]: %inner.cmp = load i1, ptr addrspace(5) %stack.2, align 1
45 ; CHECK-NEXT: => Final Score:30
46 define amdgpu_kernel void @loop_users_alloca(i1 %x, i2) #0 {
48 ; should get a score of 30 (sum of the per-user weights: 5 + 1 + 9 + 5 + 1 + 9)
49 %stack = alloca [4 x i64], align 4, addrspace(5)
50 %stack.1 = getelementptr i8, ptr addrspace(5) %stack, i64 4
51 %stack.2 = getelementptr i8, ptr addrspace(5) %stack, i64 8
53 store i32 42, ptr addrspace(5) %stack
57 store i32 32, ptr addrspace(5) %stack
58 %outer.cmp = load i1, ptr addrspace(5) %stack.1
62 store i32 32, ptr addrspace(5) %stack.1
63 %inner.cmp = load i1, ptr addrspace(5) %stack.2
64 br i1 %inner.cmp, label %loop.inner, label %loop.outer
67 store i32 64, ptr addrspace(5) %stack.2