Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / uniform-work-group-recursion-test.ll
blob7ba7566506ca8cc7f0858679fa1cad1181caab65
1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2 ; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s
4 ; Test to ensure recursive functions exhibit proper behaviour
5 ; Test to generate fibonacci numbers
; @fib: recursive Fibonacci-style function with default (external) linkage.
; Returns 1 when %n is 0 or 1, otherwise fib(%n - 1) + fib(%n - 2).
; It carries attribute group #0 in the input; the assertions below expect it
; to end up in the attribute group captured as ATTR0, whose
; "uniform-work-group-size" value is "false".
; NOTE(review): presumably "false" because an externally visible function may
; have callers outside this module - confirm against the attributor pass.
7 define i32 @fib(i32 %n) #0 {
8 ; CHECK-LABEL: define {{[^@]+}}@fib
9 ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
10 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[N]], 0
11 ; CHECK-NEXT:    br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
12 ; CHECK:       cont1:
13 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[N]], 1
14 ; CHECK-NEXT:    br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
15 ; CHECK:       cont2:
16 ; CHECK-NEXT:    [[NM1:%.*]] = sub i32 [[N]], 1
17 ; CHECK-NEXT:    [[FIBM1:%.*]] = call i32 @fib(i32 [[NM1]])
18 ; CHECK-NEXT:    [[NM2:%.*]] = sub i32 [[N]], 2
19 ; CHECK-NEXT:    [[FIBM2:%.*]] = call i32 @fib(i32 [[NM2]])
20 ; CHECK-NEXT:    [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
21 ; CHECK-NEXT:    ret i32 [[RETVAL]]
22 ; CHECK:       exit:
23 ; CHECK-NEXT:    ret i32 1
; Entry block: first base case, n == 0 goes straight to %exit.
25   %cmp1 = icmp eq i32 %n, 0
26   br i1 %cmp1, label %exit, label %cont1
; Second base case, n == 1 also goes to %exit.
28 cont1:
29   %cmp2 = icmp eq i32 %n, 1
30   br i1 %cmp2, label %exit, label %cont2
; Recursive case: two self-calls on n - 1 and n - 2, summed.
32 cont2:
33   %nm1 = sub i32 %n, 1
34   %fibm1 = call i32 @fib(i32 %nm1)
35   %nm2 = sub i32 %n, 2
36   %fibm2 = call i32 @fib(i32 %nm2)
37   %retval = add i32 %fibm1, %fibm2
39   ret i32 %retval
; Shared exit for both base cases.
41 exit:
42   ret i32 1
43 }
; @fib_internal: same recursive body as @fib, but declared with internal
; linkage. Returns 1 when %n is 0 or 1, otherwise
; fib_internal(%n - 1) + fib_internal(%n - 2).
; It carries attribute group #0 in the input; the assertions below expect it
; to end up in the attribute group captured as ATTR1, whose
; "uniform-work-group-size" value is "true".
; NOTE(review): presumably "true" is taken over from @kernel, the only
; non-recursive caller visible in this file - confirm against the pass.
45 define internal i32 @fib_internal(i32 %n) #0 {
46 ; CHECK-LABEL: define {{[^@]+}}@fib_internal
47 ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
48 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[N]], 0
49 ; CHECK-NEXT:    br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
50 ; CHECK:       cont1:
51 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[N]], 1
52 ; CHECK-NEXT:    br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
53 ; CHECK:       cont2:
54 ; CHECK-NEXT:    [[NM1:%.*]] = sub i32 [[N]], 1
55 ; CHECK-NEXT:    [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]])
56 ; CHECK-NEXT:    [[NM2:%.*]] = sub i32 [[N]], 2
57 ; CHECK-NEXT:    [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]])
58 ; CHECK-NEXT:    [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
59 ; CHECK-NEXT:    ret i32 [[RETVAL]]
60 ; CHECK:       exit:
61 ; CHECK-NEXT:    ret i32 1
; Entry block: first base case, n == 0 goes straight to %exit.
63   %cmp1 = icmp eq i32 %n, 0
64   br i1 %cmp1, label %exit, label %cont1
; Second base case, n == 1 also goes to %exit.
66 cont1:
67   %cmp2 = icmp eq i32 %n, 1
68   br i1 %cmp2, label %exit, label %cont2
; Recursive case: two self-calls on n - 1 and n - 2, summed.
70 cont2:
71   %nm1 = sub i32 %n, 1
72   %fibm1 = call i32 @fib_internal(i32 %nm1)
73   %nm2 = sub i32 %n, 2
74   %fibm2 = call i32 @fib_internal(i32 %nm2)
75   %retval = add i32 %fibm1, %fibm2
77   ret i32 %retval
; Shared exit for both base cases.
79 exit:
80   ret i32 1
81 }
; @kernel: amdgpu_kernel entry point that calls both the external @fib and
; the internal @fib_internal with the constant 5 and stores each result
; through the addrspace(1) pointer %m.
; It carries attribute group #1 ("uniform-work-group-size"="true") in the
; input; the assertions below expect it to end up in the attribute group
; captured as ATTR2, which still has "uniform-work-group-size"="true".
83 define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 {
84 ; CHECK-LABEL: define {{[^@]+}}@kernel
85 ; CHECK-SAME: (ptr addrspace(1) [[M:%.*]]) #[[ATTR2:[0-9]+]] {
86 ; CHECK-NEXT:    [[R:%.*]] = call i32 @fib(i32 5)
87 ; CHECK-NEXT:    [[R2:%.*]] = call i32 @fib_internal(i32 5)
88 ; CHECK-NEXT:    store i32 [[R]], ptr addrspace(1) [[M]], align 4
89 ; CHECK-NEXT:    store i32 [[R2]], ptr addrspace(1) [[M]], align 4
90 ; CHECK-NEXT:    ret void
; One call per callee so that each gets a call edge from the kernel.
92   %r = call i32 @fib(i32 5)
93   %r2 = call i32 @fib_internal(i32 5)
; Both stores target the same address %m; they exist to keep the call
; results used, the second store overwrites the first.
95   store i32 %r, ptr addrspace(1) %m
96   store i32 %r2, ptr addrspace(1) %m
97   ret void
98 }
; Input attribute groups.
; #0 is shared by @fib and @fib_internal; #1 is used only by @kernel.
100 ; nounwind and readnone are added to match attributor results.
; The expected output for the functions using #0 shows memory(none) where
; the input spells readnone.
101 attributes #0 = { nounwind readnone }
; #1 is the only place in the input that sets "uniform-work-group-size".
102 attributes #1 = { "uniform-work-group-size"="true" }
104 ; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
105 ; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" }
106 ; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" }