1 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=loop-unroll < %s | FileCheck %s
3 ; Test that the target-specific maximum iteration count to analyze
4 ; is large enough for the inner loop to be completely unrolled.
6 define void @foo(ptr addrspace(5) %ptrB, ptr addrspace(5) %ptrC, i32 %A, i32 %A2, float %M) {
10 bb2: ; preds = %bb7, %bb
11 %i = phi i32 [ 0, %bb ], [ %i8, %bb7 ]
17 bb4: ; preds = %bb10, %bb2
18 %i5 = phi i32 [ 0, %bb2 ], [ %i11, %bb10 ]
19 %i6 = add nuw nsw i32 %i5, %i
23 %i8 = add nuw nsw i32 %i, 1
24 %i9 = icmp eq i32 %i8, 8
25 br i1 %i9, label %bb3, label %bb2
27 bb10: ; preds = %for.body
28 %i11 = add nuw nsw i32 %i5, 1
29 %cmpj = icmp ult i32 %i11, 8
30 br i1 %cmpj, label %bb7, label %bb4
32 ; CHECK-LABEL: for.body
33 ; CHECK-NOT: %phi = phi {{.*}}
34 for.body: ; preds = %bb4, %for.body
35 %phi = phi i32 [ 0, %bb4 ], [ %inc, %for.body ]
36 %mul = shl nuw nsw i32 %phi, 6
37 %add = add i32 %A, %mul
38 %arrayidx = getelementptr inbounds float, ptr addrspace(5) %ptrC, i32 %add
39 %ld1 = load float, ptr addrspace(5) %arrayidx, align 4
40 %mul2 = shl nuw nsw i32 %phi, 3
41 %add2 = add i32 %A2, %mul2
42 %arrayidx2 = getelementptr inbounds float, ptr addrspace(5) %ptrB, i32 %add2
43 %ld2 = load float, ptr addrspace(5) %arrayidx2, align 4
44 %mul3 = fmul contract float %M, %ld2
45 %add3 = fadd contract float %ld1, %mul3
46 store float %add3, ptr addrspace(5) %arrayidx, align 4
47 %add1 = add nuw nsw i32 %add, 2048
48 %arrayidx3 = getelementptr inbounds float, ptr addrspace(5) %ptrC, i32 %add1
49 %ld3 = load float, ptr addrspace(5) %arrayidx3, align 4
50 %mul4 = fmul contract float %ld2, %M
51 %add4 = fadd contract float %ld3, %mul4
52 store float %add4, ptr addrspace(5) %arrayidx3, align 4
53 %inc = add nuw nsw i32 %phi, 1
54 %cmpi = icmp ult i32 %phi, 31
55 br i1 %cmpi, label %for.body, label %bb10