1 ; RUN: opt %loadPolly -polly-import-jscop \
2 ; RUN: -polly-codegen -S < %s | FileCheck %s
3 ; RUN: opt %loadPolly -polly-import-jscop \
4 ; RUN: -polly-codegen -polly-import-jscop-postfix=pow2 \
5 ; RUN: -S < %s | FileCheck %s -check-prefix=POW2
7 ; void exprModDiv(float *A, float *B, float *C, long N, long p) {
8 ; for (long i = 0; i < N; i++)
9 ; C[i] += A[i] + B[i] + A[i] + B[i + p];
13 ; This test case changes the access functions such that the resulting index
14 ; expressions are modulo or division operations. We test that the code we
15 ; generate takes advantage of knowledge about unsigned numerators. This is
16 ; useful as LLVM will translate urem and udiv operations with power-of-two
17 ; denominators to fast bitwise-and or shift operations.
20 ; CHECK: %pexp.pdiv_r = urem i64 %polly.indvar, 127
21 ; CHECK: %polly.access.A9 = getelementptr float, ptr %A, i64 %pexp.pdiv_r
25 ; Note: without the floor, we would create a map i -> i/127 whose domain only
26 ; contains values of i that are divisible by 127. All other values of i would
27 ; not be mapped to any value. However, to generate correct code we require
28 ; each value of i to indeed be mapped to a value.
30 ; CHECK: %pexp.p_div_q = udiv i64 %polly.indvar, 127
31 ; CHECK: %polly.access.B10 = getelementptr float, ptr %B, i64 %pexp.p_div_q
36 ; CHECK: %pexp.div = sdiv exact i64 %p, 127
37 ; CHECK: %polly.access.B12 = getelementptr float, ptr %B, i64 %pexp.div
40 ; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128
41 ; POW2: %polly.access.A9 = getelementptr float, ptr %A, i64 %pexp.pdiv_r
44 ; POW2: %pexp.p_div_q = udiv i64 %polly.indvar, 128
45 ; POW2: %polly.access.B10 = getelementptr float, ptr %B, i64 %pexp.p_div_q
50 ; POW2: %pexp.div = sdiv exact i64 %p, 128
51 ; POW2: %polly.access.B12 = getelementptr float, ptr %B, i64 %pexp.div
53 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
55 define void @exprModDiv(ptr %A, ptr %B, ptr %C, i64 %N, i64 %p) {
59 for.cond: ; preds = %for.inc, %entry
60 %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
61 %cmp = icmp slt i64 %i.0, %N
62 br i1 %cmp, label %for.body, label %for.end
64 for.body: ; preds = %for.cond
65 %arrayidx = getelementptr inbounds float, ptr %A, i64 %i.0
66 %tmp = load float, ptr %arrayidx, align 4
67 %arrayidx1 = getelementptr inbounds float, ptr %B, i64 %i.0
68 %tmp1 = load float, ptr %arrayidx1, align 4
69 %add = fadd float %tmp, %tmp1
70 %arrayidx2 = getelementptr inbounds float, ptr %A, i64 %i.0
71 %tmp2 = load float, ptr %arrayidx2, align 4
72 %add3 = fadd float %add, %tmp2
73 %padd = add nsw i64 %p, %i.0
74 %arrayidx4 = getelementptr inbounds float, ptr %B, i64 %padd
75 %tmp3 = load float, ptr %arrayidx4, align 4
76 %add5 = fadd float %add3, %tmp3
77 %arrayidx6 = getelementptr inbounds float, ptr %C, i64 %i.0
78 %tmp4 = load float, ptr %arrayidx6, align 4
79 %add7 = fadd float %tmp4, %add5
80 store float %add7, ptr %arrayidx6, align 4
83 for.inc: ; preds = %for.body
84 %inc = add nuw nsw i64 %i.0, 1
87 for.end: ; preds = %for.cond