; RUN: opt -basic-aa -loop-distribute -enable-loop-distribute -verify-loop-info -verify-dom-info -S \
; RUN:   < %s | FileCheck %s

; RUN: opt -basic-aa -loop-distribute -enable-loop-distribute -verify-loop-info -verify-dom-info \
; RUN:   -loop-accesses -analyze < %s -enable-new-pm=0 | FileCheck %s --check-prefix=ANALYSIS

; TODO: the following changes the order in which loop-access printing prints loops; remove the legacy RUN and update after the NPM switch
; TODO: opt -aa-pipeline=basic-aa -passes='loop-distribute,print-access-info' -enable-loop-distribute \
; TODO:   -verify-loop-info -verify-dom-info -disable-output < %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS

; RUN: opt -basic-aa -loop-distribute -enable-loop-distribute -loop-vectorize -force-vector-width=4 -S \
; RUN:   < %s | FileCheck %s --check-prefix=VECTORIZE

; We should distribute this loop into a safe (2nd statement) and an unsafe
; (1st statement) loop:
;
;   for (i = 0; i < n; i++) {
;     A[i + 1] = A[i] * B[i];
; =======================
;     C[i] = D[i] * E[i];
;   }
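;
; A sketch of what distribution should produce (a reading of the CHECK lines
; below, not additional checked output): the unsafe statement moves into the
; cloned for.body.ldist1 loop, which still carries the A[i] -> A[i + 1]
; dependence, while the safe statement stays in for.body, which has no
; loop-carried dependences and can be vectorized:
;
;   for (i = 0; i < n; i++)       // for.body.ldist1 (unsafe)
;     A[i + 1] = A[i] * B[i];
;   for (i = 0; i < n; i++)       // for.body (safe, vectorizable)
;     C[i] = D[i] * E[i];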

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"

; CHECK-LABEL: @f(
define void @f(i32* noalias %a,
               i32* noalias %b,
               i32* noalias %c,
               i32* noalias %d,
               i32* noalias %e) {
entry:
  br label %for.body

; Verify the two distributed loops.

; CHECK: entry.split.ldist1:
; CHECK:    br label %for.body.ldist1
; CHECK: for.body.ldist1:
; CHECK:    %mulA.ldist1 = mul i32 %loadB.ldist1, %loadA.ldist1
; CHECK:    br i1 %exitcond.ldist1, label %entry.split, label %for.body.ldist1

; CHECK: entry.split:
; CHECK:    br label %for.body
; CHECK: for.body:
; CHECK:    %mulC = mul i32 %loadD, %loadE
; CHECK: for.end:

; ANALYSIS: for.body:
; ANALYSIS-NEXT: Memory dependences are safe{{$}}
; ANALYSIS: for.body.ldist1:
; ANALYSIS-NEXT: Report: unsafe dependent memory operations in loop

; VECTORIZE: mul <4 x i32>

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]

  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
  %loadA = load i32, i32* %arrayidxA, align 4

  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
  %loadB = load i32, i32* %arrayidxB, align 4

  %mulA = mul i32 %loadB, %loadA

  %add = add nuw nsw i64 %ind, 1
  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
  store i32 %mulA, i32* %arrayidxA_plus_4, align 4

  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
  %loadD = load i32, i32* %arrayidxD, align 4

  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
  %loadE = load i32, i32* %arrayidxE, align 4

  %mulC = mul i32 %loadD, %loadE

  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
  store i32 %mulC, i32* %arrayidxC, align 4

  %exitcond = icmp eq i64 %add, 20
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

declare i32 @llvm.convergent(i32) #0

; It is OK to distribute with a convergent operation, since in each
; new loop the convergent operation has the same control dependency.
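;
; Pseudocode for the loop below (a sketch mirroring @f; convergent() stands
; in for the call to @llvm.convergent on the D operand):
;
;   for (i = 0; i < n; i++) {
;     A[i + 1] = A[i] * B[i];
;     C[i] = convergent(D[i]) * E[i];
;   }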
; CHECK-LABEL: @f_with_convergent(
define void @f_with_convergent(i32* noalias %a,
                               i32* noalias %b,
                               i32* noalias %c,
                               i32* noalias %d,
                               i32* noalias %e) {
entry:
  br label %for.body

; Verify the two distributed loops.

; CHECK: entry.split.ldist1:
; CHECK:    br label %for.body.ldist1
; CHECK: for.body.ldist1:
; CHECK:    %mulA.ldist1 = mul i32 %loadB.ldist1, %loadA.ldist1
; CHECK:    br i1 %exitcond.ldist1, label %entry.split, label %for.body.ldist1

; CHECK: entry.split:
; CHECK:    br label %for.body
; CHECK: for.body:
; CHECK:    %convergentD = call i32 @llvm.convergent(i32 %loadD)
; CHECK:    %mulC = mul i32 %convergentD, %loadE
; CHECK: for.end:

; ANALYSIS: for.body:
; ANALYSIS-NEXT: Has convergent operation in loop
; ANALYSIS-NEXT: Report: cannot add control dependency to convergent operation
; ANALYSIS: for.body.ldist1:
; ANALYSIS-NEXT: Report: unsafe dependent memory operations in loop

; The convergent instruction happens to block vectorization.
; VECTORIZE: call i32 @llvm.convergent
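;
; (Contrast with @f above, where VECTORIZE matches the vectorized
; "mul <4 x i32>"; here the scalar call must remain.)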

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]

  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
  %loadA = load i32, i32* %arrayidxA, align 4

  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
  %loadB = load i32, i32* %arrayidxB, align 4

  %mulA = mul i32 %loadB, %loadA

  %add = add nuw nsw i64 %ind, 1
  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
  store i32 %mulA, i32* %arrayidxA_plus_4, align 4

  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
  %loadD = load i32, i32* %arrayidxD, align 4

  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
  %loadE = load i32, i32* %arrayidxE, align 4

  %convergentD = call i32 @llvm.convergent(i32 %loadD)
  %mulC = mul i32 %convergentD, %loadE

  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
  store i32 %mulC, i32* %arrayidxC, align 4

  %exitcond = icmp eq i64 %add, 20
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

attributes #0 = { nounwind readnone convergent }