1 ; RUN: opt < %s -S -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -loop-unroll | FileCheck %s
2 ; RUN: opt < %s -S -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -loop-unroll | FileCheck %s
4 target datalayout = "e-m:e-i64:64-n32:64"
5 target triple = "powerpc64le-unknown-linux-gnu"
7 ; Function Attrs: norecurse nounwind
; @f(s, x, k): when k > 0 (signed), fills s[0 .. k) with the byte 48 ('0') or
; 49 ('1'), chosen per index by masking against %x; on every path it then
; stores a 0 byte at s[sext k].
; The IR is already vectorized: a <16 x i8> main loop (vector.body) handles
; multiples of 16 and a scalar loop (for.body) handles the remaining indices.
8 define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_addr #0 {
; Skip both loops when k <= 0.
10 %cmp10 = icmp sgt i32 %k, 0
11 br i1 %cmp10, label %for.body.lr.ph, label %for.end
; Trip count is k zero-extended to i64; with fewer than 16 iterations the
; vector loop is bypassed and only the scalar loop runs.
13 for.body.lr.ph: ; preds = %entry
14 %wide.trip.count = zext i32 %k to i64
15 %min.iters.check = icmp ult i32 %k, 16
16 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
; %n.vec clears the low four bits of the trip count (4294967280 = 0xFFFFFFF0),
; i.e. rounds it down to a multiple of 16. %broadcast.splat holds %x in all
; 16 lanes.
18 vector.ph: ; preds = %for.body.lr.ph
19 %n.vec = and i64 %wide.trip.count, 4294967280
20 %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %x, i32 0
21 %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
; Vector loop: each iteration covers 16 consecutive indices. Per lane it
; computes (1 << i) & x, selects 48 when the result is zero and 49 otherwise,
; and stores the 16 resulting bytes at s + %index (align 1).
24 vector.body: ; preds = %vector.body, %vector.ph
25 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; %vec.ind12 is the per-lane index vector, starting at <0, 1, ..., 15>.
26 %vec.ind12 = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, %vector.ph ], [ %vec.ind.next13, %vector.body ]
27 %0 = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %vec.ind12
28 %1 = and <16 x i32> %0, %broadcast.splat
29 %2 = icmp eq <16 x i32> %1, zeroinitializer
30 %3 = select <16 x i1> %2, <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
31 %4 = getelementptr inbounds i8, i8* %s, i64 %index
32 %5 = bitcast i8* %4 to <16 x i8>*
33 store <16 x i8> %3, <16 x i8>* %5, align 1
; Advance the scalar index and every lane index by 16; leave the loop once
; the scalar index reaches %n.vec.
34 %index.next = add i64 %index, 16
35 %vec.ind.next13 = add <16 x i32> %vec.ind12, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
36 %6 = icmp eq i64 %index.next, %n.vec
37 br i1 %6, label %middle.block, label %vector.body
; If the vector loop already covered the full trip count, go straight to
; for.end; otherwise continue with the scalar loop.
39 middle.block: ; preds = %vector.body
40 %cmp.n = icmp eq i64 %n.vec, %wide.trip.count
41 br i1 %cmp.n, label %for.end, label %for.body.preheader
; Scalar loop start index: 0 when the vector loop was bypassed, %n.vec when
; arriving from middle.block.
43 for.body.preheader: ; preds = %middle.block, %for.body.lr.ph
44 %indvars.iv.ph = phi i64 [ 0, %for.body.lr.ph ], [ %n.vec, %middle.block ]
; Scalar loop: one byte per iteration. Stores 48 at s[%indvars.iv] when
; (%shl & %x) == 0 and 49 otherwise, then increments the index until it
; equals the trip count.
47 for.body: ; preds = %for.body.preheader, %for.body
48 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
49 %7 = trunc i64 %indvars.iv to i32
51 %and = and i32 %shl, %x
52 %tobool = icmp eq i32 %and, 0
53 %conv = select i1 %tobool, i8 48, i8 49
54 %arrayidx = getelementptr inbounds i8, i8* %s, i64 %indvars.iv
55 store i8 %conv, i8* %arrayidx, align 1
56 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
57 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
58 br i1 %exitcond, label %for.end, label %for.body
; Common exit: store a 0 byte at s[k], with k sign-extended to i64.
60 for.end: ; preds = %for.body, %middle.block, %entry
61 %idxprom1 = sext i32 %k to i64
62 %arrayidx2 = getelementptr inbounds i8, i8* %s, i64 %idxprom1
63 store i8 0, i8* %arrayidx2, align 1
68 ; CHECK-LABEL: vector.body
73 ; CHECK: label %vector.body