llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll

   1 ; RUN: opt -mcpu=skx -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses < %s | FileCheck %s
   2
   3 target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" ; little-endian layout with 32-bit pointers (p:32:32); the loop below indexes with i32
   4
   5 ; This test checks the fix for PR39099.
   6 ;
   7 ; Check that the predicated load is not vectorized as an
   8 ; interleaved-group (which requires proper masking, currently unsupported)
   9 ; but rather as scalarized accesses.
  10 ; (For SKX, Gather is not supported by the compiler for chars, therefore
  11 ;  the only remaining alternative is to scalarize).
  12 ;
  13 ; void masked_strided(const unsigned char* restrict p,
  14 ;                     unsigned char* restrict q,
  15 ;                     unsigned char guard) {
  16 ;   for(ix=0; ix < 1024; ++ix) {
  17 ;     if (ix > guard) {
  18 ;         char t = p[2*ix];
  19 ;         q[ix] = t;
  20 ;     }
  21 ;   }
  22 ; }
  23
  24 ;CHECK-LABEL: @masked_strided(
  25 ;CHECK: vector.body:
  26 ;CHECK-NEXT:  %index = phi i32
  27 ;CHECK-NEXT:  %[[VECIND:.+]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  28 ;CHECK-NEXT:  %[[VMASK:.+]] = icmp ugt <8 x i32> %[[VECIND]], %{{broadcast\.splat.*}}
  29 ;CHECK-NEXT:  %{{.*}} = shl nuw nsw <8 x i32> %[[VECIND]], splat (i32 1)
  30 ;CHECK-NEXT:  %[[M:.+]] = extractelement <8 x i1> %[[VMASK]], i32 0
  31 ;CHECK-NEXT:  br i1 %[[M]], label %pred.store.if, label %pred.store.continue
  32 ;CHECK-NOT:   %{{.+}} = load <16 x i8>, ptr %{{.*}}, align 1
  33
  34 define dso_local void @masked_strided(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr { ; IR form of the C loop sketched in the header comment: reads %p, writes %q
  35 entry:
  36   %conv = zext i8 %guard to i32 ; widen the i8 guard so it can be compared against the i32 induction variable
  37   br label %for.body
  38
  39 for.body: ; loop header: evaluates the guard predicate for the current ix
  40   %ix.09 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] ; induction variable ix: 0 on entry, %inc on the back edge
  41   %cmp1 = icmp ugt i32 %ix.09, %conv ; unsigned "ix > guard"
  42   br i1 %cmp1, label %if.then, label %for.inc ; the load/store in if.then only run when the predicate holds
  43
  44 if.then: ; predicated body
  45   %mul = shl nuw nsw i32 %ix.09, 1 ; 2*ix, the stride-2 index into %p
  46   %arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
  47   %0 = load i8, ptr %arrayidx, align 1 ; t = p[2*ix]: the predicated strided load this test is about
  48   %arrayidx3 = getelementptr inbounds i8, ptr %q, i32 %ix.09
  49   store i8 %0, ptr %arrayidx3, align 1 ; q[ix] = t
  50   br label %for.inc
  51
  52 for.inc: ; loop latch
  53   %inc = add nuw nsw i32 %ix.09, 1 ; ++ix
  54   %exitcond = icmp eq i32 %inc, 1024 ; exit once ix reaches 1024
  55   br i1 %exitcond, label %for.end, label %for.body
  56
  57 for.end:
  58   ret void
  59 }