llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -mtriple=systemz-unknown -mcpu=z13 -slp-vectorizer -S < %s | FileCheck %s
   3
   ; External 4x4 i32 arrays used by @foo below: row 3 of @bar is both read and
   ; written, and row 3 of @dct_luma receives the four results.
   4 @bar = external global [4 x [4 x i32]], align 4
   5 @dct_luma = external global [4 x [4 x i32]], align 4
   6
   ; SLP vectorizer test (presumably the regression case for PR34619, going by
   ; the file name -- confirm against the bug tracker).
   ;
   ; The scalar input computes, for i in 0..3:
   ;     dct_luma[3][i] = (undef + x_i) ashr 6
   ; where x_0, x_2 and x_3 are loaded from bar[3][0], bar[3][2] and bar[3][3],
   ; while x_1 is %add277, the scalar that was just stored to bar[3][1].
   ;
   ; The autogenerated assertions expect the four scalar stores to become a
   ; single <4 x i32> add / ashr / store, with the operand vector assembled from
   ; two inserted scalars (lanes 0 and 1) and one <2 x i32> load of bar[3][2..3]
   ; (lanes 2 and 3).
   7 define void @foo() local_unnamed_addr {
   8 ; CHECK-LABEL: @foo(
   9 ; CHECK-NEXT:  entry:
  10 ; CHECK-NEXT:    [[ADD277:%.*]] = add nsw i32 undef, undef
  11 ; CHECK-NEXT:    store i32 [[ADD277]], i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 1), align 4
  12 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 0), align 4
  13 ; CHECK-NEXT:    [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 0
  14 ; CHECK-NEXT:    [[ARRAYIDX372_1:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 1
  15 ; CHECK-NEXT:    [[ARRAYIDX372_2:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 2
  16 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2) to <2 x i32>*), align 4
  17 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
  18 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1
  19 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  20 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  21 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> poison, [[TMP5]]
  22 ; CHECK-NEXT:    [[TMP7:%.*]] = ashr <4 x i32> [[TMP6]], <i32 6, i32 6, i32 6, i32 6>
  23 ; CHECK-NEXT:    [[ARRAYIDX372_3:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3
  24 ; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX372]] to <4 x i32>*
  25 ; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4
  26 ; CHECK-NEXT:    unreachable
  27 ;
  28 entry:
       ; Scalar that is written to bar[3][1] and later reused directly as the
       ; lane-1 operand (there is no reload of bar[3][1]).
  29   %add277 = add nsw i32 undef, undef
  30   store i32 %add277, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 1), align 4
       ; Lane 0: operand is loaded from bar[3][0]; result goes to dct_luma[3][0].
  31   %0 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 0), align 4
  32   %sub355 = add nsw i32 undef, %0
  33   %shr.i = ashr i32 %sub355, 6
  34   %arrayidx372 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 0
  35   store i32 %shr.i, i32* %arrayidx372, align 4
       ; Lane 1: operand is %add277 itself; result goes to dct_luma[3][1].
  36   %sub355.1 = add nsw i32 undef, %add277
  37   %shr.i.1 = ashr i32 %sub355.1, 6
  38   %arrayidx372.1 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 1
  39   store i32 %shr.i.1, i32* %arrayidx372.1, align 4
       ; Lane 2: operand is loaded from bar[3][2]; result goes to dct_luma[3][2].
  40   %1 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2), align 4
  41   %sub355.2 = add nsw i32 undef, %1
  42   %shr.i.2 = ashr i32 %sub355.2, 6
  43   %arrayidx372.2 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 2
  44   store i32 %shr.i.2, i32* %arrayidx372.2, align 4
       ; Lane 3: operand is loaded from bar[3][3], adjacent to the lane-2 load;
       ; result goes to dct_luma[3][3].
  45   %2 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 3), align 4
  46   %sub355.3 = add nsw i32 undef, %2
  47   %shr.i.3 = ashr i32 %sub355.3, 6
  48   %arrayidx372.3 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3
  49   store i32 %shr.i.3, i32* %arrayidx372.3, align 4
       ; The block ends in unreachable rather than a return.
  50   unreachable
  51 }