llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -mattr=+sse2 -S | FileCheck %s --check-prefix=SSE
   3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -mattr=+avx  -S | FileCheck %s --check-prefix=AVX
   4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -mattr=+avx2 -S | FileCheck %s --check-prefix=AVX
   5
   6 %class.1 = type { %class.2 }              ; single-field wrapper around %class.2
   7 %class.2 = type { %"class.3" }            ; single-field wrapper around %"class.3"
   8 %"class.3" = type { %"struct.1", i64 }    ; %"struct.1" followed by one i64
   9 %"struct.1" = type { [8 x i64] }          ; innermost field: array of eight i64
  10
  11 $_ZN1C10SwitchModeEv = comdat any         ; comdat for @_ZN1C10SwitchModeEv, whose definition carries the `comdat` keyword
  12
  13 ; Function Attrs: uwtable
     ; Input for the slp-vectorizer invocations at the top of this file.
     ; Two scalar i64 `and` results are stored to neighbouring slots: %and.2 goes
     ; to the base pointer and %and.1 goes to element 1 of the innermost
     ; [8 x i64] array (reached via %bar4). Each `and` takes one operand loaded
     ; from the matching slot (%foo.3, %foo.4) and one other scalar (%or.1, %bar5).
     ; The checks under the SSE prefix expect the scalar code to come out
     ; unchanged. The checks under the AVX prefix expect one <2 x i64> load, one
     ; <2 x i64> `and` and one <2 x i64> store, with %or.1 and %bar5 packed into
     ; a vector by two insertelement instructions.
  14 define void @_ZN1C10SwitchModeEv() local_unnamed_addr #0 comdat align 2 {
  15 ; SSE-LABEL: @_ZN1C10SwitchModeEv(
  16 ; SSE-NEXT:  for.body.lr.ph.i:
  17 ; SSE-NEXT:    [[OR_1:%.*]] = or i64 undef, 1
  18 ; SSE-NEXT:    store i64 [[OR_1]], ptr undef, align 8
  19 ; SSE-NEXT:    [[FOO_3:%.*]] = load i64, ptr undef, align 8
  20 ; SSE-NEXT:    [[FOO_2:%.*]] = getelementptr inbounds [[CLASS_1:%.*]], ptr undef, i64 0, i32 0, i32 0, i32 0, i32 0, i64 1
  21 ; SSE-NEXT:    [[FOO_4:%.*]] = load i64, ptr [[FOO_2]], align 8
  22 ; SSE-NEXT:    [[BAR5:%.*]] = load i64, ptr undef, align 8
  23 ; SSE-NEXT:    [[AND_2:%.*]] = and i64 [[OR_1]], [[FOO_3]]
  24 ; SSE-NEXT:    [[AND_1:%.*]] = and i64 [[BAR5]], [[FOO_4]]
  25 ; SSE-NEXT:    store i64 [[AND_2]], ptr undef, align 8
  26 ; SSE-NEXT:    [[BAR4:%.*]] = getelementptr inbounds [[CLASS_2:%.*]], ptr undef, i64 0, i32 0, i32 0, i32 0, i64 1
  27 ; SSE-NEXT:    store i64 [[AND_1]], ptr [[BAR4]], align 8
  28 ; SSE-NEXT:    ret void
  29 ;
  30 ; AVX-LABEL: @_ZN1C10SwitchModeEv(
  31 ; AVX-NEXT:  for.body.lr.ph.i:
  32 ; AVX-NEXT:    [[OR_1:%.*]] = or i64 undef, 1
  33 ; AVX-NEXT:    store i64 [[OR_1]], ptr undef, align 8
  34 ; AVX-NEXT:    [[BAR5:%.*]] = load i64, ptr undef, align 8
  35 ; AVX-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr undef, align 8
  36 ; AVX-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[OR_1]], i32 0
  37 ; AVX-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[BAR5]], i32 1
  38 ; AVX-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], [[TMP0]]
  39 ; AVX-NEXT:    store <2 x i64> [[TMP3]], ptr undef, align 8
  40 ; AVX-NEXT:    ret void
  41 ;
  42 for.body.lr.ph.i:
  43   %or.1 = or i64 undef, 1
  44   store i64 %or.1, ptr undef, align 8
       ; Loaded operands for the two `and`s: %foo.3 from the base pointer and
       ; %foo.4 from element 1 of the [8 x i64] array nested inside %class.1.
  45   %foo.3 = load i64, ptr undef, align 8
  46   %foo.2 = getelementptr inbounds %class.1, ptr undef, i64 0, i32 0, i32 0, i32 0, i32 0, i64 1
  47   %foo.4 = load i64, ptr %foo.2, align 8
  48   %bar5 = load i64, ptr undef, align 8
  49   %and.2 = and i64 %or.1, %foo.3
  50   %and.1 = and i64 %bar5, %foo.4
       ; Results go back to the same two slots; %bar4 walks %class.2 (one nesting
       ; level fewer than %foo.2) down to element 1 of the same array field.
  51   store i64 %and.2, ptr undef, align 8
  52   %bar4 = getelementptr inbounds %class.2, ptr undef, i64 0, i32 0, i32 0, i32 0, i64 1
  53   store i64 %and.1, ptr %bar4, align 8
  54   ret void
  55 }
  56
  57 ; Function Attrs: norecurse nounwind uwtable
     ; Presumably a reproducer for bug PR35497 (going by the function and file
     ; name); the original failure mode is not recorded in this file.
     ; The input is a set of scalar shl/and/lshr/add chains over i64 that ends in
     ; four i64 stores: to the base pointer and element 1, and to elements 4 and
     ; 5 of a [0 x i64] array. The checks for both prefixes expect each of those
     ; pairs to become one <2 x i64> store fed by vector shl/and/lshr/add, while
     ; the earlier scalar store of %add is expected to stay scalar.
     ; The two prefixes differ only in lane order: the SSE checks insert the
     ; loaded value into lane 0 and then swap lanes with a shufflevector, whereas
     ; the AVX checks insert it into lane 1 and need no swap.
  58 define void @pr35497() local_unnamed_addr #0 {
  59 ; SSE-LABEL: @pr35497(
  60 ; SSE-NEXT:  entry:
  61 ; SSE-NEXT:    [[TMP0:%.*]] = load i64, ptr undef, align 1
  62 ; SSE-NEXT:    [[ADD:%.*]] = add i64 undef, undef
  63 ; SSE-NEXT:    store i64 [[ADD]], ptr undef, align 1
  64 ; SSE-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], ptr undef, i64 0, i64 4
  65 ; SSE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> <i64 poison, i64 undef>, i64 [[TMP0]], i32 0
  66 ; SSE-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 2)
  67 ; SSE-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 20)
  68 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  69 ; SSE-NEXT:    [[TMP5:%.*]] = add nuw nsw <2 x i64> [[TMP4]], zeroinitializer
  70 ; SSE-NEXT:    store <2 x i64> [[TMP5]], ptr undef, align 1
  71 ; SSE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
  72 ; SSE-NEXT:    [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[ADD]], i32 1
  73 ; SSE-NEXT:    [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], splat (i64 2)
  74 ; SSE-NEXT:    [[TMP9:%.*]] = and <2 x i64> [[TMP8]], splat (i64 20)
  75 ; SSE-NEXT:    [[TMP10:%.*]] = lshr <2 x i64> [[TMP5]], splat (i64 6)
  76 ; SSE-NEXT:    [[TMP11:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP10]]
  77 ; SSE-NEXT:    store <2 x i64> [[TMP11]], ptr [[ARRAYIDX2_2]], align 1
  78 ; SSE-NEXT:    ret void
  79 ;
  80 ; AVX-LABEL: @pr35497(
  81 ; AVX-NEXT:  entry:
  82 ; AVX-NEXT:    [[TMP0:%.*]] = load i64, ptr undef, align 1
  83 ; AVX-NEXT:    [[ADD:%.*]] = add i64 undef, undef
  84 ; AVX-NEXT:    store i64 [[ADD]], ptr undef, align 1
  85 ; AVX-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], ptr undef, i64 0, i64 4
  86 ; AVX-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> <i64 undef, i64 poison>, i64 [[TMP0]], i32 1
  87 ; AVX-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 2)
  88 ; AVX-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 20)
  89 ; AVX-NEXT:    [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer
  90 ; AVX-NEXT:    store <2 x i64> [[TMP4]], ptr undef, align 1
  91 ; AVX-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
  92 ; AVX-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[ADD]], i32 1
  93 ; AVX-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], splat (i64 2)
  94 ; AVX-NEXT:    [[TMP8:%.*]] = and <2 x i64> [[TMP7]], splat (i64 20)
  95 ; AVX-NEXT:    [[TMP9:%.*]] = lshr <2 x i64> [[TMP4]], splat (i64 6)
  96 ; AVX-NEXT:    [[TMP10:%.*]] = add nuw nsw <2 x i64> [[TMP8]], [[TMP9]]
  97 ; AVX-NEXT:    store <2 x i64> [[TMP10]], ptr [[ARRAYIDX2_2]], align 1
  98 ; AVX-NEXT:    ret void
  99 ;
 100 entry:
       ; Value names below do not match their opcodes: every %and* is the result
       ; of a shl by 2, and every %shl* is the following `and` with 20.
 101   %0 = load i64, ptr undef, align 1
 102   %and = shl i64 %0, 2
 103   %shl = and i64 %and, 20
 104   %add = add i64 undef, undef
 105   store i64 %add, ptr undef, align 1
 106   %arrayidx2.1 = getelementptr inbounds [0 x i64], ptr undef, i64 0, i64 5
 107   %and.1 = shl i64 undef, 2
 108   %shl.1 = and i64 %and.1, 20
 109   %shr.1 = lshr i64 undef, 6
 110   %add.1 = add nuw nsw i64 %shl, %shr.1
 111   %arrayidx2.2 = getelementptr inbounds [0 x i64], ptr undef, i64 0, i64 4
 112   %shr.2 = lshr i64 undef, 6
 113   %add.2 = add nuw nsw i64 %shl.1, %shr.2
 114   %and.4 = shl i64 %add, 2
 115   %shl.4 = and i64 %and.4, 20
 116   %arrayidx2.5 = getelementptr inbounds [0 x i64], ptr undef, i64 0, i64 1
       ; First pair of results: %add.1 goes to element 1 here and %add.2 goes to
       ; the base pointer further down. Both are also inputs to the second pair.
 117   store i64 %add.1, ptr %arrayidx2.5, align 1
 118   %and.5 = shl nuw nsw i64 %add.1, 2
 119   %shl.5 = and i64 %and.5, 20
 120   %shr.5 = lshr i64 %add.1, 6
 121   %add.5 = add nuw nsw i64 %shl.4, %shr.5
       ; Second pair of results: %add.5 goes to element 5 and %add.6 to element 4.
 122   store i64 %add.5, ptr %arrayidx2.1, align 1
 123   store i64 %add.2, ptr undef, align 1
 124   %shr.6 = lshr i64 %add.2, 6
 125   %add.6 = add nuw nsw i64 %shl.5, %shr.6
 126   store i64 %add.6, ptr %arrayidx2.2, align 1
 127   ret void
 128 }