llvm/test/Transforms/PhaseOrdering/X86/addsub-inseltpoison.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -O3 -S                                        | FileCheck %s
   3 ; RUN: opt < %s -passes='default<O3>' -S | FileCheck %s
   4
   5 target triple = "x86_64--"
   6 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
   7
   8 ; Ideally, this should reach the backend with 1 fsub, 1 fadd, and 1 shuffle.
   9 ; That may require some coordination between VectorCombine, SLP, and other passes.
  10 ; The end goal is to get a single "vaddsubps" instruction for x86 with AVX.
  11
   12 define <4 x float> @PR45015(<4 x float> %arg, <4 x float> %arg1) { ; Scalarized add/sub input: lanes 0 and 2 compute %arg - %arg1, lanes 1 and 3 compute %arg + %arg1.
   13 ; CHECK-LABEL: @PR45015(
   14 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[ARG:%.*]], [[ARG1:%.*]]
   15 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[ARG]], [[ARG1]]
   16 ; CHECK-NEXT:    [[T16:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   17 ; CHECK-NEXT:    ret <4 x float> [[T16]]
   18 ;
   19   %t = extractelement <4 x float> %arg, i32 0 ; expected output above: one vector fsub, one vector fadd, and a shuffle taking lanes 0/2 from the fsub and lanes 1/3 from the fadd
   20   %t2 = extractelement <4 x float> %arg1, i32 0
   21   %t3 = fsub float %t, %t2 ; lane 0: subtract
   22   %t4 = insertelement <4 x float> poison, float %t3, i32 0 ; result chain starts from poison; only lane 0 is written so far
   23   %t5 = extractelement <4 x float> %arg, i32 1
   24   %t6 = extractelement <4 x float> %arg1, i32 1
   25   %t7 = fadd float %t5, %t6 ; lane 1: add
   26   %t8 = insertelement <4 x float> %t4, float %t7, i32 1
   27   %t9 = extractelement <4 x float> %arg, i32 2
   28   %t10 = extractelement <4 x float> %arg1, i32 2
   29   %t11 = fsub float %t9, %t10 ; lane 2: subtract
   30   %t12 = insertelement <4 x float> %t8, float %t11, i32 2
   31   %t13 = extractelement <4 x float> %arg, i32 3
   32   %t14 = extractelement <4 x float> %arg1, i32 3
   33   %t15 = fadd float %t13, %t14 ; lane 3: add
   34   %t16 = insertelement <4 x float> %t12, float %t15, i32 3 ; all four lanes have now been written
   35   ret <4 x float> %t16
   36 }
  37
  38 ; PR42022 - https://bugs.llvm.org/show_bug.cgi?id=42022
  39
  40 %struct.Vector4 = type { float, float, float, float }
  41
   42 define { <2 x float>, <2 x float> } @add_aggregate(<2 x float> %a0, <2 x float> %a1, <2 x float> %b0, <2 x float> %b1) { ; Scalarized input: computes %a0 + %b0 and %a1 + %b1 lane by lane and returns both sums in a two-field aggregate.
   43 ; CHECK-LABEL: @add_aggregate(
   44 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[A0:%.*]], [[B0:%.*]]
   45 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> [[A1:%.*]], [[B1:%.*]]
   46 ; CHECK-NEXT:    [[FCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP1]], 0
   47 ; CHECK-NEXT:    [[FCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[FCA_0_INSERT]], <2 x float> [[TMP2]], 1
   48 ; CHECK-NEXT:    ret { <2 x float>, <2 x float> } [[FCA_1_INSERT]]
   49 ;
   50   %a00 = extractelement <2 x float> %a0, i32 0 ; expected output above: the scalar adds collapse into two <2 x float> fadds feeding the insertvalue chain
   51   %b00 = extractelement <2 x float> %b0, i32 0
   52   %add = fadd float %a00, %b00 ; field 0, lane 0: %a0[0] + %b0[0]
   53   %retval.0.0.insert = insertelement <2 x float> poison, float %add, i32 0 ; first result vector starts from poison
   54   %a01 = extractelement <2 x float> %a0, i32 1
   55   %b01 = extractelement <2 x float> %b0, i32 1
   56   %add4 = fadd float %a01, %b01 ; field 0, lane 1: %a0[1] + %b0[1]
   57   %retval.0.1.insert = insertelement <2 x float> %retval.0.0.insert, float %add4, i32 1
   58   %a10 = extractelement <2 x float> %a1, i32 0
   59   %b10 = extractelement <2 x float> %b1, i32 0
   60   %add7 = fadd float %a10, %b10 ; field 1, lane 0: %a1[0] + %b1[0]
   61   %retval.1.0.insert = insertelement <2 x float> poison, float %add7, i32 0 ; second result vector also starts from poison
   62   %a11 = extractelement <2 x float> %a1, i32 1
   63   %b11 = extractelement <2 x float> %b1, i32 1
   64   %add10 = fadd float %a11, %b11 ; field 1, lane 1: %a1[1] + %b1[1]
   65   %retval.1.1.insert = insertelement <2 x float> %retval.1.0.insert, float %add10, i32 1
   66   %fca.0.insert = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> %retval.0.1.insert, 0 ; the aggregate base is undef here (unlike the poison vector bases); the expected output keeps the same undef base
   67   %fca.1.insert = insertvalue { <2 x float>, <2 x float> } %fca.0.insert, <2 x float> %retval.1.1.insert, 1
   68   ret { <2 x float>, <2 x float> } %fca.1.insert
   69 }
  70
   71 define void @add_aggregate_store(<2 x float> %a0, <2 x float> %a1, <2 x float> %b0, <2 x float> %b1, %struct.Vector4* nocapture dereferenceable(16) %r) { ; Scalarized input: stores %a0 + %b0 into fields 0-1 and %a1 + %b1 into fields 2-3 of the %struct.Vector4 pointed to by %r.
   72 ; CHECK-LABEL: @add_aggregate_store(
   73 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[A0:%.*]], [[B0:%.*]]
   74 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> [[A1:%.*]], [[B1:%.*]]
   75 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   76 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast %struct.Vector4* [[R:%.*]] to <4 x float>*
   77 ; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
   78 ; CHECK-NEXT:    ret void
   79 ;
   80   %a00 = extractelement <2 x float> %a0, i32 0 ; expected output above: two <2 x float> fadds, a shuffle concatenating them into <4 x float>, and one align-4 vector store through %r
   81   %b00 = extractelement <2 x float> %b0, i32 0
   82   %add = fadd float %a00, %b00 ; %a0[0] + %b0[0]
   83   %r0 = getelementptr inbounds %struct.Vector4, %struct.Vector4* %r, i64 0, i32 0 ; address of field 0
   84   store float %add, float* %r0, align 4
   85   %a01 = extractelement <2 x float> %a0, i32 1
   86   %b01 = extractelement <2 x float> %b0, i32 1
   87   %add4 = fadd float %a01, %b01 ; %a0[1] + %b0[1]
   88   %r1 = getelementptr inbounds %struct.Vector4, %struct.Vector4* %r, i64 0, i32 1 ; address of field 1
   89   store float %add4, float* %r1, align 4
   90   %a10 = extractelement <2 x float> %a1, i32 0
   91   %b10 = extractelement <2 x float> %b1, i32 0
   92   %add7 = fadd float %a10, %b10 ; %a1[0] + %b1[0]
   93   %r2 = getelementptr inbounds %struct.Vector4, %struct.Vector4* %r, i64 0, i32 2 ; address of field 2
   94   store float %add7, float* %r2, align 4
   95   %a11 = extractelement <2 x float> %a1, i32 1
   96   %b11 = extractelement <2 x float> %b1, i32 1
   97   %add10 = fadd float %a11, %b11 ; %a1[1] + %b1[1]
   98   %r3 = getelementptr inbounds %struct.Vector4, %struct.Vector4* %r, i64 0, i32 3 ; address of field 3
   99   store float %add10, float* %r3, align 4 ; last of four scalar stores, one per struct field in ascending order
  100   ret void
  101 }